Hi all, am currently using the dataset from "https://data.cityofnewyork.us/api/views/833y-fsy8/rows.csv?accessType=DOWNLOAD" and am needing some assistance to clean up the data before I conduct my visualization.
I am having an issue to adding new column into the code. My current code is as such.
library(dplyr)
library(ggplot2)
nypd_data <- drop_na(nypdshootings) %>% select(-c(INCIDENT_KEY, LOCATION_DESC, X_COORD_CD, Y_COORD_CD, Latitude, Longitude, Lon_Lat, JURISDICTION_CODE, OCCUR_TIME))
#change the date type
nypd_data <- nypd_data %>% mutate(OCCUR_DATE = mdy(OCCUR_DATE))
#change logical boolean into int.
nypd_data$STATISTICAL_MURDER_FLAG [nypd_data$STATISTICAL_MURDER_FLAG == "true"] <- 1
nypd_data$STATISTICAL_MURDER_FLAG [nypd_data$STATISTICAL_MURDER_FLAG == "false"] <- 0
nypd_data$STATISTICAL_MURDER_FLAG <- as.integer(nypd_data$STATISTICAL_MURDER_FLAG)
nypd_murder_boro <- nypd_data %>% group_by(BORO)
#Summarising the dataset
nypd_data %>%
group_by(BORO, OCCUR_DATE,STATISTICAL_MURDER_FLAG) %>%
summarise(nypd_data, STATISTICAL_MURDER_FLAG = sum(STATISTICAL_MURDER_FLAG))
nypd_murder_boro_1 <- nypd_murder_boro %>% group_by(BORO, OCCUR_DATE) %>%
summarize(STATISTICAL_MURDER_FLAG = STATISTICAL_MURDER_FLAG) %>% select(BORO, OCCUR_DATE, STATISTICAL_MURDER_FLAG) %>% ungroup()
# Add new columns
nypd_data %>% add_column(nypd_murder_boro_1cummurder = ave)
nypd_murder_boro_1cummurder<−ave(nypdmurderboro1STATISTICAL_MURDER_FLAG, nypd_murder_boro_1BORO,FUN=cumsum)
nypdmurderboro1[′shooting′]=1nypdmurderboro1cumshooting <- ave(nypd_murder_boro_1shooting,nypdmurderboro1BORO, FUN = cumsum)
nypd_murder_boro_1$murderpercent <- with(nypd_murder_boro_1, cummurder/cumshooting *100)
My error code is as such --> Error: unexpected input in "nypd_murder_boro_1cummurder<−"
Could anyone assist me with cleaning up the last segment of the data for me ? The part on adding new column .