So below is all of my data. The idea is to create a point system scale for the independent variable
fraud_data <-read_dta("csew_fraud_may15aug15_vf.dta")
fraud_data <- filter(fraud_data, v712 >= 0, v712 <=9)
v712 <- c("1","2", "NA", "8", "9")
v712 <- as.numeric(v712)
v712 <- recode(v712, `1` = 2, `2` = 1, `NA` = 0, `8` = 0, `9` = 0)
fraud_data <- filter(fraud_data, v710 >= 0, v710 <=9)
v710 <- c("1","2", "NA", "8", "9")
v710 <- as.numeric(v710)
v710 <- recode(v710, `1` = 2, `2` = 1, `NA` = 0, `8` = 0, `9` = 0)
fraud_data <- filter(fraud_data, v71 >= 0, v71 <=9)
v71 <- c("1","2", "NA", "8", "9")
v71 <- as.numeric(v71)
v71 <- recode(v71, `1` = 2, `2` = 1, `NA` = 0, `8` = 0, `9` = 0)
fraud_data <- filter(fraud_data, racemot >= 0, racemot <=9)
racemot <- c("1","2","NA", "8", "9")
racemot <- as.numeric(racemot)
racemot <- recode(racemot, `1` = 2, `2` = 1, `NA` = 0, `8` = 0, `9` = 0)
fraud_data <- filter(fraud_data, ageoff2 >= 2, ageoff2 <=5)
ageoff2 <- c("2","3", "4", "5", "NA")
ageoff2 <- as.numeric(ageoff2)
ageoff2 <- recode(ageoff2, `2` = 4, `3`= 3, `4` = 2, `5` =1, `NA`=0)
fraud_data <- filter(fraud_data, offsex1 >= 0, offsex1 <=9)
offsex1 <- c("1","2", "NA", "8", "9")
offsex1 <- as.numeric(offsex1)
offsex1 <- recode(offsex1, `1` = 1, `2` = 2, `NA` = 0, `8` = 0, `9` = 0)
fraud_data <- filter(fraud_data, knewoff1 >= 0, knewoff1 <=8)
knewoff1 <- c("1","2", "3", "NA", "8")
knewoff1 <- as.numeric(knewoff1)
knewoff1 <- recode(knewoff1, `1` = 2, `2` = 1, `3` = 1, `8` = 0,`NA` = 0)
fraud_data <- filter(fraud_data, v77 >= 0, v77 <=9)
v77 <- c("1","2", "8", "9", "NA")
v77 <- as.numeric(v77)
v77 <- recode(v77, `1` = 2, `2` = 1, `8` = 0, `9` = 0, `NA` = 0)
fraud_data <- filter(fraud_data, v711 >= 0, v711 <=9)
v711 <- c("1","2", "8", "9", "NA")
v711 <- as.numeric(v711)
v711 <- recode(v711, `1` = 2, `2` = 1, `NA` = 0, `8` = 0, `9` = 0)
severity <- v71 + v712 + v710 + racemot + ageoff2 + offsex1 + knewoff1 + v77 + v711
glimpse(severity)
fraud_data %>%
ggplot(aes(severity)) +
geom_histogram()
hist(severity) comes out as one block and geom_histogram comes back with "Error: Aesthetics must be either length 1 or the same as the data (107): x"
Now there is a lot of data missing because in order to create "severity <-" I had to ignore a lot of variables because they all had to be 1:5
Can anyone see where I went wrong, my lecturer hasn't gotten back to me and the project is due soon