Hi,
I am trying to merge all string variables including comments longer than 3 characters so URN, Q6, Score and Year would be excluded in this df:
source <- data.frame(
stringsAsFactors = FALSE,
URN = c("H732585",
"H933818","H902416",
"H793061","H592160","H972119",
"H945230","H955322",
"H814977","H972992"),
QN3a = c("dsafs",NA,"aaa",
"djdjd fdfj","fff",
"f d ffsa j dsf aa sd","dffg ddjfcj mj",
"dkvvf ffk vjf fj","ttt","fddd"),
QN3b = c(NA,NA,"nil",NA,NA,
"no comments",NA,NA,"all good",NA),
QN5a = c("xxxxx","Nothing at all",
"I did not have any","Non",NA,
"Nothing","N/A","None",NA,
"Nothing really"),
QN5b = c("All good",NA,NA,NA,
"nothing",NA,NA,"na","daa ffss fssfsff sfasfa",NA),
Q6 = c("Yes","No",NA,NA,
"Yes",NA,NA,"No",NA,NA),
Score = c(100,90,35,20,50,90,
100,100,90,80),
Year = c(2021,2021,2020,2020,
2021,2021,2021,2021,2020,2020)
)
library(dplyr)
library(stringr)
library(tidyr)
result <- source %>%
mutate_at(vars(matches("QN5$|QN3$")), ~str_remove_all(.x, "^.{1,5}$")) %>% # Remove sentences with less than 5 characters
mutate_at(vars(matches("QN5$|QN3$")), ~str_remove_all(.x, "^(All//sgood|No\\scomments|N.?A|Nothing|None|Nil)$")) %>% # Remove sentences with no comments
mutate(all_comments = paste(QN3a,QN3b,QN5a,QN5b, sep="/"),
all_comments = str_remove_all(all_comments, "NA"), # Removes NAs
all_comments = str_remove_all(all_comments, "[:cntrl:]"), # Removes control characters like /n/r
all_comments = str_replace_all(all_comments, "\\s\\s+", " "), #Removes duplicated /
all_comments = str_replace_all(all_comments, "//+", "/"), # Removes extra spaces
all_comments = str_remove (all_comments, "/$"), # Removes / in the end
all_comments = str_remove (all_comments, "^/")) %>% # Removes / in the beginning
mutate(All_len = nchar(all_comments),
All_wcount = str_count(all_comments,'\\w+'))
Unfortunately:
- My str_remove_all rules do not work
- I don't know how to specify a code to take into account str with comments longer than 10 characters, rather then specifying them clearly in my code (QN3a,QN3b,QN5a,QN5b)
Can you help?