I'm attempting to write a function which compares the factor columns with the same columns names in two data frames. The function fails to return the correct result which should be NA's in d2 for z and zz fields
Identify rows in data frame d2 column c1 and c2 not in data frame d1 column c1 and c2 respectively, replace values with NA.
c1 <- c("A", "B", "C", "D", "E")
c2 <- c("AA", "BB", "CC", "DD", "EE")
d1 <- data.frame(c1, c2) # base data
c1 <- c("z", "B", "C", "D", "E")
c2 <- c("AA", "zz", "CC", "DD", "EE")
d2 <- data.frame(c1, c2) # new data
v <- colnames(d1)
replace <- NA
x <- d2[v]
repFact = function(x, d1, replace){
x1 <- unique(d1[,v]) # unique values in column of base data
y <- x # all values of column in new data
id <- which(!(y %in% x1)) # find id's of column values where y has values not in x1
x[id, v] <- NA # replace those values with NA
x
return(x)
}
d3 <- lapply(d2[v], repFact, d1[v], replace)