Loading some necessary libraries:
library("dplyr")
printf = function(...) cat(sprintf(...))
myeval = function(code, envir = NULL) {
eval(parse(text = code), envir = envir)
}
Running a very simple experiment:
dataset = data.frame(
altitude = as.double(c(18.172164, 23.217744, 26.650505, 37.851856, 26.627905)),
x1 = as.integer(c(15, 39, 1, 73, 8)),
x2 = as.integer(c(35, 52, 65, 60, 51)),
zip = as.factor(c("10204", "10406", "10107", "10807", "10106"))
)
dataset
## altitude x1 x2 zip
## 1 18.17216 15 35 10204
## 2 23.21774 39 52 10406
## 3 26.65050 1 65 10107
## 4 37.85186 73 60 10807
## 5 26.62790 8 51 10106
encoding = data.frame(
encoding = c(39.1, 38.2, 37.3, 36.4, 35.5),
row.names = c("10204", "10406", "10107", "10807", "10106")
)
encoding
## encoding
## 10204 39.1
## 10406 38.2
## 10107 37.3
## 10807 36.4
## 10106 35.5
I think something is wrong here:
varname = "zip" # it is a requirement that this value gets passed as a string
dataset_encoded = myeval(
sprintf('dataset %%>%% mutate(%s_encoded = encoding[%s, "encoding"])', varname, varname),
envir = environment()
)
printf("After running the code above, I get the follwoing:\n")
## After running the code above, I get the follwoing:
dataset_encoded
## altitude x1 x2 zip zip_encoded
## 1 18.17216 15 35 10204 37.3
## 2 23.21774 39 52 10406 36.4
## 3 26.65050 1 65 10107 38.2
## 4 37.85186 73 60 10807 35.5
## 5 26.62790 8 51 10106 39.1
Expected result: (according to the encoding we got earlier)
## altitude x1 x2 zip zip_encoded
## 1 18.17216 15 35 10204 39.1
## 2 23.21774 39 52 10406 38.2
## 3 26.65050 1 65 10107 37.3
## 4 37.85186 73 60 10807 36.4
## 5 26.62790 8 51 10106 35.5
My Question:
Do you know what is wrong on the mutate
sentence above?