It occurs to me that another benefit of the lookup-table alternative I suggested in the other thread is that it also lets you sidestep the non-standard evaluation business:
library(tidyverse)
# Example data: a single character column of Gleason scores, some of
# which are NA
mrgb_trus <- data.frame(
  MRGB_gleason = c(
    "3+4", "4",   "3+4", "4+4", "3+3", NA,    "3+4", "3+3", NA,    "4+3",
    "3+3", "3+4", "3+4", NA,    "3",   "3+4", NA,    NA,    NA,    NA,
    "4+3", "3+4", "3+3", "4+3", "4+4", "4+5", "3+3", "4+3", "4+3", NA,
    NA,    "3+3", "4+4", "3+4", "4+5", "3+3", "5+4", NA,    NA,    "3+4",
    "4+3", NA,    "3+3", "4+3", "3+4", "3+4", "3+4", NA,    "4+4", "4+3",
    "3+4", "3+4"
  ),
  stringsAsFactors = FALSE
)
# Lookup table: each Gleason score (first column) paired position-wise
# with its grade group (second column); the NA score is paired with "0"
mrgb_lookup <- data.frame(
  gleas_score = c(
    "5+4", "5+5", "4+5", "4+4", "4+3", "3+4", "3+3", "3", "4", NA
  ),
  gleas_grd_grp = c(
    "5", "5", "5", "4", "3", "2", "1", "1", "1", "0"
  ),
  stringsAsFactors = FALSE
)
#' Add a Gleason Grade Group column to a data frame via a lookup table
#'
#' Inner-joins `df` to `lookup`, matching the `colname_gs` column of `df`
#' against the first column of `lookup`, then renames the last column of
#' the joined result to `colname_ggg`.
#'
#' @param df A data frame containing a column of Gleason scores.
#' @param lookup A data frame whose first column holds Gleason scores and
#'   whose last column holds the corresponding Grade Groups. Must have at
#'   least two columns.
#' @param colname_gs A single string: name of the Gleason score column
#'   in `df`.
#' @param colname_ggg A single string: name to give the Grade Group
#'   column in the result.
#' @return The joined data frame. Because this is an inner join, rows of
#'   `df` whose score has no match in `lookup` are not returned.
gs_to_ggg <- function(df, lookup, colname_gs, colname_ggg) {
  stopifnot(
    is.data.frame(df),
    is.data.frame(lookup),
    is.character(colname_gs), length(colname_gs) == 1L,
    is.character(colname_ggg), length(colname_ggg) == 1L
  )
  if (!colname_gs %in% names(df)) {
    stop("Column `", colname_gs, "` not found in `df`.", call. = FALSE)
  }
  # With fewer than two columns the join would add nothing, and the rename
  # below would silently clobber the name of an existing `df` column.
  if (ncol(lookup) < 2L) {
    stop(
      "`lookup` must have at least two columns: ",
      "Gleason scores first, Grade Groups last.",
      call. = FALSE
    )
  }

  # Build the `by` specification as c(<name in df> = <name in lookup>).
  key_map <- stats::setNames(names(lookup)[1], colname_gs)
  joined <- dplyr::inner_join(df, lookup, by = key_map)

  # The last column of the joined result comes from the lookup table's
  # Grade Groups; rename it to the value of `colname_ggg`.
  names(joined)[ncol(joined)] <- colname_ggg
  joined
}
# Attach a grade group to every score in the example data and print the
# first 20 rows of the result
mrgb_trus %>%
  gs_to_ggg(
    lookup = mrgb_lookup,
    colname_gs = "MRGB_gleason",
    colname_ggg = "MRGGG"
  ) %>%
  head(n = 20)
#> MRGB_gleason MRGGG
#> 1 3+4 2
#> 2 4 1
#> 3 3+4 2
#> 4 4+4 4
#> 5 3+3 1
#> 6 <NA> 0
#> 7 3+4 2
#> 8 3+3 1
#> 9 <NA> 0
#> 10 4+3 3
#> 11 3+3 1
#> 12 3+4 2
#> 13 3+4 2
#> 14 <NA> 0
#> 15 3 1
#> 16 3+4 2
#> 17 <NA> 0
#> 18 <NA> 0
#> 19 <NA> 0
#> 20 <NA> 0
Created on 2018-07-03 by the reprex package (v0.2.0).