I'd start by turning the blank (" ") values into NA
and converting everything to lowercase:
# Example lab data: every column is free text, and a single space (" ")
# stands in for a missing value.
LAB <- data.frame(
  ResOrg = c("Positive", "Detected", "ESCHERICHIA COLI-CRE", "Ebola Disease", "Legionella Finding", " ", "Ecoli Detected", "Salmonella organism", " ", " "),
  TestResult = c("ECOCRE", "Acinetobacter baumannii", " ", "Ecoli", "MSN", "CARBAPENEM RESISTANT KLEBSIELLA PNEUMONIAE", "CRKP", "Ebola", "Candida auris", " "),
  Result = c("CITDCR", "ETACRE", "Test", "KPS", " ", "Ecoli", "KLEPNE", "Ebola", " ", "ENTEROBACTER (KLEBSIELLA) AERO"),
  Test = c("Ebola ", "Ecoli KPS", "Organism", " ", "Klebsiella", "MSN", " ", "NDM", "KLEPNE", " "),
  Notes = c(" ", " ", "SPECIMEN NOTES:258531008 WOUND SWAB L 20130731 V1: Disease RESISTANT EcoliTesting", "SPECIMEN:COLLECTION METHOD ID: URINE - E COLIGREATER THAN 100,000 COLONY FORMING UNITS PER MLOTHER", " ", "Positive for disease Producing", " ", "Tested for NDM, KPS and MSN", "KPS", "Carbapenem ResistantKlebsiella")
)

# Replace blank entries (empty or whitespace-only strings) with NA.
#
# x: a character vector (one column of LAB).
# Returns a character vector of the same length; non-blank values are
# returned unchanged and existing NAs stay NA.
replace_blanks <- function(x) {
  # Operate on the argument itself so each column is cleaned independently.
  x[!nzchar(trimws(x))] <- NA_character_
  x
}

# vapply() over the columns yields a character matrix (one column per
# variable, column names preserved); tolower() keeps the matrix attributes.
LAB <- vapply(LAB, replace_blanks, character(nrow(LAB)))
LAB <- tolower(LAB)
LAB
#>       ResOrg                 TestResult
#>  [1,] "positive"             "ecocre"
#>  [2,] "detected"             "acinetobacter baumannii"
#>  [3,] "escherichia coli-cre" NA
#>  [4,] "ebola disease"        "ecoli"
#>  [5,] "legionella finding"   "msn"
#>  [6,] NA                     "carbapenem resistant klebsiella pneumoniae"
#>  [7,] "ecoli detected"       "crkp"
#>  [8,] "salmonella organism"  "ebola"
#>  [9,] NA                     "candida auris"
#> [10,] NA                     NA
#>       Result                           Test
#>  [1,] "citdcr"                         "ebola "
#>  [2,] "etacre"                         "ecoli kps"
#>  [3,] "test"                           "organism"
#>  [4,] "kps"                            NA
#>  [5,] NA                               "klebsiella"
#>  [6,] "ecoli"                          "msn"
#>  [7,] "klepne"                         NA
#>  [8,] "ebola"                          "ndm"
#>  [9,] NA                               "klepne"
#> [10,] "enterobacter (klebsiella) aero" NA
#>       Notes
#>  [1,] NA
#>  [2,] NA
#>  [3,] "specimen notes:258531008 wound swab l 20130731 v1: disease resistant ecolitesting"
#>  [4,] "specimen:collection method id: urine - e coligreater than 100,000 colony forming units per mlother"
#>  [5,] NA
#>  [6,] "positive for disease producing"
#>  [7,] NA
#>  [8,] "tested for ndm, kps and msn"
#>  [9,] "kps"
#> [10,] "carbapenem resistantklebsiella"
Created on 2023-09-01 with reprex v2.0.2
Then do the pattern matching with %in%,
as suggested here.
To deal with the case where ResOrg
has the value No Organism,
just pass the subset (or filtered) portion of LAB_Org
as the argument to the fallbacks. I don't see how #3 can happen, because you are creating all-new objects.