Filtering data with more than one String?

Hi, I'm trying to filter this data by several words.

I'm trying to select the rows whose column "Nombre.producto.genrico" contains any of the words "BIC", "INFUSION", "VOLUMETRICA", or "BOMBA", using the grepl function.

I'm trying not to brute-force every other result with grepl. Is there another way, or another function that I can use?

The main problem I have is that grepl also matches words like "BICICLETA" because I search for BIC, but BIC also stands for "Bomba Infusion Continua", and I don't know how to filter out the unwanted matches.

The code I'm using is the following:

setwd("D:/OWMED/Database/Licitaciones/2020")

library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.1.3
library(reprex)
#> Warning: package 'reprex' was built under R version 4.1.3

# Load the three CSV parts and stack them into a single data frame.
Licita_1 <- read.csv2("lic_2020-1.csv")
Licita_2 <- read.csv2("lic_2020-2.csv")
Licita_3 <- read.csv2("lic_2020-3.csv")
Licita_2019 <- bind_rows(Licita_1, Licita_2, Licita_3)

# Keep only the columns of interest, then keep the rows whose product name
# matches any of the keywords.
# NOTE: the pattern is unanchored, so a keyword such as BIC also matches when
# it appears inside a longer word (see "VACUNA ANTIRRÁBICA" in the output).
Licita_data <- Licita_2019 %>%
  select(
    "CodigoExterno",
    "NombreProveedor",
    "Nombre.producto.genrico",
    "Oferta.seleccionada",
    "NombreOrganismo",
    "CantidadAdjudicada",
    "sector",
    "TiempoDuracionContrato"
  ) %>%
  filter(
    grepl(
      "BOMBA|INFUSION|BIC|VOLUMETRICA|DOSIFICADORES",
      Nombre.producto.genrico
    )
  )

str(Licita_data, 3)
#> 'data.frame':    2073 obs. of  8 variables:
#>  $ CodigoExterno          : chr  "2585-149-LE19" "2585-149-LE19" "1058039-1-LQ19" "1058039-1-LQ19" ...
#>  $ NombreProveedor        : chr  "AGROMAIPO" "OSM Ltda." "Caribean Pharma Ltda" "LABORATORIOS RECALCINE S.A" ...
#>  $ Nombre.producto.genrico: chr  "VACUNA ANTIRRÁBICA" "VACUNA ANTIRRÁBICA" "ÁCIDO ASCÓRBICO" "ÁCIDO ASCÓRBICO" ...
#>  $ Oferta.seleccionada    : chr  "No Seleccionada" "No Seleccionada" "No Seleccionada" "No Seleccionada" ...
#>  $ NombreOrganismo        : chr  "I MUNICIPALIDAD DE ARICA" "I MUNICIPALIDAD DE ARICA" "SERVICIO DE SALUD CONCEPCION ARAUCO CONS" "SERVICIO DE SALUD CONCEPCION ARAUCO CONS" ...
#>  $ CantidadAdjudicada     : num  0 0 0 0 0 1 0 0 0 1 ...
#>  $ sector                 : chr  "Municipalidades" "Municipalidades" "Salud" "Salud" ...
#>  $ TiempoDuracionContrato : int  0 0 24 24 24 24 24 24 24 24 ...

Created on 2022-03-31 by the reprex package (v2.0.1)

Is the second version of filter closer to what you want?

library(dplyr)

# Small example data set that contains both wanted and unwanted matches.
Licita_data <- data.frame(
  Nombre.producto.genrico = c(
    "ALFA", "BOMBA", "BIC", "BICICLETA", "INFUSION A", "INFUSIONES"
  )
)

keywords <- c("BOMBA", "INFUSION", "BIC", "VOLUMETRICA", "DOSIFICADORES")

# \\b is a word boundary: wrapping each keyword in \\b ... \\b makes it match
# only as a whole word, so BIC no longer matches inside BICICLETA.
bounded_keywords <- paste0("\\b", keywords, "\\b")
keyword_pattern <- paste(bounded_keywords, collapse = "|")
keyword_pattern
#> [1] "\\bBOMBA\\b|\\bINFUSION\\b|\\bBIC\\b|\\bVOLUMETRICA\\b|\\bDOSIFICADORES\\b"

# First version: plain substring matching (also keeps BICICLETA, INFUSIONES).
Licita_data %>%
  filter(
    grepl(
      "BOMBA|INFUSION|BIC|VOLUMETRICA|DOSIFICADORES",
      Nombre.producto.genrico
    )
  )
#>   Nombre.producto.genrico
#> 1                   BOMBA
#> 2                     BIC
#> 3               BICICLETA
#> 4              INFUSION A
#> 5              INFUSIONES

# Second version: whole-word matching with the \\b-delimited pattern.
Licita_data %>%
  filter(grepl(keyword_pattern, Nombre.producto.genrico))
#>   Nombre.producto.genrico
#> 1                   BOMBA
#> 2                     BIC
#> 3              INFUSION A

Created on 2022-03-30 by the reprex package (v2.0.1)

It is really close, but I also get results like

"SETS DE BOMBA DE INCENDIO"
"PIEZAS DE REPUESTO DE LA BOMBA DE POZO"

Also, I didn't really understand the use of the \b boundary.

Thank you in advance.

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.