Count how many times sentences appear in another vector

I need to count how many times the sentences in one vector appear in another vector, but I haven't figured it out. This is the vector of sentences:

# Reference names to search for: a character vector of 29 strings, mostly
# hospital names, written in mixed case (a few contain accented characters).
# NOTE(review): "Hospital de Osa Tomas" and "Casas Casajus" look like a single
# name that was split across two elements -- confirm against the original list.
centros_med <- c("Centro Nacional de Rehabilitacion Humberto Araya Rojas",
                 "Hospital Carlos Luis Valverde Vega",
                 "Hospital de Ciudad Neily",
                 "Hospital de Golfito Manuel Mora Valverde",
                 "Hospital de Guapiles",
                 "Hospital de La Anexion",
                 "Hospital de las Mujeres Adolfo Carit Eva",
                 "Hospital de Osa Tomas",
                 "Casas Casajus",
                 "Hospital de San Carlos",
                 "Hospital de San Vito",
                 "Hospital Enrique Baltodano Briceño",
                 "Hospital Fernando Escalante Pradilla",
                 "Hospital Los Chiles",
                 "Hospital Maximiliano Peralta Jimenez",
                 "Hospital Maximiliano Teran Valls",
                 "Hospital Mexico",
                 "Hospital Monseñor Victor Manuel Sanabria Martinez",
                 "Hospital Nacional de Geriatria y Gerontologia Raul Blanco Cervantes",
                 "Hospital Nacional de Niños Carlos Saenz Herrera",
                 "Hospital Nacional Psiquiatrico Manuel Antonio Chapui y Torres",
                 "Hospital Psiquiatrico Roberto Chacon Paut",
                 "Hospital Rafael Ángel Calderon Guardia",
                 "Hospital San Francisco de Asis",
                 "Hospital San Juan de Dios",
                 "Hospital San Vicente de Paul",
                 "Hospital Tony Facio Castro",
                 "Hospital Upala",
                 "Hospital William Allen Taylor")

And this is the vector that I need to evaluate:

base_2017$Norma

[1] "CCSS"
[2] "CAJA COSTARRICENSE DEL SEGURO SOCIAL"
[3] "HOSPITAL SAN RAFAEL DE ALAJUELA"
[4] "HOSPITAL TONY FACIO"
[5] "HOSPITAL SAN RAFAEL DE ALAJUELA"
[6] "HOSPITAL ESCALANTE PRADILLA"
[7] "HOSPITAL SAN RAFAEL DE ALAJUELA"
[8] "HOSPITAL MEXICO"
[9] "HOSPITAL VICTOR MANUEL SANABRIA MARTINEZ"
[10] "HOSPITAL MAX PERALTA DE CARTAGO"
[11] "DIRECTOR DEL HOSPITAL SAN VICENTE DE PAUL"
[12] "HOSPITAL DR RAFAEL ANGEL CALDERON GUARDIA"
[13] "CAJA COSTARRICENSE DEL SEGURO SOCIAL Y HOSPITAL SAN RAFAEL DE ALAJUELA"
[14] "HOSPITAL FERNANDO ESCALANTE PRADILLA"
[15] "CLINICA OFTALMOLOGICA DE LA CCSS"
[16] "CAJA COSTARRICENSE DEL SEGURO SOCIAL Y CLINICA INTEGRADA DE TIBAS"
[17] "JEFES DEL SERVICIO DE NEUROCIRUGIA Y TERAPIA INTENSIVA DEL HOSPITAL MEXICO"
[18] "DIRECCION MEDICA DEL HOSPITAL ESCALANTE PRADILLA EN PEREZ ZELEDÓN"
[19] "CAJA COSTARRICENSE DEL SEGURO SOCIAL"
[20] "CAJA COSTARRICENSE DEL SEGURO SOCIAL"
[21] "HOSPITAL MEXICO, CAJA COSTARRICENSE DEL SEGURO SOCIAL"
[22] "CAJA COSTARRICENSE DEL SEGURO SOCIAL"
[23] "HOSPITAL ESCALANTE PRADILLA"
[24] "HOSPITAL ESCALANTE PRADILLA"
[25] "HOSPITAL CALDERON GUARDIA"
[26] "HOSPITAL CALDERON GUARDIA"
[27] "CAJA COSTARRICENSE DEL SEGURO SOCIAL"

etc...

You can do something like this

library(tidyverse)

# Names to look up, in their original mixed-case spelling (29 entries).
centros_med <- c(
  "Centro Nacional de Rehabilitacion Humberto Araya Rojas",
  "Hospital Carlos Luis Valverde Vega",
  "Hospital de Ciudad Neily",
  "Hospital de Golfito Manuel Mora Valverde",
  "Hospital de Guapiles",
  "Hospital de La Anexion",
  "Hospital de las Mujeres Adolfo Carit Eva",
  "Hospital de Osa Tomas",
  "Casas Casajus",
  "Hospital de San Carlos",
  "Hospital de San Vito",
  "Hospital Enrique Baltodano Briceño",
  "Hospital Fernando Escalante Pradilla",
  "Hospital Los Chiles",
  "Hospital Maximiliano Peralta Jimenez",
  "Hospital Maximiliano Teran Valls",
  "Hospital Mexico",
  "Hospital Monseñor Victor Manuel Sanabria Martinez",
  "Hospital Nacional de Geriatria y Gerontologia Raul Blanco Cervantes",
  "Hospital Nacional de Niños Carlos Saenz Herrera",
  "Hospital Nacional Psiquiatrico Manuel Antonio Chapui y Torres",
  "Hospital Psiquiatrico Roberto Chacon Paut",
  "Hospital Rafael Ángel Calderon Guardia",
  "Hospital San Francisco de Asis",
  "Hospital San Juan de Dios",
  "Hospital San Vicente de Paul",
  "Hospital Tony Facio Castro",
  "Hospital Upala",
  "Hospital William Allen Taylor"
)

# Example data: a data frame with a single character column `norma` holding
# the 27 upper-case entries to be checked against `centros_med`.
base_2017 <- data.frame(
  norma = c(
    "CCSS",
    "CAJA COSTARRICENSE DEL SEGURO SOCIAL",
    "HOSPITAL SAN RAFAEL DE ALAJUELA",
    "HOSPITAL TONY FACIO",
    "HOSPITAL SAN RAFAEL DE ALAJUELA",
    "HOSPITAL ESCALANTE PRADILLA",
    "HOSPITAL SAN RAFAEL DE ALAJUELA",
    "HOSPITAL MEXICO",
    "HOSPITAL VICTOR MANUEL SANABRIA MARTINEZ",
    "HOSPITAL MAX PERALTA DE CARTAGO",
    "DIRECTOR DEL HOSPITAL SAN VICENTE DE PAUL",
    "HOSPITAL DR RAFAEL ANGEL CALDERON GUARDIA",
    "CAJA COSTARRICENSE DEL SEGURO SOCIAL Y HOSPITAL SAN RAFAEL DE ALAJUELA",
    "HOSPITAL FERNANDO ESCALANTE PRADILLA",
    "CLINICA OFTALMOLOGICA DE LA CCSS",
    "CAJA COSTARRICENSE DEL SEGURO SOCIAL Y CLINICA INTEGRADA DE TIBAS",
    "JEFES DEL SERVICIO DE NEUROCIRUGIA Y TERAPIA INTENSIVA DEL HOSPITAL MEXICO",
    "DIRECCION MEDICA DEL HOSPITAL ESCALANTE PRADILLA EN PEREZ ZELEDÓN",
    "CAJA COSTARRICENSE DEL SEGURO SOCIAL",
    "CAJA COSTARRICENSE DEL SEGURO SOCIAL",
    "HOSPITAL MEXICO, CAJA COSTARRICENSE DEL SEGURO SOCIAL",
    "CAJA COSTARRICENSE DEL SEGURO SOCIAL",
    "HOSPITAL ESCALANTE PRADILLA",
    "HOSPITAL ESCALANTE PRADILLA",
    "HOSPITAL CALDERON GUARDIA",
    "HOSPITAL CALDERON GUARDIA",
    "CAJA COSTARRICENSE DEL SEGURO SOCIAL"
  ),
  # Keep `norma` as character rather than factor on R versions before 4.0.
  stringsAsFactors = FALSE
)
# Upper-case the reference names so they can be compared with `norma`,
# whose entries are written in upper case.
centros_med <- str_to_upper(centros_med)

# Keep the rows whose `norma` is exactly equal to one of the reference names,
# then tally the rows per distinct value. `%in%` is a whole-string match, so
# entries that merely contain a reference name as a substring are not counted.
base_2017 %>%
  filter(norma %in% centros_med) %>%
  count(norma)
#> # A tibble: 2 x 2
#>   norma                                    n
#>   <chr>                                <int>
#> 1 HOSPITAL FERNANDO ESCALANTE PRADILLA     1
#> 2 HOSPITAL MEXICO                          1

Created on 2020-01-18 by the reprex package (v0.3.0.9000)

1 Like

Thank you @andresrcs it worked perfectly! I really appreciate it

Great. Please mark the solution for the benefit of those to follow.

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.