Split dataframe in two dataframes with different formats

jynusmac · May 16, 2023, 11:43am

Starting from this df:

df <- structure(list(Cod = "007", Mun = “Siguenza”, Mesas = "00002", 
                     Censo = "00000731", Escrut = "00000100", `%Censo` = "100", 
                     Votantes = "00000480", `%Votantes` = "00060", Abst = "00000220", 
                     `%Abst` = "00035", Blanc = "00000100", `%Blanc` = "00010", 
                     Nulos = "00000100", `%Nulos` = "00010", Elegir = "007", P1 = "0004", 
                     S1 = "PSE", V1 = "00000300", `%V1` = "00040", C1 = "005", 
                     P2 = "2187", S2 = "CBiz", V2 = "00000100", `%V2` = "00030", 
                     C2 = "001", P3 = "0002", S3 = "POP", V3 = "00000100", `%V3` = "00030", 
                     C3 = "001"), row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame"))

I want to change character to number (except Mun, S1, S2 and S3), remove leading zeros and obtain two dataframes with this structure:

Cod		7
Municipio		Siguenza
Mesas		2
Censo		731
Escrutado		100
%Censo		100
Votantes		480
%Votantes		60
Abst		220
%Abst		35
Blanco		100
%Blanco		10
Nulos		100
%Nulos		10
Elegir		7

and

Id	Part	Vot	%Vot	Elec
4	PSE	300	40	5
2187	Cbiz	100	30	1
2	POP	100	30	1

where id is equal to P1, P2 and P3; part is equal to S1,S2 and S3 and so on. No idea where to start.

Regards.

FJCC · May 16, 2023, 4:22pm

I think this gets you most of what you want. The value column of DF1 cannot be numeric because it contains the text "Siguenza".

df <- structure(list(Cod = "007", Mun = "Siguenza", Mesas = "00002", 
                     Censo = "00000731", Escrut = "00000100", `%Censo` = "100", 
                     Votantes = "00000480", `%Votantes` = "00060", Abst = "00000220", 
                     `%Abst` = "00035", Blanc = "00000100", `%Blanc` = "00010", 
                     Nulos = "00000100", `%Nulos` = "00010", Elegir = "007", P1 = "0004", 
                     S1 = "PSE", V1 = "00000300", `%V1` = "00040", C1 = "005", 
                     P2 = "2187", S2 = "CBiz", V2 = "00000100", `%V2` = "00030", 
                     C2 = "001", P3 = "0002", S3 = "POP", V3 = "00000100", `%V3` = "00030", 
                     C3 = "001"), row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame"))
library(tidyr)
library(stringr)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
DF1 <- df |> select(Cod:Elegir) |> 
  pivot_longer(cols = everything(),names_to = "Category") |> 
  mutate(value = str_remove(value, "^0+"))
DF1
#> # A tibble: 15 × 2
#>    Category  value   
#>    <chr>     <chr>   
#>  1 Cod       7       
#>  2 Mun       Siguenza
#>  3 Mesas     2       
#>  4 Censo     731     
#>  5 Escrut    100     
#>  6 %Censo    100     
#>  7 Votantes  480     
#>  8 %Votantes 60      
#>  9 Abst      220     
#> 10 %Abst     35      
#> 11 Blanc     100     
#> 12 %Blanc    10      
#> 13 Nulos     100     
#> 14 %Nulos    10      
#> 15 Elegir    7

DF2 <- df |> select(matches("P\\d+|S\\d+|V\\d+|%V\\d+|c\\d+")) |> 
  pivot_longer(cols = everything())
DF2$name <- rep(c("Id","Part","Vot","%Vot","Elec"), nrow(DF2)/5)
DF2 <- DF2 |> mutate(Row = rep(1:(nrow(DF2)/5),each=5)) |> 
  pivot_wider(names_from = "name", values_from = "value") |> 
  mutate(across(.cols = c("Id","Vot","%Vot","Elec"), .fns = as.numeric)) |> 
  select(-Row)

DF2
#> # A tibble: 3 × 5
#>      Id Part    Vot `%Vot`  Elec
#>   <dbl> <chr> <dbl>  <dbl> <dbl>
#> 1     4 PSE     300     40     5
#> 2  2187 CBiz    100     30     1
#> 3     2 POP     100     30     1

^{Created on 2023-05-16 with reprex v2.0.2}

system · May 23, 2023, 4:22pm

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.