Add columns to data frame based on row values in another column, include equations

In this code, the effort is to produce new columns based on values in a row of a data frame, and use a simple equation. In this example, I go from 4 columns to 14 columns. The last 4 I would like to be the result of a simple product.

library(tidyverse)

st_df1 <- data.frame(id=c('45300','27260','33100','36740','45300','27260','33100','36740'),
                    WGTP=c(34, 36, 40, 49,54, 16, 70, 59), 
                    pctp=c(0.11, 1, 0.37, 0.82,0.11, 1, 0.37, 0.82))
st_df1

# replicate ID so it is not lost when running pivot_wider
st_df1$id2 <- st_df1$id

# get one observatio for each ID
uid <- unique(st_df1$`id`,2)

# where there are 5 consecutive digits, create a new column
st_df2 <- st_df1 %>%
  pivot_wider(names_from = 'id2', values_from = 'pctp', values_fill = list(value = 0)) %>% 
  rename_with(~ gsub("(\\d{5})", "wt_\\1_y22", .x))

# get rid of NA
st_df2 <- st_df2 %>% replace(is.na(.), 0)
st_df2

# this is achieving manually the end result I want
# it is the product of the weight for each (wt...) and WGTP
st_df2$rhh45300_to22 <- st_df2$wt_45300_y22 * st_df2$WGTP
st_df2$rhh27260_to22 <- st_df2$wt_27260_y22 * st_df2$WGTP
st_df2$rhh33100_to22 <- st_df2$wt_33100_y22 * st_df2$WGTP
st_df2$rhh36740_to22 <- st_df2$wt_36740_y22 * st_df2$WGTP

st_df2

# attempt to automate is below
st_tib <- tibble(st_df2)

# I end up with TRUE/FALSE, but want an equation, replicating manual work above
lvars <- map(uid, ~ quo(str_detect(st_df2$id, !!.x)))
st_df3 <- st_df2 %>% 
  mutate(!!!lvars)
st_df3

The attempt ends with the columns I want created, but only a true/false result. I'm not sure how to multiply two columns as a part of this.

I used the .names argument of the across() function to construct the names of the output columns.

library(tidyverse)
#> Warning: package 'ggplot2' was built under R version 4.3.3

st_df1 <- data.frame(id=c('45300','27260','33100','36740','45300','27260','33100','36740'),
                     WGTP=c(34, 36, 40, 49,54, 16, 70, 59), 
                     pctp=c(0.11, 1, 0.37, 0.82,0.11, 1, 0.37, 0.82))
st_df1
#>      id WGTP pctp
#> 1 45300   34 0.11
#> 2 27260   36 1.00
#> 3 33100   40 0.37
#> 4 36740   49 0.82
#> 5 45300   54 0.11
#> 6 27260   16 1.00
#> 7 33100   70 0.37
#> 8 36740   59 0.82

# replicate ID so it is not lost when running pivot_wider
st_df1$id2 <- st_df1$id

# get one observatio for each ID
uid <- unique(st_df1$`id`,2)

# where there are 5 consecutive digits, create a new column
st_df2 <- st_df1 %>%
  pivot_wider(names_from = 'id2', values_from = 'pctp', values_fill = list(value = 0)) %>% 
  rename_with(~ gsub("(\\d{5})", "wt_\\1_y22", .x))

# get rid of NA
st_df2 <- st_df2 %>% replace(is.na(.), 0)
st_df2
#> # A tibble: 8 × 6
#>   id     WGTP wt_45300_y22 wt_27260_y22 wt_33100_y22 wt_36740_y22
#>   <chr> <dbl>        <dbl>        <dbl>        <dbl>        <dbl>
#> 1 45300    34         0.11            0         0            0   
#> 2 27260    36         0               1         0            0   
#> 3 33100    40         0               0         0.37         0   
#> 4 36740    49         0               0         0            0.82
#> 5 45300    54         0.11            0         0            0   
#> 6 27260    16         0               1         0            0   
#> 7 33100    70         0               0         0.37         0   
#> 8 36740    59         0               0         0            0.82

TEST <- st_df2 |> mutate(across(starts_with("wt"),
                        ~.x * WGTP,
                        .names = "rhh{str_extract({.col}, '\\\\d.+_')}to22"))
# this is achieving manually the end result I want
# it is the product of the weight for each (wt...) and WGTP
st_df2$rhh45300_to22 <- st_df2$wt_45300_y22 * st_df2$WGTP
st_df2$rhh27260_to22 <- st_df2$wt_27260_y22 * st_df2$WGTP
st_df2$rhh33100_to22 <- st_df2$wt_33100_y22 * st_df2$WGTP
st_df2$rhh36740_to22 <- st_df2$wt_36740_y22 * st_df2$WGTP


identical(TEST, st_df2)
#> [1] TRUE

Created on 2024-07-28 with reprex v2.0.2

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.