Subtract columns from two different datasets

JojoSouza · October 31, 2021, 11:47pm

I would like to know how I can do a subtraction between the dataset I got (All) with my df1 dataset. I inserted an image to illustrate the output I want. So you'll notice that I want to subtract the value of the coef of All from the columns of DR0.. of the df1.

library(dplyr)
library(tidyverse)
library(lubridate)

df1 <- structure(
  list(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28"),
       date2 = c("2021-06-30","2021-06-30","2021-07-01","2021-07-01"),
       Category = c("FDE","ABC","FDE","ABC"),
       Week= c("Wednesday","Wednesday","Friday","Friday"),
       DR1 = c(4,1,6,3),
       DR01 = c(4,1,4,3), DR02= c(4,2,6,2),DR03= c(9,5,4,7),
       DR04 = c(5,4,3,2),DR05 = c(5,4,5,4),
       DR06 = c(2,4,3,2)),
  class = "data.frame", row.names = c(NA, -4L))

> df1
       date1      date2 Category      Week DR1 DR01 DR02 DR03 DR04 DR05 DR06
1 2021-06-28 2021-06-30      FDE Wednesday   4    4    4    9    5    5    2
2 2021-06-28 2021-06-30      ABC Wednesday   1    1    2    5    4    4    4
3 2021-06-28 2021-07-01      FDE    Friday   6    4    6    4    3    5    3
4 2021-06-28 2021-07-01      ABC    Friday   3    3    2    7    2    4    2

return_coef <- function(dmda, CategoryChosse) {
  
  x<-df1 %>% select(starts_with("DR0"))
  
  x<-cbind(df1, setNames(df1$DR1 - x, paste0(names(x), "_PV")))
  PV<-select(x, date2,Week, Category, DR1, ends_with("PV"))
  
  med<-PV %>%
    group_by(Category,Week) %>%
    summarize(across(ends_with("PV"), median))
  
  SPV<-df1%>%
    inner_join(med, by = c('Category', 'Week')) %>%
    mutate(across(matches("^DR0\\d+$"), ~.x + 
                    get(paste0(cur_column(), '_PV')),
                  .names = '{col}_{col}_PV')) %>%
    select(date1:Category, DR01_DR01_PV:last_col())
  
  SPV<-data.frame(SPV)
  
  mat1 <- df1 %>%
    filter(date2 == dmda, Category == CategoryChosse) %>%
    select(starts_with("DR0")) %>%
    pivot_longer(cols = everything()) %>%
    arrange(desc(row_number())) %>%
    mutate(cs = cumsum(value)) %>%
    filter(cs == 0) %>%
    pull(name)
  
  (dropnames <- paste0(mat1,"_",mat1, "_PV"))
  
  SPV <- SPV %>%
    filter(date2 == dmda, Category == CategoryChosse) %>%
    select(-any_of(dropnames))
  
  datas<-SPV %>%
    filter(date2 == ymd(dmda)) %>%
    group_by(Category) %>%
    summarize(across(starts_with("DR0"), sum)) %>%
    pivot_longer(cols= -Category, names_pattern = "DR0(.+)", values_to = "val") %>%
    mutate(name = readr::parse_number(name))
  colnames(datas)[-1]<-c("Days","Numbers")
  
  datas <- datas %>% 
    group_by(Category) %>% 
    slice((as.Date(dmda) - min(as.Date(df1$date1) [
      df1$Category == first(Category)])):max(Days)+1) %>%
    ungroup
  
  mod <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 0,b2 = 0),data = datas, algorithm = "port")
  as.numeric(coef(mod)[2])
  
}

All<-cbind(df1 %>% select(date2, Category), coef = mapply(return_coef, df1$date2, df1$Category))
> All
       date2 Category coef
1 2021-06-30      FDE    4
2 2021-06-30      ABC    1
3 2021-07-01      FDE    6
4 2021-07-01      ABC    3

Output I want

technocrat · November 1, 2021, 12:22am

The subtraction is straightforward in {base}, as shown below. The output shows the differences represented textually, however. Is your question on constructing the strings?

suppressPackageStartupMessages({
  library(dplyr)
  library(lubridate)
  library(tidyr)
})
df1 <- data.frame(date1= c("2021-06-28","2021-06-28","2021-06-28","2021-06-28"),
       date2 = c("2021-06-30","2021-06-30","2021-07-01","2021-07-01"),
       Category = c("FDE","ABC","FDE","ABC"),
       Week= c("Wednesday","Wednesday","Friday","Friday"),
       DR1 = c(4,1,6,3),
       DR01 = c(4,1,4,3), DR02= c(4,2,6,2),DR03= c(9,5,4,7),
       DR04 = c(5,4,3,2),DR05 = c(5,4,5,4),
       DR06 = c(2,4,3,2))

return_coef <- function(dmda, CategoryChosse) {
  
  x<-df1 %>% select(starts_with("DR0"))
  
  x<-cbind(df1, setNames(df1$DR1 - x, paste0(names(x), "_PV")))
  PV<-select(x, date2,Week, Category, DR1, ends_with("PV"))
  
  med<-PV %>%
    group_by(Category,Week) %>%
    summarize(across(ends_with("PV"), median))
  
  SPV<-df1%>%
    inner_join(med, by = c('Category', 'Week')) %>%
    mutate(across(matches("^DR0\\d+$"), ~.x + 
                    get(paste0(cur_column(), '_PV')),
                  .names = '{col}_{col}_PV')) %>%
    select(date1:Category, DR01_DR01_PV:last_col())
  
  SPV<-data.frame(SPV)
  
  mat1 <- df1 %>%
    filter(date2 == dmda, Category == CategoryChosse) %>%
    select(starts_with("DR0")) %>%
    pivot_longer(cols = everything()) %>%
    arrange(desc(row_number())) %>%
    mutate(cs = cumsum(value)) %>%
    filter(cs == 0) %>%
    pull(name)
  
  (dropnames <- paste0(mat1,"_",mat1, "_PV"))
  
  SPV <- SPV %>%
    filter(date2 == dmda, Category == CategoryChosse) %>%
    select(-any_of(dropnames))
  
  datas<-SPV %>%
    filter(date2 == ymd(dmda)) %>%
    group_by(Category) %>%
    summarize(across(starts_with("DR0"), sum)) %>%
    pivot_longer(cols= -Category, names_pattern = "DR0(.+)", values_to = "val") %>%
    mutate(name = readr::parse_number(name))
  colnames(datas)[-1]<-c("Days","Numbers")
  
  datas <- datas %>% 
    group_by(Category) %>% 
    slice((as.Date(dmda) - min(as.Date(df1$date1) [
      df1$Category == first(Category)])):max(Days)+1) %>%
    ungroup
  
  mod <- nls(Numbers ~ b1*Days^2+b2,start = list(b1 = 0,b2 = 0),data = datas, algorithm = "port")
  as.numeric(coef(mod)[2])
  
}

All<-cbind(df1 %>% select(date2, Category), coef = mapply(return_coef, df1$date2, df1$Category))
#> `summarise()` has grouped output by 'Category'. You can override using the `.groups` argument.
#> `summarise()` has grouped output by 'Category'. You can override using the `.groups` argument.
#> `summarise()` has grouped output by 'Category'. You can override using the `.groups` argument.
#> `summarise()` has grouped output by 'Category'. You can override using the `.groups` argument.

diffs <- df1[,5:11] - All[,3]
df2   <- df1[,1:4]
df2[,5:11] <- diffs
df2
#>        date1      date2 Category      Week           DR1          DR01
#> 1 2021-06-28 2021-06-30      FDE Wednesday -6.360605e-08 -6.360605e-08
#> 2 2021-06-28 2021-06-30      ABC Wednesday -1.300449e-08 -1.300449e-08
#> 3 2021-06-28 2021-07-01      FDE    Friday -1.020952e-07 -2.000000e+00
#> 4 2021-06-28 2021-07-01      ABC    Friday -1.160027e-08 -1.160027e-08
#>            DR02 DR03       DR04       DR05 DR06
#> 1 -6.360605e-08    5  0.9999999  0.9999999   -2
#> 2  1.000000e+00    4  3.0000000  3.0000000    3
#> 3 -1.020952e-07   -2 -3.0000001 -1.0000001   -3
#> 4 -1.000000e+00    4 -1.0000000  1.0000000   -1

JojoSouza · November 1, 2021, 12:28am

Thanks for the answer @technocrat , but the output table I inserted in the question is a little different from yours.

technocrat · November 1, 2021, 12:31am

That was my question. The caption asked for subtraction, but output shows character values. Do we differ in the calculation or just in the presentation?

system · November 22, 2021, 12:31am

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.