library(dplyr)
library(tidyr)
library(lubridate)
library(data.table)
#database
df1 <- data.frame( Id = rep(1:5, length=900),
date1 = as.Date( "2021-12-01"),
date2= rep(seq( as.Date("2021-01-01"), length.out=450, by=1), each = 2),
Category = rep(c("ABC", "EFG"), length.out = 900),
Week = rep(c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
"Saturday", "Sunday"), length.out = 900),
DR1 = sample( 200:250, 900, repl=TRUE),
setNames( replicate(365, { sample(0:900, 900)}, simplify=FALSE),
paste0("DRM", formatC(1:365, width = 2, format = "d", flag = "0"))))
# First idea: Calculate the median of the values resulting from the subtraction between DR01 and the values of the DRM columns
selection = startsWith(names(df1), "DRM")
df1[selection][is.na(df1[selection])] = 0
data1<-subset(df1,df1$date2<df1$date1)
dt1 <- as.data.table(data1)
cols <- grep("^DRM", colnames(dt1), value = TRUE)
result_median <-
dt1[, (paste0(cols, "_PV")) := DR1 - .SD, .SDcols = cols
][, lapply(.SD, median), by = .(Id, Category, Week), .SDcols = paste0(cols, "_PV") ]
# Second idea: After obtaining the median, I add the values found with the values of the DRM columns of my df1 database.
SP<-df1%>%
inner_join(result_median, by = c('Id','Category', 'Week')) %>%
mutate(across(matches("^DRM\\d+$"), ~.x +
get(paste0(cur_column(), '_PV')),
.names = '{col}_{col}_PV')) %>%
select(Id:Category, DRM01_DRM01_PV:last_col())
SP<-data.frame(SP)
# Third idea: The idea here is to specifically filter a line from `SP`, which will depend on what the user chooses, for that it will be necessary to choose an Id, date and Category.
# This code remove_values_0 I use because sometimes i have values equal to zero so i remove these columns ((this question was solved here: https://stackoverflow.com/questions/69452882/delete-column-depending-on-the-date-and-code-you-choose)
remove_values_0 <- df1 %>%
dplyr::filter(Id==idd,date2 == ymd(dmda), Category == CategoryChosse) %>%
select(starts_with("DRM")) %>%
pivot_longer(cols = everything()) %>%
arrange(desc(row_number())) %>%
mutate(cs = cumsum(value)) %>%
dplyr::filter(cs == 0) %>%
pull(name)
(dropnames <- paste0(remove_values_0,"_",remove_values_0, "_PV"))
# Selecting the id, dmda and CategoryChoose
idd="5"
dmda="2022-03-19"
CategoryChosse="ABC"
filterid_date_category <- SP %>%
filter(Id==idd,date2 == ymd(dmda), Category == CategoryChosse) %>%
select(-any_of(dropnames))
This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.
If you have a query related to it or one of the replies, start a new topic and refer back with a link.