The code below works very well, however when I test for another database it gives an error. First, I'll show you the example that works well. If you can test it, I'd appreciate it. Every help is welcome.
database1
df1 <- data.frame( Id = rep(1:5, length=900),
date1 = as.Date( "2021-12-01"),
date2= rep(seq( as.Date("2021-01-01"), length.out=450, by=1), each = 2),
Category = rep(c("ABC", "EFG"), length.out = 900),
Week = rep(c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
"Saturday", "Sunday"), length.out = 900),
DR1 = sample( 200:250, 900, repl=TRUE),
setNames( replicate(365, { sample(0:900, 900)}, simplify=FALSE),
paste0("DRM", formatC(1:365, width = 2, format = "d", flag = "0"))))
idd<-"2"
dmda<-"2021-12-10"
category<-"ABC"
Code:
library(dplyr)
library(tidyr)
library(lubridate)
library(data.table)
Rcpp::sourceCpp(code = '
#include <Rcpp.h>
// [[Rcpp::export]]
double mediancpp(Rcpp::NumericVector& x, const bool na_rm) {
std::size_t m = x.size();
if (m < 1) Rcpp::stop("zero length vector not allowed.");
if (!na_rm) {
for (Rcpp::NumericVector::iterator i = x.begin(); i != x.end(); ++i)
if (Rcpp::NumericVector::is_na(*i)) return *i;
} else {
for (Rcpp::NumericVector::iterator i = x.begin(); i != x.begin() + m; )
Rcpp::NumericVector::is_na(*i) ? std::iter_swap(i, x.begin() + --m) : (void)++i;
}
if (m < 1) return x[0];
std::size_t n = m / 2;
std::nth_element(x.begin(), x.begin() + n, x.begin() + m);
return m % 2 ? x[n] : (x[n] + *std::max_element(x.begin(), x.begin() + n)) / 2.;
}
')
dt1 <- data.table::setDT(data.table::copy(df1))
# type checks
stopifnot(
data.table::is.data.table(dt1),
length(idd) == length(dmda),
length(idd) == length(category)
)
dmda <- switch(
class(dt1$date2),
character = as.character(dmda), Date = as.Date(dmda, "%Y-%m-%d"),
stop("non-comformable types between `dmda` and `dt1$date2`")
)
idd <- as(idd, class(dt1$Id))
# find subsets
DT <- data.table::setDT(list(Id = idd, date2 = dmda, Category = category, order = seq_along(idd)))
DT <- dt1[
dt1[DT, .(Id, Category, date2, Week, order), on = .NATURAL],
on = .(Id, Category, Week), allow.cartesian = TRUE
]
DT[, c("rowid", "date1", "date2", "i.date2") := c(
list(seq_len(.N)), lapply(.SD, as.Date, "%Y-%m-%d")
), .SDcols = c("date1", "date2", "i.date2")]
# pivot + type conversion
DT <- data.table::melt(DT, measure = patterns("DRM(\\d+)"), variable = "day")
DT[, `:=`(day = as.integer(sub("^\\D+", "", day)), value = as.numeric(value))]
# computations
DT[, keep := rev(cumsum(rev(value)) != 0), by = "rowid"]
DT[, value := value + mediancpp(DR1 - value, TRUE),
by = c("Id", "Category", "i.date2", "date1", "day", "Week")]
DT <- DT[date2 == i.date2 & keep & day > i.date2 - date1,
.(value = sum(value), order = order[[1L]]),
by = c("Id", "Category", "i.date2", "date1", "day")]
However, when I test in the database below, it gives the following error in the last DT
: Error in order[[1L]] : subscript out of bounds
.
Is it some error in the dates or something in the second database?
#database2
df1<- structure(
list(
Id = c(1, 1, 1, 1),
date1 = c("2022-01-06","2022-01-06","2022-01-06","2022-01-06"),
date2 = c("2022-01-02","2022-01-03","2022-01-09","2022-01-10"),
Week = c("Sunday","Monday","Sunday","Monday"),
Category = c("EFG", "ABC","EFG","ABC"),
DR1 = c(200, 300, 200, 200),
DRM000 = c(300, 300, 300, 300),
DRM001 = c(300, 300, 300, 300),
DRM002 = c(300,300,300,300),
DRM003 = c(300,300,300,300),
DRM004 = c(300,250,350,350)),row.names = c(NA, 4L), class = "data.frame")
idd<-"1"
dmda<-"2022-01-10"
category<-"ABC"