I would like some help reducing the processing time of the code below. In this example it took 28 seconds; however, my real code is much larger and takes much longer. Previously I used `pivot_longer` to calculate `adjusted1`; after switching to `data.table`, the run time improved considerably. I would still like to know whether it can be improved further, for example by keeping the `adjusted2` output in `data.table` as well.
library(dplyr)
library(tidyr)
library(lubridate)
library(data.table)
library(tictoc)
#database
df1 <- data.frame(
date1 = as.Date( "2021-12-01"),
date2= rep(seq( as.Date("2021-01-01"), length.out=27500, by=1), each = 2),
Category = rep(c("ABC", "EFG"), length.out = 55000),
Week = rep(c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
"Saturday", "Sunday"), length.out = 55000),
DR1 = sample( 200:250, 55000, repl=TRUE),
setNames( replicate(365, { sample(0:55000, 55000)}, simplify=FALSE),
paste0("DRM", formatC(1:365, width = 2, format = "d", flag = "0"))))
df1<-as.data.table(df1)
dmda<-"2021-12-10"
code<-"ABC"
tic()
adjusted1<-melt(df1[date2 == dmda & Category == code][,
lapply(.SD, sum, na.rm = TRUE), by = Category,
.SDcols = patterns("^DRM")],
id.var = "Category", variable.name = "name", value.name = "val")[,
name := readr::parse_number(as.character(name))][]
colnames(adjusted1)[-1]<-c("days","numbers")
adjusted2 <- adjusted1 %>%
group_by(Category) %>%
slice((ymd(dmda) - min(as.Date(df1$date1) [
df1$Category == first(Category)])):max(days)+1) %>%
ungroup%>%data.frame()
if(any(table(adjusted2$numbers) >= 3)& length(unique(adjusted2$numbers)) == 1){
yz <- unique(adjusted2$numbers)
var<-as.numeric(yz)
}else
model <- lm(numbers ~ I(days^2), adjusted2)
coef<-max(coef(model)[1], 0)
var<-as.numeric(coef)
toc()
0.28 sec elapsed