Replacing Values based on previous Years value

Hi,

How can I replace values for certain columns based on previous years values. So, values for 2021-12-01 to be replaced with values from 2020-12-01 as an example and this would be for entire data frame. This example has only one column to be changed, but if I had several colmuns, how do we apply mutate(across) in that case for such change in value replacement

library(tidyverse)
library(lubridate)

vol <- data.frame(
  Date = c("2018 Jan","2018 Jan","2018 Jan","2018 Jan",
           "2018 Feb","2018 Feb","2018 Feb","2018 Feb",
           "2018 Mar","2018 Mar","2018 Mar","2018 Mar",
           "2018 Apr","2018 Apr","2018 Apr","2018 Apr",
           "2018 May","2018 May","2018 May","2018 May",
           "2018 Jun","2018 Jun","2018 Jun","2018 Jun",
           "2018 Jul","2018 Jul","2018 Jul","2018 Jul",
           "2018 Aug","2018 Aug","2018 Aug","2018 Aug",
           "2018 Sep","2018 Sep","2018 Sep","2018 Sep",
           "2018 Oct","2018 Oct","2018 Oct","2018 Oct",
           "2018 Nov","2018 Nov","2018 Nov","2018 Nov",
           "2018 Dec","2018 Dec","2018 Dec","2018 Dec",
           "2019 Jan","2019 Jan","2019 Jan","2019 Jan",
           "2019 Feb","2019 Feb","2019 Feb","2019 Feb",
           "2019 Mar","2019 Mar","2019 Mar","2019 Mar",
           "2019 Apr","2019 Apr","2019 Apr","2019 Apr",
           "2019 May","2019 May","2019 May","2019 May",
           "2019 Jun","2019 Jun","2019 Jun","2019 Jun",
           "2019 Jul","2019 Jul","2019 Jul","2019 Jul",
           "2019 Aug","2019 Aug","2019 Aug","2019 Aug",
           "2019 Sep","2019 Sep","2019 Sep","2019 Sep",
           "2019 Oct","2019 Oct","2019 Oct","2019 Oct",
           "2019 Nov","2019 Nov","2019 Nov","2019 Nov",
           "2019 Dec","2019 Dec","2019 Dec","2019 Dec",
           "2020 Jan","2020 Jan","2020 Jan","2020 Jan",
           "2020 Feb","2020 Feb","2020 Feb","2020 Feb",
           "2020 Mar","2020 Mar","2020 Mar","2020 Mar",
           "2020 Apr","2020 Apr","2020 Apr","2020 Apr",
           "2020 May","2020 May","2020 May","2020 May",
           "2020 Jun","2020 Jun","2020 Jun","2020 Jun",
           "2020 Jul","2020 Jul","2020 Jul","2020 Jul",
           "2020 Aug","2020 Aug","2020 Aug","2020 Aug",
           "2020 Sep","2020 Sep","2020 Sep","2020 Sep",
           "2020 Oct","2020 Oct","2020 Oct","2020 Oct",
           "2020 Nov","2020 Nov","2020 Nov","2020 Nov",
           "2020 Dec", "2020 Dec","2020 Dec", "2020 Dec",
           "2021 Jan","2021 Jan","2021 Jan","2021 Jan",
           "2021 Feb","2021 Feb","2021 Feb","2021 Feb",
           "2021 Mar","2021 Mar","2021 Mar","2021 Mar",
           "2021 Apr","2021 Apr","2021 Apr","2021 Apr",
           "2021 May","2021 May","2021 May","2021 May",
           "2021 Jun","2021 Jun","2021 Jun","2021 Jun",
           "2021 Jul","2021 Jul","2021 Jul","2021 Jul",
           "2021 Aug","2021 Aug","2021 Aug","2021 Aug",
           "2021 Sep","2021 Sep","2021 Sep","2021 Sep",
           "2021 Oct","2021 Oct","2021 Oct","2021 Oct",
           "2021 Nov","2021 Nov","2021 Nov","2021 Nov",
           "2021 Dec","2021 Dec", "2021 Dec","2021 Dec"),
  
  Country = c("CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US",
              "CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US",
              "CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US",
              "CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US"),
  
  Type = c("A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B",
           "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B",
           "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B",
           "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B"
  ),
  
  Sales = c(100,110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210,100,110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210,100,110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210,100,110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210,
            220, 230,240, 250, 260, 270, 280, 290, 300, 310, 320, 330,220, 230,240, 250, 260, 270, 280, 290, 300, 310, 320, 330,20, 230,240, 250, 260, 270, 280, 290, 300, 310, 320, 330,220, 230,240, 250, 260, 270, 280, 290, 300, 310, 320, 330,
            340, 350, 360, 370, 380, 390, 400, 410, 420, 430, 440, 450,340, 350, 360, 370, 380, 390, 400, 410, 420, 430, 440, 450,340, 350, 360, 370, 380, 390, 400, 410, 420, 430, 440, 450,340, 350, 360, 370, 380, 390, 400, 410, 420, 430, 440, 450,
            460, 470, 480, 490, 500, 510, 520, 530, 540, 550, 560, 570, 460, 470, 480, 490, 500, 510, 520, 530, 540, 550, 560, 570, 60, 470, 480, 490, 500, 510, 520, 530, 540, 550, 560, 570, 460, 470, 480, 490, 500, 510, 520, 530, 540, 550, 560, 570)
)

vol$Date <- ym(vol$Date)

Thanks for your help!

Hi, you can use reshape2 to do this. Below is the additional code you need to solve your problem.

library(reshape2)

vol_wide <- dcast(vol, ...~Date, value.var = "Sales")
vol_wide$`2021-12-01` <-  vol_wide$`2020-12-01`

vol_long <- melt(vol_wide, id.vars = c("Country", "Type"))

Thanks @ElisaDuranMicco !!

This solution is hard coding value of 2020-12-01 to 2021-12-01.
We instead are looking for each row to have its value equivalent to what was one year ago. So, looking for a way that can do it for all the rows instead of hard coding.

Thanks for your help!

Hi,
I hope I have understood what you need. I created Date2, which is the original date plus one year. Then, you can use Date2 and have the data you need.

vol$day <- day(vol$Date)
vol$month <- month(vol$Date)
vol$year <- year(vol$Date)
vol$year2 <- vol$year + 1
vol$Date2 <- paste(vol$year2, vol$month, vol$day, sep="-")
vol2 <- select(vol, Date2, Country, Type, Sales)

By the way, reshape2 has been superseded by tidyr and has been for a while now.

hadley/reshape: An R package to flexible rearrange, reshape and aggregate data (github.com)

reshape2 is superseded: only changes necessary to keep it on CRAN will be made. We recommend using tidyr instead.

1 Like

You could use the statar package. I have left the original column there so you can see it, but you could delete it.

library(statar)
library(tidyverse)
library(lubridate)

vol %>% 
  group_by(Country, Type) %>% 
  mutate(Sales_shift = tlag(Sales, n = years(1), time = Date)) %>% 
  ungroup() %>% 
  filter(Date >= "2019-01-01")

image

This is great!!
Thanks @williaml I wasn't able to get the right values with lag with my initial trials.

Thanks a bunch!

This would work. Thanks for the help @ElisaDuranMicco!!

Thanks again @william!!

I am noticing this works only until we have the data available. Is there a way to apply a for or while loop where the same logic can carry on to the extended data with no figures originally. I just edited my sample code to show NAs in future months along with your suggested code. I would like to fill in the NAs with the same logic for the remaining figures. Yes, we can create another column and use Sales_Shift, but as the data gets extended more and more, it will be an issue.

library(tidyverse)
library(lubridate)

vol <- data.frame(
  Date = c("2018 Jan","2018 Jan","2018 Jan","2018 Jan",
           "2018 Feb","2018 Feb","2018 Feb","2018 Feb",
           "2018 Mar","2018 Mar","2018 Mar","2018 Mar",
           "2018 Apr","2018 Apr","2018 Apr","2018 Apr",
           "2018 May","2018 May","2018 May","2018 May",
           "2018 Jun","2018 Jun","2018 Jun","2018 Jun",
           "2018 Jul","2018 Jul","2018 Jul","2018 Jul",
           "2018 Aug","2018 Aug","2018 Aug","2018 Aug",
           "2018 Sep","2018 Sep","2018 Sep","2018 Sep",
           "2018 Oct","2018 Oct","2018 Oct","2018 Oct",
           "2018 Nov","2018 Nov","2018 Nov","2018 Nov",
           "2018 Dec","2018 Dec","2018 Dec","2018 Dec",
           "2019 Jan","2019 Jan","2019 Jan","2019 Jan",
           "2019 Feb","2019 Feb","2019 Feb","2019 Feb",
           "2019 Mar","2019 Mar","2019 Mar","2019 Mar",
           "2019 Apr","2019 Apr","2019 Apr","2019 Apr",
           "2019 May","2019 May","2019 May","2019 May",
           "2019 Jun","2019 Jun","2019 Jun","2019 Jun",
           "2019 Jul","2019 Jul","2019 Jul","2019 Jul",
           "2019 Aug","2019 Aug","2019 Aug","2019 Aug",
           "2019 Sep","2019 Sep","2019 Sep","2019 Sep",
           "2019 Oct","2019 Oct","2019 Oct","2019 Oct",
           "2019 Nov","2019 Nov","2019 Nov","2019 Nov",
           "2019 Dec","2019 Dec","2019 Dec","2019 Dec",
           "2020 Jan","2020 Jan","2020 Jan","2020 Jan",
           "2020 Feb","2020 Feb","2020 Feb","2020 Feb",
           "2020 Mar","2020 Mar","2020 Mar","2020 Mar",
           "2020 Apr","2020 Apr","2020 Apr","2020 Apr",
           "2020 May","2020 May","2020 May","2020 May",
           "2020 Jun","2020 Jun","2020 Jun","2020 Jun",
           "2020 Jul","2020 Jul","2020 Jul","2020 Jul",
           "2020 Aug","2020 Aug","2020 Aug","2020 Aug",
           "2020 Sep","2020 Sep","2020 Sep","2020 Sep",
           "2020 Oct","2020 Oct","2020 Oct","2020 Oct",
           "2020 Nov","2020 Nov","2020 Nov","2020 Nov",
           "2020 Dec", "2020 Dec","2020 Dec", "2020 Dec",
           "2021 Jan","2021 Jan","2021 Jan","2021 Jan",
           "2021 Feb","2021 Feb","2021 Feb","2021 Feb",
           "2021 Mar","2021 Mar","2021 Mar","2021 Mar",
           "2021 Apr","2021 Apr","2021 Apr","2021 Apr",
           "2021 May","2021 May","2021 May","2021 May",
           "2021 Jun","2021 Jun","2021 Jun","2021 Jun",
           "2021 Jul","2021 Jul","2021 Jul","2021 Jul",
           "2021 Aug","2021 Aug","2021 Aug","2021 Aug",
           "2021 Sep","2021 Sep","2021 Sep","2021 Sep",
           "2021 Oct","2021 Oct","2021 Oct","2021 Oct",
           "2021 Nov","2021 Nov","2021 Nov","2021 Nov",
           "2021 Dec","2021 Dec", "2021 Dec","2021 Dec"),
  
  Country = c("CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US",
              "CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US",
              "CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US",
              "CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US","CA","CA","US","US"),
  
  Type = c("A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B",
           "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B",
           "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B",
           "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B", "A", "B"
  ),
  
  Sales = c(100,110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210,100,110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210,100,110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210,100,110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210,
            220, 230,240, 250, 260, 270, 280, 290, 300, 310, 320, 330,220, 230,240, 250, 260, 270, 280, 290, 300, 310, 320, 330,20, 230,240, 250, 260, 270, 280, 290, 300, 310, 320, 330,220, 230,240, 250, 260, 270, 280, 290, 300, 310, 320, 330,
            NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
            NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)
)

vol$Date <- ym(vol$Date)

# First Option

library(statar)

vol1 <- vol %>% 
  group_by(Country, Type) %>% 
  mutate(Sales_shift = tlag(Sales, n = years(1), time = Date)) 

Any help here would be very helpful.

Thanks again!!

Sure, but what values would go in to the column when there are NA values though? Do you go two years back?

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.