Filter rows based on growing season time period in R

Mtrs · June 29, 2022, 1:58pm

I am struggling to find a solution.

I have the following gridded samples datasets (I paste different parts of my dataset)

First one: structure(list(gid = c("117765", "117765", "117765", "117765", 
    "117765", "117765", "117765", "117765", "117765", "117765", "117765", 
    "117765", "117765", "117765", "117765", "117765", "117765", "117765", 
    "117765", "117765", "117765", "117765", "117765", "117765"), 
        country = c("Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
        "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
        "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
        "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
        "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
        "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
        "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
        "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
        "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
        "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
        "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
        "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)"
        ), Time = structure(c(3667, 3697, 3727, 3758, 3788, 3819, 
        3849, 3880, 3911, 3941, 3972, 4002, 4033, 4063, 4092, 4123, 
        4153, 4184, 4214, 4245, 4276, 4306, 4337, 4367), class = "Date"), 
        Month = c("01", "02", "03", "04", "05", "06", "07", "08", 
        "09", "10", "11", "12", "01", "02", "03", "04", "05", "06", 
        "07", "08", "09", "10", "11", "12"), SPEI1 = c(-0.702853560447693, 
        2.77506303787231, -1.38380765914917, -0.474617034196854, 
        0.610002398490906, -0.389719426631927, 2.31887650489807, 
        1.47994863986969, 1.66277933120728, 0.399970233440399, -1.47139978408813, 
        -0.435711354017258, -0.510784149169922, -1.15937781333923, 
        0.523077189922333, -0.161062479019165, -0.481528997421265, 
        -1.71726500988007, -1.77663195133209, 0.765306115150452, 
        -0.774405002593994, -0.197176232933998, -1.47615599632263, 
        -0.388415157794952), growstart = c(10, 10, 10, 10, 10, 10, 
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
        10, 10, 10), growend = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), maincrop = c(28, 
        28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 
        28, 28, 28, 28, 28, 28, 28, 28)), row.names = 572161:572184, class = "data.frame")

_

Second one = structure(list(gid = c("100468", "100468", "100468", "100468", 
"100468", "100468", "100468", "100468", "100468", "100468", "100468", 
"100468", "100468", "100468", "100468", "100468", "100468", "100468", 
"100468", "100468", "100468", "100468", "100468", "100468"), 
    country = c("Namibia", "Namibia", "Namibia", "Namibia", "Namibia", 
    "Namibia", "Namibia", "Namibia", "Namibia", "Namibia", "Namibia", 
    "Namibia", "Namibia", "Namibia", "Namibia", "Namibia", "Namibia", 
    "Namibia", "Namibia", "Namibia", "Namibia", "Namibia", "Namibia", 
    "Namibia"), Time = structure(c(3667, 3697, 3727, 3758, 3788, 
    3819, 3849, 3880, 3911, 3941, 3972, 4002, 4033, 4063, 4092, 
    4123, 4153, 4184, 4214, 4245, 4276, 4306, 4337, 4367), class = "Date"), 
    SPEI1 = c(-1.95947802066803, 0.557283878326416, 1.77989518642426, 
    -1.2029390335083, -0.119278997182846, 1.44610369205475, -1.4578732252121, 
    -1.14002466201782, 1.1647777557373, -1.34318947792053, -0.500527501106262, 
    1.50793671607971, -1.45792877674103, -2.00679230690002, -1.51340460777283, 
    -1.9636687040329, -1.40127754211426, -0.182968750596046, 
    0.295145452022552, 0.630711793899536, -0.166128441691399, 
    -0.55840003490448, -2.62139987945557, -1.74482023715973), 
    growstart = c(12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), growend = c(4, 
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
    4, 4, 4, 4), maincrop = c(52, 52, 52, 52, 52, 52, 52, 52, 
    52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 
    52), Month = c("01", "02", "03", "04", "05", "06", "07", 
    "08", "09", "10", "11", "12", "01", "02", "03", "04", "05", 
    "06", "07", "08", "09", "10", "11", "12")), row.names = 385:408, class = "data.frame")

_

Third one: structure(list(gid = c("117770", "117770", "117770", "117770", 
"117770", "117770", "117770", "117770", "117770", "117770", "117770", 
"117770", "117770", "117770", "117770", "117770", "117770", "117770", 
"117770", "117770", "117770", "117770", "117770", "117770"), 
    country = c("Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
    "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
    "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
    "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
    "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
    "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
    "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
    "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
    "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
    "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
    "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)", 
    "Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)"
    ), Time = structure(c(3667, 3697, 3727, 3758, 3788, 3819, 
    3849, 3880, 3911, 3941, 3972, 4002, 4033, 4063, 4092, 4123, 
    4153, 4184, 4214, 4245, 4276, 4306, 4337, 4367), class = "Date"), 
    SPEI1 = c(0.649401307106018, 1.423499584198, -2.04273128509521, 
    0.271935135126114, 0.616238355636597, -1.03605198860168, 
    1.6733535528183, 1.78166878223419, 1.87084305286407, 1.10145688056946, 
    -1.23061907291412, -1.64128601551056, -1.00736439228058, 
    -1.91670513153076, 1.09841585159302, 0.464365869760513, 1.01759243011475, 
    -1.08844792842865, -0.508061945438385, -0.196570366621017, 
    -0.805905878543854, 0.117944374680519, -0.862984955310822, 
    -1.63738548755646), growstart = c(5, 5, 5, 5, 5, 5, 5, 5, 
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), growend = c(12, 
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 
    12, 12, 12, 12, 12, 12, 12, 12), maincrop = c(37, 37, 37, 
    37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 
    37, 37, 37, 37, 37, 37), Month = c("01", "02", "03", "04", 
    "05", "06", "07", "08", "09", "10", "11", "12", "01", "02", 
    "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"
    )), row.names = 574081:574104, class = "data.frame")

I have a column `maincrop` that indicates the main crop in each cell (gid), and two columns giving the first and last months of its growing season (growstart and growend).

For each crop/gid/country/Time combination, I need to select the rows that fall within the growing season.

The issue I am struggling with is that some growing seasons span two calendar years, and I don't know how to filter on that condition.

Example in the first sample (overlapping):
For crop 28, the starting growing month is 10 (October of the previous year) and it ends in 1 (January of the following year).
So for this crop, according to the gid/country/Time, I will get the row 1980-01-16 for year 1980 (because I don't have the data for 1979) and the rows between 1980-10-16 and 1981-01-16 for 1981.

**Example in the second sample (overlapping two years):**
For crop 52, the starting growing month is 12(December from the previous year) and it ends in 4(April from the following year).
So for this crop according to the gid/country/Time, I will only get the rows between 1980-01-16 and 1980-04-16 for year 1980 + the rows between 1980-12-16 and 1981-04-16 for 1981.

Example in the third sample (no overlap):
For crop 37, the starting growing month is 5(May of the current year) and it ends in 12(December of the current year).
I will only get the rows between 1980-05-16 and 1980-12-16 for year 1980 + between 1981-05-16 and 1981-12-16 for year 1981.

Please note that I have a dataframe of more than 3 million observations and different types of crops/growing seasons. Hence the 3 samples.

I would really appreciate an automated way to get what I want.

Thank you !!

PS: 10 coffees for the helper that would save my sanity.

dvetsch75 · June 29, 2022, 3:01pm

A list-col would be how I would do this:

library(dplyr)

# Sample data for grid cell 117765 (crop 28): 24 monthly rows running from
# 1980-01-16 to 1981-12-16. Every row carries the same growing season,
# starting in month 10 and ending in month 1.
df <- structure(
    list(
        gid = rep("117765", 24),
        country = rep("Congo, Democratic Republic of (Zaire)", 24),
        # Dates are stored as days since 1970-01-01.
        Time = structure(
            c(
                3667, 3697, 3727, 3758, 3788, 3819, 3849, 3880,
                3911, 3941, 3972, 4002, 4033, 4063, 4092, 4123,
                4153, 4184, 4214, 4245, 4276, 4306, 4337, 4367
            ),
            class = "Date"
        ),
        # Zero-padded month labels "01".."12", once per year.
        Month = rep(sprintf("%02d", 1:12), times = 2),
        SPEI1 = c(
            -0.702853560447693, 2.77506303787231, -1.38380765914917,
            -0.474617034196854, 0.610002398490906, -0.389719426631927,
            2.31887650489807, 1.47994863986969, 1.66277933120728,
            0.399970233440399, -1.47139978408813, -0.435711354017258,
            -0.510784149169922, -1.15937781333923, 0.523077189922333,
            -0.161062479019165, -0.481528997421265, -1.71726500988007,
            -1.77663195133209, 0.765306115150452, -0.774405002593994,
            -0.197176232933998, -1.47615599632263, -0.388415157794952
        ),
        growstart = rep(10, 24),
        growend = rep(1, 24),
        maincrop = rep(28, 24)
    ),
    row.names = 572161:572184,
    class = "data.frame"
)

# Keep only the rows whose calendar month falls inside the growing season.
# rowwise() has to come before mutate(): `:` is not vectorised, so without it
# `growstart:12` is built from the first row only and R warns
# "numerical expression has 24 elements: only the first used".
df %>% 
    rowwise() %>% 
    mutate(
        grow_months = case_when(
            # Season wraps past December (e.g. 10 -> 1): take the months up to
            # year end plus the months from January to growend.
            growstart > growend ~ list(c(growstart:12, 1:growend)),
            # Season lies within a single calendar year.
            TRUE ~ list(c(growstart:growend))
        ),
        Month = as.numeric(Month) # Needed for the below filter conditions to match type i.e. b/c 01 != 1
    ) %>% 
    filter(
        Month %in% grow_months
    )
#> # A tibble: 8 x 9
#> # Rowwise: 
#>   gid    country  Time       Month  SPEI1 growstart growend maincrop grow_months
#>   <chr>  <chr>    <date>     <dbl>  <dbl>     <dbl>   <dbl>    <dbl> <list>     
#> 1 117765 Congo, ~ 1980-01-16     1 -0.703        10       1       28 <int [4]>  
#> 2 117765 Congo, ~ 1980-10-16    10  0.400        10       1       28 <int [4]>  
#> 3 117765 Congo, ~ 1980-11-16    11 -1.47         10       1       28 <int [4]>  
#> 4 117765 Congo, ~ 1980-12-16    12 -0.436        10       1       28 <int [4]>  
#> 5 117765 Congo, ~ 1981-01-16     1 -0.511        10       1       28 <int [4]>  
#> 6 117765 Congo, ~ 1981-10-16    10 -0.197        10       1       28 <int [4]>  
#> 7 117765 Congo, ~ 1981-11-16    11 -1.48         10       1       28 <int [4]>  
#> 8 117765 Congo, ~ 1981-12-16    12 -0.388        10       1       28 <int [4]>

^{Created on 2022-06-29 by the reprex package (v1.0.0)}

Mtrs · June 29, 2022, 4:20pm

Hi. Thank you for trying but I don't understand how your code could be reproducible to my entire dataframe with different crops with different growing season months ?

dvetsch75 · June 29, 2022, 4:36pm

By generating a new list-col in which each element holds the months between growstart and growend, each row gets its own list to subset against (also, a slight edit to my initial code: the rowwise should come before the mutate). For example, if I add a new crop to your dataset with a different growing season, the list-col is generated in the case_when, and the rowwise then lets you check row by row that the Month column is somewhere in the list column:


# One made-up observation for a different crop whose growing season
# (months 2 to 6) stays within a single calendar year.
newcrop <- data.frame(
    gid = "999999",
    country = "USA",
    Time = Sys.Date(),
    Month = "06",
    SPEI1 = runif(1),
    growstart = 2,
    growend = 6,
    maincrop = -1
)

# Append the dummy crop row to the sample data.
df <- bind_rows(df, newcrop)

# Give every row its own vector of growing-season months, then keep the rows
# whose Month belongs to that vector.
# NOTE: `growstart:12` fails with "Argument NA / NaN" when growstart or
# growend is NA, so rows without crop information must be set aside first.
t <- df %>% 
    rowwise() %>% # evaluate mutate()/filter() one row at a time
    mutate(
        grow_months = case_when(
            # Season wraps past December (e.g. 10 -> 1): take the months up to
            # year end plus the months from January to growend.
            growstart > growend ~ list(c(growstart:12, 1:growend)),
            # TRUE rather than T: T is an ordinary variable that can be reassigned.
            TRUE ~ list(c(growstart:growend))
        ),
        Month = as.numeric(Month) # Needed for the below filter conditions to match type i.e. b/c 01 != 1
    ) %>%
    filter(
        Month %in% grow_months
    )
t
#> A tibble: 9 x 9
#> Rowwise: 
#> gid    country                               Time       Month  SPEI1 growstart growend maincrop grow_months
#> <chr>  <chr>                                 <date>     <dbl>  <dbl>     <dbl>   <dbl>    <dbl> <list>     
#>     1 117765 Congo, Democratic Republic of (Zaire) 1980-01-16     1 -0.703        10       1       28 <int [4]>  
#>     2 117765 Congo, Democratic Republic of (Zaire) 1980-10-16    10  0.400        10       1       28 <int [4]>  
#>     3 117765 Congo, Democratic Republic of (Zaire) 1980-11-16    11 -1.47         10       1       28 <int [4]>  
#>     4 117765 Congo, Democratic Republic of (Zaire) 1980-12-16    12 -0.436        10       1       28 <int [4]>  
#>     5 117765 Congo, Democratic Republic of (Zaire) 1981-01-16     1 -0.511        10       1       28 <int [4]>  
#>     6 117765 Congo, Democratic Republic of (Zaire) 1981-10-16    10 -0.197        10       1       28 <int [4]>  
#>     7 117765 Congo, Democratic Republic of (Zaire) 1981-11-16    11 -1.48         10       1       28 <int [4]>  
#>     8 117765 Congo, Democratic Republic of (Zaire) 1981-12-16    12 -0.388        10       1       28 <int [4]>  
#>     9 999999 USA                                   2022-06-29     6  0.529         2       6       -1 <int [5]>

And if you look at the last value of t$grow_months you can see that the interval you are checking is different.

Mtrs · June 29, 2022, 4:40pm

I got an error because of NA in the dataframe
"! Problem while computing grow_months = case_when(...).
The error occurred in row 1.
Caused by error in growstart:12:
! Argument NA / NaN"

But I need to keep them like that. Where can I add an argument keeping the NA?

dvetsch75 · June 29, 2022, 4:41pm

Can you give more information about the error you get?

dvetsch75 · June 29, 2022, 4:44pm

So that appears to me that some of your growstart values are NA. Is that true? And if so, what does that mean? How could you determine the growing season if you don't know when it starts?

Mtrs · June 29, 2022, 4:45pm

Yes, it's simply because for some rows, I don't have crop information (main crop, growstart, growend). But I need to keep those rows for my analysis afterwards.

dvetsch75 · June 30, 2022, 2:01pm

You could handle this all within one pipeline, but it might be simpler to just set the missings aside first - then put them back after doing the filter that you want to do:

library(dplyr)


# Sample data for grid cell 117765 (crop 28): 24 monthly rows running from
# 1980-01-16 to 1981-12-16. Every row carries the same growing season,
# starting in month 10 and ending in month 1.
df <- structure(
    list(
        gid = rep("117765", 24),
        country = rep("Congo, Democratic Republic of (Zaire)", 24),
        # Dates are stored as days since 1970-01-01.
        Time = structure(
            c(
                3667, 3697, 3727, 3758, 3788, 3819, 3849, 3880,
                3911, 3941, 3972, 4002, 4033, 4063, 4092, 4123,
                4153, 4184, 4214, 4245, 4276, 4306, 4337, 4367
            ),
            class = "Date"
        ),
        # Zero-padded month labels "01".."12", once per year.
        Month = rep(sprintf("%02d", 1:12), times = 2),
        SPEI1 = c(
            -0.702853560447693, 2.77506303787231, -1.38380765914917,
            -0.474617034196854, 0.610002398490906, -0.389719426631927,
            2.31887650489807, 1.47994863986969, 1.66277933120728,
            0.399970233440399, -1.47139978408813, -0.435711354017258,
            -0.510784149169922, -1.15937781333923, 0.523077189922333,
            -0.161062479019165, -0.481528997421265, -1.71726500988007,
            -1.77663195133209, 0.765306115150452, -0.774405002593994,
            -0.197176232933998, -1.47615599632263, -0.388415157794952
        ),
        growstart = rep(10, 24),
        growend = rep(1, 24),
        maincrop = rep(28, 24)
    ),
    row.names = 572161:572184,
    class = "data.frame"
)

# Two made-up observations: one with a growing season inside a single year
# (months 2 to 6) and one with no crop information at all (NA).
newcrop <- data.frame(
    gid = c("999999", "111111"),
    country = c("USA", "Canada"),
    Time = rep(Sys.Date(), 2),
    Month = rep("06", 2),
    SPEI1 = runif(2),
    growstart = c(2, NA),
    growend = c(6, NA),
    maincrop = c(-1, NA)
)

# Append the dummy crop rows to the sample data.
df <- bind_rows(df, newcrop)

# Rows with no crop information (growstart, growend and maincrop all NA).
# They are set aside here and given the same columns as the filtered result.
missings <- df %>% 
    filter(
        # if_all() keeps a row only when every selected column is NA; it
        # replaces across() inside filter(), which dplyr has deprecated.
        if_all(
            growstart:maincrop,
            is.na
        )
    ) %>% 
    mutate(
        Month = as.numeric(Month),
        # Placeholder so the list-column exists for these rows too.
        grow_months = list(NA_integer_)
    )

# Filter the rows that have crop information down to their growing season,
# then append the rows without crop information unchanged.
t <- df %>% 
    filter(
        # Keep rows where growstart, growend and maincrop are all present;
        # `growstart:growend` below cannot be evaluated on NA.
        # if_all() replaces across() inside filter(), which dplyr has deprecated.
        if_all(
            growstart:maincrop,
            function(x) {
                !is.na(x)
            }
        )
    ) %>% 
    rowwise() %>% # evaluate mutate()/filter() one row at a time
    mutate(
        grow_months = case_when(
            # Season lies within a single calendar year.
            growstart <= growend ~ list(growstart:growend),
            # Season wraps past December: months up to year end plus months from January.
            growstart > growend ~ list(c(growstart:12, 1:growend))
        ),
        Month = as.numeric(Month) # Needed for the below filter conditions to match type i.e. b/c 01 != 1
    ) %>%
    filter(
        Month %in% grow_months
    ) %>% 
    bind_rows(missings)

t
#> # A tibble: 10 x 9
#> # Rowwise: 
#>    gid    country Time       Month  SPEI1 growstart growend maincrop grow_months
#>    <chr>  <chr>   <date>     <dbl>  <dbl>     <dbl>   <dbl>    <dbl> <list>     
#>  1 117765 Congo,~ 1980-01-16     1 -0.703        10       1       28 <int [4]>  
#>  2 117765 Congo,~ 1980-10-16    10  0.400        10       1       28 <int [4]>  
#>  3 117765 Congo,~ 1980-11-16    11 -1.47         10       1       28 <int [4]>  
#>  4 117765 Congo,~ 1980-12-16    12 -0.436        10       1       28 <int [4]>  
#>  5 117765 Congo,~ 1981-01-16     1 -0.511        10       1       28 <int [4]>  
#>  6 117765 Congo,~ 1981-10-16    10 -0.197        10       1       28 <int [4]>  
#>  7 117765 Congo,~ 1981-11-16    11 -1.48         10       1       28 <int [4]>  
#>  8 117765 Congo,~ 1981-12-16    12 -0.388        10       1       28 <int [4]>  
#>  9 999999 USA     2022-06-30     6  0.653         2       6       -1 <int [5]>  
#> 10 111111 Canada  2022-06-30     6  0.890        NA      NA       NA <int [1]>

^{Created on 2022-06-30 by the reprex package (v1.0.0)}

Mtrs · July 1, 2022, 7:42am

Hi, thank you. I updated my post by posting several samples of my dataset with different crops and growing seasons. How does your code apply to all of them ?

dvetsch75 · July 1, 2022, 3:08pm

Did you try the snippet I sent you with your new data? I ran your new examples and everything seems to work fine?

Mtrs · July 2, 2022, 10:27am

dvetsch75:

-10-16    10  0.400        10       1       28 <int [4]>  
#>  3 117765 Congo,~ 1980-11-16    11 -1.47         10       1       28 <int [4]>  
#>  4 117765 Congo,~ 1980-12-16    12 -0.436        10       1       28 <int [4]>  
#>  5 117765 Congo,~ 1981-01-16     1 -0.511        10       1       28 <int [4]>  
#>  6 117765 Congo,~ 1981-10-16    10 -0.197        10       1       28 <int [4]>  
#>  7 117765 Congo,~ 1981-11-16    11 -1.48         10       1       28 <int [4]>  
#>  8 117765 Congo,~ 1981-12-16    12 -

Oh dear !!!!!! I spoke too quickly. It works. You are a genius.

Let me understand the code, how R understood that there are overlapping years ?

Like 10 as a growing start, means the month from the previous year?

system · July 9, 2022, 10:28am

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.