I am struggling to find a solution.
I have the following gridded samples datasets (I paste different parts of my dataset)
First one: structure(list(gid = c("117765", "117765", "117765", "117765",
"117765", "117765", "117765", "117765", "117765", "117765", "117765",
"117765", "117765", "117765", "117765", "117765", "117765", "117765",
"117765", "117765", "117765", "117765", "117765", "117765"),
country = c("Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)"
), Time = structure(c(3667, 3697, 3727, 3758, 3788, 3819,
3849, 3880, 3911, 3941, 3972, 4002, 4033, 4063, 4092, 4123,
4153, 4184, 4214, 4245, 4276, 4306, 4337, 4367), class = "Date"),
Month = c("01", "02", "03", "04", "05", "06", "07", "08",
"09", "10", "11", "12", "01", "02", "03", "04", "05", "06",
"07", "08", "09", "10", "11", "12"), SPEI1 = c(-0.702853560447693,
2.77506303787231, -1.38380765914917, -0.474617034196854,
0.610002398490906, -0.389719426631927, 2.31887650489807,
1.47994863986969, 1.66277933120728, 0.399970233440399, -1.47139978408813,
-0.435711354017258, -0.510784149169922, -1.15937781333923,
0.523077189922333, -0.161062479019165, -0.481528997421265,
-1.71726500988007, -1.77663195133209, 0.765306115150452,
-0.774405002593994, -0.197176232933998, -1.47615599632263,
-0.388415157794952), growstart = c(10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10), growend = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), maincrop = c(28,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
28, 28, 28, 28, 28, 28, 28, 28)), row.names = 572161:572184, class = "data.frame")
_
Second one = structure(list(gid = c("100468", "100468", "100468", "100468",
"100468", "100468", "100468", "100468", "100468", "100468", "100468",
"100468", "100468", "100468", "100468", "100468", "100468", "100468",
"100468", "100468", "100468", "100468", "100468", "100468"),
country = c("Namibia", "Namibia", "Namibia", "Namibia", "Namibia",
"Namibia", "Namibia", "Namibia", "Namibia", "Namibia", "Namibia",
"Namibia", "Namibia", "Namibia", "Namibia", "Namibia", "Namibia",
"Namibia", "Namibia", "Namibia", "Namibia", "Namibia", "Namibia",
"Namibia"), Time = structure(c(3667, 3697, 3727, 3758, 3788,
3819, 3849, 3880, 3911, 3941, 3972, 4002, 4033, 4063, 4092,
4123, 4153, 4184, 4214, 4245, 4276, 4306, 4337, 4367), class = "Date"),
SPEI1 = c(-1.95947802066803, 0.557283878326416, 1.77989518642426,
-1.2029390335083, -0.119278997182846, 1.44610369205475, -1.4578732252121,
-1.14002466201782, 1.1647777557373, -1.34318947792053, -0.500527501106262,
1.50793671607971, -1.45792877674103, -2.00679230690002, -1.51340460777283,
-1.9636687040329, -1.40127754211426, -0.182968750596046,
0.295145452022552, 0.630711793899536, -0.166128441691399,
-0.55840003490448, -2.62139987945557, -1.74482023715973),
growstart = c(12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12), growend = c(4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4), maincrop = c(52, 52, 52, 52, 52, 52, 52, 52,
52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52,
52), Month = c("01", "02", "03", "04", "05", "06", "07",
"08", "09", "10", "11", "12", "01", "02", "03", "04", "05",
"06", "07", "08", "09", "10", "11", "12")), row.names = 385:408, class = "data.frame")
_
Third one: structure(list(gid = c("117770", "117770", "117770", "117770",
"117770", "117770", "117770", "117770", "117770", "117770", "117770",
"117770", "117770", "117770", "117770", "117770", "117770", "117770",
"117770", "117770", "117770", "117770", "117770", "117770"),
country = c("Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)",
"Congo, Democratic Republic of (Zaire)", "Congo, Democratic Republic of (Zaire)"
), Time = structure(c(3667, 3697, 3727, 3758, 3788, 3819,
3849, 3880, 3911, 3941, 3972, 4002, 4033, 4063, 4092, 4123,
4153, 4184, 4214, 4245, 4276, 4306, 4337, 4367), class = "Date"),
SPEI1 = c(0.649401307106018, 1.423499584198, -2.04273128509521,
0.271935135126114, 0.616238355636597, -1.03605198860168,
1.6733535528183, 1.78166878223419, 1.87084305286407, 1.10145688056946,
-1.23061907291412, -1.64128601551056, -1.00736439228058,
-1.91670513153076, 1.09841585159302, 0.464365869760513, 1.01759243011475,
-1.08844792842865, -0.508061945438385, -0.196570366621017,
-0.805905878543854, 0.117944374680519, -0.862984955310822,
-1.63738548755646), growstart = c(5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), growend = c(12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12), maincrop = c(37, 37, 37,
37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
37, 37, 37, 37, 37, 37), Month = c("01", "02", "03", "04",
"05", "06", "07", "08", "09", "10", "11", "12", "01", "02",
"03", "04", "05", "06", "07", "08", "09", "10", "11", "12"
)), row.names = 574081:574104, class = "data.frame")
I have a column 'main crop' that indicates the main crop in my cell (gid) and the growing season months (growstart and growend).
For each crop/gid//country/Time, I need to select the rows corresponding to the growing season.
The issue I am struggling with is that some growing season months overlap two years and I don't know how to filter based on this condition?
Example in the first sample (overlapping):
For crop 28, the starting growing month is 10(October from the previous year) and it ends in 1(January from the following year).
So for this crop according to the gid/country/Time, I will get the rows 1980-01-16 for year 1980 (because I don't have the data for 1979) and the rows between 1980-10-16 and 1981-01-16 for 1981.
**Example in the second sample (overlapping two years): **
For crop 52, the starting growing month is 12(December from the previous year) and it ends in 4(April from the following year).
So for this crop according to the gid/country/Time, I will only get the rows between 1980-01-16 and 1980-04-16 for year 1980 + the rows between 1980-12-16 and 1981-04-16 for 1981.
Example in the second sample (no overlap):
For crop 37, the starting growing month is 5(May of the current year) and it ends in 12(December of the current year).
I will only get the rows between 1980-05-16 and 1980-05-16 for year 1980 + between 1981-05-16 and 1981-12-16 for year 1981.
Please note that I have a dataframe of more than 3 millions observations and different type of crops/growing season. Hence the 3 samples.
I would really appreciate an automated way to get what I want.
Thank you !!
PS: 10 coffees for the helper that would save my sanity.