I am trying to replace a loop in order to optimize the code. I was looking for a way to make analysis_sample_a
and pass_buffer_list_w
reproducible without writing so much code but I did not succeed because both objects were lists.
Here is how the data looks like of the object analyis_sample_a
analysis_sample_a <- structure(list(prs_id = structure(c(5413941, 5173912, 5155959,
5399763, 5331026, 5058319), label = " ESP", format.stata = "%12.0g"),
year = structure(c(2011, 2007, 2010, 2008, 2013, 2005), label = "Calendar year", format.stata = "%8.0g"),
X = structure(c(13.42006, 13.368156, 10.631321, 13.423421,
10.75336, 9.158268), label = "geo_xWGS84", format.stata = "%12.0g"),
Y = structure(c(52.476585, 52.551794, 48.446771, 52.485964,
53.009095, 48.780813), label = "geo_yWGS84", format.stata = "%12.0g"),
X_proj = c(800129.781458432, 796100.497439696, 620632.880007759,
800293.921457125, 617638.859690204, 511626.830156876), Y_proj = c(5823237.00413242,
5831381.69520824, 5367244.21158149, 5824293.43326152, 5874720.17139051,
5403102.34693945), pgq_id = structure(c(27, 27, 27, 27, 27,
27), label = "GEO Ebene", format.stata = "%8.0g", labels = c(` -9 Nicht zuordenbar` = -9,
` -7 Keine Angabe` = -7, ` -5 (leer)` = -5,
` 8 Gemeinde` = 8, ` 9 Postleitzahl` = 9,
` 11 Ortsteil` = 11, ` 13 Gemeinde/PLZ` = 13,
` 16 Ortsteil/PLZ` = 16, ` 20 Block` = 20,
` 22 Blockseite` = 22, ` 24 Gebäude` = 24,
` 27 Hausnummer` = 27), class = c("haven_labelled",
"vctrs_vctr", "double")), aqk_id = structure(c(7, 7, 7, 7,
7, 7), label = "GEO Matchqual.", format.stata = "%8.0g", labels = c(` -9 Nicht zuordenbar` = -9,
` -7 Keine Angabe` = -7, ` -5 (leer)` = -5,
` 1 Sonstiges` = 1, ` 2 Ort/Str. tls 1:1, Rest fehlt` = 2,
` 3 PLZ/Ort/Str. tls 1:1,Hsn fehlt` = 3, ` 4 PLZ fehlt, Rest teils 1:1` = 4,
` 5 Ort fehlt, Rest teils 1:1` = 5, ` 6 fast alles 1:1` = 6,
` 7 alles 1:1` = 7), class = c("haven_labelled",
"vctrs_vctr", "double")), htq_id = structure(c(4, 4, 4, 4,
4, 4), label = "GEO Treffgenau.", format.stata = "%8.0g", labels = c(` -9 Nicht zuordenbar` = -9,
` -7 Keine Angabe` = -7, ` -5 (leer)` = -5,
` 1 sehr mehrdeutig` = 1, ` 2 etwas mehrdeutig` = 2,
` 3 fast eindeutig` = 3, ` 4 eindeutig` = 4
), class = c("haven_labelled", "vctrs_vctr", "double")),
female = structure(c(1, 0, 0, 1, 0, 0), label = "Female", format.stata = "%8.0g"),
age = structure(c(31.1671237945557, 55.145206451416, 40.4630126953125,
60.687671661377, 45.6575355529785, 43.4438362121582), format.stata = "%9.0g"),
age_1525 = structure(c(0, 0, 0, 0, 0, 0), label = "Age group: 15-25 years", format.stata = "%8.0g"),
age_2635 = structure(c(1, 0, 0, 0, 0, 0), label = "Age group: 26-35 years", format.stata = "%8.0g"),
age_3650 = structure(c(0, 0, 1, 0, 1, 1), label = "Age group: 36-50 years", format.stata = "%8.0g"),
age_51ret = structure(c(0, 1, 0, 1, 0, 0), label = "Age group: 51-retirement age", format.stata = "%8.0g"),
age_ret75 = structure(c(0, 0, 0, 0, 0, 0), label = "Age group: retirement age-75 years", format.stata = "%8.0g"),
german = structure(c(1, 1, 1, 0, 1, 1), label = "German citizen", format.stata = "%8.0g"),
foreign = structure(c(0, 0, 0, 1, 0, 0), label = "No german citizenship", format.stata = "%8.0g"),
refugee = structure(c(0, 0, 0, 0, 0, 0), label = "Refugee", format.stata = "%8.0g"),
employed = structure(c(0, 0, 1, 0, 1, 0), format.stata = "%8.0g"),
fulltime = structure(c(0, 0, 1, 0, 1, 0), format.stata = "%8.0g"),
parttime = structure(c(0, 0, 0, 0, 0, 0), format.stata = "%8.0g"),
marginal = structure(c(0, 0, 0, 0, 0, 0), label = "Employed but below marginal earning threshold", format.stata = "%8.0g"),
not_emp = structure(c(1, 1, 0, 1, 0, 1), format.stata = "%8.0g"),
ub1_unemp = structure(c(1, 0, 0, 0, 0, 0), format.stata = "%8.0g"),
ub1_emp = structure(c(0, 0, 0, 0, 0, 0), format.stata = "%8.0g"),
ub2_unemp = structure(c(0, 1, 0, 1, 0, 1), format.stata = "%8.0g"),
ub2_emp = structure(c(0, 0, 0, 0, 0, 0), format.stata = "%8.0g"),
ub2_ub1 = structure(c(0, 0, 0, 0, 0, 0), format.stata = "%8.0g"),
jobsearch = structure(c(1, 1, 0, 1, 0, 1), format.stata = "%9.0g"),
educ = structure(c(3, 2, 2, 1, 2, 2), format.stata = "%9.0g"),
educ_low = structure(c(0, 0, 0, 1, 0, 0), format.stata = "%8.0g"),
educ_med = structure(c(0, 1, 1, 0, 1, 1), format.stata = "%8.0g"),
educ_high = structure(c(1, 0, 0, 0, 0, 0), format.stata = "%8.0g"),
yearly_income = structure(c(4911.39990234375, 0, 55056.59765625,
0, 69598.1953125, 0), format.stata = "%9.0g"), ieb_wo_gem_num = structure(c(1.1e+07,
1.1e+07, 9772216, 1.1e+07, 3360018, 8111000), label = "Gem 12/2017", format.stata = "%10.0g"),
`_ID` = structure(c(8326, 8262, 5592, 8326, 5738, 3412), format.stata = "%12.0g"),
longitude = structure(c(13.4231042861938, 13.3653039932251,
10.653510093689, 13.4231042861938, 10.7989225387573, 9.16147613525391
), label = "x-coordinate of area centroid", format.stata = "%9.0g")), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
Then I tried to divide the dataframe into different lists by year
splited <- analysis_sample_a %>%
filter(year %in% c(2007:2017))
analysis_sample_year_a <- splited %>%
group_split(year)
This is how the data of pass_buffer_a
looks like
pass_buffer_a <- structure(list(welle = c("Welle 11 (2017)", "Welle 8 (2014)",
"Welle 4 (2010)", "Welle 6 (2012)", "Welle 8 (2014)", "Welle 5 (2011)"
), g__2017 = c("09564000", "03241001", "13075136", "05958020",
"05913000", "07332049"), pass_geo_i = c("20090", "14978", "28451",
"9862", "6059", "7906"), state = c("Bayern", "Niedersachsen",
"Mecklenburg-Vorpommern", "Nordrhein-Westfalen", "Nordrhein-Westfalen",
"Rheinland-Pfalz"), plz = c("90443", "30165", "17373", "59969",
"44328", "67273"), pop_pc = c("97", "90", "36", "7", "79", "29"
), kde_bandwi = c(60.7424210253877, 68.1159472633942, 240.487367481696,
510.263978381978, 79.7029170659758, 305.605859767971), buffer_siz = c(182.227263076163,
204.347841790183, 721.462102445088, 1530.79193514593, 239.108751197927,
916.817579303913)), row.names = 0:5, class = "data.frame")
This is the code in which I also create a lists by waves.
convert_to_shp_list <- function(shp){
y <- as(shp, "SpatialPolygons")
p <- slot(y, "polygons")
v <- lapply(p, function(z) { SpatialPolygons(list(z)) })
return(v)}
pass_buffer_list <- convert_to_shp_list(pass_buffer_proj)
pass_buffer_list <- convert_to_shp_list(pass_buffer)
pass_buffer_list_w4 <- pass_buffer_list[pass_buffer$welle == "Welle 4 (2010)"]
pass_buffer_list_w5 <- pass_buffer_list[pass_buffer$welle == "Welle 5 (2011)"]
pass_buffer_list_w6 <- pass_buffer_list[pass_buffer$welle == "Welle 6 (2012)"]
pass_buffer_list_w8 <- pass_buffer_list[pass_buffer$welle == "Welle 8 (2014)"]
pass_buffer_list_w11 <- pass_buffer_list[pass_buffer$welle == "Welle 11 (2017)"]
pass_buffer_list_w <- list(pass_buffer_list_w4,
pass_buffer_list_w5, pass_buffer_list_w6, pass_buffer_list_w8, pass_buffer_list_w11)
This is the loop which I tried to replace.
analysis_buf <- list()
c <- 1
for(i in 1:length(analysis_sample_year_a)){
for(j in 1:length(pass_buffer_list_w[[i]])){
analysis_buf[[c]] <- analysis_sample_year_a[[i]][which(!is.na(sp::over(SpatialPoints(data.frame(analysis_sample_year_a[[i]]$X_proj,analysis_sample_year_a[[i]]$Y_proj)), pass_buffer_list_w[[i]][[j]]))), ]
c <- c + 1
}
}
This is how the output should look like
My idea to replace the loop is to apply the base::lapply()
function into different objects data1, data2,....,data11 by each wave. In this way in each wave I have a point assigned to its determined buffer. The problem is that I get an empty list but I dont know the why.
analysis_2007 <- list(analysis_sample_year_a[[1]]
analysis_2008 <- list(analysis_sample_year_a[[2]]
...
analysis_2017 <- list(analysis_sample_year_a[[11]]
data1 <- lapply(seq(nrow(pass_buffer_list_w)), function(x) sp::over(pass_buffer_list_w[x,], analysis_2007))
data2 <- lapply(seq(nrow(pass_buffer_list_w)), function(x) sp::over(pass_buffer_list_w[x,], analysis_2008))
...
data11 <- lapply(seq(nrow(pass_buffer_list_w)), function(x) sp::over(pass_buffer_list_w[x,], analysis_2017))
Then, my next step to replace the loop would be to create a single list where there are the lists from data1,...,data11
list_data <- tibble(list(data1, data2, ...,data11)
I hope someone can help me with this matter because I am stuck since 3 weeks and I do not find an answer. Thank you for your help