I get an empty list in applying the function lapply

lencoman · August 1, 2022, 2:47pm

I am trying to replace a loop in order to optimize the code. I was looking for a way to make analysis_sample_a and pass_buffer_list_w reproducible without writing so much code but I did not succeed because both objects were lists.

Here is how the data looks like of the object analyis_sample_a

analysis_sample_a <- structure(list(prs_id = structure(c(5413941, 5173912, 5155959, 
5399763, 5331026, 5058319), label = " ESP", format.stata = "%12.0g"), 
    year = structure(c(2011, 2007, 2010, 2008, 2013, 2005), label = "Calendar year", format.stata = "%8.0g"), 
    X = structure(c(13.42006, 13.368156, 10.631321, 13.423421, 
    10.75336, 9.158268), label = "geo_xWGS84", format.stata = "%12.0g"), 
    Y = structure(c(52.476585, 52.551794, 48.446771, 52.485964, 
    53.009095, 48.780813), label = "geo_yWGS84", format.stata = "%12.0g"), 
    X_proj = c(800129.781458432, 796100.497439696, 620632.880007759, 
    800293.921457125, 617638.859690204, 511626.830156876), Y_proj = c(5823237.00413242, 
    5831381.69520824, 5367244.21158149, 5824293.43326152, 5874720.17139051, 
    5403102.34693945), pgq_id = structure(c(27, 27, 27, 27, 27, 
    27), label = "GEO Ebene", format.stata = "%8.0g", labels = c(`          -9 Nicht zuordenbar` = -9, 
    `          -7 Keine Angabe` = -7, `          -5 (leer)` = -5, 
    `           8 Gemeinde` = 8, `           9 Postleitzahl` = 9, 
    `          11 Ortsteil` = 11, `          13 Gemeinde/PLZ` = 13, 
    `          16 Ortsteil/PLZ` = 16, `          20 Block` = 20, 
    `          22 Blockseite` = 22, `          24 Gebäude` = 24, 
    `          27 Hausnummer` = 27), class = c("haven_labelled", 
    "vctrs_vctr", "double")), aqk_id = structure(c(7, 7, 7, 7, 
    7, 7), label = "GEO Matchqual.", format.stata = "%8.0g", labels = c(`          -9 Nicht zuordenbar` = -9, 
    `          -7 Keine Angabe` = -7, `          -5 (leer)` = -5, 
    `           1 Sonstiges` = 1, `           2 Ort/Str. tls 1:1, Rest fehlt` = 2, 
    `           3 PLZ/Ort/Str. tls 1:1,Hsn fehlt` = 3, `           4 PLZ fehlt, Rest teils 1:1` = 4, 
    `           5 Ort fehlt, Rest teils 1:1` = 5, `           6 fast alles 1:1` = 6, 
    `           7 alles 1:1` = 7), class = c("haven_labelled", 
    "vctrs_vctr", "double")), htq_id = structure(c(4, 4, 4, 4, 
    4, 4), label = "GEO Treffgenau.", format.stata = "%8.0g", labels = c(`          -9 Nicht zuordenbar` = -9, 
    `          -7 Keine Angabe` = -7, `          -5 (leer)` = -5, 
    `           1 sehr mehrdeutig` = 1, `           2 etwas mehrdeutig` = 2, 
    `           3 fast eindeutig` = 3, `           4 eindeutig` = 4
    ), class = c("haven_labelled", "vctrs_vctr", "double")), 
    female = structure(c(1, 0, 0, 1, 0, 0), label = "Female", format.stata = "%8.0g"), 
    age = structure(c(31.1671237945557, 55.145206451416, 40.4630126953125, 
    60.687671661377, 45.6575355529785, 43.4438362121582), format.stata = "%9.0g"), 
    age_1525 = structure(c(0, 0, 0, 0, 0, 0), label = "Age group: 15-25 years", format.stata = "%8.0g"), 
    age_2635 = structure(c(1, 0, 0, 0, 0, 0), label = "Age group: 26-35 years", format.stata = "%8.0g"), 
    age_3650 = structure(c(0, 0, 1, 0, 1, 1), label = "Age group: 36-50 years", format.stata = "%8.0g"), 
    age_51ret = structure(c(0, 1, 0, 1, 0, 0), label = "Age group: 51-retirement age", format.stata = "%8.0g"), 
    age_ret75 = structure(c(0, 0, 0, 0, 0, 0), label = "Age group: retirement age-75 years", format.stata = "%8.0g"), 
    german = structure(c(1, 1, 1, 0, 1, 1), label = "German citizen", format.stata = "%8.0g"), 
    foreign = structure(c(0, 0, 0, 1, 0, 0), label = "No german citizenship", format.stata = "%8.0g"), 
    refugee = structure(c(0, 0, 0, 0, 0, 0), label = "Refugee", format.stata = "%8.0g"), 
    employed = structure(c(0, 0, 1, 0, 1, 0), format.stata = "%8.0g"), 
    fulltime = structure(c(0, 0, 1, 0, 1, 0), format.stata = "%8.0g"), 
    parttime = structure(c(0, 0, 0, 0, 0, 0), format.stata = "%8.0g"), 
    marginal = structure(c(0, 0, 0, 0, 0, 0), label = "Employed but below marginal earning threshold", format.stata = "%8.0g"), 
    not_emp = structure(c(1, 1, 0, 1, 0, 1), format.stata = "%8.0g"), 
    ub1_unemp = structure(c(1, 0, 0, 0, 0, 0), format.stata = "%8.0g"), 
    ub1_emp = structure(c(0, 0, 0, 0, 0, 0), format.stata = "%8.0g"), 
    ub2_unemp = structure(c(0, 1, 0, 1, 0, 1), format.stata = "%8.0g"), 
    ub2_emp = structure(c(0, 0, 0, 0, 0, 0), format.stata = "%8.0g"), 
    ub2_ub1 = structure(c(0, 0, 0, 0, 0, 0), format.stata = "%8.0g"), 
    jobsearch = structure(c(1, 1, 0, 1, 0, 1), format.stata = "%9.0g"), 
    educ = structure(c(3, 2, 2, 1, 2, 2), format.stata = "%9.0g"), 
    educ_low = structure(c(0, 0, 0, 1, 0, 0), format.stata = "%8.0g"), 
    educ_med = structure(c(0, 1, 1, 0, 1, 1), format.stata = "%8.0g"), 
    educ_high = structure(c(1, 0, 0, 0, 0, 0), format.stata = "%8.0g"), 
    yearly_income = structure(c(4911.39990234375, 0, 55056.59765625, 
    0, 69598.1953125, 0), format.stata = "%9.0g"), ieb_wo_gem_num = structure(c(1.1e+07, 
    1.1e+07, 9772216, 1.1e+07, 3360018, 8111000), label = "Gem 12/2017", format.stata = "%10.0g"), 
    `_ID` = structure(c(8326, 8262, 5592, 8326, 5738, 3412), format.stata = "%12.0g"), 
    longitude = structure(c(13.4231042861938, 13.3653039932251, 
    10.653510093689, 13.4231042861938, 10.7989225387573, 9.16147613525391
    ), label = "x-coordinate of area centroid", format.stata = "%9.0g")), row.names = c(NA, 
-6L), class = c("tbl_df", "tbl", "data.frame"))

Then I tried to divide the dataframe into different lists by year

splited <- analysis_sample_a %>%
  filter(year %in% c(2007:2017))

analysis_sample_year_a <- splited  %>%
          group_split(year)

This is how the data of pass_buffer_a looks like

pass_buffer_a <- structure(list(welle = c("Welle 11 (2017)", "Welle 8 (2014)", 
"Welle 4 (2010)", "Welle 6 (2012)", "Welle 8 (2014)", "Welle 5 (2011)"
), g__2017 = c("09564000", "03241001", "13075136", "05958020", 
"05913000", "07332049"), pass_geo_i = c("20090", "14978", "28451", 
"9862", "6059", "7906"), state = c("Bayern", "Niedersachsen", 
"Mecklenburg-Vorpommern", "Nordrhein-Westfalen", "Nordrhein-Westfalen", 
"Rheinland-Pfalz"), plz = c("90443", "30165", "17373", "59969", 
"44328", "67273"), pop_pc = c("97", "90", "36", "7", "79", "29"
), kde_bandwi = c(60.7424210253877, 68.1159472633942, 240.487367481696, 
510.263978381978, 79.7029170659758, 305.605859767971), buffer_siz = c(182.227263076163, 
204.347841790183, 721.462102445088, 1530.79193514593, 239.108751197927, 
916.817579303913)), row.names = 0:5, class = "data.frame")

This is the code in which I also create a lists by waves.

convert_to_shp_list <- function(shp){
  y <- as(shp, "SpatialPolygons")
  p <- slot(y, "polygons")
  v <- lapply(p, function(z) { SpatialPolygons(list(z)) })
  return(v)}

pass_buffer_list <- convert_to_shp_list(pass_buffer_proj)

pass_buffer_list <- convert_to_shp_list(pass_buffer)

pass_buffer_list_w4 <- pass_buffer_list[pass_buffer$welle == "Welle 4 (2010)"]
pass_buffer_list_w5 <- pass_buffer_list[pass_buffer$welle == "Welle 5 (2011)"]
pass_buffer_list_w6 <- pass_buffer_list[pass_buffer$welle == "Welle 6 (2012)"]
pass_buffer_list_w8 <- pass_buffer_list[pass_buffer$welle == "Welle 8 (2014)"]
pass_buffer_list_w11 <- pass_buffer_list[pass_buffer$welle == "Welle 11 (2017)"]

 pass_buffer_list_w <- list(pass_buffer_list_w4,
                            pass_buffer_list_w5, pass_buffer_list_w6, pass_buffer_list_w8, pass_buffer_list_w11)

This is the loop which I tried to replace.

analysis_buf <- list()
c <- 1
for(i in 1:length(analysis_sample_year_a)){
  for(j in 1:length(pass_buffer_list_w[[i]])){
    analysis_buf[[c]] <- analysis_sample_year_a[[i]][which(!is.na(sp::over(SpatialPoints(data.frame(analysis_sample_year_a[[i]]$X_proj,analysis_sample_year_a[[i]]$Y_proj)), pass_buffer_list_w[[i]][[j]]))), ]
    c <- c + 1
  }
}

This is how the output should look like

enter image description here

My idea to replace the loop is to apply the base::lapply() function into different objects data1, data2,....,data11 by each wave. In this way in each wave I have a point assigned to its determined buffer. The problem is that I get an empty list but I dont know the why.


analysis_2007 <- list(analysis_sample_year_a[[1]]
analysis_2008 <- list(analysis_sample_year_a[[2]]
...
analysis_2017 <- list(analysis_sample_year_a[[11]]


data1 <- lapply(seq(nrow(pass_buffer_list_w)), function(x) sp::over(pass_buffer_list_w[x,], analysis_2007))
data2 <- lapply(seq(nrow(pass_buffer_list_w)), function(x) sp::over(pass_buffer_list_w[x,], analysis_2008))
...
data11 <- lapply(seq(nrow(pass_buffer_list_w)), function(x) sp::over(pass_buffer_list_w[x,], analysis_2017))

Then, my next step to replace the loop would be to create a single list where there are the lists from data1,...,data11


list_data <- tibble(list(data1, data2, ...,data11)

I hope someone can help me with this matter because I am stuck since 3 weeks and I do not find an answer. Thank you for your help

system · August 22, 2022, 2:48pm

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.