I'm trying to read CSV files and bind them together to create one large dataset of census data per year. Unfortunately, there comes a point where R can no longer allocate memory for the data (a vector of up to 124 Mb, if I'm not mistaken), so it just stops reading and binding files up to 2010. I've tried calling gc() over and over, but it still does not seem to work.
R version: Microsoft R Open 3.4.1
Rstudio version: Version 1.0.153
Windows version: Windows 10, 64-bit (x64)
CPU: Intel i3-4005U CPU 1.70GHz
RAM: 4GB
Here is the code.
suppressPackageStartupMessages({
library(dplyr)
library(tidyr)
library(purrr)
library(readr)
library(stringr)
library(ffbase)})
# Import and filter only household head cases ----

# 1990 census: standardize the column types used when parsing every file.
# With cols(), columns not listed here are still read and their types are
# guessed by readr.
# NOTE(review): if only the columns below are needed downstream, switching to
# cols_only() would stop readr from loading the rest and cut memory use --
# confirm against later analysis code before changing.
col_spec90 <- cols(
  PROVINCE = col_character(),
  P2_OVERSEAS_CONT = col_character(),
  P3_RELATIONSHIP = col_character(),
  P5_AGE = col_integer(),
  P6_SEX = col_character(),
  P7_MARITAL_STAT = col_character(),
  P8_RELIGION = col_character(),
  P12_DISABILITY = col_character(),
  P17_RES_5YRS_AGO = col_character(),
  P22_EDUCATION = col_character()
)

# Read every file in data/CPH1990 and keep only rows whose P3_RELATIONSHIP
# code is "01". The filtered pieces are collected in a list and bound ONCE at
# the end: growing `frame90` with bind_rows() inside a loop re-copies all rows
# accumulated so far on every iteration, which wastes time and churns memory.
paths90 <- list.files("data/CPH1990", full.names = TRUE)

frame90 <- paths90 %>%
  map(function(path) {
    read_csv(path, col_types = col_spec90) %>%
      filter(P3_RELATIONSHIP == "01")
  }) %>%
  bind_rows()

# Drop the helpers that are no longer needed before the next census year.
rm(col_spec90, paths90)
gc()
# 2000 census: standardize the column types used when parsing every file.
# With cols(), columns not listed here are still read and their types are
# guessed by readr.
# NOTE(review): if only the columns below are needed downstream, switching to
# cols_only() would stop readr from loading the rest and cut memory use --
# confirm against later analysis code before changing.
col_spec00 <- cols(
  PROVINCE = col_character(),
  P2H_RELHEAD = col_character(),
  P6_AGE_LAST = col_integer(),
  P7_SEX = col_character(),
  P8H_OVERSEAS = col_character(),
  P9_MSTATUS = col_character(),
  P10_RELIGION = col_character(),
  P13_DISABILITY = col_character(),
  P22_EDUCATION = col_character(),
  P23H_RES_5YR_AGO = col_character()
)

# Read every file in data/CPH2000 and keep only rows whose P2H_RELHEAD code is
# "01". The filtered pieces are collected in a list and bound ONCE at the end:
# growing `frame00` with bind_rows() inside a loop re-copies all rows
# accumulated so far on every iteration, which wastes time and churns memory.
paths00 <- list.files("data/CPH2000", full.names = TRUE)

frame00 <- paths00 %>%
  map(function(path) {
    read_csv(path, col_types = col_spec00) %>%
      filter(P2H_RELHEAD == "01")
  }) %>%
  bind_rows()

# Drop the helpers that are no longer needed before the next census year.
rm(col_spec00, paths00)
gc()
# 2010 census: standardize the column types used when parsing every file.
# With cols(), columns not listed here are still read and their types are
# guessed by readr.
# NOTE(review): if only the columns below are needed downstream, switching to
# cols_only() would stop readr from loading the rest and cut memory use --
# confirm against later analysis code before changing.
col_spec10 <- cols(
  PRV = col_character(),
  P2 = col_character(),
  P3 = col_character(),
  P5 = col_integer(),
  P7 = col_character(),
  P8 = col_character(),
  P12 = col_character(),
  P14 = col_character(),
  P16R = col_character(),
  P19 = col_character()
)

# Read every file in data/CPH2010 and keep only rows whose P2 code is "01".
# The filtered pieces are collected in a list and bound ONCE at the end:
# growing `frame10` with bind_rows() inside a loop re-copies all rows
# accumulated so far on every iteration, which wastes time and churns memory.
paths10 <- list.files("data/CPH2010", full.names = TRUE)

frame10 <- paths10 %>%
  map(function(path) {
    read_csv(path, col_types = col_spec10) %>%
      filter(P2 == "01")
  }) %>%
  bind_rows()

# Drop the helpers that are no longer needed before writing the outputs.
rm(col_spec10, paths10)
gc()
# Persist the results ----

# Make sure the output directory exists before writing anything; otherwise the
# first write fails only after all the expensive reading has already been done.
# showWarnings = FALSE keeps this quiet when the directory is already there.
dir.create("out", showWarnings = FALSE, recursive = TRUE)

# Snapshot the current workspace (the frames are still in memory at this
# point) so the session can be restored without re-reading the raw files.
save.image("out/frame.rda")
gc()

# Write each frame to its own CSV and release it right after, so that at most
# the frames still waiting to be written remain in memory.
write_csv(frame90, "out/frame90.csv")
rm(frame90)
gc()

write_csv(frame00, "out/frame00.csv")
rm(frame00)
gc()

# frame10 is intentionally left in the workspace, as in the original script.
write_csv(frame10, "out/frame10.csv")
gc()