Caused by error in `readOGR()`: ! could not find function "readOGR"

Hi I am trying to run the following code please

# Clean DHS Data

# Clean DHS survey data. Create Household Level 
# dataframe with relevant socioeconomic variables.
renv::restore() 

pacman::p_load(tidyverse,
               rgdal,
               viridis,
               readstata13,
               dplyr,
               data.table,
               raster,
               stargazer,
               stringdist,
               tmaptools,
               stringr,
               geosphere,
               rgeos,
               haven,
               ggmap,
               sf,
               sp,
               glmnet,
               rgeos,
               caret,
               mltest,
               RANN,
               lubridate,
               jsonlite,
               httr,
               curl,
               ggpmisc,
               haven,
               sjmisc,
               dbscan,
               ggplot2,
               spatialEco,
               geosphere,
               radiant.data,
               readxl,
               mclust,
               missMDA,
               DescTools,
               furrr,
               countrycode,
               FactoMineR,
               progressr,
               ggmap,
               ggridges,
               ggpubr,
               xgboost,
               WDI,
               scales,
               ggExtra,
               ggrepel,
               ggcorrplot,
               rnaturalearth,
               ggthemes,
               gghalves,
               ggtext,
               ggsignif,
               LiblineaR,
               caret,
               exactextractr)

github_dir = "/ocean/projects/soc230008p/ramzan/Big Data Poverty Estimation/"
source(file.path(github_dir, "Functions", "functions.R"))
install.packages("rgdal")
if (!require(dplyr)) {
  install.packages("dplyr")
  library(dplyr)
}


if (!require(stringr)) {
  install.packages("stringr")
  library(stringr)
}

if (!require(magrittr)) {
  install.packages("magrittr")
  library(magrittr)
}


if (!require(purrr)) {
  install.packages("purrr")
  library(purrr)
}


if (!require(haven)) {
  install.packages("haven")
  library(haven)
}
library(sf)

library(rgdal)

dhs_dir = "/ocean/projects/soc230008p/ramzan/Big Data Poverty Estimation/Data/DHS"

# Functions to Clean Data ------------------------------------------------------
clean_hh <- function(df, 
                     hh_vars){
  
  #### Country specific fixes
  if(grepl("KH_2014|KH_2010", df$hv000[1])){
    # DH doesn't record anything for hv201 (water source); however, has water source
    # during dry and wet times. Use dry (sh102)
    df <- df %>%
      dplyr::select(-hv201) %>%
      dplyr::rename(hv201 = sh102)
  }
  
  if(grepl("KH_2005", df$hv000[1])){
    # DH doesn't record anything for hv201 (water source); however, has water source
    # during dry and wet times. Use dry (sh102)
    df <- df %>%
      dplyr::select(-hv201) %>%
      dplyr::rename(hv201 = hv201d)
  }
  
  #### Education variables
  ## Functions
  to_na_if_30above <- function(x){
    x[x >= 30] <- NA
    return(x)
  }
  
  to_na_if_4above <- function(x){
    x[x >= 4] <- NA
    return(x)
  }
  
  max_ig_na <- function(x){
    x <- max(x, na.rm = T)
    x[x %in% c(Inf,-Inf)] <- NA
    return(x)
  }
  
  count_0 <- function(x){
    sum(x == 0, na.rm = T)
  }
  
  count_1 <- function(x){
    sum(x == 1, na.rm = T)
  }
  
  count_2 <- function(x){
    sum(x == 2, na.rm = T)
  }
  
  count_3 <- function(x){
    sum(x == 3, na.rm = T)
  }
  
  count_3g <- function(x){
    sum(x > 3, na.rm = T)
  }
  
  mean_ig_na <- function(x){
    x <- mean(x, na.rm = T)
    x[x %in% c(Inf,-Inf)] <- NA
    return(x)
  }
  
  median_ig_na <- function(x){
    x <- median(x, na.rm = T)
    x[x %in% c(Inf,-Inf)] <- NA
    return(x)
  }
  
  ## Education Completed in Single Years
  educ_years <- df %>%
    dplyr::select(contains("hv108")) %>%
    mutate_all(to_numeric) %>%
    mutate_all(as.numeric) %>%
    mutate_all(to_na_if_30above) 
  
  df$educ_years_hh_max  <- apply(educ_years, 1, max_ig_na)
  df$educ_years_hh_mean <- apply(educ_years, 1, mean_ig_na)
  
  ## Highest Education Level Obtained
  # 0 = Early childhoon education program
  # 1 = Primary
  # 2 = Secondary
  # 3 = Higher
  # 8 = Don't Know
  # 00 = Less than 1 year completed
  # 98 = Don't know
  educ_levels <- df %>%
    dplyr::select(contains("hv106")) %>%
    mutate_all(to_numeric) %>%
    mutate_all(as.numeric) %>%
    mutate_all(to_na_if_4above) 
  
  df$educ_levels_hh_max <- apply(educ_levels, 1, max_ig_na)
  df$educ_levels_hh_n0  <- apply(educ_levels, 1, count_0)
  df$educ_levels_hh_n1  <- apply(educ_levels, 1, count_1)
  df$educ_levels_hh_n2  <- apply(educ_levels, 1, count_2)
  df$educ_levels_hh_n3  <- apply(educ_levels, 1, count_3)
  df$educ_levels_hh_n3g <- apply(educ_levels, 1, count_3g)
  
  # Make sure has all variables, which is needed for renaming
  for(var_i in hh_vars){
    if(is.null(df[[var_i]])){
      df[[var_i]] <- NA
    }
  }
  
  df_out <- df %>%
    dplyr::rename(cluster_id = hv001,
                  water_source = hv201,
                  water_time_to_get = hv204,
                  floor_material = hv213,
                  toilet_type = hv205,
                  has_electricity = hv206,
                  has_radio = hv207,
                  has_tv = hv208,
                  has_fridge = hv209,
                  has_motorbike = hv211,
                  has_car = hv212,
                  n_hh_members = hv009,
                  #kitchen_is_sep_room = hv242,
                  #has_bank_account = hv247,
                  wall_material = hv214,
                  roof_material = hv215,
                  n_rooms_sleeping = hv216,
                  wealth_index = hv270,
                  wealth_index_score = hv271) %>%
    dplyr::select(hhid,
                  cluster_id, 
                  educ_levels_hh_max,
                  educ_levels_hh_n0,
                  educ_levels_hh_n1,
                  educ_levels_hh_n2,
                  educ_levels_hh_n3,
                  educ_levels_hh_n3g,
                  educ_years_hh_max,
                  educ_years_hh_mean,
                  water_source,
                  water_time_to_get,
                  floor_material,
                  toilet_type,
                  has_electricity,
                  has_radio,
                  has_tv,
                  has_fridge,
                  has_motorbike,
                  has_car,
                  n_hh_members,
                  #kitchen_is_sep_room,
                  #has_bank_account,
                  wall_material,
                  roof_material,
                  n_rooms_sleeping,
                  wealth_index, wealth_index_score) %>%
    # value labels sometime different. For example, in some surveys, for floor
    # material, cement is 34 and in others cement is 35.
    mutate(floor_material = floor_material %>% as_factor() %>% as.character(),
           wall_material = wall_material %>% as_factor() %>% as.character(),
           roof_material = roof_material %>% as_factor() %>% as.character(),
           water_source = water_source %>% as_factor() %>% as.character(),
           toilet_type = toilet_type %>% as_factor() %>% as.character(),
           water_time_to_get = water_time_to_get %>% as.numeric()) %>%
    dplyr::mutate(cluster_id = cluster_id %>% as.character())
  
  return(df_out)
}

clean_geo <- function(df){
  df <- df@data %>%
    dplyr::filter(SOURCE != "MIS") %>% # Missing lat/lon
    dplyr::rename(cluster_id = DHSCLUST,
                  uid = DHSID,
                  latitude = LATNUM,
                  longitude = LONGNUM,
                  urban_rural = URBAN_RURA,
                  year = DHSYEAR,
                  country_code = DHSCC) %>%
    dplyr::select(cluster_id, uid, latitude, longitude, urban_rural, year, country_code) %>%
    dplyr::mutate(cluster_id = cluster_id %>% as.character())
  
  return(df)
}

merge_clean <- function(hh_df, geo_df){
  
  df_out <- hh_df %>%
    left_join(geo_df, by = "cluster_id") %>%
    mutate_at(vars(urban_rural), as.character) %>%
    dplyr::select(uid, cluster_id, everything()) %>%
    dplyr::mutate(year = year %>% as.character())
  
  return(df_out)
}

process_dhs <- function(dir){
  # DESCRIPTION: Cleans and merges dhs dataseets
  # ARGs:
  # dir: Directory that countains DHS survey modules for a specific country and year
  
  print(dir)
  
  # List of all files for that country & year
  files_all <- file.path(dir) %>% list.files(recursive=T, full.names = T)
  files_all <- files_all[!grepl("archive", files_all)]
  
  # Grab HH and geo file paths
  hh_path <- files_all %>% str_subset("[A-Z]{2}HR") %>% str_subset(".dta$|.DTA$")
  geo_path <- files_all %>% str_subset("[A-Z]{2}GE") %>% str_subset(".shp$")
  
  # Load and clean data
  num_to_string <- function(x){
    if(x <= 9){
      x <- paste0("0", x)
    } else{
      x <- as.character(x)
    }
    
    return(x)
  } 
  num_to_string <- Vectorize(num_to_string)
  
  ## Load variable names; needed for hv108 (education), which includes a value
  # for each household
  df_onerow_names <- read_dta(hh_path, n_max = 1) %>% names()
  
  # Years of education
  if(TRUE %in% (df_onerow_names %>% str_detect("hv108"))){
    hh108_names <- df_onerow_names %>%
      str_subset("hv108")
  } else{
    hh108_names <- NULL
  } 
  
  # Levels of education
  if(TRUE %in% (df_onerow_names %>% str_detect("hv106"))){
    hh106_names <- df_onerow_names %>%
      str_subset("hv106")
  } else{
    hh106_names <- NULL
  } 
  
  hh_vars <- c("hhid",
               "hv000",
               hh108_names,
               hh106_names,
               "hv216",
               "sh110j",
               "sh110k",
               "hv001",
               "hv201",
               "hv204",
               "hv205",
               "hv213",
               "hv206",
               "hv207",
               "hv208",
               "hv209",
               "hv211",
               "hv212",
               "hv214",
               "hv215",
               "hv221",
               "hv009",
               "hv270",
               "hv271")
  
  if(grepl("RawData/KH", dir)){
    hh_vars <- c(hh_vars, "sh102")
  }
  
  # Sometimes not all variables are in the dataset
  hh_vars_import <- hh_vars[hh_vars %in% df_onerow_names]
  
  hh_df <- read_dta(hh_path, col_select = all_of(hh_vars_import)) %>% clean_hh(hh_vars = hh_vars)
  geo_sdf <- readOGR(geo_path) %>% clean_geo()
  
  # Merge data
  survey_df <- merge_clean(hh_df, geo_sdf)
  survey_df$country_year <- dir %>% str_replace_all(".*/", "")
  
  # If wealth index in separate dataset, merge in. In earlier DHS rounds, WI
  # was in a separate dataset and not in the Household Record dataset.
  wi_path <- files_all %>% str_subset("[A-Z]{2}WI") %>% str_subset(".dta$|.DTA$")
  if(length(wi_path) > 0){
    wi_df <- read_dta(wi_path)
    
    # For Egypt in 2000, hhids didn't match; needed to remove extra while space
    if(dir %>% str_detect("EG_2000")){
      wi_df$whhid <- wi_df$whhid %>% str_squish() 
      survey_df$hhid <- survey_df$hhid %>% str_squish() 
    }
    
    survey_df <- survey_df %>%
      left_join(wi_df, c("hhid" = "whhid")) %>%
      dplyr::mutate(wi_from_diff_dataset = T)
  }
  
  return(survey_df)
}

# Process Data -----------------------------------------------------------------
## Create vector of paths to country-year folders
countries <- file.path(dhs_dir, "RawData") %>% list.files()
country_year_dirs <- lapply(countries, function(country_i){
  country_year_dir <- file.path(dhs_dir, "RawData", country_i) %>% list.files(full.names = T)
}) %>% 
  unlist()

## Remove archive folders
country_year_dirs <- country_year_dirs[!grepl("archive", country_year_dirs)]

dhs_all_df <- map_df(country_year_dirs, process_dhs)

## Fix country code
# In some cases, India uses IN country code (when IN is Indonesia, and should be IA)
dhs_all_df$country_code <- dhs_all_df$country_year %>% substring(1,2)
dhs_all_df$year <- dhs_all_df$country_year %>% substring(4,7) %>% as.numeric()

# Export HH Level Data ---------------------------------------------------------
saveRDS(dhs_all_df, file.path(dhs_dir, "FinalData", "Individual Datasets", 
                              "survey_socioeconomic_hhlevel.Rds"))


**and getting the following issues please** 


> dhs_all_df <- map_df(country_year_dirs, process_dhs)
[1] "/ocean/projects/soc230008p/ramzan/Big Data Poverty Estimation/Data/DHS/RawData/AL/AL_2008-09_DHS_03292022_1959_82518"
Error in `map()`:
ℹ In index: 1.
Caused by error in `readOGR()`:
! could not find function "readOGR"
Run `rlang::last_trace()` to see where the error occurred.
Warning messages:
1: In max(x, na.rm = T) : no non-missing arguments to max; returning -Inf
2: In max(x, na.rm = T) : no non-missing arguments to max; returning -Inf
3: In max(x, na.rm = T) : no non-missing arguments to max; returning -Inf
4: In max(x, na.rm = T) : no non-missing arguments to max; returning -Inf
5: In max(x, na.rm = T) : no non-missing arguments to max; returning -Inf
6: In max(x, na.rm = T) : no non-missing arguments to max; returning -Inf
7: In max(x, na.rm = T) : no non-missing arguments to max; returning -Inf
8: In max(x, na.rm = T) : no non-missing arguments to max; returning -Inf
9: In max(x, na.rm = T) : no non-missing arguments to max; returning -Inf
> 
> ## Fix country code
> # In some cases, India uses IN country code (when IN is Indonesia, and should be IA)
> dhs_all_df$country_code <- dhs_all_df$country_year %>% substring(1,2)
Error: object 'dhs_all_df' not found
> dhs_all_df$year <- dhs_all_df$country_year %>% substring(4,7) %>% as.numeric()
Error: object 'dhs_all_df' not found
> 
> # Export HH Level Data ---------------------------------------------------------
> saveRDS(dhs_all_df, file.path(dhs_dir, "FinalData", "Individual Datasets", 
+                               "survey_socioeconomic_hhlevel.Rds"))
Error: object 'dhs_all_df' not found
>

What is the result of running

grep("rgdal", search())

If it is integer(0), you have not successfully loaded the rgdal pakcage. You should then run

library(rgdal)

and if you get an error, post the text of the error here.


The code in your post is very hard to read because of its formatting. Please edit your post by adding lines with three back ticks just before and after your code, like this
```
Your code
```
The back tick key is just to the left of the 1 key on a US keyboard.

This is the result please of the suggested commands

> grep("rgdal", search())
integer(0)
> library(rgdal)
Error in library(rgdal) : there is no package called ‘rgdal’
> install.packages("rgdal")
Error: package 'rgdal' is not available
> install.packages("rgdal", repos = "http://cran.us.r-project.org")
Installing package into ‘J:/Big Data Poverty Estimation/renv/library/R-4.1/x86_64-w64-mingw32’
(as ‘lib’ is unspecified)
Warning message:
package ‘rgdal’ is not available for this version of R

A version of this package for your version of R might be available elsewhere,
see the ideas at
https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages 
> 
> library(rgdal)
Error in library(rgdal) : there is no package called ‘rgdal’

This is the message on CRAN concerning the rgdal package:

Package ‘rgdal’ was removed from the CRAN repository.

Formerly available versions can be obtained from the archive.

Archived on 2023-10-16 at the request of the maintainer. Consider using 'sf' or 'terra' instead.

I guess your options are to download the source code from the archive and compile it yourself or use another package.

Thanks

How I can install available versions from the archive .

What steps I need to take please

Already using 'sp' please

Thanks for the help please

You're reading in shapefiles with readOGR in the code. You could use sf::st_read() and that would work with your read_geo() function (though maybe you might need to change df@data to df

1 Like

I am getting this error message when trying to install rgdal please
'''

install.packages("https://cran.r-project.org/src/contrib/Archive/rgdal/rgdal_1.6-7.tar.gz", repos=NULL, type="source")
Installing package into ‘J:/Big Data Poverty Estimation/renv/library/R-4.1/x86_64-w64-mingw32’
(as ‘lib’ is unspecified)
trying URL 'cran.r-project.org/src/contrib/Archive/rgdal/rgdal_1.6-7.tar.gz'
Content type 'application/x-gzip' length 4351163 bytes (4.1 MB)
downloaded 4.1 MB

  • One or more packages recorded in the lockfile are not installed.
  • Use renv::status() for more details.
  • installing source package 'rgdal' ...
    ** package 'rgdal' successfully unpacked and MD5 sums checked
    ** using staged installation


    WARNING: this package has a configure script
    It probably needs manual configuration


** libs

*** arch - i386
Warning in system(cmd) : 'make' not found
ERROR: compilation failed for package 'rgdal'

  • removing 'J:/Big Data Poverty Estimation/renv/library/R-4.1/x86_64-w64-mingw32/rgdal'
    Warning message:
    In utils::install.packages("", :
    installation of package ‘C:/Users/ramzan/AppData/Local/Temp/RtmpM1W0mh/downloaded_packages/rgdal_1.6-7.tar.gz’ had non-zero exit status

library(rgdal)
Error in library(rgdal) : there is no package called ‘rgdal’
'''

Have you got Rtools installed? You might need that first.

RTools: Toolchains for building R and R packages from source on Windows (r-project.org)

Thanks

Yes I did installed rtools43-5958-5975.exe before that please

Regards
Ramzan

@williaml

Thanks when I use sf::st_read(geo_path)

I am getting the following error message please
'''

dhs_all_df <- map_df(country_year_dirs, process_dhs)
[1] "J:/Big Data Poverty Estimation/Data/DHS/RawData/AL/AL_2017-18_DHS_09092021_165_82518"
Reading layer ALGE71FL' from data source J:\Big Data Poverty Estimation\Data\DHS\RawData\AL\AL_2017-18_DHS_09092021_165_82518\ALGE71FL\ALGE71FL.shp'
using driver ESRI Shapefile' Simple feature collection with 715 features and 20 fields Geometry type: POINT Dimension: XY Bounding box: xmin: 19.39732 ymin: 39.6669 xmax: 21.00275 ymax: 42.43358 Geodetic CRS: WGS 84 [1] "J:/Big Data Poverty Estimation/Data/DHS/RawData/AL/ALGE71FL.cpg" Error in map(): i In index: 2. Caused by error in if (grepl("\n", file)) ...: ! argument is of length zero Run rlang::last_trace()` to see where the error occurred. '''

I think your new problem isn't related to readOGR() or sf::st_read().

You are running through the country_year_dirs list. The first item gets read as a shapefile.

The issue is with the second item.

Yes, I agree. It seems like the problem might not be related to readOGR() or sf::st_read(). Thank you for pointing that out.
Can you please help me resolve the issue.

Regards
Ramzan

It is a bit hard to do without having access to your files. The .cpg file is related to the shapefile, but shouldn't be read in by st_read() explicitly. You've got to play around with your code to figure it out.

I have tried alot please
I can send you the code and the files please if you can please help

This topic was automatically closed 42 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.