res = httr::GET("https://www.climatewatchdata.org/api/v1/data/historical_emissions?historical-emissions-data-sources=215")
data = jsonlite::fromJSON(rawToChar(res$content))
res = httr::GET("https://www.climatewatchdata.org/api/v1/data/historical_emissions?historical-emissions-data-sources=215")
data = httr::content(res)
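For reference, content() can also hand back the unparsed body via its as argument, in case you want to run jsonlite yourself (note that the parsed default returns nested lists rather than the simplified data frames jsonlite::fromJSON() produces):

# parsed R object (the default for JSON responses)
data <- httr::content(res, as = "parsed")
# raw JSON text, if you prefer to parse it yourself
txt <- httr::content(res, as = "text", encoding = "UTF-8")
data <- jsonlite::fromJSON(txt)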
Additional information:
httr will recognise that the response is JSON and call jsonlite under the hood.
There is also the httr2 package, which has a more modern API (same author as httr), so you could check it out too.
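For comparison, a minimal httr2 sketch of the same call could look like this (untested, and assuming R >= 4.1 for the native pipe):

library(httr2)
# build the request, attach the query parameter, and perform it
resp <- request("https://www.climatewatchdata.org/api/v1/data/historical_emissions") |>
  req_url_query(`historical-emissions-data-sources` = 215) |>
  req_perform()
# parse the JSON body into an R list
data <- resp_body_json(resp)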
library(httr)

# call the API
res <- httr::GET("https://www.climatewatchdata.org/api/v1/data/historical_emissions?historical-emissions-data-sources=215")
# extract the content of the first page
data <- httr::content(res)

# the response header "link" holds the pagination links to all available pages
links <- trimws(unlist(strsplit(res$headers$link, split = ",")))

# helper function to extract the URL from a link entry
get_link <- function(x) {
  # find everything between "<" and ">"
  pattern <- "<(.*?)>"
  result <- regmatches(x, regexec(pattern, x))
  # if nothing is found return NA
  if (length(result) == 0) return(NA)
  result[[1]][2]
}

# recursive helper to fetch the next page and append its results
append_new_results <- function(data, res) {
  # extract the pagination links from the header
  links <- trimws(unlist(strsplit(res$headers$link, split = ",")))
  # get the "last" link (if it exists)
  linkLast <- get_link(links[grepl(x = links, pattern = "rel=\"last\"")])
  # nothing found, then we are done
  if (is.na(linkLast)) {
    # no more next links
    return(data)
  }
  # get the "next" link
  linkNext <- get_link(links[grepl(x = links, pattern = "rel=\"next\"")])
  # something to look at :D
  print(paste0("calling ", linkNext))
  # call the API with the next link
  res <- httr::GET(linkNext)
  # extract the data of this page
  dataTmp <- httr::content(res)
  # append it to the data collected so far, element by element
  keys <- unique(c(names(data), names(dataTmp)))
  data <- setNames(mapply(c, data[keys], dataTmp[keys]), keys)
  # recurse to fetch the following page
  append_new_results(data, res)
}

data <- append_new_results(data, res)
This will take a while; there are 472 pages with ~50 results each.
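For reference, the link header that gets parsed above looks roughly like this (illustrative value, not a real response):

# a made-up link header with "next" and "last" entries
link <- paste0(
  '<https://www.climatewatchdata.org/api/v1/data/historical_emissions?page=2>; rel="next", ',
  '<https://www.climatewatchdata.org/api/v1/data/historical_emissions?page=472>; rel="last"'
)
links <- trimws(unlist(strsplit(link, split = ",")))
get_link(links[grepl(x = links, pattern = "rel=\"next\"")])
#> [1] "https://www.climatewatchdata.org/api/v1/data/historical_emissions?page=2"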
@vedoa This is brilliant, man. I am still wrapping my head around recursive functions. For practice, I wrote the function below that extracts the results from your code's output and turns them into one big dataframe. The input argument page is the list output from your function append_new_results above. I ran all of this and it works. Cheers
library(magrittr) # provides the %>% pipe used below

# Function to extract the data and create a dataframe
# This uses the brilliant API output object from @vedoa
# @param page List The list output from the API get function `append_new_results`
page2DF <- function(page) {
  d <- page[['data']]
  # the first 7 fields of each record are scalar metadata columns
  left_cols <- purrr::map(seq_along(d), ~ list2DF(d[[.x]][1:7])) %>% dplyr::bind_rows()
  # the 8th field holds the list of per-year emission values
  right <- purrr::map(seq_along(d), ~ d[[.x]][[8]])
  # transpose each year/value list into a named list of vectors
  dfs <- purrr::map(seq_along(right), ~ purrr::list_transpose(right[[.x]], simplify = TRUE, default = NA_real_))
  # widen so that each year becomes its own column
  vals <- purrr::map(seq_along(dfs), ~ list2DF(dfs[[.x]]) %>% tidyr::pivot_wider(names_from = 'year'))
  out <- dplyr::bind_cols(left_cols, dplyr::bind_rows(vals))
  return(out)
}
# Convert API Data List to DF
data_df <- page2DF(data)
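To see what the reshaping inside page2DF does, here is a toy example of a single record's year/value list (the field names year and value are an assumption about the API's record layout):

# a made-up emissions list for one record
emissions <- list(list(year = 2019, value = 1.1), list(year = 2020, value = 1.2))
# transpose into a named list of vectors: list(year = c(2019, 2020), value = c(1.1, 1.2))
tr <- purrr::list_transpose(emissions, simplify = TRUE, default = NA_real_)
# two-row dataframe, then pivot to one column per year
list2DF(tr) %>% tidyr::pivot_wider(names_from = 'year')
#> # A tibble: 1 x 2
#>   `2019` `2020`
#>    <dbl>  <dbl>
#> 1    1.1    1.2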