Hello.
My code and data are as follows:
> # Dataframe with Tickers and Dates ---------------------------------------------
>
> # One row per ticker, each paired with its reference date
> df1 <- data.frame(
>   ticker = c("VOXX", "AAPL", "ACCD", "CMC", "AZZ", "GBX"),
>   date = as.Date(c(
>     "2022-12-01", "2023-01-20", "2022-06-25",
>     "2021-04-27", "2015-03-25", "2010-12-10"
>   ))
> )
>
> df1
>
> # Create functions for downloading financials ----------------------------------
>
> # Define example url and ask for permission
>
>
> # The ticker must be passed as a string: a bare VOXX is looked up as a
> # variable and fails with "object 'VOXX' not found"
> url <- paste0(
>   "https://www.macrotrends.net/stocks/charts/", "VOXX", "/",
>   tolower("VOXX"), "/income-statement?freq=A"
> )
> url
> polite::bow(url) # positive for scraping
>
> # Define function for scraping financials
>
> # Scrape the income statement of one stock from macrotrends.net.
> #
> # ticker: stock symbol as a string, e.g. "VOXX". It is also used, lower-cased,
> #         as the company slug in the URL.
> # freq:   reporting frequency passed to the site: "Q" (the default, as
> #         before) or "A" -- presumably quarterly / annual.
> #
> # Returns a data.frame in which every column is character. `field_name` holds
> # the visible label of each line item; the remaining columns are the ones
> # delivered by the page (presumably one per reporting date -- confirm with
> # colnames() on a real result).
> macrotrends_stock_income_statement <- function(ticker, freq = c("Q", "A")) {
>   freq <- match.arg(freq)
>
>   # Define URL with ticker input
>   url <- paste0(
>     "https://www.macrotrends.net/stocks/charts/", ticker, "/",
>     tolower(ticker), "/income-statement?freq=", freq
>   )
>
>   # Read url
>   page <- rvest::read_html(url)
>
>   # The table is embedded in the page text as `var originalData = [...];`
>   raw_json <- stringr::str_match_all(
>     rvest::html_text(page), "var originalData = (.*);"
>   )[[1]][, 2]
>   if (length(raw_json) == 0L || is.na(raw_json[1])) {
>     stop("Could not find 'originalData' in ", url, call. = FALSE)
>   }
>
>   # Extract the right table
>   df <- data.frame(jsonlite::fromJSON(raw_json[1]))
>
>   # field_name is an HTML snippet; keep only the text of its first <a> or <p>
>   df$field_name <- vapply(
>     df$field_name,
>     function(x) {
>       rvest::html_text(rvest::html_node(rvest::read_html(x), "a,p"))
>     },
>     character(1),
>     USE.NAMES = FALSE
>   )
>
>   df <- df[, setdiff(colnames(df), "popup_icon"), drop = FALSE]
>
>   # data.frame() prefixes syntactically invalid names with "X"; strip only
>   # that leading prefix
>   colnames(df) <- sub("^X", "", colnames(df))
>
>   df <- df[!is.na(df$field_name), , drop = FALSE]
>
>   # Convert column by column: apply() would collapse a one-row result into a
>   # plain vector
>   df[] <- lapply(df, as.character)
>   rownames(df) <- NULL
>   df
> }
>
> df2 <- macrotrends_stock_income_statement("VOXX")
>
> # Transpose so that every original column becomes a row; the first row then
> # holds the field_name labels
> df2 <- t(df2)
>
> # Promote that first row to column names and drop it from the data.
> # (Setting names() on a matrix is not needed: it does not touch the column
> # names and is discarded by the next subsetting step.)
> df2 <- as.data.frame(janitor::row_to_names(df2, row_number = 1))
>
>
> # What I need by merging of both -----------------------------------------------
> # I only take revenue but the other columns should be like this
>
>
> result_df <- data.frame(
>   ticker = c("VOXX", "AAPL", "ACCD", "CMC", "AZZ", "GBX"),
>   date = as.Date(c(
>     "2022-12-01", "2023-01-20", "2022-06-25",
>     "2021-04-27", "2015-03-25", "2010-12-10"
>   ))
> )
>
> # One revenue column per year, newest first: revenue_2022_08_31, ...,
> # revenue_2009_08_31
> revenue_dates <- paste0(2022:2009, "_08_31")
> revenue_cols <- paste0("revenue_", revenue_dates)
> result_df[revenue_cols] <- NA
>
> # Also for the other key numbers of the income statement
>
> # filling VOXX
> #
> # Look the values up in df2 by row name rather than by position, so the
> # assignment works no matter how many rows df2 has; dates that are missing
> # from df2 stay NA.
> # NOTE(review): assumes the row names of df2 look like "2022.08.31" and that
> # its first column is revenue -- check with rownames(df2) and colnames(df2).
> voxx_rows <- gsub("_", ".", revenue_dates, fixed = TRUE)
> result_df[result_df$ticker == "VOXX", revenue_cols] <- df2[voxx_rows, 1]
I don't know how to do it. I looked on Stack Overflow but didn't find anything. My knowledge of the tidyverse isn't very deep. I thought it would not be as hard as this.
If anybody has advice or questions, thank you.