Hello everyone,
My goal is to iterate through each PDF file in the directory, apply a function to it, and save each result as an .xlsx file.
I have this so far:
library(pdftools)
library(splitstackshape)
library(tidyverse)
library(xlsx)
# Set the working directory to the folder that holds the PDF files;
# relative file lookups later in the script resolve against it.
setwd("C:/Users/jason/RetrievedFiles")
# Convert raw PDF text into a data frame with one row per line of text.
#
# clean: character vector of text to parse (for example, the value
#        returned by pdf_text() for one PDF).
# Returns a data.frame in which each trimmed line is split on ", " into
# separate columns by cSplit().
PDF <- function(clean) {
  # Break the text into individual lines on any run of CR/LF characters.
  page_lines <- unlist(str_split(clean, "[\r\n]+"))
  # The backslash must be doubled inside an R string literal ("\s" is a
  # parse error). n = 1 keeps every trimmed line whole in a single column.
  line_matrix <- str_split_fixed(str_trim(page_lines), "\\s{2,}", 1)
  # Name the column explicitly so the "V1" reference below is unambiguous.
  colnames(line_matrix) <- "V1"
  as.data.frame(cSplit(line_matrix, "V1", sep = ", ", type.convert = FALSE))
}
# Find the PDF files in the working directory. `pattern` is a regular
# expression (not a glob), so anchor on the ".pdf" extension.
Get_PDF_Files <- list.files(pattern = "\\.pdf$", ignore.case = TRUE)
# pdf_text() yields one string per page; keep only the first page of each PDF.
PDFList <- lapply(Get_PDF_Files, function(pdf_file) pdf_text(pdf_file)[1])
Output <- lapply(PDFList, PDF)
# Build one output path per PDF, named after the source file.
FileExport <- file.path(
  "C:/Users/jason/Downloads",
  paste0(tools::file_path_sans_ext(basename(Get_PDF_Files)), ".xlsx")
)
# write.xlsx() is the writer provided by the xlsx package; it saves one
# data frame per workbook, so call it once for each PDF.
invisible(Map(
  function(sheet, path) write.xlsx(sheet, file = path, row.names = FALSE),
  Output,
  FileExport
))
I basically want to convert the first page of each PDF into an Excel file and save each one under a different name.
Thanks for your help!