I am trying to run code in an R Markdown document in RStudio, knitting to HTML. The same code runs without error as a plain R script. I read in files as a corpus using the tm text-mining package and then process each file (each element of the corpus) with code like the following. When knitting, I get an error that appears to come from this statement, which uses a function from the syuzhet package:
d<-get_nrc_sentiment(AttdAI)
The error looks like this
Error in get_nrc_sentiment(AttdAI) : Data must be a character vector.
Calls: <Anonymous> ... withCallingHandlers -> withVisible -> eval -> eval -> get_nrc_senti
Here is the relevant code
# Per-essay pipeline: for every element of `my_corpus`, clean the text, build
# term matrices, draw a word cloud, score NRC sentiment and store the plots.
# Relies on objects created before this loop and not shown here: my_corpus,
# pdfNames, Essay_wc, emo_plot, sentiment_plot, allEssaysSentsAfter and
# afterSentiment.

# Extra stop words removed after stemming (loop-invariant, so built once).
extra_stopwords <- c(
  "also", "can", "may", "even", "will", "however", "like", "many",
  "retrieved", "name", "data", "ghotbi"
)

# Both plots style the x-axis title and the plot title identically.
label_style <- element_text(
  size = 9, lineheight = .9, family = "Times",
  face = "bold.italic", colour = "red"
)
label_theme <- theme(axis.title.x = label_style, plot.title = label_style)

# seq_len() yields an empty sequence for an empty corpus, whereas
# 1:length(x) would iterate over c(1, 0).
for (cor in seq_len(length(my_corpus))) {
  Essay_Corpus <- Corpus(VectorSource(my_corpus[cor]))
  # Convert the text to lower case
  Essay_Corpus <- tm_map(Essay_Corpus, content_transformer(tolower))
  # Remove numbers
  Essay_Corpus <- tm_map(Essay_Corpus, removeNumbers)
  # Remove common English stop words
  Essay_Corpus <- tm_map(Essay_Corpus, removeWords, stopwords("en"))
  # Remove punctuation
  Essay_Corpus <- tm_map(Essay_Corpus, removePunctuation)
  # Eliminate extra white space
  Essay_Corpus <- tm_map(Essay_Corpus, stripWhitespace)
  # Text stemming
  Essay_Corpus <- tm_map(Essay_Corpus, stemDocument)
  # Remove additional stop words
  Essay_Corpus <- tm_map(Essay_Corpus, removeWords, extra_stopwords)

  # Create document-term and term-document matrices
  dtm_essay <- DocumentTermMatrix(Essay_Corpus)
  tdm_essay <- TermDocumentMatrix(Essay_Corpus)
  m_essay <- as.matrix(tdm_essay)
  v_essay <- sort(rowSums(m_essay), decreasing = TRUE)
  d_essay <- data.frame(word = names(v_essay), freq = v_essay)
  # NOTE: values are not auto-printed inside a for loop, so these two results
  # are discarded unless they are assigned or wrapped in print().
  findAssocs(tdm_essay, c("vaccine", "time"), c(0))
  findAssocs(tdm_essay, c("job", "human"), c(0))

  # Draw word cloud
  set.seed(1234)
  Essay_wc[[cor]] <- ggplot(d_essay, aes(label = word, size = freq)) +
    geom_text_wordcloud()

  # Sentiment analysis
  AttdAI <- convert.tm.to.character(my_corpus[cor])
  # get_nrc_sentiment() stops with "Data must be a character vector." for any
  # other input type. Flatten to a plain character vector only when needed, so
  # inputs that already worked are passed through untouched.
  # NOTE(review): which type convert.tm.to.character() returns for this corpus
  # class is not visible here -- confirm with str(AttdAI) when knitting.
  if (!is.character(AttdAI)) {
    AttdAI <- as.character(unlist(AttdAI, use.names = FALSE))
  }
  d <- get_nrc_sentiment(AttdAI)
  td <- data.frame(t(d))
  # Sums the first two columns of td, i.e. the first two rows of d.
  # NOTE(review): assumes d has at least two rows -- TODO confirm.
  td_new <- data.frame(rowSums(td[1:2]))

  # Transformation and cleaning
  names(td_new)[1] <- "count"
  td_new <- cbind("sentiment" = rownames(td_new), td_new)
  allEssaysSentsAfter <- cbind(allEssaysSentsAfter, td_new[, 2])
  rownames(td_new) <- NULL
  td_emo <- td_new[1:8, ]
  td_sentiment <- td_new[9:10, ]

  # Calculate difference in emotion
  afterSentiment <- cbind(afterSentiment, as.matrix(td_new[, 2]))

  # Plotting
  emo_plot[[cor]] <- qplot(
    sentiment,
    xlab = "After Essay emotions", data = td_emo, weight = count,
    geom = "bar", fill = sentiment
  ) +
    ggtitle(pdfNames[cor]) +
    label_theme +
    ylim(0, 22)
  sentiment_plot[[cor]] <- qplot(
    sentiment,
    xlab = "After Essay sentiments in binary terms", data = td_sentiment,
    weight = count, geom = "bar", fill = sentiment
  ) +
    ggtitle(pdfNames[cor]) +
    label_theme +
    ylim(0, 55)

  # NOTE: coll2 and freqterms are overwritten on every iteration, so only the
  # values for the last essay remain after the loop.
  coll2 <- textstat_collocations(AttdAI, size = 4:6)
  freqterms <- findFreqTerms(dtm_essay, lowfreq = 50)
}
Can anyone tell me how I can run this in markdown?
For reference, the libraries being used are:
library("tm")
library(dplyr)
library("NLP")
library("ggplot2")
library("syuzhet")
library("SnowballC")
library("RColorBrewer")
library("wordcloud")
library(RSQLite)
library(quanteda)
library(readtext) # To read .txt files
library(stm) # For structural topic models
library(stminsights) # For visual exploration of STM
library(gsl) # Required for the topicmodels package
library(topicmodels) # For topicmodels
library(caret) # For machine learning
library(wordcloud)
library(textreg)
library(grid)
library(gridExtra)
library(ggpubr)
library(rlist)
library(ggwordcloud)
library(ggplot2)
Thanks!