Bayesian statistic model

Sys.setlocale(category = "LC_ALL", locale = "")

library(tidyverse)

dat<-readxl::read_xlsx("examplexlsx.xlsx")

dat<-dat%>%

select(v1,v2)

dat$v1<-as.factor(dat$v1)

library(tm)

library(tidytext)

library(textdata)

sms_corpus<-VCorpus(VectorSource(dat$v2))

inspect(sms_corpus[1:2])

as.character(sms_corpus[[1]])

lapply(sms_corpus[1:4],as.character)

library(stringi)

sms_corpus<-tm::tm_map(sms_corpus,tm::content_transformer(function(x) stri_trans_tolower(x)))

sms_corpus<-tm::tm_map(sms_corpus,tm::removeNumbers)

english_stopwords <- tm::stopwords("english")

english_stopwords <- iconv(english_stopwords, from = "UTF-8", to = "UTF-8")

sms_corpus<-tm_map(sms_corpus,removeWords,english_stopwords)

sms_corpus<-tm_map(sms_corpus,removePunctuation)

sms_corpus<-tm_map(sms_corpus,tm::stemDocument)

sms_corpus<-tm_map(sms_corpus,stripWhitespace)

sms_dtm<-DocumentTermMatrix(sms_corpus)

sms_dtm_train<-sms_dtm[1:350,]

sms_dtm_test<-sms_dtm[351:500,]

sms_train_labels<-dat[1:350,]$v1

sms_test_labels<-dat[351:500,]$v1

library(wordcloud)

wordcloud::wordcloud(sms_corpus,random.order = FALSE)

sms_freq_words<-tm::findFreqTerms(sms_dtm_train,5)

#sms_dtm_freq_train<-sms_dtm_train[,sms_freq_words]

#sms_dtm_freq_test<-sms_dtm_test[,sms_freq_words]

convert_counts<-function(x){

x<-ifelse(x > 0, "Yes", "No")

}

sms_train<-apply(sms_dtm_train, MARGIN = 2,convert_counts)

sms_test<-apply(sms_dtm_test, MARGIN = 2,convert_counts)

library(naivebayes)

sms_classifier <- naive_bayes(sms_train,sms_train_labels)

library(gmodels)

sms_test_pred<-predict(sms_classifier,sms_test)


最后预测报错信息如下:

Error in `[.default`(tab, V, ) : 下标出界
I'm look forward your advice as soon as possible,thank you!