Bayesian statistic model

Sys.setlocale(category = "LC_ALL", locale = "")

library(tidyverse)

dat<-readxl::read_xlsx("examplexlsx.xlsx")

dat<-dat%>%

select(v1,v2)

dat$v1<-as.factor(dat$v1)

library(tm)

library(tidytext)

library(textdata)

sms_corpus<-VCorpus(VectorSource(dat$v2))

inspect(sms_corpus[1:2])

as.character(sms_corpus[[1]])

lapply(sms_corpus[1:4],as.character)

library(stringi)

sms_corpus<-tm::tm_map(sms_corpus,tm::content_transformer(function(x) stri_trans_tolower(x)))

sms_corpus<-tm::tm_map(sms_corpus,tm::removeNumbers)

english_stopwords <- tm::stopwords("english")

english_stopwords <- iconv(english_stopwords, from = "UTF-8", to = "UTF-8")

sms_corpus<-tm_map(sms_corpus,removeWords,english_stopwords)

sms_corpus<-tm_map(sms_corpus,removePunctuation)

sms_corpus<-tm_map(sms_corpus,tm::stemDocument)

sms_corpus<-tm_map(sms_corpus,stripWhitespace)

sms_dtm<-DocumentTermMatrix(sms_corpus)

sms_dtm_train<-sms_dtm[1:350,]

sms_dtm_test<-sms_dtm[351:500,]

sms_train_labels<-dat[1:350,]$v1

sms_test_labels<-dat[351:500,]$v1

library(wordcloud)

wordcloud::wordcloud(sms_corpus,random.order = FALSE)

sms_freq_words<-tm::findFreqTerms(sms_dtm_train,5)

#sms_dtm_freq_train<-sms_dtm_train[,sms_freq_words]

#sms_dtm_freq_test<-sms_dtm_test[,sms_freq_words]

convert_counts<-function(x){

x<-ifelse(x > 0, "Yes", "No")

}

sms_train<-apply(sms_dtm_train, MARGIN = 2,convert_counts)

sms_test<-apply(sms_dtm_test, MARGIN = 2,convert_counts)

library(naivebayes)

sms_classifier <- naive_bayes(sms_train,sms_train_labels)

library(gmodels)

sms_test_pred<-predict(sms_classifier,sms_test)


最后预测报错信息如下:

Error in `[.default`(tab, V, ) : 下标出界
I'm look forward your advice as soon as possible,thank you!

This topic was automatically closed 90 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.