I am trying to extract data from Twitter through its API, but I get this error:
**Error in `$<-.data.frame`(`*tmp*`, "hashtags", value = character(0)) : replacement has 0 rows, data has 16**
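For context, the message is reproducible outside of rtweet. A minimal sketch, assuming the result came back without a `hashtags` column, so `tweet.df$hashtags` is `NULL` and `as.character(NULL)` yields `character(0)`:

df <- data.frame(x = 1:16)
df$hashtags <- as.character(df$hashtags) # NULL column -> character(0)
# Error in `$<-.data.frame`(`*tmp*`, "hashtags", value = character(0)) :
#   replacement has 0 rows, data has 16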
library(rtweet)   # search_tweets()
library(stringr)  # str_replace_all()
library(tm)       # Corpus(), tm_map()

tweets_downloader <- function(tag, n = 1000, lang = "en", retryonratelimit = FALSE){
tweet.df <- search_tweets(tag, n = n, include_rts = FALSE, lang = lang, token = twitter_token, retryonratelimit = retryonratelimit) # twitter_token is created elsewhere
print(paste0("Total Tweets downloaded for - ",tag,": ",length(tweet.df$text)))
print(paste0("Total Unique Texts downloaded for - ",tag,": ",length(unique(tweet.df$text))))
# Flatten rtweet's list-columns into plain character strings
tweet.df$hashtags <- as.character(tweet.df$hashtags)
tweet.df$symbols <- as.character(tweet.df$symbols)
tweet.df$urls_url <- as.character(tweet.df$urls_url)
tweet.df$urls_t.co <- as.character(tweet.df$urls_t.co)
tweet.df$urls_expanded_url <- as.character(tweet.df$urls_expanded_url)
tweet.df$media_url <- as.character(tweet.df$media_url)
tweet.df$media_t.co <- as.character(tweet.df$media_t.co)
tweet.df$media_expanded_url <- as.character(tweet.df$media_expanded_url)
tweet.df$media_type <- as.character(tweet.df$media_type)
tweet.df$ext_media_url <- as.character(tweet.df$ext_media_url)
tweet.df$ext_media_t.co <- as.character(tweet.df$ext_media_t.co)
tweet.df$ext_media_expanded_url <- as.character(tweet.df$ext_media_expanded_url)
tweet.df$mentions_user_id <- as.character(tweet.df$mentions_user_id)
tweet.df$mentions_screen_name <- as.character(tweet.df$mentions_screen_name)
tweet.df$geo_coords <- as.character(tweet.df$geo_coords)
tweet.df$coords_coords <- as.character(tweet.df$coords_coords)
tweet.df$bbox_coords <- as.character(tweet.df$bbox_coords)
tweet.df
}
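# Sketch (not from the original script): a defensive variant of the
# conversion block above. flatten_list_cols is a helper name I am
# introducing; it flattens only the list-columns that actually exist in
# the result, so a missing column such as hashtags cannot trigger the
# "replacement has 0 rows" error.
flatten_list_cols <- function(tweet.df, cols){
for (col in intersect(cols, names(tweet.df))) {
tweet.df[[col]] <- vapply(tweet.df[[col]], function(x) paste(as.character(x), collapse = " "), character(1))
}
tweet.df
}
# Usage: tweet.df <- flatten_list_cols(tweet.df, c("hashtags", "symbols", "urls_url"))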
tweets_cleaner <- function(tweet.df){
tweets_txt <- unique(tweet.df$text)
clean_tweet = gsub("&", "", tweets_txt) # Remove Amp
clean_tweet = gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", clean_tweet) # Remove Retweet
clean_tweet = gsub("@\\w+", "", clean_tweet) # Remove @
clean_tweet = gsub("#", " ", clean_tweet) # Before removing punctuations, add a space before every hashtag
clean_tweet = gsub("[[:punct:]]", "", clean_tweet) # Remove Punct
clean_tweet = gsub("[[:digit:]]", "", clean_tweet) # Remove Digit/Numbers
clean_tweet = gsub("http\\w+", "", clean_tweet) # Remove Links
clean_tweet = gsub("[ \t]{2,}", " ", clean_tweet) # Remove tabs
clean_tweet = gsub("^\\s+|\\s+$", " ", clean_tweet) # Remove extra white spaces
clean_tweet = gsub("^ ", "", clean_tweet) # remove blank spaces at the beginning
clean_tweet = gsub(" $", "", clean_tweet) # remove blank spaces at the end
clean_tweet = gsub("[^[:alnum:][:blank:]?&/\\-]", "", clean_tweet) # Remove Unicode Char
clean_tweet <- str_replace_all(clean_tweet," "," ") #get rid of unnecessary spaces
clean_tweet <- str_replace_all(clean_tweet, "https://t.co/[a-z,A-Z,0-9]*","") # Get rid of URLs
clean_tweet <- str_replace_all(clean_tweet, "http://t.co/[a-z,A-Z,0-9]*","")
clean_tweet <- str_replace(clean_tweet,"RT @[a-z,A-Z]*: ","") # Take out retweet header, there is only one
clean_tweet <- str_replace_all(clean_tweet,"#[a-z,A-Z]*","") # Get rid of hashtags
clean_tweet <- str_replace_all(clean_tweet,"@[a-z,A-Z]*","") # Get rid of references to other screennames
clean_tweet
}
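# Quick smoke test for tweets_cleaner() (illustrative input, not real tweet data):
sample_df <- data.frame(text = "RT @someuser: Loving the #AuraExperience!! Details at https://t.co/abc123", stringsAsFactors = FALSE)
tweets_cleaner(sample_df) # -> roughly "Loving the AuraExperience Details at"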
tweets_cleaner_tm <- function(clean_tweet, custom_stopwords = c("bla bla")){
docs <- Corpus(VectorSource(clean_tweet))
#inspect(docs)
docs <- tm_map(docs, content_transformer(tolower)) # Convert the text to lower case
docs <- tm_map(docs, removeNumbers) # Remove numbers
docs <- tm_map(docs, removeWords, stopwords("english")) # Remove common English stopwords
docs <- tm_map(docs, removeWords, custom_stopwords) # Remove custom stopwords
docs <- tm_map(docs, removePunctuation) # Remove punctuation
docs <- tm_map(docs, stripWhitespace) # Eliminate extra white spaces
# docs <- tm_map(docs, stemDocument) # Text stemming
docs
}
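# A typical next step (my sketch of the usual tm workflow, not part of the
# original script), assuming clean_tweet holds the output of tweets_cleaner():
# build a term-document matrix and list the most frequent terms.
docs <- tweets_cleaner_tm(clean_tweet, custom_stopwords = c("amp", "rt"))
tdm <- TermDocumentMatrix(docs)
head(sort(rowSums(as.matrix(tdm)), decreasing = TRUE), 10) # ten most frequent terms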
#============ Get the data for all hashtags ==================
#============ AurabyTransCorp ============
#Sys.sleep(15*60)
retryonratelimit <- TRUE # define before use: retry automatically when the rate limit is hit
tweet_df_PHED <- tweets_downloader(tag = "@aurabytranscorp OR #TheAuraExperience OR #AuraExperience.", n = 1000, lang = 'en',
                                   retryonratelimit = retryonratelimit)
saveRDS(tweet_df_PHED, file = "tweet_df_PHED.rds")
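# The saved data frame can be reloaded in a later session:
tweet_df_PHED <- readRDS("tweet_df_PHED.rds")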