# Simulation of data

# Random integers

# Draw as many values as the population size (10000), with replacement,
# uniformly from the integers 1 through 10000.
integers <- sample.int(10000, replace = TRUE)

# Random character strings (words sampled from Jane Austen's novels)

library(janeaustenr)
library(dplyr)
library(stringr)
library(tidytext)

# Tag every line returned by austen_books() with its row position within its
# book and a running chapter count (incremented on each line matching the
# chapter-heading pattern), then drop the grouping.
original_books <- ungroup(
  mutate(
    group_by(austen_books(), book),
    linenumber = row_number(),
    chapter = cumsum(
      str_detect(text, regex("^chapter [\\divxlc]", ignore_case = TRUE))
    )
  )
)

# Tokenise the `text` column into a `word` column, then draw 10000 of the
# resulting words without replacement.
tidy_books <- unnest_tokens(original_books, word, text)
words <- tidy_books[["word"]]
characters <- sample(words, size = 10000)

# Binary categorical variables

# 10000 independent single-trial binomial draws with success probability
# one half, i.e. a vector of 0/1 values.
binaries <- rbinom(10000, size = 1, prob = 0.5)

# Multivalued categorical variables

# Load the iris dataset and resample its Species factor, with replacement,
# to obtain 10000 categorical values.
data("iris")
categories <- sample(iris[["Species"]], size = 10000, replace = TRUE)