Here is an example of working with your data. It uses the functions select(), gather(), group_by() and summarize():
- select() chooses columns.
- gather() combines many columns into two: one column to label the data and one to store the value.
- group_by() makes subsets of the data, one subset for each value of the chosen column(s).
- summarize() does calculations on the subsets created by group_by()
Look at the intermediate data frame produced at each calculation step and check that it makes sense to you. This is a lot to digest at one time, so do not be surprised if you are confused at first.
library(dplyr)
library(tidyr)
# Sample data frame with five rows: integer columns first, followed by
# the text answers, which are stored as factors.
Dat <- data.frame(
  # Integer columns
  Progress = rep(100L, 5),
  Duration.in.seconds. = c(1770L, 1030L, 644L, 3988L, 1292L),
  Id = 1:5,
  model = c(4L, 2L, 1L, 3L, 2L),
  location = c(1L, 3L, 2L, 2L, 3L),
  education = c(3L, 1L, 3L, 1L, 1L),
  fee = c(2L, 5L, 4L, 5L, 4L),
  income = c(5L, 4L, 5L, 4L, 5L),
  red = c(4L, 1L, 2L, 2L, 4L),
  blue = c(3L, 2L, 1L, 1L, 3L),
  green = c(1L, 4L, 4L, 3L, 2L),
  yellow = c(2L, 5L, 3L, 4L, 1L),
  black = c(5L, 3L, 5L, 5L, 5L),
  Age = c(47L, 47L, 51L, 50L, 38L),
  # Factor columns (levels are the sorted unique values of each vector)
  Recorded.Date = factor(c(
    "15/06/2018 21:29",
    "16/06/2018 15:47",
    "18/06/2018 19:07",
    "19/06/2018 20:29",
    "20/06/2018 13:59"
  )),
  RID = factor(c(
    "R_Djkev4OH9F3RuIp",
    "R_2vY3qfyS8vNWvCH",
    "R_1Rr1Eh9iCI3wznj",
    "R_T1rPDENUBBntTCF",
    "R_3inja17CkIpsjHr"
  )),
  Distribution = factor(rep("anonymous", 5)),
  Block = factor(c("A", "C", "A", "C", "B")),
  Difficulty.of.choice.questions = factor(c(
    "Moderately easy",
    "Moderately easy",
    "Extremely difficult",
    "Extremely difficult",
    "Extremely difficult"
  )),
  Gender = factor(c("Female", "Male", "Female", "Female", "Female")),
  Ethnicity = factor(c("Other", "European", "European", "Other", "European")),
  Current.job.satisfaction4 = factor(c(
    "Somewhat dissatisfied",
    "Extremely satisfied",
    "Somewhat satisfied",
    "Somewhat satisfied",
    "Somewhat satisfied"
  )),
  Current.job.satisfaction2 = factor(c(
    "Dissatisfied",
    "Satisfied",
    "Satisfied",
    "Satisfied",
    "Satisfied"
  ))
)
# Calculate the mean rank of each feature (columns model through income).
# Step 1: keep only the feature columns.
Columns1 <- select(Dat, model:income)
# Step 2: reshape from wide to tall. The column name goes into Feature
# and the cell value goes into Rank.
Col1_tall <- gather(Columns1, key = Feature, value = Rank, model:income)
# Step 3: one subset per Feature, then the mean of Rank within each subset.
Stats1 <- Col1_tall %>%
  group_by(Feature) %>%
  summarize(Avg = mean(Rank))
Stats1
#> # A tibble: 5 x 2
#> Feature Avg
#> <chr> <dbl>
#> 1 education 1.8
#> 2 fee 4
#> 3 income 4.6
#> 4 location 2.2
#> 5 model 2.4
# Same three steps for the colour columns (red through black).
# Keep only the colour columns.
Colors <- select(Dat, red:black)
# Stack them: the column name goes into Color, the cell value into Rank.
Colors_tall <- gather(Colors, key = Color, value = Rank, red:black)
# Mean of Rank for each Color.
ColorStats <- Colors_tall %>%
  group_by(Color) %>%
  summarize(Avg = mean(Rank))
ColorStats
#> # A tibble: 5 x 2
#> Color Avg
#> <chr> <dbl>
#> 1 black 4.6
#> 2 blue 2
#> 3 green 2.8
#> 4 red 2.6
#> 5 yellow 3
Created on 2019-09-17 by the reprex package (v0.2.1)