Making a bar chart of the data set in its current form does not make sense. Each value of traveltime appears multiple times with different values of G3. What ggplot is doing is stacking all of the G3 values, effectively giving you the sum of G3 for each value of traveltime. I illustrate this in the code below by setting the bar outline color to white, so each stacked segment has a white outline. You can see in the filtered version of DF2 (rows with NA in G3 removed) that there are six occurrences of traveltime = 1 and one of traveltime = 3, and the plot shows each of those stacked on top of each other.
What do you want the bar plot to show?
As an aside, the first row of the data contains what should be the column headers, so I deleted that row. You will probably have to modify how you read in the data so that the headers are not read as a data row.
library(dplyr)
library(ggplot2)
# Reproduction of the data as it was read in (dput-style literal, 20 rows).
# Every column except G2 and G3 is a character vector whose FIRST element is
# the column's own name ("index", "school", ...): the header line was read in
# as a data row. Numeric-looking fields such as age, traveltime and G1 are
# therefore stored as strings.
DF <- structure(list(index = c("index", "1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19"),
school = c("school", "GP", "GP", "GP", "GP", "GP",
"GP", "GP", "GP", "GP", "GP", "GP", "GP", "GP", "GP", "GP", "GP",
"GP", "GP", "GP"),
sex = c("sex", "F", "F", "F", "F", "F", "M",
"M", "F", "M", "M", "F", "F", "M", "M", "M", "F", "F", "F", "M"
),
age = c("age", "18", "17", "15", "15", "16", "16", "16", "17",
"15", "15", "15", "15", "15", "15", "15", "16", "16", "16", "17"
),
address = c("address", "U", "U", "U", "U", "U", "U", "U",
"U", "U", "U", "U", "U", "U", "U", "U", "U", "U", "U", "U"),
famsize = c("famsize", "GT3", "GT3", "LE3", "GT3", "GT3",
"LE3", "LE3", "GT3", "LE3", "GT3", "GT3", "GT3", "LE3", "GT3",
"GT3", "GT3", "GT3", "GT3", "GT3"),
Pstatus = c("Pstatus",
"A", "T", "T", "T", "T", "T", "T", "A", "A", "T", "T", "T",
"T", "T", "A", "T", "T", "T", "T"),
Medu = c("Medu", "4",
"1", "1", "4", "3", "4", "2", "4", "3", "3", "4", "2", "4",
"4", "2", "4", "4", "3", "3"),
Fedu = c("Fedu", "4", "1",
"1", "2", "3", "3", "2", "4", "2", "4", "4", "1", "4", "3",
"2", "4", "4", "3", "2"),
Mjob = c("Mjob", "at_home", "at_home",
"at_home", "health", "other", "services", "other", "other",
"services", "other", "teacher", "services", "health", "teacher",
"other", "health", "services", "other", "services"),
Fjob = c("Fjob",
"teacher", "other", "other", "services", "other", "other",
"other", "teacher", "other", "other", "health", "other",
"services", "other", "other", "other", "services", "other",
"services"),
reason = c("reason", "course", "course", "other",
"home", "home", "reputation", "home", "home", "home", "home",
"reputation", "reputation", "course", "course", "home", "home",
"reputation", "reputation", "course"),
guardian = c("guardian",
"mother", "father", "mother", "mother", "father", "mother",
"mother", "mother", "mother", "mother", "mother", "father",
"father", "mother", "other", "mother", "mother", "mother",
"mother"),
traveltime = c("traveltime", "2", "1", "1", "1",
"1", "1", "1", "2", "1", "1", "1", "3", "1", "2", "1", "1",
"1", "3", "1"),
studytime = c("studytime", "2", "2", "2",
"3", "2", "2", "2", "2", "2", "2", "2", "3", "1", "2", "3",
"1", "3", "2", "1"),
failures = c("failures", "0", "0", "3",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "3"),
schoolsup = c("schoolsup", "yes", "no",
"yes", "no", "no", "no", "no", "yes", "no", "no", "no", "no",
"no", "no", "no", "no", "no", "yes", "no"),
famsup = c("famsup",
"no", "yes", "no", "yes", "yes", "yes", "no", "yes", "yes",
"yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes",
"yes"),
paid = c("paid", "no", "no", "yes", "yes", "yes",
"yes", "no", "no", "yes", "yes", "yes", "no", "yes", "yes",
"no", "no", "yes", "no", "no"),
activities = c("activities",
"no", "no", "no", "yes", "no", "yes", "no", "no", "no", "yes",
"no", "yes", "yes", "no", "no", "no", "yes", "yes", "yes"
),
nursery = c("nursery", "yes", "no", "yes", "yes", "yes",
"yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes",
"yes", "yes", "yes", "yes", "yes"),
higher = c("higher",
"yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes",
"yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes",
"yes"),
internet = c("internet", "no", "yes", "yes", "yes",
"no", "yes", "yes", "no", "yes", "yes", "yes", "yes", "yes",
"yes", "yes", "yes", "yes", "no", "yes"),
romantic = c("romantic",
"no", "no", "no", "yes", "no", "no", "no", "no", "no", "no",
"no", "no", "no", "no", "yes", "no", "no", "no", "no"),
famrel = c("famrel",
"4", "5", "4", "3", "4", "5", "4", "4", "4", "5", "3", "5",
"4", "5", "4", "4", "3", "5", "5"),
freetime = c("freetime",
"3", "3", "3", "2", "3", "4", "4", "1", "2", "5", "3", "2",
"3", "4", "5", "4", "2", "3", "5"),
goout = c("goout", "4",
"3", "2", "2", "2", "2", "4", "4", "2", "1", "3", "2", "3",
"3", "2", "4", "3", "2", "5"),
Dalc = c("Dalc", "1", "1",
"2", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "2"),
Walc = c("Walc", "1", "1", "3",
"1", "2", "2", "1", "1", "1", "1", "2", "1", "3", "2", "1",
"2", "2", "1", "4"),
health = c("health", "3", "3", "3",
"5", "5", "5", "3", "1", "1", "5", "2", "4", "5", "3", "3",
"2", "2", "4", "5"),
absences = c("absences", "6", "4", "10",
"2", "4", "10", "0", "6", "0", "0", "0", "4", "2", "2", "0",
"4", "6", "4", "16"),
G1 = c("G1", "5", "5", "7", "15", "6",
"15", "12", "6", "16", "14", "10", "10", "14", "10", "14",
"14", "13", "8", "6"),
# G2 and G3 differ from the columns above: their first element is NA rather
# than a header string. G2 is a factor given as integer codes into the levels
# "0", "5", "10", "15", "20" (e.g. 3L is level "10"); G3 is a plain numeric
# vector. Both are NA for most rows.
G2 = structure(c(NA, NA, NA, 3L, 4L,
3L, 4L, NA, NA, NA, 4L, NA, NA, NA, NA, NA, NA, NA, 3L, 2L
), .Label = c("0", "5", "10", "15", "20"), class = "factor"),
G3 = c(NA, NA, NA, 10, 15, 10, 15, NA, NA, NA, 15, NA, NA,
NA, NA, NA, NA, NA, 10, 5)), row.names = c(NA, 20L), class = "data.frame")
# Row 1 holds the column names rather than an observation, so drop it.
DF <- slice(DF, -1)
# Keep only the two columns the plot uses, to make the data easy to inspect.
DF2 <- select(DF, traveltime, G3)
DF2
#> traveltime G3
#> 1 2 NA
#> 2 1 NA
#> 3 1 10
#> 4 1 15
#> 5 1 10
#> 6 1 15
#> 7 1 NA
#> 8 2 NA
#> 9 1 NA
#> 10 1 15
#> 11 1 NA
#> 12 3 NA
#> 13 1 NA
#> 14 2 NA
#> 15 1 NA
#> 16 1 NA
#> 17 1 NA
#> 18 3 10
#> 19 1 5
# Show only the rows that have a G3 value; these are the rows that end up
# as visible segments in the bar plot.
filter(DF2, !is.na(G3))
#> traveltime G3
#> 1 1 10
#> 2 1 15
#> 3 1 10
#> 4 1 15
#> 5 1 15
#> 6 3 10
#> 7 1 5
# Plot the full data set. Each row is drawn as its own bar segment with a
# white outline, and segments that share a traveltime value are stacked, so
# the total bar height is the sum of G3 for that traveltime.
ggplot(DF, aes(x = traveltime, y = G3, fill = traveltime)) +
  geom_col(colour = "white") +
  labs(
    title = "Plot to show how a students travel time can affect their final grade",
    x = "Travel time",
    y = "Student grade"
  ) +
  scale_fill_manual(
    name = "traveltime",
    values = c("1" = "purple", "2" = "turquoise", "3" = "blue", "4" = "yellow")
  ) +
  theme_classic()
#> Warning: Removed 12 rows containing missing values (position_stack).
Created on 2022-04-29 by the reprex package (v0.2.1)