Hi,
I'm using the SSA database to analyze the name "William."
I created a graph for William in the Northeast by gender, but the results are displayed by gender and aggregate. I would like a line for each state within the same graph. For example, one line for New York, one line for Vermont, etc. and not just overall males and females named William in the Northeast.
How should I adjust my code?
I'm new to R so any help is appreciated.
SSA_state.df <- read.delim("https://www.laits.utexas.edu/~mr56267/TLAH_Names_2020/Textbook/SSA_state_level.txt",
stringsAsFactors = FALSE)
library(dplyr)
library(scales)
library(ggplot2)
sequence_of_years <- seq(from = 1880, to = 2018, by = 10)
William_state.df <- SSA_state.df[which(SSA_state.df$name=="William"),]
William_NE.df <- William_state.df[which(William_state.df$state=="ME"|
William_state.df$state=="MA"|
William_state.df$state=="RI"|
William_state.df$state=="CT"|
William_state.df$state=="NH"|
William_state.df$state=="VT"|
William_state.df$state=="NY"|
William_state.df$state=="PA"|
William_state.df$state=="NJ"|
William_state.df$state=="DE"|
William_state.df$state=="MD"),]
ggplot(data = William_NE.df, aes(x = year,y = perc, color=gender, group=interaction(state,gender))) +
geom_point() +
labs(title="William in the Northeast", x = "Year",
y="Percentage of Total by Gender",
caption="Source: Data from the Social Security Administration") +
scale_x_continuous(breaks = sequence_of_years) +
scale_color_manual(labels= c("Female","Male"), values = c("blue","red"))+
scale_y_log10(labels=prettyNum) +
annotation_logticks()