#### 2 January 2023 class
#End result is same, only equation to get the result is a little different
#ggplot(data = <DATA>) + <GEOM_FUNCTION>(mapping=aes(<MAPPING>))
#ggplot(data = <DATA>, mapping = aes(<MAPPINGS>)) + <GEOM_FUNCTION>()
#ggplot(<DATA>, aes(<MAPPINGS>)) + <GEOM_FUNCTION>()
#ggplot(<DATA>) + <GEOM_FUNCTION>(aes(<MAPPINGS>))
#how to change library you are working in
library(dplyr)
#mpg dataset is part of the ggplot2 package (not dplyr), so ggplot2 or tidyverse must be loaded to use it
#How to see basic descriptive statistics
library(tidyverse)
library(ggplot2)
summary(object = mpg)
# glimpse() lists every column with its type and first few values
glimpse(mpg)
# head() returns the first rows of the data; the default is 6 rows
head(mpg, n = 6)
# tail() returns the last rows of the data; the default is 6 rows
tail(mpg, n = 6)
#Plotting the data
# Passing only the data draws an empty canvas
ggplot(data = mpg)
# The data argument is first, so its name can be left out
ggplot(mpg)
# Nothing is plotted yet: add an aesthetic mapping to say which columns go
# on which axis
ggplot(mpg, mapping = aes(x = class, y = displ))
# ^^^^ The axes for the two variables now appear, but no data is drawn,
# because no geom has been added, so R does not know how to draw the data.
# class is also a categorical variable, so it is a poor choice for this plot.
# A proper plot: two numeric variables plus a point geom
ggplot(mpg, mapping = aes(x = cty, y = displ)) +
  geom_point()
# A histogram takes only one positional aesthetic: the variable to be binned
# Mapping it to x gives vertical bars
ggplot(mpg, mapping = aes(x = hwy)) +
  geom_histogram()
# Mapping it to y gives horizontal bars
ggplot(mpg, mapping = aes(y = hwy)) +
  geom_histogram()
# The mapping can also be given to the geom instead of to ggplot()
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy))
# Telling groups of cars apart by colour: class, manufacturer, transmission
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, color = class))
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, color = manufacturer))
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, color = trans))
# Telling the classes apart by point shape instead of colour
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, shape = class))
# Shape and colour can be mapped at the same time
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, shape = trans, color = manufacturer))
# alpha changes the transparency of the colour
# To give every point one fixed shape, colour or alpha, close the aes()
# bracket first and put those arguments outside it; anything left inside
# aes() is treated as a dimension (column) of the dataset
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy), shape = 6, color = "tomato", alpha = 0.1)
# view() opens the whole dataset in a spreadsheet-style viewer
view(mpg)
summary(object = mpg)
view(mpg)
# First and last rows of the data (6 rows each by default)
head(mpg, n = 6)
tail(mpg, n = 6)
# Adding a title and subtitle with ggtitle()
# xlab() = label for the x axis
# ylab() = label for the y axis
# !!!! When one plot is written over several lines, end each line with +
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy, color = class)) +
  ggtitle("car performance", subtitle = "MPG") +
  xlab("Displacement") +
  # Spelling of the axis label corrected ("Milage" -> "Mileage")
  ylab("Highway Mileage") +
  theme_grey() +
  theme(
    # x-axis tick labels: bold italic, dark green, size 10, rotated 180 degrees
    axis.text.x = element_text(
      face = "bold.italic",
      color = "darkgreen",
      size = 10,
      angle = 180
    ),
    # y-axis tick labels: bold, blue, size 20, rotated 90 degrees
    axis.text.y = element_text(
      face = "bold",
      color = "blue",
      size = 20,
      angle = 90
    )
  )
#Making changes to the legend in the graph - HOMEWORK
# Facets split one plot into subplots, one per category of a variable
# In facet_wrap() the ~ introduces the single variable (class) to split by
# nrow / ncol set how many rows / columns the panels are arranged in
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_wrap(~ class, nrow = 2)
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_wrap(~ class, nrow = 7)
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_wrap(~ class, ncol = 2)
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_wrap(~ class, ncol = 10)
# facet_grid() lays the panels out as a grid of two variables: one row of
# panels per value of drv and one column of panels per value of cyl
# (rows = vars(drv), cols = vars(cyl) is the same as the formula drv ~ cyl)
ggplot(mpg) +
  geom_point(aes(x = displ, y = hwy)) +
  facet_grid(rows = vars(drv), cols = vars(cyl))
#Learn how to change title, subtitle
#Create 2-3 types of bar diagrams; grouped, horizontal, percentage
#Create one of the plots sir already gave and change title, size, color etc.
#Try making boxplot diagrams
#Go to R graph gallery
#Search on Google - e.g. "ggplot2 percentage box plot" - e.g. the GeeksforGeeks tutorials
#### 4 January 2023 class
library(tidyverse)
# Save the most recently drawn ggplot as a JPEG file.
# jpeg() only opens a file graphics device. The plot must be printed while
# the device is open, and dev.off() must be called to write and close the
# file. Without dev.off() every later plot is sent into the file instead of
# being shown in the Plots pane.
jpeg(file = "filename.jpeg")
print(last_plot())
dev.off()
# Changing dimensions of a saved graph?
# In RStudio: go to Export > change width and height in the options
# In code: pass width = and height = to jpeg()
#Data extraction
#How to import excel file?
#In environment, click on spreadsheet (import dataset) > Choose excel
#> Choose file > Choose sheet > Choose range > Skip?
#Or for csv, choose text(base)
#But how to import with codes?
library(readxl)
# read_excel() loads the workbook from the working directory; the result
# is stored in the variable viewsheet
viewsheet <- read_excel(path = "Gold_silver_prices.xlsx")
# Open the imported data in the spreadsheet-style viewer
view(viewsheet)
#Cleaning names
#Usually, we keep all variable names in small letters
#In R, we also dont put spaces between variables, but we put a connector
#Eg. gold_prices instead of gold prices
#There are packages that help us do this, eg. janitor
library(janitor)
# %>% is the pipe operator: x %>% f(y) means f(x, y), so the piped form
# viewsheet %>% clean_names(...) is the same as the direct call used here
## For all cases https://www.rdocumentation.org/packages/janitor/versions/1.2.0/topics/clean_names
viewsheet <- clean_names(viewsheet, case = "lower_camel")
view(viewsheet)
#The read_excel() step above stored the Excel sheet "Gold_silver_prices.xlsx" in the variable "viewsheet"
# A plot can be stored in a variable too, so it can be shown again quickly
# Use show() instead of view() to display a stored plot
line_graph <- ggplot(viewsheet, aes(x = year, y = gold)) +
  geom_line()
show(line_graph)
# Line graph of the gold price by year from viewsheet (gold_silver_prices)
ggplot(viewsheet, aes(x = year, y = gold)) +
  geom_line()
# The same plot with the mapping given to the geom instead
ggplot(viewsheet) +
  geom_line(aes(x = year, y = gold))
# Gold and silver prices together: one geom_line() layer per column
ggplot(viewsheet) +
  geom_line(mapping = aes(x = year, y = gold)) +
  geom_line(mapping = aes(x = year, y = silver))
# With a large number of variables (e.g. 100+) we cannot add a layer for
# every variable by hand.
# The solution is to change the structure of the data from wide format
# (one column per variable) to long format (one row per year and variable).
# The reshape2 package converts between wide and long format.
# (tidyr::pivot_longer() from the tidyverse is the newer way to do this.)
# Install reshape2 only when it is missing, so that re-running the script
# does not download the package again every time.
if (!requireNamespace("reshape2", quietly = TRUE)) {
  install.packages("reshape2")
}
library("reshape2")
# Changing data into long form: every column except year is stacked into
# two columns, "variable" (the old column name) and "value"
data_long <- melt(viewsheet, id.vars = "year")
view(data_long)
head(data_long)
library(ggplot2)
library(dplyr)
# One line per original column, told apart by colour
line_plot <- ggplot(data_long, aes(x = year, y = value, color = variable)) +
  geom_line()
show(line_plot)
#To do: Treemap
# Use secondary axis
# Reading csv files
# Uncomment the next line and put in the real path before running the rest:
#nifty_50 <-read.csv("Exact file location")
# Everything below needs nifty_50, so stop with a clear message when it has
# not been loaded instead of failing later with "object not found".
if (!exists("nifty_50")) {
  stop(
    "`nifty_50` is not loaded: read the Nifty 50 csv file with read.csv() first.",
    call. = FALSE
  )
}
# Install treemap only when it is missing, not on every run
if (!requireNamespace("treemap", quietly = TRUE)) {
  install.packages("treemap")
}
library(treemap)
view(nifty_50)
library(janitor)
nifty_50 <- nifty_50 %>% clean_names(case = "lower_camel")
#Type=index
# How to remove the percentage sign from the weightage column:
# fixed = TRUE makes sub() treat "%" as a plain character, not a pattern
nifty_50$weightage <- as.numeric(sub("%", "", nifty_50$weightage, fixed = TRUE))
#We remove percentage sign because it gives errors otherwise
#sub() = substitute
#We put 2 "" without any spaces to remove anything completely
#We put $ sign before weightage to specify it is weightage that gets affected in "nifty_50" dataset
# Treemap of the Nifty 50 data: rectangle area comes from the weightage
# column, grouping comes from the company and industry columns
# NOTE(review): index lists company before industry; confirm this is the
# intended nesting order
treemap(
  nifty_50,
  index = c("company", "industry"),
  vSize = "weightage",
  type = "index",
  palette = "Spectral",
  title = "Nifty 50 Index weightage",
  fontsize.title = 12
)
##RColorBrewer::display.brewer.all()
#Next class - create pie chart using nifty 50 data
#### 4 January 2023 class
#Making graphs on our own
#loading libraries
library(dplyr)
library(ggplot2)
library(janitor)
library(tidyverse)
library(readxl)
library(ggridges)
library(hrbrthemes)
#No line, bar, scatter graphs please!
#If there is blank cell without data
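#which(is.na(state_gdp$gdp_growth))
#This gives the positions of the cells which have empty (NA) values
#(no quotes around the column: with quotes, is.na() tests the text itself, not the data)
#How to eliminate missing data?
#na.omit(state_gdp)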
#How to eliminate only once specific cell?
# Read the weather csv from the working directory and open it in the viewer
bangaloreweather <- read.csv(file = "bangalore.csv")
view(bangaloreweather)
## Cleaning names
bangaloreweather <- clean_names(bangaloreweather, case = "small_camel")
view(bangaloreweather)
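##### Failed command to convert month into an ordered factor so that months are in calendar order in the graph
##### NOTE: "December" is missing from the levels below, so any December rows would become NA - a possible cause of the failure;
##### also check that the month names are spelled exactly as in the data
### bangaloreweather$month <- factor(bangaloreweather$month, levels = c
## ("January", "February", "March", "April", "May", "June", "July", "August",
## "September", "October", "November"))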
# Ridgeline density plot of sulphur dioxide by month, filled with a gradient.
# How to add the gradient:
#   1. Map the fill to the x value of the density with fill = after_stat(x).
#      fill = "red" inside aes() is not the colour red: it is treated as a
#      one-value category, so there is nothing to build a gradient from.
#   2. The fill is now a continuous number, so it needs a continuous scale.
#      scale_fill_brewer() is for categories only; scale_fill_distiller() is
#      its continuous version and accepts the same "YlOrRd" palette.
#      direction = 1 keeps the palette order (yellow = low, red = high).
# alpha is left out because the gradient geom does not support transparency
# in the fill.
ggplot(bangaloreweather, aes(x = so2, y = month, fill = after_stat(x))) +
  geom_density_ridges_gradient(scale = 0.9, rel_min_height = 0.01) +
  scale_fill_distiller(palette = "YlOrRd", direction = 1) +
  theme_ipsum() +
  theme(
    legend.position = "none",
    axis.title.x = element_text(
      face = "bold", color = "darkblue", size = 10, hjust = 0.5
    ),
    axis.title.y = element_text(
      face = "bold", color = "darkblue", size = 10, hjust = 0.5
    )
  ) +
  labs(title = 'Sulfur dioxide in the air per month', subtitle = "Bangalore") +
  xlab("Amount of sulphur dioxide") +
  ylab("Month")
# The example used scale_fill_viridis() from the viridis package (the package
# name was misspelled as "virgis", which is why it could not be found).
# ggplot2 has the same colours built in, so no extra package is needed:
### scale_fill_viridis_c(name = "Temp. [F]", option = "C")
# In the example, they used the hrbrthemes and viridis packages.
# "inferno" is one of the viridis colour palettes, not a package, and
# "virgis" is a misspelling of "viridis", so neither can be installed.
# Each package is installed only when it is missing. Run this once, before
# the library() calls that need these packages.
if (!requireNamespace("hrbrthemes", quietly = TRUE)) {
  install.packages("hrbrthemes")
}
if (!requireNamespace("viridis", quietly = TRUE)) {
  install.packages("viridis")
}