library(tidyverse)
# Correlate new COVID-19 cases with new deaths and write diagnostic plots
# (two scatter plots and a histogram with a normal-curve overlay) to
# "visualization.pdf".

# Axis limits for the histogram. These are dataset-specific and were
# hard-coded in the plotting call; adjust them here if the data change.
hist_x_upper <- 9
hist_y_limits <- c(0, 100)

# ---- Load and reshape ------------------------------------------------------
# The file is read without a header row and then transposed, so every CSV row
# becomes a column of `dfadc`.
# NOTE(review): this assumes the CSV stores one series per row with a label in
# the first field (row 1 = new cases, row 2 = new deaths) -- confirm against
# the data file.
covidcases_active_and_death_correlations <- read_csv(
  "full_data.csv",
  col_names = FALSE
)

# t() on a data frame yields a character matrix. stringsAsFactors = FALSE is
# spelled out so that, on R < 4.0, the columns do not become factors (in which
# case as.numeric() below would return level codes, not the values).
dfadc <- as.data.frame(
  t(covidcases_active_and_death_correlations),
  stringsAsFactors = FALSE
)

# Drop the first transposed row (the first CSV column, presumably labels).
dfadc <- dfadc[-1, , drop = FALSE]
names(dfadc)[1:2] <- c("new_cases", "new_deaths")

# Non-numeric entries become NA (with a warning).
dfadc$new_cases <- as.numeric(dfadc$new_cases)
dfadc$new_deaths <- as.numeric(dfadc$new_deaths)

# ---- Correlation -----------------------------------------------------------
# Pearson correlation over the rows where both values are present.
cor(dfadc$new_deaths, dfadc$new_cases, use = "pairwise.complete.obs")

# ---- Summary statistics for the histogram overlay --------------------------
# Computed before the PDF device is opened so that a failure here cannot leave
# a graphics device dangling.
dt <- dfadc$new_cases
stopifnot(any(is.finite(dt)))
dtMin <- min(dt, na.rm = TRUE)
dtMax <- max(dt, na.rm = TRUE)
dtMean <- mean(dt, na.rm = TRUE)
dtSd <- sd(dt, na.rm = TRUE)

# ---- Plots -----------------------------------------------------------------
pdf("visualization.pdf")
tryCatch(
  {
    # Page 1: scatter plot with jittered deaths to reduce overplotting.
    plot(
      jitter(dfadc$new_deaths, 1), dfadc$new_cases,
      xlab = "Deaths Cases", ylab = "Active Cases",
      main = "Active vs Death Covid Cases"
    )

    # Page 2: raw scatter plot with the least-squares regression line.
    plot(
      dfadc$new_deaths, dfadc$new_cases,
      xlab = "Deaths Cases", ylab = "Active Cases",
      main = "Active vs Death Covid Cases"
    )
    abline(lm(new_cases ~ new_deaths, data = dfadc))

    # Page 3: histogram of new cases with a fitted normal curve.
    h <- hist(
      dt,
      breaks = 30, density = 15,
      col = "lightgray",
      ylab = "Number of Cases",
      xlab = "Total Active Cases",
      main = "Frequency Distribution of Active and Death Cases",
      xlim = c(dtMin, hist_x_upper),
      ylim = hist_y_limits
    )

    # Theoretical normal density over the observed range, rescaled from a
    # density to expected counts per bin: density * bin width * n.
    x <- seq(dtMin, dtMax, 0.1)
    y1 <- dnorm(x, mean = dtMean, sd = dtSd)
    # hist() discards non-finite values, so count only finite observations;
    # take the bin width from the breaks, which always has >= 2 entries
    # (h$mids has a single entry when there is only one bin).
    bin_width <- diff(h$breaks[1:2])
    n_obs <- sum(is.finite(dt))
    y1 <- y1 * bin_width * n_obs
    lines(x, y1, col = "blue")
  },
  # Always close the PDF device, even if a plotting call fails.
  finally = dev.off()
)