I need help with a course project. I've been working on this for 5 days and have gotten no where. I'm using the AdventureWorks database. This part of the project is creating a linear regression, plotting the actual data for Jan 2013 - Dec 2013, and then forecast Jan 2014-Jun 2014. The forecasted piece needs to be in a different line type and color. It also needs to include the line-of-best-fit (abline). Sadly, my instructor will not provide any help other than "check these websites". Below is what I have tried (and failed) in R so far. Any guidance is appreciated!
#load DBI package
library(DBI)
#create connection
con <- DBI::dbConnect(odbc::odbc(),
Driver="SQL Server",
Server = ".",
Database = "AdventureWorks2017",
Trusted_Connection="True")
###create query
library(dplyr)
qry1<-dbGetQuery (con,
"select
p.Name as 'Product'
,Case
when MONTH(OrderDate) = 1 then 'Jan-13'
when MONTH(OrderDate) = 2 then 'Feb-13'
when MONTH(OrderDate) = 3 then 'Mar-13'
when MONTH(OrderDate) = 4 then 'Apr-13'
when MONTH(OrderDate) = 5 then 'May-13'
when MONTH(OrderDate) = 6 then 'Jun-13'
when MONTH(OrderDate) = 7 then 'Jul-13'
when MONTH(OrderDate) = 8 then 'Aug-13'
when MONTH(OrderDate) = 9 then 'Sep-13'
when MONTH(OrderDate) = 10 then 'Oct-13'
when MONTH(OrderDate) = 11 then 'Nov-13'
when MONTH(OrderDate) = 12 then 'Dec-13'
End as 'Month1'
,MONTH(OrderDate) as 'MonthOrder'
,sum(sod.LineTotal) RevenueByDay
FROM Sales.SalesOrderHeader soh
left join Sales.SalesOrderDetail sod on soh.SalesOrderID=sod.SalesOrderID
left join Production.Product p on sod.ProductID=p.ProductID
where year(OrderDate)='2013'
and p.name in ('Mountain-200 Black, 38')
group by
p.Name
,Case
when MONTH(OrderDate) = 1 then 'Jan-13'
when MONTH(OrderDate) = 2 then 'Feb-13'
when MONTH(OrderDate) = 3 then 'Mar-13'
when MONTH(OrderDate) = 4 then 'Apr-13'
when MONTH(OrderDate) = 5 then 'May-13'
when MONTH(OrderDate) = 6 then 'Jun-13'
when MONTH(OrderDate) = 7 then 'Jul-13'
when MONTH(OrderDate) = 8 then 'Aug-13'
when MONTH(OrderDate) = 9 then 'Sep-13'
when MONTH(OrderDate) = 10 then 'Oct-13'
when MONTH(OrderDate) = 11 then 'Nov-13'
when MONTH(OrderDate) = 12 then 'Dec-13'
End
,MONTH(OrderDate)
order by MONTH(OrderDate)
")
#created data frame
lndat<-data.frame(qry1)
class(lndat$Month1)
#make month a factor (because it wouldn't work otherwise)
lndat$Month1<-factor(c("Jan-13","Feb-13","Mar-13","Apr-13","May-13","Jun-13","Jul-13"
,"Aug-13","Sep-13","Oct-13","Nov-13","Dec-13")
,levels = c("Jan-13","Feb-13","Mar-13","Apr-13","May-13"
,"Jun-13","Jul-13","Aug-13","Sep-13","Oct-13","Nov-13","Dec-13"))
#Change column names
colnames(lndat)<-c("Product","Month","MonthOrder","Revenue")
#sumamry
summary(lndat)
class(lndat$MonthOrder)
lndat$MonthOrder<-as.numeric(lndat$MonthOrder)
plot(lndat$MonthOrder,lndat$Revenue)
prodLM<-lm(formula=MonthOrder~Revenue, data = lndat)
abline(lm(MonthOrder~Revenue, data = lndat)) ##DID NOT PRODUCT LINE
#open ggplot & scales library
library(ggplot2)
library(scales)
ggplot(prodLM,aes(MonthOrder,Revenue)) +
geom_line() +
geom_smooth(lndat$Revenue ~ lndat$MonthOrder) +
scale_x_continuous("Month",limits = c(1,12))