Hi Everyone,
I am trying to do forecasting in R Studio however, I ended up getting errors. Could someone help me with the forecasting model in R Studio?
Below is the R code:
# Step 1: Install Required Packages
install.packages("DBI")
install.packages("odbc")
install.packages("tidyverse")
install.packages("forecast")
# Step 2: Load Required Packages
library(DBI)
library(odbc)
library(tidyverse)
library(forecast)
# Step 3: Establish a Connection to SQL Server with Windows Authentication
con <- dbConnect(odbc(),
driver = "SQL Server",
server = "PRD1",
database = "DUMMY",
trusted_connection = "yes")
# Step 4: Query Data
query <- "SELECT Cast([source_system_link_2] as varchar) as [source_system_link_2]
,isnull([financial_scenario],'IC') as [financial_scenario]
,[account_description_1]
,[account_description_2]
,[balance_dt]
,format([balance_dt],'MMM-yy') as [month_year]
,sum([balance_amt]) as [balance_amt]
,Cast([acct_cd_1] as varchar) as [acct_cd_1]
,Cast([acct_cd_2] as varchar) as [acct_cd_2]
,[supplier_name]
,isnull([WO#],[Work Order ID]) as [WO#]
,[WO_ID]
,[Work Order ID]
,avg([Total Labor Hours]) as [Total Labor Hours]
,avg([Labor_Code]) as [Labor_Code]
,avg([Hourly Rate]) as [Hourly Rate]
FROM [DUMMY].[SSDI].[CPWO_New]
group by Cast([source_system_link_2] as varchar)
,isnull([financial_scenario],'IC')
,[account_description_1]
,[account_description_2]
,[balance_dt]
,Cast([acct_cd_1] as varchar)
,Cast([acct_cd_2] as varchar)
,[supplier_name]
,isnull([WO#],[Work Order ID])
,[WO_ID]
,[Work Order ID]"
data <- dbGetQuery(con, query)
# Step 5: Prepare Time Series Data for Multiple Dimension and Value Columns
# Define the dimension columns and value columns from your SQL Server table
dimension_columns <- c("[source_system_link_2]", "[financial_scenario]", "[account_description_1]", "[account_description_2]", "[balance_dt]", "[acct_cd_1]", "[acct_cd_2]", "[supplier_name]", "[WO#]", "[Work Order ID]")
dimension_columns <- as.character(dimension_columns)
value_columns <- c("[balance_amt]", "[Total Labor Hours]", "[Labor_Code]", "[Hourly Rate]")
# Initialize an empty list to store the time series data for each value column and dimension combination
ts_data <- list()
# Iterate through each value column
for (value_column in value_columns) {
# Iterate through each dimension combination
for (i in 1:length(dimension_columns)) {
# Construct the SQL query dynamically based on dimension and value column
#query <- glue::glue("SELECT {dimension_columns[i]}, {value_column} FROM [DUMMY].[SSDI].[CPWO_New]")
query <- glue::glue("SELECT {as.character(dimension_columns[i])}, {value_column} FROM [DUMMY].[SSDI].[CPWO_New]")
# Fetch the data from the SQL Server table
data <- dbGetQuery(con, query)
# Group the data by dimension columns and aggregate the values using appropriate functions (e.g., sum, average)
#data_agg <- data %>% group_by(across(all_of(dimension_columns[i])))
group_columns <- intersect(dimension_columns[i], colnames(data))
group_vars <- syms(group_columns)
data_agg <- data %>% dplyr::group_by(!!!group_vars)
# Create a time series object for each dimension and value column combination
ts_data[[paste(dimension_columns[i], value_column, sep = "_")]] <- ts_object <- ts(data_agg[[value_column]], frequency = 100)
#<- ts(data_agg$value_column, frequency = 100)
}
}
# The ts_data list will contain time series objects for each dimension and value column combination
# Step 6: Perform Forecasting for Each Dimension and Value Column Combination
# Initialize an empty list to store the forecasted results for each combination
forecast_results <- list()
# Iterate through each dimension and value column combination
for (dimension_column in dimension_columns) {
for (value_column in value_columns) {
# Get the time series data for the current combination
ts_data_current <- ts_data[[paste(dimension_column, value_column, sep = "_")]]
# Perform forecasting using the desired method (e.g., ARIMA)
forecast_model <- auto.arima(ts_data_current)
# Add the forecasted results to the forecast_results list
forecast_results[[paste(dimension_column, value_column, sep = "_")]] <- forecast_model
}
}
# The forecast_results list contains the forecasted models for each dimension and value column combination
#Step 7: Analyze and Extract Forecasted Results
# Initialize an empty list to store the extracted forecasted results
extracted_forecasts <- list()
# Iterate through each dimension and value column combination
for (dimension_column in dimension_columns) {
for (value_column in value_columns) {
# Get the forecasted model for the current combination
forecast_model <- forecast_results[[paste(dimension_column, value_column, sep = "_")]]
# Extract the point forecasts from the forecasted model
point_forecasts <- forecast_model$mean
# Store the extracted forecasts in the extracted_forecasts list
extracted_forecasts[[paste(dimension_column, value_column, sep = "_")]] <- point_forecasts
}
}
# Step 8: Move Forecasted Results Back to SQL Server
# Establish a connection to SQL Server
con <- dbConnect(odbc(),
driver = "SQL Server",
server = "PRD1",
database = "DUMMY",
trusted_connection = "yes")
# Iterate through each dimension and value column combination
for (dimension_column in dimension_columns) {
for (value_column in value_columns) {
# Get the extracted forecasts for the current combination
extracted_forecasts_current <- extracted_forecasts[[paste(dimension_column, value_column, sep = "_")]]
# Create a table (if needed) to store the forecasted results
create_table_query <- glue::glue("CREATE TABLE forecast_output_{dimension_column}_{value_column} (date_column DATE, forecast_value FLOAT)")
dbExecute(con_output, create_table_query)
# Insert the forecasted results into the table
insert_query <- glue::glue("INSERT INTO forecast_output_{dimension_column}_{value_column} (date_column, forecast_value) VALUES (?, ?)")
dbExecute(con_output, insert_query, params = list(index(extracted_forecasts_current), extracted_forecasts_current))
}
}
# Close the connection
dbDisconnect(con)
Thanks
Nakul