I have been working on utilizing Artificial Neural Networks (ANNs) to address the issue of missing data in the target data (T1). Specifically, I aim to effectively fill the gaps in the time series by integrating predictor variables such as P1, P2, P3, P4 and P5.
I have excluded the "missing" months from the training and testing datasets to establish the model, which is then employed for predicting these absent months. However, I am facing an issue with the prediction timeseries showing the same trend but the values are too small. I am uncertain whether the problem lies in the normalization and de-normalization process or if there might be other issues in the steps I have undertaken in this study.
library(neuralnet)
library(dplyr)
library(zoo)
# Load your time series dataset
data <- read.csv("D:/Datasets/ANNs/TW.csv")
# Convert the Date column using the zoo package
data$Date <- as.yearmon(data$Date, format = "%m-%Y") # Convert to Date format
data <- data %>%
arrange(Date) # Make sure your data is sorted by date
# Normalize the data (excluding Date column)
normalized_data <- data %>%
select(-Date) %>%
mutate_all(scale)
# Split data into input features (X) and target variable (y)
X <- normalized_data %>%
select(-T1) # Exclude the target variable
y <- normalized_data$T1
# Find missing indices
missing_indices <- which(is.na(y))
# Split data into training and testing sets
set.seed(123) # For reproducibility
train_ratio <- 0.7
num_rows <- nrow(X)
num_train <- round(train_ratio * num_rows)
num_missing <- length(missing_indices)
num_actual_train <- num_train - num_missing
# Randomly sample from non-missing data for actual training
actual_train_indices <- sample(setdiff(seq_len(num_rows), missing_indices), size = num_actual_train)
test_indices <- setdiff(seq_len(num_rows), c(actual_train_indices, missing_indices))
X_train <- X[actual_train_indices, ]
y_train <- y[actual_train_indices]
X_test <- X[test_indices, ]
y_test <- y[test_indices]
# Combine input features and target variable for training
train_data <- data.frame(X_train, T1 = y_train)
# Define the neural network model
model <- neuralnet(
T1 ~ P1 + P2 + P3 + P4 + P5,
data = train_data,
hidden = 5, # Define the number of hidden layers and neurons
linear.output = TRUE # Use linear activation for the output layer
)
plot(model)
# Predict missing and available months using the model
all_X <- rbind(X_train, X_test)
all_predictions <- predict(model, newdata = all_X)
predicted_values_all <- all_predictions
# Replace missing values with predictions
y[missing_indices] <- predicted_values_all[(num_actual_train + 1):(num_actual_train + num_missing)]
# Replace this with your denormalization process
denormalize <- function(value) {
min_val <- min(y, na.rm = TRUE)
max_val <- max(y, na.rm = TRUE)
denormalized_value <- (value * (max_val - min_val)) + min_val
return(denormalized_value)
}
# Denormalize the target variable
denormalized_y <- sapply(y, denormalize)
# Calculate R-squared (R2) and Root Mean Squared Error (RMSE)
measured_values <- data$T1
measured_values[missing_indices] <- denormalized_y[missing_indices]
# Calculate R-squared (R2) and Root Mean Squared Error (RMSE)
r2 <- 1 - sum((measured_values - denormalized_y)^2) / sum((measured_values -
mean(measured_values))^2)
rmse <- sqrt(mean((measured_values - denormalized_y)^2))
cat("R-squared (R2):", r2, "\n")
cat("Root Mean Squared Error (RMSE):", rmse, "\n")