##This script follows on from script LCA_LL_V1: the posterior probabilities/states it produces (poLCA_posterior.states) are needed for the merge step below
## Elastic Net Regression on environmental predictors (Fam_env_and_conflict_scale)
#Install packages (only if they are missing) and open library
#Installing unconditionally on every run re-downloads the packages and, as the
#earlier console output showed, can fail with "Permission denied" on the
#package DLL (presumably because the package was already loaded in the session).
for (pkg in c("caret", "glmnet")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(caret)
library(glmnet)
#Set WD and Open CSV file
#NOTE(review): absolute, machine-specific path; kept so the working directory
#side effect of this script is unchanged
setwd("C:/Users/lukel/OneDrive/Desktop/University_Documents/PhD/Year_1/NIMH_Data/Predictors")
Fam.env.con <- read.csv("Family_env_and_conflict_scale.csv")
#####################################################################################
#Data wrangling on predictor variables
####################################################################################
#Filter year one data
#Base R subsetting is used on purpose: dplyr is never loaded in this script, so a
#bare filter() call resolves to stats::filter() and fails with
#"object 'eventname' not found" (and %>% / mutate_at() are unavailable too).
stopifnot("eventname" %in% names(Fam.env.con))
year.one.rows <- grepl("1_year_follow_up_y_arm_1", Fam.env.con$eventname)
Fam.env.con.1 <- Fam.env.con[year.one.rows, , drop = FALSE]
rownames(Fam.env.con.1) <- NULL
#Remove column 11
Fam.env.con.1 <- Fam.env.con.1[, -11, drop = FALSE]
#Change colnames
#Ten names are assigned below, so stop if the column count is not ten; otherwise
#the renaming would silently mislabel or leave NA column names.
stopifnot(ncol(Fam.env.con.1) == 10L)
colnames(Fam.env.con.1) <- c(
  "Key", "Fam_fight", "Fam_rarely_angry", "Fam_angry_throw_things",
  "Fam_lose_temper", "Fam_criticise", "Fam_hit", "Fam_keep_peace",
  "Fam_outdo", "Fam_raise"
)
#Change columns 2 to 10 to numeric so their values can be recoded (next step)
Fam.env.con.2 <- Fam.env.con.1
Fam.env.con.2[2:10] <- lapply(Fam.env.con.2[2:10], as.numeric)
#Swap 0 and 1 in columns 3, 5, 8 and 10 (i.e. reverse-code these items).
#Only exact 0/1 values are swapped; NA and any other value are left untouched
#(the previous 1 -> 3 -> 0 sentinel approach would have turned a genuine 3 into 0).
reverse.cols <- c(3, 5, 8, 10)
Fam.env.con.2[reverse.cols] <- lapply(
  Fam.env.con.2[reverse.cols],
  function(item) ifelse(item %in% c(0, 1), 1 - item, item)
)
#Remove rows containing NA values
Fam.env.con.3 <- na.omit(Fam.env.con.2)
#####################################################################################
#######################################################################################
#Combine polca posterior states (class) and Fam_env_conflict data by 'Key'
#poLCA_posterior.states is not created in this script (the header says it follows
#on from LCA_LL_V1), so fail early with a clear message if it is missing.
if (!exists("poLCA_posterior.states")) {
  stop(
    "object 'poLCA_posterior.states' not found: run script LCA_LL_V1 first",
    call. = FALSE
  )
}
Combined.data <- merge(poLCA_posterior.states, Fam.env.con.3, by = "Key")
#Remove columns from dataset (by position)
#NOTE(review): positional removal depends on the column layout of
#poLCA_posterior.states - confirm against the upstream script.
Combined.data.2 <- Combined.data[, -c(1, 3:5, 15)]
#Change colnames
#Ten names are assigned, so stop if the positional removal left a different
#number of columns instead of silently mislabelling them.
stopifnot(ncol(Combined.data.2) == 10L)
colnames(Combined.data.2) <- c(
  "state", "fam.fight", "fam.rarely.angry", "fam.angry.throw.things",
  "fam.lose.temper", "fam.criticise", "fam.hit", "fam.keep.peace",
  "fam.outdo", "fam.raise"
)
#Remove class 'one' (serious del) so y variable is binary
EN1 <- subset(Combined.data.2, state != 1)
#Drop unused levels on any factor variables
EN1 <- droplevels(EN1)
#Make the outcome an explicit two-level factor with valid level names.
#Without this, a numeric 'state' would never become a factor: droplevels() and
#levels<- do not convert it, and createDataPartition() would not stratify by class.
EN1$state <- factor(EN1$state)
stopifnot(nlevels(EN1$state) == 2L)
levels(EN1$state) <- c("two", "three")
#Create dummy variables required for glmnet (intercept column dropped)
dummy.vars.1 <- model.matrix(state ~ ., EN1)[, -1]
#Create Y variable as factor
response.1 <- EN1$state
#Split data into training and test data (80/20)
set.seed(123)
train.index.1 <- createDataPartition(response.1, p = 0.8, list = FALSE)
train.predictors.1 <- dummy.vars.1[train.index.1, , drop = FALSE]
train.response.1 <- response.1[train.index.1]
test.predictors.1 <- dummy.vars.1[-train.index.1, , drop = FALSE]
#Level names ("two", "three") were set once on response.1 above, so the train
#and test responses already share them.
test.response.1 <- response.1[-train.index.1]
#########################################################################
#first elastic net model using cross-validation sampling (class 2 vs 3)
#########################################################################
#Create parameters for cross-validation (testing model with random subsamples of training data)
#10-fold CV repeated 5 times; class probabilities are required by twoClassSummary
custom.1 <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 5,
  verboseIter = TRUE,
  classProbs = TRUE,
  summaryFunction = twoClassSummary
)
#Tuning grid: 10 alpha values in [0, 1] crossed with 5 lambda values in [0.0001, 0.2]
tune.grid.1 <- expand.grid(
  alpha = seq(0, 1, length.out = 10),
  lambda = seq(0.0001, 0.2, length.out = 5)
)
#twoClassSummary needs a factor outcome; stop with a clear error otherwise
stopifnot(is.factor(train.response.1))
#Create model
#The x/y interface is used because train.predictors.1 is a matrix (from
#model.matrix) and the formula interface requires a data frame.
#metric = "ROC" matches what twoClassSummary reports (ROC, Sens, Spec).
set.seed(1234)
EN1.model.cv <- train(
  x = train.predictors.1,
  y = train.response.1,
  method = "glmnet",
  metric = "ROC",
  tuneGrid = tune.grid.1,
  trControl = custom.1
)
#Created on 2023-01-09 with reprex v2.0.2