NEED HELP ASAP Creating RMSE Plot out of Cumulative Elastic Net Models
Hello,
I am having trouble creating a plot in R and I am really under pressure right now.
I've estimated 20 models with additional predictors for the variable "age" (i.e. the feature sets items_gc1 to items_gc20). I want to create a plot that shows the shrinking RMSE in both the train and test groups as more and more features are added (x = RMSE, y = variables (1-20)).
Do you have an idea what possible code could look like? I was thinking of building factor variables out of the test/train variables, but it was always buggy.
Here are the estimated models:
THANK YOU IN ADVANCE!
# Model 1: elastic net regression of age on the first item only.
items_gc1 <- "gc_mat090"
set.seed(112)
# NOTE(review): "ones" is selected next to the single item -- presumably a
# constant helper column so glmnet receives at least two predictor columns;
# confirm against the data preparation code.
enet_reg1.1 <- train(
  age ~ .,
  data = subset_train_dat[, c(items_gc1, "age", "ones")],
  method = "glmnet",
  metric = "RMSE",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train1 <- postResample(
  obs = subset_train_dat$age,
  pred = predict(enet_reg1.1, newdata = subset_train_dat)
))
# Reported values: RMSE = 13.78 / R2 = 0.11
# Performance on the separate test data.
(res_enet_reg_test1 <- postResample(
  obs = subset_test_dat$age,
  pred = predict(enet_reg1.1, newdata = subset_test_dat)
))
# Reported values: RMSE = 13.18 / R2 = 0.17
# Model 2: elastic net regression of age on the first 2 items.
items_gc2 <- c("gc_mat090", "gc_lit005")
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg2.1 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc2)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train2 <- postResample(
  pred = predict(enet_reg2.1, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 12.81, R2 = 0.23
# Performance on the separate test data.
(res_enet_reg_test2 <- postResample(
  pred = predict(enet_reg2.1, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.57, R2 = 0.24
# Model 3: elastic net regression of age on the first 3 items.
items_gc3 <- c("gc_mat090", "gc_lit005", "gc_lit030")
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg3.1 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc3)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train3 <- postResample(
  pred = predict(enet_reg3.1, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 12.49, R2 = 0.27
# Performance on the separate test data.
(res_enet_reg_test3 <- postResample(
  pred = predict(enet_reg3.1, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.79, R2 = 0.22
# Model 4: elastic net regression of age on the first 4 items.
items_gc4 <- c("gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097")
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg4.1 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc4)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train4 <- postResample(
  pred = predict(enet_reg4.1, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 11.78, R2 = 0.35
# Performance on the separate test data.
(res_enet_reg_test4 <- postResample(
  pred = predict(enet_reg4.1, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.32, R2 = 0.27
# Model 5: elastic net regression of age on the first 5 items.
items_gc5 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013"
)
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg5.1 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc5)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train5 <- postResample(
  pred = predict(enet_reg5.1, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 11.49, R2 = 0.38
# Performance on the separate test data.
(res_enet_reg_test5 <- postResample(
  pred = predict(enet_reg5.1, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.08, R2 = 0.30
# Model 6: elastic net regression of age on the first 6 items.
items_gc6 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081"
)
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg6.1 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc6)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train6 <- postResample(
  pred = predict(enet_reg6.1, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 11.17, R2 = 0.42
# Performance on the separate test data.
(res_enet_reg_test6 <- postResample(
  pred = predict(enet_reg6.1, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.10, R2 = 0.31
# Model 7: elastic net regression of age on the first 7 items.
items_gc7 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031"
)
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg7.1 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc7)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train7 <- postResample(
  pred = predict(enet_reg7.1, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 11.02, R2 = 0.43
# Performance on the separate test data.
(res_enet_reg_test7 <- postResample(
  pred = predict(enet_reg7.1, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.23, R2 = 0.30
# Model 8: elastic net regression of age on the first 8 items.
items_gc8 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010"
)
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg8.1 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc8)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train8 <- postResample(
  pred = predict(enet_reg8.1, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 10.90, R2 = 0.44
# Performance on the separate test data.
(res_enet_reg_test8 <- postResample(
  pred = predict(enet_reg8.1, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.15, R2 = 0.31
# Model 9: elastic net regression of age on the first 9 items.
items_gc9 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043"
)
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg9.1 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc9)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train9 <- postResample(
  pred = predict(enet_reg9.1, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 10.83, R2 = 0.45
# Performance on the separate test data.
(res_enet_reg_test9 <- postResample(
  pred = predict(enet_reg9.1, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.36, R2 = 0.29
# Model 10: elastic net regression of age on the first 10 items.
items_gc10 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011"
)
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg10 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc10)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train10 <- postResample(
  pred = predict(enet_reg10, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 10.76, R2 = 0.46
# Performance on the separate test data.
(res_enet_reg_test10 <- postResample(
  pred = predict(enet_reg10, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.29, R2 = 0.29
# Model 11: elastic net regression of age on the first 11 items.
items_gc11 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073"
)
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg11 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc11)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train11 <- postResample(
  pred = predict(enet_reg11, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 10.52, R2 = 0.48
# Performance on the separate test data.
(res_enet_reg_test11 <- postResample(
  pred = predict(enet_reg11, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.33, R2 = 0.29
# Model 12: elastic net regression of age on the first 12 items.
items_gc12 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105"
)
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg12 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc12)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train12 <- postResample(
  pred = predict(enet_reg12, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 10.43, R2 = 0.49
# Performance on the separate test data.
(res_enet_reg_test12 <- postResample(
  pred = predict(enet_reg12, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.16, R2 = 0.31
# Model 13: elastic net regression of age on the first 13 items.
items_gc13 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105", "gc_lit039"
)
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg13 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc13)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train13 <- postResample(
  pred = predict(enet_reg13, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 10.27, R2 = 0.51
# Performance on the separate test data.
(res_enet_reg_test13 <- postResample(
  pred = predict(enet_reg13, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.17, R2 = 0.31
# Model 14: elastic net regression of age on the first 14 items.
items_gc14 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105", "gc_lit039", "gc_phy022"
)
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg14 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc14)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train14 <- postResample(
  pred = predict(enet_reg14, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 10.10, R2 = 0.52
# Performance on the separate test data.
(res_enet_reg_test14 <- postResample(
  pred = predict(enet_reg14, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.10, R2 = 0.32
# Model 15: elastic net regression of age on the first 15 items.
items_gc15 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105", "gc_lit039", "gc_phy022", "gc_gsu057"
)
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg15 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc15)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train15 <- postResample(
  pred = predict(enet_reg15, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 9.95, R2 = 0.54
# Performance on the separate test data.
(res_enet_reg_test15 <- postResample(
  pred = predict(enet_reg15, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.13, R2 = 0.33
# Model 16: elastic net regression of age on the first 16 items.
items_gc16 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105", "gc_lit039", "gc_phy022", "gc_gsu057",
  "gc_lit042"
)
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg16 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc16)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train16 <- postResample(
  pred = predict(enet_reg16, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 9.78, R2 = 0.55
# Performance on the separate test data.
(res_enet_reg_test16 <- postResample(
  pred = predict(enet_reg16, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.37, R2 = 0.32
# Model 17: elastic net regression of age on the first 17 items.
items_gc17 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105", "gc_lit039", "gc_phy022", "gc_gsu057",
  "gc_lit042", "gc_mus046"
)
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg17 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc17)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train17 <- postResample(
  pred = predict(enet_reg17, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 9.67, R2 = 0.56
# Performance on the separate test data.
(res_enet_reg_test17 <- postResample(
  pred = predict(enet_reg17, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.39, R2 = 0.32
# Model 18: elastic net regression of age on the first 18 items.
items_gc18 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105", "gc_lit039", "gc_phy022", "gc_gsu057",
  "gc_lit042", "gc_mus046", "gc_wir018"
)
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg18 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc18)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train18 <- postResample(
  pred = predict(enet_reg18, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 9.65, R2 = 0.56
# Performance on the separate test data.
(res_enet_reg_test18 <- postResample(
  pred = predict(enet_reg18, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.49, R2 = 0.31
# Model 19: elastic net regression of age on the first 19 items.
items_gc19 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105", "gc_lit039", "gc_phy022", "gc_gsu057",
  "gc_lit042", "gc_mus046", "gc_wir018", "gc_ern034"
)
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg19 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc19)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train19 <- postResample(
  pred = predict(enet_reg19, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 9.54, R2 = 0.57
# Performance on the separate test data.
(res_enet_reg_test19 <- postResample(
  pred = predict(enet_reg19, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.61, R2 = 0.30
# Model 20: elastic net regression of age on all 20 items.
items_gc20 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105", "gc_lit039", "gc_phy022", "gc_gsu057",
  "gc_lit042", "gc_mus046", "gc_wir018", "gc_ern034", "gc_mus013"
)
# Fix the RNG seed before train() so the fit can be reproduced.
set.seed(112)
enet_reg20 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc20)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)
# Performance on the training data (the rows the model was fit on).
# The outer parentheses print the result when the line is run.
(res_enet_reg_train20 <- postResample(
  pred = predict(enet_reg20, newdata = subset_train_dat),
  obs = subset_train_dat$age
))
# Reported values: RMSE = 9.42, R2 = 0.59
# Performance on the separate test data.
(res_enet_reg_test20 <- postResample(
  pred = predict(enet_reg20, newdata = subset_test_dat),
  obs = subset_test_dat$age
))
# Reported values: RMSE = 12.43, R2 = 0.32