Creating RMSE Plot out of Cumulative Elastic Net Models

NEED HELP ASAP Creating RMSE Plot out of Cumulative Elastic Net Models

Hello,

I am having trouble creating a plot in R and I am really under pressure right now.

I've estimated 20 models, each with one additional predictor for the variable "age" (i.e. the feature sets items_gc1 to items_gc20). I want to create a plot that shows the shrinking RMSE in both the train and the test group as more and more features are added (x = RMSE, y = number of variables (1-20)).

Do you have an idea what possible code for this could look like? I was thinking of building factor variables out of the test/train variables, but it was always buggy.

Here are the estimated models:

THANK YOU IN ADVANCE!

# Model 1: elastic net with a single item as predictor of age.
items_gc1 <- "gc_mat090"

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

# NOTE(review): the extra "ones" column is presumably a constant that is only
# there because glmnet needs at least two predictor columns -- confirm.
enet_reg1.1 <- train(
  age ~ .,
  data = subset_train_dat[, c(items_gc1, "age", "ones")],
  method = "glmnet",
  metric = "RMSE",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train1 <- postResample(
  obs = subset_train_dat$age,
  pred = predict(enet_reg1.1, subset_train_dat)
))
# Values: RMSE = 13.78 / R2 = 0.11

# Performance on the test data.
(res_enet_reg_test1 <- postResample(
  obs = subset_test_dat$age,
  pred = predict(enet_reg1.1, subset_test_dat)
))
# Values: RMSE = 13.18 / R2 = 0.17


# Model 2: elastic net with the first 2 items as predictors of age.
items_gc2 <- c("gc_mat090", "gc_lit005")

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg2.1 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc2)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train2 <- postResample(
  pred = predict(enet_reg2.1, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 12.81, R2 = 0.23
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test2 <- postResample(
  pred = predict(enet_reg2.1, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.57, R2 = 0.24


# Model 3: elastic net with the first 3 items as predictors of age.
items_gc3 <- c("gc_mat090", "gc_lit005", "gc_lit030")

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg3.1 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc3)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train3 <- postResample(
  pred = predict(enet_reg3.1, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 12.49, R2 = 0.27
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test3 <- postResample(
  pred = predict(enet_reg3.1, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.79, R2 = 0.22


# Model 4: elastic net with the first 4 items as predictors of age.
items_gc4 <- c("gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097")

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg4.1 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc4)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train4 <- postResample(
  pred = predict(enet_reg4.1, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 11.78, R2 = 0.35
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test4 <- postResample(
  pred = predict(enet_reg4.1, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.32, R2 = 0.27


# Model 5: elastic net with the first 5 items as predictors of age.
items_gc5 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013"
)

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg5.1 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc5)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train5 <- postResample(
  pred = predict(enet_reg5.1, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 11.49, R2 = 0.38
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test5 <- postResample(
  pred = predict(enet_reg5.1, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.08, R2 = 0.30


# Model 6: elastic net with the first 6 items as predictors of age.
items_gc6 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081"
)

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg6.1 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc6)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train6 <- postResample(
  pred = predict(enet_reg6.1, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 11.17, R2 = 0.42
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test6 <- postResample(
  pred = predict(enet_reg6.1, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.10, R2 = 0.31


# Model 7: elastic net with the first 7 items as predictors of age.
items_gc7 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031"
)

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg7.1 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc7)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train7 <- postResample(
  pred = predict(enet_reg7.1, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 11.02, R2 = 0.43
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test7 <- postResample(
  pred = predict(enet_reg7.1, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.23, R2 = 0.30


# Model 8: elastic net with the first 8 items as predictors of age.
items_gc8 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010"
)

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg8.1 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc8)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train8 <- postResample(
  pred = predict(enet_reg8.1, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 10.90, R2 = 0.44
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test8 <- postResample(
  pred = predict(enet_reg8.1, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.15, R2 = 0.31


# Model 9: elastic net with the first 9 items as predictors of age.
items_gc9 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043"
)

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg9.1 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc9)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train9 <- postResample(
  pred = predict(enet_reg9.1, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 10.83, R2 = 0.45
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test9 <- postResample(
  pred = predict(enet_reg9.1, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.36, R2 = 0.29


# Model 10: elastic net with the first 10 items as predictors of age.
items_gc10 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011"
)

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg10 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc10)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train10 <- postResample(
  pred = predict(enet_reg10, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 10.76, R2 = 0.46
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test10 <- postResample(
  pred = predict(enet_reg10, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.29, R2 = 0.29


# Model 11: elastic net with the first 11 items as predictors of age.
items_gc11 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073"
)

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg11 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc11)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train11 <- postResample(
  pred = predict(enet_reg11, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 10.52, R2 = 0.48
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test11 <- postResample(
  pred = predict(enet_reg11, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.33, R2 = 0.29


# Model 12: elastic net with the first 12 items as predictors of age.
items_gc12 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105"
)

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg12 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc12)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train12 <- postResample(
  pred = predict(enet_reg12, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 10.43, R2 = 0.49
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test12 <- postResample(
  pred = predict(enet_reg12, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.16, R2 = 0.31


# Model 13: elastic net with the first 13 items as predictors of age.
items_gc13 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105", "gc_lit039"
)

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg13 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc13)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train13 <- postResample(
  pred = predict(enet_reg13, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 10.27, R2 = 0.51
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test13 <- postResample(
  pred = predict(enet_reg13, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.17, R2 = 0.31


# Model 14: elastic net with the first 14 items as predictors of age.
items_gc14 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105", "gc_lit039", "gc_phy022"
)

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg14 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc14)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train14 <- postResample(
  pred = predict(enet_reg14, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 10.10, R2 = 0.52
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test14 <- postResample(
  pred = predict(enet_reg14, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.10, R2 = 0.32


# Model 15: elastic net with the first 15 items as predictors of age.
items_gc15 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105", "gc_lit039", "gc_phy022", "gc_gsu057"
)

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg15 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc15)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train15 <- postResample(
  pred = predict(enet_reg15, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 9.95, R2 = 0.54
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test15 <- postResample(
  pred = predict(enet_reg15, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.13, R2 = 0.33


# Model 16: elastic net with the first 16 items as predictors of age.
items_gc16 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105", "gc_lit039", "gc_phy022", "gc_gsu057",
  "gc_lit042"
)

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg16 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc16)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train16 <- postResample(
  pred = predict(enet_reg16, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 9.78, R2 = 0.55
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test16 <- postResample(
  pred = predict(enet_reg16, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.37, R2 = 0.32


# Model 17: elastic net with the first 17 items as predictors of age.
items_gc17 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105", "gc_lit039", "gc_phy022", "gc_gsu057",
  "gc_lit042", "gc_mus046"
)

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg17 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc17)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train17 <- postResample(
  pred = predict(enet_reg17, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 9.67, R2 = 0.56
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test17 <- postResample(
  pred = predict(enet_reg17, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.39, R2 = 0.32


# Model 18: elastic net with the first 18 items as predictors of age.
items_gc18 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105", "gc_lit039", "gc_phy022", "gc_gsu057",
  "gc_lit042", "gc_mus046", "gc_wir018"
)

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg18 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc18)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train18 <- postResample(
  pred = predict(enet_reg18, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 9.65, R2 = 0.56
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test18 <- postResample(
  pred = predict(enet_reg18, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.49, R2 = 0.31


# Model 19: elastic net with the first 19 items as predictors of age.
items_gc19 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105", "gc_lit039", "gc_phy022", "gc_gsu057",
  "gc_lit042", "gc_mus046", "gc_wir018", "gc_ern034"
)

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg19 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc19)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train19 <- postResample(
  pred = predict(enet_reg19, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 9.54, R2 = 0.57
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test19 <- postResample(
  pred = predict(enet_reg19, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.61, R2 = 0.30


# Model 20: elastic net with all 20 items as predictors of age.
items_gc20 <- c(
  "gc_mat090", "gc_lit005", "gc_lit030", "gc_mus097", "gc_che013",
  "gc_kun081", "gc_mat031", "gc_wir010", "gc_med043", "gc_che011",
  "gc_mus073", "gc_jur105", "gc_lit039", "gc_phy022", "gc_gsu057",
  "gc_lit042", "gc_mus046", "gc_wir018", "gc_ern034", "gc_mus013"
)

# Fixed seed so the random resampling done during tuning is reproducible.
set.seed(112)

enet_reg20 <- train(
  age ~ .,
  data = subset_train_dat[, c("age", items_gc20)],
  metric = "RMSE",
  method = "glmnet",
  tuneLength = 10
)

# Performance on the training data; the outer parentheses print the result.
(res_enet_reg_train20 <- postResample(
  pred = predict(enet_reg20, subset_train_dat),
  obs = subset_train_dat$age
))
# Values: RMSE = 9.42, R2 = 0.59
# (this line was missing its "#" and stopped the script from parsing)

# Performance on the test data.
(res_enet_reg_test20 <- postResample(
  pred = predict(enet_reg20, subset_test_dat),
  obs = subset_test_dat$age
))
# Values: RMSE = 12.43, R2 = 0.32
# (this line was missing its "#" as well)

Hello,

if I understand your request correctly, you only want a line graph featuring the RMSE for each model on the y-axis and the number of predictors on the x-axis. Since you know that the number of predictors increases by 1 in every step, you can just create a data.frame containing the columns number_of_predictors (e.g. 1:20) and RMSE. If you collect your results res_enet_reg_test1, ..., res_enet_reg_test20 in a list and use lapply() to extract just the RMSE value, you would have all you need for that.

It would be beneficial to provide a minimal reprex, e.g. a tiny bit of (toy) data and all the libraries you need to run your models, wrapped inside an R code chunk. You can do this via reprex::reprex() or similar functions to provide a usable framework to work with.

In this case sample data and the first three models would be enough to provide some working code, but I am not familiar with all the prediction and/or machine learning packages out there so I don't know about the structure of output from your model etc.

If my hint from above is already enough, feel free to ignore the reprex advice on this one (but keep it in mind for later requests and consider reading this: FAQ: What's a reproducible example (`reprex`) and how do I create one?).

Kind regards

Hello, thank you for your answer! To specify, I want to have two lines (in two different colours) in the plot, one for the train data and one for the test data of each model. That's why my head is tied in a knot right now.

I tried to run reprex, but that's what r shows me then :

reprex::reprex() 
#> ℹ Non-interactive session, setting `html_preview = FALSE`.
#> CLIPR_ALLOW has not been set, so clipr will not run interactively
#> Error in switch(where, expr = stringify_expression(x_expr), clipboard = ingest_clipboard(), : EXPR must be a length 1 vector

**Can you tell me how to allow it to run interactively?**

Did you copy the code to clipboard and use reprex afterwards?

Okay, I do not understand how to do this right now


# Build the base ggplot object from the table of RMSE values.
# dplyr::select() has no method for a matrix (this is the reported
# 'no applicable method for select' error), so the matrix is converted
# to a data frame before any dplyr verb is applied.
# NOTE(review): assumes AlleRMSEWerte carries the column names `features`,
# `dfRMSE1` and `RMSEpredGesamt` -- confirm with colnames(AlleRMSEWerte).
# No geom layer is added here, so printing `p` draws only the empty axes.
p <- AlleRMSEWerte %>%
  as.data.frame() %>%
  select(features, dfRMSE1, RMSEpredGesamt) %>%
  drop_na() %>%
  ggplot(mapping = aes(x = features,
                       y = dfRMSE1,
                       color = RMSEpredGesamt,
                       shape = RMSEpredGesamt))

I figured out how to build the plot, but I get the following error: `Error in UseMethod("select") : no applicable method for 'select' applied to an object of class "c('matrix', 'array', 'double', 'numeric')"`

Could you explain that one to me? Is it right that I put a data frame with all RMSE values of the train and test data (here: AlleRMSEWerte) as the first object?

And can you tell me how to select the RMSE-Values from my data.frame ?

# Collect the RMSE of all 20 models into one data frame, one row per model.
#
# Fixes compared with the hand-typed version:
#   * `res_enet_reg_train` does not exist; the first result is
#     `res_enet_reg_train1`.
#   * position 15 reused `res_enet_reg_train5` / `res_enet_reg_test5` instead
#     of the `...15` objects.
#   * c() on the results concatenated every metric they hold, not only the
#     RMSE; the "RMSE" element is now extracted explicitly, which also
#     replaces the unfinished apply() placeholder.
# NOTE(review): relies on each result being a named vector with an element
# called "RMSE", as caret's postResample() is documented to return -- confirm.
model_ids <- seq_len(20)

# mget() looks the objects up by name and stops with an error if one is
# missing, so a typo in an object name can no longer slip through unnoticed.
train_results <- mget(paste0("res_enet_reg_train", model_ids))
test_results <- mget(paste0("res_enet_reg_test", model_ids))

extract_rmse <- function(result) result[["RMSE"]]

predsTrainRMSEright <- data.frame(
  features = model_ids,
  traindata = vapply(train_results, extract_rmse, numeric(1),
                     USE.NAMES = FALSE),
  testdata = vapply(test_results, extract_rmse, numeric(1),
                    USE.NAMES = FALSE)
)

Without the libraries you use and a bit of data I don't know what the structure of your elements is and how to access them.
The select error indicates that you are trying to select a column from something which is not a data.frame but a matrix (which won't work, because select() only works with data.frames).

What structure has the element AlleRMSEWerte? Is it a list, containing the output of your predictions as elements? If so, what is the structure of your predictions (or models or whatever you created)?

Without knowing that it's quite impossible to really help you or provide working code. If you cannot provide this, you are basically left to just copy and paste all the values you need and manually create the data.frame for your plot, since I will not guess the structure of the elements in your environment.

So maybe you can at least specify your libraries, so that I (or someone else) can have a look on your output structure and help you from there.

Kind regards

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.