multiple regression (testing part)

I'll start from your code:

# Work on a copy of the built-in mtcars dataset
mtcar <- mtcars
number_rows <- nrow(mtcar)

# Reserve 70% of the rows (rounded up) for training
print("## number of rows in Train dataset ##")
number_rows_train <- ceiling(0.7 * number_rows)
print(number_rows_train)  # 23 of mtcars' 32 rows

# The first number_rows_train rows form the training set
Train_data <- mtcars[seq_len(number_rows_train), ]
print("## Head of Train dataset ##")
print(head(Train_data))
#>                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
#> Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
#> Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
#> Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
#> Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
#> Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
#> Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# The remaining 30% of the rows form the held-out test set
print("## number of rows in Test dataset ##")
number_rows_test <- number_rows - number_rows_train
print(number_rows_test)  # the 9 rows left over

print("## Head of Test dataset ##")
Test_data <- mtcars[(number_rows_train + 1):number_rows, ]
print(head(Test_data))
#>                   mpg cyl  disp  hp drat    wt  qsec vs am gear carb
#> Camaro Z28       13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
#> Pontiac Firebird 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
#> Fiat X1-9        27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
#> Porsche 914-2    26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
#> Lotus Europa     30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
#> Ford Pantera L   15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4

# Whole response data: the original problem has no price column, so we
# simulate one (a random price per car).
# Fix: seed the RNG so the sampled prices -- and therefore the fitted
# coefficients shown below -- are reproducible from run to run, and size
# the sample from number_rows instead of a hard-coded 32.
set.seed(1)
Price <- sample(400:2500, size = number_rows)
responce_train <- Price[seq_len(number_rows_train)]
responce_test <- Price[(number_rows_train + 1):number_rows]

# Fit the multiple regression on the training rows only
model_train <- lm(
  responce_train ~ mpg + cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb,
  data = Train_data
)
print(model_train)
#> 
#> Call:
#> lm(formula = responce_train ~ mpg + cyl + disp + hp + drat + 
#>     wt + qsec + vs + am + gear + carb, data = Train_data)
#> 
#> Coefficients:
#> (Intercept)          mpg          cyl         disp           hp         drat  
#>   -1857.479       32.837      -22.572       -1.152        5.711     -746.254  
#>          wt         qsec           vs           am         gear         carb  
#>     251.611       50.775     -263.074     -113.935      903.106       46.744

# Report the fitted beta coefficients from the training model
cat("# # # # The Beta Coefficient Values (Training Part) # # # ","\n")
Beta_coef <- coef(model_train)
print(Beta_coef)

divider <- "----------------------------------------------"
print(divider)
print("Intercept is:")
# The intercept is the expected value of the response when every
# predictor in the model is zero.
print(Beta_coef[1])
print(divider)


#============== Test the model or call it Prediction part ======================
#==================== Use coefficients obtained from Training with =============
#============= their corresponding variables from Testing data sets ============
# Use y = a + Bx {ie, y = a + b1*x1 + b2*x2 + .....}
# ========== Test the model (prediction part) ==================================
# Instead of rebuilding y = a + b1*x1 + b2*x2 + ... by hand from the
# coefficient vector, let predict() apply the fitted model to the held-out
# rows: numerically identical, but it cannot drift out of sync with the
# model formula, and it returns a clean named vector (no 'mpg' column-name
# artifact to rename afterwards).
pred_test <- predict(model_train, newdata = Test_data)

# Compare the reserved observations with the new predictions
df_com_test <- data.frame(pred = pred_test, obs = responce_test)
df_com_test["ID"] <- rownames(df_com_test)
# Now go on plotting/analysing 'df_com_test' with your statistical
# measures such as correlation, RMSE, etc.

df_melt <- reshape2::melt(df_com_test, id = "ID")
# Bug fix: aes() (and the geoms) must be namespaced as well, or ggplot2
# attached with library(ggplot2) -- the original call failed with
# 'could not find function "aes"'.
ggplot2::ggplot(
  data = df_melt,
  ggplot2::aes(x = ID, y = value, group = variable, color = variable)
) +
  ggplot2::geom_line(size = 1) +
  ggplot2::geom_point(size = 2)
#> Error in aes(x = ID, y = value, group = variable, color = variable): could not find function "aes"

Please note that for a more elegant regression analysis using multiple predictors, I suggest you look at another answer of mine: *nls fit function with summation*.

If you have any further questions, please post them below.

1 Like