I will start from your code:
# Split mtcars into a training set (first 70% of rows, rounded up)
# and a test set (remaining rows).
mtcar <- mtcars
number_rows <- nrow(mtcar)

print("## number of rows in Train dataset ##")
# 70% of the rows go to training; ceiling() keeps the count an integer.
number_rows_train <- ceiling(0.7 * number_rows)
print(number_rows_train)

# Leading block of rows forms the training data.
Train_data <- mtcars[seq_len(number_rows_train), ]
print("## Head of Train dataset ##")
print(head(Train_data))

print("## number of rows in Test dataset ##")
number_rows_test <- number_rows - number_rows_train
print(number_rows_test)

print("## Head of Test dataset ##")
# Remaining rows form the test data.
Test_data <- mtcars[(number_rows_train + 1):number_rows, ]
print(head(Test_data))
# Whole response data: random prices assigned to all 32 cars.
# Fix: seed the RNG so this worked example is reproducible — without
# set.seed() every run of sample() yields different prices and the
# printed coefficients below could never be reproduced by a reader.
set.seed(42)
Price <- sample(400:2500, size = 32)
responce_train <- Price[1:number_rows_train]
responce_test <- Price[(number_rows_train + 1):number_rows]

# Fit OLS on the training rows; name the `data` argument explicitly
# rather than passing Train_data positionally.
model_train <- lm(responce_train ~ mpg + cyl + disp + hp + drat + wt +
                    qsec + vs + am + gear + carb, data = Train_data)
print(model_train)

cat("# # # # The Beta Coefficient Values (Training Part) # # # ", "\n")
# 12 coefficients: intercept + one slope per predictor.
Beta_coef <- coef(model_train)
print(Beta_coef)
print("----------------------------------------------")
print("Intercept is:")
print(Beta_coef[1]) # the mean value of the response variable when
# all of the predictor variables in the model are equal to zero.
print("----------------------------------------------")
#============== Test the model or call it Prediction part ======================
#==================== Use coefficients obtained from Training with =============
#============= their corresponding variables from Testing data sets ============
# Use y = a + Bx {ie, y = a + b1*x1 + b2*x2 + .....}, accumulating one
# predictor term at a time. data.frame arithmetic keeps the FIRST
# operand's column name, so pred_test ends up with a single column
# named 'mpg' — the rename further down relies on this.
predictors <- c("mpg", "cyl", "disp", "hp", "drat",
                "wt", "qsec", "vs", "am", "gear", "carb")
pred_test <- Test_data["mpg"] * 0 + Beta_coef[1]
for (j in seq_along(predictors)) {
  pred_test <- pred_test + Test_data[predictors[j]] * Beta_coef[j + 1]
}
# Compare the reserved data for testing with the new predictions
df_com_test <- data.frame(pred_test, 'obs' = responce_test) # pred_test is your new predictions
names(df_com_test)[names(df_com_test) == 'mpg'] <- 'pred' # Convert rownames to column ids
df_com_test['ID'] <- rownames(df_com_test)
# Now go on plotting/analysing your 'df_com_test' with your statistical measures like correlation, RMSE etc
df_melt <- reshape2::melt(df_com_test, id = 'ID')
# Fix: aes()/geom_line()/geom_point() were called unqualified while ggplot2
# was never attached with library(), which produced the error
# "could not find function 'aes'". Qualify every ggplot2 function with ::
# (alternatively, call library(ggplot2) once at the top of the script).
# NOTE(review): ggplot2 >= 3.4 prefers `linewidth` over `size` in geom_line;
# `size = 1` is kept here for compatibility with older versions.
ggplot2::ggplot(data = df_melt,
                ggplot2::aes(x = ID, y = value,
                             group = variable, color = variable)) +
  ggplot2::geom_line(size = 1) +
  ggplot2::geom_point(size = 2)
Please note that for a more elegant regression analysis using multiple predictors, I suggest you look at another answer of mine about the nls fit function with summation.
If you have any more doubts, please leave your concern(s) in the comments below.