Max,
First of all, thanks for your prompt reply. I tried to work with reprex, even though I was not able to include the local .csv files.
I attach the lines from reprex, referring to the case of SPLIT .csv files (train and test), which leads to very poor accuracy on the test data, and to the best case, where a data partition is applied. If I simply create a partition of the whole SAME dataset and run the same rf, I get very nice results on the test data. I'm sorry, but I'm not good at all at using reprex; I hope that, as a preliminary step, this new post may let you give me some insight.
THANKS IN ADVANCE
best regards
CASE 1 - data split into two files
library(readr)
library(caret)
#> Carico il pacchetto richiesto: lattice
#> Carico il pacchetto richiesto: ggplot2
library(readr)
# Case 1: fit on one CSV file and evaluate on a separate CSV file.
#
# NOTE: the original reprex run failed at both read_delim() calls because the
# CSV files were not in reprex's temporary working directory; every later
# "object not found" error followed from that and from `dtrainA` / `dtestA`
# never being assigned. Run this from the directory holding the CSV files.
FMIB_10ind3_d_trainAll <- read_delim(
  "FMIB_10ind3_v3_trainS.csv",
  delim = ";", escape_double = FALSE, trim_ws = TRUE
)
FMIB_10ind3_d_testAll <- read_delim(
  "FMIB_10ind3_v3_testS.csv",
  delim = ";", escape_double = FALSE, trim_ws = TRUE
)

# The rest of the script works on these names, so bind them explicitly.
dtrainA <- FMIB_10ind3_d_trainAll
dtestA <- FMIB_10ind3_d_testAll

# The outcome must be a factor for classification.
dtrainA[["out10d"]] <- factor(dtrainA[["out10d"]])
# Encode the test outcome with the training levels so predictions and
# reference share one level set (classes unseen in training become NA).
dtestA[["out10d"]] <- factor(
  dtestA[["out10d"]],
  levels = levels(dtrainA[["out10d"]])
)

# 10-fold cross-validation repeated 5 times.
numFolds <- trainControl(method = "repeatedcv", number = 10, repeats = 5)

# NOTE(review): the object name says "rf" and Case 2 uses method = "parRF",
# but this call fits "nnet" - confirm which model is intended before comparing
# the two cases.
modelrf_10v2A <- train(
  out10d ~ .,
  data = dtrainA,
  method = "nnet",
  trControl = numFolds,
  preProcess = c("center", "scale"),
  tuneLength = 10
)

# Predict on the held-out file and summarise performance.
test_pred10v2A <- predict(modelrf_10v2A, newdata = dtestA)
table(test_pred10v2A, dtestA$out10d) # works
confusionMatrix(table(test_pred10v2A, dtestA$out10d))
CASE 2 - data partitioned
library(caret)
library(readr)
# Case 2: read a single CSV file and split it into training and test subsets.
FMIB_10ind3_v1 <- read_delim(
  "FMIB_10ind3_v1.csv",
  delim = ";", escape_double = FALSE, trim_ws = TRUE
)

# Convert the outcome to a factor BEFORE partitioning so that both subsets
# share the same levels and createDataPartition() samples within each class.
FMIB_10ind3_v1[["out10"]] <- factor(FMIB_10ind3_v1[["out10"]])

# Fix the RNG seed so the split and the resampling folds are reproducible.
set.seed(123)

# 70 % of the rows go to training, the remainder to testing.
TDataIndex_v1 <- createDataPartition(
  y = FMIB_10ind3_v1$out10, p = 0.7, list = FALSE
)
trainingData10v1 <- FMIB_10ind3_v1[TDataIndex_v1, ]
testD10v1 <- FMIB_10ind3_v1[-TDataIndex_v1, ]

# 10-fold cross-validation repeated 5 times.
numFolds <- trainControl(method = "repeatedcv", number = 10, repeats = 5)

modelrf_10v1 <- train(
  out10 ~ .,
  data = trainingData10v1,
  method = "parRF",
  trControl = numFolds,
  preProcess = c("center", "scale"),
  tuneLength = 15
)

# Predict on the held-out rows and summarise performance.
test_pred10v1 <- predict(modelrf_10v1, newdata = testD10v1)
table(test_pred10v1, testD10v1$out10) # works
confusionMatrix(table(test_pred10v1, testD10v1$out10))