I have this code working; it saves the prediction as "H" and "NH". I would like to know how I can save the prediction as a percentage instead of "H" and "NH".
Regards!
Code:
# Set the java.parameters option (-Xmx15g) and the working directory that
# every relative file path below is resolved against.
options(java.parameters = "-Xmx15g")
setwd("C:/hom")

# Load the training data and convert the categorical columns to factors.
# NOTE(review): sep and dec are both "," - confirm the CSV quotes its
# decimal-comma numbers, otherwise the columns will not parse as numeric.
xl_data_tmp <- read.csv("train.csv", header = TRUE, sep = ",", dec = ",")
xl_data_tmp$y <- as.factor(xl_data_tmp$y)
xl_data_tmp$x3 <- as.factor(xl_data_tmp$x3)

# Min-max scale the numeric predictors to [0, 1].
# x3 is a factor; x9 and x13 are deliberately left unscaled (their scaling
# lines were commented out in the earlier version of this script).
scaled_cols <- c("x1", "x2", "x4", "x5", "x6", "x7", "x8", "x10", "x11", "x12")
xl_data_tmp[scaled_cols] <- lapply(
  xl_data_tmp[scaled_cols],
  function(values) (values - min(values)) / (max(values) - min(values))
)
rm(scaled_cols)

# Show a summary of the training data.
summary(xl_data_tmp)
# Split the data into a training subset and a hold-out subset.
#
# 519 rows are drawn from rows 1-650 and 519 from rows 651-1299.
# NOTE(review): this presumably assumes the file is sorted by class with the
# boundary after row 650 - confirm against train.csv.
#
# Fix: the full data frame is indexed exactly once. Previously xl_data_tmp was
# first overwritten with xl_data_tmp[sampidx, ] (1038 rows) and then indexed
# again with the same indices, so every index above 1038 produced an all-NA
# row in train_subset, and test_subset was cut from the already-sampled rows
# (overlapping the training data) instead of from the rows left out.
stopifnot(nrow(xl_data_tmp) >= 1299) # the hard-coded ranges below need this
set.seed(1)
sampidx <- c(sample(1:650, 519), sample(651:1299, 519))
print(sampidx)
train_subset <- xl_data_tmp[sampidx, ]
test_subset <- xl_data_tmp[-sampidx, ]
print(train_subset)
print(test_subset)
summary(test_subset)
library(pROC)
## Packages needed for training
library(nnet)
library(caret)

## Parallel backend
# install.packages("doParallel")
library(doParallel)
numCores <- detectCores() # number of cores reported by the machine
cl <- makeCluster(numCores) # one worker per core
registerDoParallel(cl)
## Tuning parameters: 3 hidden-layer sizes x 3 weight-decay values = 9 combos
nn.Grid <- expand.grid(.size = c(1, 2, 3), .decay = c(0.01, 0.1, 1))

# Seeds for caret: one integer vector per resample (5 folds x 2 repeats = 10),
# each holding 9 seeds (one per tuning combination), plus a single seed for
# the final model fit.
set.seed(1)
nn.seeds <- c(
  lapply(seq_len(10), function(resample) sample.int(n = 1000, 9)),
  list(1)
)
nn.seeds

# Resampling set-up for nnet training and what the summary reports
nn.Control <- trainControl(
  method = "repeatedcv", # repeated N-fold cross validation
  number = 5, # the number of folds
  repeats = 2,
  classProbs = TRUE,
  summaryFunction = twoClassSummary,
  seeds = nn.seeds
)
# Fit the model: a neural network tuned over nn.Grid, selected by ROC.
#
# The training call is wrapped in tryCatch(finally = ...) so the parallel
# workers are stopped and the sequential backend is restored even when
# train() fails; previously an error here left the cluster processes running.
#
# NOTE(review): importance and allowParallel are not train() arguments, so
# they are forwarded through `...` to the underlying model; allowParallel
# normally belongs in trainControl(). Kept as-is to preserve behaviour.
model.nn <- tryCatch(
  train(
    y ~ .,
    data = train_subset,
    method = "nnet",
    maxit = 500,
    linout = FALSE,
    trControl = nn.Control,
    tuneGrid = nn.Grid,
    metric = "ROC",
    MaxNWts = 1000000,
    importance = TRUE,
    na.action = na.exclude,
    allowParallel = TRUE
  ),
  finally = {
    stopCluster(cl)
    registerDoSEQ()
  }
)
remove(cl)

varImp(model.nn) # importance of each variable in the trained model
print(model.nn)
plot(model.nn, metric = "ROC") # ROC against decay and size
remove(nn.Control, nn.Grid, nn.seeds)
# Variable importance with Garson's and Olden's algorithms.
# library() is used instead of require(): require() only returns FALSE when
# the package is missing, and the script would then fail later at garson()
# with a less helpful "could not find function" error.
library(NeuralNetTools)

# Draw the importance plots.
garson(model.nn)
olden(model.nn)

# Same computations with bar_plot = FALSE, to get the values behind the plots.
plot_data2 <- garson(model.nn, bar_plot = FALSE)
plot_data3 <- olden(model.nn, bar_plot = FALSE)
print(plot_data2)
plot_data3

# Write the results to Excel files.
library(xlsx)
write.xlsx(plot_data2, file = "garson.xlsx", sheetName = "resultado", append = FALSE)
write.xlsx(plot_data3, file = "olden.xlsx", sheetName = "resultado", append = FALSE)
## Predict on external data
# Read the rows to score from file.
xl_data_test <- read.csv("predecir.csv", header = TRUE, sep = ",", dec = ",")

# Convert the categorical columns.
xl_data_test$y <- as.factor(xl_data_test$y)
xl_data_test$x3 <- as.factor(xl_data_test$x3)

# Scale the numeric predictors with the TRAINING ranges.
# The model was fitted on columns scaled by the min/max of train.csv, so new
# rows must go through that same transformation. Scaling the new file by its
# own min/max (as before) makes a row's prediction depend on which other rows
# share the file, and yields 0/0 = NaN when the file has a single row or a
# constant column. Values outside [0, 1] are expected when a new row falls
# outside the training range.
# x3 is a factor; x9 and x13 are left unscaled, matching the training step.
train_raw <- read.csv("train.csv", header = TRUE, sep = ",", dec = ",")
scaled_cols <- c("x1", "x2", "x4", "x5", "x6", "x7", "x8", "x10", "x11", "x12")
xl_data_test[scaled_cols] <- Map(
  function(new_values, train_values) {
    lo <- min(train_values)
    hi <- max(train_values)
    (new_values - lo) / (hi - lo)
  },
  xl_data_test[scaled_cols],
  train_raw[scaled_cols]
)
rm(train_raw, scaled_cols)

xl_data_test1 <- xl_data_test
# Predict with the neural network.
# type = "raw" returns the predicted class label ("H" / "NH"); it is kept so
# the output file still has its original first column.
preds.nn <- predict(model.nn, newdata = xl_data_test1, type = "raw")
preds.nn

# type = "prob" returns one probability column per class level. This relies
# on the model having been trained with classProbs = TRUE in trainControl().
probs.nn <- predict(model.nn, newdata = xl_data_test1, type = "prob")

# Express the probabilities as percentages, e.g. prob_pct_H / prob_pct_NH.
probs_pct.nn <- round(100 * probs.nn, 2)
names(probs_pct.nn) <- paste0("prob_pct_", names(probs.nn))
probs_pct.nn

## Write the prediction results to an Excel file
# The percentage columns are appended after the existing columns so the
# position of every original column is unchanged.
library(xlsx)
write.xlsx(
  cbind(preds.nn, xl_data_test1, probs_pct.nn),
  file = "salida_prediccion.xlsx", sheetName = "nnet", append = FALSE
)