I trained a regression model with Keras. To select the best model and hyperparameters I considered different values for these variables: number of nodes, hidden layers, batch size, and so on.
So I have some nested for loops. To speed up the running time, I want to parallelize one of those loops:
the inside loop.
I read the documentation about parallelization, but I haven't managed to write working code.
Below is the relevant part of my code; I would like to parallelize the inside for loop. I really appreciate any help.
library(plyr)
library(boot)
library(keras)
library(tensorflow)
library(kerasR)
library(tidyverse)
library(tfruns)
library(MLmetrics) # for MAPE() used below
set.seed(200)
acc_nested_list = list()
pr_nested_list = list()
# first data set
df = mainlist[[1]] # a 33 x 31 data frame whose last column is the target (33 samples, 30 features)
df2 = df[, 2:length(df)]
# Min-max normalization to [0, 1]
normalize <- function(x) {
  return((x - min(x)) / (max(x) - min(x)))
}
maxmindf <- as.data.frame(lapply(df2, normalize))
df_norm <- as.matrix(maxmindf)
# Determine sample size and split the data 80/20
ind <- sample(2, nrow(df_norm), replace = TRUE, prob = c(0.80, 0.20))
training <- df_norm[ind == 1, 1:(ncol(df_norm) - 1)] # parenthesized: 1:ncol(df_norm)-1 parses as (1:ncol(df_norm)) - 1
test1 <- df_norm[ind == 2, 1:(ncol(df_norm) - 1)]
training_target <- df_norm[ind == 1, ncol(df_norm)]
test1_target <- df_norm[ind == 2, ncol(df_norm)]
# number of nodes in the first hidden layer
u1_1 = ceiling((1/2) * (ncol(training) + 1))
u2_1 = ceiling(1 * (ncol(training) + 1))
u3_1 = ceiling((2/3) * (ncol(training) + 1))
u4_1 = ceiling(2 * ncol(training))
nodes1 = c(u1_1, u2_1, u3_1, u4_1)
dropouts = c(0, 0.05)
batchsizes = c(4, 8, 16)
Epochs = 1000
learning_rates = c(0.001, 0.01)
layers = c(1, 2)
dropouts2 = c(0, 0.05)
best_loss = Inf
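(As an aside, I also wondered whether flattening the four nested loops into one table of combinations would make this easier to parallelize, since then there is a single loop index. A minimal sketch; param_grid is just my own name for it, and the code below still uses the nested loops:)
# assumption: one row per hyperparameter combination, so a single index i
# would cover the whole grid instead of four nested loop counters
param_grid <- expand.grid(node = nodes1,
                          dropout = dropouts,
                          lr = learning_rates,
                          batch = batchsizes)
nrow(param_grid) # 4 * 2 * 2 * 3 = 48 model fits in total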
for (m in 1:length(batchsizes)) {
  for (j in 1:length(dropouts)) {
    for (k in 1:length(learning_rates)) {
      for (i in 1:length(nodes1)) { # <- this is the inside loop I want to parallelize
        model <- keras_model_sequential()
        model %>%
          layer_dense(nodes1[i], activation = "relu",
                      input_shape = ncol(training)) %>%
          layer_dropout(rate = dropouts[j]) %>%
          layer_dense(units = 1, activation = "linear")
        ##### c) Compiling the DNN model
        model %>% compile(
          loss = 'mse',
          optimizer = optimizer_adam(learning_rates[k]),
          metrics = c('mse'))
        print_dot_callback <- callback_lambda(
          on_epoch_end = function(epoch, logs) {
            if (epoch %% 100 == 0) cat("\n")
            cat(".")
          })
        early_stop <- callback_early_stopping(monitor = "val_loss", mode = 'min', patience = 30)
        ########### d) Fitting the DNN model ###########
        model1 <- model %>% fit(
          training,
          training_target,
          epochs = Epochs,
          batch_size = batchsizes[m],
          shuffle = FALSE, # fit() has no 'shuffled' argument
          validation_split = 0.2,
          verbose = 0,
          callbacks = list(early_stop, print_dot_callback)
        )
        temp_loss = mean(model1$metrics$val_loss)
        print("loss for the one-hidden-layer model is:")
        print(temp_loss)
        if (temp_loss < best_loss) {
          best_loss = temp_loss
          a_hyper_nodes1 = nodes1[i]
          a_hyper_dropouts = dropouts[j]
          a_hyper_batchsizes = batchsizes[m]
          a_hyper_learning_rates = learning_rates[k]
        }
      } # i
    } # k
  } # j
} # m
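For what it's worth, this is the direction I tried for the inner loop after reading the foreach/doParallel documentation. It is only a sketch of what I'm aiming for, not working code: names like n_cores, cl and val_losses are my own, and since the model and callbacks are Python-backed objects that (as far as I understand) cannot be shared between R processes, I rebuild them inside each worker:
library(foreach)
library(doParallel)

n_cores <- parallel::detectCores() - 1 # leave one core free
cl <- makeCluster(n_cores)
registerDoParallel(cl)

# one worker per node count; each worker returns c(node, mean validation loss)
val_losses <- foreach(i = seq_along(nodes1), .combine = rbind,
                      .packages = "keras") %dopar% {
  # Python-backed objects do not survive the trip to a worker,
  # so the callbacks and the model are built inside the loop body;
  # the dot-printing callback is dropped since worker output is not shown anyway
  early_stop <- callback_early_stopping(monitor = "val_loss",
                                        mode = 'min', patience = 30)
  model <- keras_model_sequential() %>%
    layer_dense(nodes1[i], activation = "relu",
                input_shape = ncol(training)) %>%
    layer_dropout(rate = dropouts[j]) %>%
    layer_dense(units = 1, activation = "linear")
  model %>% compile(loss = 'mse',
                    optimizer = optimizer_adam(learning_rates[k]),
                    metrics = c('mse'))
  history <- model %>% fit(training, training_target,
                           epochs = Epochs,
                           batch_size = batchsizes[m],
                           shuffle = FALSE,
                           validation_split = 0.2,
                           verbose = 0,
                           callbacks = list(early_stop))
  c(nodes1[i], mean(history$metrics$val_loss))
}
stopCluster(cl)
The idea would be that each worker fits one model and returns its node count and mean validation loss, and the best combination is picked from val_losses afterwards instead of updating best_loss inside the loop; this is the part I haven't managed to get working.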
#------------------------------------------
# Best model with the best parameters
Best_model <- keras_model_sequential()
Best_model %>%
  layer_dense(units = a_hyper_nodes1, activation = "relu",
              input_shape = ncol(training)) %>%
  layer_dropout(rate = a_hyper_dropouts) %>%
  layer_dense(units = 1, activation = "linear")
##### c) Compiling the best model
Best_model %>% compile(
  loss = 'mse',
  optimizer = optimizer_adam(a_hyper_learning_rates),
  metrics = c('mse'))
Best_model %>% summary()
########### d) Fitting the best model on the training set ###########
Best_ModelFited <- Best_model %>% fit(
  training,
  training_target,
  epochs = Epochs,
  batch_size = a_hyper_batchsizes, # was 'b_hyper_batchsizes', which is never defined
  shuffle = FALSE,
  validation_split = 0.2,
  verbose = 0
  # callbacks = list(early_stop, print_dot_callback)
)
Best_ModelFited
#save the best model
name1 = paste0("~path/models/","gene___",1,".hdf5")
save_model_hdf5(Best_model, name1)
# predict the test data
Yhat = Best_model %>% predict(test1)
y_p_tst = as.numeric(Yhat)
plot(test1_target, y_p_tst)
abline(0, 1) # abline() is a separate base-graphics call; it cannot be added to plot() with +
MSE1 = mean((test1_target - y_p_tst)^2)
MAPE1 = MAPE(y_p_tst, test1_target) # MLmetrics::MAPE() takes y_pred first, then y_true
cor1 = cor(test1_target, y_p_tst)
dimm = dim(mainlist[[1]]) # c(rows, cols); the extra parentheses in dim((mainlist[[1]])[2]) took dim() of a single column
accuracy1 = c(dimm[1], MSE1, MAPE1, cor1) # for each gene/model we store the sample count plus these 3 metrics
acc_nested_list[[1]] <- accuracy1
name2 = paste0("/path/acc_metrics", ".RDS")
saveRDS(acc_nested_list, name2) # local
acc_metrics <- readRDS(name2)
alpha1 = 0.6
alpha2 = 0.5
r = 0
correlations = data.frame(do.call(rbind.data.frame, acc_metrics))
for (u in 1:nrow(correlations)) { # loop over genes (rows), not columns
  if (alpha2 <= correlations[u, 4]) { # compare one gene's correlation at a time
    r = r + 1
  }
}
print("The proportion of genes which passed the threshold is:")
print(r / nrow(correlations)) # 'p' was never defined; the number of genes is nrow(correlations)