I trained a regression model with Keras. To select the best model and hyperparameters I considered different values for these variables: number of nodes, hidden layers, batch size, and so on.
So I have some nested for loops. To speed up the running time, I want to parallelize one of those loops:
the inside loop.
I read the documentation about parallelization, but I haven't managed to write working code.
Below is the relevant part of my code; I would like to parallelize the inside for loop. I really appreciate any help.
library(plyr)
library(boot)
library(keras)
library(tensorflow)
library(kerasR)
library(tidyverse)
library(tfruns)
library(MLmetrics) # for MAPE() used below
set.seed(200)
acc_nested_list = list()
pr_nested_list = list()
# first data set
df = mainlist[[1]] # a 33 x 31 data frame whose last column is the target (33 samples, 30 features)
df2 = df[, 2:length(df)]
# Min-max normalization to [0, 1]
normalize <- function(x) {
  return((x - min(x)) / (max(x) - min(x)))
}
maxmindf <- as.data.frame(lapply(df2, normalize))
df_norm <- as.matrix(maxmindf)
# Determine sample size and split the data 80/20
ind <- sample(2, nrow(df_norm), replace = TRUE, prob = c(0.80, 0.20))
training <- df_norm[ind == 1, 1:(ncol(df_norm) - 1)] # parenthesized: 1:ncol(df_norm)-1 parses as (1:ncol(df_norm)) - 1
test1 <- df_norm[ind == 2, 1:(ncol(df_norm) - 1)]
training_target <- df_norm[ind == 1, ncol(df_norm)]
test1_target <- df_norm[ind == 2, ncol(df_norm)]
# number of nodes in the first hidden layer
u1_1 = ceiling((1/2) * (ncol(training) + 1))
u2_1 = ceiling(1 * (ncol(training) + 1))
u3_1 = ceiling((2/3) * (ncol(training) + 1))
u4_1 = ceiling(2 * ncol(training))
nodes1 = c(u1_1, u2_1, u3_1, u4_1)
dropouts = c(0, 0.05)
batchsizes = c(4, 8, 16)
Epochs = 1000
learning_rates = c(0.001, 0.01)
layers = c(1, 2)
dropouts2 = c(0, 0.05)
best_loss = Inf
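(As an aside, I also wondered whether flattening the four nested loops into one table of combinations would make this easier to parallelize, since then there is a single loop index. A minimal sketch; param_grid is just my own name for it, and the code below still uses the nested loops:)
# assumption: one row per hyperparameter combination, so a single index i
# would cover the whole grid instead of four nested loop counters
param_grid <- expand.grid(node = nodes1,
                          dropout = dropouts,
                          lr = learning_rates,
                          batch = batchsizes)
nrow(param_grid) # 4 * 2 * 2 * 3 = 48 model fits in total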
for (m in 1:length(batchsizes)) {
  for (j in 1:length(dropouts)) {
    for (k in 1:length(learning_rates)) {
      for (i in 1:length(nodes1)) { # <- this is the inside loop I want to parallelize
        model <- keras_model_sequential()
        model %>%
          layer_dense(nodes1[i], activation = "relu",
                      input_shape = ncol(training)) %>%
          layer_dropout(rate = dropouts[j]) %>%
          layer_dense(units = 1, activation = "linear")
        ##### c) Compiling the DNN model
        model %>% compile(
          loss = 'mse',
          optimizer = optimizer_adam(learning_rates[k]),
          metrics = c('mse'))
        print_dot_callback <- callback_lambda(
          on_epoch_end = function(epoch, logs) {
            if (epoch %% 100 == 0) cat("\n")
            cat(".")
          })
        early_stop <- callback_early_stopping(monitor = "val_loss", mode = 'min', patience = 30)
        ########### d) Fitting the DNN model ###########
        model1 <- model %>% fit(
          training,
          training_target,
          epochs = Epochs,
          batch_size = batchsizes[m],
          shuffle = FALSE, # fit() has no 'shuffled' argument
          validation_split = 0.2,
          verbose = 0,
          callbacks = list(early_stop, print_dot_callback)
        )
        temp_loss = mean(model1$metrics$val_loss)
        print("loss for the one-hidden-layer model is:")
        print(temp_loss)
        if (temp_loss < best_loss) {
          best_loss = temp_loss
          a_hyper_nodes1 = nodes1[i]
          a_hyper_dropouts = dropouts[j]
          a_hyper_batchsizes = batchsizes[m]
          a_hyper_learning_rates = learning_rates[k]
        }
      } # i
    } # k
  } # j
} # m
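For what it's worth, this is the direction I tried for the inner loop after reading the foreach/doParallel documentation. It is only a sketch of what I'm aiming for, not working code: names like n_cores, cl and val_losses are my own, and since the model and callbacks are Python-backed objects that (as far as I understand) cannot be shared between R processes, I rebuild them inside each worker:
library(foreach)
library(doParallel)

n_cores <- parallel::detectCores() - 1 # leave one core free
cl <- makeCluster(n_cores)
registerDoParallel(cl)

# one worker per node count; each worker returns c(node, mean validation loss)
val_losses <- foreach(i = seq_along(nodes1), .combine = rbind,
                      .packages = "keras") %dopar% {
  # Python-backed objects do not survive the trip to a worker,
  # so the callbacks and the model are built inside the loop body;
  # the dot-printing callback is dropped since worker output is not shown anyway
  early_stop <- callback_early_stopping(monitor = "val_loss",
                                        mode = 'min', patience = 30)
  model <- keras_model_sequential() %>%
    layer_dense(nodes1[i], activation = "relu",
                input_shape = ncol(training)) %>%
    layer_dropout(rate = dropouts[j]) %>%
    layer_dense(units = 1, activation = "linear")
  model %>% compile(loss = 'mse',
                    optimizer = optimizer_adam(learning_rates[k]),
                    metrics = c('mse'))
  history <- model %>% fit(training, training_target,
                           epochs = Epochs,
                           batch_size = batchsizes[m],
                           shuffle = FALSE,
                           validation_split = 0.2,
                           verbose = 0,
                           callbacks = list(early_stop))
  c(nodes1[i], mean(history$metrics$val_loss))
}
stopCluster(cl)
The idea would be that each worker fits one model and returns its node count and mean validation loss, and the best combination is picked from val_losses afterwards instead of updating best_loss inside the loop; this is the part I haven't managed to get working.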
#------------------------------------------
# Best model with the best parameters
Best_model <- keras_model_sequential()
Best_model %>%
  layer_dense(units = a_hyper_nodes1, activation = "relu",
              input_shape = ncol(training)) %>%
  layer_dropout(rate = a_hyper_dropouts) %>%
  layer_dense(units = 1, activation = "linear")
##### c) Compiling the best model
Best_model %>% compile(
  loss = 'mse',
  optimizer = optimizer_adam(a_hyper_learning_rates),
  metrics = c('mse'))
Best_model %>% summary()
########### d) Fitting the best model on the training set ###########
Best_ModelFited <- Best_model %>% fit(
  training,
  training_target,
  epochs = Epochs,
  batch_size = a_hyper_batchsizes, # was 'b_hyper_batchsizes', which is never defined
  shuffle = FALSE,
  validation_split = 0.2,
  verbose = 0
  # callbacks = list(early_stop, print_dot_callback)
)
Best_ModelFited
#save the best model
name1 = paste0("~path/models/","gene___",1,".hdf5")
save_model_hdf5(Best_model, name1)
# predict the test data
Yhat = Best_model %>% predict(test1)
y_p_tst = as.numeric(Yhat)
plot(test1_target, y_p_tst)
abline(0, 1) # abline() is a separate base-graphics call; it cannot be added to plot() with +
MSE1 = mean((test1_target - y_p_tst)^2)
MAPE1 = MAPE(y_p_tst, test1_target) # MLmetrics::MAPE() takes y_pred first, then y_true
cor1 = cor(test1_target, y_p_tst)
dimm = dim(mainlist[[1]]) # c(rows, cols); the extra parentheses in dim((mainlist[[1]])[2]) took dim() of a single column
accuracy1 = c(dimm[1], MSE1, MAPE1, cor1) # for each gene/model we store the sample count plus these 3 metrics
acc_nested_list[[1]] <- accuracy1
name2 = paste0("/path/acc_metrics", ".RDS")
saveRDS(acc_nested_list, name2) # local
acc_metrics <- readRDS(name2)
alpha1 = 0.6
alpha2 = 0.5
r = 0
correlations = data.frame(do.call(rbind.data.frame, acc_metrics))
for (u in 1:nrow(correlations)) { # loop over genes (rows), not columns
  if (alpha2 <= correlations[u, 4]) { # compare one gene's correlation at a time
    r = r + 1
  }
}
print("The proportion of genes which passed the threshold is:")
print(r / nrow(correlations)) # 'p' was never defined; the number of genes is nrow(correlations)