Hello all,
I wrote a for loop that runs forward stepwise regression (up to 6 iterations) on each of 10 data frames. The code is below:
data_list<- list(data1,data2,data3,data4,data5,data6,data7,data8,data9,data10)
data_list
new_variable<-0
final_model<-0
new_predictor<-0
predictors <- matrix(data = 0, nrow = 100, ncol = 7)
col_names <- c("seed","var1", "var2", "var3", "var4", "var5", "var6")
df_predictors <- data.frame(matrix = predictors)
colnames(df_predictors)<-col_names
df_names<-as.data.frame(names)
for (i in 1:length(data_list)){
null_model<-glm(data_list[[i]][[54]]~1, data= data_list[[i]], family= "binomial")
for (j in 1:6){
new_predictor<-add1(null_model, data_list[[i]][[54]]~PC1+PC2+PC3+PC4+PC5+PC6+PC7+PC8+PC9, data= data_list[[i]], test = 'Chisq')
new_predictor
print(new_predictor)
new_predictor[,5]
new_predictor_df<-as.data.frame(new_predictor)
new_predictor_df<- tibble::rownames_to_column(new_predictor_df, "Variable")
new_predictor_2<-filter(new_predictor_df, Pr(>Chi)
<=0.1)
#new_predictor_2_name<-new_predictor_2_sorted[which.min(new_predictor_2_sorted$Pr(>Chi)
),]
ifelse(is.na(new_predictor_2),
(break &
(i=i+1)),
(ifelse(length(new_predictor_2)>1,
(new_predictor_2_sorted<-new_predictor_2[order(new_predictor_2$Pr(>Chi)
),]),
new_predictor_2_sorted<-new_predictor_2)))
new_predictor_2_sorted
min_new_predictor<-new_predictor_2_sorted[which.min(new_predictor_2_sorted$Pr(>Chi)
),]
min_new_pred_df<-as.data.frame(min_new_predictor)
min_new_pred_df
new_variable<-min_new_pred_df[1,1]
print(new_variable)
ifelse(!is.na(new_variable),
updated_model<-update(null_model,paste("~.+",new_variable)),
final_model<-null_model)
#print(summary(updated_model))
#print(final_model)
ifelse(summary(updated_model)$coefficients[,4]>=0.1,
(new_variable<-new_predictor_2_sorted[which(new_predictor_2_sorted$`Pr(>Chi)`<=0.1)[2],] &
if(!is.na(new_variable)){
updated_model<-update(null_model,paste("~.+",new_variable))}),
ifelse(summary(updated_model)$coefficients[,4]>=0.1,
(new_variable<-new_predictor_2_sorted[which(new_predictor_2_sorted$`Pr(>Chi)`<=0.1)[3],] &
if(!is.na(new_variable)){
updated_model<-update(null_model,paste("~.+",new_variable))}),
ifelse(summary(updated_model)$coefficients[,4]>=0.1,
(new_variable<-new_predictor_2_sorted[which(new_predictor_2_sorted$`Pr(>Chi)`<=0.1)[4],] &
if(!is.na(new_variable)){
updated_model<-update(null_model,paste("~.+",new_variable))}),
ifelse(summary(updated_model)$coefficients[,4]>=0.1,
final_model<-null_model,
null_model<-updated_model))))
new_var<-print(paste(i, new_variable))
df_predictors= rbind(df_predictors,new_var)
}
print(summary(final_model))
print(df_predictors)
}
I am getting an error message after the second iteration for data1 (after addition of the first selected significant variable): "Error in Ops.data.frame(new_predictor_2_sorted[which(new_predictor_2_sorted$`Pr(>Chi)` <= : list of length 30 not meaningful". I tried to fix this error but did not have any success. Can someone please help me fix this code? I need to run it for 100 seeds for 24 datasets! I would appreciate it if I could get some help today, as I am running out of time for the assignment.
Thank you all!
Radhika