I have the following data in R:
#PART 1
#create data
library(dplyr)
library(caret)
set.seed(123)
salary <- rnorm(1000,5,5)
height <- rnorm(1000,2,2)
my_data = data.frame(salary, height)
I created the following loop - but for some reason it only saves the final iteration:
for (i in 1:10)
{
#PART 2
#create train_i and test_i data
train_i<-sample_frac(my_data, 0.7)
sid<-as.numeric(rownames(train_i))
test_i<-my_data[-sid,]
#PART 3
salary_quantiles = data.frame( train_i %>% summarise (quant_1 = quantile(salary, 0.33),
quant_2 = quantile(salary, 0.66),
quant_3 = quantile(salary, 0.99)))
#PART 4
train_i$salary_type = as.factor(ifelse(train_i$salary < salary_quantiles$quant_1 , "A", ifelse( train_i$salary > salary_quantiles$quant_1 & train_i$salary < salary_quantiles$quant_2, "B", "C")))
#PART 5
height_quantiles = data.frame( train_i %>% group_by(salary_type) %>% summarise(quant_80 = quantile(height, 0.80)))
#PART 6
#test_i
test_i$salary_type = as.factor(ifelse(test_i$salary < salary_quantiles$quant_1 , "A", ifelse( test_i$salary > salary_quantiles$quant_1 & test_i$salary < salary_quantiles$quant_2, "B", "C")))
test_i$height_pred <- height_quantiles$quant_80[match(test_i$salary_type, height_quantiles$salary_type)]
test_i$accuracy = ifelse(test_i$height_pred > test_i$height, 1, 0)
#PART 7 : Results Frame
results_i = data.frame(test_i %>%
group_by(salary_type) %>%
dplyr::summarize(Mean = mean(accuracy, na.rm=TRUE)))
results_i$iteration = i
results_i$total_mean = mean(test_i$accuracy)
}
Here are the results from this loop (only the final iteration was saved, i.e. iteration = 10)
> results
salary_type Mean iteration total_mean
1 A 0.7582418 10 0.7566667
2 B 0.7818182 10 0.7566667
3 C 0.7272727 10 0.7566667
Can someone please show me how to fix this?
Thanks!