I am working with the R programming language. I am trying to use the "genetic algorithm" to optimize the following function ("f") using the "GA" library:
# create data for example
num_var_1 <- abs(rnorm(1000, 10, 1))
num_var_2 <- abs(rnorm(1000, 10, 5))
num_var_3 <- abs(rnorm(1000, 10, 10))
num_var_4 <- abs(rnorm(1000, 10, 10))
num_var_5 <- abs(rnorm(1000, 10, 10))
factor_1 <- c("0","B", "C")
factor_2 <- c("0","BB", "CC")
factor_3 <- c("0","BBB", "CCC", "DDD")
factor_4 <- c("0","BBBB", "CCCC", "DDDD", "EEEE")
factor_5 <- c("0","BBBBB", "CCCCC", "DDDDD", "EEEEE", "FFFFFF")
factor_var_1 <- as.factor(sample(factor_1, 1000, replace=TRUE, prob=c(0.3, 0.5, 0.2)))
factor_var_2 <- as.factor(sample(factor_2, 1000, replace=TRUE, prob=c(0.5, 0.3, 0.2)))
factor_var_3 <- as.factor(sample(factor_3, 1000, replace=TRUE, prob=c(0.5, 0.2, 0.2, 0.1)))
factor_var_4 <- as.factor(sample(factor_4, 1000, replace=TRUE, prob=c(0.5, 0.2, 0.1, 0.1, 0.1)))
factor_var_5 <- as.factor(sample(factor_4, 1000, replace=TRUE, prob=c(0.3, 0.2, 0.1, 0.1, 0.1)))
id = 1:1000
my_data = data.frame(id,num_var_1, num_var_2, num_var_3, num_var_4, num_var_5, factor_var_1, factor_var_2, factor_var_3, factor_var_4, factor_var_5)
#randomly add some zeros to the data
my_data[] <- lapply(my_data, function(x) {
x[sample(seq_along(x), length(x)/2)] <- 0
x
})
#load libraries
library(dplyr)
library(GA)
#define function for optimization
f <- function(x1, x2, x3, x4, x5, x8) {
target1 = x1
target2 = x2
target3 = x3
target4 = x4
target5 = x5
target8 = x8
result <- filter(my_data, num_var_1 < target1 & num_var_2 < target2 & num_var_3 < target3 & num_var_4 <target4 & num_var_5 <target5 & factor_var_3 %in% target8 )
return(sum(result == '0', na.rm = TRUE) / prod(dim(result)) * 100)
}
# run optimization
GA <- ga(type = "real-valued",
fitness = function(x) f(x[1], x[2], x[3], x[4], x[5], x[6]),
lower = c( min(num_var_1), min(num_var_2), min(num_var_3), min(num_var_4), min(num_var_5), c("0","BBB", "CCC", "DDD")), upper = c( max(num_var_1), max(num_var_2), max(num_var_3), max(num_var_4), max(num_var_5), c("0","BBB", "CCC", "DDD")),
popSize = 50, maxiter = 1000, run = 100)
Problem: However, this returns the following error:
Error in validObject(.Object) :
invalid class “ga” object: 1: invalid object for slot "lower" in class "ga": got class "character", should be or extend class "numericOrNA"
invalid class “ga” object: 2: invalid object for slot "upper" in class "ga": got class "character", should be or extend class "numericOrNA"
I think this error is because the x[6]
argument takes "factors" as input, and not "numeric" input. Without the x[6] argument, the optimization function works fine:
f_mod <- function(x1, x2, x3, x4, x5) {
target1 = x1
target2 = x2
target3 = x3
target4 = x4
target5 = x5
result <- filter(my_data, num_var_1 < target1 & num_var_2 < target2 & num_var_3 < target3 & num_var_4 <target4 & num_var_5 <target5 )
return(sum(result == '0', na.rm = TRUE) / prod(dim(result)) * 100)
}
GA <- ga(type = "real-valued",
fitness = function(x) f_mod(x[1], x[2], x[3], x[4], x[5]),
lower = c( min(num_var_1), min(num_var_2), min(num_var_3), min(num_var_4), min(num_var_5)), upper = c( max(num_var_1), max(num_var_2), max(num_var_3), max(num_var_4), max(num_var_5)),
popSize = 50, maxiter = 100, run = 100)
My Question: Can someone please show me how to fix this problem? Are there any optimization algorithms/packages in R that can optimize functions having both "numeric" and "factor" inputs?
Thanks!