Correctly Specifying Constraints Within a Function

omario · April 2, 2022, 4:40pm

I am working with the R programming language.

I have the following data:

library(GA)
library(dplyr)

var_1 = rnorm(1000,10,10)
var_2 = rnorm(1000,5,5)
var_3 = rnorm(1000, 1,1)
goal = rnorm(1000,100,100)

my_data = data.frame(var_1, var_2, var_3, goal)

I wrote the following function that randomly splits this data into 3 different groups and evaluates a "fitness value" ("total_mean") of these groups based on the percentage of the data within each group that is less than some randomly assigned percentile:

#define fitness function
fitness <- function(x) {

x1 = x[1]
x2 = x[2]
x3 = x[3]
x4 = x[4]
x5 = x[5]
x6 = x[6]
x7 = x[7]
x8 = x[8]
x9 = x[9]

    #bin data according to random criteria
    train_data <- my_data %>% mutate(cat = ifelse(var_1 <= x1 & var_2 <= x2 & var_3 <= x3, "a", ifelse( var_1 <= x4 &  var_2  <= x5 & var_3 <= x6, "b", "c")))
   
    train_data$cat = as.factor(train_data$cat)
   
    #new splits
    a_table = train_data %>%
        filter(cat == "a") %>%
        select(var_1, var_2, var_3, goal, cat)
   
    b_table = train_data %>%
        filter(cat == "b") %>%
        select(var_1, var_2, var_3, goal, cat)
   
    c_table = train_data %>%
        filter(cat == "c") %>%
       select(var_1, var_2, var_3, goal, cat)
   
    x7 =  runif(1,0, 1)
    x8=  runif(1, 0, 1)
    x9 =  runif(1, 0, 1)
   
    #calculate  quantile ("quant") for each bin
   
    table_a = data.frame(a_table%>% group_by(cat) %>%
                             mutate(quant = quantile(goal, prob = x7)))
   
    table_b = data.frame(b_table%>% group_by(cat) %>%
                             mutate(quant = quantile(goal, prob = x8)))
   
    table_c = data.frame(c_table%>% group_by(cat) %>%
                             mutate(quant = quantile(goal, prob = x9)))
   
    #create a new variable ("diff") that measures if the quantile is bigger tha the value of "c1"
    table_a$diff = ifelse(table_a$quant > table_a$goal,1,0)
    table_b$diff = ifelse(table_b$quant > table_b$goal,1,0)
    table_c$diff = ifelse(table_c$quant > table_c$goal,1,0)
   
    #group all tables
   
    final_table = rbind(table_a, table_b, table_c)
# calculate the total mean : this is what needs to be optimized
    total_mean = mean(final_table$diff)

n_row_a = nrow(table_a)
n_row_b = nrow(table_b)
n_row_c = nrow(table_c)

return(total_mean)


}

I was able to then optimize this function using the Genetic Algorithm in R:

GA <- ga(type = "real-valued",
         fitness = fitness,
         lower = c(min(var_1), min(var_2), min(var_3), min(var_1), min(var_2), min(var_3), 0,0,0), upper = c(max(var_1), max(var_2), max(var_3), max(var_1), max(var_2), max(var_3), 1,1,1),
         popSize = 50, maxiter = 10, run = 10)

My Question: I would now like to add some "constraints" to this function that prevents the arguments of this function from taking certain values and also prevents the splits made by this function having 0 rows. My logic being that these constraints will work by assigning the returned value of the function as "NaN":

if (n_row_a < 1 | n_row_b < 1 | n_row_c <1 | x4 < x1 | x5 < x2 | x6 < x3){

total_mean <- NaN

}

I tried to add these constraints to the above function:

#define fitness function
fitness <- function(x) {

x1 = x[1]
x2 = x[2]
x3 = x[3]
x4 = x[4]
x5 = x[5]
x6 = x[6]
x7 = x[7]
x8 = x[8]
x9 = x[9]

    #bin data according to random criteria
    train_data <- my_data %>% mutate(cat = ifelse(var_1 <= x1 & var_2 <= x2 & var_3 <= x3, "a", ifelse( var_1 <= x4 &  var_2  <= x5 & var_3 <= x6, "b", "c")))


   
    train_data$cat = as.factor(train_data$cat)
   
    #new splits
    a_table = train_data %>%
        filter(cat == "a") %>%
        select(var_1, var_2, var_3, goal, cat)
   
    b_table = train_data %>%
        filter(cat == "b") %>%
        select(var_1, var_2, var_3, goal, cat)
   
    c_table = train_data %>%
        filter(cat == "c") %>%
       select(var_1, var_2, var_3, goal, cat)
   
    x7 =  runif(1,0, 1)
    x9 =  runif(1, 0, 1)
    x9 =  runif(1, 0, 1)
   
    #calculate  quantile ("quant") for each bin
   
    table_a = data.frame(a_table%>% group_by(cat) %>%
                             mutate(quant = quantile(goal, prob = x7)))
   
    table_b = data.frame(b_table%>% group_by(cat) %>%
                             mutate(quant = quantile(goal, prob = x8)))
   
    table_c = data.frame(c_table%>% group_by(cat) %>%
                             mutate(quant = quantile(goal, prob = x9)))
   
   
   
   
    #create a new variable ("diff") that measures if the quantile is bigger tha the value of "c1"
    table_a$diff = ifelse(table_a$quant > table_a$goal,1,0)
    table_b$diff = ifelse(table_b$quant > table_b$goal,1,0)
    table_c$diff = ifelse(table_c$quant > table_c$goal,1,0)
   
    #group all tables
   
    final_table = rbind(table_a, table_b, table_c)
# calculate the total mean : this is what needs to be optimized
    total_mean = mean(final_table$diff)

n_row_a = nrow(table_a)
n_row_b = nrow(table_b)
n_row_c = nrow(table_c)

return(total_mean)

if (n_row_a < 1 | n_row_b < 1 | n_row_c <1 | x4 < x1 | x5 < x2 | x6 < x3){

total_mean <- NaN

}
   
}

My Problem: However, now the constraints do not seem to be respected:

GA <- ga(type = "real-valued",
         fitness = fitness,
         lower = c(min(var_1), min(var_2), min(var_3), min(var_1), min(var_2), min(var_3), 0,0,0), upper = c(max(var_1), max(var_2), max(var_3), max(var_1), max(var_2), max(var_3), 1,1,1),
         popSize = 50, maxiter = 1000, run = 100)

# output

> GA@solution
     x1   x2  x3 x4 x5   x6   x7   x8   x9
[1,] 24 -5.3 4.4 38 12 -1.6 0.88 0.23 0.99
[2,] 21 -5.3 4.4 38 12 -1.6 0.88 0.23 0.99

As we can see here, X6 is less than X3 - it appears that these constraints were not respected.

Can someone please show me how to correctly specify these constraints in my function?

Thanks!

pieterjanvc · April 3, 2022, 7:33pm

Hi,

I think the issues lies in the placement of the return() function

return(total_mean)

if (n_row_a < 1 | n_row_b < 1 | n_row_c <1 | x4 < x1 | x5 < x2 | x6 < x3){

total_mean <- NaN

}

As soon as the return() function is executed, the parent function ends and everything after it is ignored. Make sure the return is last, or use and if-else statement to conditionally choose when to end the function by invoking return.

if (n_row_a < 1 | n_row_b < 1 | n_row_c <1 | x4 < x1 | x5 < x2 | x6 < x3){

total_mean <- NaN

}

return(total_mean)

Hope this helps,
PJ

system · April 10, 2022, 7:33pm

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.