Simulating Expired Orders in a Factory

I have this Operations Research problem :

  • Suppose there is a factory that on day 1 has 100 boxes
  • New boxes arrive according to some random rate (e.g. Poisson Distribution)
  • There are 5 employees at the factory - each employee finishes boxes at the same random rate (e.g. Exponential Distribution)
  • Boxes are processed according to the FIFO (First In, First Out)
  • As soon as an employee finishes a box, he starts the next box right away
  • In the event where there no boxes remaining and 2 or more employees are idle, the employee who has been idle the longest starts the next box

I am trying to answer the following question: After 1000 days - what will be the average queue length and the average number of boxes that have been waiting for more than 5 days (i.e. expired) ? I am not sure if there is a mathematical solution to this problem so I tried to simulate it.

I tried to set the code up like this:

Step 1 : Define parameters

library(dplyr)
library(ggplot2)
library(RColorBrewer)

set.seed(123)

initial_boxes <- 100  
num_workers <- 5  
# rate for exponential distribution (service rate)
rate <- 1  
arrival_rate <- 5  

Step 2: Define queue function

simulate_process <- function(iterations) {
    time <- 0
    queue <- data.frame(box = paste0("box_", 1:initial_boxes), time_entered = time)
    worker_histories <- vector("list", num_workers)
    processed_boxes <- data.frame(time_started = numeric(), time_finished = numeric(), box = character(), worker = integer(), time_taken = numeric())
    queue_length <- data.frame(time = numeric(), queue_length = integer())
    waiting_longer_than_5 <- data.frame(time = numeric(), percent_waiting_longer_than_5 = numeric())
    cumulative_boxes <- data.frame(time = numeric(), cumulative_boxes = integer())
    cumulative_processed_count <- 0
    
    for (i in 1:iterations) {
        num_new_boxes <- rpois(1, arrival_rate)
        new_boxes <- paste0("box_", (nrow(queue) + nrow(processed_boxes) + 1):(nrow(queue) + nrow(processed_boxes) + num_new_boxes))
        queue <- rbind(queue, data.frame(box = new_boxes, time_entered = time))
        
        queue_length <- rbind(queue_length, data.frame(time = time, queue_length = nrow(queue)))
        
        for (worker in 1:num_workers) {
            if (nrow(queue) == 0) break
            box <- queue$box[1]
            time_entered <- queue$time_entered[1]
            queue <- queue[-1,]
            time_started <- ifelse(length(worker_histories[[worker]]) == 0, time, max(sapply(worker_histories[[worker]], function(x) x$time_finished)))
            time_taken <- rexp(1, rate)
            time_finished <- time_started + time_taken
            worker_histories[[worker]] <- c(worker_histories[[worker]], list(list(time_started = time_started, time_finished = time_finished, box = box, time_taken = time_taken)))
            processed_boxes <- rbind(processed_boxes, data.frame(time_started = time_started, time_finished = time_finished, box = box, worker = worker, time_taken = time_taken))
            
            queue_length <- rbind(queue_length, data.frame(time = time, queue_length = nrow(queue)))
        }
        
        if (nrow(queue) > 0) {
            waiting_times <- time - queue$time_entered
            percent_waiting_longer_than_5 <- sum(waiting_times > 5) / nrow(queue) * 100
        } else {
            percent_waiting_longer_than_5 <- 0  
        }
        waiting_longer_than_5 <- rbind(waiting_longer_than_5, data.frame(time = time, percent_waiting_longer_than_5 = percent_waiting_longer_than_5))
        
        cumulative_processed_count <- cumulative_processed_count + sum(sapply(worker_histories, function(w) {
            if (length(w) > 0) {
                return(sum(sapply(w, function(x) x$time_finished <= time)))
            } else {
                return(0)
            }
        }))
        
        cumulative_boxes <- rbind(cumulative_boxes, data.frame(time = time, cumulative_boxes = cumulative_processed_count))
        
        time <- time + 1
    }
    
    return(list(queue_length = queue_length, waiting_longer_than_5 = waiting_longer_than_5, cumulative_boxes = cumulative_boxes))
}

Step 3: Simulate Queue 100 times and calculate summary stats

num_simulations <- 100

all_simulations <- vector("list", num_simulations)
for (sim in 1:num_simulations) {
    all_simulations[[sim]] <- simulate_process(50)
}

queue_length_data <- bind_rows(lapply(all_simulations, `[[`, "queue_length"), .id = "simulation_id")
waiting_longer_than_5_data <- bind_rows(lapply(all_simulations, `[[`, "waiting_longer_than_5"), .id = "simulation_id")
cumulative_boxes_data <- bind_rows(lapply(all_simulations, `[[`, "cumulative_boxes"), .id = "simulation_id")

average_queue_length <- queue_length_data %>%
    group_by(time) %>%
    summarise(avg_queue_length = mean(queue_length))

average_waiting_longer_than_5 <- waiting_longer_than_5_data %>%
    group_by(time) %>%
    summarise(avg_percent_waiting_longer_than_5 = mean(percent_waiting_longer_than_5))

average_cumulative_boxes <- cumulative_boxes_data %>%
    group_by(time) %>%
    summarise(avg_cumulative_boxes = mean(cumulative_boxes))

Step 4: Visualize results

ggplot() +
    geom_line(data = queue_length_data, aes(x = time, y = queue_length, group = simulation_id), 
              color = "blue", alpha = 0.2) +  # Faded individual trajectories
    geom_line(data = average_queue_length, aes(x = time, y = avg_queue_length), 
              color = "blue", size = 1.5, alpha = 0.9) +  
    theme_minimal() +
    labs(title = "Queue Length Over Time Across Simulations", 
         x = "Time", y = "Queue Length") +
    theme(
        plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 12)
    )

ggplot() +
    geom_line(data = waiting_longer_than_5_data, aes(x = time, y = percent_waiting_longer_than_5, group = simulation_id), 
              color = "purple", alpha = 0.2) +  
    geom_line(data = average_waiting_longer_than_5, aes(x = time, y = avg_percent_waiting_longer_than_5), 
              color = "purple", size = 1.5, alpha = 0.9) +  
    theme_minimal() +
    labs(title = "Percentage of Boxes Waiting Longer Than 5 Time Units Over Time", 
         x = "Time", y = "Percent Waiting Longer Than 5 Units") +
    theme(
        plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 12)
    )

ggplot() +
    geom_line(data = cumulative_boxes_data, aes(x = time, y = cumulative_boxes, group = simulation_id), 
              color = "green", alpha = 0.2) + 
    geom_line(data = average_cumulative_boxes, aes(x = time, y = avg_cumulative_boxes), 
              color = "green", size = 1.5, alpha = 0.9) + 
    theme_minimal() +
    labs(title = "Cumulative Boxes Processed Over Time Across Simulations", 
         x = "Time", y = "Cumulative Boxes Processed") +
    theme(
        plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 12)
    )

Can someone please help me understand if I have done this correctly? I can provide more clarifications explaining my code is needed

Thanks!