It seems like I've developed a function for bias correction of precipitation data using R's fitQmap
functions, implementing cross-validation. I've noticed that while the function works correctly on some occasions, it gives errors in certain cases. Interestingly, when I rerun the function, it encounters errors in different places. Even more puzzling is that if I isolate the code responsible for correction and apply it to the same data frame that previously caused errors, it performs flawlessly.
I'm seeking insights from an expert to help understand this inconsistency. What could be causing these sporadic errors? Is there a potential issue with how the function interacts with certain data? Any suggestions on how to improve the reliability and stability of the function would be greatly appreciated."
when running this code:
sspline_5 <- list()
obs = list_obs_m
mod = list_modH_m
folds = 5
list_of_sublists_modH <- list()
for (i in seq_along(obs)) {
## get the observed data to be used as train
sublist <- obs[[i]]
sublist_name <- names(obs)[i] # Get the name of the sublist
#subl <- list_obs_m[[1]]
## get the modeled Historical data to be used as train
sublist_train_test_modH <- mod[[i]]
## corrected test data set
processed_sublist_modH <- sublist_train_test_modH
corrected_modH <- list()
for (j in seq_along(sublist)) {
df <- sublist[[j]]
df_modH <- sublist_train_test_modH[[j]]
processed_df_modH <- df_modH
# value_mod <- df_modH[[2]]
bacia_month <- colnames(df[2])
cat("Bacia_month is ", bacia_month, "\n")
set.seed(123) # Set seed value for randomness
#
fold_df <- createFolds(df$Date, k = folds, list = TRUE, returnTrain = TRUE)
combine_iteration <- data.frame(index = integer(), value = numeric(), iteration = integer())
for (k in 1:folds) {
cat("Iteration", k, "\n")
train_indices_df <- unlist(fold_df[-k])
cat("fold_df", length(fold_df) , "k", k , "\n")
test_indices_df <- fold_df[[k]]
train_data_df <- df[train_indices_df, ]
test_data_df <- df [test_indices_df, ]
train_data_df_mod <- df_modH[train_indices_df, ]
test_data_df_mod <- df_modH [test_indices_df, ]
cat("Number of training data:", nrow(train_data_df), "\n")
cat("Test data count:", nrow(test_data_df_mod), "\n")
# train SSPLINE model
qm.fit <- fitQmap(train_data_df[2],train_data_df_mod[2],
qstep=0.01, method="SSPLIN")
y_modH <- doQmap(test_data_df_mod[2],qm.fit)
combine_iteration <- rbind(combine_iteration, data.frame(index = test_indices_df, value = y_modH, iteration = k))
}
mean_combine_iteration <- aggregate(combine_iteration[[2]] ~ index, data = combine_iteration, FUN = mean)
processed_df_modH[[2]] <- mean_combine_iteration[[2]]
processed_sublist_modH[[j]] <- processed_df_modH
corrected_modH[[j]] <- processed_sublist_modH[[j]]
}
names(processed_sublist_modH) <- names(sublist)
names(corrected_modH) <- names(sublist)
list_of_sublists_modH[[sublist_name]] <- corrected_modH
}
sspline_5 <- list(Historico = list_of_sublists_modH)
sspline_5 <- output
I get this type of error:
Bacia_month is Serra do Facão_Jul
Iteration 1
fold_df 5 k 1
Number of training data: 113
Test data count: 27
Iteration 2
fold_df 5 k 2
Number of training data: 112
Test data count: 28
Warning: model identification for Serra do Facão_Jul failed
NA's produced.Warning: Quantile mapping for Serra do Facão_Jul failed NA's produced.Iteration 3
fold_df 5 k 3
Number of training data: 115
Test data count: 25
Warning: model identification for Serra do Facão_Jul failed
NA's produced.Warning: Quantile mapping for Serra do Facão_Jul failed NA's produced.Iteration 4
fold_df 5 k 4
Number of training data: 110
Test data count: 30
Iteration 5
fold_df 5 k 5
Number of training data: 110
Test data count: 30
Warning: model identification for Serra do Facão_Jul failed
NA's produced.Warning: Quantile mapping for Serra do Facão_Jul failed NA's produced.
Test data count: 30
knowing that if I run the only the list that contian this object "Serra do Facão", it would run it correctly.
More than that if I am executing the function that contain this code, which it is like this:
biasCorr_CV <- function(obs, mod, newdata = NULL, method = c("RQUANT", "PTF", "SSPLINE", "QUANT", "dqm", "qdm"), cross.val = c("none", "kfold"), folds = NULL){
output <- list()
if(cross.val == "none"){
output <- RQUANT_correction_m_null(obs, mod)
} else if(cross.val == "kfold" & !is.null(folds)) {
list_of_sublists_modH <- list()
for (i in seq_along(obs)) {
## get the observed data to be used as train
sublist <- obs[[i]]
sublist_name <- names(obs)[i] # Get the name of the sublist
#subl <- list_obs_m[[1]]
## get the modeled Historical data to be used as train
sublist_train_test_modH <- mod[[i]]
## corrected test data set
processed_sublist_modH <- sublist_train_test_modH
corrected_modH <- list()
for (j in seq_along(sublist)) {
df <- sublist[[j]]
#value_obs <- df[[2]]
#d <- subl[[1]]
#v <- d[[2]]
df_modH <- sublist_train_test_modH[[j]]
processed_df_modH <- df_modH
# value_mod <- df_modH[[2]]
bacia_month <- colnames(df[2])
cat("Bacia_month is ", bacia_month, "\n")
set.seed(123) # Set seed value for randomness
fold_df <- createFolds(df$Date, k = folds, list = TRUE, returnTrain = TRUE)
combine_iteration <- data.frame(index = integer(), value = numeric(), iteration = integer())
for (k in 1:folds) {
cat("Iteration", k, "\n")
train_indices_df <- unlist(fold_df[-k])
cat("fold_df", length(fold_df) , "k", k , "\n")
test_indices_df <- fold_df[[k]]
train_data_df <- df[train_indices_df, ]
test_data_df <- df [test_indices_df, ]
train_data_df_mod <- df_modH[train_indices_df, ]
test_data_df_mod <- df_modH [test_indices_df, ]
cat("Number of training data:", nrow(train_data_df), "\n")
cat("Test data count:", nrow(test_data_df_mod), "\n")
if(method == "RQUANT"){
# train RQUANT model
qm.fit <- fitQmap(train_data_df[2], train_data_df_mod[2],
method="RQUANT",qstep=0.01)
y_modH <- doQmap(test_data_df_mod[2] ,qm.fit,type="linear")
}
if(method == "PTF"){
# train PTF model
qm.fit <- fitQmap(train_data_df[2],train_data_df_mod[2],
method="PTF", transfun="expasympt", cost="RSS",wett.day=TRUE)
y_modH <- doQmap(test_data_df_mod[2],qm.fit)
}
if(method == "SSPLINE"){
# train SSPLINE model
qm.fit <- fitQmap(train_data_df[2],train_data_df_mod[2],
qstep=0.01, method="SSPLIN")
y_modH <- doQmap(test_data_df_mod[2],qm.fit)
}
# if(method == "QUANT"){
# # train QUANT model
#
# qm.fit <- fitQmap(train_data_df[2],train_data_df_mod[2],
# method="QUANT", qstep=0.01)
# y_modH <- doQmap(test_data_df_mod[2],qm.fit,type="tricub")
# }
if(method == "dqm"){
# train DETRENDED QUANTILE MATCHING with delta-method extrapolation model
y_modH <- dqm(train_data_df[[2]], train_data_df_mod[[2]], test_data_df_mod[[2]], precip = TRUE, pr.threshold=0.1, n.quantiles= 100, detrend = TRUE)
}
if(method == "qdm"){
# train Quantile delta mapping model
y_modH <- qdm(train_data_df[[2]], train_data_df_mod[[2]], test_data_df_mod[[2]], precip = TRUE, pr.threshold=0.1, n.quantiles= 100, jitter.factor=0.01)
}
combine_iteration <- rbind(combine_iteration, data.frame(index = test_indices_df, value = y_modH, iteration = k))
}
mean_combine_iteration <- aggregate(combine_iteration[[2]] ~ index, data = combine_iteration, FUN = mean)
processed_df_modH[[2]] <- mean_combine_iteration[[2]]
processed_sublist_modH[[j]] <- processed_df_modH
corrected_modH[[j]] <- processed_sublist_modH[[j]]
}
names(processed_sublist_modH) <- names(sublist)
names(corrected_modH) <- names(sublist)
list_of_sublists_modH[[sublist_name]] <- corrected_modH
}
output <- list(Historico = list_of_sublists_modH)
}
return(output)
}
it will give me this error:
sspline_cv_5 <- biasCorr_CV(list_obs_m,list_modH_m, method = "SSPLIN", cross.val = "kfold",folds = 5)
Bacia_month is 14 de Julho_Jan Iteration 1 fold_df 5 k 1 Number of training data: 113 Test data count: 27
[image] Show Traceback
Error in data.frame(index = test_indices_df, value = y_modH, iteration = k) : arguments imply differing number of rows: 27, 30, 1
any ideas how can I improve my code?