I am studying some software written in the R programming language. The data frame `dep_var` contains no NA, Inf, or -Inf values.
# For every dependent variable named in `depvars`: build the log-ratio
# outcome, fit a linear mixed-effects model twice (the second time without
# the observations flagged as residual outliers), and store noisy
# predictions (log scale and exponentiated) as new columns of `clean_table`.
for (dep in depvars) {
  # Output column names are built from characters 3-11 of the variable name.
  # NOTE(review): ".votes" is used as a regex here, so "." matches any
  # character; kept as-is so the generated column names do not change.
  name_stub <- substr(dep, 3, 11)
  col <- str_replace(paste("log.p.abst", name_stub, sep = "."),
                     ".votes", "")
  col2 <- str_replace(paste("pred.p.abst", name_stub, sep = "."),
                      ".votes", "")

  # Outcome: log of the ratio between the variable and p.abstention.
  # `[[` always yields a plain vector (for data.frames and tibbles alike).
  log_ratio <- log(data_filt2[[dep]] / data_filt2$p.abstention)

  # Clamp +Inf / -Inf to the largest / smallest finite value. This works on
  # a numeric vector with is.infinite() and a sign test instead of comparing
  # a whole data.frame against Inf / -Inf. NA and NaN are left untouched and
  # are removed by complete.cases() below.
  finite_vals <- log_ratio[is.finite(log_ratio)]
  if (length(finite_vals) > 0) {
    is_pos_inf <- is.infinite(log_ratio) & log_ratio > 0
    is_neg_inf <- is.infinite(log_ratio) & log_ratio < 0
    log_ratio[is_pos_inf] <- max(finite_vals)
    log_ratio[is_neg_inf] <- min(finite_vals)
  }
  dep_var <- data.frame(dep_var = log_ratio)

  reg_data <- bind_cols(dep_var, data_filt2[, covars])
  reg_data <- reg_data[complete.cases(reg_data), ]

  # run linear mixed effect model
  mlr_reg0 <- lme(dep_var ~ -1 + fpv2011_major + nbi + masculinidad +
                    extranjeros + analfabetismo + no_usa_pc +
                    menor_15 + mayor_65 + desocupados +
                    universitarios + per_propietario + per_urban,
                  random = list(~1|CODIGO.PROVINCIA,
                                ~1|CODIGO.DEPARTAMENTO),
                  data = reg_data,
                  control = lmeControl(opt = "optim"))

  # Flag observations whose normalized residual lies outside the central
  # 95% of a standard normal. A logical mask is used instead of
  # `-which(...)`: when nothing is flagged, `-which()` is integer(0) and
  # would drop every row.
  is_outlier <- abs(residuals(mlr_reg0, type = "normalized")) > qnorm(0.975)
  reg_data_trim <- reg_data[!is_outlier, ]

  # re-estimate excluding points with outlier residuals from the previous reg
  mlr_reg <- lme(dep_var ~ -1 + fpv2011_major + nbi + masculinidad +
                   extranjeros + analfabetismo + no_usa_pc +
                   menor_15 + mayor_65 + desocupados +
                   universitarios + per_propietario + per_urban,
                 random = list(~1|CODIGO.PROVINCIA,
                               ~1|CODIGO.DEPARTAMENTO),
                 data = reg_data_trim,
                 control = lmeControl(opt = "optim"))

  # calculate the standard deviation of the residuals
  sd_resid <- sd(residuals(mlr_reg))

  # predict outcome variable for each row of reg_data + randomly generated
  # noise; predictions that come back NA are replaced by the mean prediction
  pred_mlr <- predict(mlr_reg, newdata = reg_data)
  pred_mlr[is.na(pred_mlr)] <- mean(pred_mlr, na.rm = TRUE)
  noise_pred <- rnorm(nrow(reg_data), 0, sd_resid)
  pred_mlr_noisy <- pred_mlr + noise_pred

  # NOTE(review): pred_mlr_noisy has one value per row of reg_data (after
  # complete.cases filtering); this assumes clean_table has the same number
  # of rows in the same order -- confirm against where clean_table is built.
  clean_table[[col]] <- pred_mlr_noisy
  clean_table[[col2]] <- exp(pred_mlr_noisy)
}
> dput(head(dep_var))
structure(list(dep_var = c(0.1221026968009, -0.350860974073313,
-0.106609735058258, -0.266628663253948, -0.208401269577006, 0.1221026968009
)), row.names = c(NA, 6L), class = "data.frame")
> summary(dep_var)
dep_var
Min. :-3.44999
1st Qu.:-0.02381
Median : 0.37729
Mean : 0.35284
3rd Qu.: 0.76214
Max. : 5.09987
> head(dep_var)
dep_var
1 0.1221027
2 -0.3508610
3 -0.1066097
4 -0.2666287
5 -0.2084013
6 0.1221027
> tail(dep_var)
dep_var
90007 0.5675210
90008 0.4788926
90009 0.4468503
90010 0.4587465
90011 0.6061358
90012 0.6350395
> dep_var[dep_var == Inf,] <- max(dep_var[dep_var != Inf,])
# This is not problematic.
> dep_var[dep_var == -Inf,] <- min(dep_var[dep_var != -Inf,])
Error in -Inf : argument "e2" is missing, with no default
However, this code raises an error, and I have not found a solution even though I have spent a month searching Stack Overflow and other sites on the internet.