I am currently trying to solve a problem with the boot
package and writing a function within a function in R
. I have developed several functions to perform the lasso but continue to receive an error when bootstrapping these functions within a wrapper function. I'm not sure this will gain any traction, but I've been struggling with this code for several weeks now and can't seem to fix my error.
My data appears like this:
dput(head(data))
structure(list(xdot = c(-0.2, -0.279673738719296, -0.359076346467696,
-0.438178346167097, -0.516945929568287, -0.595349063298155),
`1` = c(1, 1, 1, 1, 1, 1), x = c(2, 1.997601508855, 1.99440761360292,
1.99042101434809, 1.98564511431594, 1.98008334612387), y = c(0,
-0.039956793916898, -0.0798177925537019, -0.119568122366144,
-0.159190709068346, -0.198670364342884), z = c(1, 0.997004494555671,
0.994017962224951, 0.991040377675655, 0.988071711771147,
0.985111938521599), xx = c(4, 3.99041178817976, 3.97766172919729,
3.96177581435847, 3.94278652000677, 3.92073005759709), xy = c(0,
-0.0798177518174037, -0.159189213170081, -0.237990903403717,
-0.316096253706052, -0.393383879803706), xz = c(2, 1.99161768265962,
1.9824769919195, 1.97258759379309, 1.96195976707217, 1.95060374353442
), yy = c(0, 0.00159654538011746, 0.00637088000814579, 0.0142965358861653,
0.0253416818536829, 0.0394699136681344), yz = c(0, -0.0398371031231821,
-0.0793403195035247, -0.118496837147713, -0.157291836407224,
-0.195712547744611), zz = c(1, 0.994017962164209, 0.988071709225845,
0.982161030183504, 0.976285707602364, 0.970445531417783),
xxx = c(8, 7.97125260902066, 7.93307883704802, 7.88560183503509,
7.82897479024219, 7.76337229169526), xxy = c(0, -0.159444061463859,
-0.317488178749868, -0.473702095358444, -0.627654981824995,
-0.778932869032911), xxz = c(4, 3.97845848794315, 3.95386720647687,
3.92627979932809, 3.89575582597129, 3.86235798745937), xyy = c(0,
0.00318926146027812, 0.0127061315935966, 0.0284561254602049,
0.0503195867613143, 0.0781537187272197), xyz = c(0, -0.0795786573072806,
-0.158236937283518, -0.23585859479259, -0.312325766483786,
-0.387527156416577), xzz = c(2, 1.98565178104819, 1.97061773966567,
1.95491395395101, 1.93855694547712, 1.92156303508068), yyy = c(0,
-6.3792834732329e-05, -0.000508509578874707, -0.00170940995224898,
-0.00403416030327222, -0.00784150212903046), yyz = c(0, 0.0015917629197392,
0.00633276916327676, 0.0141684443240788, 0.0250393989683282,
0.0388822831668961), yzz = c(0, -0.0397177708638903, -0.0788657027151702,
-0.11743515024024, -0.155415614046513, -0.192798767301695
), zzz = c(1, 0.991040375946786, 0.982161026936799, 0.97336123829137,
0.964640290288373, 0.955997478684596), xxxx = c(16, 15.923386239244,
15.8217928319208, 15.6956676032357, 15.5455655423471, 15.3721241845453
), xxxy = c(0, -0.318505697758174, -0.633200840927662, -0.942866605142168,
-1.24630004813686, -1.54235200172055), xxxz = c(8, 7.94737467843222,
7.88562285977236, 7.81494982079301, 7.73558852240775, 7.64779072773679
), xxyy = c(0, 0.00637087350518465, 0.0253412055897096, 0.0566396701029174,
0.0999168416070009, 0.154750876889417), xxyz = c(0, -0.158966445909678,
-0.315588952471455, -0.469457903489781, -0.62016813229351,
-0.767336068591203), xxzz = c(4, 3.96654099388248, 3.9302150234902,
3.8911018151864, 3.84928612760987, 3.80485496429048), xyyy = c(0,
-0.000127432662915438, -0.00101417537569773, -0.00340244549109213,
-0.0080104106965598, -0.0155268277742881), xyyz = c(0, 0.00317970801021046,
0.012630123034429, 0.0282011693232673, 0.0497193602268686,
0.0769901613580433), xyzz = c(0, -0.0793402790060642, -0.15729035794728,
-0.233745390861298, -0.30860025471987, -0.381757628287297
), xzzz = c(2, 1.97970375032752, 1.95882942990681, 1.93739866324702,
1.91543327948342, 1.89295468647977), yyyy = c(0, 2.54895715077441e-06,
4.05881120781917e-05, 0.000204390938344411, 0.00064220083917328,
0.00155787408496998), yyyz = c(0, -6.36017429485792e-05,
-0.000505467655364905, -0.00169409428467935, -0.00398603967641339,
-0.00772475736325044), yyzz = c(0, 0.00158699478524704, 0.00629488629892138,
0.0140415004140115, 0.0247407218003568, 0.0383034013446867
), yzzz = c(0, -0.0395987960650309, -0.0783939251023723,
-0.116382975646484, -0.153561771806902, -0.189928367401148
), zzzz = c(1, 0.988071709105087, 0.976285702572483, 0.964640289211122,
0.953133782868649, 0.941764529448744), xxxxx = c(32, 31.8085803775947,
31.5551040848309, 31.2409866317028, 30.8679762684397, 30.438087092366
), xxxxy = c(0, -0.636247462420643, -1.2628605780859, -1.87670150460201,
-2.47470960155468, -3.05398551246767), xxxxz = c(16, 15.8756876490722,
15.7271462695312, 15.5550403493822, 15.3601335558774, 15.1432630546321
), xxxyy = c(0, 0.0127264665266812, 0.0505406933659937, 0.11273678961859,
0.198399388374821, 0.306419634126799), xxxyz = c(0, -0.31755161220649,
-0.62941300957804, -0.934418876457857, -1.23143382194305,
-1.5193893702976), xxxzz = c(8, 7.92356827431485, 7.83845076594542,
7.744930821915, 7.64331619289266, 7.5339299492083), xxyyy = c(0,
-0.000254559679717289, -0.00202267909082015, -0.00677229900564367,
-0.0159058328632881, -0.0307444130940013), xxyyz = c(0, 0.00635178951891474,
0.0251896135406067, 0.0561322000502198, 0.0987250047213959,
0.152446936320451), xxyzz = c(0, -0.15849026105549, -0.313701087436383,
-0.465251737977334, -0.612770588061164, -0.755911922027422
), xxzzz = c(4, 3.95465919874016, 3.90670432875561, 3.85623901249676,
3.80337073320441, 3.74820804966573), xyyyy = c(0, 5.09180065039369e-06,
8.09492397505141e-05, 0.00040682401882304, 0.00127518295871402,
0.00308472053100702), xyyyz = c(0, -0.000127050937679889,
-0.00100810854028978, -0.00337196086451277, -0.00791486020893975,
-0.0152956634078199), xyyzz = c(0, 0.0031701831775545, 0.0125545691613335,
0.0279484974970259, 0.0491262933675283, 0.0758439271025127
), xyzzz = c(0, -0.079102614768347, -0.156349441084388, -0.231651120439124,
-0.304919181934073, -0.376073997247507), xzzzz = c(2, 1.97377353696526,
1.94711163826223, 1.92004030293263, 1.8925854392426, 1.86477226073164
), yyyyy = c(0, -1.01848155576497e-07, -3.23965351000351e-06,
-2.44386407264956e-05, -0.000102232406952281, -0.000309503412061324
), yyyyz = c(0, 2.5413217357519e-06, 4.03453124585221e-05,
0.000202559672730327, 0.00063454048246281, 0.00153468035981734
), yyyzz = c(0, -6.34112235813079e-05, -0.000502443928756447,
-0.00167891583970679, -0.00393849304626149, -0.00760975070072064
), yyzzz = c(0, 0.00158224093372771, 0.00625723005129159,
0.0139156938734348, 0.0244456073397322, 0.0377331379506352
), yzzzz = c(0, -0.0394801776558292, -0.0779249696810755,
-0.115340228139708, -0.151730042731855, -0.187100702190787
), zzzzz = c(1, 0.985111934921076, 0.970445524620454, 0.955997476540943,
0.941764528385934, 0.927743481236134)), row.names = c(NA,
6L), class = "data.frame")
which, I understand isn't ideal but the problem I'm having occurs with a larger data set.
I have the functions:
library(boot)
library(glmnet)
library(np)
library(tidyverse)
foo1 <- function(data,index){ #index is the bootstrap sample index
x <- data[index, -1] %>%
as.matrix() %>%
unname()
y <- data[index, 1] %>%
scale(center = TRUE, scale = FALSE) %>%
as.matrix() %>%
unname()
ols <- lm(y ~ x)
# The intercept estimate should be dropped.
ols.coef <- as.numeric(coef(ols))[-1]
ols.coef[is.na(ols.coef)] <- 0
## The intercept estimate should be dropped.
lasso <- cv.glmnet(x, y, alpha = 1,
penalty.factor = 1 / abs(ols.coef))
# Select nonzero coefficients from bic.out
coef <- as.vector(coef(lasso,
s = lasso$lambda.min))[-1]
return(coef)
}
foo2 <- function(data, index){ #index is the bootstrap sample index
x <- data[index, -1] %>%
as.matrix() %>%
unname()
y <- data[index, 1] %>%
scale(center = TRUE, scale = FALSE) %>%
as.matrix() %>%
unname()
# ic.glmnet provides coefficients with lowest BIC
ols <- lm(y ~ x)
# The intercept estimate should be dropped.
ols.coef <- as.numeric(coef(ols))[-1]
ols.coef[is.na(ols.coef)] <- 0
lasso <- cv.glmnet(x, y, alpha = 1,
penalty.factor = 1 / abs(ols.coef))
# Select nonzero coefficients from bic.out
coef <- as.vector(coef(lasso,
s = lasso$lambda.min))[-1]
coef_nonzero <- coef != 0
if(sum(coef_nonzero) > 0) {
ls_obj <- lm(y ~ x[, coef_nonzero, drop = FALSE])
ls_coef <- coef(ls_obj)[-1]
coef[coef_nonzero] <- ls_coef
}
return(coef)
}
### Non-parametric bootstrap
foo3 <- function(data, num_samples) {
bstar <- b.star(data[, 1], round = TRUE)
# Select Block Length of circular block result
blocklength <- bstar[, 2]
init_boot_ts <- tsboot(tseries = data,
statistic = foo1,
R = num_samples, l = blocklength,
sim = "fixed")
final_boot_ts <- tsboot(tseries = data,
statistic = foo2,
R = num_samples,
l = blocklength, sim = "fixed")
# point estimates
final_boot_t0 <- final_boot_ts$t0
return(list(point_estimates = final_boot_t0))
}
which seem to work when I run foo1
and foo2
on their own. When I perform these methods using tsboot
outside the wrapper function, I do not get an error. However, when placed within my function I continue to get either Error in t[r, ] <- res[[r]] : number of items to replace is not a multiple of length
or Error in x[, coef_nonzero, drop = FALSE] :(subscript) logical subscript too long
. Which seems to be unclear as there should never be an index which is greater than the number of columns in x. Lastly, although I have not provided an example here, I get an error for this code when running with lapply for several dataframes similar to the attached.
Again, I apologise for not being able to provide more data, but this error occurs as the data set increases. Perhaps there is an issue with the data? I can't find any missing values when constructing the data frame.
Thanks in advance.