data(basque)
> basque[85:89, 1:4]
regionno regionname year gdpcap
85 2 Andalucia 1996 5.995930
86 2 Andalucia 1997 6.300986
87 3 Aragon 1955 2.288775
88 3 Aragon 1956 2.445159
89 3 Aragon 1957 2.603399
> dataprep.out <- dataprep(
+ foo = basque,
+ predictors = c("school.illit","school.prim","school.med","school.high","school.post.high","invest"),
+ predictors.op = "mean",
+ time.predictors.prior = 1964:1969,
+ special.predictors = list(
+ list("gdpcap", 1960:1969, "mean"),
+ list("sec.agriculture", seq(1961, 1969, 2), "mean"),
+ list("sec.energy", seq(1961, 1969, 2), "mean"),
+ list("sec.industry", seq(1961, 1969, 2), "mean"),
+ list("sec.construction", seq(1961, 1969, 2), "mean"),
+ list("sec.services.venta", seq(1961, 1969, 2), "mean"),
+ list("sec.services.nonventa", seq(1961, 1969, 2), "mean"),
+ list("popdens", 1969, "mean")),
+ dependent = "gdpcap",
+ unit.variable = "regionno",
+ unit.names.variable = "regionname",
+ time.variable = "year",
+ treatment.identifier = 17,
+ controls.identifier = c(2:16, 18),
+ time.optimize.ssr = 1960:1969,
+ time.plot = 1955:1997)
> dataprep.out$X1
17
school.illit 39.888465
school.prim 1031.742299
school.med 90.358668
school.high 25.727525
school.post.high 13.479720
invest 24.647383
special.gdpcap.1960.1969 5.285468
special.sec.agriculture.1961.1969 6.844000
special.sec.energy.1961.1969 4.106000
special.sec.industry.1961.1969 45.082000
special.sec.construction.1961.1969 6.150000
special.sec.services.venta.1961.1969 33.754000
special.sec.services.nonventa.1961.1969 4.072000
special.popdens.1969 246.889999
> dataprep.out$Z1
17
1960 4.285918
1961 4.574336
1962 4.898957
1963 5.197015
1964 5.338903
1965 5.465153
1966 5.545916
1967 5.614896
1968 5.852185
1969 6.081405
> dataprep.out$X1["school.high",] <- dataprep.out$X1["school.high",] + dataprep.out$X1["school.post.high",]
> dataprep.out$X1 <- as.matrix(dataprep.out$X1[which(rownames(dataprep.out$X1) == "school.post.high"),])
> dataprep.out$X0["school.high",] <- dataprep.out$X0["school.high",] + dataprep.out$X0["school.post.high",]
> dataprep.out$X0 <- dataprep.out$X0[which(rownames(dataprep.out$X0) == "school.post.high"),]
> lowest <- which(rownames(dataprep.out$X0) == "school.illit")
> highest <- which(rownames(dataprep.out$X0) == "school.high")
> dataprep.out$X1[lowest:highest,] <-
+ (100 * dataprep.out$X1[lowest:highest,]) /
+ sum(dataprep.out$X1[lowest:highest,])
Error in lowest:highest : argument of length 0
> dataprep.out$X1[lowest:highest,] <-
+ (100 * dataprep.out$X1[lowest:highest,])/
+ sum(dataprep.out$X1[lowest:highest,])
Error in lowest:highest : argument of length 0
Hi, and welcome. A reproducible example, called a reprex, helps to attract more answers. Your code is almost complete; it should look like this:
library(Synth)
#> ##
#> ## Synth Package: Implements Synthetic Control Methods.
#> ## See http://www.mit.edu/~jhainm/software.htm for additional information.
# Load the `basque` data set into the workspace.
data(basque)

# Assemble the inputs for the synthetic control fit. Region 17 is the
# treated unit; regions 2-16 and 18 form the pool of control units.
dataprep.out <- dataprep(
  foo = basque,
  # Identifier columns and the treated/control split.
  unit.variable = "regionno",
  unit.names.variable = "regionname",
  time.variable = "year",
  treatment.identifier = 17,
  controls.identifier = c(2:16, 18),
  # Outcome variable and the time windows used for fitting and plotting.
  dependent = "gdpcap",
  time.optimize.ssr = 1960:1969,
  time.plot = 1955:1997,
  # Ordinary predictors, averaged over 1964-1969.
  predictors = c(
    "school.illit", "school.prim", "school.med",
    "school.high", "school.post.high", "invest"
  ),
  predictors.op = "mean",
  time.predictors.prior = 1964:1969,
  # Special predictors, each with its own averaging window. The six sector
  # variables share the same odd-year window, so they are generated
  # together.
  special.predictors = c(
    list(list("gdpcap", 1960:1969, "mean")),
    lapply(
      c(
        "sec.agriculture", "sec.energy", "sec.industry",
        "sec.construction", "sec.services.venta", "sec.services.nonventa"
      ),
      function(sector) list(sector, seq(1961, 1969, by = 2), "mean")
    ),
    list(list("popdens", 1969, "mean"))
  )
)
# Consolidate the two highest schooling categories for the treated unit (X1):
# add "school.post.high" into "school.high".
dataprep.out$X1["school.high",] <- dataprep.out$X1["school.high",] + dataprep.out$X1["school.post.high",]
# NOTE(review): which(...) with no leading minus KEEPS only the
# "school.post.high" row rather than dropping it, so X1 is reduced to a 1 x 1
# matrix. The intent was presumably a negative index, [-which(...), ], to
# remove the now-redundant row -- confirm against the paper's listing.
dataprep.out$X1 <- as.matrix(dataprep.out$X1[which(rownames(dataprep.out$X1) == "school.post.high"),])
# Same consolidation for the control units (X0).
dataprep.out$X0["school.high",] <- dataprep.out$X0["school.high",] + dataprep.out$X0["school.post.high",]
# NOTE(review): same missing minus. Selecting a single row with the default
# drop = TRUE returns a plain vector, so rownames(dataprep.out$X0) is NULL
# from here on.
dataprep.out$X0 <- dataprep.out$X0[which(rownames(dataprep.out$X0) == "school.post.high"),]
# Row positions that delimit the schooling block. Because rownames() is NULL
# at this point, both which() calls return integer(0).
lowest <- which(rownames(dataprep.out$X0) == "school.illit")
highest <- which(rownames(dataprep.out$X0) == "school.high")
# Rescale the treated unit's schooling rows to percentage shares (they sum to
# 100). This fails because lowest and highest are both empty.
dataprep.out$X1[lowest:highest,] <- (100 * dataprep.out$X1[lowest:highest,]) / sum(dataprep.out$X1[lowest:highest,])
#> Error in lowest:highest: argument of length 0
# Second attempt: the same statement with different spacing, hence the same
# error.
dataprep.out$X1[lowest:highest,] <- (100 * dataprep.out$X1[lowest:highest,])/sum(dataprep.out$X1[lowest:highest,])
#> Error in lowest:highest: argument of length 0
Created on 2019-10-14 by the reprex package (v0.3.0)
The errors arise from
dataprep.out$X1
[,1]
[1,] 13.48
which is a single-element matrix with no row names. So, if you subset it with an empty index such as `lowest`, the result is always going to be
dataprep.out$X1[lowest]
numeric(0)
except for cases like dataprep.out$X1[1,1]
I can't tell from your code what it is that you're trying to calculate specifically.
I want to replicate the synthetic control analysis done by Abadie, Diamond, and Hainmueller (2011) in their paper entitled "Synth: An R Package for Synthetic Control Methods in Comparative Case Studies" (https://www.jstatsoft.org/article/view/v042i13).
I tried to follow the commands given in the paper in order to consolidate the variables "school.high" and "school.post.high" to represent all cities that have people with more than high school education, and to use the percentage share of each predictor for further analysis. However, any time I try the code as given in the paper, I get an error. I have already tried a suggestion by Richard Careaga (technocrat), but I could not get what I expected.
Please help me figure out the cause of the error and how to resolve it.
Please find below the code:
library(reprex)
library(Synth)
#> ##
#> ## Synth Package: Implements Synthetic Control Methods.
#> ## See http://www.mit.edu/~jhainm/software.htm for additional information.
data(basque)
basque[85:89, 1:4]
#> regionno regionname year gdpcap
#> 85 2 Andalucia 1996 5.995930
#> 86 2 Andalucia 1997 6.300986
#> 87 3 Aragon 1955 2.288775
#> 88 3 Aragon 1956 2.445159
#> 89 3 Aragon 1957 2.603399
# Assemble the inputs for the synthetic control fit. Region 17 is the
# treated unit; regions 2-16 and 18 form the pool of control units.
dataprep.out <- dataprep(
  foo = basque,
  # Identifier columns and the treated/control split.
  unit.variable = "regionno",
  unit.names.variable = "regionname",
  time.variable = "year",
  treatment.identifier = 17,
  controls.identifier = c(2:16, 18),
  # Outcome variable and the time windows used for fitting and plotting.
  dependent = "gdpcap",
  time.optimize.ssr = 1960:1969,
  time.plot = 1955:1997,
  # Ordinary predictors, averaged over 1964-1969.
  predictors = c(
    "school.illit", "school.prim", "school.med",
    "school.high", "school.post.high", "invest"
  ),
  predictors.op = "mean",
  time.predictors.prior = 1964:1969,
  # Special predictors, each with its own averaging window. The six sector
  # variables share the same odd-year window, so they are generated
  # together.
  special.predictors = c(
    list(list("gdpcap", 1960:1969, "mean")),
    lapply(
      c(
        "sec.agriculture", "sec.energy", "sec.industry",
        "sec.construction", "sec.services.venta", "sec.services.nonventa"
      ),
      function(sector) list(sector, seq(1961, 1969, by = 2), "mean")
    ),
    list(list("popdens", 1969, "mean"))
  )
)
dataprep.out$X0
#> 2 3 4
#> school.illit 863.389160 73.121226 31.488423
#> school.prim 3062.424886 728.578929 670.909393
#> school.med 155.565318 44.215389 46.398482
#> school.high 57.266496 16.091676 14.799358
#> school.post.high 27.278924 8.684416 6.424505
#> invest 19.320031 21.577486 22.769643
#> special.gdpcap.1960.1969 2.560747 3.699907 3.733876
#> special.sec.agriculture.1961.1969 24.194000 21.726000 12.362000
#> special.sec.energy.1961.1969 2.774000 6.278000 18.648000
#> special.sec.industry.1961.1969 18.276000 22.780000 24.126000
#> special.sec.construction.1961.1969 8.130000 7.832000 9.006000
#> special.sec.services.venta.1961.1969 38.186000 34.289999 30.152000
#> special.sec.services.nonventa.1961.1969 8.444000 7.096000 5.708000
#> special.popdens.1969 68.510002 24.040001 98.739998
#> 5 6 7
#> school.illit 47.903906 128.308287 9.394911
#> school.prim 300.813619 522.094955 289.571732
#> school.med 20.045204 45.447489 24.106414
#> school.high 5.921604 12.086849 7.304420
#> school.post.high 3.680154 5.844122 2.885214
#> invest 24.441712 25.954247 29.071211
#> special.gdpcap.1960.1969 5.215974 3.051014 3.871173
#> special.sec.agriculture.1961.1969 13.130000 19.944000 15.922000
#> special.sec.energy.1961.1969 2.076000 7.818000 2.894000
#> special.sec.industry.1961.1969 18.258000 9.816000 36.530000
#> special.sec.construction.1961.1969 8.294000 8.670000 5.976000
#> special.sec.services.venta.1961.1969 51.752000 45.278001 33.484000
#> special.sec.services.nonventa.1961.1969 6.494000 8.482000 5.198000
#> special.popdens.1969 104.169998 148.250000 87.389999
#> 8 9
#> school.illit 105.508144 254.449707
#> school.prim 1766.928691 1035.269368
#> school.med 102.299929 33.369543
#> school.high 37.787317 16.539727
#> school.post.high 19.173427 6.308165
#> invest 19.652509 17.970690
#> special.gdpcap.1960.1969 2.807062 2.240688
#> special.sec.agriculture.1961.1969 29.718000 36.086001
#> special.sec.energy.1961.1969 7.818000 5.180000
#> special.sec.industry.1961.1969 16.474000 17.178000
#> special.sec.construction.1961.1969 7.144000 5.810000
#> special.sec.services.venta.1961.1969 30.844000 29.238000
#> special.sec.services.nonventa.1961.1969 8.000000 6.506000
#> special.popdens.1969 28.770000 22.379999
#> 10 11 12
#> school.illit 277.935237 266.967601 177.727966
#> school.prim 2883.361735 1695.117126 692.888550
#> school.med 215.165476 103.959094 24.048450
#> school.high 63.608315 34.275772 11.271676
#> school.post.high 32.374611 16.822106 4.570566
#> invest 22.520380 23.702696 20.239484
#> special.gdpcap.1960.1969 5.147008 3.843245 1.926478
#> special.sec.agriculture.1961.1969 6.936000 18.582000 37.948000
#> special.sec.energy.1961.1969 2.880000 2.698000 2.646000
#> special.sec.industry.1961.1969 40.056000 28.046000 10.886000
#> special.sec.construction.1961.1969 6.706000 6.170000 8.770000
#> special.sec.services.venta.1961.1969 39.068000 39.064001 31.720000
#> special.sec.services.nonventa.1961.1969 4.356000 5.444000 8.038000
#> special.popdens.1969 153.119995 128.699997 28.969999
#> 13 14 15
#> school.illit 234.752001 133.158962 105.877481
#> school.prim 1666.930420 1856.074280 431.746806
#> school.med 77.464478 269.961647 27.907434
#> school.high 29.341941 62.458658 9.082540
#> school.post.high 13.543060 57.704985 4.428528
#> invest 20.606269 16.234543 20.706271
#> special.gdpcap.1960.1969 2.516288 5.976692 2.899557
#> special.sec.agriculture.1961.1969 28.862000 1.864000 19.352000
#> special.sec.energy.1961.1969 5.328000 2.074000 10.228000
#> special.sec.industry.1961.1969 17.882000 23.834000 19.644000
#> special.sec.construction.1961.1969 7.032000 8.358000 6.290000
#> special.sec.services.venta.1961.1969 33.714001 52.714000 36.177999
#> special.sec.services.nonventa.1961.1969 7.188000 11.162000 8.312000
#> special.popdens.1969 91.019997 442.450012 73.360001
#> 16 18
#> school.illit 13.438050 9.151832
#> school.prim 280.002396 152.269465
#> school.med 20.450936 9.760035
#> school.high 6.550353 3.377170
#> school.post.high 4.190368 1.732763
#> invest 19.955159 18.055932
#> special.gdpcap.1960.1969 3.996087 3.809205
#> special.sec.agriculture.1961.1969 24.704000 30.322001
#> special.sec.energy.1961.1969 2.740000 2.888000
#> special.sec.industry.1961.1969 28.398000 26.612000
#> special.sec.construction.1961.1969 6.982000 5.238000
#> special.sec.services.venta.1961.1969 30.468000 28.300000
#> special.sec.services.nonventa.1961.1969 6.710000 6.644000
#> special.popdens.1969 44.169998 46.580002
dataprep.out$X1
#> 17
#> school.illit 39.888465
#> school.prim 1031.742299
#> school.med 90.358668
#> school.high 25.727525
#> school.post.high 13.479720
#> invest 24.647383
#> special.gdpcap.1960.1969 5.285468
#> special.sec.agriculture.1961.1969 6.844000
#> special.sec.energy.1961.1969 4.106000
#> special.sec.industry.1961.1969 45.082000
#> special.sec.construction.1961.1969 6.150000
#> special.sec.services.venta.1961.1969 33.754000
#> special.sec.services.nonventa.1961.1969 4.072000
#> special.popdens.1969 246.889999
dataprep.out$Z0
#> 2 3 4 5 6 7 8
#> 1960 2.010140 2.881462 2.967295 4.058841 2.357684 3.137032 2.138817
#> 1961 2.129177 3.099543 3.143887 4.360254 2.445730 3.327621 2.239503
#> 1962 2.280348 3.359183 3.373536 4.646173 2.648243 3.555341 2.454227
#> 1963 2.431020 3.614182 3.597258 4.911525 2.844759 3.771423 2.672237
#> 1964 2.508855 3.680091 3.672594 5.050700 2.951157 3.839403 2.777778
#> 1965 2.584690 3.745287 3.743359 5.184662 3.054199 3.906098 2.882176
#> 1966 2.694444 3.883319 3.909383 5.466795 3.231791 4.032133 2.988075
#> 1967 2.802342 4.016138 4.073122 5.737646 3.403385 4.155955 3.094544
#> 1968 2.987361 4.243645 4.308626 6.161454 3.660312 4.375893 3.302271
#> 1969 3.179092 4.476221 4.549700 6.581691 3.912882 4.610826 3.520994
#> 9 10 11 12 13 14 15
#> 1960 1.667524 4.241788 3.219294 1.535847 1.983290 5.161097 2.118609
#> 1961 1.752428 4.575335 3.362468 1.596258 2.005784 5.632605 2.305484
#> 1962 1.920451 4.838046 3.569980 1.705584 2.185661 5.840831 2.521422
#> 1963 2.091902 5.081334 3.765210 1.817695 2.366395 6.024493 2.739074
#> 1964 2.182591 5.158098 3.823693 1.882819 2.458797 6.099329 2.851257
#> 1965 2.274707 5.223651 3.874179 1.948872 2.549700 6.152028 2.965938
#> 1966 2.378392 5.332477 3.978149 2.032633 2.669666 6.110469 3.099186
#> 1967 2.482362 5.429449 4.073408 2.117609 2.787846 6.057341 3.227292
#> 1968 2.709083 5.674379 4.279777 2.245501 2.978363 6.253142 3.461154
#> 1969 2.947444 5.915524 4.486290 2.381962 3.177378 6.435590 3.706155
#> 16 18
#> 1960 3.163525 2.969866
#> 1961 3.335904 3.153171
#> 1962 3.623393 3.404384
#> 1963 3.894816 3.669238
#> 1964 3.985147 3.803985
#> 1965 4.072979 3.921808
#> 1966 4.210011 4.032705
#> 1967 4.352399 4.160311
#> 1968 4.556984 4.373036
#> 1969 4.765710 4.603542
dataprep.out$Z1
#> 17
#> 1960 4.285918
#> 1961 4.574336
#> 1962 4.898957
#> 1963 5.197015
#> 1964 5.338903
#> 1965 5.465153
#> 1966 5.545916
#> 1967 5.614896
#> 1968 5.852185
#> 1969 6.081405
# Consolidate the two highest schooling categories for the treated unit (X1):
# add "school.post.high" into "school.high".
dataprep.out$X1["school.high",] <- dataprep.out$X1["school.high",] + dataprep.out$X1["school.post.high",]
# NOTE(review): which(...) with no leading minus KEEPS only the
# "school.post.high" row rather than dropping it, so X1 shrinks to a single
# value. The intent was presumably a negative index, [-which(...), ], to
# remove the now-redundant row -- confirm against the paper's listing.
dataprep.out$X1 <- as.matrix(dataprep.out$X1[which(rownames(dataprep.out$X1) == "school.post.high"),])
# Same consolidation for the control units (X0).
dataprep.out$X0["school.high",] <- dataprep.out$X0["school.high",] + dataprep.out$X0["school.post.high",]
# NOTE(review): same missing minus. Selecting a single row with the default
# drop = TRUE returns a plain vector, so rownames(dataprep.out$X0) is NULL
# from here on.
dataprep.out$X0 <- dataprep.out$X0[which(rownames(dataprep.out$X0) == "school.post.high"),]
# Row positions that delimit the schooling block. Because rownames() is NULL
# at this point, both which() calls return integer(0).
lowest <- which(rownames(dataprep.out$X0) == "school.illit")
highest <- which(rownames(dataprep.out$X0) == "school.high")
# Rescale the treated unit's schooling rows to percentage shares (they sum to
# 100). This fails because lowest and highest are both empty. The recorded
# message below is from a German locale and reads "argument of length 0".
dataprep.out$X1[lowest:highest,] <- (100 * dataprep.out$X1[lowest:highest,]) / sum(dataprep.out$X1[lowest:highest,])
#> Error in lowest:highest: Argument der Länge 0
# Same rescaling for the control units: each column is divided by its own
# column sum and multiplied by 100. It fails for the same reason.
dataprep.out$X0[lowest:highest,] <- 100 * scale(dataprep.out$X0[lowest:highest,], center = FALSE, scale = colSums(dataprep.out$X0[lowest:highest,]))
#> Error in lowest:highest: Argument der Länge 0
Created on 2019-10-15 by the reprex package (v0.3.0)
I'll give this a deeper look when I have a chance. My initial reaction is that it would be better to use `dplyr` to mutate the `basque` data frame, adding a new variable that combines the two variables with `mutate`, such as
``` r
library(Synth)
#> ##
#> ## Synth Package: Implements Synthetic Control Methods.
#> ## See http://www.mit.edu/~jhainm/software.htm for additional information.
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Load the `basque` data set into the workspace.
data(basque)

# Create the combined schooling variable BEFORE calling dataprep(): the data
# frame passed as `foo` must already exist and must already contain every
# column named in `predictors`. Without this step the call below fails with
# "object 'high_post' not found".
high_post <- mutate(basque, high_post = school.high + school.post.high)

# Build the synthetic control inputs from the augmented data frame, using the
# combined `high_post` column in place of the two separate schooling columns.
# NOTE(review): "school.med" is not among the predictors here although the
# question's code includes it -- add it back if the goal is to match the
# paper's predictor set.
dataprep.out <- dataprep(
  foo = high_post,
  predictors = c("school.illit", "school.prim", "high_post", "invest"),
  predictors.op = "mean",
  time.predictors.prior = 1964:1969,
  special.predictors = list(
    list("gdpcap", 1960:1969, "mean"),
    list("sec.agriculture", seq(1961, 1969, 2), "mean"),
    list("sec.energy", seq(1961, 1969, 2), "mean"),
    list("sec.industry", seq(1961, 1969, 2), "mean"),
    list("sec.construction", seq(1961, 1969, 2), "mean"),
    list("sec.services.venta", seq(1961, 1969, 2), "mean"),
    list("sec.services.nonventa", seq(1961, 1969, 2), "mean"),
    list("popdens", 1969, "mean")),
  dependent = "gdpcap",
  unit.variable = "regionno",
  unit.names.variable = "regionname",
  time.variable = "year",
  treatment.identifier = 17,
  controls.identifier = c(2:16, 18),
  time.optimize.ssr = 1960:1969,
  time.plot = 1955:1997)
```

Created on 2019-10-15 by the reprex package (v0.3.0)
This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.