Hi everyone! First time writing
I have a dataset with dates of birth. Sometimes the dates of birth are missing. If MG_FID is the same, this means that these individuals are siblings and father_dob and mother_dob are the same. So for those IIDs, for who MG_FID is the same, I want to replace the NAs in mother_dob and father_dob.
- If MG_FID=0, then this means that they do not have any siblings and dob will have to remain NA.
mg_fid<-structure(list(IID = c(1101480L, 1101481L, 1101509L, 1101554L,
1101600L, 1101619L, 1101623L, 1101625L, 1101637L, 1101639L, 1101644L,
1101647L, 1101650L, 1101652L, 1101654L, 1101655L, 1101656L, 1101659L,
1101660L, 1101661L, 1101662L, 1101674L, 1101675L, 1101678L, 1101679L,
1101681L, 1101685L, 1101692L, 1101693L, 1101694L, 1101696L, 1101701L,
1101705L, 1101709L, 1101712L, 1101715L, 1101717L, 1101718L, 1101720L,
1131480L, 1131481L, 1131554L, 1131600L, 1131619L, 1131623L, 1131625L,
1131639L, 1131644L, 1131652L, 1131659L, 1131660L, 1131661L, 1131678L,
1131679L, 1131685L, 1131692L, 1131696L, 1131701L, 1131705L, 1131709L,
1131715L, 1131717L, 1131718L, 1193598L, 1193602L, 1193607L, 1193609L,
1193612L, 1193617L, 1193618L, 1193620L, 1193621L, 1193622L, 1193634L,
1193638L, 1193639L, 1193640L, 1193642L, 1193653L, 1193656L, 1193659L,
1193660L, 1193661L, 1193664L, 1193667L, 1193668L, 1193689L, 2202638L,
2202647L, 2202652L, 2202655L, 2202660L, 2202661L, 2202665L, 2202673L,
2202686L, 2202693L, 2232655L, 2232661L, 2232665L, 2232686L, 2296211L,
2296214L, 2296217L, 3305120L, 3305129L, 3305135L, 3305139L, 3305141L,
3305144L, 3305151L, 3305153L, 3305154L, 3305161L, 3305164L, 3305168L,
3335154L, 3345153L, 3392379L, 3392380L, 3392385L, 3392389L, 3392390L,
3392391L, 3392392L, 3392395L, 3392400L, 4406324L, 4406326L, 4406327L,
4406339L, 4406341L, 4406343L, 4406349L, 4406352L, 4406356L, 4406366L,
4406370L, 4406382L, 4406391L, 4406395L, 4406396L, 4406398L, 4406403L,
4406409L, 4406416L, 4406421L, 4406422L, 4406425L, 4406436L, 4406438L,
4406460L, 4406467L, 4406469L, 4406480L, 4406483L, 4406484L, 4406485L,
4436326L, 4436366L, 4436382L, 4436396L, 4436416L, 4436422L, 4436425L,
4436436L, 4436467L, 4436469L, 4436480L, 4436483L, 4494908L, 4494910L,
4494916L, 4494919L), child_dob = structure(c(15454, 15405, 15112,
14371, 14789, 15154, 12628, 15548, 12871, 16199, 14206, 13984,
13718, 14602, 15889, 16216, 13309, 15490, 13870, 12242, 12740,
14956, 11581, 14433, 16346, 14208, 16288, 12957, 15932, 13314,
11487, 11768, 14650, 16247, 12741, 15075, 14823, 14160, 13284,
14798, 16300, 14803, 14049, NA, 13496, 16183, NA, 14206, 11675,
11226, 14630, 13035, 13767, 15667, 15201, NA, 12014, 12646, 15524,
16247, 16884, 15722, 14792, 16310, 16959, 16173, 16765, 15535,
NA, 16659, 14909, 16004, 15088, 15293, 15844, 14653, 16304, 15694,
15958, 14993, 15274, 15332, 16363, 14846, 16304, 16597, 15226,
11404, 15126, 12299, 14852, 15606, 15884, 16515, 15475, 15982,
13819, 15996, 16512, 14823, 16631, 14442, 16672, 14120, 16499,
15269, 13863, 16755, 15690, 15380, 13232, 13482, 13997, 16924,
13431, 14692, 14789, 12156, 15839, 16585, 15884, 13266, 16027,
13214, 14770, 13228, 15578, 13892, 15133, 12759, 13151, 14447,
14922, 14841, 14572, 15804, 15260, 14869, 11111, 15579, 13473,
15099, 15588, 11801, 13915, 14692, 12526, 14145, 13782, 15131,
14759, 13354, 12747, 13389, 13530, 15554, 15211, 11365, NA, NA,
12577, 15479, NA, 14759, NA, 16528, NA, 11559, NA, NA, NA, NA,
NA, NA), class = "Date"), mother_dob = structure(c(3751, 4152,
2012, 4033, 3220, 3547, 226, 4627, 936, 3971, 488, -751, NA,
-152, 7709, 6247, 628, 1515, 2679, 1220, 982, 3470, -480, 1034,
7168, 1783, 3356, 2352, 4166, 892, 1601, 1002, 3436, 3581, 2842,
NA, 7439, 1380, -476, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 7939,
7973, NA, 7396, 8002, NA, 8499, 7879, 7512, 7512, 7495, 7919,
7503, 7412, 8069, 7823, 7321, 7399, 7686, 7682, 7504, 7738, 9053,
7579, 1088, 3346, 2276, 1738, 3566, 5392, 1106, 214, 2348, 1863,
NA, NA, NA, NA, 999, NA, 4484, 8885, 1958, 3197, 5721, 2815,
5304, 5451, 2145, 5354, 4758, 2517, 1557, 5354, 2145, 6377, 6011,
4084, -406, 332, 390, -716, 2218, 3982, 1251, 2842, 3158, 2008,
5332, 5872, 1643, 7342, 3350, 5199, 1065, -2651, 6334, NA, 6508,
2554, -835, 1502, 132, 1409, 4033, 3289, 1888, 2864, 1766, 629,
739, 3158, 1915, 778, 3781, NA, 5199, NA, 6508, 132, 4033, 3289,
1888, NA, NA, 3158, NA, 7075, NA, 2985, 431), class = "Date"),
father_dob = structure(c(3048, 4252, -485, 4363, 536, 565,
56, 1953, -534, 3535, -4109, -2086, NA, -321, 4858, 4287,
2169, -895, 2459, 873, 35, 3127, -1333, -73, 3866, 1573,
3192, 1424, -3378, -182, -139, 443, 3353, 5299, 97, NA, 5978,
1369, -3093, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 7606,
NA, 6055, 7062, NA, 7893, 8325, 7297, 7297, 6510, 7849, 6883,
7503, 7536, 7356, 6338, NA, 7600, 7908, 7339, 5150, 7532,
7128, 511, 454, 2154, 2348, 2703, 4071, 1491, -1679, 3894,
-1475, NA, NA, NA, NA, -179, NA, 2941, 6677, 1067, -1206,
4403, 981, 3626, 437, 829, 2087, 2980, 2364, 653, 2087, 829,
6281, 6839, 4344, -3220, 3175, 335, 410, 1961, 2730, -951,
2670, 2938, -3676, 1977, 5758, 1154, 7582, 3564, 3297, 4148,
-2770, 5088, NA, 7605, 4096, -1145, 948, 1022, 560, 3016,
-1902, 2022, -703, 40, 446, 91, 1807, 1433, 4119, NA, NA,
3297, NA, 7605, 1022, 3016, -1902, 2022, NA, NA, 1807, NA,
5903, NA, 2521, 849), class = "Date"), MG_FID = c(1L, 2L,
0L, 3L, 4L, 5L, 7L, 8L, 0L, 9L, 10L, 0L, 0L, 11L, 0L, 0L,
0L, 12L, 13L, 14L, 0L, 0L, 0L, 15L, 16L, 17L, 18L, 19L, 20L,
0L, 21L, 22L, 23L, 24L, 0L, 25L, 26L, 27L, 0L, 1L, 2L, 3L,
4L, 5L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 18L,
19L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 28L, 0L, 29L, 30L, 0L, 31L, 0L,
28L, 29L, 30L, 31L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 32L,
35L, 33L, 0L, 0L, 34L, 33L, 35L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 36L, 37L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 38L, 0L,
39L, 0L, 0L, 40L, 41L, 0L, 0L, 42L, 0L, 43L, 44L, 45L, 0L,
46L, 47L, 48L, 49L, 50L, 0L, 51L, 37L, 38L, 39L, 40L, 42L,
43L, 44L, 45L, 47L, 48L, 49L, 50L, 0L, 0L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-174L))
mg_fid
I tried the code below, but it turns all my dates into random numbers.
parent_age %>% group_by(MG_FID) %>%
mutate(father_dob = ifelse(is.na(father_dob),max(father_dob,na.rm=TRUE),father_dob),
mother_dob = ifelse(is.na(mother_dob),max(mother_dob,na.rm=TRUE),mother_dob))
Does anyone have any ideas?
Let me know if what I wrote doesn't make sense.