Hi,
Please, help me fix this data.merge script, and figure out how to use the variable y in the output filename. If I can fix these two things, I can then quickly format many dozens of data frames (each with variable dimensions in terms of rows/columns), so that:
- each of these data frames has the SAME number of rows as x.
- each row from y that matches RID and VISCODE is added (in its entirety) to x on the same row at the right (added columns).
- extra rows that are contained in y but not in x are discarded.
- if y lacks a row that matches RID and VISCODE in x, then NA is added to each of the added columns so that there is still a regular/rectangular data frame.
Finally, Iād like outfile to have the same name as the y input df. For example, instead of "ADNI_TEST.csv" in the data.merge function, I would like the function to exit 'y'.csv where y is the actual name of the input dataframe specified by y.
Suggestions please! And thank you very much for any help you can offer.
John
Here is the reprex of my sample data and example data.merge script:
data.merge <- function(x,y,n){
x: Main Data file to which columns are added.
y: New data file used to add columns to x
(y has to have "RID" and "VISCODE" columns for proper matching)
(y may need to alter VISCODE to VISCODE2 for some data files)
n: the column(s) in y you want to add to x -- may select ranges, too.
RID <- y$RID
VISCODE <- y$VISCODE
y <- data.frame(RID,VISCODE,y[,n])
ADNI <- merge(x,y, by = c("RID","VISCODE") ,all.x=TRUE,all.y=FALSE);
return(ADNI) # Stop printing, so as not to overrun memory buffer.
write.csv(ADNI, file = "ADNI_TEST.csv", quote = TRUE)
}
firstfile_x <- tibble::tribble(
~RID, ~VISCODE, ~VISCODE2, ~EXAMDATE, ~VERSION, ~VISNAME, ~VISITNO, ~LONIUID_1, ~LONIUID_2, ~LONIUID_3, ~LONIUID_4, ~RUNDATE, ~STATUS, ~AD_CST_L, ~AD_CST_R, ~AD_ICP_L,
123L, "v11", "m84", "3/23/2013", 1L, "ADNI2 Year 1 Visit", 28L, 369744L, 369769L, 369819L, 369794L, "5/2/2013", "Complete", 0.00114663, 0.00116237, 0.00161111,
123L, "v21", "m96", "2/14/2014", 1L, "ADNI2 Year 2 Visit", 30L, 420946L, 420987L, 421069L, 421028L, "5/2/2014", "Complete", 0.00130536, 0.00133773, 0.00135224,
123L, "v31", "m108", "3/5/2015", 1L, "ADNI2 Year 3 Visit", 8L, 487576L, 487587L, 487635L, 487624L, "8/21/2019", "Complete", 0.00116516, 0.00117003, 0.00146521,
610L, "v51", "m120", "7/8/2016", 1L, "ADNI2 Year 5 Visit", 14L, 786152L, 786164L, 786829L, 786176L, "8/21/2019", "Complete", 0.00160112, 0.00166888, 0.00192628,
671L, "v31", "m108", "7/19/2015", 1L, "ADNI2 Year 3 Visit", 13L, 537878L, 537904L, 537956L, 537930L, "8/21/2019", "Complete", 0.00138872, 0.00122101, 0.00133198,
778L, "v21", "m84", "9/6/2013", 1L, "ADNI2 Year 2 Visit", 30L, 397234L, 397299L, 397444L, 397363L, "11/2/2013", "Complete", 0.00207073, 0.00170183, 0.00188191,
778L, "v31", "m96", "9/25/2014", 1L, "ADNI2 Year 3 Visit", 12L, 471183L, 471200L, 471235L, 471217L, "8/21/2019", "Complete", 0.00208479, 0.00205418, 0.00172855,
908L, "v21", "m84", "1/8/2014", 1L, "ADNI2 Year 2 Visit", 30L, 420931L, 420972L, 421054L, 421013L, "5/2/2014", "Complete", 0.00129875, 0.00116119, 0.00122157,
934L, "v11", "m84", "10/24/2013", 1L, "ADNI2 Year 1 Visit", 28L, 413297L, 413362L, 413492L, 413427L, "2/2/2014", "Complete", 0.0012088, 0.0011105, 0.0014697,
934L, "v31", "m108", "10/16/2015", 1L, "ADNI2 Year 3 Visit", 13L, 614650L, 614692L, 614748L, 614720L, "8/21/2019", "Complete", 0.00118444, 0.00116324, 0.00126697,
1074L, "v31", "m96", "4/24/2015", 1L, "ADNI2 Year 3 Visit", 12L, 537875L, 537901L, 537953L, 537927L, "8/21/2019", "Complete", 0.00117554, 0.00115228, 0.00132158,
1346L, "v31", "m96", "4/2/2015", 1L, "ADNI2 Year 3 Visit", 12L, 537879L, 537905L, 537957L, 537931L, "8/21/2019", "Complete", 0.00125772, 0.00126526, 0.00140356,
1352L, "v31", "m96", "4/12/2015", 1L, "ADNI2 Year 3 Visit", 12L, 786153L, 786165L, 786830L, 786177L, "8/21/2019", "Complete", 0.00138516, 0.00130067, 0.00164973,
2007L, "scmri", "scmri", "6/19/2010", 1L, "ADNIGO Screening MRI", 14L, 299634L, 299885L, 359704L, 359115L, "10/3/2012", "Complete", 0.0012181, 0.00109861, 0.00113204,
2007L, "v11", "m24", "3/28/2012", 1L, "ADNI2 Year 1 Visit", 28L, 335804L, 335683L, 360065L, 359414L, "10/3/2012", "Complete", 0.00143382, 0.00130726, 0.00157676,
2007L, "m03", "m03", "9/1/2010", 1L, "ADNIGO Month 3 MRI", 15L, 299635L, 299888L, 359835L, 359263L, "10/3/2012", "Complete", 0.00139614, 0.00118858, 0.00126052,
2007L, "v06", "m12", "3/26/2011", 1L, "ADNI2 Initial Visit-Cont Pt", 26L, 299636L, 299891L, 359935L, 359365L, "10/3/2012", "Complete", 0.00146508, 0.0014392, 0.00145967,
2031L, "scmri", "scmri", "8/13/2010", 1L, "ADNIGO Screening MRI", 14L, 299637L, 299894L, 359705L, 359116L, "10/3/2012", "Complete", 0.00118117, 0.00110229, 0.00132531,
2031L, "v11", "m24", "7/25/2012", 1L, "ADNI2 Year 1 Visit", 28L, 356984L, 358870L, 360240L, 359559L, "2/1/2013", "Complete", 0.00129732, 0.00120146, 0.00148761,
2031L, "m03", "m03", "11/12/2010", 1L, "ADNIGO Month 3 MRI", 15L, 299638L, 299897L, 359836L, 359264L, "10/3/2012", "Complete", 0.00122562, 0.00111628, 0.0011707,
2031L, "v21", "m36", "7/24/2013", 1L, "ADNI2 Year 2 Visit", 30L, 397236L, 397301L, 397446L, 397365L, "11/2/2013", "Complete", 0.00124111, 0.0012689, 0.00157564,
2031L, "v06", "m12", "7/21/2011", 1L, "ADNI2 Initial Visit-Cont Pt", 26L, 299640L, 299903L, 360066L, 359415L, "10/3/2012", "Complete", 0.00140551, 0.00132471, 0.00175401,
2031L, "m06", "m06", "2/10/2011", 1L, "ADNI1/GO Month 6", 3L, 299639L, 299900L, 359936L, 359366L, "10/3/2012", "Complete", 0.0013122, 0.00135983, 0.00144974,
2047L, "v11", "m24", "9/28/2012", 1L, "ADNI2 Year 1 Visit", 28L, 356988L, 358875L, 360245L, 359564L, "2/1/2013", "Complete", 0.00131261, 0.00120259, 0.00139444,
2047L, "m06", "m06", "3/4/2011", 1L, "ADNI1/GO Month 6", 3L, 299732L, 299951L, 359969L, 359392L, "10/3/2012", "Complete", 0.00125712, 0.00117188, 0.00138467,
2047L, "m03", "m03", "12/8/2010", 1L, "ADNIGO Month 3 MRI", 15L, 299731L, 299950L, 359874L, 359315L, "10/3/2012", "Complete", 0.00127272, 0.00127151, 0.00140615,
2047L, "v06", "m12", "9/15/2011", 1L, "ADNI2 Initial Visit-Cont Pt", 26L, 299733L, 299952L, 360077L, 359427L, "10/3/2012", "Complete", 0.00121407, 0.00121281, 0.00130178,
2047L, "scmri", "scmri", "8/20/2010", 1L, "ADNIGO Screening MRI", 14L, 299730L, 299949L, 360286L, 359188L, "10/3/2012", "Complete", 0.00125326, 0.0012075, 0.00139156,
2047L, "v21", "m36", "9/14/2013", 1L, "ADNI2 Year 2 Visit", 30L, 397237L, 397302L, 397447L, 397366L, "11/2/2013", "Complete", 0.00124826, 0.00119243, 0.00143786
)
secondfile_y <- tibble::tribble(
~RID, ~VISCODE, ~PTID, ~TBIHX, ~DTIROI, ~SITE, ~COLPROT, ~ORIGPROT, ~EXAMDATE, ~DX_bl, ~AGE, ~PTGENDER, ~PTEDUCAT, ~PTETHCAT, ~PTRACCAT, ~PTMARRY, ~APOE4, ~FDG, ~PIB, ~AV45, ~ABETA, ~TAU, ~PTAU, ~CDRSB, ~ADAS11,
610L, "m120", "005_S_0610", 1L, 1L, 5L, "ADNI2", "ADNI1", "7/6/2016", "CN", 79, "Male", 20L, "Not Hisp/Latino", "White", "Married", 0L, NA, NA, 0.946164, NA, NA, NA, 0, 2L,
778L, "m84", "129_S_0778", 1L, 1L, 129L, "ADNI2", "ADNI1", "9/4/2013", "CN", 72.4, "Male", 13L, "Not Hisp/Latino", "White", "Married", 1L, NA, NA, NA, NA, NA, NA, 6, 18L,
778L, "m96", "129_S_0778", 1L, 1L, 129L, "ADNI2", "ADNI1", "9/23/2014", "CN", 72.4, "Male", 13L, "Not Hisp/Latino", "White", "Married", 1L, NA, NA, 1.39235, "646.2", 226.2, 19.25, 6, 22L,
908L, "m84", "003_S_0908", 1L, 1L, 3L, "ADNI2", "ADNI1", "1/6/2014", "LMCI", 62.9, "Female", 16L, "Not Hisp/Latino", "White", "Married", 0L, 1.2554, NA, 1.01167, NA, NA, NA, 1.5, 8L,
2077L, "m03", "021_S_2077", 1L, 1L, 21L, "ADNIGO", "ADNIGO", "12/8/2010", "EMCI", 81, "Male", 20L, "Not Hisp/Latino", "White", "Married", 0L, NA, NA, NA, NA, NA, NA, NA, NA,
2077L, "m24", "021_S_2077", 1L, 1L, 21L, "ADNI2", "ADNIGO", "10/17/2012", "EMCI", 81, "Male", 20L, "Not Hisp/Latino", "White", "Married", 0L, NA, NA, 1.2748, NA, NA, NA, 0.5, 14L,
2077L, "m48", "021_S_2077", 1L, 1L, 21L, "ADNI2", "ADNIGO", "10/21/2014", "EMCI", 81, "Male", 20L, "Not Hisp/Latino", "White", "Married", 0L, NA, NA, NA, NA, NA, NA, 0.5, 6L,
2347L, "m12", "129_S_2347", 1L, 1L, 129L, "ADNI2", "ADNIGO", "3/28/2012", "EMCI", 71, "Male", 18L, "Not Hisp/Latino", "White", "Married", 1L, NA, NA, NA, NA, NA, NA, 1.5, 9L,
2347L, "m24", "129_S_2347", 1L, 1L, 129L, "ADNI2", "ADNIGO", "3/27/2013", "EMCI", 71, "Male", 18L, "Not Hisp/Latino", "White", "Married", 1L, 1.421, NA, 0.959929, "1081", 300.4, 25.62, 2, 5L,
2347L, "m36", "129_S_2347", 1L, 1L, 129L, "ADNI2", "ADNIGO", "4/7/2014", "EMCI", 71, "Male", 18L, "Not Hisp/Latino", "White", "Married", 1L, NA, NA, NA, NA, NA, NA, 2, 4L,
4185L, "m03", "005_S_4185", 1L, 1L, 5L, "ADNI2", "ADNI2", "1/5/2012", "EMCI", 80.4, "Male", 20L, "Not Hisp/Latino", "White", "Married", 0L, NA, NA, NA, NA, NA, NA, NA, NA,
4185L, "m06", "005_S_4185", 1L, 1L, 5L, "ADNI2", "ADNI2", "4/2/2012", "EMCI", 80.4, "Male", 20L, "Not Hisp/Latino", "White", "Married", 0L, NA, NA, NA, NA, NA, NA, 0.5, 5L,
4185L, "m12", "005_S_4185", 1L, 1L, 5L, "ADNI2", "ADNI2", "9/27/2012", "EMCI", 80.4, "Male", 20L, "Not Hisp/Latino", "White", "Married", 0L, NA, NA, NA, NA, NA, NA, 1, 6L,
4185L, "m24", "005_S_4185", 1L, 1L, 5L, "ADNI2", "ADNI2", "9/26/2013", "EMCI", 80.4, "Male", 20L, "Not Hisp/Latino", "White", "Married", 0L, 1.11068, NA, 1.05932, NA, NA, NA, 0.5, 4L,
4185L, "m48", "005_S_4185", 1L, 1L, 5L, "ADNI2", "ADNI2", "9/24/2015", "EMCI", 80.4, "Male", 20L, "Not Hisp/Latino", "White", "Married", 0L, NA, NA, 1.11009, NA, NA, NA, 0.5, 9L,
4371L, "m03", "129_S_4371", 1L, 1L, 129L, "ADNI2", "ADNI2", "2/13/2012", "CN", 67.7, "Male", 18L, "Not Hisp/Latino", "White", "Married", 1L, NA, NA, NA, NA, NA, NA, NA, NA,
4371L, "m24", "129_S_4371", 1L, 1L, 129L, "ADNI2", "ADNI2", "11/26/2013", "CN", 67.7, "Male", 18L, "Not Hisp/Latino", "White", "Married", 1L, 1.10519, NA, 1.17295, "769.8", 273.6, 24.27, 0, 5L,
4494L, "m03", "126_S_4494", 1L, 1L, 126L, "ADNI2", "ADNI2", "4/30/2012", "AD", 71.1, "Male", 12L, "Not Hisp/Latino", "White", "Married", 2L, NA, NA, NA, NA, NA, NA, NA, NA,
4494L, "m06", "126_S_4494", 1L, 1L, 126L, "ADNI2", "ADNI2", "8/13/2012", "AD", 71.1, "Male", 12L, "Not Hisp/Latino", "White", "Married", 2L, NA, NA, NA, NA, NA, NA, 4.5, 19L,
4494L, "m12", "126_S_4494", 1L, 1L, 126L, "ADNI2", "ADNI2", "2/7/2013", "AD", 71.1, "Male", 12L, "Not Hisp/Latino", "White", "Married", 2L, NA, NA, NA, NA, NA, NA, 4.5, 19L,
4494L, "m24", "126_S_4494", 1L, 1L, 126L, "ADNI2", "ADNI2", "2/10/2014", "AD", 71.1, "Male", 12L, "Not Hisp/Latino", "White", "Married", 2L, 1.05921, NA, 1.40822, "300.8", 510.2, 50.2, 8, 22L,
4626L, "m03", "052_S_4626", 1L, 1L, 52L, "ADNI2", "ADNI2", "7/27/2012", "LMCI", 69.2, "Male", 18L, "Not Hisp/Latino", "White", "Married", 0L, NA, NA, NA, NA, NA, NA, NA, NA,
4626L, "m06", "052_S_4626", 1L, 1L, 52L, "ADNI2", "ADNI2", "10/29/2012", "LMCI", 69.2, "Male", 18L, "Not Hisp/Latino", "White", "Married", 0L, NA, NA, NA, NA, NA, NA, 0.5, 11L
)
data.merge(firstfile_x,secondfile_y,77)