Hi everyone. I have a database of medical data with an ID (sample_name) made up by alphanumeric code that follows e.g.:
Example: A2589200014000006001 which accounts for
Project: A25
Center: 892
Patient: 00014
Disease: 00
Visit: 000 (baseline) / 701 (1 year visit)
Sample: 60
Specimen: 01
What I am trying to accomplish is to operate RQ values based on 1-year follow-up according to 701 code between follow-ups
database <- structure(list(sample_name = c("A2571900003007016001", "A2592000026007016001",
"A2592000036007016001", "A2542900002007016001", "A2552500037000006001",
"A2582200025000006001", "A2555600056007016001", "A2537700008000006001",
"A2537700009007016001", "A2564800002000006001", "A2589600037000006001",
"A2552500020007016001", "A2589600034000006001", "A2552500020000006001",
"A2571900001007016001", "A2582200032007016001", "A2582200020000006001",
"A2537700011000006001", "A2582200012007016001", "A2589800008007016001",
"A2591900088007016001", "A2541700027000006001", "A2537700030007016001",
"A2561700001000006001", "A2561900011007016001", "A2537700029000006001",
"A2537700010000006001", "A2537700032000006001", "A2552500026007016001",
"A2591800088000006001", "A2537600099007016001", "A2537700008007016001",
"A2582200021000006001", "A2537700013007016001", "A2555600055007016001",
"A2589600034000006001", "A2561900012007016001", "A2582200012007016001",
"A2555600056007016001", "A2582200019000006001", "A2543900001007016001",
"A2564800001000006001", "A2555100021000006001", "A2592000033000006001",
"A2552500027000006001", "A2537700031000006001", "A2552500026000006001",
"A2541700026007016001", "A2576000026007016001", "A2571200024000006001",
"A2552500026007016001", "A2571900005007016001", "A2556300014000006001",
"A2592000028007016001", "A2582200035000006001", "A2592000023007016001",
"A2552500034000006001", "A2555600055007016001", "A2582200019007016001",
"A2582200022000006001", "A2576000006007016001", "A2555300047000006001",
"A2589200008007016001", "A2582200035007016001", "A2592000028000006001",
"A2576000006007016001", "A2571200024007016001", "A2589400004007016001",
"A2589500045007016001", "A2592000038000006001", "A2555100024007016001",
"A2537700031007016001", "A2582200034000006001", "A2537600099007016001",
"A2537700016007016001", "A2589600038000006001", "A2554700034007016001",
"A2537700032000006001", "A2552500030000006001", "A2535500019000006001",
"A2576000006000006001", "A2571900001000006001", "A2582200030000006001",
"A2552500037000006001", "A2589200014007016001", "A2571200028007016001",
"A2561700001007016001", "A2592000024007016001", "A2582200026007016001",
"A2540900001007016001", "A2576100007000006001", "A2582200021000006001",
"A2544800001007016001", "A2576000007000006001", "A2582200020007016001",
"A2582200033000006001", "A2564800003007016001", "A2537700029000006001",
"A2555300047000006001", "A2555100024007016001", "A2555300047007016001",
"A2582200026007016001", "A2589200012007016001", "A2571900003007016001",
"A2537700015000006001", "A2589800009000006001", "A2592000029000006001",
"A2582200030000006001", "A2535500019000006001", "A2589200013007016001",
"A2574500061007016001", "A2582200020007016001", "A2591800099007016001",
"A2591900092000006001", "A2537700006007016001", "A2576000017000006001",
"A2561900013007016001", "A2576000017000006001", "A2582200012000006001",
"A2582200029000006001", "A2576000017000006001", "A2555100022007016001",
"A2582200033007016001", "A2537700029000006001", "A2582200011000006001",
"A2582200030007016001", "A2591900088000006001", "A2537700031007016001",
"A2576000006000006001", "A2555300047007016001", "A2582200019007016001",
"A2571200030007016001", "A2592000036007016001", "A2592000036000006001",
"A2589600038007016001", "A2589200008007016001", "A2571200027000006001",
"A2582200029007016001", "A2571900001000006001", "A2589200008007016001",
"A2592000036007016001", "A2589200010000006001", "A2571900007000006001",
"A2552500021000006001", "A2592000027000006001", "A2537700009000006001",
"A2582200037000006001", "A2576000014007016001", "A2571200023007016001",
"A2537700017007016001", "A2537700011000006001", "A2589600034007016001",
"A2552500027000006001", "A2552500033000006001", "A2537700011000006001",
"A2592000038000006001", "A2535500019007016001", "A2591800088007016001",
"A2556300014007016001", "A2556300014007016001", "A2592000030000006001",
"A2540900001007016001", "A2589800008000006001", "A2576000014000006001",
"A2571900008007016001", "A2561500002000006001", "A2582200032007016001",
"A2591900095000006001", "A2591800088007016001", "A2592000023000006001",
"A2564800003000006001", "A2582200012000006001", "A2552500021007016001",
"A2540900001000006001", "A2576100015000006001", "A2537700027007016001",
"A2576000007007016001", "A2537700017000006001", "A2552500034007016001",
"A2592000035000006001"), RQ = c(1.08197462642302, 1.49002169146583,
2.2626289256918, 1.59364836099312, 1.16958766405195, 1.15722281085647,
1.90395581699507, 1.16070391438372, 1.42207741058728, NA, 2.01623900584052,
1.32715174233857, 0.937137946702273, 0.379016702964743, 1.19471513515602,
1.45969471059855, 1.83062125071651, 1.19803218514883, 2.57933650131177,
0.971083101781619, 0.844400887423782, 1.21644126855065, 1.23342215585474,
1.33299065527035, 1.15535269687227, 1.01091848210074, 1.36225803464049,
1.70133432190171, 0.179451596914852, 2.44697608226373, 2.76702050165719,
0.854409740889734, 1.60250997083131, 1.02266478905362, 0.943874312681693,
0.885972198499773, 0.839537496184133, 0.99723125135207, 0.638164384414478,
1.87081469574624, 1.13053056712459, 0.726146896129157, 1.29115899476399,
0.775214073170476, 1.00393555807285, 1.43561277531882, 1.12635895425256,
1.2466011942751, 0.584118641779783, 0.770571108358409, 4.80210406012398,
0.768437590644007, 1.10114159809796, 1.9683680440676, 1, 0.851453707748945,
0.1666623346364, 1.24372425877751, 2.32462821503624, 0.523405140987288,
1.03622215405832, 1.13393137629285, 0.970634469769548, 2.42054737903178,
NA, 0.709233866728796, 1.31281776506912, 2.04864025497385, 0.493002431338097,
0.600540452021357, 0.493800430722687, 1.88905756659272, 1.17609125029097,
2.95535888117926, 0.751754411118803, 1.12375951661894, 0.976708528962229,
0.858367088615479, 1.06191380396236, 1.28075986132977, 0.868942930406155,
0.816203046150902, 1.21307324843306, 0.472701058375372, 0.951757980304535,
0.694798558820258, 5.23189407871883, 1.2938466778861, 1.0614232089498,
1.90131820245963, NA, 0.574481895667812, 2.846784593394, 1.26927088601981,
1.88121751127258, 0.610332223355819, 2.46285780193722, 1.86951839513,
0.523889093650146, 0.968618189226629, 0.880869374126979, 1.15508578455358,
0.763305945229095, 1.59955063929534, 0.799221149722627, 0.70907001783973,
1.14419530791605, 1.2593389772436, 3.37760354969513, 0.579280229789139,
1.52837652076517, 0.963039358974067, 0.35675362642947, 0.335643125695067,
0.84109072602697, 0.900001929793514, 1.09378814701508, 1.45598954899909,
1.18701102375694, 0.855792732948346, 1.54221082540794, 0.952637998043937,
0.730522189272839, 1.0707833911119, 0.96638278943318, 1.35441200570982,
0.856188284546456, 2.09604060687005, 0.578878842457425, 1.14076371586843,
2.26106113424919, 9.77886256349568, 1.8881848383005, NA, 0.57700937588178,
2.10672207190967, 0.989656656415207, 2.64695438549274, 1.29863860272989,
1.51676754537444, 1.87384289374485, 1.2637110854608, 0.72547610391709,
0.479964630519455, 1.63052135171292, 0.749672992109442, NA, 1.02526723788859,
2.24026075844427, 0.935839697736695, 0.320560077029036, 0.135246828302878,
1.03646159987396, 1.59364836099312, 0.722298293338962, 0.272879010419556,
1.96337171049351, 1.65748980897599, 1.52414483033977, 1.11265012058483,
NA, 1.47324768581648, 1.04198415065663, 0.86294070314942, 0.553376518901261,
1.7021206869689, 5.49708453933297, 0.917215940605446, 1.65748980897599,
1.32439487523883, 0.310643836121484, 1.17473336089929, 0.768437590644006,
1.4382688051156, 0.940608755636287, 0.798851916449149, 1.17175152030837,
0.603182578991095, 1.18153852202942, 0.89089871814034)), row.names = c(NA,
-180L), class = c("tbl_df", "tbl", "data.frame"))
I have a database of medical data with an ID made up by alphanumeric code that follows e.g.:
Example: A2589200014000006001
Project: A25
Center: 892
Patient: 00014
Disease: 00
Visit: 000 (baseline) / 701 (1 year visit)
Sample: 60
Specimen: 01
What I am trying to accomplish is to substract RQ values based on 1-year follow-up according to 701 code
I am pretty new in terms of string topic, I find it quite tricky and hard. I have read several approaches with a more traditional separate, or modern with stringr (str_detect or mystrsplit)
The idea is to substract RQ values from sample_name: 701 - 000 but matches on the other digits of the sample name (project, patient....)