Dplyr col sums by id

Hi Posit Consultants. Hope you will help me in this col Sums group by encounter_id.
Issue = Some encounters from the previous financial year in a hospital discharged in the current financial year = need to sum up those two years in total for reporting. Some encounters start and end in the same current financial year and no need to sum up
Repex = pasted
Current df = current situation of the data with same encounters across two years and need to sum up
Desired df = manually created and summed up in Excel

Look forward to hearing from you.

current <- tibble::tribble(
             ~costing_dataset_id, ~costing_dataset, ~encounter_id, ~total_cost, ~direct_cost, ~indirect_cost,       ~appl,      ~blood,      ~equip, ~exclude,         ~gs,      ~hotel,       ~imag,      ~lease,         ~ms,   ~on_costs,       ~path, ~pharm_npbs,  ~pharm_pbs,       ~pros, ~pt_transport,   ~sw_admin, ~sw_admin_oc,       ~swah,    ~swah_oc,      ~swhmo,   ~swhmoffs,   ~sw_hotel, ~sw_hotel_oc,     ~sw_med, ~sw_med_ffs,  ~sw_med_oc, ~sw_med_sup, ~sw_med_sup_oc,    ~sw_nurs, ~sw_nurs_oc,   ~sw_other, ~sw_other_oc,    ~sw_sess, ~sw_sess_ffs, ~sw_sess_oc,
                             46L,         "2223CD",     30567617L, 255534.5219,  219230.5542,    36303.96771, 453.5117244, 50027.33153, 185.1550107,       0L, 12143.73869, 1559.596487, 52.76100524, 3379.758711, 11726.27274, 29437.31834, 2871.089633, 4989.892421, 3399.435163, 528.4211944,   14.53969969, 11111.73208,  34.42939306, 1150.882787, 1.168522552,  24886.1865,           0, 3992.447582,  104.2815761, 14864.38342, 990.9450843, 25.30559683, 12069.77259,    337.8741091, 42236.73553, 439.6949443, 7501.569961,  0.001286103, 13602.75609,   1406.90401,  8.62844703,
                             49L,         "2324CD",     30567617L, 305691.9787,  250845.0912,    54846.88748, 929.8141843, 4081.052287, 158.9840028,       0L, 21137.71663, 3058.877166, 93.87396696, 4890.803769,  10944.7937,  47539.6934, 2810.904901,  1119.04257, 5736.413526, 956.8716287,   78.92869632,  14012.6065,  122.7020216,   8262.9646, 76.35040068, 39118.46258, 83.77483444, 10319.20389,  207.3737482, 5038.115546, 604.8911982,           0, 8043.489311,    7.466356575, 65764.83236, 274.7548526, 21886.38669,  9.893360541, 27241.22244,  1079.717549,           0,
                             46L,         "2223CD",     30567734L, 125858.7456,  109891.6721,    15967.07355, 215.9236151, 30489.27764, 93.25947228,       0L, 5662.918802, 722.6105348, 26.52021507, 1560.154302, 6436.364254,  12247.2287, 643.9627557,  9362.46259, 1946.767894, 258.8901118,   3.372127954, 5037.508946,  15.90832303, 901.4125263, 0.697912279, 10923.54606,           0, 1679.722382,  55.73580435, 6136.628774,  567.766782,  12.6160905, 5113.946092,    174.4007609,   16086.074, 117.3995422, 3027.905936,  0.000826781, 5532.476539,  801.5453168, 3.739995856,
                             49L,         "2324CD",     30567734L, 44482.24893,  38054.87679,    6427.372132,  137.140177,           0, 20.33436056,       0L, 7695.021837, 386.0403524,  13.9185424, 699.3217447, 2074.156363, 5211.916449, 76.38045922, 59.17831513, 873.3002477, 106.9669742,             0, 1687.268853,  16.59673158, 1331.591429, 4.835785588, 1355.165839, 83.77483444, 887.4200384,   25.6397257, 603.4399698, 98.62990549,           0, 3208.204444,    0.535263124, 7795.711357, 20.59956698, 8781.174805,  1.458181778, 1050.939595,  175.5867803,           0,
                             46L,         "2223CD",     30567808L, 83365.88669,  70037.68831,    13328.19838, 161.2519199, 10720.24888, 64.20222613,       0L, 4470.371578, 604.9467998,  8.55152899, 1098.870794, 3227.964607, 10820.02957, 786.7528851, 170.8960909, 1458.417979, 92.70213507,   4.350461053, 4211.608038,  12.97803372, 840.8713675, 0.723441767, 9696.681279,           0, 1406.978797,  43.95617916, 5077.458004, 454.3460702, 4.091174693, 4065.297993,    140.6997571, 15318.60733, 136.5206446, 2590.607071,  0.000643052, 5030.296169,   641.474539, 3.132697493,
                             49L,         "2324CD",     30567808L, 68336.78391,  57432.24529,    10904.53862, 203.3134567, 510.1315358, 33.33646894,       0L, 9293.533834, 822.6709776, 5.813332696, 961.4706427, 2676.307159, 9034.462922, 656.9078894, 116.1615605, 1611.258055, 53.70897437,             0, 2813.195885,   25.2077338, 2533.536348, 0.138224127, 3419.700818, 83.77483444, 1911.936613,   60.4137647,  845.852088, 258.8709907,           0, 3922.733433,    1.745338246, 14977.97938, 51.79907412, 9549.440708,  2.214088843, 1435.474874,  463.6929074,           0,
                             46L,         "2223CD",     30567995L,  132347.236,  116977.7202,    15369.51587, 197.1445633, 43802.25311, 80.16950275,       0L, 5468.189684, 522.2041804, 4.481760102, 1443.962522, 6758.253038,  11562.5326, 799.4095938, 5927.929671,  2147.25082, 38.25028769,   1.363421945, 5155.480822,  13.03260141, 324.3212845, 0.429906439, 11791.94536,           0, 901.5342666,  30.62885715, 6366.379795, 663.6649547, 1.753727356, 6289.965652,    237.1366977, 11955.06138, 61.16266201, 2691.676348,  0.000183729, 6168.043136,  936.8674851, 4.756168363,
                             49L,         "2324CD",     30567995L, 236889.9508,  196819.9335,    40070.01734, 747.1187888, 105.8472197, 136.6155061,       0L,  28795.4394, 2251.982514,  35.2316746, 3699.650846, 14177.38194, 32214.36918, 2256.435565, 11393.19415, 4015.241709, 271.2067436,   0.005390004, 9664.412628,  82.02981468, 7648.176897, 0.716383117, 19379.01603, 251.3245033, 6122.489629,  130.4974165, 4930.216764, 760.3269546,           0, 7408.455959,    22.63163661, 48569.61349, 187.8412611, 16093.57146,  7.783251463,  14159.4922,  1371.633894,           0,
                             49L,         "2324CD",     31332289L, 594826.4299,  487882.3995,    106944.0304,   1721.8575, 25698.24012, 309.7895267,       0L, 37806.87814, 6491.689333, 103.8315878, 9212.917924, 23600.03507, 90514.70328, 6341.984037, 3705.477681, 9980.787541, 868.2664906,   243.1183514, 26785.48572,  223.7268202, 11137.22252, 7.408028147, 70689.94158, 83.77483444, 20969.78414,  455.6400784, 11262.53361, 2710.123037,           0,  14191.8367,    41.88359877, 143236.2735, 641.3474047, 31738.96574,  18.69659669, 39173.71167,  4858.497735,           0,
                             49L,         "2324CD",     31396173L, 387108.8623,  319170.4061,    67938.45617, 1147.961271, 17664.71999, 204.4299656,       0L, 31786.21938, 2662.733616, 58.84123435, 5792.138594, 21383.14437, 52928.05054, 3195.059338,  4907.17405, 6017.007373, 434.4600031,   0.003593336, 17038.16192,  122.8034009, 4897.560631, 0.569738163, 51390.58206, 167.5496689, 7425.989045,   133.474351, 10273.31923, 2608.545033,           0, 14832.77166,    63.76801531, 64971.46661, 264.1138123, 29643.15938,  12.15216466, 30372.98116,  4707.951053,           0
             )
current
#> # A tibble: 10 x 41
#>    costing_dataset_id costing_dataset encounter_id total_cost direct_cost
#>                 <int> <chr>                  <int>      <dbl>       <dbl>
#>  1                 46 2223CD              30567617    255535.     219231.
#>  2                 49 2324CD              30567617    305692.     250845.
#>  3                 46 2223CD              30567734    125859.     109892.
#>  4                 49 2324CD              30567734     44482.      38055.
#>  5                 46 2223CD              30567808     83366.      70038.
#>  6                 49 2324CD              30567808     68337.      57432.
#>  7                 46 2223CD              30567995    132347.     116978.
#>  8                 49 2324CD              30567995    236890.     196820.
#>  9                 49 2324CD              31332289    594826.     487882.
#> 10                 49 2324CD              31396173    387109.     319170.
#> # i 36 more variables: indirect_cost <dbl>, appl <dbl>, blood <dbl>,
#> #   equip <dbl>, exclude <int>, gs <dbl>, hotel <dbl>, imag <dbl>, lease <dbl>,
#> #   ms <dbl>, on_costs <dbl>, path <dbl>, pharm_npbs <dbl>, pharm_pbs <dbl>,
#> #   pros <dbl>, pt_transport <dbl>, sw_admin <dbl>, sw_admin_oc <dbl>,
#> #   swah <dbl>, swah_oc <dbl>, swhmo <dbl>, swhmoffs <dbl>, sw_hotel <dbl>,
#> #   sw_hotel_oc <dbl>, sw_med <dbl>, sw_med_ffs <dbl>, sw_med_oc <dbl>,
#> #   sw_med_sup <dbl>, sw_med_sup_oc <dbl>, sw_nurs <dbl>, sw_nurs_oc <dbl>, ...

desired <- tibble::tribble(
             ~costing_dataset_id, ~costing_dataset, ~encounter_id, ~total_cost, ~direct_cost, ~indirect_cost,       ~appl,      ~blood,      ~equip, ~exclude,         ~gs,      ~hotel,       ~imag,      ~lease,         ~ms,   ~on_costs,       ~path, ~pharm_npbs,  ~pharm_pbs,       ~pros, ~pt_transport,   ~sw_admin, ~sw_admin_oc,       ~swah,    ~swah_oc,      ~swhmo,   ~swhmoffs,   ~sw_hotel, ~sw_hotel_oc,     ~sw_med, ~sw_med_ffs,  ~sw_med_oc, ~sw_med_sup, ~sw_med_sup_oc,    ~sw_nurs, ~sw_nurs_oc,   ~sw_other, ~sw_other_oc,    ~sw_sess, ~sw_sess_ffs, ~sw_sess_oc,
                             49L,         "2324CD",     30567617L, 561226.5005,  470075.6453,     91150.8552, 1383.325909, 54108.38382, 344.1390135,       0L, 33281.45532, 4618.473653, 146.6349722,  8270.56248, 22671.06643, 76977.01174, 5681.994534, 6108.934991, 9135.848688, 1485.292823,   93.46839602, 25124.33858,  157.1314147, 9413.847388, 77.51892323, 64004.64908, 83.77483444, 14311.65147,  311.6553243, 19902.49897, 1595.836283, 25.30559683,  20113.2619,    345.3404657, 108001.5679, 714.4497969, 29387.95665,  9.894646644, 40843.97853,  2486.621559,  8.62844703,
                             49L,         "2324CD",     30567734L, 170340.9946,  147946.5489,    22394.44568, 353.0637921, 30489.27764, 113.5938328,       0L, 13357.94064, 1108.650887, 40.43875747, 2259.476047, 8510.520617, 17459.14515, 720.3432149, 9421.640905, 2820.068142, 365.8570861,   3.372127954, 6724.777798,  32.50505461, 2233.003955, 5.533697867,  12278.7119, 83.77483444,  2567.14242,  81.37553005, 6740.068744, 666.3966875,  12.6160905, 8322.150535,    174.9360241, 23881.78536, 137.9991092, 11809.08074,  1.459008559, 6583.416133,  977.1320971, 3.739995856,
                             49L,         "2324CD",     30567808L, 151702.6706,  127469.9336,      24232.737, 364.5653766, 11230.38042, 97.53869507,       0L, 13763.90541, 1427.617777, 14.36486169, 2060.341437, 5904.271766, 19854.49249, 1443.660774, 287.0576515, 3069.676034, 146.4111094,   4.350461053, 7024.803922,  38.18576751, 3374.407716, 0.861665894,  13116.3821, 83.77483444,  3318.91541,  104.3699439, 5923.310092, 713.2170609, 4.091174693, 7988.031426,    142.4450953, 30296.58671, 188.3197187, 12140.04778,  2.214731895, 6465.771043,  1105.167446, 3.132697493,
                             49L,         "2324CD",     30567995L, 369237.1868,  313797.6536,    55439.53322,  944.263352, 43908.10033, 216.7850088,       0L, 34263.62909, 2774.186695,  39.7134347, 5143.613368, 20935.63498, 43776.90178, 3055.845159, 17321.12382, 6162.492529, 309.4570312,   1.368811949, 14819.89345,  95.06241609, 7972.498182, 1.146289555, 31170.96139, 251.3245033, 7024.023896,  161.1262737, 11296.59656, 1423.991909, 1.753727356, 13698.42161,    259.7683343, 60524.67487, 249.0039231, 18785.24781,  7.783435192, 20327.53533,  2308.501379, 4.756168363,
                             49L,         "2324CD",     31332289L, 594826.4299,  487882.3995,    106944.0304,   1721.8575, 25698.24012, 309.7895267,       0L, 37806.87814, 6491.689333, 103.8315878, 9212.917924, 23600.03507, 90514.70328, 6341.984037, 3705.477681, 9980.787541, 868.2664906,   243.1183514, 26785.48572,  223.7268202, 11137.22252, 7.408028147, 70689.94158, 83.77483444, 20969.78414,  455.6400784, 11262.53361, 2710.123037,           0,  14191.8367,    41.88359877, 143236.2735, 641.3474047, 31738.96574,  18.69659669, 39173.71167,  4858.497735,           0,
                             49L,         "2324CD",     31396173L, 387108.8623,  319170.4061,    67938.45617, 1147.961271, 17664.71999, 204.4299656,       0L, 31786.21938, 2662.733616, 58.84123435, 5792.138594, 21383.14437, 52928.05054, 3195.059338,  4907.17405, 6017.007373, 434.4600031,   0.003593336, 17038.16192,  122.8034009, 4897.560631, 0.569738163, 51390.58206, 167.5496689, 7425.989045,   133.474351, 10273.31923, 2608.545033,           0, 14832.77166,    63.76801531, 64971.46661, 264.1138123, 29643.15938,  12.15216466, 30372.98116,  4707.951053,           0
             )

Created on 2025-10-10 with reprex v2.1.1

Can you explain a bit more what you want? It is not clear to me what you want summed.

Everything variable in the dataset is numeric except costing_dataset but it makes no sense to sum costing_dataset_id and your desired data set simply uses costing_dataset_id = 6 .

Do you want everything else summed? That seems like what you want.

I don't really really know how to do it with {dplyr} but is this roughly what you want—done using {data.table}. If so I can probably do it in {dpyr}. I just do not like {dplyr}'s syntax.

# load packages -----------------------------------------------------------
library(data.table)

# Load data ---------------------------------------------------------------
DT <- fread("current_data.csv")

# Drop unwanted variables -------------------------------------------------
MT <- DT[,  !c("costing_dataset_id", "costing_dataset")]


# Sum variables by encounter_id --------------------------------------------
SUM <- MT[ , by = encounter_id,
            lapply(.SD, sum)]

Thanks. Group by costing_dataset_id, costing_dataset and encounter_id and then sum up all vars.
As mentioned above, some encounters have two costing_dataset = so need to sum. Other encounters have only one costing_dataset. Please refer to the desired df. Thank you very much.
I do not know about DT syntax.
I would prefer dplyr.
Thanks again.

You can check Column-wise operations • dplyr Vignette for tips and examples on how to solve such tasks with summarise(across( ... ), ... ).

You could start by excluding non-relevant columns first, then group by encounter_id an sum across all remaining columns with
... |> summarise(across(everything(),sum), .by = encounter_id) .
Or exclude a set of columns in across() itself:

library(dplyr, warn.conflicts = FALSE)
current |> 
  summarise(
    across(!(costing_dataset_id:costing_dataset), sum),
    .by = encounter_id
  )
#> # A tibble: 6 × 39
#>   encounter_id total_cost direct_cost indirect_cost  appl  blood equip exclude
#>          <int>      <dbl>       <dbl>         <dbl> <dbl>  <dbl> <dbl>   <int>
#> 1     30567617    561227.     470076.        91151. 1383. 54108. 344.        0
#> 2     30567734    170341.     147947.        22394.  353. 30489. 114.        0
#> 3     30567808    151703.     127470.        24233.  365. 11230.  97.5       0
#> 4     30567995    369237.     313798.        55440.  944. 43908. 217.        0
#> 5     31332289    594826.     487882.       106944. 1722. 25698. 310.        0
#> 6     31396173    387109.     319170.        67938. 1148. 17665. 204.        0
#> # ℹ 31 more variables: gs <dbl>, hotel <dbl>, imag <dbl>, lease <dbl>,  …

Or if you also need to include count indicator and/or costing_dataset_id & costing_dataset values (collapsed into lists columns), you can have multiple across and non-across summarisations in a single summarise(..., .by = x) call:

current |> 
  summarise(
    across(!(costing_dataset_id:costing_dataset), sum),
    n = n(),
    across(costing_dataset_id:costing_dataset, list),
    .by = encounter_id
  ) |> 
  relocate(n:costing_dataset, .before = 1) 
#> # A tibble: 6 × 42
#>       n costing_dataset_id costing_dataset encounter_id total_cost direct_cost
#>   <int> <list>             <list>                 <int>      <dbl>       <dbl>
#> 1     2 <int [2]>          <chr [2]>           30567617    561227.     470076.
#> 2     2 <int [2]>          <chr [2]>           30567734    170341.     147947.
#> 3     2 <int [2]>          <chr [2]>           30567808    151703.     127470.
#> 4     2 <int [2]>          <chr [2]>           30567995    369237.     313798.
#> 5     1 <int [1]>          <chr [1]>           31332289    594826.     487882.
#> 6     1 <int [1]>          <chr [1]>           31396173    387109.     319170.
#> # ℹ 36 more variables: indirect_cost <dbl>, appl <dbl>, blood <dbl>,  …

I see @ margusl has supplied a {dplyr} solution.

However for completeness here is the data.table code I think does what you want.

# load packages -----------------------------------------------------------
library(data.table)

# Load data ---------------------------------------------------------------
DT <- fread("current_data.csv")

# Sum variables by encounter_id --------------------------------------------
SUM <- DT[ , by = c("costing_dataset_id", "costing_dataset", "encounter_id"),
            lapply(.SD, sum)]


Thanks for your dplyr version.
What does the relocate do in .before = 1?
Thanks.

Relocates last columns so they would appear before current 1st column, otherwise n, costing_dataset_id & costing_dataset would not be visible in reprex output.

Hello, i am getting this area when run your script data.table version.
Please advise.
Thanks.

Error in DT[, by = c("costing_dataset_id", "costing_dataset", "encounter_id"), lapply(.SD, sum)]:
! Can't subset columns with lapply(.SD, sum).
:multiply: lapply(.SD, sum) must be logical, numeric, or character, not an empty list.

It's working fine for me.

I assume your actual data set is larger that your example. You could be running into some kind of missing data (NA's) issue but that's just a guess. Is it possible that one of "costing_dataset_id", "costing_dataset", "encounter_id" is nothing but induced NA's due to an earlier data manipulation?

If you can supply a larger sample we might find something. A handy way to supply data is to use the dput() function. Do dput(mydata) where "mydata" is the name of your dataset. For really large datasets probably dput(head(mydata, 500) will do but if the data set in not confidential a 5,000 or 5,000 row data set probably would work. Paste it here between

```

```

It ran after exporting it as csv and re import it and created as DT object.
However, summing is not happening. I see the un-summed data.
Assume we have the current dataset.

Please refer to encounter id 30567617 the total cost has two lines and they should be summed up.

Please advise.

write.csv(current, 'current_data.csv', row.names = FALSE)
#> Error in eval(expr, p): object 'current' not found

DT <- fread("current_data.csv")
#> Error in fread("current_data.csv"): could not find function "fread"

# DT <- current
# Sum variables by encounter_id --------------------------------------------
SUM <- DT[ , by = c("costing_dataset_id", "costing_dataset", "encounter_id"),
           lapply(.SD, sum)]
#> Error: object 'DT' not found

SUM
#> Error: object 'SUM' not found

It looks like you may not be installing and loading {data.table}.

Try

# Install data.table. Only do this once ------------------------------------
install.packages("data.table")

# load packages -----------------------------------------------------------
library(data.table)

# Load data ---------------------------------------------------------------
DT <- fread("current_data.csv")

# Sum variables by encounter_id --------------------------------------------
SUM <- DT[ , by = c("costing_dataset_id", "costing_dataset", "encounter_id"),
            lapply(.SD, sum)]

These code can get the same data set as you created in Excel

 data <- current %>% 
        group_by(encounter_id)  %>%
     summarise(
         dplyr::across(
             .cols = -c(1:2),
             .fns = sum,
             .names = "{.col}"
         ),
         costing_dataset_id = last(costing_dataset_id),
         costing_dataset = last(costing_dataset),
         # encounter_id = last(encounter_id),
         .groups = "drop"
     ) %>% 
     select(costing_dataset_id, costing_dataset, encounter_id, dplyr::everything())
##  test the output:data equal  to desired or not
testthat::expect_equal(data, desired)
1 Like

From the rather distinctive header I'd say that op's screenshot does show data.table output, they probably just missed a line when called reprex().

As across the first three columns all rows in example data are unique, grouping by those 3 variables would result with groups of size 1, e.g. resulting frame is pretty much the same as input (10 rows in, 10 rows out). I'd stick with the first DT example.

Ah thanks. I was not reading that screenshot closely enough.

Thanks for your time and help.

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.