Data Wrangling Woe in R

Hi all, I am trying to compute the cumulative CO2 emission, but I am not getting the expected output correctly. I have attached my code and sample data and a screenshot (n=12) of my expected output. I will be grateful if someone help me to fix this.

Data

ghg_data <- structure(
  list(
    cropping_history = structure(
      c(
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        3L,
        3L,
        1L,
        1L,
        2L,
        2L,
        3L,
        3L,
        2L,
        2L,
        1L,
        1L
      ),
      levels = c("Corn", "Fallow", "Soybean"),
      class = "factor"
    ),
    rep = c(
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3,
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3,
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3,
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3,
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3,
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3,
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3,
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3,
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3,
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3,
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3,
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3,
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3,
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3,
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3,
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3,
      1,
      1,
      1,
      1,
      1,
      1,
      2,
      2,
      2,
      2,
      2,
      2,
      3,
      3,
      3,
      3,
      3,
      3
    ),
    julian_day = c(
      125,
      125,
      125,
      125,
      125,
      125,
      125,
      125,
      125,
      125,
      125,
      125,
      125,
      125,
      125,
      125,
      125,
      125,
      135,
      135,
      135,
      135,
      135,
      135,
      135,
      135,
      135,
      135,
      135,
      135,
      135,
      135,
      135,
      135,
      135,
      135,
      142,
      142,
      142,
      142,
      142,
      142,
      142,
      142,
      142,
      142,
      142,
      142,
      142,
      142,
      142,
      142,
      142,
      142,
      148,
      148,
      148,
      148,
      148,
      148,
      148,
      148,
      148,
      148,
      148,
      148,
      148,
      148,
      148,
      148,
      148,
      148,
      155,
      155,
      155,
      155,
      155,
      155,
      155,
      155,
      155,
      155,
      155,
      155,
      155,
      155,
      155,
      155,
      155,
      155,
      162,
      162,
      162,
      162,
      162,
      162,
      162,
      162,
      162,
      162,
      162,
      162,
      162,
      162,
      162,
      162,
      162,
      162,
      169,
      169,
      169,
      169,
      169,
      169,
      169,
      169,
      169,
      169,
      169,
      169,
      169,
      169,
      169,
      169,
      169,
      169,
      176,
      176,
      176,
      176,
      176,
      176,
      176,
      176,
      176,
      176,
      176,
      176,
      176,
      176,
      176,
      176,
      176,
      176,
      184,
      184,
      184,
      184,
      184,
      184,
      184,
      184,
      184,
      184,
      184,
      184,
      184,
      184,
      184,
      184,
      184,
      184,
      197,
      197,
      197,
      197,
      197,
      197,
      197,
      197,
      197,
      197,
      197,
      197,
      197,
      197,
      197,
      197,
      197,
      197,
      205,
      205,
      205,
      205,
      205,
      205,
      205,
      205,
      205,
      205,
      205,
      205,
      205,
      205,
      205,
      205,
      205,
      205,
      209,
      209,
      209,
      209,
      209,
      209,
      209,
      209,
      209,
      209,
      209,
      209,
      209,
      209,
      209,
      209,
      209,
      209,
      219,
      219,
      219,
      219,
      219,
      219,
      219,
      219,
      219,
      219,
      219,
      219,
      219,
      219,
      219,
      219,
      219,
      219,
      225,
      225,
      225,
      225,
      225,
      225,
      225,
      225,
      225,
      225,
      225,
      225,
      225,
      225,
      225,
      225,
      225,
      225,
      231,
      231,
      231,
      231,
      231,
      231,
      231,
      231,
      231,
      231,
      231,
      231,
      231,
      231,
      231,
      231,
      231,
      231,
      237,
      237,
      237,
      237,
      237,
      237,
      237,
      237,
      237,
      237,
      237,
      237,
      237,
      237,
      237,
      237,
      237,
      237,
      248,
      248,
      248,
      248,
      248,
      248,
      248,
      248,
      248,
      248,
      248,
      248,
      248,
      248,
      248,
      248,
      248,
      248
    ),
    co2_kg_co2eq_ha_2_day_1 = c(
      1918.98950688,
      1913.93222976,
      1611.02794752,
      1692.51475104,
      2038.15872864,
      1536.15743136,
      1623.23385696,
      2101.54580352,
      2054.92759488,
      1992.1108896,
      1655.93504736,
      1735.824816,
      1700.99424576,
      1819.25087616,
      1642.01802912,
      1657.18986048,
      1767.7655136,
      1764.95169024,
      1930.05467712,
      1873.01771712,
      1621.52274816,
      1681.33550688,
      1749.77985888,
      1715.93792928,
      1628.51928192,
      1716.20410176,
      1796.39806752,
      1786.51166112,
      1719.93051648,
      1770.88353408,
      1595.85611616,
      1763.88700032,
      1768.67810496,
      1654.71825888,
      1526.38509888,
      1857.54168864,
      1593.0422928,
      1599.62055552,
      1593.19439136,
      1581.6349008,
      1543.30606368,
      1535.54903712,
      1539.4275504,
      1537.98261408,
      1539.76977216,
      1532.9633616,
      1538.096688,
      1512.2399328,
      1482.27651648,
      1499.6918016,
      1510.60487328,
      1503.91253664,
      1451.17236096,
      1442.12249664,
      1604.10746304,
      2006.33210496,
      1515.6621504,
      1562.58455616,
      1640.11679712,
      1741.75665984,
      1643.12074368,
      1639.09013184,
      1956.48180192,
      1764.68551776,
      1584.56279808,
      1724.49347328,
      1690.34734656,
      1679.738472,
      1877.77079712,
      1718.97990048,
      1731.90827808,
      1877.61869856,
      1708.86534624,
      1897.27743744,
      2054.77549632,
      1834.84097856,
      1854.38564352,
      1990.89410112,
      2140.0267392,
      2340.49264128,
      2153.86770816,
      2179.11606912,
      1995.79927968,
      1964.84722272,
      2075.99324544,
      2148.12598752,
      2167.36645536,
      2142.15611904,
      2258.16929568,
      2502.7818048,
      1596.92080608,
      1574.63836704,
      1564.18159104,
      1555.66407168,
      1814.07952512,
      1511.74561248,
      1642.51234944,
      1771.07365728,
      2096.52655104,
      2177.97532992,
      1847.27503584,
      1850.69725344,
      2143.0687104,
      2039.2614432,
      1898.49422592,
      2274.32976768,
      2198.05233984,
      1755.74972736,
      1735.14037248,
      1868.30266176,
      7751.47496256,
      1709.54978976,
      1861.64834976,
      1794.3827616,
      1649.81308032,
      1731.68013024,
      1790.73239616,
      1687.7236464,
      2048.84365248,
      1894.42558944,
      1600.49512224,
      1834.99307712,
      1847.38910976,
      1720.57693536,
      1566.69121728,
      1451.2103856,
      1552.1658048,
      1491.44045472,
      1466.64838944,
      1470.71702592,
      1525.28238432,
      1578.47885568,
      1488.0942864,
      1398.69835776,
      1411.05636576,
      1974.69560448,
      2061.1256112,
      1966.40623296,
      2449.28113632,
      2241.21030624,
      2333.42005824,
      2120.0638032,
      2302.84824768,
      1931.91788448,
      1909.0270512,
      1905.45273504,
      1696.54536288,
      1713.65645088,
      2226.76094304,
      1951.19637696,
      2447.22780576,
      1810.04891328,
      1827.57827232,
      1875.7554912,
      1752.7457808,
      1811.34175104,
      1950.93020448,
      1920.4344432,
      1783.43166528,
      1751.26281984,
      1734.91222464,
      1807.3111392,
      2145.88253376,
      2140.0267392,
      2083.97841984,
      1973.21264352,
      1979.44868448,
      2192.72889024,
      2274.02557056,
      2198.9649312,
      2172.27163392,
      2156.7575808,
      2045.87773056,
      1845.75405024,
      1862.48489184,
      1939.78898496,
      1897.65768384,
      1841.7234384,
      1857.35156544,
      1826.43753312,
      1839.51800928,
      2188.20395808,
      1658.0644272,
      1708.56114912,
      2034.39428928,
      1922.44974912,
      1905.33866112,
      1923.02011872,
      1770.31316448,
      1693.35129312,
      1668.86342496,
      1651.7903616,
      2045.64958272,
      1781.68253184,
      1621.40867424,
      1626.1237296,
      1951.0442784,
      1860.24143808,
      1596.00821472,
      1595.43784512,
      1579.61959488,
      1564.9040592,
      1596.99685536,
      1610.64770112,
      1676.77255008,
      1615.36275648,
      1576.42552512,
      1570.2275088,
      1545.16927104,
      1758.03120576,
      1992.94743168,
      1831.68493344,
      1732.28852448,
      1642.51234944,
      1847.08491264,
      1719.51224544,
      2181.16939968,
      2133.10625472,
      1851.6098448,
      1895.83250112,
      1853.09280576,
      1839.17578752,
      1746.69986304,
      1796.16991968,
      1717.34484096,
      1654.6422096,
      1680.84118656,
      1654.29998784,
      1716.4322496,
      1741.37641344,
      1599.27833376,
      1542.81174336,
      1800.0864576,
      1629.58397184,
      1615.78102752,
      1716.16607712,
      1568.25022752,
      1803.58472448,
      1691.10783936,
      1764.1531728,
      1755.0272592,
      1691.67820896,
      1776.24500832,
      1677.22884576,
      1575.43688448,
      1580.18996448,
      1754.03861856,
      1654.68023424,
      1563.0028272,
      1586.84427648,
      1583.76428064,
      1578.93515136,
      1767.99366144,
      1720.91915712,
      1673.04613536,
      1765.36996128,
      1810.31508576,
      1734.57000288,
      1709.54978976,
      1604.56375872,
      1651.14394272,
      1654.79430816,
      1623.23385696,
      1578.51688032,
      1791.1506672,
      1616.65559424,
      1558.28777184,
      1426.57041888,
      1594.10698272,
      1525.32040896,
      1729.32260256,
      1737.11765376,
      1534.44632256,
      1479.50071776,
      1619.96373792,
      1585.13316768,
      1529.65521792,
      1578.25070784,
      1547.7929712,
      1489.50119808,
      1559.7707328,
      1504.4448816,
      1523.38115232,
      1594.79142624,
      1525.43448288,
      1455.16494816,
      1674.64317024,
      1654.71825888,
      1615.32473184,
      1716.96459456,
      1532.62113984,
      1686.9631536,
      1665.63133056,
      1650.57357312,
      1591.21711008,
      1680.42291552,
      1626.54200064,
      1629.12767616,
      1562.69863008,
      1538.36286048,
      1613.23337664,
      1655.17455456,
      1568.17417824,
      1504.55895552,
      1488.17033568,
      1497.90464352
    )
  ),
  row.names = c(NA, -306L),
  class = c("tbl_df", "tbl", "data.frame")
)

Load the R package

library(tidyverse)

Data Wrangling


Cum_Co2_data <- ghg_data %>%
  # First, group by day and plot to get the daily average
  group_by(cropping_history, rep, julian_day) %>%
  summarise(daily_CO2_rate = mean(co2_kg_co2eq_ha_2_day_1, na.rm = TRUE)) %>%
  ungroup() %>%
  # Then, for each plot (cropping_history and rep), compute the cumulative sum
  group_by(cropping_history, rep) %>%
  arrange(julian_day) %>%
  mutate(Cumulative_CO2 = cumsum(daily_CO2_rate)) %>%
  ungroup()

Plot

ggplot(Cum_Co2_data,aes(x = julian_day, y = Cumulative_CO2, colour = cropping_history))+
  stat_summary(geom = 'point', position = position_dodge(0.9), size = 1.7,shape=19) +
  stat_summary(geom = 'line', position = position_dodge(0.9), linewidth = 0.9)+
  stat_summary(geom = 'errorbar', fun.data = mean_se, 
               position = position_dodge(0.9), width = 0.5) +
  scale_color_brewer(palette = 'Dark2') +
  scale_y_continuous(
    breaks = c(0, 5000, 10000, 15000, 20000,25000,30000,35000,40000),
    limits = c(0, NA),
    expand = expansion(mult = c(0, 0.2))
  ) +
  labs(
    x = 'Julian days',
    y = expression(bold("Cumulative" ~ CO[2] ~ "(kg" ~ CO[2]~"eq" ~ ha^{-1} ~ ")")),
    colour = 'Cropping history'
  ) +
  theme_bw() +
  theme(
    legend.title = element_text(family = 'serif', face = 'bold', colour = 'black', size = 12),
    axis.text = element_text(family = 'serif', face = 'bold', colour = 'black', size = 12),
    strip.background = element_rect(fill = 'gray95'),
    panel.grid = element_blank(),
    legend.position = c(.1, 0.8),
    axis.title = element_text(family = 'serif', face = 'bold', colour = 'black', size = 12),
    legend.text = element_text(family = 'serif', face = 'bold', colour = 'black', size = 12)
  )

Screenshot of expected output

When you summarize grouped data, you get back one row for each grouping (so, in your case, one row for Corn rep 1 on day 125, whereas your spreadsheet has two rows for that combination). Try the following (which does not compute a mean -- your spreadsheet does not show any evidence of containing a mean).

Cum_Co2_data <- ghg_data %>%
  # For each plot (cropping_history and rep), compute the cumulative sum
  group_by(cropping_history) %>%
  arrange(julian_day, rep) %>%
  mutate(Cumulative_CO2 = cumsum(co2_kg_co2eq_ha_2_day_1)) %>%
  ungroup()

Thank you very much @prubin, your solution is exactly what I am looking for!

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.