How can I calculate percent occurrence by row?

nirgrahamuk · July 23, 2020, 3:26pm

# Example data: one row per observation of a Cluster within a Sample.
# Number_Samples repeats, on every row, the number of rows its Sample has.
# The tibble class vector is set by hand so the object carries the same
# classes as a tibble even though no package has been attached yet.
rows_per_sample <- c(3L, 1L, 2L, 4L)
df <- data.frame(
  Sample = rep(
    c("Sample_A", "Sample_B", "Sample_C", "Sample_D"),
    times = rows_per_sample
  ),
  Number_Samples = rep(rows_per_sample, times = rows_per_sample),
  Cluster = c(12L, 12L, 15L, 10L, 12L, 14L, 7L, 20L, 20L, 20L),
  stringsAsFactors = FALSE
)
class(df) <- c("tbl_df", "tbl", "data.frame")
rm(rows_per_sample)

library(tidyverse)

# Percent occurrence of each Cluster within its Sample, added to every row.
#
# Steps:
#   1. Group by Sample and store the number of rows per Sample in `rc`
#      (the denominator).
#   2. Regroup by Sample + Cluster so n() counts the rows sharing that
#      Sample/Cluster pair (the numerator).
#   3. Express numerator / denominator as a percentage with 2 decimals.
#   4. Drop the grouping so later verbs on the result act on the whole table
#      rather than silently running per Sample/Cluster group.
#
# The result keeps all original columns plus `rc` and `percent_observed`.
df %>%
  group_by(Sample) %>%
  mutate(rc = n()) %>%
  group_by(Sample, Cluster) %>%
  # Scale to percent before rounding: rounding the proportion first and then
  # multiplying by 100 can reintroduce floating-point noise after the round.
  mutate(percent_observed = round(100 * n() / rc, digits = 2)) %>%
  ungroup()