Divide the data , count, and then also output the id

Comede_way · October 30, 2022, 3:02am

How can I divide the data into different groups and count the number of each group(table?), and then also output the id of the corresponding values of the different groups? As you can see, there are two columns in "c3" dataframe. one is the id, the other is corresponding value.

I want to divide the value(second column in c3) in to different groups,[1,4), [4, 7), [7, 10), [10,13), [13,16), [16, 19). Then count the number in each group, how many? And know the number id in every group？It is possibile to output the result above together?

c1<-c(16.6,1.0,10.1,8.6,8.0,17.0,2.4,7.6,5.7,11.6,3.6,2.8,6.3,1.5,2.7,16.7,6.7,5.3,12.5)
c2<-1:19
c3<-data.frame(c2,c1)
colnames(c3)<-c("id","col")

^{Created on 2022-10-29 with reprex v2.0.2}

FJCC · October 30, 2022, 3:58am

Is this close to what you want?

library(dplyr)
c1<-c(16.6,1.0,10.1,8.6,8.0,17.0,2.4,7.6,5.7,11.6,3.6,2.8,6.3,1.5,2.7,16.7,6.7,5.3,12.5)
c2<-1:19
c3<-data.frame(c2,c1)
colnames(c3)<-c("id","col")
c3
#>    id  col
#> 1   1 16.6
#> 2   2  1.0
#> 3   3 10.1
#> 4   4  8.6
#> 5   5  8.0
#> 6   6 17.0
#> 7   7  2.4
#> 8   8  7.6
#> 9   9  5.7
#> 10 10 11.6
#> 11 11  3.6
#> 12 12  2.8
#> 13 13  6.3
#> 14 14  1.5
#> 15 15  2.7
#> 16 16 16.7
#> 17 17  6.7
#> 18 18  5.3
#> 19 19 12.5
c3 <- mutate(c3,Group=cut(col,breaks = seq(1,19,3),right = FALSE))
c3
#>    id  col   Group
#> 1   1 16.6 [16,19)
#> 2   2  1.0   [1,4)
#> 3   3 10.1 [10,13)
#> 4   4  8.6  [7,10)
#> 5   5  8.0  [7,10)
#> 6   6 17.0 [16,19)
#> 7   7  2.4   [1,4)
#> 8   8  7.6  [7,10)
#> 9   9  5.7   [4,7)
#> 10 10 11.6 [10,13)
#> 11 11  3.6   [1,4)
#> 12 12  2.8   [1,4)
#> 13 13  6.3   [4,7)
#> 14 14  1.5   [1,4)
#> 15 15  2.7   [1,4)
#> 16 16 16.7 [16,19)
#> 17 17  6.7   [4,7)
#> 18 18  5.3   [4,7)
#> 19 19 12.5 [10,13)
GrpCount <- count(c3,Group)
GrpCount
#>     Group n
#> 1   [1,4) 6
#> 2   [4,7) 4
#> 3  [7,10) 3
#> 4 [10,13) 3
#> 5 [16,19) 3
table(c3$Group,c3$id)
#>          
#>           1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
#>   [1,4)   0 1 0 0 0 0 1 0 0  0  1  1  0  1  1  0  0  0  0
#>   [4,7)   0 0 0 0 0 0 0 0 1  0  0  0  1  0  0  0  1  1  0
#>   [7,10)  0 0 0 1 1 0 0 1 0  0  0  0  0  0  0  0  0  0  0
#>   [10,13) 0 0 1 0 0 0 0 0 0  1  0  0  0  0  0  0  0  0  1
#>   [13,16) 0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0
#>   [16,19) 1 0 0 0 0 1 0 0 0  0  0  0  0  0  0  1  0  0  0

^{Created on 2022-10-29 with reprex v2.0.2}

Comede_way · October 30, 2022, 4:38am

Thanks a lot, it's fairly close. What's more, is it possible add the id column in "GroupCount"? For example, for the result[1,4), n=6, output all 6 corresponding id? Of course, one column may not be enough.

FJCC · October 30, 2022, 6:23am

This is not elegant.

library(tidyr)
library(dplyr)
c1<-c(16.6,1.0,10.1,8.6,8.0,17.0,2.4,7.6,5.7,11.6,3.6,2.8,6.3,1.5,2.7,16.7,6.7,5.3,12.5)
c2<-1:19
c3<-data.frame(c2,c1)
colnames(c3)<-c("id","col")
c3 <- mutate(c3,Group=cut(col,breaks = seq(1,19,3),right = FALSE))
c3
#>    id  col   Group
#> 1   1 16.6 [16,19)
#> 2   2  1.0   [1,4)
#> 3   3 10.1 [10,13)
#> 4   4  8.6  [7,10)
#> 5   5  8.0  [7,10)
#> 6   6 17.0 [16,19)
#> 7   7  2.4   [1,4)
#> 8   8  7.6  [7,10)
#> 9   9  5.7   [4,7)
#> 10 10 11.6 [10,13)
#> 11 11  3.6   [1,4)
#> 12 12  2.8   [1,4)
#> 13 13  6.3   [4,7)
#> 14 14  1.5   [1,4)
#> 15 15  2.7   [1,4)
#> 16 16 16.7 [16,19)
#> 17 17  6.7   [4,7)
#> 18 18  5.3   [4,7)
#> 19 19 12.5 [10,13)
GrpCount <- count(c3,Group)
GrpCount
#>     Group n
#> 1   [1,4) 6
#> 2   [4,7) 4
#> 3  [7,10) 3
#> 4 [10,13) 3
#> 5 [16,19) 3
#Prepare for new column names
MaxIDCnt <- max(GrpCount$n)
ColNames <- paste("Grp",1:MaxIDCnt,sep = "_")

Labeled <- inner_join(GrpCount,c3,by="Group") |> 
  group_by(Group) |> 
  summarize(Label=paste(id,collapse = ","))
Labeled
#> # A tibble: 5 × 2
#>   Group   Label          
#>   <fct>   <chr>          
#> 1 [1,4)   2,7,11,12,14,15
#> 2 [4,7)   9,13,17,18     
#> 3 [7,10)  4,5,8          
#> 4 [10,13) 3,10,19        
#> 5 [16,19) 1,6,16
Labeled <- Labeled |> separate(Label,into = ColNames,fill = "right")
Labeled
#> # A tibble: 5 × 7
#>   Group   Grp_1 Grp_2 Grp_3 Grp_4 Grp_5 Grp_6
#>   <fct>   <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 [1,4)   2     7     11    12    14    15   
#> 2 [4,7)   9     13    17    18    <NA>  <NA> 
#> 3 [7,10)  4     5     8     <NA>  <NA>  <NA> 
#> 4 [10,13) 3     10    19    <NA>  <NA>  <NA> 
#> 5 [16,19) 1     6     16    <NA>  <NA>  <NA>

^{Created on 2022-10-30 with reprex v2.0.2}

Comede_way · October 30, 2022, 7:59am

Thank you for help and effort on this matter. Last question, really my last question, I guarantee！！！
If there are more than one column，is it possibile to output the result like "Labeled" for each column data(col1,col2, col3 are colnames)? And then all the"Labeled" result are summarised in a list. This list contain different tibble or dataframe. Currently, I just know it may need loop function. However, for a beginner me, it is a little hard.

 c1<-c(16.6,1.0,10.1,8.6,8.0,17.0,2.4,7.6,5.7,11.6,3.6,2.8,6.3,1.5,2.7,16.7,6.7,5.3,12.5)
 c2<-c(13.0,11.2,11.0,15.0,10.0,11.5,9.6,7.8,9.2,6.6,1.6,8.2,18.0,18.9,NA,NA,2.9,16.1,17.8)
 c3<-c(4.2,5.6,1.4,3.4,5.0,5.8,5.1,8.2,8.8,9.1,1.9,7.7,9.1,10.6,3.7,9.9,10.2,11.5,NA)
 c4<-c(1:19)
 c5<-data.frame(c4,c1,c2,c3)
 colnames(c5)<-c("id","col1","col2","col3")

^{Created on 2022-10-30 with reprex v2.0.2}

FJCC · October 30, 2022, 3:30pm

Here is a version that stores all the results in one data frame. You can split that data frame into three by filtering on Column, if you want to.

library(tidyr)
library(dplyr)
c1<-c(16.6,1.0,10.1,8.6,8.0,17.0,2.4,7.6,5.7,11.6,3.6,2.8,6.3,1.5,2.7,16.7,6.7,5.3,12.5)
c2<-c(13.0,11.2,11.0,15.0,10.0,11.5,9.6,7.8,9.2,6.6,1.6,8.2,18.0,18.9,NA,NA,2.9,16.1,17.8)
c3<-c(4.2,5.6,1.4,3.4,5.0,5.8,5.1,8.2,8.8,9.1,1.9,7.7,9.1,10.6,3.7,9.9,10.2,11.5,NA)
c4<-c(1:19)
c5<-data.frame(c4,c1,c2,c3)
colnames(c5)<-c("id","col1","col2","col3")

c5 <- c5 |> pivot_longer(cols = col1:col3,
                         names_to =  "Column", values_to = "Value") |> 
  mutate(Group=cut(Value,breaks = seq(1,19,3),right = FALSE))

GrpCount <- count(c5,Column, Group) |> 
  na.omit()

MaxIDCnt <- max(GrpCount$n)
ColNames <- paste("Grp",1:MaxIDCnt,sep = "_")

Labeled <- inner_join(GrpCount,c5,by=c("Column", "Group")) |> 
  group_by(Column, Group, n) |> 
  summarize(Label=paste(id,collapse = ","))
#> `summarise()` has grouped output by 'Column', 'Group'. You can override using
#> the `.groups` argument.
Labeled <- Labeled |> separate(Label,into = ColNames,fill = "right")
head(Labeled,10)
#> # A tibble: 10 × 9
#> # Groups:   Column, Group [10]
#>    Column Group       n Grp_1 Grp_2 Grp_3 Grp_4 Grp_5 Grp_6
#>    <chr>  <fct>   <int> <chr> <chr> <chr> <chr> <chr> <chr>
#>  1 col1   [1,4)       6 2     7     11    12    14    15   
#>  2 col1   [4,7)       4 9     13    17    18    <NA>  <NA> 
#>  3 col1   [7,10)      3 4     5     8     <NA>  <NA>  <NA> 
#>  4 col1   [10,13)     3 3     10    19    <NA>  <NA>  <NA> 
#>  5 col1   [16,19)     3 1     6     16    <NA>  <NA>  <NA> 
#>  6 col2   [1,4)       2 11    17    <NA>  <NA>  <NA>  <NA> 
#>  7 col2   [4,7)       1 10    <NA>  <NA>  <NA>  <NA>  <NA> 
#>  8 col2   [7,10)      4 7     8     9     12    <NA>  <NA> 
#>  9 col2   [10,13)     4 2     3     5     6     <NA>  <NA> 
#> 10 col2   [13,16)     2 1     4     <NA>  <NA>  <NA>  <NA>

^{Created on 2022-10-30 with reprex v2.0.2}

Comede_way · October 31, 2022, 9:22am

It works!!! Thank you very much. I am very grateful to your effort and time on this problem.

system · November 7, 2022, 9:23am

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.