Divide the data , count, and then also output the id

How can I divide the data into different groups and count the number of each group(table?), and then also output the id of the corresponding values of the different groups? As you can see, there are two columns in "c3" dataframe. one is the id, the other is corresponding value.

I want to divide the value(second column in c3) in to different groups,[1,4), [4, 7), [7, 10), [10,13), [13,16), [16, 19). Then count the number in each group, how many? And know the number id in every group?It is possibile to output the result above together?

c1<-c(16.6,1.0,10.1,8.6,8.0,17.0,2.4,7.6,5.7,11.6,3.6,2.8,6.3,1.5,2.7,16.7,6.7,5.3,12.5)
c2<-1:19
c3<-data.frame(c2,c1)
colnames(c3)<-c("id","col")

Created on 2022-10-29 with reprex v2.0.2

Is this close to what you want?

library(dplyr)
c1<-c(16.6,1.0,10.1,8.6,8.0,17.0,2.4,7.6,5.7,11.6,3.6,2.8,6.3,1.5,2.7,16.7,6.7,5.3,12.5)
c2<-1:19
c3<-data.frame(c2,c1)
colnames(c3)<-c("id","col")
c3
#>    id  col
#> 1   1 16.6
#> 2   2  1.0
#> 3   3 10.1
#> 4   4  8.6
#> 5   5  8.0
#> 6   6 17.0
#> 7   7  2.4
#> 8   8  7.6
#> 9   9  5.7
#> 10 10 11.6
#> 11 11  3.6
#> 12 12  2.8
#> 13 13  6.3
#> 14 14  1.5
#> 15 15  2.7
#> 16 16 16.7
#> 17 17  6.7
#> 18 18  5.3
#> 19 19 12.5
c3 <- mutate(c3,Group=cut(col,breaks = seq(1,19,3),right = FALSE))
c3
#>    id  col   Group
#> 1   1 16.6 [16,19)
#> 2   2  1.0   [1,4)
#> 3   3 10.1 [10,13)
#> 4   4  8.6  [7,10)
#> 5   5  8.0  [7,10)
#> 6   6 17.0 [16,19)
#> 7   7  2.4   [1,4)
#> 8   8  7.6  [7,10)
#> 9   9  5.7   [4,7)
#> 10 10 11.6 [10,13)
#> 11 11  3.6   [1,4)
#> 12 12  2.8   [1,4)
#> 13 13  6.3   [4,7)
#> 14 14  1.5   [1,4)
#> 15 15  2.7   [1,4)
#> 16 16 16.7 [16,19)
#> 17 17  6.7   [4,7)
#> 18 18  5.3   [4,7)
#> 19 19 12.5 [10,13)
GrpCount <- count(c3,Group)
GrpCount
#>     Group n
#> 1   [1,4) 6
#> 2   [4,7) 4
#> 3  [7,10) 3
#> 4 [10,13) 3
#> 5 [16,19) 3
table(c3$Group,c3$id)
#>          
#>           1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
#>   [1,4)   0 1 0 0 0 0 1 0 0  0  1  1  0  1  1  0  0  0  0
#>   [4,7)   0 0 0 0 0 0 0 0 1  0  0  0  1  0  0  0  1  1  0
#>   [7,10)  0 0 0 1 1 0 0 1 0  0  0  0  0  0  0  0  0  0  0
#>   [10,13) 0 0 1 0 0 0 0 0 0  1  0  0  0  0  0  0  0  0  1
#>   [13,16) 0 0 0 0 0 0 0 0 0  0  0  0  0  0  0  0  0  0  0
#>   [16,19) 1 0 0 0 0 1 0 0 0  0  0  0  0  0  0  1  0  0  0

Created on 2022-10-29 with reprex v2.0.2

1 Like

Thanks a lot, it's fairly close. What's more, is it possible add the id column in "GroupCount"? For example, for the result[1,4), n=6, output all 6 corresponding id? Of course, one column may not be enough.
image

This is not elegant.

library(tidyr)
library(dplyr)
c1<-c(16.6,1.0,10.1,8.6,8.0,17.0,2.4,7.6,5.7,11.6,3.6,2.8,6.3,1.5,2.7,16.7,6.7,5.3,12.5)
c2<-1:19
c3<-data.frame(c2,c1)
colnames(c3)<-c("id","col")
c3 <- mutate(c3,Group=cut(col,breaks = seq(1,19,3),right = FALSE))
c3
#>    id  col   Group
#> 1   1 16.6 [16,19)
#> 2   2  1.0   [1,4)
#> 3   3 10.1 [10,13)
#> 4   4  8.6  [7,10)
#> 5   5  8.0  [7,10)
#> 6   6 17.0 [16,19)
#> 7   7  2.4   [1,4)
#> 8   8  7.6  [7,10)
#> 9   9  5.7   [4,7)
#> 10 10 11.6 [10,13)
#> 11 11  3.6   [1,4)
#> 12 12  2.8   [1,4)
#> 13 13  6.3   [4,7)
#> 14 14  1.5   [1,4)
#> 15 15  2.7   [1,4)
#> 16 16 16.7 [16,19)
#> 17 17  6.7   [4,7)
#> 18 18  5.3   [4,7)
#> 19 19 12.5 [10,13)
GrpCount <- count(c3,Group)
GrpCount
#>     Group n
#> 1   [1,4) 6
#> 2   [4,7) 4
#> 3  [7,10) 3
#> 4 [10,13) 3
#> 5 [16,19) 3
#Prepare for new column names
MaxIDCnt <- max(GrpCount$n)
ColNames <- paste("Grp",1:MaxIDCnt,sep = "_")

Labeled <- inner_join(GrpCount,c3,by="Group") |> 
  group_by(Group) |> 
  summarize(Label=paste(id,collapse = ","))
Labeled
#> # A tibble: 5 × 2
#>   Group   Label          
#>   <fct>   <chr>          
#> 1 [1,4)   2,7,11,12,14,15
#> 2 [4,7)   9,13,17,18     
#> 3 [7,10)  4,5,8          
#> 4 [10,13) 3,10,19        
#> 5 [16,19) 1,6,16
Labeled <- Labeled |> separate(Label,into = ColNames,fill = "right")
Labeled
#> # A tibble: 5 × 7
#>   Group   Grp_1 Grp_2 Grp_3 Grp_4 Grp_5 Grp_6
#>   <fct>   <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 [1,4)   2     7     11    12    14    15   
#> 2 [4,7)   9     13    17    18    <NA>  <NA> 
#> 3 [7,10)  4     5     8     <NA>  <NA>  <NA> 
#> 4 [10,13) 3     10    19    <NA>  <NA>  <NA> 
#> 5 [16,19) 1     6     16    <NA>  <NA>  <NA>

Created on 2022-10-30 with reprex v2.0.2

1 Like

Thank you for help and effort on this matter. Last question, really my last question, I guarantee!!!
If there are more than one column,is it possibile to output the result like "Labeled" for each column data(col1,col2, col3 are colnames)? And then all the"Labeled" result are summarised in a list. This list contain different tibble or dataframe. Currently, I just know it may need loop function. However, for a beginner me, it is a little hard.

 c1<-c(16.6,1.0,10.1,8.6,8.0,17.0,2.4,7.6,5.7,11.6,3.6,2.8,6.3,1.5,2.7,16.7,6.7,5.3,12.5)
 c2<-c(13.0,11.2,11.0,15.0,10.0,11.5,9.6,7.8,9.2,6.6,1.6,8.2,18.0,18.9,NA,NA,2.9,16.1,17.8)
 c3<-c(4.2,5.6,1.4,3.4,5.0,5.8,5.1,8.2,8.8,9.1,1.9,7.7,9.1,10.6,3.7,9.9,10.2,11.5,NA)
 c4<-c(1:19)
 c5<-data.frame(c4,c1,c2,c3)
 colnames(c5)<-c("id","col1","col2","col3")

Created on 2022-10-30 with reprex v2.0.2

Here is a version that stores all the results in one data frame. You can split that data frame into three by filtering on Column, if you want to.

library(tidyr)
library(dplyr)
c1<-c(16.6,1.0,10.1,8.6,8.0,17.0,2.4,7.6,5.7,11.6,3.6,2.8,6.3,1.5,2.7,16.7,6.7,5.3,12.5)
c2<-c(13.0,11.2,11.0,15.0,10.0,11.5,9.6,7.8,9.2,6.6,1.6,8.2,18.0,18.9,NA,NA,2.9,16.1,17.8)
c3<-c(4.2,5.6,1.4,3.4,5.0,5.8,5.1,8.2,8.8,9.1,1.9,7.7,9.1,10.6,3.7,9.9,10.2,11.5,NA)
c4<-c(1:19)
c5<-data.frame(c4,c1,c2,c3)
colnames(c5)<-c("id","col1","col2","col3")

c5 <- c5 |> pivot_longer(cols = col1:col3,
                         names_to =  "Column", values_to = "Value") |> 
  mutate(Group=cut(Value,breaks = seq(1,19,3),right = FALSE))

GrpCount <- count(c5,Column, Group) |> 
  na.omit()

MaxIDCnt <- max(GrpCount$n)
ColNames <- paste("Grp",1:MaxIDCnt,sep = "_")

Labeled <- inner_join(GrpCount,c5,by=c("Column", "Group")) |> 
  group_by(Column, Group, n) |> 
  summarize(Label=paste(id,collapse = ","))
#> `summarise()` has grouped output by 'Column', 'Group'. You can override using
#> the `.groups` argument.
Labeled <- Labeled |> separate(Label,into = ColNames,fill = "right")
head(Labeled,10)
#> # A tibble: 10 × 9
#> # Groups:   Column, Group [10]
#>    Column Group       n Grp_1 Grp_2 Grp_3 Grp_4 Grp_5 Grp_6
#>    <chr>  <fct>   <int> <chr> <chr> <chr> <chr> <chr> <chr>
#>  1 col1   [1,4)       6 2     7     11    12    14    15   
#>  2 col1   [4,7)       4 9     13    17    18    <NA>  <NA> 
#>  3 col1   [7,10)      3 4     5     8     <NA>  <NA>  <NA> 
#>  4 col1   [10,13)     3 3     10    19    <NA>  <NA>  <NA> 
#>  5 col1   [16,19)     3 1     6     16    <NA>  <NA>  <NA> 
#>  6 col2   [1,4)       2 11    17    <NA>  <NA>  <NA>  <NA> 
#>  7 col2   [4,7)       1 10    <NA>  <NA>  <NA>  <NA>  <NA> 
#>  8 col2   [7,10)      4 7     8     9     12    <NA>  <NA> 
#>  9 col2   [10,13)     4 2     3     5     6     <NA>  <NA> 
#> 10 col2   [13,16)     2 1     4     <NA>  <NA>  <NA>  <NA>

Created on 2022-10-30 with reprex v2.0.2

2 Likes

It works!!! Thank you very much. I am very grateful to your effort and time on this problem.

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.