Iterate through a list and create a sub-dataset for each item in the list

I have a data frame with 3 columns: type, year, and number. I need to create a data frame for each type. There are 4 types altogether in the main data frame.

# Example data: 16 rows covering four types ("a" to "d"), each with a year
# (stored as character) and a number.
# `data_frame()` is deprecated in tibble; `tibble()` is its replacement.
df <- tibble::tibble(
  type = rep(c("a", "b", "c", "d"), times = 4),
  year = rep(c("2021", "2022"), times = 8),
  number = c(12, 10, 52, 65, 78, 65, 84, 1, 6, 2, 8, 3, 5, 32, 45, 96)
)

I need to filter by type and create a sub data frame for each type. I tried using a for loop, but it is not working.

# Distinct values of `type`, pulled out of the data frame and converted to a
# character vector.
type_list<-as.character(df %>%
                             distinct(type) %>% 
                             pull()
                            )

# NOTE(review): this is the attempt reported as not working. Each iteration
# writes the filtered rows back into `type_list[i]`, i.e. into the same
# character vector that supplies the filter value, so there is no separate
# container collecting one data frame per type.
for (i in seq_along(type_list)) {
type_list[i]<-df %>%
    filter(type==type_list[i]) 
  }

It seems to be fairly easy to do using the {data.table} package.

library(data.table)

# Same 16-row example data, written with rep() instead of spelling out every
# repeated value.
df <- data.frame(
  type = rep(c("a", "b", "c", "d"), times = 4),
  year = rep(c("2021", "2022"), times = 8),
  number = c(12, 10, 52, 65, 78, 65, 84, 1, 6, 2, 8, 3, 5, 32, 45, 96)
)

# Convert to a data.table so that split() can be called with `by`.
DT <- as.data.table(df)

# One data.table per distinct value of `type`, collected in a list.
mylist <- split(DT, by = "type")

# Look at the first subset.
dat1 <- mylist[[1]]

Hi, thanks for the reply. Is there a way to directly assign a table name from a list item?
For example:

# Illustration of the desired usage: filter `df` to the rows whose type equals
# the first list item, and store the result under that same list item.
list[1]<-df%>%
               filter(type==list[1])

Here are two methods. I think the split() function is the most direct solution.

library(tidyverse)
#> Warning: package 'ggplot2' was built under R version 4.3.3

# Method 1: keep the for loop, but collect the results in a separate list.
# Example data: four types ("a" to "d"), four rows each.
df <- data.frame(
  type = rep(c("a", "b", "c", "d"), times = 4),
  year = rep(c("2021", "2022"), times = 8),
  number = c(12, 10, 52, 65, 78, 65, 84, 1, 6, 2, 8, 3, 5, 32, 45, 96)
)

# Distinct type values as a character vector.
type_list <- as.character(pull(distinct(df, type)))

# Preallocate one slot per type, then fill each slot with that type's rows.
Out_list <- vector("list", length(type_list))
for (i in seq_along(type_list)) {
  Out_list[[i]] <- filter(df, type == type_list[i])
}
Out_list
#> [[1]]
#>   type year number
#> 1    a 2021     12
#> 2    a 2021     78
#> 3    a 2021      6
#> 4    a 2021      5
#> 
#> [[2]]
#>   type year number
#> 1    b 2022     10
#> 2    b 2022     65
#> 3    b 2022      2
#> 4    b 2022     32
#> 
#> [[3]]
#>   type year number
#> 1    c 2021     52
#> 2    c 2021     84
#> 3    c 2021      8
#> 4    c 2021     45
#> 
#> [[4]]
#>   type year number
#> 1    d 2022     65
#> 2    d 2022      1
#> 3    d 2022      3
#> 4    d 2022     96

# Method 2: split() returns a named list with one data frame per type.
split(df, f = df$type)
#> $a
#>    type year number
#> 1     a 2021     12
#> 5     a 2021     78
#> 9     a 2021      6
#> 13    a 2021      5
#> 
#> $b
#>    type year number
#> 2     b 2022     10
#> 6     b 2022     65
#> 10    b 2022      2
#> 14    b 2022     32
#> 
#> $c
#>    type year number
#> 3     c 2021     52
#> 7     c 2021     84
#> 11    c 2021      8
#> 15    c 2021     45
#> 
#> $d
#>    type year number
#> 4     d 2022     65
#> 8     d 2022      1
#> 12    d 2022      3
#> 16    d 2022     96

Created on 2024-04-26 with reprex v2.0.2

I don't know. I don't work with lists a lot but I can poke around.

What you can do is name them after the fact.

# Rename the list elements after splitting; supply one name per element.
names(mylist) <- c("alpha", "beta", "gamma", "delta")

Hi, thank you.
But none of these methods creates a separate data frame for each type in the environment.

If you want the data frames to be in the global environment rather than in a list, try

library(tidyverse)

# Example data: four types ("a" to "d"), four rows each.
df <- data.frame(
  type = rep(c("a", "b", "c", "d"), times = 4),
  year = rep(c("2021", "2022"), times = 8),
  number = c(12, 10, 52, 65, 78, 65, 84, 1, 6, 2, 8, 3, 5, 32, 45, 96)
)

# Distinct type values; each one becomes the name of a global variable below.
type_list <- as.character(pull(distinct(df, type)))

# Create one data frame per type directly in the global environment, named
# after the type value, instead of collecting the subsets in a list.
for (i in seq_along(type_list)) {
  tmp <- filter(df, type == type_list[i])
  assign(type_list[i], tmp, envir = .GlobalEnv)
}

1 Like

Thank you so much for this. This is what I've been looking for.

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.