Hi,
I have a function that takes a folder of multiple CSV files (created by ImageJ analysis) and outputs a single CSV containing summary statistics.
It receives the following as input:
- Data cutoff
- Treatment group
- Animal ID
- Number of imaged cells
- Input folder path
Right now, it asks for this information on every run, but I want to create a "queue" (perhaps in Excel, or something with a GUI): a table containing this information, where each entry is processed as part of a batch. How should I approach this?
EDIT: I have made an example dataset to illustrate this. The code is very bad, sorry.
#### Get packages ####
if (!require("pacman")) install.packages("pacman"); library(pacman)
#> Loading required package: pacman
p_load_gh("trinker/wakefield")
p_load(tidyverse)
#### Create simulated folder structure ####
# One folder per treatment group, with one sub-folder per animal ID.
folders <- c(
  "./Data", "./Data/SUS/", "./Data/Control/",
  "./Data/SUS/3123/", "./Data/SUS/3124/", "./Data/SUS/3125/",
  "./Data/Control/3426/", "./Data/Control/3427/", "./Data/Control/3428/"
)
# Iterate over the vector itself (not a hard-coded 1:9) so that adding or
# removing a folder above cannot get out of sync with the loop. Folders that
# already exist are skipped, which makes the script safe to re-run.
for (folder in folders) {
  if (!dir.exists(folder)) {
    dir.create(path = folder, recursive = TRUE)
  }
}
#### Generate random df ####
# One row per simulated animal. Every animal gets `n_slides` CSV files (one per
# slide), each holding between 400 and 499 rows with an ID column and a random
# integer Endpoint_number between 2 and 30.
animals <- tribble(
  ~grp,      ~animal_id,
  "SUS",     "3123",
  "SUS",     "3124",
  "SUS",     "3125",
  "Control", "3426",
  "Control", "3427",
  "Control", "3428"
)
n_slides <- 3L

# NOTE(review): the loops are deliberately kept at top level rather than moved
# into a helper function. r_data_frame() appears to capture its column
# expressions unevaluated, so a function-local `samplesize` might not be found
# when they are evaluated -- confirm before refactoring into a function.
for (a in seq_len(nrow(animals))) {
  grp <- animals$grp[[a]]
  animal_id <- animals$animal_id[[a]]

  for (i in seq_len(n_slides)) {
    samplesize <- as.integer(runif(min = 400, max = 500, n = 1))
    test <- r_data_frame(
      n = samplesize,
      id,
      Endpoint_number = as.integer(runif(min = 2, max = 31, n = samplesize))
    )
    # Produces e.g. ./Data/SUS/3123/SUS_3123_slide_1.csv
    slide_file <- paste0(
      "./Data/", grp, "/", animal_id, "/",
      grp, "_", animal_id, "_slide_", i, ".csv"
    )
    write_csv(test, file = slide_file)
  }
}
#> Warning: `tbl_df()` is deprecated as of dplyr 1.0.0.
#> Please use `tibble::as_tibble()` instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_warnings()` to see where this warning was generated.
Created on 2020-12-18 by the reprex package (v0.3.0)
The above block should create ./Data/ with the following structure:
> p_load(fs)
> fs::dir_tree(path="./Data/", recurse = TRUE)
./Data/
+-- Control
| +-- 3426
| | +-- Control_3426_slide_1.csv
| | +-- Control_3426_slide_2.csv
| | \-- Control_3426_slide_3.csv
| +-- 3427
| | +-- Control_3427_slide_1.csv
| | +-- Control_3427_slide_2.csv
| | \-- Control_3427_slide_3.csv
| \-- 3428
| +-- Control_3428_slide_1.csv
| +-- Control_3428_slide_2.csv
| \-- Control_3428_slide_3.csv
\-- SUS
+-- 3123
| +-- SUS_3123_slide_1.csv
| +-- SUS_3123_slide_2.csv
| \-- SUS_3123_slide_3.csv
+-- 3124
| +-- SUS_3124_slide_1.csv
| +-- SUS_3124_slide_2.csv
| \-- SUS_3124_slide_3.csv
\-- 3125
+-- SUS_3125_slide_1.csv
+-- SUS_3125_slide_2.csv
\-- SUS_3125_slide_3.csv
And here's a representative csv:
> read_csv('./Data/Control/3426/Control_3426_slide_1.csv')
-- Column specification -------------------------------------------------------------------------------------------------
cols(
ID = col_character(),
Endpoint_number = col_double()
)
# A tibble: 473 x 2
ID Endpoint_number
<chr> <dbl>
1 001 3
2 002 6
3 003 6
4 004 9
5 005 18
6 006 29
7 007 13
8 008 23
9 009 10
10 010 3
# ... with 463 more rows
Each CSV should be treated as a technical replicate. The 3 CSVs obtained from each animal are merged, their Endpoint_number values are summed, and the total is divided by the number of cells (which is counted separately). Here's what I have for that at the moment:
#### Get packages ####
if (!require("pacman")) install.packages("pacman"); library(pacman)
p_load(tidyverse, vroom, here, fs)
#### User asked for the following ####
### Processing queue: one row per animal. Add more rows here (or read the
### table from a CSV / Excel sheet with the same four columns) to process
### several animals in one run.
### There are 13 cells counted in these 3 slides ###
queue <- tibble(
  input_folder = "./Data/Control/3426/",
  group = "Control",
  animal = "3426",
  cell_count = 13
)

#' Summarise the slide CSVs of one animal
#'
#' Reads every file in `input_folder` whose name starts with
#' `<group>_<animal>_slide` and ends with `csv`, sums the `Endpoint_number`
#' column over all of them, divides by `cell_count`, and writes the result to
#' `<group>_<animal>_summary.csv` inside `input_folder`.
#'
#' @param input_folder Folder containing the slide CSV files of one animal.
#' @param group Treatment group label used in the file names.
#' @param animal Animal ID used in the file names.
#' @param cell_count Number of cells counted across the animal's slides.
#' @return A one-row tibble with columns `group`, `animal`, `cell_count`,
#'   `total_endpoints` and `mean_endpoints`.
summarise_animal <- function(input_folder, group, animal, cell_count) {
  #### Import csv files ####
  # Match on the file name only, so the result does not depend on how the
  # folder path was written (leading "./", trailing "/").
  candidates <- fs::dir_ls(path = input_folder)
  file_names <- as.character(fs::path_file(candidates))
  prefix <- paste0(group, "_", animal, "_slide")
  files <- candidates[startsWith(file_names, prefix) &
    endsWith(file_names, "csv")]
  if (length(files) == 0) {
    stop(
      "No slide csv files found for ", group, "_", animal,
      " in ", input_folder,
      call. = FALSE
    )
  }
  df <- vroom::vroom(files)

  ### Create a dataframe and csv containing summary data ###
  total_endpoints <- sum(df$Endpoint_number)
  mean_endpoints <- total_endpoints / cell_count
  summary_stats <- tibble(
    group, animal, cell_count, total_endpoints, mean_endpoints
  )
  write_csv(
    summary_stats,
    file = fs::path(input_folder, paste0(group, "_", animal, "_summary.csv"))
  )
  summary_stats
}

### Process every queue entry; the columns of `queue` are matched to the
### arguments of summarise_animal() by name. `summary_stats` holds one row per
### animal, ready for comparing mean_endpoints between groups.
summary_stats <- bind_rows(pmap(queue, summarise_animal))
I want to provide the user input variables in a table of some sort and have R then batch process all animal data using the given variables. I'm open to other approaches and improvements to my code as well, as the goal is to compare mean_endpoints between groups.