Not sure what you mean with"output averages". Here is with the counts.
First, let's read the data:
library(tidyverse)
data <- read.table(text = "Models input_M input_T output_X
m1 20% 0,1 50
m1 25% 0,15 60
m1 80% 0,2 40
m2 75% 0,8 35
m2 15% 1 25
m3 55% 1,2 18
m4 65% 1,5 85
m4 45% 0,6 125
m4 35% 0,8 10
m4 15% 0,6 1
m5 10% 0,1 50
m5 120% 0,9 20",
header = TRUE,dec = ",")
data
#> Models input_M input_T output_X
#> 1 m1 20% 0.10 50
#> 2 m1 25% 0.15 60
#> 3 m1 80% 0.20 40
#> 4 m2 75% 0.80 35
#> 5 m2 15% 1.00 25
#> 6 m3 55% 1.20 18
#> 7 m4 65% 1.50 85
#> 8 m4 45% 0.60 125
#> 9 m4 35% 0.80 10
#> 10 m4 15% 0.60 1
#> 11 m5 10% 0.10 50
#> 12 m5 120% 0.90 20
Then, we need input_M
to be numeric, right now it's text because of the "%" character.
data |>
mutate(
input_M_numeric = input_M |> str_remove("%") |> as.numeric()
)
We can then create "bins", categories based on the values of M:
data |>
mutate(
input_M_numeric = input_M |> str_remove("%") |> as.numeric(),
M_binned = cut(input_M_numeric, breaks = c(0,20,80, 100, Inf)))
#> Models input_M input_T output_X input_M_numeric M_binned
#> 1 m1 20% 0.10 50 20 (0,20]
#> 2 m1 25% 0.15 60 25 (20,80]
#> 3 m1 80% 0.20 40 80 (20,80]
#> 4 m2 75% 0.80 35 75 (20,80]
#> 5 m2 15% 1.00 25 15 (0,20]
#> 6 m3 55% 1.20 18 55 (20,80]
#> 7 m4 65% 1.50 85 65 (20,80]
#> 8 m4 45% 0.60 125 45 (20,80]
#> 9 m4 35% 0.80 10 35 (20,80]
#> 10 m4 15% 0.60 1 15 (0,20]
#> 11 m5 10% 0.10 50 10 (0,20]
#> 12 m5 120% 0.90 20 120 (100,Inf]
You can rename the bins with the labels
argument, see ?cut
.
We also need T
to be binned, we can do it the same way:
data |>
mutate(
input_M_numeric = input_M |> str_remove("%") |> as.numeric(),
M_binned = cut(input_M_numeric, breaks = c(0,20,80, 100, Inf)),
T_binned = cut(input_T, breaks = c(0, .1, .5, 1, Inf))
)
Now we can count the number of observations in each combination of bins of M
and T
. This can be replaced with summarize()
with appropriate groups if you want a mean or something else:
data |>
mutate(
input_M_numeric = input_M |> str_remove("%") |> as.numeric(),
M_binned = cut(input_M_numeric, breaks = c(0,20,80, 100, Inf)),
T_binned = cut(input_T, breaks = c(0, .1, .5, 1, Inf))
) |>
count(M_binned, T_binned)
#> M_binned T_binned n
#> 1 (0,20] (0,0.1] 2
#> 2 (0,20] (0.5,1] 2
#> 3 (20,80] (0.1,0.5] 2
#> 4 (20,80] (0.5,1] 3
#> 5 (20,80] (1,Inf] 2
#> 6 (100,Inf] (0.5,1] 1
And finally to get a table with the same format/shape as in your post, we can pivot it to be wider.
data |>
mutate(
input_M_numeric = input_M |> str_remove("%") |> as.numeric(),
M_binned = cut(input_M_numeric, breaks = c(0,20,80, 100, Inf)),
T_binned = cut(input_T, breaks = c(0, .1, .5, 1, Inf))
) |>
count(M_binned, T_binned) |>
pivot_wider(id_cols = T_binned,
names_from = "M_binned",
values_from = "n")
A problem here, is that some combinations of bins have no counts, and will end up with NA
. But in our case we know NA
means a count of 0
. So we will use values_fill
:
data |>
mutate(
input_M_numeric = input_M |> str_remove("%") |> as.numeric(),
M_binned = cut(input_M_numeric, breaks = c(0,20,80, 100, Inf)),
T_binned = cut(input_T, breaks = c(0, .1, .5, 1, Inf))
) |>
count(M_binned, T_binned) |>
pivot_wider(id_cols = T_binned,
names_from = "M_binned",
values_from = "n",
values_fill = 0)
#> # A tibble: 4 × 4
#> T_binned `(0,20]` `(20,80]` `(100,Inf]`
#> <fct> <int> <int> <int>
#> 1 (0,0.1] 2 0 0
#> 2 (0.5,1] 2 3 1
#> 3 (0.1,0.5] 0 2 0
#> 4 (1,Inf] 0 2 0