Hello there Rstudio
I am struggling with a way to look up the closest values (row-wise) from a column in a different data frame per group.
Here I have a spec1
table. It has the Length
values which I want to find the closet Length
values in spec_lookup
table for every row for each group of Month
, Species
and Type
Example output:
- 1st row:
Month = 2
,Length = 61.69
,Species = VERSI
,Type = High
.
The closet value inspec_lookup
is65.05
(out of 2 values65.05
and55.28
)
How do I go about doing this? Any help appreciated
library(tidyverse)
spec1 <- tibble::tribble(
~Species, ~Type, ~Year, ~Month, ~Length,
"MIRDE", "Low", 1947, 2, NA,
"MIRDE", "Low", 1947, 3, NA,
"MIRDE", "High", 1948, 2, NA,
"MIRDE", "High", 1948, 3, 151.37,
"MIRDE", "Medium", 1949, 2, 144.58,
"MIRDE", "Medium", 1949, 3, 441.67,
"SESTO", "Low", 1947, 2, 49.17,
"SESTO", "Low", 1947, 3, 53.40,
"SESTO", "High", 1948, 2, 69.03,
"SESTO", "High", 1948, 3, 61.49,
"SESTO", "Medium", 1949, 2, NA,
"SESTO", "Medium", 1949, 3, 104.53,
"VERSI", "High", 1948, 2, NA,
"VERSI", "High", 1948, 3, 98.94,
"VERSI", "Medium", 1949, 2, 145.66,
"VERSI", "Medium", 1949, 3, NA
)
spec1
# A tibble: 16 x 5
Species Type Year Month Length
<chr> <chr> <dbl> <dbl> <dbl>
1 MIRDE Low 1947 2 NA
2 MIRDE Low 1947 3 NA
3 MIRDE High 1948 2 NA
4 MIRDE High 1948 3 151.37
5 MIRDE Medium 1949 2 144.58
6 MIRDE Medium 1949 3 441.67
7 SESTO Low 1947 2 49.17
8 SESTO Low 1947 3 53.4
9 SESTO High 1948 2 69.03
10 SESTO High 1948 3 61.49
11 SESTO Medium 1949 2 NA
12 SESTO Medium 1949 3 104.53
13 VERSI High 1948 2 NA
14 VERSI High 1948 3 98.94
15 VERSI Medium 1949 2 145.66
16 VERSI Medium 1949 3 NA
spec_lookup <- tibble::tribble(
~Species, ~Type, ~Year, ~Month, ~Length,
"MIRDE", "Medium", 2014, 1, NA,
"MIRDE", "Medium", 2014, 2, NA,
"MIRDE", "Medium", 2014, 3, NA,
"MIRDE", "Medium", 2015, 1, NA,
"MIRDE", "Medium", 2015, 2, NA,
"MIRDE", "Medium", 2015, 3, NA,
"MIRDE", "High", 2016, 1, 46.48,
"MIRDE", "High", 2016, 2, 51.43,
"MIRDE", "High", 2016, 3, 162.09,
"MIRDE", "Low", 2017, 1, 274.47,
"MIRDE", "Low", 2017, 2, 285.36,
"MIRDE", "Low", 2017, 3, 123.32,
"MIRDE", "High", 2018, 1, 39.82,
"MIRDE", "High", 2018, 2, 9.64,
"MIRDE", "High", 2018, 3, 162.09,
"MIRDE", "Medium", 2019, 1, 14.71,
"MIRDE", "Medium", 2019, 2, 144.76,
"MIRDE", "Medium", 2019, 3, 142.33,
"SESTO", "Medium", 2015, 1, 3,
"SESTO", "Medium", 2015, 2, 47.45,
"SESTO", "Medium", 2015, 3, 3.26,
"SESTO", "High", 2016, 1, 10.36,
"SESTO", "High", 2016, 2, NA,
"SESTO", "High", 2016, 3, 55.3,
"SESTO", "Low", 2017, 1, 344.4,
"SESTO", "Low", 2017, 2, 143.77,
"SESTO", "Low", 2017, 3, 334,
"SESTO", "High", 2018, 1, 46.03,
"SESTO", "High", 2018, 2, 50.07,
"SESTO", "High", 2018, 3, 61.72,
"SESTO", "Medium", 2019, 1, 52.48,
"SESTO", "Medium", 2019, 2, 47.45,
"SESTO", "Medium", 2019, 3, 349.58,
"VERSI", "Medium", 2015, 1, 35.41,
"VERSI", "Medium", 2015, 2, 53.12,
"VERSI", "Medium", 2015, 3, 39.77,
"VERSI", "High", 2016, 1, 121.76,
"VERSI", "High", 2016, 2, 65.05,
"VERSI", "High", 2016, 3, 586.23,
"VERSI", "Low", 2017, 1, 1082.75,
"VERSI", "Low", 2017, 2, 1503.45,
"VERSI", "Low", 2017, 3, 473.57,
"VERSI", "High", 2018, 1, 78.48,
"VERSI", "High", 2018, 2, 55.28,
"VERSI", "High", 2018, 3, 365.92,
"VERSI", "Medium", 2019, 1, 131.69,
"VERSI", "Medium", 2019, 2, 389.84,
"VERSI", "Medium", 2019, 3, 501.97
)
spec_lookup
# A tibble: 48 x 5
Species Type Year Month Length
<chr> <chr> <dbl> <dbl> <dbl>
1 MIRDE Medium 2014 1 NA
2 MIRDE Medium 2014 2 NA
3 MIRDE Medium 2014 3 NA
4 MIRDE Medium 2015 1 NA
5 MIRDE Medium 2015 2 NA
6 MIRDE Medium 2015 3 NA
7 MIRDE High 2016 1 46.48
8 MIRDE High 2016 2 51.43
9 MIRDE High 2016 3 162.09
10 MIRDE Low 2017 1 274.47
11 MIRDE Low 2017 2 285.36
12 MIRDE Low 2017 3 123.32
13 MIRDE High 2018 1 39.82
14 MIRDE High 2018 2 9.64
15 MIRDE High 2018 3 162.09
# ... with 33 more rows
Update expected output based on @FJCC's answer:
expected_out
will have the same length as spec1
.
Each row has unique Length.lookup
and LengthDiff
values. All duplicated rows should be removed.
If Length.spec1
is NA
, then Length.lookup
will be NA
.
expected_out <- tibble::tribble(
~Species, ~Type, ~Year.spec1, ~Month, ~Length.spec1, ~Year.lookup, ~Length.lookup, ~LengthDiff,
"MIRDE", "Low", 1947, 2, NA, 2017, NA, NA,
"MIRDE", "Low", 1947, 3, NA, 2017, NA, NA,
"MIRDE", "High", 1948, 2, NA, 2016, NA, NA,
"MIRDE", "High", 1948, 3, 151.37, 2016, 162.09, 10.72,
"MIRDE", "Medium", 1949, 2, 144.58, 2014, NA, NA,
"MIRDE", "Medium", 1949, 3, 441.67, 2014, NA, NA,
"SESTO", "Low", 1947, 2, 49.17, 2017, 143.77, 94.6,
"SESTO", "Low", 1947, 3, 53.4, 2017, 334, 280.6,
"SESTO", "High", 1948, 2, 69.03, 2016, NA, NA,
"SESTO", "High", 1948, 3, 61.49, 2018, 61.72, 0.23,
"SESTO", "Medium", 1949, 2, NA, 2015, NA, NA,
"SESTO", "Medium", 1949, 3, 104.53, 2015, 3.26, 101.27,
"VERSI", "High", 1948, 2, NA, 2016, NA, NA,
"VERSI", "High", 1948, 3, 98.94, 2018, 365.92, 266.98,
"VERSI", "Medium", 1949, 2, 145.66, 2015, 53.12, 92.54,
"VERSI", "Medium", 1949, 3, NA, 2015, NA, NA
)
expected_out
# A tibble: 16 x 8
Species Type Year.spec1 Month Length.spec1 Year.lookup Length.lookup LengthDiff
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 MIRDE Low 1947 2 NA 2017 NA NA
2 MIRDE Low 1947 3 NA 2017 NA NA
3 MIRDE High 1948 2 NA 2016 NA NA
4 MIRDE High 1948 3 151.37 2016 162.09 10.72
5 MIRDE Medium 1949 2 144.58 2014 NA NA
6 MIRDE Medium 1949 3 441.67 2014 NA NA
7 SESTO Low 1947 2 49.17 2017 143.77 94.6
8 SESTO Low 1947 3 53.4 2017 334 280.6
9 SESTO High 1948 2 69.03 2016 NA NA
10 SESTO High 1948 3 61.49 2018 61.72 0.23
11 SESTO Medium 1949 2 NA 2015 NA NA
12 SESTO Medium 1949 3 104.53 2015 3.26 101.27
13 VERSI High 1948 2 NA 2016 NA NA
14 VERSI High 1948 3 98.94 2018 365.92 266.98
15 VERSI Medium 1949 2 145.66 2015 53.12 92.54
16 VERSI Medium 1949 3 NA 2015 NA NA