Hi All and good evening. I really need some guidance and have a two part question. I have 2 dataframes, df and df_alt both of which contain sporting events. Here is the dput of df
structure(list(ID = c("1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385"
), Book = c("Bovada", "Bovada"), Home = c("Alabama Crimson Tide",
"Alabama Crimson Tide"), Away = c("San Diego St Aztecs", "San Diego St Aztecs"
), Team = c("Alabama Crimson Tide", "San Diego St Aztecs"), Price = c(-110,
-110), Points = c(-7.5, 7.5)), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"))
# A tibble: 2 × 7
ID Book Home Away Team Price Points
<chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 1738c0c7214e7fced61c1caa479a5385 Bovada Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide -110 -7.5
2 1738c0c7214e7fced61c1caa479a5385 Bovada Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs -110 7.5
Here is the dput of df_alt
structure(list(ID = c("1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385"
), Book = c("Bovada", "Bovada"), Home = c("Alabama Crimson Tide",
"Alabama Crimson Tide"), Away = c("San Diego St Aztecs", "San Diego St Aztecs"
), Team = c("Alabama Crimson Tide", "San Diego St Aztecs"), Price = c(-110,
-110), Points = c(-7.5, 7.5)), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"))
> dput(df_alt)
structure(list(ID = c("1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385",
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385",
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385",
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385",
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385",
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385",
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385",
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385"
), Book = c("Pinnacle", "Pinnacle", "Pinnacle", "Pinnacle", "Pinnacle",
"Pinnacle", "Pinnacle", "Pinnacle", "Pinnacle", "Pinnacle", "Pinnacle",
"Pinnacle", "Pinnacle", "Pinnacle", "Pinnacle", "Pinnacle"),
Home = c("Alabama Crimson Tide", "Alabama Crimson Tide",
"Alabama Crimson Tide", "Alabama Crimson Tide", "Alabama Crimson Tide",
"Alabama Crimson Tide", "Alabama Crimson Tide", "Alabama Crimson Tide",
"Alabama Crimson Tide", "Alabama Crimson Tide", "Alabama Crimson Tide",
"Alabama Crimson Tide", "Alabama Crimson Tide", "Alabama Crimson Tide",
"Alabama Crimson Tide", "Alabama Crimson Tide"), Away = c("San Diego St Aztecs",
"San Diego St Aztecs", "San Diego St Aztecs", "San Diego St Aztecs",
"San Diego St Aztecs", "San Diego St Aztecs", "San Diego St Aztecs",
"San Diego St Aztecs", "San Diego St Aztecs", "San Diego St Aztecs",
"San Diego St Aztecs", "San Diego St Aztecs", "San Diego St Aztecs",
"San Diego St Aztecs", "San Diego St Aztecs", "San Diego St Aztecs"
), Team = c("Alabama Crimson Tide", "Alabama Crimson Tide",
"Alabama Crimson Tide", "Alabama Crimson Tide", "Alabama Crimson Tide",
"Alabama Crimson Tide", "Alabama Crimson Tide", "Alabama Crimson Tide",
"San Diego St Aztecs", "San Diego St Aztecs", "San Diego St Aztecs",
"San Diego St Aztecs", "San Diego St Aztecs", "San Diego St Aztecs",
"San Diego St Aztecs", "San Diego St Aztecs"), Price = c(-153,
-142, -130, -119, 102, 111, 119, 128, 131, 122, 113, 105,
-116, -127, -138, -149), Points = c(-5.5, -6, -6.5, -7, -8,
-8.5, -9, -9.5, 5.5, 6, 6.5, 7, 8, 8.5, 9, 9.5)), row.names = c(NA,
-16L), class = c("tbl_df", "tbl", "data.frame"))
# A tibble: 16 × 7
ID Book Home Away Team Price Points
<chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide -153 -5.5
2 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide -142 -6
3 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide -130 -6.5
4 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide -119 -7
5 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide 102 -8
6 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide 111 -8.5
7 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide 119 -9
8 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide 128 -9.5
9 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs 131 5.5
10 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs 122 6
11 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs 113 6.5
12 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs 105 7
13 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs -116 8
14 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs -127 8.5
15 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs -138 9
16 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs -149 9.5
I need to be able to find which observations from df have the same Points values in df_alt and join them together in order to run a function. This is what I have done thus far to accomplish this.
------Find intersecting values
df_int <- df %>%
dplyr::select(c(ID, Home, Away, Team, Points)) %>%
dplyr::intersect(df_alt %>% dplyr::select(c(ID, Home, Away, Team, Points))) %>%
mutate(Book = 'Pinnacle')
df_join <- df %>% full_join(df_int)
# Merge prices back into df -----------------------------------------------
df_final <- df_join %>% left_join(df_alt %>% rename(price=Price)) %>%
mutate(Price=ifelse(is.na(price),Price,price))%>%
select(-price)
This is the resulting data
ID Book Home Away Team Price Points
<chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 1738c0c7214e7fced61c1caa479a5385 Bovada Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide -110 -7.5
2 1738c0c7214e7fced61c1caa479a5385 Bovada Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs -110 7.5
3 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide -107 -7.5
4 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs -105 7.5
The end result is exactly what I need but this leads me to my first question. Is there a cleaner, less verbose way to accomplish the above? My second question is as follows. Let's say I have a dataframe similar to df but it contains additional Books. Here is the dput
structure(list(ID = c("1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385",
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385"
), Book = c("Bovada", "Bovada", "LowVig.ag", "LowVig.ag"), Home = c("Alabama Crimson Tide",
"Alabama Crimson Tide", "Alabama Crimson Tide", "Alabama Crimson Tide"
), Away = c("San Diego St Aztecs", "San Diego St Aztecs", "San Diego St Aztecs",
"San Diego St Aztecs"), Team = c("Alabama Crimson Tide", "San Diego St Aztecs",
"Alabama Crimson Tide", "San Diego St Aztecs"), Price = c(-110,
-110, -109, -103), Points = c(-7.5, 7.5, -7, 7)), row.names = c(NA,
-4L), class = c("tbl_df", "tbl", "data.frame"))
# A tibble: 4 × 7
ID Book Home Away Team Price Points
<chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 1738c0c7214e7fced61c1caa479a5385 Bovada Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide -110 -7.5
2 1738c0c7214e7fced61c1caa479a5385 Bovada Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs -110 7.5
3 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide -109 -7
4 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs -103 7
As you can see I now have 2 new rows with different Book and Points values. I would like to be able to perform the same steps as above (find the intersecting values) but I would like to split the df into groups before doing so. I can split df as follows
df %>%
group_split(ID, Book)
Here is the output
<list_of<
tbl_df<
ID : character
Book : character
Home : character
Away : character
Team : character
Price : double
Points: double
>
>[2]>
[[1]]
# A tibble: 2 × 7
ID Book Home Away Team Price Points
<chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 1738c0c7214e7fced61c1caa479a5385 Bovada Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide -110 -7.5
2 1738c0c7214e7fced61c1caa479a5385 Bovada Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs -110 7.5
[[2]]
# A tibble: 2 × 7
ID Book Home Away Team Price Points
<chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide -109 -7
2 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs -103 7
This works well and it separates df by each respective ID and Book. But this is where it gets complicated. How can I find the intersecting Points values on each chunk of data? My goal is to have 2 chunks of data similar to the following which allows me to run another function against each chunk.
ID Book Home Away Team Price Points
<chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 1738c0c7214e7fced61c1caa479a5385 Bovada Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide -110 -7.5
2 1738c0c7214e7fced61c1caa479a5385 Bovada Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs -110 7.5
3 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide -107 -7.5
4 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs -105 7.5
ID Book Home Away Team Price Points
<chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide -109 -7
2 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs -103 7
3 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs Alabama Crimson Tide -119 -7
4 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs San Diego St Aztecs 105 7
I am at a loss any help would be appreciated.