Hello all, I have a very large script that uses various custom-functions to scrape data, manipulate dataframes, etc. However, when I run functions that pull all of the smaller functions together to get a desires result, I RStudio aborts. I have cleared out my global environment mostly: I've left a few dfs and a model that I use with a predict() function. I'll paste the code below. Any help will be GREATLY appreciated.
Thanks in advance
library(baseballr)
library(dplyr)
library(tidyverse)
library(rvest)
library(beepr)
library(furrr)
library(purrr)
library(forcats)
library(limSolve)
library(shiny)
library(shinydashboard)
library(plotly)
### This is where I get an error, the function runs great, but when the assignment of the return to FinalR happens, I get an error ###
FinalR=ShortSim(SimmedSchedule,2020,1)
beep(2)
############################################### SIM GAME FUNCTION PROTOTYPE ###################################
### Prototype uses the GetLineups() and SimHalfInning() functions ###
### Function returns away score and home score ###
### ###
### If you receive Error in if (Instance == "Out") { : missing value where TRUE/FALSE needed function, team ###
### name is incorrect, and cannot be loked up from table ###
###############################################################################################################
SimGame <- function(AwayTeam,HomeTeam,runs){
AggHScore=0
AggVScore=0
GetLineups(AwayTeam,HomeTeam)
LineupHold=GetLineups(AwayTeam,HomeTeam)
AwayLineup=LineupHold[[1]]
HomeLineup=LineupHold[[2]]
t1=0
t2=0
t3=0
t4=0
t5=0
t6=0
t7=0
t8=0
t9=0
te=0
b1=0
b2=0
b3=0
b4=0
b5=0
b6=0
b7=0
b8=0
b9=0
be=0
awaypos=1
homepos=1
for(i in 1:runs){
#Top of First
Pitcher=subset(HomeLineup,ID=="P1")
Holda=SimHalfInning(AwayLineup,awaypos,Pitcher)
t1=Holda[[1]]
awaypos=Holda[[2]]
#Bottom of First
Pitcher=subset(AwayLineup,ID=="P1")
Holdh=SimHalfInning(HomeLineup,homepos,Pitcher)
b1=Holdh[[1]]
homepos=Holdh[[2]]
#Top of Second
Pitcher=subset(HomeLineup,ID=="P2")
Holda=SimHalfInning(AwayLineup,awaypos,Pitcher)
t2=Holda[[1]]
awaypos=Holda[[2]]
#Bottom of Second
Pitcher=subset(AwayLineup,ID=="P2")
Holdh=SimHalfInning(HomeLineup,homepos,Pitcher)
b2=Holdh[[1]]
homepos=Holdh[[2]]
#Top of Third
Pitcher=subset(HomeLineup,ID=="P3")
Holda=SimHalfInning(AwayLineup,awaypos,Pitcher)
t3=Holda[[1]]
awaypos=Holda[[2]]
#Bottom of Third
Pitcher=subset(AwayLineup,ID=="P3")
Holdh=SimHalfInning(HomeLineup,homepos,Pitcher)
b3=Holdh[[1]]
homepos=Holdh[[2]]
#Top of Fourth
Pitcher=subset(HomeLineup,ID=="P4")
Holda=SimHalfInning(AwayLineup,awaypos,Pitcher)
t4=Holda[[1]]
awaypos=Holda[[2]]
#Bottom of Fourth
Pitcher=subset(AwayLineup,ID=="P4")
Holdh=SimHalfInning(HomeLineup,homepos,Pitcher)
b4=Holdh[[1]]
homepos=Holdh[[2]]
#Top of Fifth
Pitcher=subset(HomeLineup,ID=="P5")
Holda=SimHalfInning(AwayLineup,awaypos,Pitcher)
t5=Holda[[1]]
awaypos=Holda[[2]]
#Bottom of Fifth
Pitcher=subset(AwayLineup,ID=="P5")
Holdh=SimHalfInning(HomeLineup,homepos,Pitcher)
b5=Holdh[[1]]
homepos=Holdh[[2]]
#Top of Sixth
Pitcher=subset(HomeLineup,ID=="P6")
Holda=SimHalfInning(AwayLineup,awaypos,Pitcher)
t6=Holda[[1]]
awaypos=Holda[[2]]
#Bottom of Sixth
Pitcher=subset(AwayLineup,ID=="P6")
Holdh=SimHalfInning(HomeLineup,homepos,Pitcher)
b6=Holdh[[1]]
homepos=Holdh[[2]]
#Top of Seventh
Pitcher=subset(HomeLineup,ID=="P7")
Holda=SimHalfInning(AwayLineup,awaypos,Pitcher)
t7=Holda[[1]]
awaypos=Holda[[2]]
#Bottom of Seventh
Pitcher=subset(AwayLineup,ID=="P7")
Holdh=SimHalfInning(HomeLineup,homepos,Pitcher)
b7=Holdh[[1]]
homepos=Holdh[[2]]
#Top of Eighty
Pitcher=subset(HomeLineup,ID=="P8")
Holda=SimHalfInning(AwayLineup,awaypos,Pitcher)
t8=Holda[[1]]
awaypos=Holda[[2]]
#Bottom of Eighty
Pitcher=subset(AwayLineup,ID=="P8")
Holdh=SimHalfInning(HomeLineup,homepos,Pitcher)
b8=Holdh[[1]]
homepos=Holdh[[2]]
#Top of Ninth
Pitcher=subset(HomeLineup,ID=="P9")
Holda=SimHalfInning(AwayLineup,awaypos,Pitcher)
t9=Holda[[1]]
awaypos=Holda[[2]]
if((t1+t2+t3+t4+t5+t6+t7+t8+t9)>=(b1+b2+b3+b4+b5+b6+b7+b8)){
#Bottom of Ninth
Pitcher=subset(AwayLineup,ID=="P9")
Holdh=SimHalfInning(HomeLineup,homepos,Pitcher)
b9=Holdh[[1]]
homepos=Holdh[[2]]
}else{
b9="X"
}
if(b9!="X"){
while ((t1+t2+t3+t4+t5+t6+t7+t8+t9+te)==(b1+b2+b3+b4+b5+b6+b7+b8+b9+be)) {
#Top of Extra Inning
Pitcher=subset(HomeLineup,ID=="P9")
Holda=SimHalfInning(AwayLineup,awaypos,Pitcher)
te=Holda[[1]]
awaypos=Holda[[2]]
#Bottom of Extra Inning
Pitcher=subset(AwayLineup,ID=="P9")
Holdh=SimHalfInning(HomeLineup,homepos,Pitcher)
be=Holdh[[1]]
homepos=Holdh[[2]]
}
}
#message("")
#message(" *** FINAL SCORE ***")
#if(te+be==0){
# message(" ",paste(AwayTeam,": ",t1,t2,t3,"-",t4,t5,t6,"-",t7,t8,t9,"---",t1+t2+t3+t4+t5+t6+t7+t8+t9,sep = ""))
#if(b9 != "X"){
# message(" ",paste(HomeTeam,": ",b1,b2,b3,"-",b4,b5,b6,"-",b7,b8,b9,"---",b1+b2+b3+b4+b5+b6+b7+b8+b9, sep = ""))
#}else{
# message(" ",paste(HomeTeam,": ",b1,b2,b3,"-",b4,b5,b6,"-",b7,b8,b9,"---",b1+b2+b3+b4+b5+b6+b7+b8, sep = ""))
#}
#}else{
# message(" ",paste(AwayTeam,": ",t1,t2,t3,"-",t4,t5,t6,"-",t7,t8,t9,"-",te,"---",t1+t2+t3+t4+t5+t6+t7+t8+t9+te,sep = ""))
#message(" ",paste(HomeTeam,": ",b1,b2,b3,"-",b4,b5,b6,"-",b7,b8,b9,"-",be,"---",b1+b2+b3+b4+b5+b6+b7+b8+b9+be, sep = ""))
#}
if(te+be==0){
FinalVscore=t1+t2+t3+t4+t5+t6+t7+t8+t9
if(b9 != "X"){
FinalHscore=b1+b2+b3+b4+b5+b6+b7+b8+b9
}else{
FinalHscore=b1+b2+b3+b4+b5+b6+b7+b8
}
}else{
FinalVscore=t1+t2+t3+t4+t5+t6+t7+t8+t9+te
FinalHscore=b1+b2+b3+b4+b5+b6+b7+b8+b9+be
}
AggHScore=FinalHscore+AggHScore
AggVScore=FinalVscore+AggVScore
}
AggHScore=AggHScore/runs
AggVScore=AggVScore/runs
#land return value here
exit=list(AggVScore,AggHScore)
return(exit)
}
###################################### FUNCTION PROTOTYPE FOR SCRAPING SCHEDULES ###########################################
### Takes parameter of year (for what year you are wanting to scrape for) ###
### Returns a data frame of all games in the schedule, including those that have not been played yet ###
### Takes away first game from each season because it has RPI links and summaries, not score of games ###
### Dependent on dplyr and baseballr packages ###
############################################################################################################################
Years=2020
ScrapeSchedules <- function(Years){
#Schedule=Schedule[0,]
#MU=MU[0,]
Master = school_id_lu(school_name = "")
Master = Master %>%
filter(division==1 & year==Years)
##########################################
for(i in 1:nrow(Master)){
message("Scraping All 2020 Schedule Data for ",paste(Master[[i,1]],"...",sep = ""))
message("***** PERCENT SCRAPED: ",paste((round(((i-1)/nrow(Master))*100,digits = 1)),"%"," *****",sep = ""))
ID = Master[[i,3]]
Team = Master[[i,1]]
MU <- get_ncaa_schedule_info(teamid = ID, year = Years)
for(j in 1:nrow(MU)){
MU[j,3]=MU[j+1,3]
MU[j,4]=MU[j+1,4]
##New stuff, hopefully might work...
MU[j,6]=MU[j+1,6]
MU[j,7]=MU[j+1,7]
}
MU=MU[-nrow(MU),]
MU$Team=Team
#MU=MU %>%
# filter(result != "NA")
if(i>1){
Schedule = rbind(Schedule,MU)
}else{
Schedule=MU
}
}
Schedule$opponent=str_trim(Schedule$opponent)
Schedule$Team=str_trim(Schedule$Team)
Schedule$Sub=ifelse(substring(Schedule$opponent,1,1)=="@",substring(Schedule$opponent,3),"")
Schedule$HomeTeam=ifelse(substring(Schedule$opponent,1,1)=="@",Schedule$Sub,Schedule$Team)
Schedule$VisitingTeam=ifelse(Schedule$HomeTeam==Schedule$Team,sub("\\@.*", "",Schedule$opponent),Schedule$Team)
Schedule$HomeScore=ifelse(Schedule$HomeTeam==Schedule$Team,sub("\\-.*", "",Schedule$score),sub('.*-', '', Schedule$score))
Schedule$VisitingScore=ifelse(Schedule$VisitingTeam==Schedule$Team,sub("\\-.*", "",Schedule$score),sub('.*-', '', Schedule$score))
Schedule$VisitingScore=as.numeric(Schedule$VisitingScore)
Schedule$HomeScore=as.numeric(Schedule$HomeScore)
Schedule$HomeTeam=str_trim(Schedule$HomeTeam)
Schedule$VisitingTeam=str_trim(Schedule$VisitingTeam)
##Check and see if the site was played at a neutral location
Schedule$Neutral=ifelse(grepl("@",substring(Schedule$opponent,2))==TRUE,1,0)
NotPlayed=Schedule %>%
filter(is.na(Schedule$result)==TRUE)
Schedule$ID=Schedule$game_info_url
#Schedule$ID=ifelse(Schedule$Neutral==0,paste(Schedule$date,Schedule$HomeTeam,Schedule$VisitingTeam,Schedule$HomeScore,Schedule$VisitingScore),paste(Schedule$date,ifelse(Schedule$VisitingTeam>Schedule$HomeTeam,Schedule$VisitingTeam,Schedule$HomeTeam),ifelse(Schedule$VisitingTeam>Schedule$HomeTeam,Schedule$VisitingScore,Schedule$HomeScore)))
#Remove duplicated games
Schedule=Schedule %>%
distinct(Schedule$ID, .keep_all = TRUE)
Schedule=Schedule %>%
select("date","HomeTeam","VisitingTeam","HomeScore","VisitingScore","ID","Neutral")
Schedule=Schedule[-nrow(Schedule),]
NotPlayed$ID=paste(NotPlayed$date,NotPlayed$HomeTeam,NotPlayed$VisitingTeam)
NotPlayed=NotPlayed %>%
distinct(NotPlayed$ID, .keep_all = TRUE)
NotPlayed=NotPlayed %>%
select("date","HomeTeam","VisitingTeam","HomeScore","VisitingScore","ID","Neutral")
Schedule=rbind.data.frame(Schedule,NotPlayed)
#Another filter to check for teams with double headers and a game that was canceled
#Check=Schedule[data.frame(table(Schedule$ID))]
#Schedule=
#view(Schedule)
#end of messing around
Schedule=Schedule %>%
arrange(date)
rm(NotPlayed)
beep(2)
return(Schedule)
}
#################################### FUNCTION PROTOTYPE FOR SIM SEASON ##########################################
### Function dependent on ScrapePlayerStats() and ScrapeSchedules() functions ###
### Function takes "Year" parameter for what year you wish to sim and returns SimmedSchedule dataframe ###
#################################################################################################################
SimSeason=function(Year,runs){
holdstats=ScrapePlayerStats(Year)
beep(2)
D1Bat=holdstats[[1]]
D1Pitch=holdstats[[2]]
rm(holdstats)
Schedule=ScrapeSchedules(Year)
NotPlayed=Schedule %>%
filter(is.na(HomeScore)==TRUE)
Schedule=Schedule %>%
filter(is.na(HomeScore)==FALSE)
NotPlayed$VisitingTeam=str_trim(NotPlayed$VisitingTeam,side = "both")
NotPlayed$HomeTeam=str_trim(NotPlayed$HomeTeam,side = "both")
for(i in 1:nrow(NotPlayed)){
tryCatch({
message("Simulating ",paste(NotPlayed[i,3]," @ ",NotPlayed[i,2]," on ",NotPlayed[i,1]," *** ",round((i/nrow(NotPlayed))*100,digits = 1),"% Complete ***",sep = ""))
hold=SimGame(NotPlayed[i,3],NotPlayed[i,2],runs)
#Check to make sure aggregate results from sims to not come out equal which would result in a tie. If they do, chose randomly
if(hold[[1]] == hold[[2]]){
if(runif(1)>=.5){
hold[[1]]=1
hold[[2]]=0
}else{
hold[[1]]=0
hold[[2]]=1
}
}
NotPlayed[i,5]=hold[[1]]
NotPlayed[i,4]=hold[[2]]
},error=function(e){})
}
beep(2)
view(NotPlayed)
NotPlayed=NotPlayed[complete.cases(NotPlayed),]
NotPlayed=NotPlayed %>%
select("date","HomeTeam","VisitingTeam","HomeScore","VisitingScore","Neutral")
NotPlayed$Simmed=1
Schedule=Schedule %>%
select("date","HomeTeam","VisitingTeam","HomeScore","VisitingScore","Neutral")
Schedule$Simmed=0
Schedule=Schedule[complete.cases(Schedule),]
SimmedSchedule=rbind.data.frame(Schedule,NotPlayed)
SimmedSchedule=SimmedSchedule %>%
arrange(date)
beep(1)
rm(Schedule)
rm(NotPlayed)
rm(holdstats)
rm(D1Bat)
rm(D1Pitch)
return(SimmedSchedule)
}
##################################FUNCTION PROTOTYPES TAKEN FROM INTERNET, WRITE UP LATER ############################################
### The functions for calculating weighted wins and RPI taken from https://dpmartin42.github.io/posts/r/college-basketball-rankings###
### The function relies on dplyr, purrr, forecasts, and limsolve function ###
######################################################################################################################################
calc_wp <- function(game_data, team_id, exclusion_id = NULL){
games_played <- game_data[game_data$WTeamID == team_id | game_data$LTeamID == team_id, ]
if(!is.null(exclusion_id)){
games_played <-
games_played[games_played$WTeamID != exclusion_id & games_played$LTeamID != exclusion_id, ]
wp <- sum(games_played$WTeamID == team_id)/length(games_played$WTeamID)
} else{
wwins <- 1.3 * sum(games_played$WTeamID == team_id & games_played$WLoc == "A") +
0.7 * sum(games_played$WTeamID == team_id & games_played$WLoc == "H") +
sum(games_played$WTeamID == team_id & games_played$WLoc == "N")
wlosses <- 1.3 * sum(games_played$LTeamID == team_id & games_played$WLoc == "A") +
0.7 * sum(games_played$LTeamID == team_id & games_played$WLoc == "H") +
sum(games_played$LTeamID == team_id & games_played$WLoc == "N")
wp <- wwins/(wwins + wlosses)
}
return(wp)
}
calc_owp <- function(game_data, team_id){
opp_games <- game_data[game_data$WTeamID == team_id | game_data$LTeamID == team_id, ]
opps <- if_else(opp_games$WTeamID == team_id, opp_games$LTeamID, opp_games$WTeamID)
owp <- opps %>%
map_dbl(~ calc_wp(game_data, team_id = .x, exclusion_id = team_id))
return(mean(owp))
}
calc_oowp <- function(game_data, team_id){
opp_games <- game_data[game_data$WTeamID == team_id | game_data$LTeamID == team_id, ]
opps <- if_else(opp_games$WTeamID == team_id, opp_games$LTeamID, opp_games$WTeamID)
oowp <- opps %>%
map_dbl(~ calc_owp(game_data, team_id = .x))
return(mean(oowp))
}
calc_rpi <- function(game_data, team_id){
rpi <- 0.25 * calc_wp(game_data, team_id) +
0.5 * calc_owp(game_data, team_id) +
0.25 * calc_oowp(game_data, team_id)
return(round(rpi, 4))
}
############################################ FUNCTION PROTOTYPE FOR CALCULATE RPI ####################################################
### The functions for calculating weighted wins and RPI taken from https://dpmartin42.github.io/posts/r/college-basketball-rankings###
### The function relies on dplyr, purrr, forecasts, and limsolve function ###
### Function takes a Df with the results of each schedule game filled out (simmed and actual), returned from the SimSeason function###
### Function returns a dataframe with the wins and RPI calculated for each team along with the split for actual/simmed ###
######################################################################################################################################
###New function to (hopefully) calculate RPI
CalculateRPI=function(SimmedSchedule){
#SimmedCopy=SimmedSchedule
Actuals=SimmedSchedule %>%
filter(SimmedSchedule$Simmed==0)
game_data=Actuals
game_data$WTeamID=ifelse(game_data$HomeScore>game_data$VisitingScore,game_data$HomeTeam,game_data$VisitingTeam)
game_data$WScore=ifelse(game_data$VisitingScore>game_data$HomeScore,game_data$VisitingScore,game_data$HomeScore)
game_data$LTeamID=ifelse(game_data$VisitingScore<game_data$HomeScore,game_data$VisitingTeam,game_data$HomeTeam)
game_data$LScore=ifelse(game_data$VisitingScore<game_data$HomeScore,game_data$VisitingScore,game_data$HomeScore)
game_data$WLoc=ifelse(game_data$Neutral==1,"N",ifelse(game_data$WTeamID==game_data$HomeTeam,"H","A"))
game_data=game_data %>%
select("WTeamID","WScore","LTeamID","LScore","WLoc")
game_data=game_data[complete.cases(game_data), ]
game_data=game_data[game_data$WTeamID %in% Master$school,]
game_data=game_data[game_data$LTeamID %in% Master$school,]
Wins=game_data %>% count(WTeamID)
Loss=game_data %>% count(LTeamID)
Wins$Loss = Loss$n[match(Wins$WTeamID, Loss$LTeamID)]
Wins=Wins %>%
filter(is.na(Wins$WTeamID)==FALSE)
for(i in 1:nrow(Wins)){
Wins[i,4]=calc_rpi(game_data,team_id = Wins[i,1])
message("Actual RPI Calculated for ",paste(Wins[i,1]," ",round((i/nrow(Wins))*100,digits = 1),"% Complete",sep = ""))
}
Wins=Wins %>%
rename(
Team=WTeamID,
ActualWins=n,
ActuallLosses=Loss,
ActualRPI=V4
)
####Now calculate for simmed games
Sims=SimmedSchedule %>%
filter(SimmedSchedule$Simmed==1)
game_data=Sims
game_data$WTeamID=ifelse(game_data$HomeScore>game_data$VisitingScore,game_data$HomeTeam,game_data$VisitingTeam)
game_data$WScore=ifelse(game_data$VisitingScore>game_data$HomeScore,game_data$VisitingScore,game_data$HomeScore)
game_data$LTeamID=ifelse(game_data$VisitingScore<game_data$HomeScore,game_data$VisitingTeam,game_data$HomeTeam)
game_data$LScore=ifelse(game_data$VisitingScore<game_data$HomeScore,game_data$VisitingScore,game_data$HomeScore)
game_data$WLoc=ifelse(game_data$Neutral==1,"N",ifelse(game_data$WTeamID==game_data$HomeTeam,"H","A"))
game_data=game_data %>%
select("WTeamID","WScore","LTeamID","LScore","WLoc")
game_data=game_data[complete.cases(game_data), ]
game_data=game_data[game_data$WTeamID %in% Master$school,]
game_data=game_data[game_data$LTeamID %in% Master$school,]
SWins=game_data %>% count(WTeamID)
SLoss=game_data %>% count(LTeamID)
SWins$Loss = Loss$n[match(SWins$WTeamID, SLoss$LTeamID)]
SWins=SWins %>%
filter(is.na(SWins$WTeamID)==FALSE)
SWins=SWins %>%
rename(
Team=WTeamID,
SimmedWins=n,
SimmedLosses=Loss
)
Result=Wins
Result=merge.data.frame(Wins,SWins)
Result$SimmedWins=Result$SimmedWins+Result$ActualWins
Result$SimmedLosses=Result$SimmedLosses+Result$ActuallLosses
for(i in 1:nrow(Result)){
Result[i,7]=calc_rpi(game_data,team_id = Result[i,1])
message("Simulated RPI Calculated for ",paste(Result[i,1]," ",round((i/nrow(Result))*100,digits = 1),"% Complete",sep = ""))
}
rm(Actuals)
rm(Wins)
rm(SWins)
rm(game_data)
rm(SLoss)
rm(Sims)
beep(8)
message("RPI Calculations Completed for Sims and Actuals")
return(Result)
}
############################ CALCULATECONFCHAMP() FUNCTION PROTOTYPE ##############################################
### Function prototype that returns the conference champion based on one simulation of each conf champinoship ###
### Function takes a result parameter (returned from the CalcRPI fucntion) and is dependent on Master table ###
### Function does not calculate each tournament game per se, but halfs the odds of each seed (1 gets .5, 2 g... ###
### gets .25 and so on. Should definately be fixed when time allows ###
### Returns ConfR dataframe that is the same as Result df, but has ConfChamp column added with either Y or N ###
###################################################################################################################
CalculateConfChamp=function(Result){
Conf=Result
ConfR=merge.data.frame(Conf,Master,all.x = TRUE,by.x = "Team",by.y = "school")
ConfR=ConfR %>%
select("Team","ActualWins","ActuallLosses","ActualRPI","SimmedWins","SimmedLosses","V7","conference")
ConfR=ConfR %>%
rename(SimmedRPI=7)
ConfR$conference=str_trim(ConfR$conference)
ConfR=ConfR %>%
arrange(ConfR$conference,-ConfR$ActualRPI)
##initialize variables for loop
var=ConfR[1,8]
rank=1
for(i in 1:nrow(ConfR)){
if(ConfR[i,8]==var){
ConfR[i,9]=rank
rank=rank+1
}else{
rank=1
var=ConfR[i,8]
ConfR[i,9]=rank
rank=rank+1
}
}
##set values for random number generator
var=ConfR[1,8]
value=1
for(i in 1:nrow(ConfR)){
if(ConfR[i,8]==var){
value=value/2
ConfR[i,10]=value
}else{
value=1
value=value/2
ConfR[i,10]=value
var=ConfR[i,8]
}
}
#select conference champion
var=ConfR[1,8]
dummy=runif(1)
match=0
for(i in 1:nrow(ConfR)){
if(ConfR[i,8]==var){
if(match==0){
if(ConfR[i,10]<=dummy){
ConfR[i,11]="Yes"
match=1
}else{
ConfR[i,11]="No"
}
}else{
ConfR[i,11]="No"
}
}else{
var=ConfR[i,8]
match=0
dummy=runif(1)
if(ConfR[i,10]<=dummy){
ConfR[i,11]="Yes"
match=1
}else{
ConfR[i,11]="No"
}
}
}
ConfR=ConfR %>%
select("Team","ActualWins","ActuallLosses","ActualRPI","SimmedWins","SimmedLosses","SimmedRPI","conference","V11")
ConfR=ConfR %>%
rename(ConfChamp=V11)
rm(Result)
rm(Conf)
beep(2)
return(ConfR)
}
################################### ApplyModels() FUNCTION PROTOTYPE ##############################################
### Function takes in dataframe returned from CalculateConfChamp() function and returns FinalR dataframe ###
### Function is dependent on models ran from RandomForest models on other table named "model3" and "InSeason" ###
### FinalR dataframe has probabilities for each outcome based on simmed results and actual results ###
###################################################################################################################
ApplyModels=function(ConfR){
#ConfR=CalculateConfChamp(Result)
ConfR=ConfR %>%
arrange(-ConfR$SimmedRPI)
###Set Simulated RPI Rankings
rank=1
for(i in 1:nrow(ConfR)){
ConfR[i,10]=rank
rank=rank+1
}
ConfR=ConfR %>%
rename(Rank=10)
ConfR$`Win%`=ConfR$SimmedWins/(ConfR$SimmedWins+ConfR$SimmedLosses)
PredictR=ConfR %>%
select("Rank","Team","conference","Win%","ConfChamp")
PredictR=PredictR %>%
rename(
Name=Team,
Conf=conference,
`Conf Champ?`=ConfChamp
)
#if(exists(Probabilities)==TRUE){Probabilities=Probabilities[0,]}
for(i in 1:nrow(PredictR)){
#tryCatch({
Row=predict(model3,PredictR[i,],"prob")
Row$Team=PredictR[i,2]
if(i==1){Probabilities=Row}
Probabilities=rbind(Row,Probabilities)
#},error=function(e){})
}
FinalR=merge.data.frame(Probabilities,ConfR,all.y = TRUE,by.x = "Team",by.y = "Team")
FinalR=FinalR %>%
rename(
AutoProbSim=A,
AtLargeProbSim=L,
NoBidProbSim=N,
SeedProbSim=S,
)
Actu=FinalR %>%
select("Team","ActualWins","ActuallLosses","ActualRPI","conference")
Actu$`Win%`=Actu$ActualWins/(Actu$ActualWins+Actu$ActuallLosses)
Actu=Actu %>%
arrange(-Actu$ActualRPI)
##Set Actual RPI Rankings
rank=1
for(i in 1:nrow(Actu)){
Actu[i,7]=rank
rank=rank+1
}
Actu=Actu %>%
rename(
Rank=7
)
Actu=Actu %>%
select("Rank","conference","Win%","Team")
Actu=Actu %>%
rename(Conf=conference)
Probabilities=Probabilities[0,]
##Get Actual Probabilities
for(i in 1:nrow(Actu)){
tryCatch({
Row=predict(InSeason,Actu[i,],"prob")
Row$Team=Actu[i,4]
Probabilities=rbind(Row,Probabilities)
},error=function(e){})
}
FinalR=merge.data.frame(Probabilities,FinalR,all.y = TRUE,by.x = "Team",by.y = "Team")
FinalR=FinalR %>%
rename(
AutoProbAct=A,
AtLargeProbAct=L,
NoBidProbAct=N,
SeedProbAct=S,
)
rm(ConfR)
rm(Actu)
rm(Probabilities)
return(FinalR)
}
###############################################################################################################
ShortSim=function(SimmedSchedule,Year,runs){
##
Year=2020
runs=2
##
Schedule=SimmedSchedule
rm(SimmedSchedule)
NotPlayed=Schedule %>%
filter(Simmed==1)
Schedule=Schedule %>%
filter(Simmed==0)
NotPlayed$VisitingTeam=str_trim(NotPlayed$VisitingTeam,side = "both")
NotPlayed$HomeTeam=str_trim(NotPlayed$HomeTeam,side = "both")
for(i in 1:nrow(NotPlayed)){
tryCatch({
message("Simulating ",paste(NotPlayed[i,3]," @ ",NotPlayed[i,2]," on ",NotPlayed[i,1]," *** ",round((i/nrow(NotPlayed))*100,digits = 1),"% Complete ***",sep = ""))
hold=SimGame(NotPlayed[i,3],NotPlayed[i,2],runs)
#Check to make sure aggregate results from sims to not come out equal which would result in a tie. If they do, chose randomly
if(hold[[1]] == hold[[2]]){
if(runif(1)>=.5){
hold[[1]]=1
hold[[2]]=0
}else{
hold[[1]]=0
hold[[2]]=1
}
}
NotPlayed[i,5]=hold[[1]]
NotPlayed[i,4]=hold[[2]]
},error=function(e){})
}
beep(2)
#view(NotPlayed)
NotPlayed=NotPlayed[complete.cases(NotPlayed),]
NotPlayed=NotPlayed %>%
select("date","HomeTeam","VisitingTeam","HomeScore","VisitingScore","Neutral")
NotPlayed$Simmed=1
Schedule=Schedule %>%
select("date","HomeTeam","VisitingTeam","HomeScore","VisitingScore","Neutral")
Schedule$Simmed=0
Schedule=Schedule[complete.cases(Schedule),]
SimmedSchedule=rbind.data.frame(Schedule,NotPlayed)
SimmedSchedule=SimmedSchedule %>%
arrange(date)
beep(1)
Result=CalculateRPI(SimmedSchedule)
ConfR=CalculateConfChamp(Result)
FinalR=ApplyModels(ConfR)
rm(NotPlayed)
rm(ConfR)
rm(Result)
rm(Schedule)
return(FinalR)
}
beep(2)