Given these skeleton structures for data frames of Intervals and Sightings, what approach in R can best match the most recent sightings with the corresponding intervals?
Intervals {
startTime (integer)
endTime (integer)
uidMostRecentSightingFamilyDescA
uidMostRecentSightingFamilyDescB
uidMostRecentSightingFamilyDescC
...
}
where:
familyDescr A, B, ... are the family descriptions of interest,
Intervals do not overlap,
time of most recent sighting <= endTime of Interval
Sightings {
uid
times
familyName
familyDescr
commonName
genusSpecies
}
where:
a sighting may appear in multiple Intervals,
family names and descriptions are unique,
any commonName/genusSpecies may appear with multiple familyNames & Descrs
Code to generate sample Sightings and Intervals:
library(readr)
library(dplyr)
# Embedded CSV text listing birds: one row per bird with the columns
# familyName, familyDescr, commonName and genusSpecies. The lines beginning
# with "#" inside the string are a revision-log header, not data; they are
# part of the string and rely on the parser being told that "#" starts a
# comment.
csv<-'
#$Log: birds.csv,v $
#Revision 1.1 2023/12/26 20:49:13 dutky
#Initial revision
#
#
familyName,familyDescr,commonName,genusSpecies
passeriformes,perchingBirds,houseSparrow,passerDomesticus
passeriformes,perchingBirds,americanRobin,turdusMigratorius
passeriformes,perchingBirds,europeanStarling,sturnusVulgaris
passeriformes,perchingBirds,northernCardinal,cardinalisCardinalis
accipitriformes,birdsOfPrey,baldEagle,haliaeetusLeucocephalus
accipitriformes,birdsOfPrey,redTailedHawk,buteoJamaicensis
accipitriformes,birdsOfPrey,peregrineFalcon,falcoPeregrinus
strigiformes,owls,barnOwl,tytoAlba
strigiformes,owls,greatHornedOwl,buboVirginianus
strigiformes,owls,snowyOwl,buboScandiacus
anseriformes,waterfowl,mallard,anasPlatyrhynchos
anseriformes,waterfowl,canadaGoose,brantaCanadensis
anseriformes,waterfowl,mandarinDuck,aixGalericulata
galliformes,gamebirds,chicken,gallusGallusDomesticus
galliformes,gamebirds,wildTurkey,meleagrisGallopavo
galliformes,gamebirds,quail,coturnixCoturnix
columbiformes,pigeonsAndDoves,rockPigeon,columbaLivia
columbiformes,pigeonsAndDoves,mourningDove,zenaidaMacroura
psittaciformes,parrots,budgerigar,melopsittacusUndulatus
psittaciformes,parrots,africanGreyParrot,psittacusErithacus
psittaciformes,parrots,scarletMacaw,araMacao
coraciiformes,kingfishersAndBeeEaters,beltedKingfisher,megaceryleAlcyon
coraciiformes,kingfishersAndBeeEaters,europeanBeeEater,meropsApiaster
apodiformes,swiftsAndHummingbirds,commonSwift,apusApus
apodiformes,swiftsAndHummingbirds,rubyThroatedHummingbird,archilochusColubris
struthioniformes,ostriches,commonOstrich,struthioCamelus
suliformes,cormorantsAndGannets,greatCormorant,phalacrocoraxCarbo
suliformes,cormorantsAndGannets,northernGannet,morusBassanus
'
# Parse the embedded CSV text into the `birds` lookup table. Text following
# "#" is treated as a comment, which drops the revision-log header lines.
# show_col_types = FALSE silences readr's column-specification message; the
# previous invisible() wrapper had no effect, because the value of an
# assignment is never auto-printed and invisible() does not suppress messages.
birds <- read_csv(csv, comment = "#", show_col_types = FALSE)
# Generate a tibble of `count` random, non-overlapping sample intervals.
#
# Args:
#   count: number of intervals to generate. Start times are drawn without
#          replacement from 1:65536, so `count` may not exceed 65536.
#
# Returns:
#   A tibble with one row per interval, ordered by start time, with columns
#     starts - interval start time (integer)
#     ends   - interval end time (integer), always >= starts
#   followed by one all-NA column per randomly selected family description
#   (named after the familyDescr value), to be filled in by the caller.
#
# Reads the file-level `birds` table to obtain the available familyDescr
# values.
genIntervals <- function(count) {
  # Distinct start times in ascending order.
  starts <- sort(sample(1:65536, count))

  # Random, non-empty subset of the family descriptions.
  families <- unique(birds$familyDescr)
  theseFamilies <- sample(families, sample.int(length(families), 1L))

  # Latest permitted end of each interval: one tick before the next start, so
  # that an end can never coincide with the following interval's start. The
  # last interval has no successor and keeps the loose bound sum(starts).
  endMax <- c(starts[-1] - 1L, sum(starts))

  # Draw each end uniformly from starts[i]..endMax[i]. An offset from
  # sample.int() is used instead of sample(a:b, 1), because sample() treats a
  # single number `a` as the range 1:a and could return an end before the
  # start whenever a == b.
  ends <- vapply(
    seq_along(starts),
    function(i) starts[i] + sample.int(endMax[i] - starts[i] + 1L, 1L) - 1L,
    integer(1)
  )

  # One all-NA placeholder column per selected family.
  placeholders <- lapply(
    setNames(theseFamilies, theseFamilies),
    function(family) rep(NA, length(starts))
  )

  as_tibble(c(list(starts = starts, ends = ends), placeholders))
}
# Generate `count` random sample sightings.
#
# Args:
#   count: number of sightings to generate (default 10); 0 yields an empty
#          table.
#
# Returns:
#   A tibble of rows drawn with replacement from the file-level `birds`
#   table, with two added columns:
#     times - sighting time, drawn with replacement from 1:65536
#     uid   - identifier 1..count, assigned before sorting, so uids are not
#             in time order
#   The result is sorted by `times`.
genSightings <- function(count = 10) {
  # sample.int()/seq_len() instead of 1:n, so zero rows do not produce the
  # two-element sequence c(1, 0).
  picked <- sample.int(nrow(birds), count, replace = TRUE)
  birds[picked, ] %>%
    mutate(
      times = sample(1:65536, count, replace = TRUE),
      uid = seq_len(n())
    ) %>%
    arrange(times)
}
# Build the sample data used in the question: 10 intervals, 100 sightings.
intervals <- genIntervals(count = 10)
sightings <- genSightings(count = 100)