# survey analysis

Hey!
I'm new here and need your help.

I have a dataset that covers a flood and the years after it:
The flood lasted from 24.12.1717 to 07.01.1718.
The post-flood period ran from 07.01.1718 to 31.12.1721.

I would now like to calculate the mortality of children between the ages of 0 and 5.
Specifically, I want to know whether children died faster or slower before the flood, during the flood, or after the flood.
I hope you understand my question.

I'll post my progress here:

library(data.table)
library(Hmisc)
library(broom)

# NOTE(review): setwd() makes the script depend on where it is launched from;
# kept unchanged because code outside this view may rely on relative paths.
setwd("data")

# Build `kidfam`: one row per child from `kid`, joined to the family record
# whose `doc` equals the child's `famnrk1`.
# `fam` and `kid` are not created in the visible script; they must already
# exist in the workspace at this point.
fam <- as.data.table(fam)
kid <- as.data.table(kid)

temp1 <- kid[, list(doc, sex, idk, famnrk1, gebortk,
                    gebk4, gebk8, todk4, todk8)]
# Rename `doc` to `famnrk1` while selecting, so the join key has the same
# name on both sides and no `names<-` assignment on a data.table is needed.
temp2 <- fam[, list(famnrk1 = doc, todf4, todf8, todm4, todm8)]

kidfam <- merge(temp1, temp2, by = "famnrk1", all.x = TRUE)

# Corrected death dates: start from the child's own record and, where that is
# missing, fall back to the joined family record (`todf*` for sex "W",
# `todm*` for sex "M").
kidfam[, todk4_cor := todk4]
kidfam[sex == "W" & is.na(todk4_cor), todk4_cor := todf4]
kidfam[sex == "M" & is.na(todk4_cor), todk4_cor := todm4]

kidfam[, todk8_cor := todk8]
kidfam[sex == "W" & is.na(todk8_cor), todk8_cor := todf8]
kidfam[sex == "M" & is.na(todk8_cor), todk8_cor := todm8]

# Age at death in years, only where the *4 and *8 variants of both the birth
# and the death date agree and are not missing. as.numeric() keeps the result
# a plain number; if the date columns are Date objects the subtraction would
# otherwise leave a difftime labelled in days.
kidfam[
  gebk4 == gebk8 & todk4_cor == todk8_cor &
    !is.na(gebk4) & !is.na(todk4_cor),
  ageDk := as.numeric(todk4_cor - gebk4) / 365.25
]

Create a dummy variable for death before the fifth birthday:
# Dummy for death before age five: 1 if ageDk < 5, 0 if ageDk >= 5, and NA
# where ageDk is missing. A single assignment also overwrites stale values
# if the script is re-run.
kidfam[, todvorfünf := as.numeric(ageDk < 5)]
table(kidfam$todvorfünf)

The number of deaths is thus determined correctly. However, the number of survivors is underestimated. Many children have no known date of death because they emigrated as adults and are therefore censored.

However, we can assume survival until at least the 15th birthday if the child came from a marriage that was under observation. This criterion is met if we know the date of marriage and the date of death of at least one spouse and know that the other survived.

# Flag marriages that count as "under observation" (`bekannt` = 1, else 0).
# In every option the *4 and *8 variants of the marriage date must agree and
# be non-missing. table() is printed after each step to track the counts.
# NOTE(review): `temp` is reassigned further down in this script, so `bekannt`
# must be merged into `kidfam` before then if it is meant to be used.
temp <- fam[, list(doc, dat4, dat8, gebf4, gebf8, todf4, todf8,
                   gebm4, gebm8, todm4, todm8)]

# Option 1: the exact death dates of both spouses are known.
temp[
  dat4 == dat8 & !is.na(dat4) &
    todf4 == todf8 & !is.na(todf4) &
    todm4 == todm8 & !is.na(todm4),
  bekannt := 1
]
table(temp$bekannt)

# Option 2: the man survived the woman, but `todm8` is unknown.
temp[
  dat4 == dat8 & !is.na(dat4) &
    todf4 == todf8 & !is.na(todf4) &
    todm4 >= todf8 & !is.na(todm4) & is.na(todm8),
  bekannt := 1
]
table(temp$bekannt)

# Option 3: the woman survived the man, but `todf8` is unknown.
temp[
  dat4 == dat8 & !is.na(dat4) &
    todm4 == todm8 & !is.na(todm4) &
    todf4 >= todm8 & !is.na(todf4) & is.na(todf8),
  bekannt := 1
]
table(temp$bekannt)

# Every marriage not matched by one of the options is marked as not observed.
temp[is.na(bekannt), bekannt := 0]
table(temp$bekannt)

# Build a long table of candidate episode boundaries per child and number
# the resulting rows within each child.

# Fifth birthday, approximated as birth date plus 5 * 365.25 days.
kidfam[!is.na(gebk4), gebk_5 := gebk4 + 365.25 * 5]

# Start of the flood and start of the post-flood period.
kidfam[, Flut := as.Date("1717-12-24")]
kidfam[, nachFlut := as.Date("1718-01-07")]

temp <- kidfam[, list(idk, gebk4, todk4_cor, gebk_5,
                      Flut, nachFlut, todm4, todf4)]

# Stack all date columns into one `date` column (one row per child and date).
# data.table's melt() replaces stats::reshape(); na.rm = TRUE drops missing
# dates, and only `idk` and `date` are kept.
temp_long <- melt(
  temp,
  id.vars = "idk",
  measure.vars = c("gebk4", "todk4_cor", "gebk_5",
                   "Flut", "nachFlut", "todm4", "todf4"),
  value.name = "date",
  na.rm = TRUE
)[, list(idk, date)]
setkey(temp_long, idk, date) # sort by `idk` and `date`

# Attach every boundary date to the child's full record.
data <- merge(kidfam, temp_long, by = "idk")

setkey(data, idk, date)
# `epi` numbers the rows of each child in date order; `maxepi` is the number
# of rows for that child.
data[, c("epi", "maxepi") := list(seq_len(.N), .N), by = idk]
data[, time1 := date]