Segmented Regression on Estimated Probabilities vs. Raw Binary Outcome

I am performing a segmented regression analysis in R and want to confirm if my approach is statistically valid. I first fit a logistic regression model using natural splines to predict a binary outcome. Then, I estimate the predicted probabilities for different values of the independent variable and fit a quasibinomial GLM using these estimated probabilities as the outcome. Finally, I apply the segmented() function to this second model instead of the original logistic regression model. Is this approach valid, or should the segmented regression be performed on the initial model with the raw binary outcome? Would segmenting the estimated probabilities lead to incorrect breakpoint estimation?

library(dplyr)
library(WeightIt)
library(cobalt)
library(marginaleffects)
library(ggplot2)
library(segmented)

d <- my_dataset

d$binary_outcome <- factor(d$binary_outcome)

fit <- glm(binary_outcome ~ splines::ns(x_axis_variable, df=5, intercept = T),
           data=d,
           family="binomial")

values <- with(d, seq(min(d$x_axis_variable, na.rm = TRUE), 
                  max(d$x_axis_variable, na.rm = TRUE),
                      length.out = nrow(d)))

p <- avg_predictions(fit,
                     variables = list(x_axis_variable = values),
                     byfun = function(...) qnorm(mean(...)),
                     transform = pnorm)

fit.lm <- glm(estimate ~ x_axis_variable, data = p, family = quasibinomial)

seg.fit <- segmented(fit.lm, seg.Z = ~ x_axis_variable, npsi=1)

breakpoint <- summary(seg.fit)$psi[, "Est."]
print(breakpoint)

ggplot(p, aes(x = x_axis_variable, y = estimate)) +
  geom_point(alpha = 0.5)+
  # geom_ribbon(aes(ymin = conf.low, ymax = conf.high),
  #             alpha = .3,
  #             fill="slategray")+
  geom_line(aes(y = predict(seg.fit, type = "response")), color = "blue") +
  geom_vline(xintercept = breakpoint, color = "red", linetype = "dashed")

d <- my_dataset

d$binary_outcome <- factor(d$binary_outcome)

fit <- glm(binary_outcome ~ splines::ns(x_axis_variable, df=5, intercept = T),
           data=d,
           family="binomial")

values <- with(d, seq(min(d$x_axis_variable, na.rm = TRUE), 
                  max(d$x_axis_variable, na.rm = TRUE),
                      length.out = nrow(d)))

p <- avg_predictions(fit,
                     variables = list(x_axis_variable = values),
                     byfun = function(...) qnorm(mean(...)),
                     transform = pnorm)

fit.lm <- glm(estimate ~ x_axis_variable, data = p, family = quasibinomial)

seg.fit <- segmented(fit.lm, seg.Z = ~ x_axis_variable, npsi=1)

breakpoint <- summary(seg.fit)$psi[, "Est."]
print(breakpoint)

ggplot(p, aes(x = x_axis_variable, y = estimate)) +
  geom_point(alpha = 0.5)+
  # geom_ribbon(aes(ymin = conf.low, ymax = conf.high),
  #             alpha = .3,
  #             fill="slategray")+
  geom_line(aes(y = predict(seg.fit, type = "response")), color = "blue") +
  geom_vline(xintercept = breakpoint, color = "red", linetype = "dashed")
1 Like