Bootstrapping in Q Methodology: "Standard Error" graph plotting issue

Ephraim02 · February 4, 2025, 10:36am

After running the code, the bootstrapping process in Q methodology was successfully completed [Zabala and Pascual (2016, pp. 8-9).

After running 5000 iterations (however testing can be done with even 100), based on this code example

link

I cant seem to produce the same graphical results as in this example, where is the issue especially on the standard error graphs for three factors showing difference of standard and bootstrapped zscore on the error bars.

Resources :

Folder with dataset and results (link provided)
(folder with the original matrix file: “Clean_Zimsort_EM3.csv”)

The code and the results should be similar (at least the format) to this one here, I used the same exact code but I have different results presented in graphs to compare code: my code:

# STAGE 1: Q_METHOD ============================================================
# STEP 1: LOAD PACKAGES ----------------------------------------
install.packages("tidyverse")
install.packages("forcats")
install.packages("gtable")
install.packages("grid")
install.packages("qmethod")
install.packages("knitr")

# load packages
library(tidyverse)
library(forcats)
library(gtable)
library(grid)
library(qmethod)
library(knitr)

# STEP 2: VISUALISE SURVEY RESULTS ---------------------------------------

# ggplot2 color palette
gg_color_hue <- function(n) {
  hues = seq(15, 375, length = n + 1)
  hcl(h = hues, l = 65, c = 100)[1:n]
}
gg_cols <- gg_color_hue(n = 8)[-5]

# Load dataset
MyData <- read.csv("Clean_Zimsort_EM3.csv", header = TRUE)

# Check column names
print(colnames(MyData))

# Check structure of the dataset
str(MyData)

# Convert to long format
tmp <- MyData %>%
  pivot_longer(cols = -Statements, names_to = "person", values_to = "value") %>%
  group_by(Statements)

# Color coding for boxplots
colcode <- tmp %>%
  summarize(mean = mean(value),
            q25 = quantile(value, 0.25),
            q75 = quantile(value, 0.75)) %>% 
  mutate(col = ifelse(q25 > 0, gg_cols[1],
                      ifelse(q75 < 0, gg_cols[3],
                             gg_cols[5])))

# Plot
p1 <- ggplot(tmp, aes(x = factor(Statements), y = value)) +
  geom_boxplot(fill = colcode$col, alpha = 0.9) + 
  theme_bw() +
  xlab("Statements") +
  scale_y_continuous(name = "Sorting", breaks = seq(-5, 5, 1))

print(p1)
# Save the plot as a 300dpi image
ggsave("plot_p1.png", plot = p1, dpi = 300, width = 8, height = 6, units = "in")

# Optional message to confirm save
cat("Plot saved as 'plot_p1.png' with 300dpi resolution.\n")

# STEP 3: STANDARD QMETHOD ANALYSIS  ---------------------------------------

# Define the distribution based on the provided input
distribution <- c(-7, -6, -6, -5, -5, -4, -4, -4, 
                  -3, -3, -3, -3, -2, -2, -2, -2, -2, 
                  -1, -1, -1, -1, -1, -1, 
                  0, 0, 0, 0, 0, 0, 0, 
                  1, 1, 1, 1, 1, 1, 
                  2, 2, 2, 2, 2, 
                  3, 3, 3, 3, 
                  4, 4, 4, 
                  5, 5, 
                  6, 6, 
                  7)

# Explore the factor loadings
results <- qmethod(MyData, nfactors = 3, rotation = 'varimax', forced = FALSE, distribution = distribution)
results
results$loa
results$flagged
results$zsc
results$zsc_n
results$qdc
# explore standard results
summary(results)

# STAGE 2: BOOTSAPP ============================================================

# STEP 4: BOOTSTRAPPING  ---------------------------------------

# Define the distribution based on the provided input
distribution <- c(-7, -6, -6, -5, -5, -4, -4, -4, 
                  -3, -3, -3, -3, -2, -2, -2, -2, -2, 
                  -1, -1, -1, -1, -1, -1, 
                  0, 0, 0, 0, 0, 0, 0, 
                  1, 1, 1, 1, 1, 1, 
                  2, 2, 2, 2, 2, 
                  3, 3, 3, 3, 
                  4, 4, 4, 
                  5, 5, 
                  6, 6, 
                  7)

# Bootstrapping
boots <- qmboots(MyData, nfactors = 3, nsteps = 10, load = "auto",
                 rotation = "varimax", indet = "procrustes", fsi = TRUE, cor.method = "kendall",
                 forced = FALSE, distribution = distribution)

# Summarize bootstrap results
qms <- qmb.summary(boots)

# Factor Loading ...........................................

# Q-sorts - look at defining factors
boot_qsort <- round(qms$qsorts, digits = 2)
rn <- tibble(qsort = rownames(boot_qsort))
boot_qsort <- bind_cols(rn, boot_qsort) %>% 
  select(-c(f1.std.1, f2.std.1, f3.std.1))

# Table
kable(boot_qsort,
      caption = "Comparison of standard and bootstrap results for Q-sort factor loadings.")

# check flagging frequency (clear definers of the factors in which they are flagged)
ff <- 0.8
boot_qsort %>% filter(flag.freq1 >= ff | flag.freq2 >= ff | flag.freq3 >= ff)

# Statements ................................................

# zsc
boot_zsc <- qms$statement %>%
  select(contains("zsc.bts")) %>% 
  setNames(nm = c("zsc_f1", "zsc_f2", "zsc_f3"))

# sed
MyData_df <- MyData %>%
  as.data.frame()

boot_loa <- sapply(X = boots$loa.stats, function(x) x$mean)
flagged <- qflag(nstat = nrow(MyData_df), loa = boot_loa)
qmzsc <- qzscores(MyData_df, nfactors = 3,
                  flagged = flagged,
                  loa = boot_loa,
                  forced = FALSE,
                  distribution = distribution)  # <-- Added distribution argument here
boot_sed <- qmzsc$f_char$sd_dif

boot_qdc <- qdc(MyData_df, nfactors = 3, zsc = boot_zsc, sed = boot_sed)$dist.and.cons

# final result: distinguishing and consensus statements
qdc_res <- tibble(statement = 1:nrow(MyData_df),
                  orig = results$qdc$dist.and.cons,
                  boot = boot_qdc) %>% 
  mutate(diff = orig == boot) 

# final table
kable(qdc_res,
      caption = "Distinguishing and consensus statements for standard Q-methodology and its bootstrapped variant")

# define significance thresholds
nstat <- nrow(MyData)
thold.01 <- 2.58/sqrt(nstat)
thold.05 <- 1.96/sqrt(nstat)

# table: bootstrap vs standard results
kable(as_tibble(qms$statements) %>%
        round(digits = 2) %>% 
        mutate(statement = as.character(1:nrow(.))) %>% 
        select(statement, matches("f[1-3].bias|fsc")),
      caption = "Comparison of bootstrap and standard results for statements")

# STEP 5: PLOTTING RESULTS ---------------------------------------

# prepare ggplot solution
zsc <- as_tibble(qms$statements) %>% 
  mutate(statement = as.character(1:nrow(.))) %>% 
  select(statement, matches("zsc.bts|SE|zsc.std"))

sds <- abs(apply(zsc[, c(5, 7, 9)], 1, sd))
zsc <- zsc[order(sds), ]

# desired order
ord <- zsc$statement

# tidying: error lines
zsc_se <- zsc %>% 
  mutate(f1_min = f1.zsc.bts - f1.SE,
         f1_max = f1.zsc.bts + f1.SE,
         f2_min = f2.zsc.bts - f2.SE,
         f2_max = f2.zsc.bts + f2.SE,
         f3_min = f3.zsc.bts - f3.SE,
         f3_max = f3.zsc.bts + f3.SE
  ) %>% 
  select(statement, f1_min:f3_max, f1.zsc.bts, f2.zsc.bts, f3.zsc.bts) %>% 
  rename_all(
    list(
      ~stringr::str_replace_all(., '.zsc', '')
    )
  ) %>% 
  gather(key = "key", value = "value", -"statement") %>% 
  separate(key, c("factor", "type")) %>% 
  mutate(statement = ordered(statement, levels = ord)) %>% 
  spread(type, value) %>% 
  rename(value = bts)

# tidying: points
zsc_point <- zsc %>%
  select(statement, contains("zsc")) %>% 
  rename_all(
    list(
      ~stringr::str_replace_all(., '.zsc', '')
    )
  ) %>% 
  gather(key = "key", value = "value", -"statement") %>% 
  separate(key, c("factor", "type")) %>% 
  mutate(statement = ordered(statement, levels = ord),
         factor = factor(factor),
         type = factor(type)) %>% 
  arrange(statement, factor, type)

# tidying: error lines
zsc_se <- zsc %>% 
  mutate(f1_min = f1.zsc.bts - f1.SE,
         f1_max = f1.zsc.bts + f1.SE,
         f2_min = f2.zsc.bts - f2.SE,
         f2_max = f2.zsc.bts + f2.SE,
         f3_min = f3.zsc.bts - f3.SE,
         f3_max = f3.zsc.bts + f3.SE
  ) %>% 
  select(statement, f1_min:f3_max, f1.zsc.bts, f2.zsc.bts, f3.zsc.bts) %>% 
  rename_all(
    list(
      ~stringr::str_replace_all(., '.zsc', '')
    )
  ) %>% 
  gather(key = "key", value = "value", -"statement") %>% 
  separate(key, c("factor", "type")) %>% 
  mutate(statement = ordered(statement, levels = ord)) %>% 
  spread(type, value) %>% 
  rename(value = bts)

# tidying: points
zsc_point <- zsc %>%
  select(statement, contains("zsc")) %>% 
  rename_all(
    list(
      ~stringr::str_replace_all(., '.zsc', '')
    )
  ) %>% 
  gather(key = "key", value = "value", -"statement") %>% 
  separate(key, c("factor", "type")) %>% 
  mutate(statement = ordered(statement, levels = ord),
         factor = factor(factor),
         type = factor(type)) %>% 
  arrange(statement, factor, type)

# ============================================================

str(zsc_point)
table(zsc_point$type)
head(zsc_se)
levels(zsc_point$type)
head(zsc_point)
head(zsc_se)
# Define colors for error bars and points
c1 <- "black"  # Color for standard z-scores
c2 <- gg_cols[1]  # Color for bootstrapped z-scores (red)

# Create a tibble for annotations (e.g., "Consensus" and "Distinction")
ann_text <- tibble(
  x = -2.25,  # x-position of the annotations
  y = c(9.5, 35),  # y-position of the annotations
  label = c("Consensus", "Distinction"),  # Text labels
  factor = factor("f1", levels = c("f1", "f2", "f3"))  # Factor for faceting
)

# Create the plot
p2 <- ggplot() +
  # Add error bars for bootstrapped z-scores
  geom_errorbarh(
    data = zsc_se,
    aes(y = statement, xmax = max, xmin = min),
    color = c1,  # Color of error bars
    height = 0.2  # Adjust height of error bars
  ) +
  # Add points for z-scores (standard and bootstrapped)
  geom_point(
    data = zsc_point,
    aes(
      x = value, y = statement,
      color = type, fill = type,  # Color and fill by type (standard vs. bootstrapped)
      shape = type  # Simplify shapes: one shape for standard, one for bootstrapped
    ),
    size = 2,  # Size of points
    stroke = 0.5  # Reduce stroke thickness for better visibility
  ) +
  # Add a horizontal dashed line for reference
  geom_hline(
    yintercept = 18.5,  # Position of the dashed line
    linetype = "dashed",  # Dashed line style
    color = "dimgrey"  # Color of the line
  ) +
  # Customize colors for points
  scale_color_manual(
    values = c("std" = c1, "bts" = c2),  # Match the levels of 'type'
    guide = "none"
  ) +
  # Customize fill for points
  scale_fill_manual(
    values = c("std" = c1, "bts" = NA),  # Match the levels of 'type'
    guide = "none"
  ) +
  # Simplify shapes: filled circle for standard, hollow circle for bootstrapped
  scale_shape_manual(
    values = c("std" = 16, "bts" = 21),  # Match the levels of 'type'
    guide = "none"
  ) +
  # Apply a clean theme
  theme_bw() +
  # Split the plot into panels for each factor
  facet_wrap(~factor) +
  # Add axis labels
  xlab("z-score") +
  ylab("Statement") +
  # Add annotations (e.g., "Consensus" and "Distinction")
  geom_text(
    data = ann_text,
    aes(x = x, y = y, label = label),
    angle = 90,  # Rotate labels 90 degrees
    size = 3.5  # Adjust text size
  )

# Print the plot
print(p2)