Hi!
I want to create a heatmap from a qPCR data set. The script currently uses Col-0 DMSO 6 h as the control sample.
Could you have a look at my script?
I keep getting this error:
Error in select(., Target, Genotype, Avg_Rel)
unused arguments (Target, Genotype, Avg_Rel)
library("ggplot2")
library("dplyr")
library("tidyr")
library("pheatmap")
library("readxl")
# Folder that holds the input workbook; also becomes the working directory
dir <- "/5/"
setwd(dir)
# Components of the input file name: <date>_<experiment>_<info>.xlsx
date <- "20231205"
experiment <- "data"
info <- "ZAT11"
# Base name without extension (reused further down for the plot title)
filename <- paste(date, experiment, info, sep = "_")
# Full path to the workbook; `dir` already ends with a slash
filedir <- paste0(dir, filename, ".xlsx")
# Load the workbook into a data frame
df <- read_excel(filedir)
# TRUE where a value lies inside the Tukey fences of its own vector,
# i.e. within [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]; NA inputs yield NA
within_tukey_fences <- function(x) {
  fence_width <- 1.5 * IQR(x, na.rm = TRUE)
  lower <- quantile(x, 0.25, na.rm = TRUE, names = FALSE) - fence_width
  upper <- quantile(x, 0.75, na.rm = TRUE, names = FALSE) + fence_width
  x >= lower & x <= upper
}

df <- df %>%
  # Outliers are judged among the replicates of one Target in one sample
  group_by(Target, Genotype, Treatment, Time) %>%
  filter(within_tukey_fences(Cq)) %>%
  # Re-group per sample so the Actin reference comes from the same sample
  group_by(Genotype, Treatment, Time) %>%
  mutate(
    # Delta Cq: Cq minus the mean Cq of the housekeeping gene (Actin)
    Delta_Cq = Cq - mean(Cq[Target == "Actin"], na.rm = TRUE),
    # Expression relative to the housekeeping gene, 2^(-Delta Cq)
    Absolute_Expression = 2^(-Delta_Cq)
  )
# Control sample: every target is expressed relative to this condition.
# The values must match the spelling used in the data exactly.
control_genotype <- "Col0"
control_treatment <- "DMSO"
control_time <- 6

# Mean Delta Cq of each target within the control condition
control_mean_Delta_Cq <- df %>%
  filter(
    Genotype == control_genotype,
    Treatment == control_treatment,
    Time == control_time
  ) %>%
  group_by(Target) %>%
  summarise(mean_Delta_Cq = mean(Delta_Cq, na.rm = TRUE))

# Fail early if the control condition is absent: otherwise the join below
# fills mean_Delta_Cq with NA and every relative expression silently
# becomes NA.
if (nrow(control_mean_Delta_Cq) == 0) {
  stop(
    "No rows match the control sample (Genotype = '", control_genotype,
    "', Treatment = '", control_treatment,
    "', Time = ", control_time,
    "). Check the spelling of these values in the data.",
    call. = FALSE
  )
}

# Attach the control mean Delta Cq of the matching target to every row
df <- left_join(df, control_mean_Delta_Cq, by = "Target")

# Fold change relative to the control condition of the same target:
# 2^(-(Delta Cq - control mean Delta Cq))
df <- df %>%
  mutate(Relative_Expression = 2^-(Delta_Cq - mean_Delta_Cq))
# Display order of the factor levels (change when necessary)
order_genotypes <- c("Col0", "zat11", "zat18", "zat11xzat18", "ZAT18OE")
order_treatments <- c("DMSO", "ISX", "Sorbitol", "ISX+S", "Salt")

# Replicate-level data without the housekeeping gene (kept for plotting)
df_noactin <- df %>%
  filter(Target != "Actin")

# Mean and standard error per Target / Genotype / Treatment / Time,
# with Genotype and Treatment converted to ordered-for-display factors
df_expression <- df_noactin %>%
  group_by(Target, Genotype, Treatment, Time) %>%
  summarise(
    Avg_Abs = mean(Absolute_Expression, na.rm = TRUE),
    SE_Abs = sd(Absolute_Expression, na.rm = TRUE) / sqrt(n()),
    Avg_Rel = mean(Relative_Expression, na.rm = TRUE),
    SE_Rel = sd(Relative_Expression, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  ) %>%
  mutate(
    Genotype = factor(Genotype, levels = order_genotypes),
    Treatment = factor(Treatment, levels = order_treatments)
  )

# Same factor ordering for the replicate-level table; assigned with `$`
# because this table is still grouped by Genotype and Treatment
df_noactin$Genotype <- factor(df_noactin$Genotype, levels = order_genotypes)
df_noactin$Treatment <- factor(df_noactin$Treatment, levels = order_treatments)
# Prepare the data for the heatmap
# Reshape to wide format: one row per Target, one column per
# Genotype/Treatment/Time combination, so each cell holds exactly one
# Avg_Rel value (pivoting on Genotype alone would leave several values
# per cell and produce list-columns).
# dplyr verbs are namespaced because select() is easily masked by other
# attached packages, which raises "unused arguments (Target, ...)".
heatmap_wide <- df_expression %>%
  dplyr::arrange(Genotype, Treatment, Time) %>%
  dplyr::mutate(Sample = paste(Genotype, Treatment, Time, sep = "_")) %>%
  dplyr::select(Target, Sample, Avg_Rel) %>%
  pivot_wider(names_from = Sample, values_from = Avg_Rel)

# Plain data frame with Target as row names (base R, so no extra package
# is needed for column_to_rownames())
df_heatmap <- as.data.frame(
  heatmap_wide[, names(heatmap_wide) != "Target", drop = FALSE]
)
rownames(df_heatmap) <- heatmap_wide$Target

# Generate the heatmap with clustering
# Hierarchical clustering needs at least two items, so it is switched off
# for a dimension that has only one row/column.
pheatmap(df_heatmap,
  cluster_rows = nrow(df_heatmap) > 1, # Cluster genes
  cluster_cols = ncol(df_heatmap) > 1, # Cluster samples
  # Values are linear fold changes relative to the control (not
  # log-transformed); "row" or "column" scaling can be tried instead
  scale = "none",
  color = colorRampPalette(c("blue", "white", "red"))(50), # Color gradient
  show_rownames = TRUE,
  show_colnames = TRUE,
  clustering_distance_rows = "euclidean", # Distance metric for rows
  clustering_distance_cols = "euclidean", # Distance metric for columns
  clustering_method = "complete", # Clustering method
  main = paste("Clustered Gene Expression Heatmap:", filename),
  border_color = "white",
  legend = TRUE
)