I'll try this out, thank you! Yes, the the subdirectory "~/Not clustered/some value from splist " is correct (1 folder for 2 plots (clustered and unclustered) by value in splist = 154 folders total all in "~/Not clustered")
Ya, I thought I could get away with not posting the PlotHeatMap function because 1. it's probably too long for my short question, 2. A colleague wrote it so I couldn't answer questions about it except that it helps build the maps, and 3. probably distracts the reader, but it seems necessary, so here it is anyway.
PlotHeatMap<-function(data,common_name,fileName=NULL,doCluster=FALSE,redCols=c("2021_WET")) {
printPlot<-TRUE
#Check if there are any non-zero values.
if(sum(data,na.rm=TRUE)==0) {
print(paste("Not enough data for heatmap for",sp))
printPlot<-FALSE
} else { #If there is enough data carry on
#If clustering throw out data until all rows and columns have non-zero data
#and all year_seasons have at least one sample in common with all others
if(doCluster) {
data<-data[rowSums(data,na.rm=TRUE)>0,colSums(data,na.rm=TRUE)>0] #Exclude sites or time periods with no non-zero observations
if(is.null(dim(data)) | sum(dim(data))<5) {
print(paste("Not enough observations to cluster",common_name))
printPlot<-FALSE
} else {
colCluster<-suppressWarnings(vegdist(t(log(data+1)),na.rm=TRUE)) #Check if both clusters work
rowCluster<-suppressWarnings(vegdist(log(data+1),na.rm=TRUE))
if(any(is.na(rowCluster)) | any(is.na(colCluster)) ) {
z<-as.matrix(colCluster)
badCols<-dimnames(z)[[2]][is.na(colSums(z))]
data<-data[,!dimnames(data)[[2]] %in% badCols]
z<-as.matrix(rowCluster)
badRows<-dimnames(z)[[2]][is.na(colSums(z))]
data<-data[!dimnames(data)[[1]] %in% badRows,]
data<-data[rowSums(data,na.rm=TRUE)>0,colSums(data,na.rm=TRUE)>0]
if(is.null(dim(data))) {
print(paste("Not enough observations to cluster",common_name))
printPlot<-FALSE
}
}
}
}
if(printPlot) {
season_mean <- colMeans(data,na.rm=TRUE)
site_mean <- rowMeans(data,na.rm=TRUE)
ha <- try(HeatmapAnnotation(Ave=anno_points(season_mean)))
ha2 <- try(rowAnnotation(Ave= anno_points(site_mean)),silent=TRUE)
if(class(ha)=="try-error" | class(ha2)=="try-error") {
print(paste("Not enough observations to cluster",sp))
printPlot<-FALSE
}
}
if(printPlot) {
cols <- rep('black', ncol(data))
cols[colnames(data) %in% redCols] <- 'red'
rg<-brewer.pal(n = 8, name = "RdYlBu") #originally 8
if(!is.null(fileName)) png(fileName, units="in", width=5, height=5, res=700)
plot1 <- getPlot1(data,doCluster,rg,cols,ha,ha2)
if(any(is.na(plot1@matrix_color_mapping@levels))) {
rg <- brewer.pal(n = 7, name = "RdYlBu") #Change to 7 if plot doesn't work
plot1 <- getPlot1(data, doCluster,rg,cols,ha,ha2)
}
try(print(plot1))
if(!is.null(fileName)) dev.off()
}
}
}
#This is now a standalone function so it can be called from other functions
getPlot1<-function(data,doCluster,rg,cols,ha,ha2) {
if(doCluster) {
plot1<-Heatmap(as.matrix(round(log(data+1), digits = 1)),
cluster_rows = TRUE,
cluster_columns = TRUE,
col = rev(rg),
na_col = "black",
row_names_side = "left", # Row names move to left side
row_dend_side = "left",
column_names_side = "top",# Dendragrams move to left side
# row_names_gp = gpar(cex=0.2, fontface = "bold"),
row_names_gp = gpar(cex=0.4, fontface = "bold"), # L-sites
column_names_gp = gpar(cex=0.4, fontface = "bold", col = cols), # change font size
row_dend_width = unit(2, "cm"),
column_dend_height = unit(1, "cm"),
rect_gp = gpar(col = "grey"),
column_title = "Year/Season",
column_title_gp = gpar(fontsize = 7),
# column_names_rot = 70,
row_title = "Site",
row_title_gp = gpar(fontsize = 7),
heatmap_legend_param = list(title = "ln(x+1)"), # ..."ln(x+1)", color_bar = "discrete"),
clustering_distance_rows = function(x) {vegan::vegdist(x, method = "euclidean",na.rm=TRUE)},
clustering_distance_columns = function(x) {vegan::vegdist(x, method = "euclidean",na.rm=TRUE)},
clustering_method_rows = "ward.D2",
clustering_method_columns = "ward.D2",
row_dend_reorder = FALSE,
column_dend_reorder = FALSE,
column_split = 4,
row_split = 3,
top_annotation = ha,
left_anno = ha2)
} else {
plot1<-Heatmap(as.matrix(round(log(data+1), digits = 1)),
cluster_rows = FALSE,
cluster_columns = FALSE,
col = rev(rg),
na_col = "black",
# row_names_side = "left", # Row names move to left side
# row_dend_side = "left",
# row_names_gp = gpar(cex=0.2, fontface = "bold"),
row_names_gp = gpar(cex=0.4, fontface = "bold"), # L-sites
row_names_side = "left",
column_names_gp = gpar(cex=0.4, fontface = "bold"), # change font size
column_names_side = "top",# Dendragrams move to left side
# row_dend_width = unit(2, "cm"),
# column_dend_height = unit(1, "cm"),
rect_gp = gpar(col = "grey"),
column_title = "Year/Season",
column_title_gp = gpar(fontsize = 7),
# column_names_rot = 70,
row_title = "Site",
row_title_gp = gpar(fontsize = 7),
heatmap_legend_param = list(title = "ln(x+1)"),
# clustering_distance_rows = function(x) {vegan::vegdist(x, method = "bray")},
# clustering_distance_columns = function(x) {vegan::vegdist(x, method = "bray")},
# clustering_method_rows = "ward.D2",
# clustering_method_columns = "ward.D2",
# row_dend_reorder = FALSE,
# column_dend_reorder = FALSE,
# column_split = 4,
# row_split = 3,
top_annotation = ha,
left_anno = ha2)
}
plot1
}
# STOP
# Skip when not clustering
# I couldn't get this to work without deleting the NA rows.
# dcast can automatically give NAs where there are no observations
datlong<-dat
dat<-dat[!is.na(dat$num),]
#Check if there is overlap in sites sampled by time
temp<-datlong %>% group_by(site,year_season) %>%
summarize(samples=length(num[!is.na(num)])) %>%
pivot_wider(names_from = year_season,values_from = samples)
write.csv(temp,"temp.csv")
#The above prints out a table with the sites sampled each year.season
#Below, calculate the number of sites that are sampled in both of two year.seasons
year.season<-sort(unique(dat$year_season))
sites<-sort(unique(dat$site))
SampleMatrix<-matrix(0,length(year.season),length(year.season),dimnames=list(year.season,year.season))
for(i in 1:length(year.season))
for(j in 1:length(year.season))
SampleMatrix[i,j]<-length(unique(sites[sites %in% dat$site[dat$year_season==year.season[i]] & sites %in% dat$site[dat$year_season==year.season[j]]]))
write.csv(SampleMatrix,"SampleMatrix.csv")
#Note that in the early years, there were some time periods that had not overlap
# in which sites were sampled. It isn't possible to cluster two time periods
# with no shared sample sites, because there is no data to compare. Doing the clustering
# from 2005 onward only solves this problem for many species. For some species,
# deleting the sites with only zero observations makes some periods not viable.
#Make summary of number of non-zero observations and total count
# and sort by decreasing total count
spSummary<- dat %>% group_by(common_name) %>%
summarize(total.number=sum(num,na.rm=TRUE),
observations=length(num[num>0 &!is.na(num)])) %>%
arrange(-total.number)
spSummary
summary(spSummary)
dim(spSummary)
tail(spSummary)
# There are species in the data frame that have no observations, so they
# can't be used in analysis.
# Make a splist of only species with at least 1 observation
splist<-spSummary$common_name[spSummary$observations>=1]
length(splist)