I've been using a topological data analysis library for clustering in R known as Semi-Supervised Topological Analysis "STA."
However, unlike many libraries I've used before for clustering, there is no call to extract the clusters, e.g. x$cluster
. So, I'd like to how to extract things such as cluster assignment and Silhouette values?
Code and links below.
https://rdrr.io/github/TianshuFeng/SemiMapper/man/mapper.sta.html
#INSTALLATION AND LOADING
devtools::install_github("TianshuFeng/STA")
library(STA)
#DUMMY DATA
x1 = rep(1:3, times = 100)
x2 = rep(1:3, times = 100)
x3 = rep(1:3, times = 100)
x4 = rep(1:3, times = 100)
x5 = rep(1:3, times = 100)
DAT <- data.frame(x1, x2,x3,x4,x5)
DAT <- data.frame(lapply(DAT, function(x) as.numeric(as.character(x))))
#STA CODE
MAP <- mapper.sta(DAT,
filter_values = DAT$x1,
num_intervals = 5,
percent_overlap = 40,
dist_method = "manhattan",
cluster_method = "hierarchical",
NbClust_cluster_method = "single",
num_bins_when_clustering = 10,
cluster_index = "silhouette"
)
simple_visNet(MAP, filter = DAT$x1, color_filter = TRUE)
####SIDE NOTE
#PLEASE ONLY RUN THIS LAST PIECE OF CODE IF THERE IS AN ERROR MESSAGE WITH DEPENDENCIES
cluster_cutoff_at_first_empty_bin <- function(heights, diam, num_bins_when_clustering) {
# if there are only two points (one height value), then we have a single cluster
if (length(heights) == 1) {
if (heights == diam) {
cutoff <- Inf
return(cutoff)
}
}
bin_breaks <- seq(from=min(heights), to=diam,
by=(diam - min(heights))/num_bins_when_clustering)
if (length(bin_breaks) == 1) { bin_breaks <- 1 }
myhist <- hist(c(heights,diam), breaks=bin_breaks, plot=FALSE)
z <- (myhist$counts == 0)
if (sum(z) == 0) {
cutoff <- Inf
return(cutoff)
} else {
# which returns the indices of the logical vector (z == TRUE), min gives the smallest index
cutoff <- myhist$mids[ min(which(z == TRUE)) ]
return(cutoff)
}
}