I made an elbow method to validate the number of clusters i need to use as input on my K-Means algorithm, but i need a new confirmation, so i made the silhouette score, but the result is wrong, where is the error?
#READ EXCEL DB fCenso<- read_excel('fCenso.xlsx') #Z-SCORE fCenso ```Z-Score Qtd Estabelecimentos' <- (fCenso``` Qtd estabelecimentos (Un)` - mean(fCenso``` Qtd estabelecimentos (Un)`))/sd(fCenso``` Qtd estabelecimentos (Un)`) fCenso ```Z-Score Área colhida' <- (fCenso``` Área colhida (Há)` - mean(fCenso``` Área colhida (Há)`))/sd(fCenso``` Área colhida (Há)`) #EXCLUDING DESCRIPTIVE VARIABLES (city, production culture, year) fCensoPadronizado <- fCenso[, -c(1,2,3,4,5)] #ELBOW METHOD (identify the number of clusters) dev.off() fviz_nbclust(fCensoPadronizado, kmeans, method = "wss", k.max = 10) #ELABORATING THE K-MEANS ALGORITHM WITH 3 CLUSTERS AND APPLYING THE COLUMN IN THE ORIGINAL BASE cluster_kmeans3 <- kmeans(fCensoPadronizado, centers = 3) fCenso$cluster_K3 <- factor(cluster_kmeans3$cluster) #ELABORATING THE K-MEANS ALGORITHM WITH 4 CLUSTERS AND APPLYING THE COLUMN IN THE ORIGINAL BASE cluster_kmeans4 <- kmeans(fCensoPadronizado, centers = 4) fCenso$cluster_K4 <- factor(cluster_kmeans4$cluster) #ELABORATING THE K-MEANS ALGORITHM WITH 5 CLUSTERS AND APPLYING THE COLUMN IN THE ORIGINAL BASE cluster_kmeans5 <- kmeans(fCensoPadronizado, centers = 5) fCenso$cluster_K5 <- factor(cluster_kmeans5$cluster) #ELABORATING THE K-MEANS ALGORITHM WITH 6 CLUSTERS AND APPLYING THE COLUMN IN THE ORIGINAL BASE cluster_kmeans6 <- kmeans(fCensoPadronizado, centers = 6) fCenso$cluster_K6 <- factor(cluster_kmeans6$cluster) #RUNNING K-MEANS FOR DIFFERENT VALUES OF K (K = 3, 4, 5, 6) k_means_result <- list() for (k in 3:6) { k_means_result[[as.character(k)]] <- kmeans(fCensoPadronizado, centers = k) } #CALCULATING THE SILHOUETTE SCORE FOR EACH VALUE OF K silhouette_scores <- numeric(length(k_means_result)) for (i in seq_along(k_means_result)) { silhouette_scores[i] <- mean(silhouette(k_means_result[[i]]$cluster, dist(fCensoPadronizado))) } #PRINTING THE SILHOUETTES SCORE'S RESULTS FOR EACH VALUE OF K print(silhouette_scores)
The result:
> print(silhouette_scores)
[1] 1.612449 2.236173 3.186013 2.568394