The code below finds the "optimal" number of clusters. In the example below, the result was 2 clusters.
Quickly explaining the code: first, the Ideal Point is calculated, which has the minimum breadth of coverage and the maximum waste production value. Then, the Final Solution is selected as the closest Sk solution to the Ideal Point, and k is selected as the best number of clusters. For my purpose this code works fine. However, I would like to know if there is any approach to evaluating the criteria that help to select the number of clusters for this example? In this case, the criteria are breadth of coverage and waste generation potential. Maybe use some multicriteria method? I'm open to suggestions.
library(rdist)
library(geosphere)
library(dplyr)
df1<-structure(c(3315.15739850453, 2589.99900391847, 8869.03953528711,7708.11156943467,
7708.11156943467, 6015.73943344633, 6577.67722745424,5805.83051830159,
4791.95901175901, 4791.95901175901, 4791.95901175901,
4617.00232604443, 4430.08754078434, 4430.08754078434, 4430.08754078434,
4483.18948278638, 4483.18948278638, 3302.09189638597,
1635156.04305,474707.64025, 170773.40775, 64708.312, 64708.312,
64708.312, 949.72635, 949.72635, 949.72635, 949.72635, 949.72635, 949.72635,
949.72635, 949.72635, 949.72635, 949.72635, 949.72635, 949.72635), .Dim = c(18L, 2L
), .Dimnames = list(NULL, c("Breadth of Coverage", "Waste")))
df2<-structure(c(14833.1911512297, 11518.0337527251, 10088.9627146591,8928.03474880667,
8928.03474880667, 7235.66261281833, 7235.66261281833,6463.81590366569,
5449.94439712311, 5449.94439712311, 5449.94439712311,
5274.98771140853, 5088.07292614843, 5088.07292614843, 5088.07292614843,
5088.07292614843,5088.07292614843,3906.975,3315.15739850453,
2589.99900391847, 8869.03953528711, 7708.11156943467, 7708.11156943467,
6015.73943344633, 6577.67722745424, 5805.83051830159, 4791.95901175901,
4791.95901175901, 4791.95901175901, 4617.00232604443, 4430.08754078434,
4430.08754078434, 4430.08754078434, 4483.18948278638,4483.189,4483.189,
1635156.04305, 474707.64025, 170773.40775,64708.312, 64708.312, 64708.312,
949.72635, 949.72635, 949.72635,949.72635, 949.72635, 949.72635, 949.72635,
949.72635, 949.72635,949.72635,949.7264,949.7264),
.Dim = c(18L, 3L),.Dimnames = list(NULL, c("Coverage","Breadth of Coverage", "Waste")))
#Ideal Point is considered the minimum breadth of coverage and maximum production of Waste
IdealPoint<-as.matrix(t(c(min(df1[,1]),max(df1[,2]))))
distance_df1_Ideal<-as.matrix(dist(rbind(df1,IdealPoint)))
#calculating the distance of the cluster solutions to the ideal point
distance_cluster_ideal<-min(distance_df1_Ideal[as.matrix(dim(df1))[1,1]+1,1:as.matrix(dim(df1))[1,1]])
a<-which(distance_df1_Ideal[dim(df1)[1]+1,]==distance_cluster_ideal)
FinalSolution<-df1[a[1],]
f1 <- function(mat, vec, dim = 1, tol = 1e-7, fun = all)
which(apply(mat, dim, function(x) fun(dist(x - vec) < tol)))
b<-as.matrix(f1(df2[,2:3],FinalSolution,fun=any))# optimal value of number of clusters
k<-b[1]+1 #number of clusters
> k
[1] 2