from sklearn import metrics
# Ground-truth classes and predicted cluster ids for the same six samples.
labels_true = [0, 0, 0, 1, 1, 1]
labels_pred = [0, 0, 1, 1, 2, 2]

# Print homogeneity, completeness and V-measure (with beta=0.6), one score
# per line, in that order.
print(
    metrics.homogeneity_score(labels_true, labels_pred),
    metrics.completeness_score(labels_true, labels_pred),
    metrics.v_measure_score(labels_true, labels_pred, beta=0.6),
    sep="\n",
)

0.6666666666666669
0.420619835714305
0.5467344787062375

from sklearn.metrics.cluster import pair_confusion_matrix
from sklearn import metrics
import numpy as np
# Pair confusion matrix between two labelings of the same four samples.
C = pair_confusion_matrix([0, 0, 1, 1], [0, 0, 1, 2])
print(C)
# Unpack the 2x2 matrix row by row: first row is (TN, FP), second is (FN, TP).
(TN, FP), (FN, TP) = C

[[8 0]
 [2 2]]

def _fmi_from_pair_counts(tp, fp, fn):
    """Return tp / sqrt((tp + fp) * (tp + fn)), the FMI from pair counts."""
    return tp / np.sqrt((tp + fp) * (tp + fn))


# FMI from the TP/FP/FN pair counts taken from the pair confusion matrix C.
FMI = _fmi_from_pair_counts(TP, FP, FN)
print(FMI)
# scikit-learn can also compute the FMI directly from the two labelings; the
# two results agree.
print(metrics.fowlkes_mallows_score([0, 0, 1, 1], [0, 0, 1, 2]))

# Two identical partitions have no off-diagonal elements in the pair confusion
# matrix and an FMI score of 1.

C = pair_confusion_matrix([0, 0, 1, 1], [0, 0, 1, 1])
print(C)
(TN, FP), (FN, TP) = C

FMI = _fmi_from_pair_counts(TP, FP, FN)
print(FMI)

print(metrics.fowlkes_mallows_score([0, 0, 1, 1], [0, 0, 1, 1]))

0.7071067811865475
0.7071067811865476
[[8 0]
 [0 4]]
1.0
1.0

from clusim.clustering import Clustering
import clusim.sim as sim

# Ground truth plus three candidate labelings of the same nine elements:
# an arbitrary prediction, everything in one cluster, and every element in
# its own cluster.
true_labels = [1, 1, 1, 2, 2, 2, 3, 3, 3]
predicted_labels = [1, 2, 2, 3, 3, 1, 1, 1, 1]
single_cluster_labels = [1, 1, 1, 1, 1, 1, 1, 1, 1]
completely_fragmented_labels = [1, 2, 3, 4, 5, 6, 7, 8, 9]

# clusim uses its own data format: wrap each membership list in a Clustering.
true_clustering = Clustering().from_membership_list(true_labels)
predicted_clustering = Clustering().from_membership_list(predicted_labels)
predicted_single_cluster = Clustering().from_membership_list(single_cluster_labels)
predicted_completely_fragmented = Clustering().from_membership_list(
    completely_fragmented_labels
)

# Score every candidate against the ground truth with FMI, NMI and the
# element-centric similarity. The loop variable gets a real name: `_` is
# conventionally a throwaway, and binding it in IPython/Jupyter shadows the
# "last output" variable.
for candidate in (
    predicted_clustering,
    predicted_single_cluster,
    predicted_completely_fragmented,
):
    fmi = sim.fowlkes_mallows_index(true_clustering, candidate)
    nmi = sim.nmi(true_clustering, candidate)
    elem_cent = sim.element_sim(true_clustering, candidate)
    print(f"FMI = {fmi}, NMI = {nmi}, elem-cent = {elem_cent}")
# The package can compute many more scores; print every available similarity
# measure for the true vs. predicted clustering (code adapted from the clusim
# documentation: https://hoosier-clusters.github.io/clusim/html/clusim.html).

# Two right-aligned columns, 25 characters wide each: measure name and value.
row_format2 = "{:>25}" * 2
for simfunc in sim.available_similarity_measures:
    # Resolve the measure by attribute name instead of eval-ing a built string.
    measure = getattr(sim, simfunc)
    print(
        row_format2.format(
            simfunc, measure(true_clustering, predicted_clustering)
        )
    )

FMI = 0.4811252243246881, NMI = 0.5451600159416435, elem-cent = 0.5407407407407406
FMI = 0.5, NMI = 0.0, elem-cent = 0.33333333333333326
FMI = 0.0, NMI = 0.6666666666666665, elem-cent = 0.33333333333333326
            jaccard_index                   0.3125
               rand_index       0.6944444444444444
            adjrand_index      0.26666666666666655
    fowlkes_mallows_index       0.4811252243246881
                 fmeasure      0.47619047619047616
             purity_index       0.7777777777777777
     classification_error      0.22222222222222232
        czekanowski_index      0.47619047619047616
               dice_index      0.47619047619047616
           sorensen_index      0.47619047619047616
    rogers_tanimoto_index       0.5319148936170213
          southwood_index      0.45454545454545453
      pearson_correlation      0.00102880658436214
         corrected_chance      0.16994265720286564
      sample_expected_sim      0.10526315789473684
                      nmi       0.5451600159416435
                       mi       0.8233232815796736
                   adj_mi       0.3410389011275906
                      rmi       0.1464053299155769
                       vi       1.3738364418444755
       geometric_accuracy       0.7777777777777778
          overlap_quality                     -0.0
                     onmi       0.6303315236619905
              omega_index      0.26666666666666655

Evaluating Clustering Performance

Silhouette coefficient

Calinski Harabasz Index

Davies-Bouldin Index

Homogeneity Score

Completeness Score

V-Measure

Mutual-information-based similarity score

Pair confusion matrix

Fowlkes–Mallows Index

Element-centric similarity and issues with FMI and NMI