Silhouettes:聚类结果衡量指标

来源:互联网 发布:网络是把双刃剑图片 编辑:程序博客网 时间:2024/05/16 07:27
import numpy as np
from sklearn import datasets
from sklearn.metrics import pairwise_distances
from sklearn.preprocessing import LabelEncoder


def silhouette_samples(X, labels, metric='euclidean', **kwds):
    """Compute the silhouette coefficient of every sample.

    For a sample with mean distance ``a`` to the other members of its own
    cluster and mean distance ``b`` to the members of the nearest other
    cluster, the coefficient is ``(b - a) / max(a, b)``.

    Parameters
    ----------
    X : array-like
        Input forwarded unchanged to ``sklearn.metrics.pairwise_distances``
        (a feature matrix, or a distance matrix when
        ``metric='precomputed'``).
    labels : array-like
        Cluster label of each sample. Any values ``LabelEncoder`` accepts
        are allowed (strings, non-contiguous integers, ...).
    metric : str or callable, default 'euclidean'
        Metric forwarded to ``pairwise_distances``.
    **kwds
        Extra keyword arguments forwarded to ``pairwise_distances``.

    Returns
    -------
    numpy.ndarray
        One silhouette coefficient per sample, in input order.

    Notes
    -----
    Samples that are alone in their cluster get a coefficient of 0. With
    fewer than two distinct labels there is no neighbouring cluster, so
    samples of a non-singleton cluster evaluate to NaN.
    """
    # Encode the labels as 0..k-1. All comparisons below must use these
    # encoded ids; comparing against ``encoder.classes_`` (the original
    # label values) only works when the labels already are 0..k-1.
    encoder = LabelEncoder()
    labels = encoder.fit_transform(labels)
    n_clusters = len(encoder.classes_)

    distances = pairwise_distances(X, metric=metric, **kwds)
    n_samples = distances.shape[0]

    # Membership masks are loop-invariant, so build each one exactly once.
    masks = [labels == cluster_id for cluster_id in range(n_clusters)]

    # The 1.0 is only a placeholder for singleton clusters; their final
    # score is overwritten with 0 below.
    intra_clust_dists = np.ones(n_samples, dtype=distances.dtype)
    inter_clust_dists = np.inf * intra_clust_dists
    is_singleton = np.zeros(n_samples, dtype=bool)

    for curr_id, mask in enumerate(masks):
        current_distances = distances[mask]

        # Exclude the sample itself (distance 0) from its own-cluster mean.
        n_cluster_mates = np.sum(mask) - 1
        if n_cluster_mates == 0:
            is_singleton[mask] = True
        else:
            intra_clust_dists[mask] = (
                np.sum(current_distances[:, mask], axis=1) / n_cluster_mates
            )

        # Keep the smallest mean distance to any *other* cluster.
        for other_id, other_mask in enumerate(masks):
            if other_id == curr_id:
                continue
            other_distances = np.mean(current_distances[:, other_mask], axis=1)
            inter_clust_dists[mask] = np.minimum(
                inter_clust_dists[mask], other_distances
            )

    sil_samples = inter_clust_dists - intra_clust_dists
    sil_samples /= np.maximum(intra_clust_dists, inter_clust_dists)
    # A sample that is alone in its cluster has no intra-cluster distance;
    # by definition its silhouette is 0.
    sil_samples[is_singleton] = 0
    return sil_samples


# Demo: per-sample silhouette of the iris data set, using the true species
# labels as the clustering.
dataset = datasets.load_iris()
data = dataset.data
target = dataset.target
result = silhouette_samples(data, target)
print(result)

0 0