Python 肘部法则:求最优分类数

来源:互联网 发布:云计算工程师薪资待遇 编辑:程序博客网 时间:2024/03/28 19:03

改进版:改进了最佳分类数的获取方式

# -*- coding:utf-8 -*-
import sys

import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.cluster import KMeans


def elbow_rule(data=None, max_k=9, random_state=None):
    """Pick a cluster count for K-Means using the elbow rule.

    For every k in ``1..max_k`` a ``KMeans`` model is fitted and the mean
    Euclidean distance from each sample to its nearest cluster centre is
    recorded (and printed). The returned k is the one reached immediately
    after the largest drop between two consecutive distortion values.

    Args:
        data: Samples to cluster, expected as a 2-D array-like of shape
            (n_samples, n_features). When ``None`` the built-in 14-point
            demo data set is used.
        max_k: Largest cluster count to try. It is capped at the number of
            samples because K-Means cannot fit more clusters than samples.
        random_state: Forwarded to ``KMeans``. ``None`` (the default) keeps
            the library's non-deterministic seeding; pass an int for
            reproducible results.

    Returns:
        The selected number of clusters as an ``int`` (always >= 2).

    Raises:
        ValueError: If fewer than two candidate values of k are available,
            since at least one drop between consecutive k is required.
    """
    if data is None:
        x = np.array([1, 2, 3, 1, 5, 6, 5, 5, 6, 7, 8, 9, 7, 9])
        y = np.array([1, 3, 2, 2, 8, 6, 7, 6, 7, 1, 2, 1, 1, 3])
        data = np.column_stack((x, y))
    else:
        data = np.asarray(data)

    max_k = min(max_k, data.shape[0])
    if max_k < 2:
        raise ValueError("elbow_rule needs at least two candidate values of k")

    # Distortion for each k: mean distance from a sample to its nearest centre.
    # Note this is the plain Euclidean distance, not the squared distance.
    distortions = []
    for k in range(1, max_k + 1):
        kmeans = KMeans(n_clusters=k, random_state=random_state)
        kmeans.fit(data)
        value = cdist(data, kmeans.cluster_centers_, 'euclidean').min(axis=1).mean()
        print(k, value)
        distortions.append(value)

    # drops[i] is the improvement gained by going from k = i + 1 to k = i + 2.
    drops = [cur - nxt for cur, nxt in zip(distortions, distortions[1:])]
    steepest = drops.index(max(drops))

    # Derive k from the position of the steepest drop rather than searching
    # the distortion list by value, which would pick the wrong entry on ties.
    a_v = distortions[steepest + 1]
    index = steepest + 2
    print(max(drops), a_v, index)
    return index


elbow_rule()

旧版:

# -*- coding:utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
import sys


def elbow_rule():
    """Choose a K-Means cluster count for the demo points via the elbow rule.

    Fits ``KMeans`` for k = 1..9 on a fixed set of 14 two-dimensional points,
    printing k together with the mean Euclidean distance from each point to
    its nearest cluster centre. It then prints the list of drops between
    consecutive k values, prints the largest drop with the chosen k, and
    returns that k.
    """
    xs = np.array([1, 2, 3, 1, 5, 6, 5, 5, 6, 7, 8, 9, 7, 9])
    ys = np.array([1, 3, 2, 2, 8, 6, 7, 6, 7, 1, 2, 1, 1, 3])
    points = np.column_stack((xs, ys))
    sample_count = points.shape[0]

    # Mean distance to the nearest centre, one entry per candidate k.
    mean_distances = []
    for k in range(1, 10):
        model = KMeans(n_clusters=k).fit(points)
        nearest = np.min(cdist(points, model.cluster_centers_, 'euclidean'), axis=1)
        value = sum(nearest) / sample_count
        print(k, value)
        mean_distances.append(value)

    # drops[i] compares k = i + 1 against k = i + 2.
    drops = [cur - nxt for cur, nxt in zip(mean_distances, mean_distances[1:])]
    print(drops)

    largest = max(drops)
    # +1 converts the list position to k, +1 more selects the k after the drop.
    index = drops.index(largest) + 2
    print(largest, index)
    return index


elbow_rule()