Python 实现 K-means

来源:互联网 发布:数据分析培训课程 编辑:程序博客网 时间:2024/06/01 20:25

# -*- coding: utf-8 -*-
"""K-means and a centroid-seeded k-medoids variant for n-by-p data matrices.

Both entry points optionally draw a scatter plot of the first two feature
columns, coloured by cluster.
"""
import pandas as pd
import numpy as np
try:
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is optional: the clustering itself only needs numpy.
    plt = None
import scipy.io as sio
import random
import math

# Cluster label -> list of member row indices, refreshed by every run of
# k_means / k_medoids (kept as a module-level dict for backward compatibility).
maps = {}
clr = ['blue', 'green', 'yellow', 'red', 'black', 'cyan', 'pink']


def calcCetroid(X, sub):
    """Return the mean of the rows of X selected by the indices in sub.

    Args:
        X: n-by-p data matrix.
        sub: sequence of row indices into X.

    Returns:
        A float array of length p.

    Raises:
        ValueError: if sub is empty (the mean would be undefined).
    """
    sub = np.asarray(sub, dtype=int)
    if sub.size == 0:
        raise ValueError("cannot compute the centroid of an empty cluster")
    return np.asarray(X)[sub, :].mean(axis=0, dtype=float)


def _assign_labels(ctrx, X):
    """Return, for every row of X, the index of the nearest row of ctrx."""
    diff = ctrx[:, np.newaxis, :] - X[np.newaxis, :, :]
    distance = np.sqrt(np.sum(diff ** 2, axis=2))  # k-by-n Euclidean distances
    return np.argmin(distance, axis=0)


def _medoid_near_centroid(X, members):
    """Return the member row of X that lies closest to the members' centroid."""
    centroid = calcCetroid(X, members)
    dd = np.sqrt(np.sum((X[members, :] - centroid) ** 2, axis=1))
    return X[members[np.argmin(dd)], :]


def _run_clustering(X, k, max_iter, update_center):
    """Alternate assignment and center updates from k randomly chosen rows.

    update_center(X, members) must return the new center for one non-empty
    cluster. Returns (labels, centers, per-iteration centers).
    """
    n, p = X.shape
    seeds = random.sample(range(n), k)
    ctrx = np.array(X[seeds, :], dtype=float)
    labels = np.zeros(n, dtype=int)
    history = []
    for _ in range(max_iter):
        labels = _assign_labels(ctrx, X)
        updated = ctrx.copy()
        for j in range(k):
            members = np.flatnonzero(labels == j)
            # An empty cluster keeps its previous center instead of
            # collapsing to the origin.
            if members.size:
                updated[j] = update_center(X, members)
        history.append(updated)
        converged = np.array_equal(updated, ctrx)
        ctrx = updated
        if converged:
            # Further iterations would reproduce the same assignment.
            break
    iter_ctrs = np.array(history, dtype=float).reshape(len(history), k, p)
    return labels, ctrx, iter_ctrs


def _record_clusters(labels):
    """Refresh the module-level maps with label -> member row indices."""
    maps.clear()
    for row, label in enumerate(labels):
        maps.setdefault(int(label), []).append(row)


def _plot_clusters(X, labels, k):
    """Scatter the first two columns of X, one colour per cluster label."""
    if plt is None:
        raise ImportError(
            "matplotlib is required for plotting; pass plot=False to skip it")
    for label in range(k):
        members = np.flatnonzero(labels == label)
        if members.size:
            # Cycle through the palette so k > len(clr) does not overflow.
            plt.scatter(X[members, 0], X[members, 1],
                        c=clr[label % len(clr)])
    plt.show()


def k_medoids(X, k, max_iter=100, plot=True):
    """Cluster X around k medoids (actual data points).

    Each round assigns every point to its nearest medoid, then replaces each
    medoid with the cluster member closest to that cluster's centroid.

    Args:
        X: n-by-p data matrix.
        k: number of clusters; must not exceed n.
        max_iter: upper bound on the number of rounds.
        plot: when true, scatter-plot the first two columns by cluster.

    Returns:
        (idx, ctrx, iter_ctrs): idx is the length-n label array from the last
        assignment step, ctrx the k-by-p medoids, and iter_ctrs the medoids
        after every round with shape (rounds, k, p).

    Raises:
        ValueError: if k is larger than the number of rows (from
            random.sample).
    """
    X = np.asarray(X)
    idx, ctrx, iter_ctrs = _run_clustering(X, k, max_iter,
                                           _medoid_near_centroid)
    _record_clusters(idx)
    if plot:
        _plot_clusters(X, idx, k)
    return idx, ctrx, iter_ctrs


def k_means(X, k, max_iter=100, plot=True):
    """Cluster X into k groups with Lloyd's k-means iteration.

    Args:
        X: data point features, n-by-p matrix.
        k: the number of clusters; must not exceed n.
        max_iter: upper bound on the number of rounds.
        plot: when true, scatter-plot the first two columns by cluster.

    Returns:
        (idx, ctrx, iter_ctrs): idx is the length-n cluster label array from
        the last assignment step, ctrx the k-by-p cluster centers, and
        iter_ctrs the centers after every round with shape (rounds, k, p).

    Raises:
        ValueError: if k is larger than the number of rows (from
            random.sample).
    """
    X = np.asarray(X)
    idx, ctrx, iter_ctrs = _run_clustering(X, k, max_iter, calcCetroid)
    _record_clusters(idx)
    if plot:
        _plot_clusters(X, idx, k)
    return idx, ctrx, iter_ctrs


if __name__ == "__main__":
    dataSet = sio.loadmat("kmeans_data.mat")
    X = dataSet["X"]
    k = 5
    # Swap the two calls below to run plain k-means instead.
    # k_means(X, k)
    k_medoids(X, k)







0 0
原创粉丝点击