K-Means Python实现

来源:互联网 发布:小旭音乐 知乎 编辑:程序博客网 时间:2024/06/07 01:28

转载自http://nathanlvzs.github.io/blog/Clustering-KMeans.html

实现代码基本参考《K-Means聚类及其Python实现》一文,中间加了一些自己对距离矩阵的理解。把源码自己研究一遍,可以逐渐掌握用Python(NumPy)进行矩阵运算。其中用到的函数包括:

import numpy as np
np.sum()      # axis=0 或 axis=1
np.outer()
np.dot()
np.mean()
np.square()


# -*- coding: utf-8 -*-
"""K-means clustering demo on three synthetic 2-D Gaussian blobs.

Created on Mon Sep 19 22:23:34 2016
@author: soso

Running the file as a script samples the data, prints the initial distance
matrix and cost, and then plots every k-means iteration.  matplotlib is only
imported when something is actually drawn, so the clustering class itself
needs nothing but NumPy.
"""
import numpy as np


class kmeansclustering(object):
    """K-means by alternating nearest-centre assignment and centroid update.

    Attributes:
        data: float array of shape (N, d) holding the samples.
        k: number of clusters.
        maiter: maximum number of assignment/update iterations.
        epsilon: stop as soon as one iteration lowers the cost by less
            than this amount.
        N: number of samples.
        classess: int array of length N with the cluster index of each
            sample (all zeros until ``kmeans`` has run).
        center: float array of shape (k, d) with the current centroids.
    """

    def __init__(self, data, k, maiter=100, epsilon=1e-12, init_center=None):
        """Store the samples and choose the starting centroids.

        Args:
            data: array-like of shape (N, d).
            k: number of clusters, ``1 <= k <= N``.
            maiter: maximum number of iterations.
            epsilon: minimum cost decrease required to keep iterating.
            init_center: optional array-like of shape (k, d) with the
                starting centroids.  When omitted, ``k`` distinct samples
                are drawn at random from ``data``.

        Raises:
            ValueError: if ``data`` is not 2-D, ``k`` is out of range, or
                ``init_center`` does not have shape (k, d).
        """
        self.data = np.asarray(data, dtype=float)
        if self.data.ndim != 2:
            raise ValueError("data must be a 2-D array of shape (N, d)")
        self.k = k
        self.maiter = maiter
        self.epsilon = epsilon
        self.N = len(self.data)
        if not 1 <= k <= self.N:
            raise ValueError("k must be between 1 and the number of samples")
        self._colors = None
        self.classess = np.zeros(self.N, dtype=int)
        if init_center is None:
            picked = np.random.choice(self.N, self.k, replace=False)
            # Fancy indexing copies, so updating centres never touches data.
            self.center = self.data[picked, :]
        else:
            # Always a float copy: an integer array would silently truncate
            # every centroid written into it, and the caller's array must
            # not be modified by the in-place updates in kmeans().
            self.center = np.array(init_center, dtype=float)
            if self.center.shape != (self.k, self.data.shape[1]):
                raise ValueError("init_center must have shape (k, d)")

    @property
    def colors(self):
        """One RGBA colour per cluster, built on first access."""
        if self._colors is None:
            import matplotlib.cm as cm
            self._colors = cm.rainbow(np.linspace(0, 1, self.k))
        return self._colors

    def getdismat(self):
        """Return the (N, k) matrix of squared Euclidean distances.

        Uses the expansion ``|x - c|^2 = |x|^2 - 2 * x.c + |c|^2``.  The
        cross term ``np.dot(data, center.T)`` is an (N, k) matrix whose
        entry (i, j) is the inner product of sample i with centroid j; the
        two squared-norm vectors are broadcast along its columns and rows.
        """
        data_sq = np.sum(self.data * self.data, axis=1)
        cent_sq = np.sum(self.center * self.center, axis=1)
        cross = np.dot(self.data, self.center.T)
        return data_sq[:, np.newaxis] - 2 * cross + cent_sq[np.newaxis, :]

    def cal_cost(self):
        """Return the sum of squared distances from each sample to its centre."""
        residual = self.data - self.center[self.classess]
        return np.sum(np.square(residual))

    def kmeans(self, plot=True):
        """Run k-means until the cost stops decreasing or maiter is reached.

        Updates ``classess`` and ``center`` in place.

        Args:
            plot: when true, draw the initial state and the state after
                every iteration that did not trigger the stopping rule.
        """
        numiter = 0
        pre_cost = self.cal_cost()
        if plot:
            self.draw()
        while numiter < self.maiter:
            self.classess = np.argmin(self.getdismat(), axis=1)
            for c in range(self.k):
                members = self.data[self.classess == c]
                # A cluster that attracted no samples keeps its previous
                # centre; the mean of an empty selection would be NaN.
                if len(members) > 0:
                    self.center[c] = members.mean(axis=0)
            now_cost = self.cal_cost()
            if pre_cost - now_cost < self.epsilon:
                print("break befor maxiter....")
                break
            if plot:
                self.draw()
            pre_cost = now_cost
            numiter += 1

    def draw(self, plotcen=True):
        """Scatter-plot the first two data dimensions coloured by cluster.

        Args:
            plotcen: when true, also draw the centroids as large circles.
        """
        import matplotlib.pyplot as plt
        plt.figure(figsize=(10, 10), facecolor='white')
        colors_data = self.colors[self.classess]
        plt.scatter(self.data[:, 0], self.data[:, 1], color=colors_data,
                    marker=',', alpha=0.9, s=80)
        plt.axis('equal')
        if plotcen:
            plt.scatter(self.center[:, 0], self.center[:, 1], marker='o',
                        color=self.colors, s=200)
        plt.show()


def main():
    """Cluster 70 points drawn from three unit-covariance Gaussians."""
    mean1 = (0, 8)
    mean2 = (5, 5)
    mean3 = (1, -1)
    cov = [[1, 0], [0, 1]]
    x1 = np.random.multivariate_normal(mean1, cov, 20)  # shape: 20*2
    x2 = np.random.multivariate_normal(mean2, cov, 30)
    x3 = np.random.multivariate_normal(mean3, cov, 20)
    x = np.concatenate((x1, x2, x3), axis=0)
    print("x:")
    print(x)

    # Fixed starting centres keep the walk-through reproducible.
    kmeans = kmeansclustering(data=x, k=3,
                              init_center=[[0, 0], [1, 1], [2, 2]])
    print(kmeans.getdismat())
    print(kmeans.cal_cost())
    kmeans.kmeans()


if __name__ == "__main__":
    main()

参考:

K-means聚类算法

深入浅出k-means

K-Means聚类及其Python实现

0 0
原创粉丝点击