使用 Python 进行聚类(scikit-learn、scipy)
来源:互联网 发布:智百威软件800 编辑:程序博客网 时间:2024/05/19 08:26
使用 Python 进行聚类(scikit-learn、scipy)
- %matplotlib inline
- import scipy.io as sio
- import matplotlib.pyplot as plt
-
-
- ''
-
-
-
def two_cluster():
    """Return the transposed ``'X'`` array stored in two_cluster.mat.

    The file is read from the relative path ``cluster_data/two_cluster.mat``.
    """
    mat_contents = sio.loadmat(u'cluster_data/two_cluster.mat')
    return mat_contents['X'].T
-
def three_cluster():
    """Return the transposed ``'X'`` array stored in three_cluster.mat.

    The file is read from the relative path ``cluster_data/three_cluster.mat``.
    """
    mat_contents = sio.loadmat(u'cluster_data/three_cluster.mat')
    return mat_contents['X'].T
-
def five_cluster():
    """Load five_cluster.mat and return its ``'x'`` and ``'y'`` arrays combined.

    The ``'x'`` array is stacked on top of the ``'y'`` array and the result
    is transposed, so each row of ``x``/``y`` becomes one column of the
    returned array.

    NOTE(review): other code in this file plots column 2 of the result as
    the colour value, which presumably means ``'x'`` holds two coordinate
    rows and ``'y'`` one label row -- confirm against the data file.
    """
    # numpy is not imported by the cell that defines this loader, so bring
    # it into scope locally instead of relying on a later import.
    import numpy as np

    path = u'cluster_data/five_cluster.mat'
    mat_contents = sio.loadmat(path)
    return np.vstack((mat_contents['x'], mat_contents['y'])).T
-
def spiral():
    """Load spiral.mat, subsample it, and return it with columns reordered.

    Reads the ``'spiral'`` array from ``cluster_data/spiral.mat``, keeps
    every third row, and returns a new array whose columns are the original
    columns 1, 2 and 0, in that order.
    """
    path = u'cluster_data/spiral.mat'
    samples = sio.loadmat(path)['spiral'][0::3, :]
    # Plain column indexing yields the same values as rebuilding the array
    # with np.array([...]).T, and avoids depending on `np`, which the cell
    # defining this loader never imports.
    return samples[:, [1, 2, 0]]
-
def spiral_unbalance():
    """Load spiral_unbalance.mat, subsample it, and reorder its columns.

    Reads the ``'spiral_unbalance'`` array from
    ``cluster_data/spiral_unbalance.mat``, keeps every third row, and
    returns a new array whose columns are the original columns 1, 2 and 0,
    in that order.
    """
    path = u'cluster_data/spiral_unbalance.mat'
    samples = sio.loadmat(path)['spiral_unbalance'][0::3, :]
    # Plain column indexing yields the same values as rebuilding the array
    # with np.array([...]).T, and avoids depending on `np`, which the cell
    # defining this loader never imports.
    return samples[:, [1, 2, 0]]
-
def ThreeCircles():
    """Load ThreeCircles.mat, subsample it, and reorder its columns.

    Reads the ``'ThreeCircles'`` array from
    ``cluster_data/ThreeCircles.mat``, keeps every third row, and returns a
    new array whose columns are the original columns 1, 2 and 0, in that
    order.
    """
    path = u'cluster_data/ThreeCircles.mat'
    samples = sio.loadmat(path)['ThreeCircles'][0::3, :]
    # Plain column indexing yields the same values as rebuilding the array
    # with np.array([...]).T, and avoids depending on `np`, which the cell
    # defining this loader never imports.
    return samples[:, [1, 2, 0]]
-
def Twomoons():
    """Load Twomoons.mat, subsample it, reorder its columns, and plot it.

    Reads the ``'Twomoons'`` array from ``cluster_data/Twomoons.mat``, keeps
    every third row, and builds a new array whose columns are the original
    columns 1, 2 and 0, in that order.

    Side effect: the result is also drawn with ``plt.scatter`` (columns 0
    and 1 as coordinates, column 2 as the colour value) before it is
    returned.
    """
    path = u'cluster_data/Twomoons.mat'
    samples = sio.loadmat(path)['Twomoons'][0::3, :]
    # Plain column indexing yields the same values as rebuilding the array
    # with np.array([...]).T, and avoids depending on `np`, which the cell
    # defining this loader never imports.
    data = samples[:, [1, 2, 0]]
    plt.scatter(data[:, 0], data[:, 1], c=data[:, 2])
    return data
-
def Twomoons1():
    """Load Twomoons.mat, subsample it, and reorder its columns (no plotting).

    Reads the ``'Twomoons'`` array from ``cluster_data/Twomoons.mat``, keeps
    every third row, and returns a new array whose columns are the original
    columns 1, 2 and 0, in that order.
    """
    path = u'cluster_data/Twomoons.mat'
    samples = sio.loadmat(path)['Twomoons'][0::3, :]
    # Plain column indexing yields the same values as rebuilding the array
    # with np.array([...]).T, and avoids depending on `np`, which the cell
    # defining this loader never imports.
    return samples[:, [1, 2, 0]]
def test():
    """Print the literal string ``test`` to standard output."""
    # Call syntax works on both Python 2 and Python 3; the bare
    # `print 'test'` statement is a SyntaxError on Python 3.
    print('test')
-
-
def show_all():
    """Load the eight sample datasets and scatter-plot them on a 2x4 grid.

    Every loader's result is appended to the module-level ``data_list``,
    which must exist before this function is called. For each dataset,
    columns 0 and 1 are used as point coordinates and column 2 as the
    colour value.
    """
    plt.figure(figsize=(16, 8))

    # Reference the loader functions directly rather than eval()-ing their
    # names: same call-time lookup, without executing strings as code.
    loaders = [
        two_cluster,
        three_cluster,
        five_cluster,
        spiral,
        spiral_unbalance,
        ThreeCircles,
        Twomoons,
        Twomoons1,
    ]
    loaded = [loader() for loader in loaders]
    data_list.extend(loaded)

    # Plot the freshly loaded arrays rather than data_list[0:8], so a second
    # call does not redraw stale entries left over from the first call.
    for position, data in enumerate(loaded, start=1):
        plt.subplot(2, 4, position)
        plt.scatter(data[:, 0], data[:, 1], c=data[:, 2])


# show_all() appends into this list, so it has to be defined before the call.
data_list = []
show_all()
- %matplotlib inline
- import scipy.io as sio
-
- two_cluster=u'cluster_data/two_cluster.mat'
- data=sio.loadmat(two_cluster)
- print data
- %matplotlib inline
- import matplotlib.pyplot as plt
- x = data['X']
- cValue = x[2]
- plt.scatter(x[0],x[1],c=cValue)
- from sklearn import cluster, datasets
- b = np.array(x).T
- b = b[:,0:2]
-
- y_pred = cluster.KMeans(n_clusters=2, random_state=170).fit_predict(b)
-
- cValue = x[2]
- plt.scatter(x[0],x[1],c=y_pred)
数据集下载
scikit-learn教程
- %matplotlib inline
- import scipy.io as sio
-
- two_cluster=u'cluster_data/spiral.mat'
- spiral=sio.loadmat(two_cluster)['spiral']
- spiral = spiral[0::3,:]
- print len(spiral),len(spiral[0])
- cValue = spiral[:,0]
- print cValue.shape
- color = ['b','y']
- cValue = [color[int(i)] for i in list(cValue)]
- plt.scatter(spiral[:,1],spiral[:,2],c=cValue)
使用 KMeans 聚类的结果
from sklearn import cluster, datasets

# Fit a two-cluster KMeans on columns 1 and 2 of `spiral`, then plot those
# columns coloured by the predicted cluster index.
kmeans = cluster.KMeans(n_clusters=2, random_state=170)
y_pred = kmeans.fit_predict(spiral[:, 1:3])

plt.scatter(spiral[:, 1], spiral[:, 2], c=y_pred)
使用 scipy 进行层次聚类的效果
-
- %matplotlib inline
- import scipy.io as sio
- import matplotlib.pyplot as plt
- import scipy.cluster.hierarchy as hcluster
- from sklearn.cluster import AgglomerativeClustering
- import numpy.random as random
- import numpy as np
- import numpy.core.fromnumeric
-
-
def loadData():
    """Load the spiral dataset, print its size, plot it, and return it.

    Reads the ``'spiral'`` array from ``cluster_data/spiral.mat`` and keeps
    every third row. Column 0 is converted to int and used to pick a colour
    from ``['b', 'y']``; columns 1 and 2 are plotted as coordinates.

    Returns:
        The subsampled array. The original version returned nothing, which
        made the loaded data unreachable for callers.
    """
    spiral_path = u'cluster_data/spiral.mat'
    spiral = sio.loadmat(spiral_path)['spiral'][0::3, :]
    # Formatted output prints "<rows> <cols>" on both Python 2 and Python 3.
    print('%d %d' % (len(spiral), len(spiral[0])))

    labels = spiral[:, 0]
    print(labels.shape)

    color = ['b', 'y']
    point_colors = [color[int(label)] for label in labels]
    plt.scatter(spiral[:, 1], spiral[:, 2], c=point_colors)
    return spiral
-
-
def spiralSample(data=None):
    """Plot the spiral data next to two hierarchical-clustering results.

    Draws three side-by-side subplots: the data coloured by column 0, the
    two-cluster result of ``scipy.cluster.hierarchy.fclusterdata``, and the
    two-cluster result of scikit-learn's ward-linkage
    ``AgglomerativeClustering``. Columns 1 and 2 are the point coordinates
    in all three.

    Args:
        data: Array to plot and cluster. When omitted, the module-level
            ``spiral`` array is used, as in the original version, so
            existing zero-argument calls keep working.
    """
    if data is None:
        # Fall back to the global that an earlier cell is expected to define.
        data = spiral
    points = data[:, 1:3]

    plt.subplot(131)
    plt.title(u'origal data')
    plt.scatter(data[:, 1], data[:, 2], c=data[:, 0])

    # criterion='maxclust' with t=2 asks scipy for at most two flat clusters.
    y_pred = hcluster.fclusterdata(points, criterion='maxclust', t=2)
    plt.subplot(132)
    plt.title(u'use scipy to hierarchy cluster')
    plt.scatter(data[:, 1], data[:, 2], c=y_pred)

    plt.subplot(133)
    plt.title(u'use scikit to hierarchy cluster')
    y_pred = AgglomerativeClustering(n_clusters=2, linkage='ward').fit_predict(points)
    plt.scatter(data[:, 1], data[:, 2], c=y_pred)
    plt.show()


spiralSample()
转自:http://blog.csdn.net/yan456jie/article/details/52214815
0 0