python进行聚类(scikit-learn、scipy)
来源:互联网 发布:淘宝客文案 编辑:程序博客网 时间:2024/06/10 11:11
用于聚类的数据集
# IPython magic from the original notebook cell; uncomment when running in
# Jupyter (it is not valid syntax in a plain .py file).
# %matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio

# Loaders for the clustering demo data sets.  show_all() plots columns 0 and 1
# of every returned array as coordinates and uses column 2 as the point colour.


def _load_labelled(path, key, step=3):
    """Load a point set from a MATLAB file and move its first column last.

    Args:
        path: Location of the ``.mat`` file.
        key: Name of the variable to read from the file.
        step: Keep only every ``step``-th row (the original code used 3).

    Returns:
        The thinned array with its columns reordered from (0, 1, 2) to
        (1, 2, 0), so the column used for colouring ends up last.
    """
    rows = sio.loadmat(path)[key][::step, :]
    return rows[:, [1, 2, 0]]


def two_cluster():
    """Return variable ``X`` of two_cluster.mat, transposed."""
    return sio.loadmat(u'cluster_data/two_cluster.mat')['X'].T


def three_cluster():
    """Return variable ``X`` of three_cluster.mat, transposed."""
    return sio.loadmat(u'cluster_data/three_cluster.mat')['X'].T


def five_cluster():
    """Return variables ``x`` and ``y`` of five_cluster.mat as one array.

    Per the original author's notes ``x`` holds two rows and ``y`` one row of
    n columns each (TODO confirm against the file); they are stacked
    vertically and transposed so that ``y`` becomes the last column.
    """
    contents = sio.loadmat(u'cluster_data/five_cluster.mat')
    return np.vstack((contents['x'], contents['y'])).T


def spiral():
    """Return every third row of the ``spiral`` data set."""
    return _load_labelled(u'cluster_data/spiral.mat', 'spiral')


def spiral_unbalance():
    """Return every third row of the ``spiral_unbalance`` data set."""
    return _load_labelled(u'cluster_data/spiral_unbalance.mat',
                          'spiral_unbalance')


def ThreeCircles():
    """Return every third row of the ``ThreeCircles`` data set."""
    return _load_labelled(u'cluster_data/ThreeCircles.mat', 'ThreeCircles')


def Twomoons():
    """Return every third row of the ``Twomoons`` data set.

    The original also scatter-plotted the data here, which drew onto the
    figure outside the subplot grid when called from show_all(); plotting is
    now left to the caller.
    """
    return _load_labelled(u'cluster_data/Twomoons.mat', 'Twomoons')


def Twomoons1():
    """Return the same data as Twomoons(); kept as a separate entry point."""
    return _load_labelled(u'cluster_data/Twomoons.mat', 'Twomoons')


def test():
    """Print a fixed marker string."""
    print('test')


def show_all():
    """Plot all eight data sets in a 2 x 4 grid of scatter plots.

    Returns:
        The list of loaded arrays, in plotting order.
    """
    # Reference the loaders directly instead of eval()-ing their names.
    loaders = [two_cluster, three_cluster, five_cluster, spiral,
               spiral_unbalance, ThreeCircles, Twomoons, Twomoons1]
    plt.figure(figsize=(16, 8))
    loaded = [load() for load in loaders]
    for position, data in enumerate(loaded, start=1):
        plt.subplot(2, 4, position)
        plt.scatter(data[:, 0], data[:, 1], c=data[:, 2])
    return loaded


# Keep the module-level ``data_list`` the original cell populated.
data_list = show_all()
使用scikit-learn的KMeans进行聚类
# IPython magic from the original notebook cell; uncomment when running in
# Jupyter (it is not valid syntax in a plain .py file).
# %matplotlib inline
import scipy.io as sio

# Path of the MATLAB file.  Named ``two_cluster_path`` so that it does not
# rebind the ``two_cluster`` loader function defined in the first snippet.
two_cluster_path = u'cluster_data/two_cluster.mat'
# loadmat() returns a dict keyed by the variable names stored in the file.
data = sio.loadmat(two_cluster_path)
print(data)
# IPython magic from the original notebook cell; uncomment when running in
# Jupyter (it is not valid syntax in a plain .py file).
# %matplotlib inline
import matplotlib.pyplot as plt

# ``data`` is the dict produced by sio.loadmat() in the previous snippet.
x = data['X']
# Rows 0 and 1 are plotted as coordinates; row 2 supplies each point's colour.
cValue = x[2]
plt.scatter(x[0], x[1], c=cValue)
import matplotlib.pyplot as plt
import numpy as np
from sklearn import cluster, datasets

# ``x`` is the array read from two_cluster.mat in the previous snippet.
# Transpose it and keep the first two columns as the features to cluster.
b = np.array(x).T
b = b[:, 0:2]
# Fixed random_state so repeated runs give the same assignment.
y_pred = cluster.KMeans(n_clusters=2, random_state=170).fit_predict(b)
# Row 2 of ``x``; kept for comparison with the predicted labels.
cValue = x[2]
# Colour the points by the predicted cluster.
plt.scatter(x[0], x[1], c=y_pred)
数据集下载
scikit-learn教程
# IPython magic from the original notebook cell; uncomment when running in
# Jupyter (it is not valid syntax in a plain .py file).
# %matplotlib inline
import matplotlib.pyplot as plt
import scipy.io as sio

# Path of the MATLAB file holding the spiral data set.
spiral_path = u'cluster_data/spiral.mat'
spiral = sio.loadmat(spiral_path)['spiral']
spiral = spiral[0::3, :]  # keep every third row
print(len(spiral), len(spiral[0]))
# Column 0 is used to pick each point's colour; columns 1 and 2 are plotted.
cValue = spiral[:, 0]
print(cValue.shape)
# Assumes the values in column 0 are 0 or 1 so they can index ``color``.
color = ['b', 'y']
cValue = [color[int(i)] for i in list(cValue)]
plt.scatter(spiral[:, 1], spiral[:, 2], c=cValue)
使用kmeans结果
import matplotlib.pyplot as plt
from sklearn import cluster, datasets

# ``spiral`` is the subsampled array loaded in the previous snippet.
# Cluster on the two plotted columns (1 and 2).
y_pred = cluster.KMeans(n_clusters=2, random_state=170).fit_predict(spiral[:, 1:3])
# Colour the points by the predicted cluster.
plt.scatter(spiral[:, 1], spiral[:, 2], c=y_pred)
使用scipy进行聚类效果
# -*- coding: utf8 -*-
# IPython magic from the original notebook cell; uncomment when running in
# Jupyter (it is not valid syntax in a plain .py file).
# %matplotlib inline
import scipy.io as sio
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as hcluster
from sklearn.cluster import AgglomerativeClustering
import numpy.random as random
import numpy as np
# NOTE(review): unused in this snippet, and numpy.core is a private namespace
# (renamed in NumPy 2.0) -- consider dropping this import.
import numpy.core.fromnumeric


def _read_spiral():
    """Read the ``spiral`` variable of spiral.mat and keep every third row."""
    return sio.loadmat(u'cluster_data/spiral.mat')['spiral'][0::3, :]


def loadData():
    """Load the spiral data set, print its size and scatter-plot it.

    Returns:
        The subsampled array.  Column 0 selects the colour of each point and
        columns 1 and 2 are plotted as coordinates.
    """
    spiral = _read_spiral()
    print(len(spiral), len(spiral[0]))
    labels = spiral[:, 0]
    print(labels.shape)
    # Assumes the values in column 0 are 0 or 1 so they can index ``color``.
    color = ['b', 'y']
    point_colors = [color[int(label)] for label in labels]
    plt.scatter(spiral[:, 1], spiral[:, 2], c=point_colors)
    return spiral


def spiralSample(spiral=None):
    """Compare the raw spiral data with two hierarchical clusterings.

    Draws three side-by-side scatter plots: the data coloured by column 0,
    the SciPy ``fclusterdata`` result and the scikit-learn
    ``AgglomerativeClustering`` result.

    Args:
        spiral: Array laid out as returned by loadData().  When omitted the
            data set is read from disk, so the function no longer depends on
            a global ``spiral`` left behind by an earlier snippet.
    """
    if spiral is None:
        spiral = _read_spiral()
    features = spiral[:, 1:3]

    plt.subplot(131)
    plt.title(u'origal data')
    plt.scatter(spiral[:, 1], spiral[:, 2], c=spiral[:, 0])

    # SciPy hierarchical clustering.  With criterion='maxclust', ``t`` is the
    # maximum number of flat clusters to form (not a distance threshold);
    # the ``depth`` argument only applies to the 'inconsistent' criterion.
    # http://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.fcluster.html#scipy.cluster.hierarchy.fcluster
    y_pred = hcluster.fclusterdata(features, criterion='maxclust', t=2)
    plt.subplot(132)
    plt.title(u'use scipy to hierarchy cluster')
    plt.scatter(spiral[:, 1], spiral[:, 2], c=y_pred)

    # scikit-learn hierarchical clustering with Ward linkage.
    plt.subplot(133)
    plt.title(u'use scikit to hierarchy cluster')
    y_pred = AgglomerativeClustering(n_clusters=2, linkage='ward').fit_predict(features)
    plt.scatter(spiral[:, 1], spiral[:, 2], c=y_pred)
    plt.show()


spiralSample()
0 0
- python进行聚类(scikit-lean、scipy)
- python进行聚类(scikit-lean、scipy)
- 使用scipy进行聚类
- [python] 安装numpy+scipy+matlotlib+scikit-learn及问题解决
- python依赖包numpy、scipy、scikit-learn运行冲突解决方案
- [python] 安装numpy+scipy+matlotlib+scikit-learn及问题解决
- python scipy和scikit-learn包的安装
- 2017 windows python 安装numpy+mkl+scipy+scikit-learn
- [python] 安装numpy+scipy+matlotlib+scikit-learn及问题解决
- [python] 安装numpy+scipy+matlotlib+scikit-learn及问题解决
- [python] 安装numpy+scipy+matlotlib+scikit-learn及问题解决
- [python] 安装numpy+scipy+matlotlib+scikit-learn及问题解决
- Python聚类工具scipy cluster
- 使用scipy进行层次聚类和k-means聚类
- 使用scipy进行层次聚类和k-means聚类
- Scikit Learn: 用python进行机器学习
- Numpy\Scipy\Matlotlib\Scikit-Learn
- python-scipy
- redis安装部署
- STL之vector容器
- 16.8.15 C组总结
- Spring的事务传播特性
- sublime text 3.0 安装 HTML-CSS-JS Prettify
- python进行聚类(scikit-lean、scipy)
- android文件系统区分大小写的问题
- 2016年8月15号
- Ekka Dokka
- ListView优化
- 把子类当成父对象使用(子类赋值给父类)
- thinkphp3.2的运用
- cocos2dx 3.2 内存管理
- bzoj2144 跳跳棋 二分+lca