【python sklearn】kmeans算法运用

来源:互联网 发布:山东大学法学院知乎 编辑:程序博客网 时间:2024/06/05 22:46
# -*- coding: utf-8 -*-
# K-means clustering demo with scikit-learn.
#
# Pipeline: read a CSV file, standardize the columns, fit a KMeans model,
# print the fitted model's attributes, persist and reload the model with
# joblib, predict with the reloaded model, then write the input rows plus
# their cluster label to an Excel workbook.
import sys
import time

# Python 2 only: force UTF-8 as the implicit str/unicode codec. Kept ahead of
# the third-party imports, as in the original script. On Python 3 neither
# `reload` nor `sys.setdefaultencoding` exists and UTF-8 is already the
# default, so the branch is skipped there.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')

# Script start timestamp (not read anywhere in this section).
time1 = time.time()

import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import scale

try:
    # Standalone package; `sklearn.externals.joblib` was removed from
    # scikit-learn in release 0.23.
    import joblib
except ImportError:
    # Older scikit-learn releases ship their own bundled copy.
    from sklearn.externals import joblib

# Input CSV, persisted model and Excel result locations.
DATA_PATH = 'C:/pic/data/wholesale customers data.csv'
MODEL_PATH = 'c:/km.pkl'
RESULT_PATH = 'c:/kmeans_result.xlsx'

# Number of clusters to fit.
N_CLUSTERS = 9


def main():
    """Run the clustering pipeline end to end.

    Reads ``DATA_PATH``, fits a ``KMeans`` model with ``N_CLUSTERS``
    clusters on the standardized data, prints the fitted estimator, its
    cluster centers, per-sample labels and inertia, saves the model to
    ``MODEL_PATH``, reloads it and prints its predictions, and finally
    writes the data with an added ``label`` column to ``RESULT_PATH``.
    """
    # ---- Read the data ----
    # Every column is read as a string and empty cells are kept as '' rather
    # than converted to NaN.
    data = pd.read_csv(DATA_PATH, sep=',', dtype=str, na_filter=False)

    # ---- Standardize the data ----
    # NOTE(review): the frame holds strings at this point, so this relies on
    # scale() coercing them to floats - confirm against the installed
    # scikit-learn version.
    feature = scale(data)

    # ---- Fit k-means with the configured number of clusters ----
    clf = KMeans(n_clusters=N_CLUSTERS)
    fitted = clf.fit(feature)
    print(fitted)

    # Cluster centers.
    print(clf.cluster_centers_)
    # Cluster assigned to each sample.
    print(clf.labels_)
    # Inertia is used to judge whether the cluster count is suitable: a
    # smaller value means tighter clusters; pick the count at the point
    # where the value stops dropping sharply.
    print(clf.inertia_)

    # ---- Save the model, load it back and predict with the copy ----
    joblib.dump(clf, MODEL_PATH)
    clf2 = joblib.load(MODEL_PATH)
    print(clf2.predict(feature))

    # ---- Write out the data together with the cluster labels ----
    data['label'] = clf.labels_
    print(data)
    data.to_excel(RESULT_PATH, index=False)
    print(u'聚类完成')


if __name__ == "__main__":
    main()