Python推荐算法
来源:互联网 发布:漫威奇异博士 实力知乎 编辑:程序博客网 时间:2024/05/29 19:33
# coding: utf-8
"""Cluster the rows of ``guangdabase.csv`` with K-Means and re-assign ``ywy``.

Steps, in order:

1. Load the headerless CSV and attach the 56 column names.
2. From the 18-character ``shfzh18`` string derive ``bornyear`` (chars 6-9),
   ``sex`` (parity of char 16), ``address`` (chars 0-5) and ``age``; compute
   the ratio ``hkzhb = zjshje / zjqkje``.
3. Min-max scale the numeric features and cluster them with K-Means
   (an inertia-vs-k curve is plotted first for k = 2..11).
4. Inside every cluster, sort rows by distance to the cluster centre and hand
   them out to the ``ywy`` values: ``ywy`` values are ordered by their summed
   ``zjshje`` (descending) and each one receives as many rows as it
   originally had in that cluster.

The result is ``resultrankdata``.

NOTE(review): column names look like pinyin abbreviations; their business
meaning is not visible in this file and is deliberately not asserted here.
The code runs on both Python 2 and Python 3. Bare notebook preview
expressions (``.head()``, ``.shape`` ...) have no effect in a script and are
not reproduced.
"""
import math
import os
import random
import re
import sys
import time

import numpy as np
import pandas as pd
import requests
import xlsxwriter
import xlwt
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans

if sys.version_info[0] < 3:
    # Python 2 only: reload() and setdefaultencoding() do not exist on
    # Python 3, where text is already unicode.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf8')

# Column names of the headerless CSV, in file order.
COLUMN_NAMES = [
    'id', 'UpdateFlag', 'branch', 'ajbh', 'kehu', 'ajlx', 'shfzh',
    'shfzh18', 'shebaoID', 'xm', 'pinyin', 'sex', 'zhiwu', 'zjqkje',
    'zjshje', 'zjzxqke', 'zjzxqkerq', 'zjyhlx', 'jdsj', 'dqsj', 'zu', 'ywy',
    'states', 'period', 'yjbl', 'fenpeisj', 'urgent', 'lasttime',
    'closetime', 'czy', 'addtime', 'pici', 'inpici', 'shengfen', 'chengshi',
    'remark1', 'remark2', 'remark3', 'lastJzSj', 'kongguan', 'PromisedDate',
    'PromisedJe', 'nextStep', 'hint', 'dingyueTime', 'fabuTime', 'gaNum',
    'Ajsx', 'ajInfo', 'kehuAjBh', 'ajStop', 'ajLock', 'yxAj', 'isShare',
    'zxxddm', 'picipizhu',
]

# Features fed to K-Means; they must be the leading columns of ``testclust``
# because centerDistance() reads the features positionally.
FEATURE_COLUMNS = ["age", "sex", "address", "zjqkje", "zjshje", "hkzhb"]

# Columns that are min-max scaled ("sex" is already 0/1).
SCALED_COLUMNS = ["age", "address", "zjqkje", "zjshje", "hkzhb"]

N_CLUSTERS = 5          # k used for the final clustering
REFERENCE_YEAR = 2017   # year that ``age`` is computed against


def xybasenames(data1):
    """Attach the expected column names to *data1* as ``data1.colnames``.

    The frame's real columns are left untouched; the caller assigns
    ``data1.columns`` itself.

    :param data1: pandas DataFrame read from the headerless CSV.
    :return: the same object, with the ``colnames`` attribute set.
    """
    data1.colnames = list(COLUMN_NAMES)
    return data1


def maxminscale(normal):
    """Min-max scale *normal* to the range [0, 1].

    :param normal: numeric Series or array.
    :return: ``(normal - min) / (max - min)``; all zeros when every value is
        identical (instead of dividing by zero).
    """
    max1 = np.max(normal)
    min1 = np.min(normal)
    span = max1 - min1
    if span == 0:
        # Constant input: avoid 0/0 -> NaN, which K-Means cannot handle.
        return normal - min1
    return (normal - min1) / span


def datascale(scaledata):
    """Standardise *scaledata* to zero mean and unit standard deviation.

    :param scaledata: numeric Series or array.
    :return: ``(scaledata - mean) / std``; all zeros when the standard
        deviation is zero (instead of dividing by zero).
    """
    mean1 = np.mean(scaledata)
    std1 = np.std(scaledata)
    if std1 == 0:
        return scaledata - mean1
    return (scaledata - mean1) / std1


def centerDistance(labeldata, centerdata):
    """Euclidean distance of every row to the centre of its own cluster.

    :param labeldata: DataFrame whose leading columns are the clustered
        features (as many as *centerdata* has columns) and which carries an
        integer ``label`` column indexing into *centerdata*.
    :param centerdata: array of shape (n_clusters, n_features).
    :return: 1-D numpy array with one distance per row, in the row order of
        *labeldata*, so it can be assigned directly as a new column.
    """
    centers = np.asarray(centerdata, dtype=float)
    features = np.asarray(labeldata.iloc[:, 0:centers.shape[1]], dtype=float)
    labels = np.asarray(labeldata["label"], dtype=int)
    # centers[labels] picks, for each row, the centre of that row's cluster.
    diff = features - centers[labels]
    return np.sqrt((diff * diff).sum(axis=1))


def _assign_by_rank(ranked_rows, quotas):
    """Overwrite ``ywy`` in *ranked_rows* block by block.

    :param ranked_rows: rows of one cluster, already sorted by ``ddd``.
    :param quotas: rows of ``fpresult`` for the same cluster, in priority
        order; ``nnn_x`` is how many rows each ``ywy`` receives.
    :return: a copy of *ranked_rows* where the first ``nnn_x[0]`` rows carry
        ``ywy[0]``, the next ``nnn_x[1]`` rows carry ``ywy[1]``, and so on.
    """
    ranked_rows = ranked_rows.copy()
    counts = quotas["nnn_x"].astype(int).values
    owners = np.repeat(quotas["ywy"].values, counts)
    # Positional write: never run past the end of either side.
    n_rows = min(len(owners), len(ranked_rows))
    ywy_pos = ranked_rows.columns.get_loc("ywy")
    ranked_rows.iloc[:n_rows, ywy_pos] = owners[:n_rows]
    return ranked_rows


# ---------------------------------------------------------------- load data
os.chdir(u'**********')
guangdabase = pd.read_csv('guangdabase.csv', header=None)
guangdabase = xybasenames(guangdabase[:])
guangdabase.columns = guangdabase.colnames

# ------------------------------------------------------- feature extraction
# .copy() so the column assignments below write to an independent frame.
guangda1 = guangdabase[["ajbh", "shfzh18", "ywy", "zjqkje", "zjshje"]].copy()
guangda1["hkzhb"] = guangda1["zjshje"] / guangda1["zjqkje"]
guangda1["bornyear"] = guangda1["shfzh18"].str.slice(6, 10)
guangda1["sex"] = guangda1["shfzh18"].str.get(16)
guangda1["address"] = guangda1["shfzh18"].str.slice(0, 6)
guangda1["shfzhnum"] = guangda1["shfzh18"].str.len()

guangda2 = guangda1[["ajbh", "shfzh18", "bornyear", "shfzhnum", "sex",
                     "address", "zjqkje", "zjshje", "ywy"]]
guangda2 = guangda2[guangda2["shfzhnum"] == 18].copy()
guangda2["yearlen"] = guangda2["bornyear"].str.len()
guangda2 = guangda2[guangda2["yearlen"] == 4]
# Keep only numeric years. This replaces a comparison against one specific
# UTF-8 byte string, which only matched under Python 2 byte-string columns.
guangda2 = guangda2[guangda2["bornyear"].str.isdigit()].copy()
guangda2["bornyear"] = guangda2["bornyear"].astype(int)
guangda2["age"] = REFERENCE_YEAR - guangda2["bornyear"]
# Even digit -> 0, odd digit -> 1.
guangda2["sex"] = guangda2["sex"].astype(int) % 2
guangda2["address"] = guangda2["address"].astype(int)
guangda2["hkzhb"] = guangda2["zjshje"] / guangda2["zjqkje"]
guangda2["ywy"] = guangda2["ywy"].str.upper()

guangda3 = guangda2.dropna()
guangda = guangda3[["ajbh", "shfzh18", "age", "sex", "address", "zjqkje",
                    "zjshje", "hkzhb", "ywy"]]
guangda = guangda[guangda.zjqkje > 0].copy()

# ------------------------------------------------------------------ scaling
# Work on a copy: a plain ``guangda4 = guangda`` would alias the frame and
# overwrite the raw values in ``guangda`` with their scaled versions.
guangda4 = guangda.copy()
for column in SCALED_COLUMNS:
    guangda4[column] = maxminscale(guangda4[column])

testclust = guangda4[FEATURE_COLUMNS].copy()

# --------------------------------------------------------------- elbow plot
cluster_counts = list(range(2, 12))
a = []  # inertia per k, kept as floats
for j in cluster_counts:
    kmeanss = KMeans(n_clusters=j, init='k-means++', n_init=10,
                     max_iter=300).fit(testclust)
    a.append(kmeanss.inertia_)
x = np.array(cluster_counts)  # the k values that were actually fitted
y = np.array(a)
plt.rc('font', family='SimHei', size=13)
plt.xlabel("聚类个数")
plt.ylabel("均方误差")
plt.plot(x, y)
plt.show()

# --------------------------------------------------------- final clustering
kmeans = KMeans(n_clusters=N_CLUSTERS, init='k-means++', n_init=10,
                max_iter=300).fit(testclust)
testclust["label"] = kmeans.labels_
kcenters = kmeans.cluster_centers_

ttttt = centerDistance(testclust, kcenters)
testclust["ddd"] = ttttt

# guangda and testclust share the same index, so these align row by row.
guangda["label"] = testclust["label"]
guangda["ddd"] = testclust["ddd"]
testclust["ywy"] = guangda["ywy"]
guangda["nnn"] = 1  # row counter, summed by the group-bys below

# ------------------------------------------------ per-cluster / per-ywy stats
data1 = guangda.groupby("label")["nnn"].sum().reset_index()
data2 = guangda.groupby(["label", "ywy"])[["zjshje", "nnn"]].sum().reset_index()
data2 = data2.sort_values(["label", "zjshje"], ascending=False)
data2 = data2[["label", "ywy", "zjshje", "nnn"]]

fpresult = pd.merge(data2, data1, on="label", how="left")
# Share of the cluster's rows held by each ywy.
fpresult["fpb"] = fpresult["nnn_x"] / fpresult["nnn_y"]
fpresult = fpresult[["label", "ywy", "zjshje", "nnn_x", "fpb"]]

# NOTE(review): the original merged with an undefined name ``data3``;
# ``fpresult`` is the only per-(label, ywy) table available - confirm intent.
# The suffixes keep the row-level column names unchanged.
guangdaresult = pd.merge(guangda, fpresult, on=["label", "ywy"], how="left",
                         suffixes=("", "_ywy"))
rankdata = guangdaresult.sort_values(["label", "ddd"])

# -------------------------------------------------------------- re-assignment
resultrankdata = pd.concat([
    _assign_by_rank(rankdata[rankdata.label == label],
                    fpresult[fpresult.label == label])
    for label in range(N_CLUSTERS)
])
阅读全文
0 0
- 推荐算法入门-python
- Python推荐算法
- 个性化推荐算法python实现
- python实现的推荐算法
- Python机器学习算法 推荐
- python 实现协同过滤推荐算法
- python数据挖掘 商品推荐算法
- Python用户推荐系统曼哈顿算法实现
- Python实现KNN算法项目 --- 约会推荐算法
- [推荐算法]Pearson Correlation Similarity 的python实现
- 基于用户协同过滤的推荐系统算法,python 实现
- Mrec(python推荐算法开源包)——开篇
- 用于推荐系统的SVD算法python实现
- 推荐算法的Python实现(样例代码)
- 利用Python实现基于协同过滤算法的影片推荐
- RBM算法模型应用在推荐系统 Python代码实现
- 算法推荐
- 推荐算法
- UML类图中类与类的关系
- echart地图城市无法显示?
- Less 的使用心得
- bzoj2152 聪聪可可
- 8月15日云栖精选夜读:阿里云新一代关系型数据库 PolarDB 剖析
- Python推荐算法
- Java 泛型中? super T和? extends T的区别
- 如何理解采样定理
- JAVA 内存泄漏与内存溢出
- Fences桌面整理+TeamViewer远程
- DbUtils学习----DbUtils类
- 【嵌入式安全扫盲二】Uncontrolled format string
- Linux中find命令和grep命令的结合使用
- [编程题] 操作序列