python数据分析之kDD(二)
来源:互联网 发布:mac如何隐藏dock栏 编辑:程序博客网 时间:2024/06/06 08:47
针对前篇得到的数据,目的做聚类,希望找到不同的学生群体,这些群体分别都有什么特征。聚类算法用的最简单的二元聚类,就是将簇分成两个慢慢聚下去。
#coding: utf-8
'''
Created on Feb 16, 2011
k Means Clustering for Ch10 of Machine Learning in Action
@author: Peter Harrington
'''
from numpy import *
import edXnew
def loadDataSet(fileName):
    """Parse a tab-delimited text file of floats.

    Every line becomes one list of floats; all columns are kept.

    Returns a list of row lists.
    """
    dataMat = []
    # `with` guarantees the handle is closed even if float() raises
    # (the original leaked the file handle).  The py2-only
    # unicode(fileName, "utf8") coercion is dropped: open() accepts the
    # path as given.
    with open(fileName) as fr:
        for line in fr:
            curLine = line.strip().split('\t')
            # list(...) keeps the Python 2 semantics where map() returned a list
            fltLine = list(map(float, curLine))
            dataMat.append(fltLine)
    return dataMat
def distEclud(vecA, vecB):
    """Euclidean distance between two row vectors (same as la.norm(vecA - vecB))."""
    delta = vecA - vecB
    return sqrt(sum(multiply(delta, delta)))
def randCent(dataSet, k):
    """Create a (k, n) matrix of random initial centroids.

    Each coordinate is drawn uniformly within the [min, max] range of the
    corresponding column of dataSet, so every centroid lies inside the
    bounding box of the data.
    """
    n = shape(dataSet)[1]
    centroids = mat(zeros((k, n)))
    # BUG FIX: the original looped over range(1, n), which left column 0 of
    # every centroid stuck at zero; all n columns must be randomized.
    for j in range(n):
        minJ = min(dataSet[:, j])
        rangeJ = float(max(dataSet[:, j]) - minJ)
        centroids[:, j] = mat(minJ + rangeJ * random.rand(k, 1))
    return centroids
def kMeans1(dataSet, k, distMeas=distEclud, createCent=randCent):
    """Standard k-means clustering.

    Parameters:
        dataSet    - (m, n) matrix, one sample per row
        k          - number of clusters
        distMeas   - distance function between two row vectors
        createCent - function producing the k initial centroids

    Returns (centroids, clusterAssment); clusterAssment is an (m, 2) matrix
    whose column 0 is the assigned cluster index and column 1 the squared
    distance of the point to its centroid.
    """
    m = shape(dataSet)[0]
    clusterAssment = mat(zeros((m, 2)))
    centroids = createCent(dataSet, k)
    clusterChanged = True
    while clusterChanged:
        clusterChanged = False
        # assignment step: attach every point to its nearest centroid
        for i in range(m):
            minDist = inf
            minIndex = -1
            for j in range(k):
                distJI = distMeas(centroids[j, :], dataSet[i, :])
                if distJI < minDist:
                    minDist = distJI
                    minIndex = j
            if clusterAssment[i, 0] != minIndex:
                clusterChanged = True  # keep iterating until assignments settle
            clusterAssment[i, :] = minIndex, minDist ** 2
        # update step: move each centroid to the mean of its assigned points
        for cent in range(k):
            ptsInClust = dataSet[nonzero(clusterAssment[:, 0].A == cent)[0]]
            # BUG FIX: mean() over an empty selection yields NaN and poisons
            # the centroid; keep the previous centroid when a cluster lost all
            # its points (this is what the commented-out guard in the original
            # intended).
            if len(ptsInClust) > 0:
                centroids[cent, :] = mean(ptsInClust, axis=0)
    return centroids, clusterAssment
def biKmeans(dataSet, k, distMeas=distEclud):
    """Bisecting k-means: repeatedly 2-split the cluster whose split gives
    the lowest total SSE, until k clusters exist.

    Returns (centroids, clusterAssment) with the same layout as kMeans1.
    """
    print('start kmeans')
    m = shape(dataSet)[0]
    clusterAssment = mat(zeros((m, 2)))
    # start with a single cluster whose centroid is the global mean
    centroid0 = mean(dataSet, axis=0).tolist()[0]
    centList = [centroid0]
    for j in range(m):  # initial squared error of every point
        clusterAssment[j, 1] = distMeas(mat(centroid0), dataSet[j, :]) ** 2
    while len(centList) < k:
        lowestSSE = inf
        bestCentToSplit = -1  # BUG FIX: sentinel so an all-empty pass is detectable
        for i in range(len(centList)):
            ptsInCurrCluster = dataSet[nonzero(clusterAssment[:, 0].A == i)[0], :]
            # BUG FIX: the original compared a numpy matrix against [] and
            # could fall through to use an unbound centroidMat/sseSplit;
            # test emptiness explicitly and skip.
            if len(ptsInCurrCluster) == 0:
                continue
            centroidMat, splitClustAss = kMeans1(ptsInCurrCluster, 2, distMeas)
            sseSplit = sum(splitClustAss[:, 1])
            sseNotSplit = sum(clusterAssment[nonzero(clusterAssment[:, 0].A != i)[0], 1])
            if (sseSplit + sseNotSplit) < lowestSSE:
                bestCentToSplit = i
                bestNewCents = centroidMat
                bestClustAss = splitClustAss.copy()
                lowestSSE = sseSplit + sseNotSplit
        if bestCentToSplit == -1:
            break  # nothing splittable; avoid using unbound best* values
        # relabel the 2-way split: sub-cluster 1 becomes a brand-new index,
        # sub-cluster 0 keeps the index of the cluster that was split
        bestClustAss[nonzero(bestClustAss[:, 0].A == 1)[0], 0] = len(centList)
        bestClustAss[nonzero(bestClustAss[:, 0].A == 0)[0], 0] = bestCentToSplit
        print('assign cluster')
        centList[bestCentToSplit] = bestNewCents[0, :].tolist()[0]
        centList.append(bestNewCents[1, :].tolist()[0])
        clusterAssment[nonzero(clusterAssment[:, 0].A == bestCentToSplit)[0], :] = bestClustAss
    print('complete kMeans!')
    return mat(centList), clusterAssment
代码是下载来的,也不记得在哪下的。主函数是biKmeans.
针对自己的数据的代码:
#coding: utf-8
import numpy
from numpy import *
import kMeans
# Feature-loading / PCA / bisecting-k-means driver for the MOOC drop-out data.
class dataMining():
# NOTE(review): the attributes below are CLASS-level mutable containers, so
# every instance (and every repeated getDataSet* call) appends into the SAME
# shared lists -- data from different runs will mix.  Verify whether they
# should instead be created per-instance in __init__.
# NOTE(review): `id` shadows the builtin of the same name inside this class.
FeatureDict={}
FeatureVec=[]
FeatureVecSVM=[]
enrollment=[]
labelSVM=[]
id=[]
def getDataSet2(self,filename,trainNum1,trainNum2):
    """Load feature rows [trainNum1, trainNum2) from a comma-separated file.

    Columns 1-7 become one feature vector; the first character of column 8
    is the enrollment (drop-out) label.  Results are APPENDED to the shared
    self.FeatureVec / self.enrollment lists, which are also returned.
    """
    self.grade = []
    self.explore = []
    row = 0
    # `with` guarantees the file is closed even if float() raises
    # (the original leaked the handle in that case).
    with open(filename) as f:
        for line in f:
            row += 1
            if trainNum1 <= row < trainNum2:
                fields = line.split(",")  # renamed: `str` shadowed the builtin
                if fields[0] != 'id':     # skip the header row
                    features = [float(fields[c]) for c in range(1, 8)]
                    self.FeatureVec.append(features)
                    # [0] keeps only the first char, discarding the newline
                    self.enrollment.append(fields[8][0])
    return self.FeatureVec, self.enrollment
def getDataSet3(self,filename,trainNum1,trainNum2):
    """Load feature rows [trainNum1, trainNum2) from a comma-separated file.

    Columns 1-10 become one feature vector.  Results are APPENDED to the
    shared self.FeatureVec / self.enrollment lists, which are also returned.

    NOTE(review): the label is taken from column 3 -- one of the feature
    columns -- unlike the other loaders which use a dedicated label column.
    Verify this is intentional for this dataset.
    """
    self.grade = []
    self.explore = []
    row = 0
    # `with` guarantees the file is closed even if float() raises.
    with open(filename) as f:
        for line in f:
            row += 1
            if trainNum1 <= row < trainNum2:
                fields = line.split(",")  # renamed: `str` shadowed the builtin
                if fields[0] != 'id':     # skip the header row
                    features = [float(fields[c]) for c in range(1, 11)]
                    self.FeatureVec.append(features)
                    self.enrollment.append(fields[3][0])
    return self.FeatureVec, self.enrollment
def getDataSet4(self,filename,trainNum1,trainNum2):
    """Like getDataSet2, but also collects the id column.

    Columns 1-7 become one feature vector; column 0 is the record id; the
    first character of column 8 is the enrollment label.  Results are
    APPENDED to the shared self.id / self.FeatureVec / self.enrollment
    lists, which are also returned.
    """
    self.grade = []
    self.explore = []
    row = 0
    # `with` guarantees the file is closed even if float() raises.
    with open(filename) as f:
        for line in f:
            row += 1
            if trainNum1 <= row < trainNum2:
                fields = line.split(",")  # renamed: `str` shadowed the builtin
                if fields[0] != 'id':     # skip the header row
                    features = [float(fields[c]) for c in range(1, 8)]
                    self.id.append(fields[0])
                    self.FeatureVec.append(features)
                    self.enrollment.append(fields[8][0])
    return self.id, self.FeatureVec, self.enrollment
def getDataSet5(self,filename,trainNum1,trainNum2):
    """Like getDataSet4, but the enrollment label comes from column 50.

    NOTE(review): this requires every data row to have at least 51 columns
    even though only columns 0-7 are otherwise used -- presumably the wide
    one-step-transition file; verify against the input format.
    """
    self.grade = []
    self.explore = []
    row = 0
    # `with` guarantees the file is closed even if float() raises.
    with open(filename) as f:
        for line in f:
            row += 1
            if trainNum1 <= row < trainNum2:
                fields = line.split(",")  # renamed: `str` shadowed the builtin
                if fields[0] != 'id':     # skip the header row
                    features = [float(fields[c]) for c in range(1, 8)]
                    self.id.append(fields[0])
                    self.FeatureVec.append(features)
                    self.enrollment.append(fields[50][0])
    return self.id, self.FeatureVec, self.enrollment
# Hand-rolled PCA over the features loaded by getDataSet2: center the data,
# build the covariance matrix, eigendecompose it, keep the top-3 components.
# Returns (projected data, list of projected rows, labels, row->original map,
# fraction of variance kept).
def PCA(self,filename,trainNum1,trainNum2):
dataSet,labels=self.getDataSet2(filename,trainNum1,trainNum2)
# transpose so rows are features, columns are samples
dataMat=mat(dataSet).transpose()
n,m=dataMat.shape
# center each feature row (the column-mean vector is recomputed on every
# iteration of i; it could be hoisted out of the loop)
for i in range(0,n):
mean=dataMat.mean(axis=1)
for j in range(0,m):
dataMat[i,j]=dataMat[i,j]-mean[i]
# covariance matrix of the centered data
C=(1/float(m))*dataMat*(dataMat.transpose())
featureValue,featureVec=numpy.linalg.eig(C)
print featureValue,featureVec
sumFeatureValue=0
listValue=[]
listVec={}
for l in range(0,n):
listValue.append(featureValue[l])
sumFeatureValue=sumFeatureValue+listValue[l]
# NOTE(review): numpy.linalg.eig returns eigenvectors as COLUMNS of
# featureVec, but featureVec[l] indexes a ROW -- likely a bug; verify.
listVec[featureValue[l]]=featureVec[l]
listValue.sort()
# print listValue,listVec
# NOTE(review): indices 7..9 assume n == 10 eigenvalues (top-3 after the
# ascending sort), but getDataSet2 yields 7 features, so this would raise
# IndexError -- confirm which loader this was meant to pair with.
tributeFeature=listValue[7]+listValue[8]+listValue[9]
# fraction of total variance captured by the kept components
tribute=float(tributeFeature)/float(sumFeatureValue)
# print featureValue
# print featureVec
print float(tributeFeature)/float(sumFeatureValue)
# projection matrix from the top-3 eigenvectors; project and transpose back
# to samples-as-rows
P=numpy.vstack([listVec[listValue[7]],listVec[listValue[8]],listVec[listValue[9]]])
newDataMat=(P*dataMat).transpose()
X={}
newlist=[]
for p in xrange(m):
newlist.append(newDataMat[p])
# NOTE(review): newlist entries are matrix rows, which are unhashable --
# using them as dict keys here should raise TypeError; verify this path
# was ever exercised.
for i in xrange(m):
X[newlist[i]]=dataSet[i]
return newDataMat,newlist,labels,X,tribute
# Eigendecomposition of the raw data matrix itself (no centering, no
# covariance).
def PCA2(self,filename,trainNum1,trainNum2):
dataSet,labels=self.getDataSet2(filename,trainNum1,trainNum2)
dataMat=mat(dataSet)
n,m=dataMat.shape
print n,m
# NOTE(review): numpy.linalg.eig requires a SQUARE matrix, but dataMat is
# (samples x features); unless n == m this raises LinAlgError.  Probably
# the covariance matrix (as in PCA above) or SVD was intended -- verify.
featureValue,featureVec=numpy.linalg.eig(dataMat)
return featureValue,featureVec
# Cluster the rows of `filename` into k groups with bisecting k-means and
# write one CSV per cluster (first feature + label) plus a result.txt with
# each cluster's drop-out rate.
# NOTE(review): output paths are hard-coded to E:\new, and the ten file
# handles leak if an exception occurs before the close loop at the end.
def scatterKMeans1(self,filename,dataNum1,dataNum2,k):
print 'please waiting...'
# dataMat,newlist,labels,X,tribute=self.PCA(filename,dataNum1,dataNum2)
dataSet,labels=self.getDataSet3(filename,dataNum1,dataNum2)
dataMat=mat(dataSet)
F=[]
filenamejia=r'E:\new'
# one output CSV per cluster (at most 10 clusters supported here)
f1=open(filenamejia+'\\cluster1.csv','w')
f2=open(filenamejia+'\\cluster2.csv','w')
f3=open(filenamejia+'\\cluster3.csv','w')
f4=open(filenamejia+'\\cluster4.csv','w')
f5=open(filenamejia+'\\cluster5.csv','w')
f6=open(filenamejia+'\\cluster6.csv','w')
f7=open(filenamejia+'\\cluster7.csv','w')
f8=open(filenamejia+'\\cluster8.csv','w')
f9=open(filenamejia+'\\cluster9.csv','w')
f10=open(filenamejia+'\\cluster10.csv','w')
f11=open(filenamejia+'\\result.txt','w')
# f11.write('the feature tribute is :'+str(tribute)+'\n'+'\n')
F.append(f1)
F.append(f2)
F.append(f3)
F.append(f4)
F.append(f5)
F.append(f6)
F.append(f7)
F.append(f8)
F.append(f9)
F.append(f10)
# per-cluster counters: total points and sum of (0/1) drop-out labels
listtotal=[]
listlabel=[]
# initialDataMat=mat(dataSet)
for i2 in range(0,k):
# F[i2].write('access->access,access->wiki,access->discussion,discussion->discussion,discussion->access,discussion->wiki,wiki->wiki,wiki->discussion,wiki->access,others,enrollment'+'\n')
listtotal.append(0)
listlabel.append(0)
# dataMat2=dataMat[:30000]
Centroids, clustAssing=kMeans.biKmeans(dataMat,k,kMeans.distEclud)
numSamples, dim = dataMat.shape
# Centroids=(P*Centroids).transpose()
mark = ['or', 'ob', 'og', 'ok', '^r', '+r', 'sr', 'dr', '<r', 'pr']
# draw all samples
# route every sample to its cluster's CSV and accumulate the label counts
for i in xrange(numSamples):
markIndex=int(clustAssing[i, 0])
for i3 in range(0,k):
# print markIndex,i3
if markIndex==i3:
# for k1 in range(0,10):
# F[i3].write(str(X.get(newlist[i])[k1])+',')
F[i3].write(str(dataMat[i,0])+','+labels[i]+'\n')
# F[i3].write(labels[i]+'\n')
# F[i3].write(str(dataMat[i,0])+','+str(dataMat[i,1])+','+str(dataMat[i,2])+','+str(dataMat[i,3])+','+str(dataMat[i,4])+','+str(dataMat[i,5])+','+str(dataMat[i,6])+','+labels[i]+'\n')
listtotal[i3]=listtotal[i3]+1
listlabel[i3]=int(labels[i])+listlabel[i3]
# close the cluster files and write each cluster's drop-out rate
for i4 in range(0,k):
F[i4].close()
f11.write('cluster:'+str(i4+1)+'\n'+'DropOut rate:'+str(float(listlabel[i4])/float(listtotal[i4]))+'\n')
f11.close()
# Centroids2,clustAssing2=kMeans.biKmeans(dataMat2,k,kMeans.distEclud)
# for i in range(k):
# matplotlib.pyplot.plot(Centroids2[i, 0], Centroids2[i, 1],marker='*',c='white', markersize = 20)
# matplotlib.pyplot.savefig('E:\experiment data\TsingHuaKMeans\TsingHua.png')
# matplotlib.pyplot.show()
# Same pipeline as scatterKMeans1, but uses getDataSet4 (id + 7 features),
# writes a titled header and all 7 features per row, and keeps the file
# handles/counters on self.
# NOTE(review): `id` unpacked below shadows the builtin; output paths are
# hard-coded to the 2ndweek experiment directory.
def scatterKMeans2(self,filename,dataNum1,dataNum2,k):
print 'please waiting...'
# dataMat,newlist,labels,X,tribute=self.PCA(filename,dataNum1,dataNum2)
id,dataSet,labels=self.getDataSet4(filename,dataNum1,dataNum2)
dataMat=mat(dataSet)
self.F=[]
title=["navigate", "access", "page_close","discussion", "wiki", "video", "problem"]
# title=['navigate->navigate', 'navigate->access', 'navigate->page_close', 'navigate->discussion', 'navigate->wiki', 'navigate->video', 'navigate->problem', 'access->navigate', 'access->access', 'access->page_close', 'access->discussion', 'access->wiki', 'access->video', 'access->problem', 'page_close->navigate', 'page_close->access', 'page_close->page_close', 'page_close->discussion', 'page_close->wiki', 'page_close->video', 'page_close->problem', 'discussion->navigate', 'discussion->access', 'discussion->page_close', 'discussion->discussion', 'discussion->wiki', 'discussion->video', 'discussion->problem', 'wiki->navigate', 'wiki->access', 'wiki->page_close', 'wiki->discussion', 'wiki->wiki', 'wiki->video', 'wiki->problem', 'video->navigate', 'video->access', 'video->page_close', 'video->discussion', 'video->wiki', 'video->video', 'video->problem', 'problem->navigate', 'problem->access', 'problem->page_close', 'problem->discussion', 'problem->wiki', 'problem->video', 'problem->problem']
# filenamejia=r'E:\experiment data\TimeSequence\OneStepTrans\\'+str(k0+1)+'\\'+str(k)
filenamejia=r'E:\experiment data\TimeSequence\week classify\2ndweek\\'+str(k)
# pre-size self.F, then replace each slot with an opened cluster file
for i10 in range(0,k):
self.F.append(i10)
for i9 in range(0,k):
self.F[i9]=open(filenamejia+'\\cluster'+str(i9+1)+'.csv','w')
print self.F[i9]
self.F[i9].write('id,')
for i90 in range(0,len(title)):
self.F[i9].write(title[i90]+',')
self.F[i9].write('enrollment,\n')
# f11=open(r'E:\experiment data\TimeSequence\OneStepTrans\\'+str(k0+1)+'\\'+str(k)+'\\result.txt','w')
self.f11=open(r'E:\experiment data\TimeSequence\week classify\2ndweek\\'+str(k)+'\\result.txt','w')
# f11.write('the feature tribute is :'+str(tribute)+'\n'+'\n')
# per-cluster counters: total points and sum of (0/1) drop-out labels
self.listtotal=[]
self.listlabel=[]
# initialDataMat=mat(dataSet)
for i2 in range(0,k):
# F[i2].write('access->access,access->wiki,access->discussion,discussion->discussion,discussion->access,discussion->wiki,wiki->wiki,wiki->discussion,wiki->access,others,enrollment'+'\n')
self.listtotal.append(0)
self.listlabel.append(0)
Centroids, clustAssing=kMeans.biKmeans(dataMat,k,kMeans.distEclud)
numSamples, dim = dataMat.shape
# Centroids=(P*Centroids).transpose()
mark = ['or', 'ob', 'og', 'ok', '^r', '+r', 'sr', 'dr', '<r', 'pr']
# draw all samples
# route every sample to its cluster's CSV and accumulate the label counts
for i in xrange(numSamples):
markIndex=int(clustAssing[i, 0])
for i3 in range(0,k):
# print markIndex,i3
if markIndex==i3:
# for k1 in range(0,10):
# F[i3].write(str(X.get(newlist[i])[k1])+',')
# F[i3].write(str(dataMat[i,0])+','+labels[i]+'\n')
# F[i3].write(labels[i]+'\n')
self.F[i3].write(id[i]+','+str(dataMat[i,0])+','+str(dataMat[i,1])+','+str(dataMat[i,2])+','+str(dataMat[i,3])+','+str(dataMat[i,4])+','+str(dataMat[i,5])+','+str(dataMat[i,6])+','+labels[i]+',\n')
self.listtotal[i3]=self.listtotal[i3]+1
self.listlabel[i3]=int(labels[i])+self.listlabel[i3]
# close the cluster files and write each cluster's drop-out rate
for i4 in range(0,k):
self.F[i4].flush()
self.F[i4].close()
self.f11.write('cluster:'+str(i4+1)+'\n'+'DropOut rate:'+str(float(self.listlabel[i4])/float(self.listtotal[i4]))+'\n')
self.f11.flush()
self.f11.close()
# Same pipeline as scatterKMeans2, with an extra k0 parameter selecting the
# experiment subdirectory (1_2week\<k0+1>\<k>) and local (non-self) state.
# NOTE(review): `id` unpacked below shadows the builtin; file handles leak
# if an exception occurs before the close loop.
def scatterKMeans3(self,filename,dataNum1,dataNum2,k0,k):
print 'please waiting...'
# dataMat,newlist,labels,X,tribute=self.PCA(filename,dataNum1,dataNum2)
id,dataSet,labels=self.getDataSet4(filename,dataNum1,dataNum2)
dataMat=mat(dataSet)
F=[]
# title=['navigate->navigate', 'navigate->access', 'navigate->page_close', 'navigate->discussion', 'navigate->wiki', 'navigate->video', 'navigate->problem', 'access->navigate', 'access->access', 'access->page_close', 'access->discussion', 'access->wiki', 'access->video', 'access->problem', 'page_close->navigate', 'page_close->access', 'page_close->page_close', 'page_close->discussion', 'page_close->wiki', 'page_close->video', 'page_close->problem', 'discussion->navigate', 'discussion->access', 'discussion->page_close', 'discussion->discussion', 'discussion->wiki', 'discussion->video', 'discussion->problem', 'wiki->navigate', 'wiki->access', 'wiki->page_close', 'wiki->discussion', 'wiki->wiki', 'wiki->video', 'wiki->problem', 'video->navigate', 'video->access', 'video->page_close', 'video->discussion', 'video->wiki', 'video->video', 'video->problem', 'problem->navigate', 'problem->access', 'problem->page_close', 'problem->discussion', 'problem->wiki', 'problem->video', 'problem->problem']
filenamejia=r'E:\experiment data\TimeSequence\1_2week\\'+str(k0+1)+'\\'+str(k)
# pre-size F, then replace each slot with an opened cluster file
for i10 in range(0,k):
F.append(i10)
for i9 in range(0,k):
F[i9]=open(filenamejia+'\\cluster'+str(i9+1)+'.csv','w')
print F[i9]
F[i9].write('id,navigate,access,page_close,discussion,wiki,video,problem,enrollment,\n')
f11=open(r'E:\experiment data\TimeSequence\1_2week\\'+str(k0+1)+'\\'+str(k)+'\\result.txt','w')
# f11.write('the feature tribute is :'+str(tribute)+'\n'+'\n')
# per-cluster counters: total points and sum of (0/1) drop-out labels
listtotal=[]
listlabel=[]
# initialDataMat=mat(dataSet)
for i2 in range(0,k):
# F[i2].write('access->access,access->wiki,access->discussion,discussion->discussion,discussion->access,discussion->wiki,wiki->wiki,wiki->discussion,wiki->access,others,enrollment'+'\n')
listtotal.append(0)
listlabel.append(0)
Centroids, clustAssing=kMeans.biKmeans(dataMat,k,kMeans.distEclud)
numSamples, dim = dataMat.shape
# Centroids=(P*Centroids).transpose()
mark = ['or', 'ob', 'og', 'ok', '^r', '+r', 'sr', 'dr', '<r', 'pr']
# draw all samples
# route every sample to its cluster's CSV and accumulate the label counts
for i in xrange(numSamples):
markIndex=int(clustAssing[i, 0])
for i3 in range(0,k):
# print markIndex,i3
if markIndex==i3:
# for k1 in range(0,10):
# F[i3].write(str(X.get(newlist[i])[k1])+',')
# F[i3].write(str(dataMat[i,0])+','+labels[i]+'\n')
# F[i3].write(labels[i]+'\n')
F[i3].write(id[i]+','+str(dataMat[i,0])+','+str(dataMat[i,1])+','+str(dataMat[i,2])+','+str(dataMat[i,3])+','+str(dataMat[i,4])+','+str(dataMat[i,5])+','+str(dataMat[i,6])+','+labels[i]+',\n')
listtotal[i3]=listtotal[i3]+1
listlabel[i3]=int(labels[i])+listlabel[i3]
# close the cluster files and write each cluster's drop-out rate
for i4 in range(0,k):
F[i4].close()
f11.write('cluster:'+str(i4+1)+'\n'+'DropOut rate:'+str(float(listlabel[i4])/float(listtotal[i4]))+'\n')
f11.close()
包含了机器学习的KNN,支持向量机用于分类。包含了聚类算法的应用。自己看一下应当能回忆起来。
聚类评估是自己写的,参考了一个文档,也不记得来源了。果然是需要多整理才行啊。
#coding: utf-8
from kMeans import distEclud
from numpy import *

# Cluster-quality evaluation.  For each cluster: the mean point-to-centroid
# distance (compactness).  Across clusters: centroid-to-centroid distances
# (separation).  ocq combines both; a larger ocq means a better clustering
# for that k.  (Reconstructed from a whitespace-mangled one-line paste.)


def classinner(filename):
    """Per-cluster statistics over a list of cluster CSV files.

    Returns (classinnerdis, averdis, classdis, averCen):
      classinnerdis - mean pairwise intra-cluster distances; the computation
                      is commented out in the original, so these stay 0.0
      averdis       - mean of classinnerdis (0.0 for the same reason)
      classdis      - centroid (column mean) of each cluster
      averCen       - sqrt of the mean point-to-centroid distance per cluster
    """
    classinnerdis = []
    averdis = 0.0
    averCen = []
    sumdis = 0
    classdis = []
    for k0 in range(len(filename)):
        point = []
        # BUG FIX: the accumulator was declared once outside this loop in the
        # original, so each cluster's averCen included all previous clusters'
        # distances; it must restart per cluster.
        distanceCen = 0.0
        classinnerdis.append(0.0)
        classdis.append(0.0)
        averCen.append(0.0)
        print('compute' + filename[k0])
        # `with` guarantees the handle is closed (original never closed it)
        with open(filename[k0]) as f:
            for line in f:
                fields = line.split(',')  # renamed: original shadowed builtin `str`
                if fields[0] != 'id':
                    point.append([float(fields[c]) for c in range(1, 8)])
        point = mat(point)
        classdis[k0] = point.mean(axis=0)
        print(classdis[k0])
        if len(point) > 1:
            for i in range(len(point)):
                distanceCen = distEclud(point[i, :], classdis[k0]) + distanceCen
            averCen[k0] = (float(distanceCen) / float(len(point))) ** (1. / 2)
        print('complete a cluster point compute!')
    for i01 in range(len(classinnerdis)):
        sumdis = sumdis + classinnerdis[i01]
    averdis = float(sumdis) / float(len(classinnerdis))
    print('complete all cluster compute')
    return classinnerdis, averdis, classdis, averCen


def autoNorm(dataSet):
    """Min-max normalize each column of dataSet into [0, 1]."""
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    m = dataSet.shape[0]
    normDataSet = dataSet - tile(minVals, (m, 1))
    normDataSet = normDataSet / tile(ranges, (m, 1))
    return normDataSet


def getInter(filename1, k, b):
    """Evaluate the k-cluster result stored on disk.

    filename1 is the full (un-clustered) dataset used as the compactness
    reference; b weights compactness vs. separation.
    Returns (Cmp, Sep, ocq) with ocq = 1 - (b*Cmp + (1-b)*Sep).
    """
    print('start compute...')
    Cmp = 0.0
    Sep = 0.0
    point1 = []
    distanceCen11 = 0.0
    averdis11 = 0.0
    disCluster = 0.0
    ocq = 0.0
    # reference spread: mean point-to-centroid distance of the whole dataset
    with open(filename1) as f1:
        for line2 in f1:
            str1 = line2.split(',')
            if str1[0] != 'id':
                point1.append([float(str1[c]) for c in range(1, 8)])
    point1 = mat(point1)
    classdis11 = point1.mean(axis=0)
    print('intergrete data centry:')
    if len(point1) > 1:
        for i in range(len(point1)):
            distanceCen11 = distEclud(point1[i, :], classdis11) + distanceCen11
        averdis11 = (float(distanceCen11) / float(len(point1))) ** (1. / 2)
    print('complete intergrete data point compute!')
    filename = []
    filenamejia = r'E:\experiment data\TimeSequence\week classify\4week\\' + str(k)
    for i00 in range(k):
        filename.append(filenamejia + '\\cluster' + str(i00 + 1) + '.csv')
    classinnerdis, averdis, classdis, averCen = classinner(filename)
    # compactness: per-cluster spread normalized by the whole-dataset spread
    for j1 in range(k):
        Cmp = Cmp + float(averCen[j1]) / float(averdis11)
    Cmp = float(Cmp) / float(k)
    # separation: exp-damped centroid-to-centroid distances, averaged over
    # all ordered cluster pairs
    for j3 in range(len(classdis)):
        for j4 in range(len(classdis)):
            if j3 != j4:
                disCluster = disCluster + exp(-((distEclud(classdis[j3], classdis[j4])) / float(1000)))
    Sep = (float(disCluster)) / float(k * (k - 1))
    print(Sep)
    ocq = 1 - (b * Cmp + (1 - b) * Sep)
    return Cmp, Sep, ocq


# Driver: score every clustering from k=2..10 and log Cmp/Sep/ocq per k.
f0 = open(r'E:\experiment data\TimeSequence\week classify\4week\clusterassess.csv', 'w')
f0.write('Cmp,Sep,ocq,\n')
for k in range(2, 11):
    Cmp, Sep, ocq = getInter(r'E:\experiment data\TimeSequence\autoNormData\4\TsingHuaAnalysis4thWeek.csv', k, 0.5)
    f0.write(str(Cmp) + ',' + str(Sep) + ',' + str(ocq) + ',\n')
f0.close()
包括了结果的输出。自动化地对每一组结果进行计算。从2到11循环是因为自己之前聚类的时候设置的k值是2到10.
大概就是计算类内距离和类间距离。取其综合值ocq作为聚类评估值,ocq越大,该k值下的聚类效果越好。
- python数据分析之kDD(二)
- python数据分析之KDD大赛数据(一)
- Python数据分析之pandas学习(二)
- Python数据分析之numpy学习(二)
- python数据分析(二)
- KDD CUP99数据集
- 利用python进入数据分析之数据规整化:清理、转换、合并、重塑(二)
- python数据分析之Numpy
- Python数据分析学习笔记二
- 分析python处理基本数据<二>
- python数据分析学习笔记二
- python数据分析实践(二)
- Python数据分析学习笔记(二)
- python数据分析与展示(二)
- Python数据分析笔记(二)
- 利用Python进行数据分析(二)
- KDD CUP 99数据集
- KDD(数据挖掘)概念
- 批处理(.bat)学习笔记
- Android蓝牙打印机例子
- Spark 提交任务时,报: Invalid signature file digest for Manifest main attributes
- 几行代码实现tab+fragment+viewpager---还有酷炫的动画效果哦
- CMS用通用图片轮换flash幻灯片播放器:Bcastr3和Bcastr4
- python数据分析之kDD(二)
- log4j.properties配置文件配置项的说明
- window.getSelection()相关
- Android 轮播图---ViewFlipper
- Android基于插件化的技术二
- tomcat8 实现的websock,可以跨域哦
- postman & rest easy
- IMS
- nodejs+mongodb系列教程之(1/5)--开篇总述