Machine Learning学习之 系统(层次)聚类法

来源:互联网 发布:linux修改网卡配置 编辑:程序博客网 时间:2024/06/11 14:34
# coding=utf8
"""Hierarchical (agglomerative) clustering using single linkage.

Algorithm:
    1. Define a distance and compute it between every pair of samples;
       at the start every sample is a class of its own.
    2. Find the smallest off-diagonal entry of the distance matrix and merge
       the two corresponding classes into one.
    3. Build the distance matrix of the reduced list and repeat step 2.
    4. Repeat steps 2 and 3 until every sample has been merged into a cluster.
"""
import math
import time
import types  # not used below; kept because it was imported by the original


def _is_cluster(item):
    """Return True when *item* is a sequence of points, not a single point."""
    return bool(item) and isinstance(item[0], (list, tuple))


def _as_points(item):
    """Return the points held by *item*; a bare point becomes ``[item]``."""
    return item if _is_cluster(item) else [item]


class HCM(object):
    """Cluster a list of points by repeatedly merging the two nearest entries.

    A point is a sequence of numbers (e.g. ``[7.1]`` or ``[3, 4]``); a cluster
    is a list of points.  Merging stops as soon as every remaining entry is a
    cluster, so the result can contain more than one cluster.

    Attributes:
        data: the input passed to the constructor (never modified).
        num: number of entries in ``result``.
        result: the final list of clusters (also available as ``Nlist``).
    """

    def __init__(self, data):
        self.data = data
        self.num, self.result = self.__hcm()

    def __hcm(self):
        """Run the merge loop and return ``(entry_count, entries)``."""
        # Shallow copy: the caller's outer list is never reordered or shrunk.
        self.Nlist = list(self.data)
        if len(self.Nlist) < 2:
            # Nothing to merge (also avoids indexing into an empty list).
            return len(self.Nlist), self.Nlist
        while True:
            size = len(self.Nlist)
            # Only the strict lower triangle (j < i) is filled and read.
            matrix = [[0] * size for _ in range(size)]
            for i in range(size):
                for j in range(i):
                    matrix[i][j] = self.__calDistance(self.Nlist[i],
                                                      self.Nlist[j])
            sw1, sw2 = self.__getMin(matrix)
            self.Nlist = self.__swap(self.Nlist, sw1, sw2)
            # The check runs after each merge, so at least one merge happens
            # whenever there are two or more entries.
            if len(self.Nlist) < 2 or self.__checkL(self.Nlist):
                break
        return len(self.Nlist), self.Nlist

    def __checkL(self, lis):
        """Return True when every entry of *lis* is a cluster."""
        return all(_is_cluster(entry) for entry in lis)

    def __getMin(self, Mat):
        """Return ``(i, j)`` with ``j < i`` of the smallest lower-triangle entry.

        Ties keep the first pair met in row-major order.  ``(0, 0)`` is
        returned when no pair exists (fewer than two rows).
        """
        best_i, best_j = 0, 0
        # Infinity instead of a magic 999 so large distances are not ignored.
        best = float("inf")
        for i in range(1, len(Mat)):
            for j in range(i):
                if Mat[i][j] < best:
                    best = Mat[i][j]
                    best_i, best_j = i, j
        return best_i, best_j

    def __swap(self, lis, i, j):
        """Merge entry *j* into entry *i*, drop entry *j*, and return *lis*.

        The merged cluster lists the points of entry *i* first, then those of
        entry *j*.  A new list is built, so clusters supplied by the caller
        are not modified.

        Raises:
            ValueError: if ``i == j`` (an entry cannot be merged with itself).
        """
        if i == j:
            raise ValueError("cannot merge entry %d with itself" % i)
        lis[i] = list(_as_points(lis[i])) + list(_as_points(lis[j]))
        # Delete by position; removing by value could hit an equal entry.
        del lis[j]
        return lis

    def __calDistance(self, data1, data2):
        """Return the single-linkage distance between two entries.

        This is the smallest Euclidean distance between any point of *data1*
        and any point of *data2*; each argument may be a point or a cluster.

        Raises:
            ValueError: if two compared points differ in dimension.
        """
        nearest = float("inf")
        for p in _as_points(data1):
            for q in _as_points(data2):
                if len(p) != len(q):
                    raise ValueError("points %r and %r differ in dimension"
                                     % (p, q))
                distance = math.sqrt(sum((a - b) ** 2 for a, b in zip(p, q)))
                if distance < nearest:
                    nearest = distance
        return nearest


data = [[7.1], [7.1], [7.2], [7.1], [7.1], [6], [6.5], [2],
        [3.7765], [3.7765], [3.7785], [3.2765]]
# time.clock() was removed in Python 3.8; use perf_counter when it exists and
# fall back to clock() only on interpreters that lack it.
_timer = getattr(time, "perf_counter", None) or time.clock
time1 = _timer()
k1 = HCM(data)
time2 = _timer()
# Single-argument print calls produce the same output on Python 2 and 3.
print("time cost is : %s" % (time2 - time1))
print("%s %s" % (k1.num, k1.result))
for qq in range(len(k1.result)):
    print(qq)
    print(k1.result[qq])
阅读全文
0 0
原创粉丝点击