FCM聚类算法

来源:互联网 发布:软件详细设计模板 java 编辑:程序博客网 时间:2024/05/21 22:55

之前提到了K均值算法,多数讨论认为K均值与硬C均值(HCM)算法本质相同。模糊C均值(FCM)算法则在HCM的基础上加入了对聚类簇的模糊划分,引入隶属度来提升算法性能。

import copy
import decimal
import math
import random
import sys
import time

try:
    import matplotlib.animation as animation
    import matplotlib.pyplot as plt
except ImportError:
    # None of the clustering routines below plot anything, so a missing
    # matplotlib must not prevent them from being imported and used.
    animation = None
    plt = None

# Upper bound of the random integers used to seed the membership matrix.
MAX = 10000.0
# Stop iterating once no membership value changes by more than this amount.
Epsilon = 0.000001


def randomise(data):
    """Shuffle *data* and remember where every row came from.

    Returns a ``(new_data, order)`` tuple in which ``new_data[i]`` is
    ``data[order[i]]``.  Pass both to :func:`de_randomise` to undo the
    shuffle.  *data* itself is not modified.
    """
    order = list(range(len(data)))
    random.shuffle(order)
    new_data = [data[original_index] for original_index in order]
    return new_data, order


def de_randomise(data, order):
    """Undo :func:`randomise`: put ``data[i]`` back at position ``order[i]``."""
    new_data = [[] for _ in range(len(data))]
    for position, original_index in enumerate(order):
        new_data[original_index] = data[position]
    return new_data


def print_matrix(lists):
    """Print every row of *lists* on its own line."""
    for row in lists:
        # A single parenthesised argument prints identically on Python 2 and 3.
        print(row)


def init_U(data, clu_num):
    """Build a random membership matrix with one row per item of *data*.

    Every row holds *clu_num* strictly positive weights that sum to 1.
    """
    U = []
    for _ in range(len(data)):
        draws = [random.randint(1, int(MAX)) for _ in range(clu_num)]
        # Float total so the normalisation below is a true division.
        total = float(sum(draws))
        U.append([draw / total for draw in draws])
    return U


def distance(v1, v2):
    """Return the Euclidean distance between *v1* and *v2*.

    Returns -1 when the two vectors do not have the same length.
    """
    if len(v1) != len(v2):
        return -1
    return math.sqrt(sum((b - a) ** 2 for a, b in zip(v1, v2)))


def end_iterate(U, U_old):
    """Return True when no entry of *U* differs from *U_old* by more than Epsilon."""
    for row, old_row in zip(U, U_old):
        for value, old_value in zip(row, old_row):
            if abs(value - old_value) > Epsilon:
                return False
    return True


def normalise_U(U):
    """Turn the fuzzy memberships in *U* into a hard 0/1 assignment, in place.

    In every row the largest membership becomes 1 and all others become 0.
    If several clusters tie for the largest value, only the first one is
    marked, so each row always names exactly one cluster.  Returns *U*.
    """
    for row in U:
        winner = row.index(max(row))
        row[:] = [1 if j == winner else 0 for j in range(len(row))]
    return U


def _cluster_centres(data, U, clu_num, m):
    """Return, for every cluster, the mean of *data* weighted by membership ** m."""
    dimensions = len(data[0])
    centres = []
    for i in range(clu_num):
        weights = [row[i] ** m for row in U]
        weight_total = sum(weights)
        centres.append([
            sum(w * point[j] for w, point in zip(weights, data)) / weight_total
            for j in range(dimensions)
        ])
    return centres


def _memberships(distances, exponent):
    """Return one point's membership row from its distances to all centres."""
    on_centre = [d == 0 for d in distances]
    if any(on_centre):
        # The point sits exactly on one or more centres; the usual ratio
        # formula would divide by zero, so those centres share the whole
        # membership and every other cluster gets none.
        share = 1.0 / sum(on_centre)
        return [share if hit else 0.0 for hit in on_centre]
    return [
        1.0 / sum((d_i / d_k) ** exponent for d_k in distances)
        for d_i in distances
    ]


def fuzz_c_mean(data, clu_num, m, max_iterate):
    """Cluster *data* with the fuzzy c-means (FCM) algorithm.

    Parameters:
        data: list of equal-length numeric vectors.
        clu_num: number of clusters, at least 1.
        m: fuzziness exponent, must be greater than 1.
        max_iterate: upper bound on the number of update passes.

    Returns the hard membership matrix produced by :func:`normalise_U`:
    one row per data point with a single 1 in the column of its most likely
    cluster.  An empty *data* yields an empty list.

    Raises ValueError when *clu_num* < 1 or *m* <= 1.
    """
    if clu_num < 1:
        raise ValueError("clu_num must be at least 1")
    if m <= 1:
        raise ValueError("m must be greater than 1")
    if not data:
        return []

    U = init_U(data, clu_num)
    # 2.0 keeps this a true division on Python 2 even when m is an int.
    exponent = 2.0 / (m - 1)
    current_iterate = 0
    while True:
        current_iterate += 1
        U_old = copy.deepcopy(U)

        centres = _cluster_centres(data, U, clu_num, m)
        distance_mat = [
            [distance(point, centre) for centre in centres]
            for point in data
        ]
        for j, row in enumerate(distance_mat):
            U[j] = _memberships(row, exponent)

        if end_iterate(U, U_old):
            # Memberships have stopped changing: converged.
            print('mission complete')
            break
        elif current_iterate >= max_iterate:
            # Give up after max_iterate passes and use the current state.
            print('iterate overflow')
            break

    U = normalise_U(U)
    return U