层次聚类算法

来源:互联网 发布:西安电子大学网络教育 编辑:程序博客网 时间:2024/05/17 08:17

层次聚类算法很简单
类似于树的结构
树自下而上构建
即簇的数量从 n 个逐步合并为 1 个
具体见代码
增加了几个树的节点操作函数（求树的深度、统计叶子数）
应用暂时没有
后续会更新

# -*- coding: utf-8 -*-
# Bottom-up (agglomerative) hierarchical clustering.
#
# Every sample starts as its own cluster; the two closest clusters are merged
# repeatedly until a single cluster -- the root of a binary tree -- remains.
# Helper functions report the depth and the number of leaves of that tree.
import itertools

import numpy


def getMax(x, y):
    """Return the larger of ``x`` and ``y`` (``y`` when they compare equal)."""
    return x if x > y else y


def getMin(x, y):
    """Return the smaller of ``x`` and ``y`` (``x`` when they compare equal)."""
    return y if x > y else x


def loadData(fileName):
    """Read whitespace-separated numeric rows from a text file.

    Args:
        fileName: path of the text file; each non-blank line holds one sample.

    Returns:
        A list with one list of floats per non-blank line.

    Raises:
        ValueError: if a field cannot be converted to float.
    """
    data = []
    with open(fileName) as txtFile:
        for line in txtFile:
            fields = line.split()
            if not fields:
                # A blank line would become an empty sample and break the
                # distance computation later on, so it is skipped.
                continue
            # Build a real list: on Python 3 ``map`` is a lazy iterator.
            data.append([float(field) for field in fields])
    return data


class clusterNode(object):
    """Node of the cluster tree.

    Attributes:
        vec: coordinates of the node (the sample itself for a leaf, the
            midpoint of the two children for a merged cluster).
        left, right: child nodes, or None for a leaf.
        id: index of the sample in the data set for a leaf; None for the
            merged clusters created by ``hiCluster``.
        distance: distance between the two children at merge time
            (0 for a leaf).
    """

    def __init__(self, vec, left=None, right=None, id=None, distance=0):
        self.vec = vec
        self.left = left
        self.right = right
        self.id = id
        self.distance = distance


def L2Distance(v1, v2):
    """Return the Euclidean distance between two 1-D vectors.

    Plain sequences are accepted as well as numpy arrays.
    """
    diff = numpy.asarray(v1, dtype=float) - numpy.asarray(v2, dtype=float)
    return numpy.sqrt(numpy.sum(diff ** 2))


deep = 0  # result of the most recent getDeep() call


def getDeep(node, high, verbose=True):
    """Compute the level of the deepest leaf below ``node``.

    Args:
        node: root of the (sub)tree to inspect.
        high: level assigned to ``node`` itself (pass 0 for the tree root).
        verbose: when true (the default), print the ``vec`` of every
            internal node in pre-order, as the original script did.

    Returns:
        The largest level reached by a leaf. The value is also stored in the
        module-level ``deep``, overwriting the result of any earlier call.
    """
    global deep
    deepest = high
    # An explicit stack replaces recursion: a chained cluster tree can be as
    # deep as the number of samples, which would hit the recursion limit.
    pending = [(node, high)]
    while pending:
        current, level = pending.pop()
        if current.left is None and current.right is None:
            deepest = getMax(deepest, level)
            continue
        if verbose:
            print(current.vec)
        # Push right before left so the left subtree is visited first.
        for child in (current.right, current.left):
            if child is not None:
                pending.append((child, level + 1))
    deep = deepest
    return deepest


leavesNum = 0  # result of the most recent getLeaves() call


def getLeaves(node):
    """Count the leaves below ``node``.

    Returns:
        The number of leaves. The value is also stored in the module-level
        ``leavesNum``, overwriting the result of any earlier call, so that
        repeated calls do not accumulate.
    """
    global leavesNum
    count = 0
    pending = [node]
    while pending:
        current = pending.pop()
        if current.left is None and current.right is None:
            count += 1
            continue
        for child in (current.left, current.right):
            if child is not None:
                pending.append(child)
    leavesNum = count
    return count


def hiCluster(dataSet, calDistance=L2Distance):
    """Build the cluster tree for ``dataSet`` by repeated pairwise merging.

    Args:
        dataSet: sequence of equally sized numeric vectors.
        calDistance: function taking two vectors and returning their distance.

    Returns:
        The root ``clusterNode``. Leaves carry the sample index in ``id``;
        each merged node holds the unweighted midpoint of its two children
        in ``vec`` and their distance in ``distance``.

    Raises:
        IndexError: if ``dataSet`` is empty.
    """
    clust = [clusterNode(numpy.array(sample), id=index)
             for index, sample in enumerate(dataSet)]

    while len(clust) > 1:
        closest = None
        lowestpair = (0, 1)
        # Scan every pair (i < j) and keep the first one with the smallest
        # distance.
        for i, j in itertools.combinations(range(len(clust)), 2):
            gap = calDistance(clust[i].vec, clust[j].vec)
            if closest is None or gap < closest:
                closest = gap
                lowestpair = (i, j)

        first, second = lowestpair
        nearer, farther = clust[first], clust[second]
        # The merged cluster sits halfway between the two merged clusters.
        mergeVec = (nearer.vec + farther.vec) / 2.0
        newcluster = clusterNode(numpy.array(mergeVec), left=nearer,
                                 right=farther, distance=closest)

        # Delete the higher index first so the lower one stays valid.
        del clust[second]
        del clust[first]
        clust.append(newcluster)

    # The single remaining cluster is the root of the tree.
    return clust[0]


if __name__ == "__main__":
    dataSet = loadData("testSet.txt")
    root = hiCluster(dataSet)
    print("node is")
    getDeep(root, 0)
    print("deep is %s" % deep)
    getLeaves(root)
    print("leavesNum is %s" % leavesNum)
原创粉丝点击