用python实现Huffman编码

来源：互联网 | 发布：字符串压缩算法 | 编辑：程序博客网 | 时间：2024/06/05 10:40

用python实现Huffman编码，以下的代码分为两个模块，一个为功能模块newHuffman.py，一个为控制模块demo.py。

newHuffman.py，这个模块主要实现的功能包括：

①实现Huffman编码（核心功能）；

②计算Huffman编码的平均长度；

③计算编码的编码效率。

代码如下：

#encoding:utf-8
"""Huffman source coding.

Builds a Huffman code for a set of weighted symbols and reports the
average code length and the coding efficiency (entropy / average length).

Example::

    h = Huffman()
    h.addname("a", 0.3)
    h.addname("b", 0.2)
    h.addname("c", 0.5)
    print(h.printCoding())
    print(h.averageHuffmanLength())
    print(h.efficiency())
"""
import numpy as np

# Value stored in ``code`` of every merged (internal) node.  ``addcode`` uses
# it to tell internal nodes, which carry no code of their own, from leaves.
_INTERNAL_NODE_CODE = "不编码节点"


class HuffmanCell():
    """One node of the Huffman tree.

    A leaf represents a source symbol; an internal node represents the merge
    of its ``left`` and ``right`` children.
    """

    name = ""           # symbol name (concatenation of child names for merged nodes)
    value = 0           # weight / probability of the node
    code = ""           # code string of a leaf, or the internal-node marker
    left = None         # left child, None for leaves
    right = None        # right child, None for leaves
    encodingLength = 0  # len(code), filled in by Huffman.printCoding

    def __init__(self, name="", value=0):
        self.name = name
        self.value = value


class Huffman():
    """Huffman encoder over the symbols registered with :meth:`addname`."""

    def __init__(self):
        # Per-instance state: class-level lists would be shared by every
        # Huffman object.
        self.informationSource = []    # leaf cells, in insertion order
        self.result = []               # leaves collected by rootTraverse
        self.encodingList = []         # leaves of the last printCoding() run
        self.encodingInformation = []  # working node list of encodingTree
        self.num = 0                   # len(self.encodingInformation)

    def addname(self, name, value):
        """Register a symbol; entries with an empty name or value <= 0 are ignored."""
        if name and value > 0:
            self.informationSource.append(HuffmanCell(name, value))

    def getMinList(self, newInformationSource):
        """Return a new list with the cells ordered by ascending ``value``.

        Among equal values the last input element comes first and the others
        keep their input order.  This is the tie-breaking the module has
        always used, so the generated codes stay the same.  Any comparable
        weights are supported, not only values below 2.
        """
        cells = list(newInformationSource)
        if not cells:
            return []
        # Rotating the last element to the front before a stable sort
        # yields exactly the tie-breaking described above.
        rotated = cells[-1:] + cells[:-1]
        return sorted(rotated, key=lambda cell: cell.value)

    def encodingTree(self):
        """Build the Huffman tree and assign a code to every leaf.

        Returns a list holding the root node (empty when no symbol has been
        added).  Leaf codes are reset first, so the method can be called
        repeatedly.  A lone symbol receives the code "0".
        """
        for cell in self.informationSource:
            cell.code = ""
            cell.encodingLength = 0

        nodes = self.getMinList(self.informationSource)
        if len(nodes) == 1:
            # No merge happens for a single symbol; give it a one-bit code
            # so that its length is not zero.
            nodes[0].code = "0"

        while len(nodes) >= 2:
            first, second = nodes[0], nodes[1]
            # Prefix every leaf below the two lightest nodes with its branch bit.
            self.addcode(first, "1")
            self.addcode(second, "0")
            parent = HuffmanCell(first.name + second.name,
                                 first.value + second.value)
            parent.left = first
            parent.right = second
            parent.code = _INTERNAL_NODE_CODE
            nodes = self.getMinList([parent] + nodes[2:])

        self.encodingInformation = nodes
        self.num = len(nodes)
        return self.encodingInformation

    def addcode(self, node, code):
        """Prepend ``code`` to the code of every leaf below ``node``."""
        if node.code == _INTERNAL_NODE_CODE:
            self.addcode(node.left, code)
            self.addcode(node.right, code)
        else:
            node.code = code + node.code

    def rootTraverse(self, node):
        """Append all leaves below ``node`` to ``self.result``, left to right."""
        if node.left is not None:
            self.rootTraverse(node.left)
            self.rootTraverse(node.right)
        else:
            self.result.append(node)

    def printCoding(self):
        """Encode the registered symbols and return a textual report.

        One ``\\r\\n``-terminated line is produced per symbol.  The leaves are
        also stored in ``self.encodingList`` for the statistics methods.
        Returns an empty string when no symbol has been added.
        """
        self.result = []  # do not accumulate leaves across calls
        roots = self.encodingTree()
        if roots:
            self.rootTraverse(roots[0])
        self.encodingList = self.result

        lines = []
        for cell in self.encodingList:
            cell.encodingLength = len(cell.code)
            lines.append("{%s,%f,huffmanCode:%s,length:,%d} "
                         % (cell.name, cell.value, cell.code,
                            cell.encodingLength) + "\r\n")
        return "".join(lines)

    def informationEntropy(self):
        """Return -sum(p * log2(p)) over the encoded symbols.

        The symbol values are used as probabilities as-is (no normalisation).
        """
        entropy = 0
        for cell in self.encodingList:
            entropy = entropy + cell.value * np.log2(cell.value)
        return -entropy

    def averageHuffmanLength(self):
        """Return sum(p * code length) over the encoded symbols."""
        averageLength = 0
        for cell in self.encodingList:
            averageLength = averageLength + cell.value * cell.encodingLength
        return averageLength

    def efficiency(self):
        """Return entropy / average code length, or 0.0 when nothing is encoded."""
        averageLength = self.averageHuffmanLength()
        if averageLength == 0:
            return 0.0
        return self.informationEntropy() / averageLength

demo.py模块主要负责一些输入和输出，比如需要编码的符号及其相应的概率。

代码如下：

#encoding:utf-8
# Demo driver: encodes the six-symbol source of textbook exercise 4.3 and
# prints the Huffman codes, the average code length and the coding efficiency.
from newHuffman import Huffman

H = Huffman()
H.addname("第一个符号", 0.3)
H.addname("第二个符号", 0.2)
H.addname("第三个符号", 0.15)
H.addname("第四个符号", 0.15)
H.addname("第五个符号", 0.1)
H.addname("第六个符号", 0.1)

# printCoding() must run first: it builds the codes that the two statistics
# calls below read.
result = H.printCoding()

# Single-argument print(...) prints the same text on Python 2 and Python 3.
print("书上练习题4.3")
print("Huffman信源编码为:")
print(result)
print("此题的Huffman编码平均长度为：")
print(H.averageHuffmanLength())
print("此题的Huffman编码效率为：")
print(H.efficiency())

输出结果为：

由于每个人对Huffman编码的写法略有不同（对同概率符号的处理不同），每个符号具体的编码结果huffmanCode可能会不同，但是平均长度和编码效率的值必定是相同的。

0 0