数据压缩原理实验3_实验报告
来源:互联网 发布:深圳淘宝摄影 编辑:程序博客网 时间:2024/05/18 13:26
一、实验原理
1、本实验中Huffman编码算法
(1)将文件以ASCII字符流的形式读入,统计每个符号的发生频率;
(2)将所有文件中出现过的字符按照频率从小到大的顺序排列;
(3)每一次选出频率最小的两个值,作为二叉树的两个叶子节点,将它们的和作为它们的根节点,这两个叶子节点不再参与比较,新的根节点参与比较;
(4)重复步骤(3),直到最后得到概率和为1的根节点;
(5)将形成的二叉树的左节点标0,右节点标1,把从最上面的根节点到最下面的叶子节点途中遇到的0、1序列串起来,得到了各个字符的编码表示。
2、Huffman编码的数据结构设计,在程序实现中使用一种叫做二叉树的数据结构实现Huffman编码。
(1)哈夫曼节点结构
/*
 * One node of the Huffman tree.
 *
 * A leaf stores the symbol it stands for; an internal node stores pointers
 * to its two children. Both cases share storage through the anonymous
 * union, and isLeaf tells which member is valid.
 */
typedef struct huffman_node_tag
{
    unsigned char isLeaf;             /* non-zero when this node is a leaf */
    unsigned long count;              /* weight (summed symbol count) of this node */
    struct huffman_node_tag *parent;  /* parent node */

    union
    {
        struct
        {
            /* children reached by a 0 bit and by a 1 bit respectively */
            struct huffman_node_tag *zero, *one;
        };
        unsigned char symbol;         /* symbol represented by a leaf */
    };
} huffman_node;
(2)哈夫曼码结构
/* The code word assigned to one symbol. */
typedef struct huffman_code_tag
{
    unsigned long numbits;  /* number of bits used by this code */
    unsigned char *bits;    /* pointer to the bit string of this code */
} huffman_code;
二、实验步骤
1.huffman编码流程
(1)读入源文件
char memory = 0; //memory为1表示对内存编码 char compress = 1;//compress为1表示压缩,为0是解压 int opt; //add by zhn const char *file_in = NULL, *file_out = NULL; const char *file_table=NULL; FILE *in = stdin;//标准输入 FILE *out = stdout;//标准输出 //add by zhn FILE *table;//输出码表 while((opt = getopt(argc, argv, "i:o:t:cdhvm")) != -1)//对argc,argv的解析,单个字符后跟一个冒号表示后面必须接参数 { switch(opt) { case 'i': file_in = optarg; break; case 'o': file_out = optarg; break; //add by zhn case 't': file_table = optarg; break; case 'c': compress = 1; break; case 'd': compress = 0; break; case 'h': usage(stdout); return 0; case 'v': version(stdout); return 0; case 'm': memory = 1; break; default: usage(stderr); return 1; } }
(2)第一次扫描,统计文件中各个字符出现频率
#define MAX_SYMBOLS 256//共有256个字符typedef huffman_node* SymbolFrequencies[MAX_SYMBOLS];typedef huffman_code* SymbolEncoder[MAX_SYMBOLS];static voidinit_frequencies(SymbolFrequencies *pSF){ memset(*pSF, 0, sizeof(SymbolFrequencies));}static huffman_node*new_leaf_node(unsigned char symbol)//新建叶子结点父节点为0{ huffman_node *p = (huffman_node*)malloc(sizeof(huffman_node)); p->isLeaf = 1; p->symbol = symbol; p->count = 0; p->parent = 0; return p;}static unsigned intget_symbol_frequencies(SymbolFrequencies *pSF, FILE *in){ int c; unsigned int total_count = 0; /* Set all frequencies to 0. */ init_frequencies(pSF);//初始化256个节点 /* Count the frequency of each symbol in the input file. */ while((c = fgetc(in)) != EOF)//每次取一个字符 { unsigned char uc = c; if(!(*pSF)[uc])//如果该字符以前没有出现则建立新的叶子节点 (*pSF)[uc] = new_leaf_node(uc); ++(*pSF)[uc]->count;//统计频率 ++total_count; } return total_count;}
(3)按频率从小到大排序并建立Huffman树
static intSFComp(const void *p1, const void *p2){ const huffman_node *hn1 = *(const huffman_node**)p1; const huffman_node *hn2 = *(const huffman_node**)p2; /* Sort all NULLs to the end. */ if(hn1 == NULL && hn2 == NULL) return 0; if(hn1 == NULL) return 1; if(hn2 == NULL) return -1; if(hn1->count > hn2->count)//qsort为1时排为elem2,elem2;-1时排为elem1,elem2; return 1; else if(hn1->count < hn2->count) return -1; return 0;}//以频率从从小到大排序,并且count为0时不参与排序static voidbuild_symbol_encoder(huffman_node *subtree, SymbolEncoder *pSF){ if(subtree == NULL) return; if(subtree->isLeaf) { (*pSF)[subtree->symbol] = new_code(subtree); } else { build_symbol_encoder(subtree->zero, pSF); build_symbol_encoder(subtree->one, pSF); }}//先遍历右节点,再遍历左节点,直到为叶子结点才分配码字static SymbolEncoder*calculate_huffman_codes(SymbolFrequencies * pSF){ unsigned int i = 0; unsigned int n = 0; huffman_node *m1 = NULL, *m2 = NULL; SymbolEncoder *pSE = NULL; qsort((*pSF), MAX_SYMBOLS, sizeof((*pSF)[0]), SFComp);//qsort以SFComp为函数进行升序排序,并交换地址, /* 得到非零字符的个数 */ for(n = 0; n < MAX_SYMBOLS && (*pSF)[n]; ++n) ; for(i = 0; i < n - 1; ++i) { /* Set m1 and m2 to the two subsets of least probability. */ m1 = (*pSF)[0]; m2 = (*pSF)[1]; /* Replace m1 and m2 with a set {m1, m2} whose probability * is the sum of that of m1 and m2. */ (*pSF)[0] = m1->parent = m2->parent = new_nonleaf_node(m1->count + m2->count, m1, m2); (*pSF)[1] = NULL; /* Put newSet into the correct count position in pSF. */ qsort((*pSF), n, sizeof((*pSF)[0]), SFComp); } /* Build the SymbolEncoder array from the tree. */ pSE = (SymbolEncoder*)malloc(sizeof(SymbolEncoder)); memset(pSE, 0, sizeof(SymbolEncoder)); build_symbol_encoder((*pSF)[0], pSE); return pSE;}
(4)将码表和其他信息写入输出文件
/*
 * Write the header of the compressed stream:
 *   - number of code table entries (network byte order),
 *   - number of bytes that were encoded (network byte order),
 *   - one record per symbol that has a code: the symbol byte, the code
 *     length in bits as one byte, then the code bytes.
 *
 * Returns 0 on success and 1 when a write fails.
 */
static int
write_code_table(FILE* out, SymbolEncoder *se, unsigned int symbol_count)
{
    unsigned long i, count = 0;
    unsigned int be_count;

    /* Determine the number of entries in se. */
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
        if((*se)[i])
            ++count;
    }

    /*
     * Write the number of entries in network byte order. The value goes
     * out through an unsigned int, not an unsigned long, so the field has
     * the same width as the symbol_count field below on every platform
     * (unsigned long is wider than unsigned int on LP64 systems).
     */
    be_count = htonl((unsigned int)count);
    if(fwrite(&be_count, sizeof(be_count), 1, out) != 1)
        return 1;

    /* Write the number of bytes that will be encoded. */
    symbol_count = htonl(symbol_count);
    if(fwrite(&symbol_count, sizeof(symbol_count), 1, out) != 1)
        return 1;

    /* Write the entries. */
    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
        huffman_code *p = (*se)[i];
        if(p)
        {
            unsigned int numbytes;

            /* Write the 1 byte symbol. */
            if(fputc((unsigned char)i, out) == EOF)
                return 1;
            /* Write the 1 byte code bit length. */
            if(fputc((unsigned char)p->numbits, out) == EOF)
                return 1;
            /* Write the code bytes. */
            numbytes = numbytes_from_numbits(p->numbits);
            if(fwrite(p->bits, 1, numbytes, out) != numbytes)
                return 1;
        }
    }

    return 0;
}
(5)第二次扫描文件,对文件查表进行Huffman编码
/* Return bit i of a bit string; bit 0 is the least significant bit of bits[0]. */
static unsigned char
get_bit(unsigned char* bits, unsigned long i)
{
    return (bits[i / 8] >> i % 8) & 1;
}

/*
 * Second pass over the input: replace every byte by its code and pack the
 * code bits into output bytes, least significant bit first.
 *
 * Returns 0 on success, and 1 when an input byte has no entry in `se` or
 * when writing to `out` fails.
 */
static int
do_file_encode(FILE* in, FILE* out, SymbolEncoder *se)
{
    unsigned char curbyte = 0;  /* output byte being assembled */
    unsigned char curbit = 0;   /* number of bits already placed in curbyte */
    int c;

    while((c = fgetc(in)) != EOF)
    {
        unsigned char uc = (unsigned char)c;
        huffman_code *code = (*se)[uc];
        unsigned long i;

        /* A symbol without a code cannot be encoded. */
        if(code == NULL)
            return 1;

        for(i = 0; i < code->numbits; ++i)
        {
            /* Add the current bit to curbyte. */
            curbyte |= get_bit(code->bits, i) << curbit;

            /* If this byte is filled up then write it
             * out and reset the curbit and curbyte. */
            if(++curbit == 8)
            {
                if(fputc(curbyte, out) == EOF)
                    return 1;
                curbyte = 0;
                curbit = 0;
            }
        }
    }

    /*
     * If there is data in curbyte that has not been
     * output yet, which means that the last encoded
     * character did not fall on a byte boundary,
     * then output it.
     */
    if(curbit > 0 && fputc(curbyte, out) == EOF)
        return 1;

    return 0;
}
(6)释放码树
/* Free `subtree` and every node below it. A NULL subtree is ignored. */
static void
free_huffman_tree(huffman_node *subtree)
{
    if(subtree == NULL)
        return;

    /* Only internal nodes have children; in a leaf the union holds the symbol. */
    if(!subtree->isLeaf)
    {
        free_huffman_tree(subtree->zero);
        free_huffman_tree(subtree->one);
    }

    free(subtree);
}

/*
 * Release every code stored in the encoder table (through free_code, which
 * is defined elsewhere) and then the table itself. A NULL table is ignored.
 */
static void
free_encoder(SymbolEncoder *pSE)
{
    unsigned long i;

    if(pSE == NULL)
        return;

    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
        huffman_code *p = (*pSE)[i];
        if(p)
            free_code(p);
    }

    free(pSE);
}
2.huffman解码流程
(1)读入解码文件,读取码树
static huffman_node*read_code_table(FILE* in, unsigned int *pDataBytes){ huffman_node *root = new_nonleaf_node(0, NULL, NULL);//建立根节点 unsigned int count; if(fread(&count, sizeof(count), 1, in) != 1)//读取符号数,如果读取失败则不解码直接返回 { free_huffman_tree(root); return NULL; } count = ntohl(count); if(fread(pDataBytes, sizeof(*pDataBytes), 1, in) != 1)//读取总的解码出来的文件字节数 { free_huffman_tree(root); return NULL; } *pDataBytes = ntohl(*pDataBytes); /* Read the entries. */ while(count-- > 0)//文件指向码表,第一项为信源符号,第二项为码字长度,码字 { int c; unsigned int curbit; unsigned char symbol; unsigned char numbits; unsigned char numbytes; unsigned char *bytes; huffman_node *p = root; if((c = fgetc(in)) == EOF)//信源符号 { free_huffman_tree(root); return NULL; } symbol = (unsigned char)c; if((c = fgetc(in)) == EOF)//码长 { free_huffman_tree(root); return NULL; } numbits = (unsigned char)c; numbytes = (unsigned char)numbytes_from_numbits(numbits);//码长转化为字节大小 bytes = (unsigned char*)malloc(numbytes); if(fread(bytes, 1, numbytes, in) != numbytes) { free(bytes); free_huffman_tree(root); return NULL; } for(curbit = 0; curbit < numbits; ++curbit) { if(get_bit(bytes, curbit))//如果当前码字为1,则建立一个右节点 { if(p->one == NULL)//如果右节点不存在,则新建右节点 { p->one = curbit == (unsigned char)(numbits - 1)//如果是当前的比特位到达了最后则建立一个叶子节点,没有则建立一个中间节点 ? new_leaf_node(symbol) : new_nonleaf_node(0, NULL, NULL); p->one->parent = p; } p = p->one;//把当前节点当成中间节点,以便建立后面节点 } else//当前码字为0,则建立左节点 { if(p->zero == NULL) { p->zero = curbit == (unsigned char)(numbits - 1) ? new_leaf_node(symbol) : new_nonleaf_node(0, NULL, NULL); p->zero->parent = p; } p = p->zero; } } free(bytes); } return root;}
(2)解码文件
inthuffman_decode_file(FILE *in, FILE *out){ huffman_node *root, *p; int c; unsigned int data_count; /* Read the Huffman code table. */ root = read_code_table(in, &data_count); if(!root) return 1; /* Decode the file. */ p = root; while(data_count > 0 && (c = fgetc(in)) != EOF)//解码没完成时条件成立 { unsigned char byte = (unsigned char)c;//当前符号 unsigned char mask = 1; while(data_count > 0 && mask) { p = byte & mask ? p->one : p->zero;//从根节点按码字遍历 mask <<= 1; if(p->isLeaf)//遍历到叶子结点时输出信源符号,并且重新返回根节点 { fputc(p->symbol, out); p = root; --data_count; } } } free_huffman_tree(root); return 0;}
(3)输出码表
/*
 * Write a human-readable code table to `out`: one row per symbol that has
 * a code, with the columns symbol value, probability, code length in bits
 * and code word (printed as a string of 0/1 digits).
 *
 * Always returns 0.
 *
 * NOTE(review): p->count is read from huffman_code, but the huffman_code
 * struct shown in this document has only numbits and bits. Presumably the
 * author's version adds a count member; confirm against the real header.
 */
int write_table(FILE* out, SymbolEncoder *se, unsigned int symbol_count)
{
    int i;
    unsigned long j;

    /* Header row: symbol, probability, length, code word. */
    fprintf(out,"字符\t概率\t长度\t码字\n");

    for(i = 0; i < MAX_SYMBOLS; ++i)
    {
        huffman_code *p = (*se)[i];
        if(p)
        {
            /* Relative frequency of the symbol. */
            float chance = p->count/(double)symbol_count;

            fprintf(out,"%d\t",i);           /* symbol value */
            fprintf(out,"%f\t",chance);      /* probability */
            fprintf(out,"%lu\t",p->numbits); /* code length; numbits is unsigned long */

            /* Print the code word one bit at a time. */
            for(j = 0; j < p->numbits; j++)
            {
                unsigned char c = get_bit(p->bits,j);
                fprintf(out,"%d",c);
            }
            fprintf(out,"\t\n");
        }
    }

    return 0;
}
三、实验结果
test1
test2
test3
test4
test5
test6
test7
test8
test9
test10
四、实验结论
1、各文件类型压缩编码后的平均码长和信源熵大小基本相同,平均码长无限逼近信源熵,可以验证无失真编码的平均码长界限定理。
2、huffman编码对于概率分布不均匀,数据较大的信源压缩效果好,而对于符号分布接近等概,数据较小的压缩效果则比较差。
阅读全文
0 0
- 数据压缩原理实验3_实验报告
- 数据压缩原理实验1_实验报告
- 数据压缩原理实验2_实验报告
- 数据压缩原理实验4_实验报告
- 数据压缩原理实验5_实验报告
- 数据压缩原理实验6_实验报告
- 数据压缩实验一实验报告
- 数据压缩 实验报告一
- 【数据压缩】BMP2YUV实验报告
- 数据压缩原理实验1_彩色空间转换实验YUVtoRGB
- 数据压缩原理与应用 彩色空间转换 实验报告
- 数据压缩原理 实验报告一 彩色空间转换
- 【数据压缩】RGB2YUV/YUV2RGB实验报告
- 《数据压缩》实验报告一·YUV2RGB实验
- 《数据压缩》实验报告二·BMP2YUV实验
- 数据压缩原理实验1_彩色空间转换实验(yuv转rgb)
- 数据压缩实验报告2-bmp转yuv
- 数据压缩 实验报告一 彩色空间转换
- JAVA-14.2-登录注册案例分析(IO流版)
- 在tomcat下context.xml中配置各种数据库连接池
- 【php基础班】第13天 this、星星案例、图片切换、总结
- Windows NT/2000下的空连接
- Apache Web服务
- 数据压缩原理实验3_实验报告
- hadoop搭建教程,多环境通吃
- LeetCode No.7 Reverse Integer
- 【设计模式】—— 单例模式Singleton
- Python数据类型和变量5.16
- 线段树模版
- Java String类常用方法介绍(3)
- Python输错重输while小程序
- 树莓派与笔记本用远程桌面连接(Xrdp远程桌面服务)