解压缩内核
来源:互联网 发布:万网域名注册官网 编辑:程序博客网 时间:2024/05/22 21:21
4.1.2 解压缩内核
解压缩内核使用的是decompress_kernel函数,来自arch/x86/boot/compressed/misc.c:
/*
 * decompress_kernel - unpack the compressed kernel image to its run address.
 * (The leading numbers are the listing's line numbers in misc.c; the
 * surrounding text refers to them.)
 *
 * rmode:      stored in real_mode; its hdr.loadflags and screen_info fields
 *             are read below
 * heap:       start of the scratch heap; BOOT_HEAP_SIZE bytes are used
 * input_data: compressed image, handed to decompress()
 * input_len:  length of the compressed image, handed to decompress()
 * output:     where the decompressed image is written; must be aligned to
 *             MIN_KERNEL_ALIGN
 */
301asmlinkage void decompress_kernel(void *rmode, memptr heap,
302 unsigned char *input_data,
303 unsigned long input_len,
304 unsigned char *output)
305{
306 real_mode = rmode;
307
308 if (real_mode->hdr.loadflags & QUIET_FLAG)
309 quiet = 1;
310
 /* Video mode 7 uses the text buffer at 0xb0000 with port 0x3b4;
    every other mode uses 0xb8000 with port 0x3d4. */
311 if (real_mode->screen_info.orig_video_mode == 7) {
312 vidmem = (char *) 0xb0000;
313 vidport = 0x3b4;
314 } else {
315 vidmem = (char *) 0xb8000;
316 vidport = 0x3d4;
317 }
318
319 lines = real_mode->screen_info.orig_video_lines;
320 cols = real_mode->screen_info.orig_video_cols;
321
322 free_mem_ptr = heap; /* Heap */
323 free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
324
 /* Sanity checks; error() is called with a message when one fails.
    NOTE(review): the two "too large" tests compare heap, although the
    message names the destination address. */
325 if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
326 error("Destination address inappropriately aligned");
327#ifdef CONFIG_X86_64
328 if (heap > 0x3fffffffffffUL)
329 error("Destination address too large");
330#else
331 if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
332 error("Destination address too large");
333#endif
334#ifndef CONFIG_RELOCATABLE
335 if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
336 error("Wrong destination address");
337#endif
338
339 if (!quiet)
340 putstr("\nDecompressing Linux... ");
 /* No fill/flush callbacks and no position pointer: the whole input is
    already in memory and the result goes straight to output. */
341 decompress(input_data, input_len, NULL, NULL, output, NULL, error);
342 parse_elf(output);
343 if (!quiet)
344 putstr("done.\nBooting the kernel.\n");
345 return;
346}
看到他的参数,共有5个,是刚才依次压栈而得到的。第一个压入的是output(还记得吧,参数是倒着压栈的),在刚才的131行看到的,来自z_extract_offset_negative,表示解压缩后内核存放的目标首地址;第二个,input_len,其值等于z_input_len,表示待解压的压缩内核的大小;第三个,input_data,来自input_data,表示待解压的压缩内核的首地址;第四个参数,heap,32位下是32位,来自boot_heap,表示解压缩阶段使用的堆;最后一个参数,rmode,来自我们刚才用到的esi寄存器,指向启动参数结构,306行把它保存到real_mode中,后面通过real_mode->hdr和real_mode->screen_info来访问。
假设我配置的是CONFIG_KERNEL_BZIP2,那么会调用顶层lib/decompress_bunzip2.c中的decompress函数,最终会调用位于同一文件的bunzip2函数(注意,在我们制作bzImage的时候使用的压缩程序只有一个,如果指定的bunzip2,那么都在decompress_bunzip2.c里):
674/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data,
675 not end of file.) */
/*
 * bunzip2 - decompress one bzip2 stream.
 * (The leading numbers are the listing's line numbers; the surrounding
 * text refers to them.)
 *
 * buf:      input buffer; when NULL a BZIP2_IOBUF_SIZE buffer is allocated
 *           here and freed again before returning
 * len:      passed on to start_bunzip together with the input buffer
 * fill:     passed on to start_bunzip
 * flush:    when non-NULL, outbuf is replaced by a BZIP2_IOBUF_SIZE buffer
 *           allocated here and every decoded chunk is handed to flush();
 *           when NULL the decoded data is written consecutively to outbuf
 * outbuf:   destination buffer (used as passed in only when flush is NULL)
 * pos:      when non-NULL, receives bd->inbufPos before bd is freed
 * error_fn: installed through set_error_fn()
 *
 * Returns RETVAL_OK when the last block was reached and headerCRC equals
 * totalCRC; otherwise the status left in i by the allocation checks,
 * start_bunzip, read_bunzip or the flush check.
 */
676STATIC int INIT bunzip2(unsigned char *buf, int len,
677 int(*fill)(void*, unsigned int),
678 int(*flush)(void*, unsigned int),
679 unsigned char *outbuf,
680 int *pos,
681 void(*error_fn)(char *x))
682{
683 struct bunzip_data *bd;
684 int i = -1;
685 unsigned char *inbuf;
686
687 set_error_fn(error_fn);
 /* With a flush callback, decode into a private chunk buffer instead of
    the caller's outbuf. */
688 if (flush)
689 outbuf = malloc(BZIP2_IOBUF_SIZE);
690
691 if (!outbuf) {
692 error("Could not allocate output bufer");
693 return RETVAL_OUT_OF_MEMORY;
694 }
 /* Use the caller's input buffer when one was given, otherwise allocate. */
695 if (buf)
696 inbuf = buf;
697 else
698 inbuf = malloc(BZIP2_IOBUF_SIZE);
699 if (!inbuf) {
700 error("Could not allocate input bufer");
701 i = RETVAL_OUT_OF_MEMORY;
702 goto exit_0;
703 }
 /* start_bunzip allocates bd; a zero return means success. */
704 i = start_bunzip(&bd, inbuf, len, fill);
705 if (!i) {
706 for (;;) {
707 i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE);
708 if (i <= 0)
709 break;
 /* Without flush, step outbuf past the i bytes just written;
    with flush, the callback must accept all i bytes. */
710 if (!flush)
711 outbuf += i;
712 else
713 if (i != flush(outbuf, i)) {
714 i = RETVAL_UNEXPECTED_OUTPUT_EOF;
715 break;
716 }
717 }
718 }
719 /* Check CRC and release memory */
720 if (i == RETVAL_LAST_BLOCK) {
721 if (bd->headerCRC != bd->totalCRC)
722 error("Data integrity error when decompressing.");
723 else
724 i = RETVAL_OK;
725 } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) {
726 error("Compressed file ends unexpectedly");
727 }
 /* bd is NULL when start_bunzip could not allocate it. */
728 if (!bd)
729 goto exit_1;
730 if (bd->dbuf)
731 large_free(bd->dbuf);
732 if (pos)
733 *pos = bd->inbufPos;
734 free(bd);
735exit_1:
 /* Only free the buffers that were allocated in this function. */
736 if (!buf)
737 free(inbuf);
738exit_0:
739 if (flush)
740 free(outbuf);
741 return i;
742}
bunzip2函数的第一、二个参数是待解压的压缩内核的首地址和长度;第三、四是两个函数参数,传进来的时候为空;第五个参数是解压缩后的地址,也是刚才传进来的。683行首先声明一个指向bunzip_data数据结构的指针bd,待会再去说他。由于flush为空,688行不会另外分配输出缓冲区,解压结果直接写到outbuf里;而buf不为空,所以696行让局部指针inbuf直接指向刚才传递进来的压缩内核首地址buf,随后704行调用函数start_bunzip:
/*
 * start_bunzip - allocate and initialise the decoder state for a stream.
 * (The leading numbers are the listing's line numbers; the surrounding
 * text refers to them.)
 *
 * bdp:   output parameter; receives the newly allocated bunzip_data
 *        (NULL when the allocation failed)
 * inbuf: input buffer, stored in bd->inbuf
 * len:   stored in bd->inbufCount
 * fill:  refill callback; nofill is installed when it is NULL
 *
 * Returns RETVAL_OK, RETVAL_OUT_OF_MEMORY or RETVAL_NOT_BZIP_DATA. On the
 * error returns after line 639 the structure is not freed here; *bdp still
 * points to it.
 */
626static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,
627 int (*fill)(void*, unsigned int))
628{
629 struct bunzip_data *bd;
630 unsigned int i, j, c;
 /* The bytes 'B', 'Z', 'h', '0' packed most-significant first
    (0x42, 0x5A, 0x68, 0x30 -> 0x425A6830). */
631 const unsigned int BZh0 =
632 (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)
633 +(((unsigned int)'h') << 8)+(unsigned int)'0';
634
635 /* Figure out how much data to allocate */
636 i = sizeof(struct bunzip_data);
637
638 /* Allocate bunzip_data. Most fields initialize to zero. */
639 bd = *bdp = malloc(i);
640 if (!bd)
641 return RETVAL_OUT_OF_MEMORY;
642 memset(bd, 0, sizeof(struct bunzip_data));
643 /* Setup input buffer */
644 bd->inbuf = inbuf;
645 bd->inbufCount = len;
646 if (fill != NULL)
647 bd->fill = fill;
648 else
649 bd->fill = nofill;
650
651 /* Init the CRC32 table (big endian) */
652 for (i = 0; i < 256; i++) {
653 c = i << 24;
654 for (j = 8; j; j--)
655 c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1);
656 bd->crc32Table[i] = c;
657 }
658
659 /* Ensure that file starts with "BZh['1'-'9']." */
660 i = get_bits(bd, 32);
 /* Unsigned arithmetic: the difference is below 9 only for values from
    BZh0+1 to BZh0+9; anything smaller wraps around to a huge number. */
661 if (((unsigned int)(i-BZh0-1)) >= 9)
662 return RETVAL_NOT_BZIP_DATA;
663
664 /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of
665 uncompressed data. Allocate intermediate buffer for block. */
666 bd->dbufSize = 100000*(i-BZh0);
667
 /* dbuf gets room for dbufSize entries of sizeof(int) bytes each. */
668 bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));
669 if (!bd->dbuf)
670 return RETVAL_OUT_OF_MEMORY;
671 return RETVAL_OK;
672}
bdp是一个输出参数,指向bunzip2中的局部指针bd;fill是空的,所以649行会把bd->fill设为nofill。因此传入这个函数的有效输入仅仅是压缩内核首地址及其长度,内部变量i先用来保存数据结构bunzip_data的长度。而631行BZh0常量很重要,它对应bzip2压缩数据开头的标识。我们看到这段代码:
const unsigned int BZh0 =
(((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)
+(((unsigned int)'h') << 8)+(unsigned int)'0';
就是压缩常量的值,'B'、'Z'、'h'、'0'的ASCII码分别是0x42、0x5A、0x68、0x30,计算出来就是(0x42<<24)+(0x5A<<16)+(0x68<<8)+0x30,最后等于0x425A6830。
继续走,分配i个内存单元,由指针bd指上,同时bunzip2函数的内部变量bd也指到它了,然后642行将该结构清零(这些都是规定动作,值得我们程序员学习)。看来我们不得不把数据结构bunzip_data列出来了:
91/* Structure holding all the housekeeping data, including IO buffers and
92 memory that persists between calls to bunzip */
/* (The leading numbers are the listing's line numbers; the surrounding
   text refers to them.) */
93struct bunzip_data {
94 /* State for interrupting output loop */
95 int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;
96 /* I/O tracking data (file handles, buffers, positions, etc.) */
 /* fill: refill callback; get_bits calls it as
    fill(inbuf, BZIP2_IOBUF_SIZE) once inbufPos reaches inbufCount. */
97 int (*fill)(void*, unsigned int);
 /* inbufCount: bytes available in inbuf; inbufPos: index of the next
    byte get_bits will read. */
98 int inbufCount, inbufPos /*, outbufPos*/;
99 unsigned char *inbuf /*,*outbuf*/;
 /* Bit buffer used by get_bits: inbufBits holds the bits, inbufBitCount
    how many of them have not been consumed yet. */
100 unsigned int inbufBitCount, inbufBits;
101 /* The CRC values stored in the block header and calculated from the
102 data */
 /* crc32Table is the 256-entry lookup table filled in by start_bunzip. */
103 unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC;
104 /* Intermediate buffer and its size (in bytes) */
 /* NOTE(review): start_bunzip allocates dbufSize * sizeof(int) bytes for
    dbuf, so dbufSize appears to count entries rather than bytes. */
105 unsigned int *dbuf, dbufSize;
106 /* These things are a bit too big to go on the stack */
107 unsigned char selectors[32768]; /* nSelectors = 15 bits */
108 struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */
109 int io_error; /* non-zero if we have IO error */
110};
644和645行初始化bunzip_data的inbuf和inbufCount字段。随后652~657行给crc32Table字段赋值,这个字段是一张256项的CRC32查找表,后面read_bunzip计算输出数据的校验值时会用到;结构体注释里说的"存储在块头中的CRC"指的是同一行的headerCRC字段。随后调用get_bits函数:
113/* Return the next nnn bits of input. All reads from the compressed input
114 are done through this function. All reads are big endian */
/*
 * (The leading numbers are the listing's line numbers; the surrounding
 * text refers to them.)
 *
 * bd:          decoder state; inbuf, inbufPos, inbufCount, inbufBits,
 *              inbufBitCount and io_error are read and updated
 * bits_wanted: number of bits to return (start_bunzip asks for 32)
 *
 * Returns the requested bits, earlier input bytes in the more significant
 * positions. Returns 0 when bd->io_error is already set, or when fill()
 * yields no data (io_error is then set to RETVAL_UNEXPECTED_INPUT_EOF).
 */
115static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted)
116{
117 unsigned int bits = 0;
118
119 /* If we need to get more data from the byte buffer, do so.
120 (Loop getting one byte at a time to enforce endianness and avoid
121 unaligned access.) */
122 while (bd->inbufBitCount < bits_wanted) {
123 /* If we need to read more data from file into byte buffer, do
124 so */
125 if (bd->inbufPos == bd->inbufCount) {
126 if (bd->io_error)
127 return 0;
128 bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE);
129 if (bd->inbufCount <= 0) {
130 bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF;
131 return 0;
132 }
133 bd->inbufPos = 0;
134 }
135 /* Avoid 32-bit overflow (dump bit buffer to top of output) */
 /* With 24 or more bits pending, shifting in another byte would push
    bits out of the 32-bit inbufBits: move the pending bits into the
    result first and shrink bits_wanted by the amount moved. */
136 if (bd->inbufBitCount >= 24) {
137 bits = bd->inbufBits&((1 << bd->inbufBitCount)-1);
138 bits_wanted -= bd->inbufBitCount;
139 bits <<= bits_wanted;
140 bd->inbufBitCount = 0;
141 }
142 /* Grab next 8 bits of input from buffer. */
 /* Post-increment: the byte at the current inbufPos is consumed. */
143 bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];
144 bd->inbufBitCount += 8;
145 }
146 /* Calculate result */
 /* Bits beyond bits_wanted stay in inbufBits for the next call. */
147 bd->inbufBitCount -= bits_wanted;
148 bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1);
149
150 return bits;
151}
这个函数并不复杂。start_bunzip以bits_wanted=32调用它,一开始inbufBitCount肯定小于32,因为被清零了。bd->inbufPos也为0,不等于inbufCount(压缩内核的长度)。所以第一次循环直接来到143行:bd->inbufBits左移8位后或上bd->inbuf[bd->inbufPos++]。注意这里是后加加,取的是bd->inbuf[0],之后bd->inbufPos才变成1;inbufBits原来是0,所以bd->inbufBits = bd->inbuf[0],144行bd->inbufBitCount为8了;
第二次循环,bd->inbufBitCount为8还是小于32,也小于24,所以又赋值bd->inbufBits为bd->inbuf[0]<<8|bd->inbuf[1],bd->inbufPos加加之后就变成2了,bd->inbufBitCount成了16。
第三次循环,bd->inbufBitCount为16还是小于32,也小于24,所以又赋值bd->inbufBits为bd->inbuf[0]<<16|bd->inbuf[1]<<8|bd->inbuf[2],bd->inbufPos加加之后就变成3了,bd->inbufBitCount成了24。
第四次循环,bd->inbufBitCount为24了,还是小于32,但是要进入136行的条件块,先把已经攒下的24位转存到bits里,这个值是:
(bd->inbuf[0]<<16|bd->inbuf[1]<<8|bd->inbuf[2]) & ((1 << bd->inbufBitCount)-1)
换算过来就是(bd->inbuf[0]<<16|bd->inbuf[1]<<8|bd->inbuf[2])&0xffffff,也就是保留低24位。最后139行再把这么长串左移8位(此时bits_wanted在138行做了个运算:32-24=8),给最后一个字节腾出位置。转了这么久,140行bd->inbufBitCount又为0了。是不是要进入一个死循环了呢?没有,143行继续把下一个字节bd->inbuf[3]放进bd->inbufBits的低8位,144行bd->inbufBitCount变成8,而bits_wanted已经变成8了,循环条件不再成立,所以跳出循环了。现在感受到内核代码的变态了吧,下面还有:147行bd->inbufBitCount减去8又变回0,148行bits,也就是刚才那么长一串还要继续去|= (bd->inbufBits >> 0)&((1 << 8)-1),也就是把bd->inbuf[3]或到bits的低8位上。
最后返回这个bits值。这个值就是压缩数据最前面的4个字节按大端顺序拼成的一个32位数:bd->inbuf[0]<<24|bd->inbuf[1]<<16|bd->inbuf[2]<<8|bd->inbuf[3]。根据659行的注释,bzip2压缩数据的开头应该是'B'、'Z'、'h'再加上一个'1'~'9'的字符,所以这个值正好可以拿去和BZh0常量比较。如果哪里分析得不对,请吃透了这里代码的同志联系我啊,一定会请你喝酒的。
回到start_bunzip中,刚才bits返回给了i。661行把i-BZh0-1转换成无符号整型后和9比较:只有当i在BZh0+1到BZh0+9之间,也就是第四个字节是'1'~'9'时,这个差才落在0~8之内;否则就返回RETVAL_NOT_BZIP_DATA,不可能继续走了。我们假设这个差为8,也就是第四个字节是'9'。最后666行和668行给bd->dbufSize和bd->dbuf赋值:dbufSize等于100000*9=900000,dbuf则是一块能放下900000个int的缓冲区。
最后返回到bunzip2中,start_bunzip最后返回的RETVAL_OK为0,所以进入705这个条件语句中,一来就进入read_bunzip函数:
/* Undo burrows-wheeler transform on intermediate buffer to produce output.
514 If start_bunzip was initialized with out_fd =-1, then up to len bytes of
515 data are written to outbuf. Return value is number of bytes written or
516 error (all errors are negative numbers). If out_fd!=-1, outbuf and len
517 are ignored, data is written to out_fd and return is RETVAL_OK or error.
518*/
519
/*
 * (The leading numbers are the listing's line numbers; the surrounding
 * text refers to them.)
 *
 * bd:     decoder state set up by start_bunzip
 * outbuf: receives the decoded bytes
 * len:    maximum number of bytes to store in outbuf
 *
 * Returns len when outbuf was filled; the number of bytes stored so far
 * when get_next_block reports RETVAL_LAST_BLOCK; RETVAL_LAST_BLOCK after a
 * block CRC mismatch; otherwise the non-zero status from get_next_block.
 * That status is kept in bd->writeCount and returned again by later calls
 * when it is negative.
 */
520static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len)
521{
522 const unsigned int *dbuf;
523 int pos, xcurrent, previous, gotcount;
524
525 /* If last read was short due to end of file, return last block now */
526 if (bd->writeCount < 0)
527 return bd->writeCount;
528
529 gotcount = 0;
530 dbuf = bd->dbuf;
531 pos = bd->writePos;
532 xcurrent = bd->writeCurrent;
533
534 /* We will always have pending decoded data to write into the output
535 buffer unless this is the very first call (in which case we haven't
536 Huffman-decoded a block into the intermediate buffer yet). */
537
538 if (bd->writeCopies) {
539 /* Inside the loop, writeCopies means extra copies (beyond 1) */
540 --bd->writeCopies;
541 /* Loop outputting bytes */
542 for (;;) {
543 /* If the output buffer is full, snapshot
544 * state and return */
545 if (gotcount >= len) {
546 bd->writePos = pos;
547 bd->writeCurrent = xcurrent;
548 bd->writeCopies++;
549 return len;
550 }
551 /* Write next byte into output buffer, updating CRC */
552 outbuf[gotcount++] = xcurrent;
553 bd->writeCRC = (((bd->writeCRC) << 8)
554 ^bd->crc32Table[((bd->writeCRC) >> 24)
555 ^xcurrent]);
556 /* Loop now if we're outputting multiple
557 * copies of this byte */
558 if (bd->writeCopies) {
559 --bd->writeCopies;
560 continue;
561 }
 /* Also reached by goto from lines 587 and 615. */
562decode_next_byte:
563 if (!bd->writeCount--)
564 break;
565 /* Follow sequence vector to undo
566 * Burrows-Wheeler transform */
567 previous = xcurrent;
 /* Each dbuf entry carries the output byte in its low 8 bits and
    the index of the next entry in the bits above them. */
568 pos = dbuf[pos];
569 xcurrent = pos&0xff;
570 pos >>= 8;
571 /* After 3 consecutive copies of the same
572 byte, the 4th is a repeat count. We count
573 down from 4 instead *of counting up because
574 testing for non-zero is faster */
575 if (--bd->writeRunCountdown) {
576 if (xcurrent != previous)
577 bd->writeRunCountdown = 4;
578 } else {
579 /* We have a repeated run, this byte
580 * indicates the count */
581 bd->writeCopies = xcurrent;
582 xcurrent = previous;
583 bd->writeRunCountdown = 5;
584 /* Sometimes there are just 3 bytes
585 * (run length 0) */
586 if (!bd->writeCopies)
587 goto decode_next_byte;
588 /* Subtract the 1 copy we'd output
589 * anyway to get extras */
590 --bd->writeCopies;
591 }
592 }
593 /* Decompression of this block completed successfully */
594 bd->writeCRC = ~bd->writeCRC;
595 bd->totalCRC = ((bd->totalCRC << 1) |
596 (bd->totalCRC >> 31)) ^ bd->writeCRC;
597 /* If this block had a CRC error, force file level CRC error. */
598 if (bd->writeCRC != bd->headerCRC) {
599 bd->totalCRC = bd->headerCRC+1;
600 return RETVAL_LAST_BLOCK;
601 }
602 }
603
604 /* Refill the intermediate buffer by Huffman-decoding next
605 * block of input */
606 /* (previous is just a convenient unused temp variable here) */
607 previous = get_next_block(bd);
608 if (previous) {
 /* Remember the status so the check at line 526 can return it on
    the next call. */
609 bd->writeCount = previous;
610 return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount;
611 }
 /* Fresh block: restart the block CRC and reload the local copies of the
    write position and current byte before jumping into the loop. */
612 bd->writeCRC = 0xffffffffUL;
613 pos = bd->writePos;
614 xcurrent = bd->writeCurrent;
615 goto decode_next_byte;
616}
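看到这个函数,我都冒冷汗了。从注释看,它大致做的事情是:通过get_next_block把下一个压缩块进行Huffman解码放到中间缓冲区dbuf中,再还原Burrows-Wheeler变换并展开重复字节,一边输出一边计算CRC。更具体的我实在没能力去分析他了,感兴趣的同志可以帮帮我,谢谢。最后解压后的程序会存放到outbuf开始的内存中。这个函数结束后,bunzip2也就带着返回值RETVAL_OK结束了,decompress也就结束了。随后decompress_kernel在342行调用parse_elf(output),打印完提示信息后返回。回到boot/compressed/head_32.S的代码中: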
141#if CONFIG_RELOCATABLE
……
165#endif
由于我们.config没有CONFIG_RELOCATABLE,所以不去详细分析141~165行的代码,直接来到170行:
170 xorl %ebx, %ebx
171 jmp *%ebp
开始执行解压缩后的第一条代码,即第二个startup_32()函数。这个函数主要是为第一个Linux进程(进程0)建立执行环境。该函数主要执行以下操作:
1. 把段寄存器初始化为最终值。
2. 把内核的bss段填充为0。
3. 初始化包含在swapper_pg_dir的临时内核页表,并初始化pg0,以使线性地址一致地映射同一物理地址。
4. 把页全局目录的地址存放在cr3寄存器中,并通过设置cr0寄存器的PG位启用分页。
5. 把从BIOS中获得的系统参数和传递给操作系统的参数boot_params放入第一个页框中。
6. 为进程0建立内核态堆栈。
7. 该函数再一次清零eflags寄存器的所有位。
8. 调用setup_idt用空的中断处理程序填充中断描述符表IDT。
9. 识别处理器的型号。
10. 用编译好的GDT和IDT表的地址来填充gdtr和idtr寄存器。
11. 初始化虚拟机监视器Xen。
12. 向start_kernel()函数进发。
后面咱们会详细分析以上步骤。
- 解压缩内核
- 4.1.2 解压缩内核
- linux启动流程分析-内核解压缩过程
- 内核gzip压缩与解压缩介绍
- linux内核启动地址 解压缩 启动参数
- (收集/整理)linux 内核启动 解压缩
- linux启动流程分析-内核解压缩过程
- 未解压缩时的内核页表
- linux内核启动地址,解压缩,内核参数问题探讨
- linux内核启动地址,解压缩,内核参数问题探讨【转】
- 读Kernel感悟-Linux内核启动-内核解压缩
- Linux kernel 分析之五:内核启动-内核解压缩
- 解压缩
- 解压缩
- 解压缩
- [转载] linux启动流程分析(3)---内核解压缩过程
- linux启动流程分析(3)---内核解压缩过程
- 内核启动分析(三)——zImage 解压缩阶段
- 深度理解C语言指针的奥秘
- 新年快乐!
- wifi直连
- ComboBox.SelectedItem
- 2010小结
- 解压缩内核
- 如何防止按钮克星激活我们的按钮
- 创建SFS的Java Extention
- 第二次启动保护模式
- Android培训班(28)
- GREP、SED、AWK、TR、SORT、UNIQ的一些用法
- Asp.net mvc controls
- 第一次启动分页管理
- C#编程QQ群收藏