解压缩内核

来源：互联网发布：万网域名注册官网编辑：程序博客网时间：2024/05/22 21:21

4.1.2 解压缩内核

解压缩内核使用的是decompress_kernel函数，来自arch/x86/boot/compressed/misc.c：

301asmlinkage void decompress_kernel(void *rmode, memptr heap,

302 unsigned char *input_data,

303 unsigned long input_len,

304 unsigned char *output)

305{

306 real_mode = rmode;

307

308 if (real_mode->hdr.loadflags & QUIET_FLAG)

309 quiet = 1;

310

311 if (real_mode->screen_info.orig_video_mode == 7) {

312 vidmem = (char *) 0xb0000;

313 vidport = 0x3b4;

314 } else {

315 vidmem = (char *) 0xb8000;

316 vidport = 0x3d4;

317 }

318

319 lines = real_mode->screen_info.orig_video_lines;

320 cols = real_mode->screen_info.orig_video_cols;

321

322 free_mem_ptr = heap; /* Heap */

323 free_mem_end_ptr = heap + BOOT_HEAP_SIZE;

324

325 if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))

326 error("Destination address inappropriately aligned");

327#ifdef CONFIG_X86_64

328 if (heap > 0x3fffffffffffUL)

329 error("Destination address too large");

330#else

331 if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))

332 error("Destination address too large");

333#endif

334#ifndef CONFIG_RELOCATABLE

335 if ((unsigned long)output != LOAD_PHYSICAL_ADDR)

336 error("Wrong destination address");

337#endif

338

339 if (!quiet)

340 putstr("\nDecompressing Linux... ");

341 decompress(input_data, input_len, NULL, NULL, output, NULL, error);

342 parse_elf(output);

343 if (!quiet)

344 putstr("done.\nBooting the kernel.\n");

345 return;

346}

看到他的参数，共有5个，是刚才依次压栈而得到的。第一个压入的是output（还记得吧，倒着来的），在刚才的131行看到的，来自z_extract_offset_negative，用于作为解压缩的输出缓存首地址；第二个，input_len，其值等于z_input_len，表示压缩内核（待解压数据）的大小；第三个，input_data，来自input_data，表示压缩内核的地址；第四个参数，heap，32位下是32位，来自boot_heap，表示解压缩阶段的堆；最后一个参数，rmode，来自我们刚才用到的esi寄存器，表示刚才拷贝之前内核映像地址，从306～320行可以看到，函数通过它访问hdr.loadflags和screen_info等启动参数。

假设我配置的是CONFIG_KERNEL_BZIP2，那么会调用顶层lib/decompress_bunzip2.c中的decompress函数，最终会调用位于同一文件的bunzip2函数（注意，在我们制作bzImage的时候使用的压缩程序只有一个，如果指定的bunzip2，那么都在decompress_bunzip2.c里）：

674/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data,

675 not end of file.) */

676STATIC int INIT bunzip2(unsigned char *buf, int len,

677 int(*fill)(void*, unsigned int),

678 int(*flush)(void*, unsigned int),

679 unsigned char *outbuf,

680 int *pos,

681 void(*error_fn)(char *x))

682{

683 struct bunzip_data *bd;

684 int i = -1;

685 unsigned char *inbuf;

686

687 set_error_fn(error_fn);

688 if (flush)

689 outbuf = malloc(BZIP2_IOBUF_SIZE);

690

691 if (!outbuf) {

692 error("Could not allocate output bufer");

693 return RETVAL_OUT_OF_MEMORY;

694 }

695 if (buf)

696 inbuf = buf;

697 else

698 inbuf = malloc(BZIP2_IOBUF_SIZE);

699 if (!inbuf) {

700 error("Could not allocate input bufer");

701 i = RETVAL_OUT_OF_MEMORY;

702 goto exit_0;

703 }

704 i = start_bunzip(&bd, inbuf, len, fill);

705 if (!i) {

706 for (;;) {

707 i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE);

708 if (i <= 0)

709 break;

710 if (!flush)

711 outbuf += i;

712 else

713 if (i != flush(outbuf, i)) {

714 i = RETVAL_UNEXPECTED_OUTPUT_EOF;

715 break;

716 }

717 }

718 }

719 /* Check CRC and release memory */

720 if (i == RETVAL_LAST_BLOCK) {

721 if (bd->headerCRC != bd->totalCRC)

722 error("Data integrity error when decompressing.");

723 else

724 i = RETVAL_OK;

725 } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) {

726 error("Compressed file ends unexpectedly");

727 }

728 if (!bd)

729 goto exit_1;

730 if (bd->dbuf)

731 large_free(bd->dbuf);

732 if (pos)

733 *pos = bd->inbufPos;

734 free(bd);

735exit_1:

736 if (!buf)

737 free(inbuf);

738exit_0:

739 if (flush)

740 free(outbuf);

741 return i;

742}

bunzip2函数的第一、二个参数是压缩内核（待解压数据）的首地址和长度；第三、四是两个函数指针参数fill和flush，传进来的时候为空；第五个参数是存放解压缩结果的地址，也是刚才传进来的。683行首先声明一个指向bunzip_data数据结构的指针bd，待会再去说他。我们没有提供flush函数，所以688行不会重新分配outbuf，直接使用传进来的输出地址；而buf不为空，所以695、696行让局部指针inbuf指向刚才传递进来的压缩内核首地址buf，随后704行调用函数start_bunzip：

626static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,

627 int (*fill)(void*, unsigned int))

628{

629 struct bunzip_data *bd;

630 unsigned int i, j, c;

631 const unsigned int BZh0 =

632 (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)

633 +(((unsigned int)'h') << 8)+(unsigned int)'0';

634

635 /* Figure out how much data to allocate */

636 i = sizeof(struct bunzip_data);

637

638 /* Allocate bunzip_data. Most fields initialize to zero. */

639 bd = *bdp = malloc(i);

640 if (!bd)

641 return RETVAL_OUT_OF_MEMORY;

642 memset(bd, 0, sizeof(struct bunzip_data));

643 /* Setup input buffer */

644 bd->inbuf = inbuf;

645 bd->inbufCount = len;

646 if (fill != NULL)

647 bd->fill = fill;

648 else

649 bd->fill = nofill;

650

651 /* Init the CRC32 table (big endian) */

652 for (i = 0; i < 256; i++) {

653 c = i << 24;

654 for (j = 8; j; j--)

655 c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1);

656 bd->crc32Table[i] = c;

657 }

658

659 /* Ensure that file starts with "BZh['1'-'9']." */

660 i = get_bits(bd, 32);

661 if (((unsigned int)(i-BZh0-1)) >= 9)

662 return RETVAL_NOT_BZIP_DATA;

663

664 /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of

665 uncompressed data. Allocate intermediate buffer for block. */

666 bd->dbufSize = 100000*(i-BZh0);

667

668 bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));

669 if (!bd->dbuf)

670 return RETVAL_OUT_OF_MEMORY;

671 return RETVAL_OK;

672}

由于fill是空的，而bdp只是用来把新分配的bunzip_data结构地址带回给调用者的输出参数（见639行），所以传入这个函数的有效输入仅仅是压缩内核的首地址及其长度，内部变量i一开始是数据结构bunzip_data的长度（636行）。而631行BZh0常量很重要，它对应bzip2数据流开头的标识字符"BZh"再加上字符'0'。我们看到这段代码：

const unsigned int BZh0 =

(((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)

+(((unsigned int)'h') << 8)+(unsigned int)'0';

就是压缩常量的值，'B'、'Z'、'h'、'0'是ASCII码，分别为0x42、0x5A、0x68、0x30，计算出来就是(0x42<<24)+(0x5A<<16)+(0x68<<8)+0x30，最后等于0x425A6830。

继续走，639行分配i个字节的内存，由指针bd指上，同时通过*bdp，bunzip2函数的内部变量bd也指到它了，然后642行将该结构清零（这些都是规定动作，值得我们程序员学习）。看来我们不得不把数据结构bunzip_data列出来了：

91/* Structure holding all the housekeeping data, including IO buffers and

92 memory that persists between calls to bunzip */

93struct bunzip_data {

94 /* State for interrupting output loop */

95 int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;

96 /* I/O tracking data (file handles, buffers, positions, etc.) */

97 int (*fill)(void*, unsigned int);

98 int inbufCount, inbufPos /*, outbufPos*/;

99 unsigned char *inbuf /*,*outbuf*/;

100 unsigned int inbufBitCount, inbufBits;

101 /* The CRC values stored in the block header and calculated from the

102 data */

103 unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC;

104 /* Intermediate buffer and its size (in bytes) */

105 unsigned int *dbuf, dbufSize;

106 /* These things are a bit too big to go on the stack */

107 unsigned char selectors[32768]; /* nSelectors = 15 bits */

108 struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */

109 int io_error; /* non-zero if we have IO error */

110};

644和645行初始化bunzip_data的inbuf和inbufCount字段。随后652～657行填充crc32Table字段，按照651行的注释，这是一张大端的CRC32表；注意结构体注释里所说的"存放在块头中的CRC值"对应的应是headerCRC字段，而不是crc32Table。随后660行调用get_bits函数：

113/* Return the next nnn bits of input. All reads from the compressed input

114 are done through this function. All reads are big endian */

115static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted)

116{

117 unsigned int bits = 0;

118

119 /* If we need to get more data from the byte buffer, do so.

120 (Loop getting one byte at a time to enforce endianness and avoid

121 unaligned access.) */

122 while (bd->inbufBitCount < bits_wanted) {

123 /* If we need to read more data from file into byte buffer, do

124 so */

125 if (bd->inbufPos == bd->inbufCount) {

126 if (bd->io_error)

127 return 0;

128 bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE);

129 if (bd->inbufCount <= 0) {

130 bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF;

131 return 0;

132 }

133 bd->inbufPos = 0;

134 }

135 /* Avoid 32-bit overflow (dump bit buffer to top of output) */

136 if (bd->inbufBitCount >= 24) {

137 bits = bd->inbufBits&((1 << bd->inbufBitCount)-1);

138 bits_wanted -= bd->inbufBitCount;

139 bits <<= bits_wanted;

140 bd->inbufBitCount = 0;

141 }

142 /* Grab next 8 bits of input from buffer. */

143 bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];

144 bd->inbufBitCount += 8;

145 }

146 /* Calculate result */

147 bd->inbufBitCount -= bits_wanted;

148 bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1);

149

150 return bits;

151}

这个函数比较简单，一开始inbufBitCount肯定小于32，因为被清零了。bd->inbufPos也被642行清零，而inbufCount在645行被设为len（压缩内核长度），两者不可能相等。所以第一次循环直接来到143行，第一次循环，bd->inbufBits也就是bd->inbuf[bd->inbufPos++]的值，由于是后置自增，取到的是bd->inbuf[0]，之后bd->inbufPos才变成1，所以bd->inbufBits = bd->inbuf[0]，144行bd->inbufBitCount为8了；

第二次循环，bd->inbufBitCount为8还是小于32，也小于24，所以又赋值bd->inbufBits为bd->inbuf[0]<<8|bd->inbuf[1]，bd->inbufPos加加之后就变成2了，bd->inbufBitCount成了16。

第三次循环，bd->inbufBitCount为16还是小于32，也小于24，所以又赋值bd->inbufBits为bd->inbuf[0]<<16|bd->inbuf[1]<<8|bd->inbuf[2]，bd->inbufPos加加之后就变成3了，bd->inbufBitCount成了24。

第四次循环，bd->inbufBitCount为24了，还是小于32，但是要进入136行的条件块。在这里先得到了一个bits值，这个值是：

(bd->inbuf[0]<<16|bd->inbuf[1]<<8|bd->inbuf[2])&((1 << bd->inbufBitCount)-1)

换算过来就是(bd->inbuf[0]<<16|bd->inbuf[1]<<8|bd->inbuf[2])&0xffffff。随后138行把bits_wanted减去24（32-24=8），139行再把bits左移8位，140行把bd->inbufBitCount清零。是不是要进入一个死循环了呢？没有，143行bd->inbufBits继续左移8位并拼上bd->inbuf[3]，144行bd->inbufBitCount变成8；而bits_wanted此时也是8，122行的循环条件不再成立，所以跳出循环了。现在感受到内核代码的变态了吧，下面的更变态：147行bd->inbufBitCount减去bits_wanted后又变回0，148行bits，也就是刚才那么长一串还要继续去|= (bd->inbufBits >> 0)&((1 << 8)-1)，也就是把bd->inbuf[3]补到最低的8位上。

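最后返回这个bits值。其实这个值并不神秘：get_bits按大端顺序把输入流的前4个字节拼成一个32位数，即bd->inbuf[0]<<24|bd->inbuf[1]<<16|bd->inbuf[2]<<8|bd->inbuf[3]。按照659行和664行的注释，bzip2数据流以"BZh"开头，第4个字节是ASCII字符'1'～'9'，表示以100k为单位的块大小，所以返回值就是"BZh"加上这个块大小字符。以我的水平只能分析到这种程度了，对这里位操作细节吃得更透的同志请联系我啊，一定会请你喝酒的。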

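回到start_bunzip中，刚才bits返回给了i。661行的i-BZh0-1是这样算的：如果数据流以"BZh"加字符'1'～'9'开头，那么i-BZh0就是1～9，再减1得到0～8，转换成无符号整型后小于9，检查通过；如果开头不是这种格式，无符号减法的结果会大于等于9，函数就返回RETVAL_NOT_BZIP_DATA，不可能继续走了。我们假设i-BZh0-1这个值为8，即块大小字符是'9'。最后666行和668行给bd->dbufSize和bd->dbuf赋值：dbufSize为100000*9=900000，dbuf则分配了900000*sizeof(int)个字节。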

最后返回到bunzip2中，start_bunzip最后返回的RETVAL_OK为0，所以进入705这个条件语句中，一来就进入read_bunzip函数：

513/* Undo burrows-wheeler transform on intermediate buffer to produce output.

514 If start_bunzip was initialized with out_fd =-1, then up to len bytes of

515 data are written to outbuf. Return value is number of bytes written or

516 error (all errors are negative numbers). If out_fd!=-1, outbuf and len

517 are ignored, data is written to out_fd and return is RETVAL_OK or error.

518*/

519

520static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len)

521{

522 const unsigned int *dbuf;

523 int pos, xcurrent, previous, gotcount;

524

525 /* If last read was short due to end of file, return last block now */

526 if (bd->writeCount < 0)

527 return bd->writeCount;

528

529 gotcount = 0;

530 dbuf = bd->dbuf;

531 pos = bd->writePos;

532 xcurrent = bd->writeCurrent;

533

534 /* We will always have pending decoded data to write into the output

535 buffer unless this is the very first call (in which case we haven't

536 Huffman-decoded a block into the intermediate buffer yet). */

537

538 if (bd->writeCopies) {

539 /* Inside the loop, writeCopies means extra copies (beyond 1) */

540 --bd->writeCopies;

541 /* Loop outputting bytes */

542 for (;;) {

543 /* If the output buffer is full, snapshot

544 * state and return */

545 if (gotcount >= len) {

546 bd->writePos = pos;

547 bd->writeCurrent = xcurrent;

548 bd->writeCopies++;

549 return len;

550 }

551 /* Write next byte into output buffer, updating CRC */

552 outbuf[gotcount++] = xcurrent;

553 bd->writeCRC = (((bd->writeCRC) << 8)

554 ^bd->crc32Table[((bd->writeCRC) >> 24)

555 ^xcurrent]);

556 /* Loop now if we're outputting multiple

557 * copies of this byte */

558 if (bd->writeCopies) {

559 --bd->writeCopies;

560 continue;

561 }

562decode_next_byte:

563 if (!bd->writeCount--)

564 break;

565 /* Follow sequence vector to undo

566 * Burrows-Wheeler transform */

567 previous = xcurrent;

568 pos = dbuf[pos];

569 xcurrent = pos&0xff;

570 pos >>= 8;

571 /* After 3 consecutive copies of the same

572 byte, the 4th is a repeat count. We count

573 down from 4 instead *of counting up because

574 testing for non-zero is faster */

575 if (--bd->writeRunCountdown) {

576 if (xcurrent != previous)

577 bd->writeRunCountdown = 4;

578 } else {

579 /* We have a repeated run, this byte

580 * indicates the count */

581 bd->writeCopies = xcurrent;

582 xcurrent = previous;

583 bd->writeRunCountdown = 5;

584 /* Sometimes there are just 3 bytes

585 * (run length 0) */

586 if (!bd->writeCopies)

587 goto decode_next_byte;

588 /* Subtract the 1 copy we'd output

589 * anyway to get extras */

590 --bd->writeCopies;

591 }

592 }

593 /* Decompression of this block completed successfully */

594 bd->writeCRC = ~bd->writeCRC;

595 bd->totalCRC = ((bd->totalCRC << 1) |

596 (bd->totalCRC >> 31)) ^ bd->writeCRC;

597 /* If this block had a CRC error, force file level CRC error. */

598 if (bd->writeCRC != bd->headerCRC) {

599 bd->totalCRC = bd->headerCRC+1;

600 return RETVAL_LAST_BLOCK;

601 }

602 }

603

604 /* Refill the intermediate buffer by Huffman-decoding next

605 * block of input */

606 /* (previous is just a convenient unused temp variable here) */

607 previous = get_next_block(bd);

608 if (previous) {

609 bd->writeCount = previous;

610 return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount;

611 }

612 bd->writeCRC = 0xffffffffUL;

613 pos = bd->writePos;

614 xcurrent = bd->writeCurrent;

615 goto decode_next_byte;

616}

看到这个函数，我都冒冷汗了。具体的我实在没能力去分析他了，感兴趣的同志可以帮帮我，谢谢。最后解压后的程序会存放到outbuf开始的内存中。这个函数结束后，bunzip2也就带着RETVAL_OK结束了，decompress也就结束了。回到arch/x86/boot/compressed/head_32.S的代码中：

141#if CONFIG_RELOCATABLE

……

165#endif

由于我们.config没有CONFIG_RELOCATABLE，所以不去详细分析141～165行的代码，直接来到170行：

170 xorl %ebx, %ebx

171 jmp *%ebp

开始执行解压缩后的第一条代码，即第二个startup_32()函数。这个函数主要是为第一个Linux进程（进程0）建立执行环境。该函数主要执行以下操作：

1. 把段寄存器初始化为最终值。

2. 把内核的bss段填充为0。

3. 初始化包含在swapper_pg_dir的临时内核页表，并初始化pg0，以使线性地址一致地映射同一物理地址。

4. 把页全局目录的地址存放在cr3寄存器中，并通过设置cr0寄存器的PG位启用分页。

5. 把从BIOS中获得的系统参数和传递给操作系统的参数boot_params放入第一个页框中。

6. 为进程0建立内核态堆栈。

7. 该函数再一次清零eflags寄存器的所有位。

8. 调用setup_idt用空的中断处理程序填充中断描述符表IDT。

9. 识别处理器的型号。

10. 用编译好的GDT和IDT表的地址来填充gdtr和idtr寄存器。

11. 初始化虚拟机监视器Xen。

12. 向start_kernel()函数进发。

后面咱们会详细分析以上步骤。