解压缩内核

来源:互联网 发布:万网域名注册官网 编辑:程序博客网 时间:2024/05/22 21:21

4.1.2 解压缩内核

解压缩内核使用的是decompress_kernel函数,来自arch/x86/boot/compressed/misc.c

 

301asmlinkage void decompress_kernel(void *rmode, memptr heap,

 302                                  unsigned char *input_data,

 303                                  unsigned long input_len,

 304                                  unsigned char *output)

 305{

 306        real_mode = rmode;

 307

 308        if (real_mode->hdr.loadflags & QUIET_FLAG)

 309                quiet = 1;

 310

 311        if (real_mode->screen_info.orig_video_mode == 7) {

 312                vidmem = (char *) 0xb0000;

 313                vidport = 0x3b4;

 314        } else {

 315                vidmem = (char *) 0xb8000;

 316                vidport = 0x3d4;

 317        }

 318

 319        lines = real_mode->screen_info.orig_video_lines;

 320        cols = real_mode->screen_info.orig_video_cols;

 321

 322        free_mem_ptr     = heap;        /* Heap */

 323        free_mem_end_ptr = heap + BOOT_HEAP_SIZE;

 324

 325        if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))

 326                error("Destination address inappropriately aligned");

 327#ifdef CONFIG_X86_64

 328        if (heap > 0x3fffffffffffUL)

 329                error("Destination address too large");

 330#else

 331        if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))

 332                error("Destination address too large");

 333#endif

 334#ifndef CONFIG_RELOCATABLE

 335        if ((unsigned long)output != LOAD_PHYSICAL_ADDR)

 336                error("Wrong destination address");

 337#endif

 338

 339        if (!quiet)

 340                putstr("\nDecompressing Linux... ");

 341        decompress(input_data, input_len, NULL, NULL, output, NULL, error);

 342        parse_elf(output);

 343        if (!quiet)

 344                putstr("done.\nBooting the kernel.\n");

 345        return;

 346}

 

看到他的参数,共有5个,是刚才依次压栈而得到的。第一个是压入的output(还记得吧,倒着来的),在刚才的131行看到的,来自z_extract_offset_negative,用于作为解压缩的输出缓冲区首地址;第二个,input_len,其值等于z_input_len,表示待解压的压缩内核的大小;第三个,input_data,来自input_data,表示待解压的压缩内核的地址;第四个参数,heap,32位下是32位,来自boot_heap,表示解压缩阶段的堆;最后一个参数,rmode,来自我们刚才用到的esi寄存器,指向实模式阶段准备好的启动参数数据(从306~320行可以看到,函数正是通过它来访问hdr和screen_info等字段的)。

 

假设我配置的是CONFIG_KERNEL_BZIP2,那么会调用顶层lib/decompress_bunzip2.c中的decompress函数,最终会调用位于同一文件的bunzip2函数(注意,在我们制作bzImage的时候使用的压缩程序只有一个,如果指定的bunzip2,那么都在decompress_bunzip2.c里):

 

674/* Example usage: decompress src_fd to dst_fd.  (Stops at end of bzip2 data,

 675   not end of file.) */

 676STATIC int INIT bunzip2(unsigned char *buf, int len,

 677                        int(*fill)(void*, unsigned int),

 678                        int(*flush)(void*, unsigned int),

 679                        unsigned char *outbuf,

 680                        int *pos,

 681                        void(*error_fn)(char *x))

 682{

 683        struct bunzip_data *bd;

 684        int i = -1;

 685        unsigned char *inbuf;

 686

 687        set_error_fn(error_fn);

 688        if (flush)

 689                outbuf = malloc(BZIP2_IOBUF_SIZE);

 690

 691        if (!outbuf) {

 692                error("Could not allocate output bufer");

 693                return RETVAL_OUT_OF_MEMORY;

 694        }

 695        if (buf)

 696                inbuf = buf;

 697        else

 698                inbuf = malloc(BZIP2_IOBUF_SIZE);

 699        if (!inbuf) {

 700                error("Could not allocate input bufer");

 701                i = RETVAL_OUT_OF_MEMORY;

 702                goto exit_0;

 703        }

 704        i = start_bunzip(&bd, inbuf, len, fill);

 705        if (!i) {

 706                for (;;) {

 707                        i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE);

 708                        if (i <= 0)

 709                                break;

 710                        if (!flush)

 711                                outbuf += i;

 712                        else

 713                                if (i != flush(outbuf, i)) {

 714                                    i = RETVAL_UNEXPECTED_OUTPUT_EOF;

 715                                    break;

 716                                }

 717                }

 718        }

 719        /* Check CRC and release memory */

 720        if (i == RETVAL_LAST_BLOCK) {

 721                if (bd->headerCRC != bd->totalCRC)

 722                        error("Data integrity error when decompressing.");

 723                else

 724                        i = RETVAL_OK;

 725        } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) {

 726                error("Compressed file ends unexpectedly");

 727        }

 728        if (!bd)

 729                goto exit_1;

 730        if (bd->dbuf)

 731                large_free(bd->dbuf);

 732        if (pos)

 733                *pos = bd->inbufPos;

 734        free(bd);

 735exit_1:

 736        if (!buf)

 737                free(inbuf);

 738exit_0:

 739        if (flush)

 740                free(outbuf);

 741        return i;

 742}

 

bunzip2函数的第一、二个参数是待解压的压缩内核的首地址和长度;第三、四是两个函数指针参数(fill和flush),传进来的时候为空;第五个参数是解压缩后的输出地址,也是刚才传进来的。683行首先声明一个指向bunzip_data数据结构的指针bd,待会再去说他。我们没有传入flush函数,所以688行不会另外分配输出缓冲区,解压结果直接写到outbuf;又因为buf不为空,695~696行让局部指针inbuf指向刚才传递进来的压缩内核首地址buf,随后704行调用函数start_bunzip:

 

626static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len,

 627                             int (*fill)(void*, unsigned int))

 628{

 629        struct bunzip_data *bd;

 630        unsigned int i, j, c;

 631        const unsigned int BZh0 =

 632                (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)

 633                +(((unsigned int)'h') << 8)+(unsigned int)'0';

 634

 635        /* Figure out how much data to allocate */

 636        i = sizeof(struct bunzip_data);

 637

 638        /* Allocate bunzip_data.  Most fields initialize to zero. */

 639        bd = *bdp = malloc(i);

 640        if (!bd)

 641                return RETVAL_OUT_OF_MEMORY;

 642        memset(bd, 0, sizeof(struct bunzip_data));

 643        /* Setup input buffer */

 644        bd->inbuf = inbuf;

 645        bd->inbufCount = len;

 646        if (fill != NULL)

 647                bd->fill = fill;

 648        else

 649                bd->fill = nofill;

 650

 651        /* Init the CRC32 table (big endian) */

 652        for (i = 0; i < 256; i++) {

 653                c = i << 24;

 654                for (j = 8; j; j--)

 655                        c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1);

 656                bd->crc32Table[i] = c;

 657        }

 658

 659        /* Ensure that file starts with "BZh['1'-'9']." */

 660        i = get_bits(bd, 32);

 661        if (((unsigned int)(i-BZh0-1)) >= 9)

 662                return RETVAL_NOT_BZIP_DATA;

 663

 664        /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of

 665           uncompressed data.  Allocate intermediate buffer for block. */

 666        bd->dbufSize = 100000*(i-BZh0);

 667

 668        bd->dbuf = large_malloc(bd->dbufSize * sizeof(int));

 669        if (!bd->dbuf)

 670                return RETVAL_OUT_OF_MEMORY;

 671        return RETVAL_OK;

 672}

 

由于fill是空的(bdp并不为空,它只是用来把新分配的bunzip_data结构的地址传回给调用者),所以传入这个函数的有效的仅仅是待解压的压缩内核首地址及其长度,内部变量i是数据结构bunzip_data的长度。而631行的BZh0常量很重要,就是著名的bz压缩常量(bzip2文件头的魔数)。我们看到这段代码:

        const unsigned int BZh0 =

                (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16)

                +(((unsigned int)'h') << 8)+(unsigned int)'0';

就是压缩常量的值,由'B'、'Z'、'h'、'0'的ASCII码(分别是0x42、0x5A、0x68、0x30)拼成,计算出来就是(0x42<<24)+(0x5A<<16)+(0x68<<8)+0x30,最后等于0x425A6830。

 

继续走,分配i个字节的内存,由指针bd指上,同时通过*bdp让bunzip2函数的内部变量bd也指到它了,然后642行将该结构清零(这些都是规定动作,值得我们程序员学习)。看来我们不得不把数据结构bunzip_data列出来了:

 

  91/* Structure holding all the housekeeping data, including IO buffers and

  92   memory that persists between calls to bunzip */

  93struct bunzip_data {

  94        /* State for interrupting output loop */

  95        int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent;

  96        /* I/O tracking data (file handles, buffers, positions, etc.) */

  97        int (*fill)(void*, unsigned int);

  98        int inbufCount, inbufPos /*, outbufPos*/;

  99        unsigned char *inbuf /*,*outbuf*/;

 100        unsigned int inbufBitCount, inbufBits;

 101        /* The CRC values stored in the block header and calculated from the

 102        data */

 103        unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC;

 104        /* Intermediate buffer and its size (in bytes) */

 105        unsigned int *dbuf, dbufSize;

 106        /* These things are a bit too big to go on the stack */

 107        unsigned char selectors[32768];         /* nSelectors = 15 bits */

 108        struct group_data groups[MAX_GROUPS];   /* Huffman coding tables */

 109        int io_error;                   /* non-zero if we have IO error */

 110};

 

644、645行初始化bunzip_data的inbuf和inbufCount字段。随后652~657行给crc32Table字段赋值,根据651行的注释,这是一张(大端序的)CRC32查找表,后面在read_bunzip中用它来计算解压数据的CRC校验值;随后调用get_bits函数:

 

113/* Return the next nnn bits of input.  All reads from the compressed input

 114   are done through this function.  All reads are big endian */

 115static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted)

 116{

 117        unsigned int bits = 0;

 118

 119        /* If we need to get more data from the byte buffer, do so.

 120           (Loop getting one byte at a time to enforce endianness and avoid

 121           unaligned access.) */

 122        while (bd->inbufBitCount < bits_wanted) {

 123                /* If we need to read more data from file into byte buffer, do

 124                   so */

 125                if (bd->inbufPos == bd->inbufCount) {

 126                        if (bd->io_error)

 127                                return 0;

 128                        bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE);

 129                        if (bd->inbufCount <= 0) {

 130                            bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF;

 131                            return 0;

 132                        }

 133                        bd->inbufPos = 0;

 134                }

 135                /* Avoid 32-bit overflow (dump bit buffer to top of output) */

 136                if (bd->inbufBitCount >= 24) {

 137                        bits = bd->inbufBits&((1 << bd->inbufBitCount)-1);

 138                        bits_wanted -= bd->inbufBitCount;

 139                        bits <<= bits_wanted;

 140                        bd->inbufBitCount = 0;

 141                }

 142                /* Grab next 8 bits of input from buffer. */

 143                bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++];

 144                bd->inbufBitCount += 8;

 145        }

 146        /* Calculate result */

 147        bd->inbufBitCount -= bits_wanted;

 148        bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1);

 149

 150        return bits;

 151}

 

这个函数比较简单,一开始inbufBitCount肯定小于32,因为被清零了。由于是刚开始解压缩,bd->inbufPos也为0,不等于inbufCount(压缩内核的长度)。所以第一次循环直接来到143行,bd->inbufBits原来是0,左移8位后再或上bd->inbuf[bd->inbufPos++]的值;注意这里是后置自增,先取bd->inbuf[0],然后bd->inbufPos才变成1,所以bd->inbufBits = bd->inbuf[0],144行bd->inbufBitCount变成8了;

 

第二次循环,bd->inbufBitCount为8,还是小于32,也小于24,所以又赋值bd->inbufBits为(bd->inbuf[0]<<8)|bd->inbuf[1],bd->inbufPos加加之后就变成2了,bd->inbufBitCount成了16;

 

第三次循环,bd->inbufBitCount为16,还是小于32,也小于24,所以又赋值bd->inbufBits为(bd->inbuf[0]<<16)|(bd->inbuf[1]<<8)|bd->inbuf[2],bd->inbufPos加加之后就变成3了,bd->inbufBitCount成了24;

 

第四次循环,bd->inbufBitCount为24了,还是小于32,但是要进入136行的条件块。只不过得到了一个bits值,这个值是:

((bd->inbuf[0]<<16)|(bd->inbuf[1]<<8)|bd->inbuf[2])&((1 << bd->inbufBitCount)-1)

换算过来就是((bd->inbuf[0]<<16)|(bd->inbuf[1]<<8)|bd->inbuf[2])&0xffffff。最后139行再把这么长串左移8位(此时bits_wanted在138行做了个运算:32-24=8)。最后转了这么久,bd->inbufBitCount又为0了。是不是要进入一个死循环了呢?没有,143行bd->inbufBits继续走,低8位装入了bd->inbuf[3],144行bd->inbufBitCount变成8,而bits_wanted也已经变成8了,循环条件不再成立,所以跳出循环了。现在感受到内核代码的变态了吧,下面的更变态:147行bd->inbufBitCount减去bits_wanted之后又变成0了,148行的bits,也就是刚才那么长一串还要继续去|= (bd->inbufBits >> 0)&((1 << 8)-1),也就是把bd->inbuf[3]填进最低的8位。

 

最后返回这个bits值。这个值其实并不神秘:get_bits(bd, 32)只是把输入缓冲区最前面的4个字节按大端序拼成一个32位整数,即(inbuf[0]<<24)|(inbuf[1]<<16)|(inbuf[2]<<8)|inbuf[3]。根据659行和664行的注释,对bzip2压缩的数据来说,这4个字节就是文件头"BZh"再加上一个表示块大小的字符'1'~'9'。如果有同志对这里的代码有更深入的理解,欢迎联系我,一定会请你喝酒的。

 

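回到start_bunzip中,刚才的bits返回给了i(660行)。再看661行的i-BZh0-1:如果文件头是"BZh1"~"BZh9",i-BZh0就是1~9,减1之后是0~8,小于9;其他情况下,转换成无符号整型后的结果一定大于等于9,函数就返回RETVAL_NOT_BZIP_DATA,不可能继续走了。我们假设i-BZh0的值为8。最后666行和668行给bd->dbufSize和bd->dbuf赋值,dbufSize就是800000(即800k),而dbuf指向的缓冲区大小为800000*sizeof(int)字节。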

 

最后返回到bunzip2中,start_bunzip最后返回的RETVAL_OK是0,所以进入705行这个条件语句中,一来就进入read_bunzip函数:

 

/* Undo burrows-wheeler transform on intermediate buffer to produce output.

 514   If start_bunzip was initialized with out_fd =-1, then up to len bytes of

 515   data are written to outbuf.  Return value is number of bytes written or

 516   error (all errors are negative numbers).  If out_fd!=-1, outbuf and len

 517   are ignored, data is written to out_fd and return is RETVAL_OK or error.

 518*/

 519

 520static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len)

 521{

 522        const unsigned int *dbuf;

 523        int pos, xcurrent, previous, gotcount;

 524

 525        /* If last read was short due to end of file, return last block now */

 526        if (bd->writeCount < 0)

 527                return bd->writeCount;

 528

 529        gotcount = 0;

 530        dbuf = bd->dbuf;

 531        pos = bd->writePos;

 532        xcurrent = bd->writeCurrent;

 533

 534        /* We will always have pending decoded data to write into the output

 535           buffer unless this is the very first call (in which case we haven't

 536           Huffman-decoded a block into the intermediate buffer yet). */

 537

 538        if (bd->writeCopies) {

 539                /* Inside the loop, writeCopies means extra copies (beyond 1) */

 540                --bd->writeCopies;

 541                /* Loop outputting bytes */

 542                for (;;) {

 543                        /* If the output buffer is full, snapshot

 544                         * state and return */

 545                        if (gotcount >= len) {

 546                                bd->writePos = pos;

 547                                bd->writeCurrent = xcurrent;

 548                                bd->writeCopies++;

 549                                return len;

 550                        }

 551                        /* Write next byte into output buffer, updating CRC */

 552                        outbuf[gotcount++] = xcurrent;

 553                        bd->writeCRC = (((bd->writeCRC) << 8)

 554                                ^bd->crc32Table[((bd->writeCRC) >> 24)

 555                                ^xcurrent]);

 556                        /* Loop now if we're outputting multiple

 557                         * copies of this byte */

 558                        if (bd->writeCopies) {

 559                                --bd->writeCopies;

 560                                continue;

 561                        }

 562decode_next_byte:

 563                        if (!bd->writeCount--)

 564                                break;

 565                        /* Follow sequence vector to undo

 566                         * Burrows-Wheeler transform */

 567                        previous = xcurrent;

 568                        pos = dbuf[pos];

 569                        xcurrent = pos&0xff;

 570                        pos >>= 8;

 571                        /* After 3 consecutive copies of the same

 572                           byte, the 4th is a repeat count.  We count

 573                           down from 4 instead *of counting up because

 574                           testing for non-zero is faster */

 575                        if (--bd->writeRunCountdown) {

 576                                if (xcurrent != previous)

 577                                        bd->writeRunCountdown = 4;

 578                        } else {

 579                                /* We have a repeated run, this byte

 580                                 * indicates the count */

 581                                bd->writeCopies = xcurrent;

 582                                xcurrent = previous;

 583                                bd->writeRunCountdown = 5;

 584                                /* Sometimes there are just 3 bytes

 585                                 * (run length 0) */

 586                                if (!bd->writeCopies)

 587                                        goto decode_next_byte;

 588                                /* Subtract the 1 copy we'd output

 589                                 * anyway to get extras */

 590                                --bd->writeCopies;

 591                        }

 592                }

 593                /* Decompression of this block completed successfully */

 594                bd->writeCRC = ~bd->writeCRC;

 595                bd->totalCRC = ((bd->totalCRC << 1) |

 596                                (bd->totalCRC >> 31)) ^ bd->writeCRC;

 597                /* If this block had a CRC error, force file level CRC error. */

 598                if (bd->writeCRC != bd->headerCRC) {

 599                        bd->totalCRC = bd->headerCRC+1;

 600                        return RETVAL_LAST_BLOCK;

 601                }

 602        }

 603

 604        /* Refill the intermediate buffer by Huffman-decoding next

 605         * block of input */

 606        /* (previous is just a convenient unused temp variable here) */

 607        previous = get_next_block(bd);

 608        if (previous) {

 609                bd->writeCount = previous;

 610                return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount;

 611        }

 612        bd->writeCRC = 0xffffffffUL;

 613        pos = bd->writePos;

 614        xcurrent = bd->writeCurrent;

 615        goto decode_next_byte;

 616}

 

看到这个函数,我都冒冷汗了。具体的我实在没能力去分析他了,感兴趣的同志可以帮帮我,谢谢。最后解压后的程序会存放到outbuf开始的内存中。这个函数结束后,bunzip2也就带着RETVAL_OK结束了,decompress也就结束了。回到boot/compressed/head_32.S的代码中:

 

141#if CONFIG_RELOCATABLE

……

 165#endif

 

由于我们.config没有CONFIG_RELOCATABLE,所以不去详细分析141~165行的代码,直接来到170行:

 

170        xorl    %ebx, %ebx

 171        jmp     *%ebp

 

开始执行解压缩后的第一条代码,即第二个startup_32()函数。这个函数主要是为第一个Linux进程(进程0)建立执行环境。该函数主要执行以下操作:

1.        把段寄存器初始化为最终值。

2.        把内核的bss段填充为0

3.        初始化包含在swapper_pg_dir的临时内核页表,并初始化pg0,以使线性地址一致地映射同一物理地址。

4.        把页全局目录的地址存放在cr3寄存器中,并通过设置cr0寄存器的PG位启用分页。

5.        把从BIOS中获得的系统参数和传递给操作系统的参数boot_params放入第一个页框中。

6.        为进程0建立内核态堆栈。

7.        该函数再一次清零eflags寄存器的所有位。

8.        调用setup_idt用空的中断处理程序填充中断描述符表IDT

9.        识别处理器的型号。

10.    用编译好的GDT和IDT表的地址来填充gdtr和idtr寄存器。

11.    初始化虚拟机监视器Xen

12.    向start_kernel()函数进发。

 

后面咱们会详细分析以上步骤。