程序员的自我修养:MiniCRT自制C语言运行库

来源:互联网 发布:网络课程视频加速器 编辑:程序博客网 时间:2024/06/05 00:15

程序员的自我修养一书中,在最后一章专门介绍一款小型的C语言运行库,并给出了详细的代码实现。阅读该代码实现,可以对C语言运行库提供的语言抽象层得到更深的理解。Talk is cheap, show code!

minicrt.h: MiniCRT的头文件

/*
 * minicrt.h - public interface of MiniCRT, a minimal C runtime library.
 *
 * The whole header is wrapped in the include guard (the guard previously
 * closed right after its #define and therefore protected nothing).
 */
#ifndef __MINI_CRT_H__
#define __MINI_CRT_H__

/*
 * Keep C name decoration for every exported function, even when the header
 * is included from C++.
 */
#ifdef __cplusplus
extern "C" {
#endif

/* ---- heap (malloc.c) ---- */
#ifndef NULL
#define NULL (0)
#endif

void free(void* ptr);
void* malloc(unsigned size);
/* Returns non-zero on success, 0 on failure. */
int mini_crt_init_heap(void);
/*
 * Note: brk() is a static helper private to malloc.c and is intentionally
 * not declared here; a static prototype in a shared header only produces
 * "declared but never defined" diagnostics in the other translation units.
 */

/* ---- strings (string.c) ---- */
char* itoa(int n, char* str, int radix);
int strcmp(const char* src, const char* dst);
char* strcpy(char* dest, const char* src);
unsigned strlen(const char* str);

/* ---- files and I/O (stdio.c) ---- */
typedef int FILE;

#define EOF (-1)

/*
 * A FILE* in MiniCRT is not a pointer to a FILE structure:
 *  - on Windows it carries the handle returned by GetStdHandle/CreateFile
 *    (the including file must include <Windows.h> before using the
 *    stdin/stdout/stderr macros);
 *  - otherwise it carries the file descriptor, with 0, 1 and 2 standing for
 *    standard input, output and error.
 */
#ifdef WIN32
#define stdin  ((FILE*) (GetStdHandle(STD_INPUT_HANDLE)))
#define stdout ((FILE*) (GetStdHandle(STD_OUTPUT_HANDLE)))
#define stderr ((FILE*) (GetStdHandle(STD_ERROR_HANDLE)))
#else
#define stdin  ((FILE*) 0)
#define stdout ((FILE*) 1)
#define stderr ((FILE*) 2)
#endif

/*
 * MiniCRT has no buffering and no open-file table, so I/O needs no real
 * initialisation; this function simply reports success (non-zero).
 */
int mini_crt_init_io(void);
FILE* fopen(const char* filename, const char* mode);
int fread(void* buffer, int size, int count, FILE* stream);
int fwrite(const void* buffer, int size, int count, FILE* stream);
int fclose(FILE* fp);
int fseek(FILE* fp, int offset, int set);

/* ---- formatted output (string.c / printf.c) ---- */
int fputc(int c, FILE* stream);
int fputs(const char* str, FILE* stream);
int printf(const char* format, ...);
int fprintf(FILE* stream, const char* format, ...);

/* ---- internal ---- */
void do_global_ctors();
void mini_crt_call_exit_routine();

/* ---- atexit ---- */
typedef void (*atexit_func_t)(void);
int atexit(atexit_func_t func);

#ifdef __cplusplus
}
#endif

#endif /* __MINI_CRT_H__ */

entry.c:运行库入口函数定义

/*
 * entry.c - MiniCRT process entry point and exit().
 */
#include "minicrt.h"

#ifdef WIN32
#include <Windows.h>
#endif

/* Capacity of the argv array built from the Windows command line. */
#define MINI_CRT_MAX_ARGS 16

/* main() supplied by the program that links against MiniCRT. */
extern int main(int argc, char* argv[]);
void exit(int);

/*
 * Terminate the process with exit code 1.
 * msg is currently not printed (the printf call was disabled in the
 * original source).
 */
static void crt_fatal_error(const char* msg)
{
    (void)msg;
    exit(1);
}

#ifdef WIN32
/*
 * Split the command line `cl` in place into at most `max_args` arguments.
 *
 * Arguments are separated by runs of spaces; spaces between double quotes
 * do not separate. The quote characters themselves are left in the
 * argument text. Returns the number of entries stored in argv.
 *
 * Fixes over the previous inline parser:
 *  - argv is never written beyond max_args entries;
 *  - the first character after a run of spaces is examined (it used to be
 *    skipped, so an argument starting with '"' did not open a quoted
 *    section);
 *  - trailing spaces no longer step past the terminating '\0'.
 */
static int parse_command_line(char* cl, char* argv[], int max_args)
{
    int argc = 0;
    int in_quotes = 0;

    argv[argc++] = cl;
    while (*cl) {
        if (*cl == '\"') {
            in_quotes = !in_quotes;
        } else if (*cl == ' ' && !in_quotes) {
            char* next = cl + 1;

            while (*next == ' ')
                next++;
            *cl = '\0'; /* terminate the argument that just ended */
            if (*next && argc < max_args)
                argv[argc++] = next;
            cl = next;
            continue; /* re-examine *next: it may be a quote or the end */
        }
        cl++;
    }
    return argc;
}
#endif

/*
 * Process entry point (selected with `ld -e mini_crt_entry` or
 * `link /entry:mini_crt_entry`).
 *
 * Builds argc/argv, initialises the heap and I/O, calls main() and exits
 * with its return value. Never returns.
 */
void mini_crt_entry(void)
{
    int ret;
#ifdef WIN32
    /* Windows hands over one command-line string; split it ourselves. */
    char* argv[MINI_CRT_MAX_ARGS];
    int argc = parse_command_line(GetCommandLineA(), argv, MINI_CRT_MAX_ARGS);
#else
    int argc;
    char** argv;
    char* ebp_reg;

    /*
     * Read argc/argv relative to the frame pointer.
     * NOTE(review): assumes this function is entered directly as the ELF
     * entry point (no return address on the stack) and is compiled with a
     * frame-pointer prologue, so that ebp+4 is argc and ebp+8 is argv[0].
     */
    asm(" movl %%ebp, %0  \n\t":"=r"(ebp_reg));
    argc = *(int*)(ebp_reg + 4);
    argv = (char**)(ebp_reg + 8);
#endif

    if (!mini_crt_init_heap())
        crt_fatal_error("heap initialize failed");
    if (!mini_crt_init_io())
        crt_fatal_error("IO initialize failed");

    ret = main(argc, argv);
    exit(ret);
}

/*
 * Terminate the process with the given exit code.
 * The atexit routine call (mini_crt_call_exit_routine) is disabled in this
 * version, as it was in the original source.
 */
void exit(int exitCode)
{
#ifdef WIN32
    ExitProcess(exitCode);
#else
    /* int 0x80 with eax = 1 and ebx = exit code; hlt guards against return. */
    asm("movl %0, %%ebx \n\t"
        "movl $1, %%eax \n\t"
        "int $0x80      \n\t"
        "hlt            \n\t"::"m"(exitCode));
#endif
}

malloc.c:堆的初始化和功能实现

//堆的实现/*在遵循Mini CRT的原则下,我们将Mini CRT堆的实现归纳为以下几条1.实现一个以空闲链表算法为基础的堆空间分配算法;2.为了简单起见,堆空间大小固定为32MB,初始化后空间不再扩展或缩小;3.在Windows平台下不适用HeapAlloc等堆分配算法,采用VirtualAlloc 向系统直接申请32MB空间,由我们自己的堆分配算法实现malloc4.在Linux平台下,使用brk将数据段结束地址向后调整32MB,将这块空间作为堆空间*//* brk系统调用可以设置进程的数据段.data边界,而sbrk可以移动进程的数据段边界,显然如果将数据段边界后移,就相当于分配了一定量的内存。但是这段内存初始只是分配了虚拟空间,这些空间的申请一开始是不会提交的(即不会分配物理页面),当进程师徒访问一个地址的时候,操作系统会检测到页缺少异常,从而会为被访问的地址所在的页分配物理页面。故而这种被动的物理分配,又被称为按践踏分配,即不打不动。*/#include "minicrt.h"typedef struct _heap_header{    enum{        HEAP_BLOCK_FREE = 0xABABABAB, //空闲块的魔数        HEAP_BLOCK_USED = 0xCDCDCDCD, //占用快的魔数    }type;    unsigned size;  //块的尺寸包括块的信息头    struct _heap_header* next;    struct _heap_header* prev;}heap_header;#define ADDR_ADD(a,o) ( ((char*) (a)) + o)#define HEADER_SIZE (sizeof(heap_header))static heap_header* list_head = NULL;void free(void* ptr){    heap_header* header = (heap_header*) ADDR_ADD(ptr, -HEADER_SIZE);    if(header->type != HEAP_BLOCK_USED)        return;    header->type = HEAP_BLOCK_FREE;    if(header->prev != NULL && header->prev->type == HEAP_BLOCK_FREE) {        //释放块的前一个块也正好为空        header->prev->next = header->next;        if(header->next != NULL)            header->next->prev = header->prev;        header->prev->size += header->size;        header = header->prev;    }    if(header->next != NULL && header->next->type == HEAP_BLOCK_FREE) {        //释放块的后一个块也是空块        header->size += header->next->size;        header->next = header->next->next;    }}void* malloc( unsigned size ){    heap_header* header;    if(size == 0)        return NULL;    header = list_head;//  fputs("\ninside the malloc\n", stdout);//  fputs("outside the malloc-fuck you asshole\n", stdout);    while(header != 0) {        if (header->type == HEAP_BLOCK_USED) {            header = header->next;            continue;        }        //刚好碰到一个空闲快,且其块的大小大于所需size加上一个信息头尺寸,但是小于所需size加上两个信息头尺寸,即剩余的内部碎片就算分离出来,也没有利用价值了,直接整个块都分配给used,等待整体释放        if (header->size > size + HEADER_SIZE &&              
header->size <= size + HEADER_SIZE*2)         {            header->type = HEAP_BLOCK_USED;            return ADDR_ADD(header, HEADER_SIZE);        }        //空闲块空间足够,且剩余的内部碎片分离出来还可以再使用        if (header->size > size + HEADER_SIZE * 2) {            //split            heap_header* next = (heap_header*) ADDR_ADD(header, size+HEADER_SIZE);            next->prev = header;            next->next = header->next;            next->type = HEAP_BLOCK_FREE;            next->size = header->size - (size + HEADER_SIZE); //此处有误吧            if (header->next != NULL)                header->next->prev = next;            header->next = next;            header->size = size + HEADER_SIZE;            header->type = HEAP_BLOCK_USED;            return ADDR_ADD(header, HEADER_SIZE);        };        header = header->next;    }//  fputs("outside the malloc-fuck you asshole\n", stdout);    //delete header;    return NULL;}#ifndef WIN32//Linux brk system callstatic int brk(void* end_data_segment) {    int ret = 0;    //brk system call number:45    //in /usr/include/asm-i386/unistd.h:    //#define __NR_brk 45    asm("movl $45, %%eax \n\t"        "movl %1, %%ebx  \n\t"        "int $0x80       \n\t"        "movl %%eax, %0  \n\t"        :"=r"(ret):"m"(end_data_segment) );}#endif#ifdef WIN32#include <Windows.h>#endifint mini_crt_init_heap(){    void* base = NULL;    heap_header* header = NULL;    //32MB heap size    unsigned heap_size = 1024*1024*32;//以base为起点分配32MB的内存空间   #ifdef WIN32    base = VirtualAlloc(0, heap_size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);    if (base == NULL)        return 0;#else    base = (void*)brk(0);    void* end = ADDR_ADD(base, heap_size);    end = (void*)brk(end);    if(!end)    {        printf("Linux create heap fail\n");        return 0;    }#endif    header = (heap_header*) base;    header->size = heap_size;    header->type = HEAP_BLOCK_FREE;    header->next = NULL;    header->prev = NULL;    list_head = header;    if(header == 0){        printf("Linux create 
heap fail\n");        return 0;    }    return 1;}

string.c:字符串相关函数的封装和实现

/*
 * string.c - string helpers (length, comparison, copy, integer to string)
 * plus fputc/fputs. The string functions are pure user-mode computation;
 * fputc/fputs forward to fwrite.
 */
#include "minicrt.h"

/*
 * Convert n to text in the given radix (2..36) and store it in str.
 *
 * Negative values are supported for radix 10 only. When str is NULL, the
 * radix is out of range, or n is negative with a radix other than 10,
 * nothing is written and str is returned unchanged.
 * Returns str.
 */
char* itoa(int n, char* str, int radix)
{
    static const char digit[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    char* p = str;
    char* head = str;
    unsigned value;

    if (!p || radix < 2 || radix > 36)
        return p;
    if (radix != 10 && n < 0)
        return p;

    if (n == 0) {
        *p++ = '0';
        *p = 0;
        return str; /* start of the text, not the terminator */
    }

    if (n < 0) {
        /* only reachable for radix 10 */
        *p++ = '-';
        head = p;                   /* keep the sign out of the reversal */
        value = 0u - (unsigned)n;   /* magnitude; well defined for INT_MIN */
    } else {
        value = (unsigned)n;
    }

    /* digits come out least significant first */
    while (value) {
        *p++ = digit[value % (unsigned)radix];
        value /= (unsigned)radix;
    }
    *p = 0;

    /* reverse the digits in place */
    for (--p; head < p; ++head, --p) {
        char temp = *head;
        *head = *p;
        *p = temp;
    }
    return str;
}

/*
 * Compare two strings byte by byte as unsigned characters.
 * Returns -1, 0 or 1.
 */
int strcmp(const char* src, const char* dst)
{
    const unsigned char* p1 = (const unsigned char*)src;
    const unsigned char* p2 = (const unsigned char*)dst;
    int ret = 0;

    while (!(ret = *p1 - *p2) && *p2) {
        ++p1;
        ++p2;
    }

    if (ret < 0)
        ret = -1;
    else if (ret > 0)
        ret = 1;
    return ret;
}

/*
 * Copy src, including its terminator, to dest. Returns dest.
 */
char* strcpy(char* dest, const char* src)
{
    char* ret = dest;

    while (*src)
        *dest++ = *src++;
    *dest = '\0';
    return ret;
}

/*
 * Length of str, not counting the terminator. Returns 0 for NULL.
 *
 * The former extra stop condition on byte value 204 (0xCC) was removed:
 * it cut strings short at any 0xCC byte when char is unsigned and had no
 * effect when char is signed.
 */
unsigned strlen(const char* str)
{
    unsigned cnt = 0;

    if (!str)
        return 0;
    for (; *str != '\0'; ++str)
        ++cnt;
    return cnt;
}

/*
 * Write the single character c to stream.
 *
 * Returns the character written, as an unsigned char converted to int, or
 * EOF on failure. The byte is written from an unsigned char object rather
 * than from the first byte of the int, so the result does not depend on
 * byte order, and bytes >= 0x80 are not reported as negative values (which
 * callers testing "< 0" would take for EOF).
 */
int fputc(int c, FILE* stream)
{
    unsigned char ch = (unsigned char)c;

    if (fwrite(&ch, 1, 1, stream) != 1)
        return EOF;
    return ch;
}

/*
 * Write str (without its terminator) to stream.
 * Returns the number of bytes written, or EOF when fwrite reports a
 * different count.
 */
int fputs(const char* str, FILE* stream)
{
    int len = (int)strlen(str);

    if (fwrite(str, 1, len, stream) != len)
        return EOF;
    return len;
}

printf.c:printf输出函数的封装和实现

/*
 * printf.c - minimal formatted output: vfprintf, printf, fprintf.
 */
#include "minicrt.h"

#ifndef WIN32
/*
 * Hand-rolled variable-argument macros: arguments are fetched by walking
 * a char* upward from the address of the last named parameter.
 * NOTE(review): presumably relies on the i386 cdecl convention where all
 * arguments sit contiguously on the stack - confirm before porting.
 */
#define va_list  char*
#define va_start(ap,arg) ( ap = (va_list)&arg + sizeof(arg))
#define va_arg(ap, t)    ( *(t*) ( (ap+=sizeof(t)) - sizeof(t) ) )
#define va_end(ap)       ( ap = (va_list) 0)
#else
#include <Windows.h>
#endif

/*
 * Write `format` to `stream`, expanding conversions from `arglist`.
 *
 * Only %d (int, decimal), %s (string) and %% are supported. For any other
 * character following '%', the '%' is dropped and the character itself is
 * printed.
 * Returns the number of characters written, or EOF on a write error.
 */
int vfprintf(FILE* stream, const char* format, va_list arglist)
{
    int translating = 0; /* non-zero right after an unconsumed '%' */
    int ret = 0;         /* characters written so far */
    const char* p;

    for (p = format; *p != '\0'; ++p) {
        switch (*p) {
        case '%':
            if (!translating) {
                translating = 1; /* next character is a conversion */
            } else {
                /* "%%" prints one '%' */
                if (fputc('%', stream) < 0)
                    return EOF;
                ++ret;
                translating = 0;
            }
            break;

        case 'd':
            if (translating) { /* %d */
                /* stack buffer; it must not be passed to free() */
                char buf[16] = {0};

                translating = 0;
                itoa(va_arg(arglist, int), buf, 10);
                if (fputs(buf, stream) < 0)
                    return EOF;
                ret += strlen(buf);
            } else if (fputc('d', stream) < 0) {
                return EOF;
            } else {
                ++ret;
            }
            break;

        case 's':
            if (translating) { /* %s */
                const char* str = va_arg(arglist, const char*);

                translating = 0;
                if (fputs(str, stream) < 0)
                    return EOF;
                ret += strlen(str);
            } else if (fputc('s', stream) < 0) {
                return EOF;
            } else {
                ++ret;
            }
            break;

        default:
            if (translating)
                translating = 0;
            if (fputc(*p, stream) < 0)
                return EOF;
            else
                ++ret;
            break;
        }
    }
    return ret;
}

/*
 * Formatted output to stdout. See vfprintf for the supported conversions
 * and the return value.
 */
int printf(const char* format, ...)
{
    int ret;
    va_list arglist;

    va_start(arglist, format);
    ret = vfprintf(stdout, format, arglist);
    va_end(arglist);
    return ret;
}

/*
 * Formatted output to `stream`. See vfprintf for the supported conversions
 * and the return value.
 */
int fprintf(FILE* stream, const char* format, ...)
{
    int ret;
    va_list arglist;

    va_start(arglist, format);
    ret = vfprintf(stream, format, arglist);
    va_end(arglist);
    return ret;
}

stdio.c:IO初始化和函数实现

//stdio.h/*1.为了简单起见,这里miniCRT不实现此前介绍的附带的buffer机制,不对Windows下的换行机制进行转换,即\r\n与\n不进行转换。2.在Windows下,文件基本操作使用了Windows API(CreateFile,ReadFile,WriteFile,CloseHandle,SetFilePointer)3.在Linux下,则使用系统调用open\read\write\close\seek4.fopen时仅区分"r""w""+"这几种模式及它们的组合,不对文本模式和二进制模式进行区分,不支持追加模式("a")。*/#include "minicrt.h"int mini_crt_init_io(){    return 1;}#ifdef WIN32#include <Windows.h>FILE* fopen(const char* filename, const char* mode){    Handle hFile = 0;    int access = 0;    int creation = 0;    if(strcmp(mode, "w") == 0) {        access != GENERIC_WRITE;        creation |= CREATE_ALWAYS;    }    if(strcmp(mode, "w+") == 0) {        access |= GENERIC_WRITE | GENERIC_READ;        creation |= CREATR_ALWAYS;    }    if(strcmp(mode, "r") == 0) {        access |= GENERIC_READ;        creation += OPEN_EXISTING;    }    if(strcmp(mode, "r+") == 0) {        access |= GENERIC_WRITE | GENERIC_READ;        creation |= TRUNCATE_EXISTING;    }    hFile = CreateFileA(filename, access, 0, 0, creation, 0, 0);    if (hFile == INVALID_HANDLE_VALUE)        return 0;    return (FILE*)hFile;}int fread(void* buffer, int size, int count, FILE* stream){    int read = 0;    if (!ReadFile( (HANDLE)stream, buffer, size*count, &read, 0))        return 0;    return read;}/*Windows API的ReadFILE()BOOL ReadFile(    HANDLE  hFile,//hFile为要读取的文件句柄,对应的是fread函数中stream参数    LPVOID  lpBuffer,//lpBuffer是存储缓冲区的其实地址,对应fread函数中的buffer    DWORD  nNumberofBytesToRead,//nNumberofBytesToRead代表要读取字节总数,等于fread函数中count * elementSize    LPDWORD  lpNumberofBytesRead,//lpNumberOfBytesRead代表一个指向DWORD类型的指针,用来表示读取了多少个字节    LPOVERLAPPED  lpOverlapped//lpOverlapped没用);*/int fwrite(const void* buffer, int size, int count, FILE* stream){    int written = 0;    if (!WriteFile( (HANDLE)stream, buffer, size*count, &written, 0))        return 0;    return written;}int fclose(FILE* fp){    return CloseHandle((HANDLE)fp);}int fseek(FILE* fp, int offset, int set){    return SetFilePointer((HANDLE)fp, offset, 0, set);}/*Windows 
API的SetFilePointer(    HANDLE  hFile,//hFile为要读取的文件句柄,对应的是fread函数中stream参数    LONG  IDistanceToMove, //偏移量(低位),指向64位偏移量的低32位    PLONG lpDistanceToMoveHigh, //偏移量(高位),指向64位偏移量的高32位    DWORD dwMoveMethod//基准位置,文件开始位置FILE_BEGIN/文件当前位置FILE_CURRENT/文件结束位置FILE_END);*/#else //#ifdef WIN32//movl这种AT&T汇编语言是UNIX下惯用的汇编语言Assembly Language//mov这种intel汇编语言则是Windows下常用,static int open(const char* pathname, int flags, int mode){    int fd = 0;    asm("movl $5, %%eax \n\t"        "movl %1, %%ebx \n\t"        "movl %2, %%ecx \n\t"        "movl %3, %%edx \n\t"        "int $0x80      \n\t"        "movl %%eax, %0 \n\t":        "=m"(fd):"m"(pathname), "m"(flags), "m"(mode) );}static int read( int fd, void* buffer, unsigned size){    int ret = 0;    asm("movl $3, %%eax  \n\t"        "movl %1, %%ebx  \n\t"        "movl %2, %%ecx  \n\t"        "movl %3, %%edx  \n\t"        "int $0x80       \n\t"        "movl %%eax, %0  \n\t":        "=m"(ret):"m"(fd), "m"(buffer), "m"(size) );    return ret;}static int write( int fd, const void* buffer, unsigned size){    int ret = 0;    asm("movl $4, %%eax  \n\t"        "movl %1, %%ebx  \n\t"        "movl %2, %%ecx  \n\t"        "movl %3, %%edx  \n\t"        "int $0x80       \n\t"        "movl %%eax, %0  \n\t":        "=m"(ret):"m"(fd), "m"(buffer), "m"(size) );    return ret;}static int close(int fd){    int ret = 0;    asm("movl $6, %%eax  \n\t"        "movl %1, %%ebx  \n\t"        "int $0x80       \n\t"        "movl %%eax, %0  \n\t":        "=m"(ret):"m"(fd) );    return ret;}static int seek(int fd, int offset, int mode){    int ret = 0;    asm("movl $19, %%eax  \n\t"        "movl %1, %%ebx  \n\t"        "movl %2, %%ecx  \n\t"        "movl %3, %%edx  \n\t"        "int $0x80       \n\t"        "movl %%eax, %0  \n\t":        "=m"(ret):"m"(fd), "m"(offset), "m"(mode) );    return ret;}FILE* fopen(const char* filename, const char* mode){    int fd = -1;    int flags = 0;    int access = 00700; //创建文件的权限    //来自于/usr/include/bits/fcntl.h    
//注意:以0开始的数字是八进制的#define O_RDONLY   00#define O_WRONLY   01#define O_RDWR     02#define O_CREAT    0100#define O_TRUNC    01000#define O_APPEND   02000    if(strcmp(mode, "w") == 0)        flags |= O_WRONLY | O_CREAT | O_TRUNC;    if(strcmp(mode, "w+") == 0)        flags |= O_RDWR | O_CREAT | O_TRUNC;    if(strcmp(mode, "r") == 0)        flags |= O_RDONLY;    if(strcmp(mode, "r+") == 0)        flags |= O_RDWR | O_CREAT;    fd = open(filename, flags, access);    return (FILE*)fd;}   int fread(void* buffer, int size, int count, FILE* stream){    return read( (int)stream, buffer, size*count );}int fwrite(const void* buffer, int size, int count, FILE* stream){    return write( (int)stream, buffer, size*count );}int fclose(FILE* fp){    return close( (int)fp);}int fseek(FILE* fp, int offset, int set){    return seek( (int)fp, offset, set);}#endif

test.c:程序员的测试代码

/*
 * test.c - sample program exercising MiniCRT.
 *
 * Copies the command-line arguments to the heap, writes each one to
 * "test.txt" as <int length><bytes>, reads the records back and prints
 * "<length> <text>" for each.
 */
#include "minicrt.h"

/*
 * Returns 0 on success and 1 when an allocation or a file open fails.
 */
int main(int argc, char* argv[])
{
    int i;
    FILE* fp;
    char** v;
    /*
     * Local copies of the parameters, kept from the original, whose author
     * noted that the parameters could only be relied on once.
     * NOTE(review): presumably a register-clobbering side effect of the
     * inline-asm system calls - confirm.
     */
    char** tempArgv = argv;
    int tempArgc = argc;

    v = malloc(tempArgc * sizeof(char*));
    if (v == NULL)
        return 1;

    for (i = 0; i < tempArgc; ++i) {
        v[i] = malloc(strlen(tempArgv[i]) + 1);
        if (v[i] == NULL)
            return 1;
        strcpy(v[i], tempArgv[i]);
    }

    /* write every argument as a length-prefixed record */
    fp = fopen("test.txt", "w");
    if (fp == NULL)
        return 1;
    for (i = 0; i < tempArgc; ++i) {
        int len = strlen(v[i]);

        fwrite(&len, 1, sizeof(int), fp);
        fwrite(v[i], 1, len, fp);
    }
    fclose(fp);

    /* read the records back and print them */
    fp = fopen("test.txt", "r");
    if (fp == NULL)
        return 1;
    for (i = 0; i < tempArgc; ++i) {
        int len;
        char* buf;

        if (fread(&len, 1, sizeof(int), fp) != (int)sizeof(int))
            break;
        buf = malloc(len + 1);
        if (buf == NULL)
            break;
        fread(buf, 1, len, fp);
        buf[len] = '\0';
        printf("%d %s\n", len, buf);
        free(buf);
    }
    fclose(fp);

    /* release the argument copies and the array holding them */
    for (i = 0; i < tempArgc; ++i)
        free(v[i]);
    free(v);

    return 0;
}

MiniCRT运行库设计成可以兼容Linux和Windows,故而运行该程序需要分系统讨论

Linux下运行命令

$ gcc -c -fno-builtin -nostdlib -fno-stack-protector entry.c malloc.c stdio.c string.c printf.c -m32 -g
$ gcc -c -ggdb -fno-builtin -nostdlib -fno-stack-protector test.c -m32 -g
$ ar -rs minicrt.a malloc.o printf.o stdio.o string.o
$ ld -static -e mini_crt_entry entry.o test.o minicrt.a -o test -m elf_i386

-fno-builtin参数:关闭GCC的内置函数功能,默认情况下GCC会把strlen\strcmp等函数展开成它内部的实现;
-nostdlib:表示不使用任何来自Glibc、GCC的库文件和启动文件,它包含了-nostartfiles这个参数;
-fno-stack-protector:关闭堆栈保护功能,最新版本的GCC在处理变长参数函数的情况下会要求实现对堆栈的保护函数;
由于系统是64位ubuntu,故而需要在上面注明 -m32(gcc)或 -m elf_i386(ld)。
运行结果应该如下:

$ ./test arg1 arg2 124
6 ./test
4 arg1
4 arg2
3 124

Windows下运行命令

>cl /c /DWIN32 /GS- entry.c malloc.c printf.c stdio.c string.c
>lib entry.obj malloc.obj printf.obj stdio.obj string.obj /OUT:minicrt.lib
>cl /c /DWIN32 test.c
>link test.obj minicrt.lib kernel32.lib /NODEFAULTLIB /entry:mini_crt_entry

/DWIN32:启用cl的宏定义功能,即定义WIN32这个宏,这是代码中区分平台的关键宏;
/GS-:关闭堆栈保护功能,否则会在链接阶段发生"__security_cookie"和"__security_check_cookie"符号未定义错误。

原创粉丝点击