程序员的自我修养:MiniCRT自制C语言运行库
来源:互联网 发布:网络课程视频加速器 编辑:程序博客网 时间:2024/06/05 00:15
《程序员的自我修养》一书在最后一章专门介绍了一款小型的C语言运行库(MiniCRT),并给出了详细的代码实现。阅读这份代码,可以对C语言运行库所提供的语言抽象层有更深的理解。Talk is cheap, show me the code!
minicrt.h: MiniCRT的文件头
/*
 * minicrt.h - public interface of MiniCRT, a minimal C runtime library.
 *
 * Declares the heap, string, file I/O, formatted-output and start-up/exit
 * entry points implemented by entry.c, malloc.c, string.c, printf.c and
 * stdio.c.
 */
#ifndef __MINI_CRT_H__
#define __MINI_CRT_H__

/*
 * Keep the exported names usable from C++ as well: the declarations are
 * wrapped in extern "C" so that the symbols follow the C name-decoration
 * rules.
 */
#ifdef __cplusplus
extern "C" {
#endif

/* ---- malloc ---------------------------------------------------------- */
#ifndef NULL
#define NULL (0)
#endif

void free(void* ptr);
void* malloc(unsigned size);
/*
 * NOTE: brk() is a file-local (static) helper of malloc.c and is therefore
 * intentionally not declared here; a static prototype in a shared header
 * would give every including file its own undefined static function.
 */
int mini_crt_init_heap(void);

/* ---- strings --------------------------------------------------------- */
char* itoa(int n, char* str, int radix);
int strcmp(const char* src, const char* dst);
char* strcpy(char* dest, const char* src);
unsigned strlen(const char* str);

/* ---- files and I/O --------------------------------------------------- */
typedef int FILE;
#define EOF (-1)

/*
 * A FILE* never points at a real FILE object in MiniCRT.  On Windows it
 * carries a kernel handle obtained through GetStdHandle(); on Linux it
 * carries a file descriptor, so the standard streams are simply 0, 1 and 2.
 */
#ifdef WIN32
#define stdin  ((FILE*)(GetStdHandle(STD_INPUT_HANDLE)))
#define stdout ((FILE*)(GetStdHandle(STD_OUTPUT_HANDLE)))
#define stderr ((FILE*)(GetStdHandle(STD_ERROR_HANDLE)))
#else
#define stdin  ((FILE*)0)
#define stdout ((FILE*)1)
#define stderr ((FILE*)2)
#endif

/*
 * MiniCRT has no stream buffering and no open-file table, so I/O needs
 * essentially no initialisation.
 */
int mini_crt_init_io(void);
FILE* fopen(const char* filename, const char* mode);
int fread(void* buffer, int size, int count, FILE* stream);
int fwrite(const void* buffer, int size, int count, FILE* stream);
int fclose(FILE* fp);
int fseek(FILE* fp, int offset, int set);

/* ---- printf ---------------------------------------------------------- */
int fputc(int c, FILE* stream);
int fputs(const char* str, FILE* stream);
int printf(const char* format, ...);
int fprintf(FILE* stream, const char* format, ...);

/* ---- internal -------------------------------------------------------- */
void do_global_ctors(void);
void mini_crt_call_exit_routine(void);

/* ---- atexit ---------------------------------------------------------- */
typedef void (*atexit_func_t)(void);
int atexit(atexit_func_t func);

#ifdef __cplusplus
}
#endif

#endif /* __MINI_CRT_H__ */
entry.c:运行库入口函数定义
/*
 * entry.c - process entry point of MiniCRT.
 *
 * mini_crt_entry() is the symbol handed to the linker as the program entry.
 * It collects argc/argv, initialises the heap and I/O, calls the user's
 * main() and terminates the process with main()'s return value.
 */
#include "minicrt.h"

#ifdef WIN32
#include <Windows.h>
#endif

/* Capacity of the argv array built from the Windows command line. */
#define MINI_CRT_MAX_ARGS 16

/* main() as written by the application programmer. */
extern int main(int argc, char* argv[]);
void exit(int);

/*
 * Terminate the process with exit code 1.
 * The message is accepted for diagnostic purposes but is currently not
 * printed.
 */
static void crt_fatal_error(const char* msg)
{
    (void)msg;
    exit(1);
}

/*
 * Runtime entry point: build argc/argv, initialise the runtime, run main().
 * Never returns; the process ends through exit().
 */
void mini_crt_entry(void)
{
    int ret;

#ifdef WIN32
    int in_quotes = 0;                 /* non-zero while inside "..." */
    int argc = 0;
    char* argv[MINI_CRT_MAX_ARGS];
    /* Windows hands over the whole command line as one string. */
    char* cl = GetCommandLineA();

    /*
     * Split the command line in place: every run of unquoted spaces is
     * terminated with '\0' and the character after the run starts the next
     * argument.
     */
    argv[0] = cl;
    argc++;
    while (*cl) {
        if (*cl == '\"') {
            /* Spaces inside a quoted string belong to the argument. */
            in_quotes = !in_quotes;
        } else if (*cl == ' ' && !in_quotes) {
            int run = 1;               /* length of this run of spaces */

            while (cl[run] == ' ')
                run++;
            *cl = '\0';
            /*
             * Arguments beyond the array capacity are dropped instead of
             * being written past the end of argv.
             */
            if (cl[run] != '\0' && argc < MINI_CRT_MAX_ARGS) {
                argv[argc] = cl + run;
                argc++;
            }
            cl += run;
            /*
             * cl now addresses the first character of the next argument.
             * Re-examine it rather than stepping over it, otherwise an
             * opening quote in that position would be missed.
             */
            continue;
        }
        cl++;
    }
#else
    int argc;
    char** argv;
    char* ebp_reg;

    /*
     * argc is read from ebp+4 and argv starts at ebp+8.
     * NOTE(review): this assumes the compiler emitted the usual
     * "push %ebp; mov %esp,%ebp" prologue for this function and that the
     * kernel placed argc/argv at the top of the initial stack - confirm for
     * the toolchain and flags in use.
     */
    asm(" movl %%ebp, %0 \n\t" : "=r"(ebp_reg));
    argc = *(int*)(ebp_reg + 4);
    argv = (char**)(ebp_reg + 8);
#endif

    if (!mini_crt_init_heap())
        crt_fatal_error("heap initialize failed");

    if (!mini_crt_init_io())
        crt_fatal_error("IO initialize failed");

    ret = main(argc, argv);
    exit(ret);
}

/*
 * Terminate the process with the given exit code.
 * Windows: ExitProcess().  Linux: system call number 1 through int $0x80,
 * with the exit code in ebx.  Registered exit routines are not invoked
 * here.
 */
void exit(int exitCode)
{
#ifdef WIN32
    ExitProcess(exitCode);
#else
    asm("movl %0, %%ebx \n\t"
        "movl $1, %%eax \n\t"
        "int $0x80 \n\t"
        "hlt \n\t"
        :
        : "m"(exitCode));
#endif
}
malloc.c:堆的初始化和功能实现
//堆的实现/*在遵循Mini CRT的原则下,我们将Mini CRT堆的实现归纳为以下几条1.实现一个以空闲链表算法为基础的堆空间分配算法;2.为了简单起见,堆空间大小固定为32MB,初始化后空间不再扩展或缩小;3.在Windows平台下不适用HeapAlloc等堆分配算法,采用VirtualAlloc 向系统直接申请32MB空间,由我们自己的堆分配算法实现malloc4.在Linux平台下,使用brk将数据段结束地址向后调整32MB,将这块空间作为堆空间*//* brk系统调用可以设置进程的数据段.data边界,而sbrk可以移动进程的数据段边界,显然如果将数据段边界后移,就相当于分配了一定量的内存。但是这段内存初始只是分配了虚拟空间,这些空间的申请一开始是不会提交的(即不会分配物理页面),当进程师徒访问一个地址的时候,操作系统会检测到页缺少异常,从而会为被访问的地址所在的页分配物理页面。故而这种被动的物理分配,又被称为按践踏分配,即不打不动。*/#include "minicrt.h"typedef struct _heap_header{ enum{ HEAP_BLOCK_FREE = 0xABABABAB, //空闲块的魔数 HEAP_BLOCK_USED = 0xCDCDCDCD, //占用快的魔数 }type; unsigned size; //块的尺寸包括块的信息头 struct _heap_header* next; struct _heap_header* prev;}heap_header;#define ADDR_ADD(a,o) ( ((char*) (a)) + o)#define HEADER_SIZE (sizeof(heap_header))static heap_header* list_head = NULL;void free(void* ptr){ heap_header* header = (heap_header*) ADDR_ADD(ptr, -HEADER_SIZE); if(header->type != HEAP_BLOCK_USED) return; header->type = HEAP_BLOCK_FREE; if(header->prev != NULL && header->prev->type == HEAP_BLOCK_FREE) { //释放块的前一个块也正好为空 header->prev->next = header->next; if(header->next != NULL) header->next->prev = header->prev; header->prev->size += header->size; header = header->prev; } if(header->next != NULL && header->next->type == HEAP_BLOCK_FREE) { //释放块的后一个块也是空块 header->size += header->next->size; header->next = header->next->next; }}void* malloc( unsigned size ){ heap_header* header; if(size == 0) return NULL; header = list_head;// fputs("\ninside the malloc\n", stdout);// fputs("outside the malloc-fuck you asshole\n", stdout); while(header != 0) { if (header->type == HEAP_BLOCK_USED) { header = header->next; continue; } //刚好碰到一个空闲快,且其块的大小大于所需size加上一个信息头尺寸,但是小于所需size加上两个信息头尺寸,即剩余的内部碎片就算分离出来,也没有利用价值了,直接整个块都分配给used,等待整体释放 if (header->size > size + HEADER_SIZE && header->size <= size + HEADER_SIZE*2) { header->type = HEAP_BLOCK_USED; return ADDR_ADD(header, HEADER_SIZE); } //空闲块空间足够,且剩余的内部碎片分离出来还可以再使用 if (header->size > size + HEADER_SIZE * 2) { //split heap_header* next = 
(heap_header*) ADDR_ADD(header, size+HEADER_SIZE); next->prev = header; next->next = header->next; next->type = HEAP_BLOCK_FREE; next->size = header->size - (size + HEADER_SIZE); //此处有误吧 if (header->next != NULL) header->next->prev = next; header->next = next; header->size = size + HEADER_SIZE; header->type = HEAP_BLOCK_USED; return ADDR_ADD(header, HEADER_SIZE); }; header = header->next; }// fputs("outside the malloc-fuck you asshole\n", stdout); //delete header; return NULL;}#ifndef WIN32//Linux brk system callstatic int brk(void* end_data_segment) { int ret = 0; //brk system call number:45 //in /usr/include/asm-i386/unistd.h: //#define __NR_brk 45 asm("movl $45, %%eax \n\t" "movl %1, %%ebx \n\t" "int $0x80 \n\t" "movl %%eax, %0 \n\t" :"=r"(ret):"m"(end_data_segment) );}#endif#ifdef WIN32#include <Windows.h>#endifint mini_crt_init_heap(){ void* base = NULL; heap_header* header = NULL; //32MB heap size unsigned heap_size = 1024*1024*32;//以base为起点分配32MB的内存空间 #ifdef WIN32 base = VirtualAlloc(0, heap_size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); if (base == NULL) return 0;#else base = (void*)brk(0); void* end = ADDR_ADD(base, heap_size); end = (void*)brk(end); if(!end) { printf("Linux create heap fail\n"); return 0; }#endif header = (heap_header*) base; header->size = heap_size; header->type = HEAP_BLOCK_FREE; header->next = NULL; header->prev = NULL; list_head = header; if(header == 0){ printf("Linux create heap fail\n"); return 0; } return 1;}
string.c:字符串相关函数的封装和实现
/*
 * string.c - string helpers and the character/string output primitives.
 *
 * The string routines (length, comparison, copy, integer-to-string) are
 * pure user-mode computation and never talk to the kernel; fputc()/fputs()
 * are thin layers over fwrite().
 */
#include "minicrt.h"

/*
 * Convert n to its textual form in the given radix (2..36) and store it,
 * NUL-terminated, in str.  Negative values are supported for radix 10 only.
 * Returns str.  When str is NULL, the radix is out of range, or n is
 * negative with a radix other than 10, str is returned untouched.
 */
char* itoa(int n, char* str, int radix)
{
    static const char digit[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    char* p = str;
    char* head = str;      /* first character that takes part in the reversal */
    unsigned value;

    if (!p || radix < 2 || radix > 36)
        return p;

    if (radix != 10 && n < 0)
        return p;

    if (n == 0) {
        *p++ = '0';
        *p = 0;
        return str;
    }

    if (n < 0) {
        /* Only reachable for radix 10: emit the sign, keep it out of the
         * reversal below, and continue with the magnitude. */
        *p++ = '-';
        head = p;
        /* Negate in unsigned arithmetic so that INT_MIN does not overflow. */
        value = 0u - (unsigned)n;
    } else {
        value = (unsigned)n;
    }

    /* Digits are produced least-significant first ... */
    while (value) {
        *p++ = digit[value % radix];
        value /= radix;
    }
    *p = 0;

    /* ... so reverse them in place. */
    for (--p; head < p; ++head, --p) {
        char temp = *head;
        *head = *p;
        *p = temp;
    }

    return str;
}

/*
 * Compare two strings byte by byte as unsigned characters.
 * Returns -1, 0 or 1.
 */
int strcmp(const char* src, const char* dst)
{
    int ret = 0;
    const unsigned char* p1 = (const unsigned char*)src;
    const unsigned char* p2 = (const unsigned char*)dst;

    while (!(ret = *p1 - *p2) && *p2) {
        ++p1;
        ++p2;
    }

    if (ret < 0)
        ret = -1;
    else if (ret > 0)
        ret = 1;

    return ret;
}

/* Copy src, including its terminator, to dest.  Returns dest. */
char* strcpy(char* dest, const char* src)
{
    char* ret = dest;

    while (*src)
        *dest++ = *src++;
    *dest = '\0';

    return ret;
}

/*
 * Number of characters in str before the terminating NUL.
 * A NULL pointer yields 0.
 */
unsigned strlen(const char* str)
{
    unsigned cnt = 0;

    if (!str)
        return 0;

    while (str[cnt] != '\0')
        ++cnt;

    return cnt;
}

/*
 * Write the single character c to stream.
 * Returns the character written (as an unsigned char converted to int), or
 * EOF when fwrite() did not report exactly one byte.
 */
int fputc(int c, FILE* stream)
{
    /* Write from a one-byte object so the result does not depend on which
     * byte of the int happens to come first in memory. */
    unsigned char ch = (unsigned char)c;

    if (fwrite(&ch, 1, 1, stream) != 1)
        return EOF;
    return ch;
}

/*
 * Write the string str (without its terminator) to stream.
 * Returns the number of characters written, or EOF when fwrite() reported a
 * different count.
 */
int fputs(const char* str, FILE* stream)
{
    int len = strlen(str);

    if (fwrite(str, 1, len, stream) != len)
        return EOF;
    return len;
}
printf.c:printf输出函数的封装和实现
/*
 * printf.c - minimal formatted output for MiniCRT.
 */
#include "minicrt.h"

#ifndef WIN32
/*
 * Hand-rolled variable-argument access: the arguments are walked as a plain
 * byte pointer that starts right behind the last named parameter.
 * NOTE(review): this presumes all arguments are passed contiguously on the
 * stack (as in the 32-bit build this library targets) - confirm before
 * using another ABI.
 */
#define va_list char*
#define va_start(ap, arg) ((ap) = (va_list)&(arg) + sizeof(arg))
#define va_arg(ap, t) (*(t*)(((ap) += sizeof(t)) - sizeof(t)))
#define va_end(ap) ((ap) = (va_list)0)
#else
#include <Windows.h>
#endif

/*
 * Core formatter.  Only three conversions are understood:
 *   %d  - int, printed in decimal
 *   %s  - NUL-terminated string
 *   %%  - a literal '%'
 * Any other character following '%' is printed as-is and the '%' is
 * dropped.
 * Returns the number of characters written, or EOF as soon as an output
 * call reports failure.
 */
int vfprintf(FILE* stream, const char* format, va_list arglist)
{
    int translating = 0;   /* non-zero right after an unmatched '%' */
    int ret = 0;           /* characters written so far */
    const char* p;

    for (p = format; *p != '\0'; ++p) {
        switch (*p) {
        case '%':
            if (!translating) {
                /* The next character decides the conversion. */
                translating = 1;
            } else {
                if (fputc('%', stream) < 0)
                    return EOF;
                ++ret;
                translating = 0;
            }
            break;

        case 'd':
            if (translating) {         /* %d */
                /* Automatic storage: must not be handed to free(). */
                char buf[16] = {0};

                translating = 0;
                itoa(va_arg(arglist, int), buf, 10);
                if (fputs(buf, stream) < 0)
                    return EOF;
                ret += strlen(buf);
            } else if (fputc('d', stream) < 0) {
                return EOF;
            } else {
                ++ret;
            }
            break;

        case 's':
            if (translating) {         /* %s */
                const char* str = va_arg(arglist, const char*);

                translating = 0;
                if (fputs(str, stream) < 0)
                    return EOF;
                ret += strlen(str);
            } else if (fputc('s', stream) < 0) {
                return EOF;
            } else {
                ++ret;
            }
            break;

        default:
            if (translating)
                translating = 0;
            if (fputc(*p, stream) < 0)
                return EOF;
            ++ret;
            break;
        }
    }

    return ret;
}

/* Formatted output to stdout; see vfprintf() for the supported conversions. */
int printf(const char* format, ...)
{
    va_list arglist;
    int ret;

    va_start(arglist, format);
    ret = vfprintf(stdout, format, arglist);
    va_end(arglist);
    return ret;
}

/* Formatted output to stream; see vfprintf() for the supported conversions. */
int fprintf(FILE* stream, const char* format, ...)
{
    va_list arglist;
    int ret;

    va_start(arglist, format);
    ret = vfprintf(stream, format, arglist);
    va_end(arglist);
    return ret;
}
stdio.c:IO初始化和函数实现
//stdio.h/*1.为了简单起见,这里miniCRT不实现此前介绍的附带的buffer机制,不对Windows下的换行机制进行转换,即\r\n与\n不进行转换。2.在Windows下,文件基本操作使用了Windows API(CreateFile,ReadFile,WriteFile,CloseHandle,SetFilePointer)3.在Linux下,则使用系统调用open\read\write\close\seek4.fopen时仅区分"r""w""+"这几种模式及它们的组合,不对文本模式和二进制模式进行区分,不支持追加模式("a")。*/#include "minicrt.h"int mini_crt_init_io(){ return 1;}#ifdef WIN32#include <Windows.h>FILE* fopen(const char* filename, const char* mode){ Handle hFile = 0; int access = 0; int creation = 0; if(strcmp(mode, "w") == 0) { access != GENERIC_WRITE; creation |= CREATE_ALWAYS; } if(strcmp(mode, "w+") == 0) { access |= GENERIC_WRITE | GENERIC_READ; creation |= CREATR_ALWAYS; } if(strcmp(mode, "r") == 0) { access |= GENERIC_READ; creation += OPEN_EXISTING; } if(strcmp(mode, "r+") == 0) { access |= GENERIC_WRITE | GENERIC_READ; creation |= TRUNCATE_EXISTING; } hFile = CreateFileA(filename, access, 0, 0, creation, 0, 0); if (hFile == INVALID_HANDLE_VALUE) return 0; return (FILE*)hFile;}int fread(void* buffer, int size, int count, FILE* stream){ int read = 0; if (!ReadFile( (HANDLE)stream, buffer, size*count, &read, 0)) return 0; return read;}/*Windows API的ReadFILE()BOOL ReadFile( HANDLE hFile,//hFile为要读取的文件句柄,对应的是fread函数中stream参数 LPVOID lpBuffer,//lpBuffer是存储缓冲区的其实地址,对应fread函数中的buffer DWORD nNumberofBytesToRead,//nNumberofBytesToRead代表要读取字节总数,等于fread函数中count * elementSize LPDWORD lpNumberofBytesRead,//lpNumberOfBytesRead代表一个指向DWORD类型的指针,用来表示读取了多少个字节 LPOVERLAPPED lpOverlapped//lpOverlapped没用);*/int fwrite(const void* buffer, int size, int count, FILE* stream){ int written = 0; if (!WriteFile( (HANDLE)stream, buffer, size*count, &written, 0)) return 0; return written;}int fclose(FILE* fp){ return CloseHandle((HANDLE)fp);}int fseek(FILE* fp, int offset, int set){ return SetFilePointer((HANDLE)fp, offset, 0, set);}/*Windows API的SetFilePointer( HANDLE hFile,//hFile为要读取的文件句柄,对应的是fread函数中stream参数 LONG IDistanceToMove, //偏移量(低位),指向64位偏移量的低32位 PLONG lpDistanceToMoveHigh, //偏移量(高位),指向64位偏移量的高32位 DWORD 
dwMoveMethod//基准位置,文件开始位置FILE_BEGIN/文件当前位置FILE_CURRENT/文件结束位置FILE_END);*/#else //#ifdef WIN32//movl这种AT&T汇编语言是UNIX下惯用的汇编语言Assembly Language//mov这种intel汇编语言则是Windows下常用,static int open(const char* pathname, int flags, int mode){ int fd = 0; asm("movl $5, %%eax \n\t" "movl %1, %%ebx \n\t" "movl %2, %%ecx \n\t" "movl %3, %%edx \n\t" "int $0x80 \n\t" "movl %%eax, %0 \n\t": "=m"(fd):"m"(pathname), "m"(flags), "m"(mode) );}static int read( int fd, void* buffer, unsigned size){ int ret = 0; asm("movl $3, %%eax \n\t" "movl %1, %%ebx \n\t" "movl %2, %%ecx \n\t" "movl %3, %%edx \n\t" "int $0x80 \n\t" "movl %%eax, %0 \n\t": "=m"(ret):"m"(fd), "m"(buffer), "m"(size) ); return ret;}static int write( int fd, const void* buffer, unsigned size){ int ret = 0; asm("movl $4, %%eax \n\t" "movl %1, %%ebx \n\t" "movl %2, %%ecx \n\t" "movl %3, %%edx \n\t" "int $0x80 \n\t" "movl %%eax, %0 \n\t": "=m"(ret):"m"(fd), "m"(buffer), "m"(size) ); return ret;}static int close(int fd){ int ret = 0; asm("movl $6, %%eax \n\t" "movl %1, %%ebx \n\t" "int $0x80 \n\t" "movl %%eax, %0 \n\t": "=m"(ret):"m"(fd) ); return ret;}static int seek(int fd, int offset, int mode){ int ret = 0; asm("movl $19, %%eax \n\t" "movl %1, %%ebx \n\t" "movl %2, %%ecx \n\t" "movl %3, %%edx \n\t" "int $0x80 \n\t" "movl %%eax, %0 \n\t": "=m"(ret):"m"(fd), "m"(offset), "m"(mode) ); return ret;}FILE* fopen(const char* filename, const char* mode){ int fd = -1; int flags = 0; int access = 00700; //创建文件的权限 //来自于/usr/include/bits/fcntl.h //注意:以0开始的数字是八进制的#define O_RDONLY 00#define O_WRONLY 01#define O_RDWR 02#define O_CREAT 0100#define O_TRUNC 01000#define O_APPEND 02000 if(strcmp(mode, "w") == 0) flags |= O_WRONLY | O_CREAT | O_TRUNC; if(strcmp(mode, "w+") == 0) flags |= O_RDWR | O_CREAT | O_TRUNC; if(strcmp(mode, "r") == 0) flags |= O_RDONLY; if(strcmp(mode, "r+") == 0) flags |= O_RDWR | O_CREAT; fd = open(filename, flags, access); return (FILE*)fd;} int fread(void* buffer, int size, int count, FILE* stream){ return read( 
(int)stream, buffer, size*count );}int fwrite(const void* buffer, int size, int count, FILE* stream){ return write( (int)stream, buffer, size*count );}int fclose(FILE* fp){ return close( (int)fp);}int fseek(FILE* fp, int offset, int set){ return seek( (int)fp, offset, set);}#endif
test.c:程序员的测试代码
/*
 * test.c - smoke test for MiniCRT.
 *
 * Copies the command-line arguments onto the heap, writes each one to
 * "test.txt" as <int length><bytes>, reads the records back and prints
 * "<length> <string>" for every argument.
 * Returns 0 on success and 1 when an allocation, an open, or a record read
 * fails.
 */
#include "minicrt.h"

int main(int argc, char* argv[])
{
    int i;
    FILE* fp;
    char** v = (char**)malloc(argc * sizeof(char*));

    if (v == NULL)
        return 1;

    /* Private heap copies of all arguments. */
    for (i = 0; i < argc; ++i) {
        v[i] = (char*)malloc(strlen(argv[i]) + 1);
        if (v[i] == NULL)
            return 1;
        strcpy(v[i], argv[i]);
    }

    /* Write one length-prefixed record per argument. */
    fp = fopen("test.txt", "w");
    if (fp == NULL)
        return 1;
    for (i = 0; i < argc; ++i) {
        int len = strlen(v[i]);

        fwrite(&len, 1, sizeof(int), fp);
        fwrite(v[i], 1, len, fp);
    }
    fclose(fp);

    /* Read the records back and print them. */
    fp = fopen("test.txt", "r");
    if (fp == NULL)
        return 1;
    for (i = 0; i < argc; ++i) {
        int len = 0;
        char* buf;

        fread(&len, 1, sizeof(int), fp);
        if (len < 0) {                 /* corrupt record length */
            fclose(fp);
            return 1;
        }
        buf = (char*)malloc(len + 1);
        if (buf == NULL) {
            fclose(fp);
            return 1;
        }
        fread(buf, 1, len, fp);
        buf[len] = '\0';
        printf("%d %s\n", len, buf);
        free(buf);
        free(v[i]);
    }
    fclose(fp);

    free(v);
    return 0;
}
MiniCRT运行库设计成可以兼容Linux和Windows,故而运行该程序需要分系统讨论
Linux下运行命令
$ gcc -c -fno-builtin -nostdlib -fno-stack-protector entry.c malloc.c stdio.c string.c printf.c -m32 -g
$ gcc -c -ggdb -fno-builtin -nostdlib -fno-stack-protector test.c -m32 -g
$ ar -rs minicrt.a malloc.o printf.o stdio.o string.o
$ ld -static -e mini_crt_entry entry.o test.o minicrt.a -o test -m elf_i386
-fno-builtin参数:关闭GCC的内置函数功能,默认情况下GCC会把strlen\strcmp等函数展开成它内部的实现;
-nostdlib:表示不使用任何来自Glibc、GCC的库文件和启动文件,它包含了-nostartfiles这个参数;
-fno-stack-protector:关闭堆栈保护功能,最新版本的GCC在处理变长参数函数的情况下会要求实现对堆栈的保护函数;
由于系统是64位Ubuntu,而MiniCRT是32位程序,故而需要在上面的命令中为gcc指定-m32、为ld指定-m elf_i386。
运行结果应该如下:
$ ./test arg1 arg2 124
6 ./test
4 arg1
4 arg2
3 124
Windows下运行命令
> cl /c /DWIN32 /GS- entry.c malloc.c printf.c stdio.c string.c
> lib entry.obj malloc.obj printf.obj stdio.obj string.obj /OUT:minicrt.lib
> cl /c /DWIN32 test.c
> link test.obj minicrt.lib kernel32.lib /NODEFAULTLIB /entry:mini_crt_entry
/DWIN32:启用cl的宏定义功能,即定义WIN32这个宏,这是代码中区分平台的关键宏;
/GS-:关闭堆栈保护功能,否则会在链接阶段发生"__security_cookie"和"__security_check_cookie"符号未定义的错误。
阅读全文
0 0
- 程序员的自我修养:MiniCRT自制C语言运行库
- 程序员的自我修养:MiniCRT++运行库部分实现C++特性
- 程序员的自我修养---C/C++运行库
- 程序员的自我修养---C/C++运行库
- 程序员的自我修养-运行库
- 程序员的自我修养: 程序运行的基石-C/C++运行库
- 程序员的自我修养: 运行库与多线程
- 【程序员的自我修养】第11章 运行库
- 程序员的自我修养——运行库
- 程序员的自我修养
- 程序员的自我修养
- 程序员的自我修养
- 程序员的自我修养
- 程序员的自我修养
- 程序员的自我修养
- 程序员的自我修养
- 程序员的自我修养
- 程序员的自我修养
- linux目录文件权限访问
- centos7 安装gitlab
- PAT乙题1022. D进制的A+B (20)
- 彻底搞清RPC模块设计与实现
- 最大子序列(P1085),子矩阵( P1086)存一下思路
- 程序员的自我修养:MiniCRT自制C语言运行库
- 函数的调用过程,栈帧的创建和销毁
- PAT乙题1023. 组个最小数 (20)
- PAT乙题1026. 程序运行时间(15)
- Boolan博览网C++开发课程第三周笔记
- linux 0.11 内核完全注释V3.0学习笔记(一)
- 1004 0-1背包问题
- Matlab画三维立体网状图形(类似魔方)
- 不同级别并发理解