redis aof持久化的源码分析
来源:互联网 发布:李斯特改编 知乎 编辑:程序博客网 时间:2024/04/20 14:18
除了rdb持久化功能之外,redis还提供了aof(append only file)持久化功能。与rdb不同,aof持久化
是通过保存redis服务器所执行的写命令来记录数据库的状态。
AOF持久化的实现
AOF持久化的实现可以分为命令追加、文件写入和文件同步三个步骤。
命令追加
当AOF持久化功能处于打开状态时,服务器在执行完一个写命令之后,会以协议格式将被执行的写命
令追加到服务器状态的aof_buf缓冲区的末尾:
/* Excerpt of the server state showing only the field that buffers AOF data
 * between a command's execution and the next write to the AOF file. */
struct redisServer {
    sds aof_buf; /* AOF buffer, written before entering the event loop */
};
服务器执行完写命令,调用propagate进行命令追加。
void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int flags){ if (server.aof_state != AOF_OFF && flags & PROPAGATE_AOF) feedAppendOnlyFile(cmd,dbid,argv,argc);}//进行命令追加void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) { if (dictid != server.aof_selected_db) { //切换dbid,追加select命令 snprintf(seldb,sizeof(seldb),"%d",dictid); buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n", (unsigned long)strlen(seldb),seldb); server.aof_selected_db = dictid; } if (cmd->proc == expireCommand || cmd->proc == pexpireCommand || cmd->proc == expireatCommand) { /* 将EXPIRE/PEXPIRE/EXPIREAT转化成PEXPIREAT生成命令协议格式的字符串 */ buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]); } else if (cmd->proc == setexCommand || cmd->proc == psetexCommand) { /* 将SETEX/PSETEX转换成SET和PEXPIREAT生成命令协议格式的字符串 */ tmpargv[0] = createStringObject("SET",3); tmpargv[1] = argv[1]; tmpargv[2] = argv[3]; buf = catAppendOnlyGenericCommand(buf,3,tmpargv); decrRefCount(tmpargv[0]); buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]); } else { //将写命令生成命令协议格式的字符串 buf = catAppendOnlyGenericCommand(buf,argc,argv); } //将命令的协议格式的字符串追加到aof_buf if (server.aof_state == AOF_ON) server.aof_buf = sdscatlen(server.aof_buf,buf,sdslen(buf)); if (server.aof_child_pid != -1) aofRewriteBufferAppend((unsigned char*)buf,sdslen(buf)); sdsfree(buf);}/*1、将EXPIRE/PEXPIRE/EXPIREAT转化成PEXPIREAT生成命令协议格式的字符串 2、SETEX/PSETEX的设置过期时间部分转化成PEXPIREAT生成命令协议格式的字符串*/sds catAppendOnlyExpireAtCommand(sds buf, struct redisCommand *cmd, robj *key, robj *seconds) { …… buf = catAppendOnlyGenericCommand(buf, 3, argv); return buf;}//生成命令的协议格式的字符串sds catAppendOnlyGenericCommand(sds dst, int argc, robj **argv) { char buf[32]; int len, j; robj *o; buf[0] = '*'; //参数个数 len = 1+ll2string(buf+1,sizeof(buf)-1,argc); buf[len++] = '\r'; buf[len++] = '\n'; dst = sdscatlen(dst,buf,len); for (j = 0; j < argc; j++) { o = getDecodedObject(argv[j]); buf[0] = '$';//参数长度 len = 
1+ll2string(buf+1,sizeof(buf)-1,sdslen(o->ptr)); buf[len++] = '\r'; buf[len++] = '\n';//参数 dst = sdscatlen(dst,buf,len); dst = sdscatlen(dst,o->ptr,sdslen(o->ptr)); dst = sdscatlen(dst,"\r\n",2); decrRefCount(o); } return dst;}文件写入和同步
redis的服务器进程是一个事件循环,文件事件负责处理客户端的命令请求,而时间事件负责执行serverCron
这样的定时运行的函数。服务器在处理文件事件时执行写命令,使得命令被追加到aof_buf中;随后在处理时间
事件、执行serverCron函数时,会调用flushAppendOnlyFile函数进行文件的写入和同步。
flushAppendOnlyFile函数的行为由服务器配置的appendfsync选项的值决定。
always:将aof_buf中的所有内容写入并同步到aof文件。
everysec:将aof_buf中的所有内容写入到aof文件,如果上次同步的时间距离现在超过1s,那么对aof文件进行同
步,同步操作由一个线程专门负责执行。
no:将aof_buf中的所有内容写入到aof文件,但不对aof文件进行同步,何时同步由操作系统决定。
void flushAppendOnlyFile(int force) { if (sdslen(server.aof_buf) == 0) return; if (server.aof_fsync == AOF_FSYNC_EVERYSEC) sync_in_progress = bioPendingJobsOfType(BIO_AOF_FSYNC) != 0; if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) { if (sync_in_progress) { if (server.aof_flush_postponed_start == 0) { server.aof_flush_postponed_start = server.unixtime; return; } else if (server.unixtime - server.aof_flush_postponed_start < 2) { return; } server.aof_delayed_fsync++; } } //将aof_buf中的内容写入到aof文件 nwritten = write(server.aof_fd,server.aof_buf,sdslen(server.aof_buf)); server.aof_flush_postponed_start = 0; …… server.aof_current_size += nwritten; if ((sdslen(server.aof_buf)+sdsavail(server.aof_buf)) < 4000) { sdsclear(server.aof_buf); } else { sdsfree(server.aof_buf); server.aof_buf = sdsempty(); } //appendfsync为no或者有后台进程在进行aof或rdb,不进行文件同步 if (server.aof_no_fsync_on_rewrite && (server.aof_child_pid != -1 || server.rdb_child_pid != -1)) return; /* appendfsync为always */ if (server.aof_fsync == AOF_FSYNC_ALWAYS) {/ aof_fsync(server.aof_fd); //同步aof文件 server.aof_last_fsync = server.unixtime;//记录同步时间 } else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC && server.unixtime > server.aof_last_fsync)) { /* appendfsync为EVERYSEC*/ if (!sync_in_progress) aof_background_fsync(server.aof_fd); server.aof_last_fsync = server.unixtime; }}void aof_background_fsync(int fd) { bioCreateBackgroundJob(BIO_AOF_FSYNC,(void*)(long)fd,NULL,NULL);}
AOF文件的载入和数据还原
服务器读入并重新执行一遍aof文件里面保存的写命令,就可以还原服务器关闭之前的数据库状态。
服务器读取aof文件并还原数据库状态的流程:
int loadAppendOnlyFile(char *filename) { …… server.aof_state = AOF_OFF; //创建伪客户端 fakeClient = createFakeClient(); startLoading(fp); //解析aof文件 while(1) { /* Serve the clients from time to time */ if (!(loops++ % 1000)) { loadingProgress(ftello(fp)); processEventsWhileBlocked(); } if (fgets(buf,sizeof(buf),fp) == NULL) { } if (buf[0] != '*') goto fmterr; if (buf[1] == '\0') goto readerr; argc = atoi(buf+1);//命令的参数个数 argv = zmalloc(sizeof(robj*)*argc); fakeClient->argc = argc; fakeClient->argv = argv; //读取命令的参数 for (j = 0; j < argc; j++) { if (fgets(buf,sizeof(buf),fp) == NULL) { fakeClient->argc = j; /* Free up to j-1. */ freeFakeClientArgv(fakeClient); goto readerr; } if (buf[0] != '$') goto fmterr; len = strtol(buf+1,NULL,10); argsds = sdsnewlen(NULL,len); if (len && fread(argsds,len,1,fp) == 0) { sdsfree(argsds); fakeClient->argc = j; /* Free up to j-1. */ freeFakeClientArgv(fakeClient); goto readerr; } argv[j] = createObject(OBJ_STRING,argsds); if (fread(buf,2,1,fp) == 0) { fakeClient->argc = j+1; /* Free up to j. */ freeFakeClientArgv(fakeClient); goto readerr; /* discard CRLF */ } }//执行写命令 cmd = lookupCommand(argv[0]->ptr); fakeClient->cmd = cmd; cmd->proc(fakeClient); }}
AOF重写
由于aof是通过不断追加写命令来记录数据库状态,所以服务器运行较长时间之后,aof文件中的内容会越来越
多,磁盘占用量越来越大,同时通过aof文件还原数据库所需要的时间也会变得很长。所以就需要通过读
取服务器当前的数据库状态来重写新的aof文件。
AOF的重写实现
由于AOF重写会进行大量的写入操作,势必会长时间阻塞主进程,因此redis把重写程序放到子进程执行。
这样做有两点好处:
1)子进程重写期间,主进程可以继续处理命令。
2)子进程带有主进程的数据副本,这样就可以避免与主进程竞争db->dict,这是线程实现不了的。
重写期间,主进程继续处理命令,对数据库状态进行修改,这样使得当前的数据库状态与重写的AOF文件
所保存的数据库状态不一致。因此,redis设置了AOF重写缓冲区,在创建子进程后,主进程每执行一个写命令
都会写到重写缓冲区。在子进程完成重写后,主进程会将AOF重写缓冲区的数据写入到重写的AOF文件,保证
数据状态的一致。
重写aof文件的命令
/* Handler for the command that requests an AOF rewrite.
 *
 * c - the client that issued the command (unused in this excerpt).
 *
 * NOTE(review): three of the four branch bodies are empty here; presumably
 * the replies sent to the client were stripped from the excerpt. */
void bgrewriteaofCommand(client *c) {
    if (server.aof_child_pid != -1) {
        /* aof_child_pid is set: nothing is done in this excerpt. */
    } else if (server.rdb_child_pid != -1) {
        /* rdb_child_pid is set: only flag the rewrite as scheduled. */
        server.aof_rewrite_scheduled = 1;
    } else if (rewriteAppendOnlyFileBackground() == C_OK) {
        /* The background rewrite was started successfully. */
    } else {
        /* Starting the background rewrite failed. */
    }
}
serverCron定时程序,触发AOF重写
int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { if (server.rdb_child_pid != -1 || server.aof_child_pid != -1 || ldbPendingChildren()) { …… } else { ……//检查是否触发AOF重写 if (server.rdb_child_pid == -1 &&server.aof_child_pid == -1 && server.aof_rewrite_perc &&server.aof_current_size > server.aof_rewrite_min_size) { long long base = server.aof_rewrite_base_size ?server.aof_rewrite_base_size : 1; long long growth = (server.aof_current_size*100/base) - 100; if (growth >= server.aof_rewrite_perc) { rewriteAppendOnlyFileBackground(); } } }}后台重写的实现
//后台重写AOF文件int rewriteAppendOnlyFileBackground(void) { if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) return C_ERR; if (aofCreatePipes() != C_OK) return C_ERR;//创建父进程与子进程的管道 openChildInfoPipe(); start = ustime(); if ((childpid = fork()) == 0) { char tmpfile[256]; snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid()); if (rewriteAppendOnlyFile(tmpfile) == C_OK) { …… } } else { /* Parent */ …… } return C_OK; /* unreached */}//重写AOF文件的程序int rewriteAppendOnlyFile(char *filename) { snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid()); server.aof_child_diff = sdsempty(); rioInitWithFile(&aof,fp); if (server.aof_rewrite_incremental_fsync) rioSetAutoSync(&aof,AOF_AUTOSYNC_BYTES); ……//进行重写操作 if (rewriteAppendOnlyFileRio(&aof) == C_ERR) goto werr; if (fflush(fp) == EOF) goto werr; if (fsync(fileno(fp)) == -1) goto werr; //重写期间,从父进程的重写缓冲区获取部分写命令 …… if (rename(tmpfile,filename) == -1) { } return C_OK;}//重写操作int rewriteAppendOnlyFileRio(rio *aof) { ……// 遍历所有的数据库 for (j = 0; j < server.dbnum; j++) { char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n"; redisDb *db = server.db+j; dict *d = db->dict; if (dictSize(d) == 0) continue; di = dictGetSafeIterator(d); if (rioWrite(aof,selectcmd,sizeof(selectcmd)-1) == 0) goto werr; if (rioWriteBulkLongLong(aof,j) == 0) goto werr; //遍历dict while((de = dictNext(di)) != NULL) { ……//检查key-value是否过期,过期就不需要重写到AOF文件 if (expiretime != -1 && expiretime < now) continue; // 根据value类型,进行对应的重写逻辑 if (o->type == OBJ_STRING) { char cmd[]="*3\r\n$3\r\nSET\r\n"; if (rioWrite(aof,cmd,sizeof(cmd)-1) == 0) goto werr; if (rioWriteBulkObject(aof,&key) == 0) goto werr; if (rioWriteBulkObject(aof,o) == 0) goto werr; } else if (o->type == OBJ_LIST) { if (rewriteListObject(aof,&key,o) == 0) goto werr; } else if (o->type == OBJ_SET) { if (rewriteSetObject(aof,&key,o) == 0) goto werr; } else if (o->type == OBJ_ZSET) { if (rewriteSortedSetObject(aof,&key,o) == 0) goto werr; } else if (o->type == OBJ_HASH) { if (rewriteHashObject(aof,&key,o) 
== 0) goto werr; } else if (o->type == OBJ_MODULE) { if (rewriteModuleObject(aof,&key,o) == 0) goto werr; }//写入key-value的过期时间 if (expiretime != -1) { char cmd[]="*3\r\n$9\r\nPEXPIREAT\r\n"; if (rioWrite(aof,cmd,sizeof(cmd)-1) == 0) goto werr; if (rioWriteBulkObject(aof,&key) == 0) goto werr; if (rioWriteBulkLongLong(aof,expiretime) == 0) goto werr; } …… } dictReleaseIterator(di); di = NULL; } return C_OK;}子进程重写完成后,父进程进行处理
/* Excerpt of the periodic server task that shows how a finished AOF rewrite
 * child is detected.
 *
 * When a child process exists, it is reaped without blocking; if the reaped
 * pid is the AOF rewrite child, backgroundRewriteDoneHandler() is called
 * with its exit code and terminating signal. */
int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
    if (server.rdb_child_pid != -1 || server.aof_child_pid != -1 ||
        ldbPendingChildren())
    {
        int statloc;
        pid_t pid;

        if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
            /* Decode the wait status before handing it to the handler. */
            int exitcode = WEXITSTATUS(statloc);
            int bysignal = 0;

            if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);

            if (pid == server.aof_child_pid) {
                /* The child finished the rewrite: let the parent complete
                 * the AOF replacement. */
                backgroundRewriteDoneHandler(exitcode,bysignal);
            }
        }
    }
}

/* Called in the parent when the AOF rewrite child has terminated.
 *
 * exitcode - exit status of the child
 * bysignal - non-zero when the child was terminated by a signal
 *
 * On a clean exit the rewrite buffer is appended to the file produced by
 * the child, which is then renamed over server.aof_filename. */
void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
    if (!bysignal && exitcode == 0) {
        int newfd;
        char tmpfile[256];

        snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof",
            (int)server.aof_child_pid);
        newfd = open(tmpfile,O_WRONLY|O_APPEND);
        /* NOTE(review): the result of open() is not checked in this
         * excerpt. */

        /* Append the content of the rewrite buffer to the rewritten AOF. */
        if (aofRewriteBufferWrite(newfd) == -1) {
            /* ... (error handling omitted in the original excerpt) ... */
        }

        /* Replace the old AOF file with the rewritten one. */
        if (rename(tmpfile,server.aof_filename) == -1) {
            /* ... (error handling omitted in the original excerpt) ... */
        }
        /* ... (omitted in the original excerpt) ... */
    }
}
- redis aof持久化的源码分析
- Redis源码分析(十五)——持久化AOF
- Redis的持久化-AOF
- Redis的持久化-AOF
- Redis的持久化-AOF
- Redis的AOF持久化
- Redis源码解析:12AOF持久化
- redis源码阅读(8)-AOF持久化
- Redis AOF持久化
- Redis AOF持久化
- redis持久化--AOF
- Redis 持久化 AOF
- redis 持久化AOF
- Redis-AOF持久化
- 【redis】AOF 持久化
- Redis的AOF持久化的实现
- Redis源码分析:AOF
- Redis源码分析:AOF
- 古谚、评论与论断、名篇与名言
- 图像处理基本原理----基本概念
- Android使用pull解析xml
- Bash脚本实现批量作业并行化
- 卷积
- redis aof持久化的源码分析
- 项目上线流程
- Ionic2视图的创建与删除
- 2017.03.31:数据仓库与数据分析01
- 读《统计自然语言处理》——语料库与知识词汇库
- LSD_SLAM 编译、安装到运行demo
- kettle 映射的使用——对自定义参数处理
- C++ Primer plus 学习笔记之第八章函数探幽(1)
- linux上在不建立窗口的情况下使用opengl