redis aof持久化的源码分析

来源:互联网 发布:李斯特改编 知乎 编辑:程序博客网 时间:2024/04/20 14:18

       除了rdb持久化功能之外,redis还提供了aof(append only file)持久化功能。与rdb不同,aof持久化

是通过保存redis服务器所执行的写命令来记录数据库的状态。


AOF持久化的实现

        AOF持久化的实现可以分为命令追加、文件写入和文件同步三个步骤。

命令追加

       当AOF持久化功能处于打开状态时,服务器在执行完一个写命令之后,会以协议格式将被执行的写命

令追加到服务器状态的aof_buf缓冲区的末尾:

/* Excerpt of the server state structure: only the AOF buffer field discussed
 * in this article is shown here. */
struct redisServer {
    sds aof_buf;    /* AOF buffer, written before entering the event loop */
};
       服务器执行完写命令,调用propagate进行命令追加。

void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,               int flags){    if (server.aof_state != AOF_OFF && flags & PROPAGATE_AOF)        feedAppendOnlyFile(cmd,dbid,argv,argc);}//进行命令追加void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {    if (dictid != server.aof_selected_db) {        //切换dbid,追加select命令        snprintf(seldb,sizeof(seldb),"%d",dictid);        buf = sdscatprintf(buf,"*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",            (unsigned long)strlen(seldb),seldb);        server.aof_selected_db = dictid;    }    if (cmd->proc == expireCommand || cmd->proc == pexpireCommand ||        cmd->proc == expireatCommand) {        /* 将EXPIRE/PEXPIRE/EXPIREAT转化成PEXPIREAT生成命令协议格式的字符串 */        buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]);    } else if (cmd->proc == setexCommand || cmd->proc == psetexCommand) {        /* 将SETEX/PSETEX转换成SET和PEXPIREAT生成命令协议格式的字符串 */        tmpargv[0] = createStringObject("SET",3);        tmpargv[1] = argv[1];        tmpargv[2] = argv[3];        buf = catAppendOnlyGenericCommand(buf,3,tmpargv);        decrRefCount(tmpargv[0]);        buf = catAppendOnlyExpireAtCommand(buf,cmd,argv[1],argv[2]);    } else {        //将写命令生成命令协议格式的字符串        buf = catAppendOnlyGenericCommand(buf,argc,argv);    }    //将命令的协议格式的字符串追加到aof_buf    if (server.aof_state == AOF_ON)        server.aof_buf = sdscatlen(server.aof_buf,buf,sdslen(buf));    if (server.aof_child_pid != -1)        aofRewriteBufferAppend((unsigned char*)buf,sdslen(buf));    sdsfree(buf);}/*1、将EXPIRE/PEXPIRE/EXPIREAT转化成PEXPIREAT生成命令协议格式的字符串   2、SETEX/PSETEX的设置过期时间部分转化成PEXPIREAT生成命令协议格式的字符串*/sds catAppendOnlyExpireAtCommand(sds buf, struct redisCommand *cmd, robj *key, robj *seconds) {    ……    buf = catAppendOnlyGenericCommand(buf, 3, argv);    return buf;}//生成命令的协议格式的字符串sds catAppendOnlyGenericCommand(sds dst, int argc, robj **argv) {    char buf[32];    int len, j;    robj *o;    buf[0] = '*'; //参数个数    len = 
1+ll2string(buf+1,sizeof(buf)-1,argc);    buf[len++] = '\r';    buf[len++] = '\n';    dst = sdscatlen(dst,buf,len);    for (j = 0; j < argc; j++) {        o = getDecodedObject(argv[j]);        buf[0] = '$';//参数长度        len = 1+ll2string(buf+1,sizeof(buf)-1,sdslen(o->ptr));        buf[len++] = '\r';        buf[len++] = '\n';//参数        dst = sdscatlen(dst,buf,len);        dst = sdscatlen(dst,o->ptr,sdslen(o->ptr));        dst = sdscatlen(dst,"\r\n",2);        decrRefCount(o);    }    return dst;}
文件写入和同步

       redis的服务器进程是一个事件循环,文件事件负责处理客户端的命令请求,而时间事件负责执行serverCron

函数这样的定时运行的函数。在处理文件事件执行写命令,使得命令被追加到aof_buf中,然后在处理时间事件执

行serverCron函数会调用flushAppendOnlyFile函数进行文件的写入和同步。

      flushAppendOnlyFile函数的行为由服务器配置的appendfsync选项的值决定。

always:将aof_buf中的所有内容写入并同步到aof文件。

everysec:将aof_buf中的所有内容写入到aof文件,如果上次同步的时间距离现在超过1s,那么对aof文件进行同

                  步,同步操作由一个线程专门负责执行。

no:将aof_buf中的所有内容写入到aof文件,但不对aof文件同步,同步由操作系统执行。

void flushAppendOnlyFile(int force) {        if (sdslen(server.aof_buf) == 0) return;    if (server.aof_fsync == AOF_FSYNC_EVERYSEC)        sync_in_progress = bioPendingJobsOfType(BIO_AOF_FSYNC) != 0;    if (server.aof_fsync == AOF_FSYNC_EVERYSEC && !force) {        if (sync_in_progress) {            if (server.aof_flush_postponed_start == 0) {                server.aof_flush_postponed_start = server.unixtime;                return;            } else if (server.unixtime - server.aof_flush_postponed_start < 2) {                return;            }            server.aof_delayed_fsync++;        }    }    //将aof_buf中的内容写入到aof文件    nwritten = write(server.aof_fd,server.aof_buf,sdslen(server.aof_buf));    server.aof_flush_postponed_start = 0;    ……    server.aof_current_size += nwritten;    if ((sdslen(server.aof_buf)+sdsavail(server.aof_buf)) < 4000) {        sdsclear(server.aof_buf);    } else {        sdsfree(server.aof_buf);        server.aof_buf = sdsempty();    }    //appendfsync为no或者有后台进程在进行aof或rdb,不进行文件同步    if (server.aof_no_fsync_on_rewrite &&        (server.aof_child_pid != -1 || server.rdb_child_pid != -1))            return;    /* appendfsync为always */    if (server.aof_fsync == AOF_FSYNC_ALWAYS) {/        aof_fsync(server.aof_fd); //同步aof文件        server.aof_last_fsync = server.unixtime;//记录同步时间    } else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC &&                server.unixtime > server.aof_last_fsync)) {        /* appendfsync为EVERYSEC*/        if (!sync_in_progress) aof_background_fsync(server.aof_fd);        server.aof_last_fsync = server.unixtime;    }}void aof_background_fsync(int fd) {    bioCreateBackgroundJob(BIO_AOF_FSYNC,(void*)(long)fd,NULL,NULL);}

AOF文件的载入和数据还原

        服务器读入并重新执行一遍aof文件里面保存的写命令,就可以还原服务器关闭之前的数据库状态。

服务器读取aof文件并还原数据库状态的流程:


int loadAppendOnlyFile(char *filename) {    ……    server.aof_state = AOF_OFF;    //创建伪客户端    fakeClient = createFakeClient();    startLoading(fp);    //解析aof文件    while(1) {        /* Serve the clients from time to time */        if (!(loops++ % 1000)) {            loadingProgress(ftello(fp));            processEventsWhileBlocked();        }        if (fgets(buf,sizeof(buf),fp) == NULL) {        }        if (buf[0] != '*') goto fmterr;        if (buf[1] == '\0') goto readerr;        argc = atoi(buf+1);//命令的参数个数        argv = zmalloc(sizeof(robj*)*argc);        fakeClient->argc = argc;        fakeClient->argv = argv;        //读取命令的参数        for (j = 0; j < argc; j++) {            if (fgets(buf,sizeof(buf),fp) == NULL) {                fakeClient->argc = j; /* Free up to j-1. */                freeFakeClientArgv(fakeClient);                goto readerr;            }            if (buf[0] != '$') goto fmterr;            len = strtol(buf+1,NULL,10);            argsds = sdsnewlen(NULL,len);            if (len && fread(argsds,len,1,fp) == 0) {                sdsfree(argsds);                fakeClient->argc = j; /* Free up to j-1. */                freeFakeClientArgv(fakeClient);                goto readerr;            }            argv[j] = createObject(OBJ_STRING,argsds);            if (fread(buf,2,1,fp) == 0) {                fakeClient->argc = j+1; /* Free up to j. */                freeFakeClientArgv(fakeClient);                goto readerr; /* discard CRLF */            }        }//执行写命令        cmd = lookupCommand(argv[0]->ptr);        fakeClient->cmd = cmd;        cmd->proc(fakeClient);    }}

AOF重写

       由于aof是通过不断追加写命令来记录数据库状态,所以服务器执行比较久之后,aof文件中的内容会越来越

多,磁盘占用量越来越大,同时也使通过aof文件还原数据库所需要的时间变得很久。所以就需要通过读

取服务器当前的数据库状态来重写新的aof文件。

AOF的重写实现

      由于AOF重写会进行大量的写入操作,势必会长时间阻塞主进程,因此redis把重写程序放到子进程执行。

这样做有两点好处:

      1)子进程重写期间,主进程可以继续处理命令。

      2)子进程带有主进程的数据副本,这样就可以避免与主进程竞争db->dict,这是线程实现不了的。

      重写期间,主进程继续处理命令,对数据库状态进行修改,这样使得当前的数据库状态与重写的AOF文件

所保存的数据库状态不一致。因此,redis设置了AOF重写缓冲区,在创建子进程后,主进程每执行一个写命令

都会写到重写缓冲区。在子进程完成重写后,主进程会将AOF重写缓冲区的数据写入到重写的AOF文件,保证

数据状态的一致。

重写aof文件的命令

/* BGREWRITEAOF command handler.
 *
 * NOTE: abridged excerpt. The article left most branch bodies empty;
 * presumably they reply to the client `c` (to be confirmed against the full
 * source). */
void bgrewriteaofCommand(client *c) {
    int aof_child_running = (server.aof_child_pid != -1);
    int rdb_child_running = (server.rdb_child_pid != -1);

    if (aof_child_running) {
        /* A rewrite child already exists: nothing is started.
         * (Branch body omitted in the article.) */
        return;
    }

    if (rdb_child_running) {
        /* An RDB child is running: only flag the rewrite as scheduled. */
        server.aof_rewrite_scheduled = 1;
        return;
    }

    if (rewriteAppendOnlyFileBackground() == C_OK) {
        /* Background rewrite started. (Branch body omitted in the article.) */
    } else {
        /* Background rewrite could not be started. (Branch body omitted in
         * the article.) */
    }
}
serverCron定时程序,触发AOF重写

/* Excerpt of the periodic serverCron() routine showing how an automatic AOF
 * rewrite is triggered.
 *
 * NOTE: abridged excerpt. The rest of the function, including its return
 * value, is not shown in the article. */
int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
    if (server.rdb_child_pid != -1 || server.aof_child_pid != -1 ||
        ldbPendingChildren())
    {
        /* ... (elided in the article) ... */
    } else {
        /* ... (elided in the article) ... */

        /* Check whether an AOF rewrite must be triggered: no child process
         * is running, the percentage threshold is configured (non-zero) and
         * the current AOF size exceeds the configured minimum size. */
        if (server.rdb_child_pid == -1 && server.aof_child_pid == -1 &&
            server.aof_rewrite_perc &&
            server.aof_current_size > server.aof_rewrite_min_size)
        {
            /* Use 1 as the base when no base size is recorded, which also
             * avoids a division by zero. */
            long long base = server.aof_rewrite_base_size ?
                             server.aof_rewrite_base_size : 1;
            /* Growth of the current size relative to the base, in percent. */
            long long growth = (server.aof_current_size*100/base) - 100;
            if (growth >= server.aof_rewrite_perc) {
                rewriteAppendOnlyFileBackground();
            }
        }
    }

    /* ... (remainder of serverCron not shown in the article) ... */
}
后台重写的实现

//后台重写AOF文件int rewriteAppendOnlyFileBackground(void) {    if (server.aof_child_pid != -1 || server.rdb_child_pid != -1) return C_ERR;    if (aofCreatePipes() != C_OK) return C_ERR;//创建父进程与子进程的管道    openChildInfoPipe();    start = ustime();    if ((childpid = fork()) == 0) {        char tmpfile[256];        snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof", (int) getpid());        if (rewriteAppendOnlyFile(tmpfile) == C_OK) {            ……        }     } else {        /* Parent */ ……    }    return C_OK; /* unreached */}//重写AOF文件的程序int rewriteAppendOnlyFile(char *filename) {    snprintf(tmpfile,256,"temp-rewriteaof-%d.aof", (int) getpid());    server.aof_child_diff = sdsempty();    rioInitWithFile(&aof,fp);    if (server.aof_rewrite_incremental_fsync)        rioSetAutoSync(&aof,AOF_AUTOSYNC_BYTES);    ……//进行重写操作    if (rewriteAppendOnlyFileRio(&aof) == C_ERR) goto werr;    if (fflush(fp) == EOF) goto werr;    if (fsync(fileno(fp)) == -1) goto werr;    //重写期间,从父进程的重写缓冲区获取部分写命令    ……    if (rename(tmpfile,filename) == -1) {    }    return C_OK;}//重写操作int rewriteAppendOnlyFileRio(rio *aof) {    ……// 遍历所有的数据库    for (j = 0; j < server.dbnum; j++) {        char selectcmd[] = "*2\r\n$6\r\nSELECT\r\n";        redisDb *db = server.db+j;        dict *d = db->dict;        if (dictSize(d) == 0) continue;        di = dictGetSafeIterator(d);        if (rioWrite(aof,selectcmd,sizeof(selectcmd)-1) == 0) goto werr;        if (rioWriteBulkLongLong(aof,j) == 0) goto werr;        //遍历dict        while((de = dictNext(di)) != NULL) {            ……//检查key-value是否过期,过期就不需要重写到AOF文件            if (expiretime != -1 && expiretime < now) continue;            // 根据value类型,进行对应的重写逻辑            if (o->type == OBJ_STRING) {                char cmd[]="*3\r\n$3\r\nSET\r\n";                if (rioWrite(aof,cmd,sizeof(cmd)-1) == 0) goto werr;                if (rioWriteBulkObject(aof,&key) == 0) goto werr;                if (rioWriteBulkObject(aof,o) == 0) goto werr;            } else if (o->type == 
OBJ_LIST) {                if (rewriteListObject(aof,&key,o) == 0) goto werr;            } else if (o->type == OBJ_SET) {                if (rewriteSetObject(aof,&key,o) == 0) goto werr;            } else if (o->type == OBJ_ZSET) {                if (rewriteSortedSetObject(aof,&key,o) == 0) goto werr;            } else if (o->type == OBJ_HASH) {                if (rewriteHashObject(aof,&key,o) == 0) goto werr;            } else if (o->type == OBJ_MODULE) {                if (rewriteModuleObject(aof,&key,o) == 0) goto werr;            }//写入key-value的过期时间            if (expiretime != -1) {                char cmd[]="*3\r\n$9\r\nPEXPIREAT\r\n";                if (rioWrite(aof,cmd,sizeof(cmd)-1) == 0) goto werr;                if (rioWriteBulkObject(aof,&key) == 0) goto werr;                if (rioWriteBulkLongLong(aof,expiretime) == 0) goto werr;            }            ……        }        dictReleaseIterator(di);        di = NULL;    }    return C_OK;}
子进程重写完成后,父进程进行处理

int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {    if (server.rdb_child_pid != -1 || server.aof_child_pid != -1 ||        ldbPendingChildren())    {        if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {            if(pid == server.aof_child_pid) {            //子进程完成重写,父进程进行重写AOF文件的处理                backgroundRewriteDoneHandler(exitcode,bysignal);            }         }    } }void backgroundRewriteDoneHandler(int exitcode, int bysignal) {    if (!bysignal && exitcode == 0) {        snprintf(tmpfile,256,"temp-rewriteaof-bg-%d.aof",            (int)server.aof_child_pid);        newfd = open(tmpfile,O_WRONLY|O_APPEND);        if (aofRewriteBufferWrite(newfd) == -1) {            ……//将重写缓冲区的数据写入到重写AOF文件        }        if (rename(tmpfile,server.aof_filename) == -1) {            ……//覆盖旧的AOF文件        }        ……    } }




2 0
原创粉丝点击