Redis源码分析（三十一）--- latency延迟分析处理

来源：互联网发布：e盾网络验证那么厉害吗编辑：程序博客网时间：2024/05/17 02:03

每当提到延时统计的时候，首先想到的一个名词就是“性能测试”。没错，在Redis的redis_benchmark文件中，的确用到了延迟文件中的相关信息。Redis源码中对此文件的官方说明如下：

/* The latency monitor allows to easily observe the sources of latency
 * in a Redis instance using the LATENCY command. Different latency
 * sources are monitored, like disk I/O, execution of commands, fork
 * system call, and so forth.
 *
 * 延时监视器可以通过LATENCY命令方便地观察Redis实例中延迟的来源。
 * 被监控的延迟来源有多种，例如磁盘I/O、命令的执行、fork系统调用
 * （创建子进程）等。
 * ---------------------------------------------------------------------------- */

Redis的延时监控原理非常简单：它针对每种事件维护了一个统计列表，每个列表中包括了采集到的一系列样本，每个样本记录了该样本的创建时间和延时时间。event ==> SampleSeriesList 是一个字典的映射关系。下面看看其中关键的采集点——名叫latencySample的结构定义：

/* Representation of a latency sample: the sampling time and the latency
 * observed in milliseconds. */
struct latencySample {
    int32_t time;     /* Time at which the sample was created. A fixed-width
                       * field is used instead of time_t to force 4 bytes
                       * usage everywhere. */
    uint32_t latency; /* Observed latency, in milliseconds. */
};

字典中维护的可不是一个Sample结点，而是一个结点列表结构体:

/* The latency time series for a given event. *//* 针对某个事件采集的一系列延时sample */struct latencyTimeSeries {//下一个延时Sample的下标    int idx; /* Index of the next sample to store. */    //最大的延时    uint32_t max; /* Max latency observed for this event. */    //最近的延时记录    struct latencySample samples[LATENCY_TS_LEN]; /* Latest history. */};

在Redis代码的设计中，因为延时是用来测试和结果分析的，所以，作者还设计了用于后面分析报告中会用到的数据统计结构体：

/* Latency statistics structure: the aggregated result of analyzing the
 * samples stored for one event. */
struct latencyStats {
    uint32_t all_time_high; /* Absolute max observed since latest reset. */
    uint32_t avg;           /* Average of current samples. */
    uint32_t min;           /* Min of current samples. */
    uint32_t max;           /* Max of current samples. */
    uint32_t mad;           /* Mean absolute deviation (from the average). */
    uint32_t samples;       /* Number of non-zero samples. */
    time_t period;          /* Number of seconds since first event and now. */
};

意思都非常的直接，那么一个简单的Sample如何进行事件的检测呢？

/* Start monitoring an event. We just set the current time.
 *
 * When server.latency_monitor_threshold is non-zero, 'var' receives the
 * value returned by mstime(); otherwise 'var' is set to 0.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to
 * exactly one statement and can be used safely as the body of an unbraced
 * if/else. Invoke it with a trailing semicolon. */
#define latencyStartMonitor(var) do { \
    if (server.latency_monitor_threshold) { \
        (var) = mstime(); \
    } else { \
        (var) = 0; \
    } \
} while (0)

/* End monitoring an event, compute the difference with the current time
 * to check the amount of time elapsed.
 *
 * When server.latency_monitor_threshold is non-zero, 'var' (expected to hold
 * the start time stored by latencyStartMonitor) is replaced by
 * mstime() - var. When it is zero, 'var' is left untouched. */
#define latencyEndMonitor(var) do { \
    if (server.latency_monitor_threshold) { \
        (var) = mstime() - (var); \
    } \
} while (0)

很简单，记录开始时间，记录结束时间，中间的差值就是延时时间了。如果延时达到或超过配置的阈值（server.latency_monitor_threshold），就加入到延时列表中：

/* Add the sample only if the elapsed time is >= to the configured threshold.
 *
 * Nothing is recorded when server.latency_monitor_threshold is zero.
 * Note that 'var' is evaluated more than once, so it must not have side
 * effects.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to
 * exactly one statement: the trailing semicolon at the call site no longer
 * produces an empty extra statement, and the macro can be used as the body
 * of an unbraced if/else. */
#define latencyAddSampleIfNeeded(event,var) do { \
    if (server.latency_monitor_threshold && \
        (var) >= server.latency_monitor_threshold) \
        latencyAddSample((event),(var)); \
} while (0)

我们重点关注一下，latencyAddSample，就是把采样结点加入到记录中，步骤如下:

1.根据传入的event事件，在server.latency_events找到key为event事件的val,即一个latencyTimeSeries

2.在这个latencyTimeSeries的struct latencySample samples[LATENCY_TS_LEN]中添加一个新的Sample

实现代码如下:

/* Add the specified sample to the specified time series "event".
 * This function is usually called via latencyAddSampleIfNeeded(), that
 * is a macro that only adds the sample if the latency is higher than
 * server.latency_monitor_threshold.
 *
 * event:   key of the time series inside server.latency_events. A copy of
 *          the string is stored as dictionary key when the series is
 *          created, so the caller keeps ownership of 'event'.
 * latency: the observed latency to record. */
void latencyAddSample(char *event, mstime_t latency) {
    /* Look up the time series associated with this event. */
    struct latencyTimeSeries *ts = dictFetchValue(server.latency_events,event);
    time_t now = time(NULL);
    int prev;

    /* Create the time series if it does not exist: one event maps to one
     * latencyTimeSeries. */
    if (ts == NULL) {
        ts = zmalloc(sizeof(*ts));
        ts->idx = 0;
        ts->max = 0;
        memset(ts->samples,0,sizeof(ts->samples));
        dictAdd(server.latency_events,zstrdup(event),ts);
    }

    /* Track the all time high before the same-second check below, so that
     * a spike merged into the previous sample still updates the maximum. */
    if (latency > ts->max) ts->max = latency;

    /* If the previous sample is in the same second, we update our old sample
     * if this latency is > of the old one, or just return. */
    prev = (ts->idx + LATENCY_TS_LEN - 1) % LATENCY_TS_LEN;
    if (ts->samples[prev].time == now) {
        if (latency > ts->samples[prev].latency)
            ts->samples[prev].latency = latency;
        return;
    }

    /* Store the new sample. The timestamp is the same 'now' that was used
     * for the comparison above, so the two can never disagree. */
    ts->samples[ts->idx].time = now;
    ts->samples[ts->idx].latency = latency;

    /* Advance the write index, wrapping around at the end of the array. */
    ts->idx++;
    if (ts->idx == LATENCY_TS_LEN) ts->idx = 0;
}

结点都采集出来之后，当然会对结果进行分析统计了，这时就用到了latencyStats结构体：

/* Analyze the samples avaialble for a given event and return a structure * populate with different metrics, average, MAD, min, max, and so forth. * Check latency.h definition of struct latenctStat for more info. * If the specified event has no elements the structure is populate with * zero values. *//* 分析某个时间Event的延时结果，结果信息存入latencyStats结构体中 */void analyzeLatencyForEvent(char *event, struct latencyStats *ls) {    struct latencyTimeSeries *ts = dictFetchValue(server.latency_events,event);    int j;    uint64_t sum;//初始化延时统计结果结构体的变量    ls->all_time_high = ts ? ts->max : 0;    ls->avg = 0;    ls->min = 0;    ls->max = 0;    ls->mad = 0;    ls->samples = 0;    ls->period = 0;    if (!ts) return;    /* First pass, populate everything but the MAD. */    sum = 0;    for (j = 0; j < LATENCY_TS_LEN; j++) {        if (ts->samples[j].time == 0) continue;        ls->samples++;        if (ls->samples == 1) {            ls->min = ls->max = ts->samples[j].latency;        } else {        //找出延时最大和最小的延时时间            if (ls->min > ts->samples[j].latency)                ls->min = ts->samples[j].latency;            if (ls->max < ts->samples[j].latency)                ls->max = ts->samples[j].latency;        }        sum += ts->samples[j].latency;        /* Track the oldest event time in ls->period. */        if (ls->period == 0 || ts->samples[j].time < ls->period)        //最早的延时记录点的创建时间            ls->period = ts->samples[j].time;    }    /* So far avg is actually the sum of the latencies, and period is     * the oldest event time. We need to make the first an average and     * the second a range of seconds. */    if (ls->samples) {        ls->avg = sum / ls->samples;        ls->period = time(NULL) - ls->period;        if (ls->period == 0) ls->period = 1;    }    /* Second pass, compute MAD. 
*/    //计算平均相对误差，与平均延时相比    sum = 0;    for (j = 0; j < LATENCY_TS_LEN; j++) {        int64_t delta;        if (ts->samples[j].time == 0) continue;        delta = (int64_t)ls->avg - ts->samples[j].latency;        if (delta < 0) delta = -delta;        sum += delta;    }    if (ls->samples) ls->mad = sum / ls->samples;}

当然还可以利用这些采集的点，画一个微线图，更加形象的展示出来:

#define LATENCY_GRAPH_COLS 80/* 利用延时的Sample点，画出对应的微线图 */sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts) {    int j;    struct sequence *seq = createSparklineSequence();    sds graph = sdsempty();    uint32_t min = 0, max = 0;    for (j = 0; j < LATENCY_TS_LEN; j++) {        int i = (ts->idx + j) % LATENCY_TS_LEN;        int elapsed;        char *label;        char buf[64];        if (ts->samples[i].time == 0) continue;        /* Update min and max. */        if (seq->length == 0) {            min = max = ts->samples[i].latency;        } else {            if (ts->samples[i].latency > max) max = ts->samples[i].latency;            if (ts->samples[i].latency < min) min = ts->samples[i].latency;        }        /* Use as label the number of seconds / minutes / hours / days         * ago the event happened. */        elapsed = time(NULL) - ts->samples[i].time;        if (elapsed < 60)            snprintf(buf,sizeof(buf),"%ds",elapsed);        else if (elapsed < 3600)            snprintf(buf,sizeof(buf),"%dm",elapsed/60);        else if (elapsed < 3600*24)            snprintf(buf,sizeof(buf),"%dh",elapsed/3600);        else            snprintf(buf,sizeof(buf),"%dd",elapsed/(3600*24));        label = zstrdup(buf);        sparklineSequenceAddSample(seq,ts->samples[i].latency,label);    }    graph = sdscatprintf(graph,        "%s - high %lu ms, low %lu ms (all time high %lu ms)\n", event,        (unsigned long) max, (unsigned long) min, (unsigned long) ts->max);    for (j = 0; j < LATENCY_GRAPH_COLS; j++)        graph = sdscatlen(graph,"-",1);    graph = sdscatlen(graph,"\n",1);    //调用sparkline函数画微线图    graph = sparklineRender(graph,seq,LATENCY_GRAPH_COLS,4,SPARKLINE_FILL);    freeSparklineSequence(seq);    //返回微线图字符串    return graph;}

在Redis还封装了一些命令供外部调用，这里就不分析了，就是对上述方法的复合调用:

/* ---------------------------- Latency API --------------------------------- */void latencyMonitorInit(void) /* 延时监听初始化操作，创建Event字典对象 */void latencyAddSample(char *event, mstime_t latency) /* 添加Sample到指定的Event对象的Sample列表中 */int latencyResetEvent(char *event_to_reset) /* 重置Event事件的延迟，删除字典中的event的记录 */void analyzeLatencyForEvent(char *event, struct latencyStats *ls) /* 分析某个时间Event的延时结果，结果信息存入latencyStats结构体中 */sds createLatencyReport(void) /* 根据延时Sample的结果，创建阅读性比较好的分析报告 */void latencyCommandReplyWithSamples(redisClient *c, struct latencyTimeSeries *ts)void latencyCommandReplyWithLatestEvents(redisClient *c)sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts)void latencyCommand(redisClient *c)

Redis的延时类文件的分析也结束了。分析了这么长时间的Redis代码，感觉每一块代码都有它的亮点存在。分析了30多期下来，还是学到了很多网上所学不到的知识：网上更多的是对Redis主流思想的学习，而一些比较细小的点，只有自己去品味，才能够真正地体会。

0 0