upstream均衡负载模块（一）加权轮询策略

来源：互联网发布：sistar shake it 编辑：程序博客网时间：2024/06/05 16:30

upstream负载均衡模块主要是用于从“upstream”定义的后端服务器中选择一台服务器进行连接。nginx先使用负载均衡模块选择一台主机，再使用upstream模块实现与这台主机的交互。

负载均衡策略

Nginx负载均衡策略主要分成两大类：内置策略和扩展策略。我们主要分析内置策略，内置策略主要是ip hash策略和加权轮询策略。默认情况下，这两种策略会被编译进nginx，只需在配置时指明参数就行。扩展策略有很多，如通用hash、consistent hash等，默认不编译进nginx，属于第三方模块。

nginx的upstream目前支持以下五种分配方式：

1)轮询（默认）

每个请求按时间顺序逐一分配到不同的后端服务器，如果后端服务器down掉，能自动剔除。

2)weight

指定轮询几率，weight和访问比率成正比，用于后端服务器性能不均的情况。

3)ip_hash

每个请求按访问ip的hash结果分配，这样每个访客固定访问一个后端服务器，可以解决session的问题。

4)fair（第三方）

按后端服务器的响应时间来分配请求，响应时间短的优先分配。

5)url_hash（第三方）

按访问url的hash结果来分配请求，使同一个url始终定向到同一个后端服务器，后端服务器为缓存服务器时比较有效。

Nginx默认采用round_robin加权算法。如果要选择其他的负载均衡算法，必须在upstream的配置上下文中通过配置指令ip_hash明确指定

upstream load_balance{    ip_hash;    server localhost:8001;    server localhost:8002;}

指令如下

/*
 * Directive table of the ip_hash module.  It registers a single directive,
 * "ip_hash", with the flags NGX_HTTP_UPS_CONF|NGX_CONF_NOARGS and
 * ngx_http_upstream_ip_hash as its handler; ngx_null_command terminates
 * the table.
 */
static ngx_command_t  ngx_http_upstream_ip_hash_commands[] = {

    { ngx_string("ip_hash"),
      NGX_HTTP_UPS_CONF|NGX_CONF_NOARGS,
      ngx_http_upstream_ip_hash,
      0,
      0,
      NULL },

      ngx_null_command
};

整个http配置块被解析完毕后，会调用所有http模块对应的初始化函数。对于模块ngx_http_upstream_module而言，对应的main配置初始化函数是ngx_http_upstream_init_main_conf()：

for (i = 0; i < umcf->upstreams.nelts; i++) {        init = uscfp[i]->peer.init_upstream ? uscfp[i]->peer.init_upstream:                                            ngx_http_upstream_init_round_robin;        if (init(cf, uscfp[i]) != NGX_OK) {            return NGX_CONF_ERROR;        }}

默认采用加权轮询方式就是因为init赋值的那一行代码。如果没有进行策略选择，那么就调用默认的策略初始函数ngx_http_upstream_init_round_robin，也就是加权轮询策略。否则的话就调用uscfp[i]->peer.init_upstream函数，如果选择ip hash负载策略，那么就会用ngx_http_upstream_init_ip_hash()。
在正式分析负载模块的具体代码前，我们先介绍熟悉几个相关的结构体

typedef struct {    ngx_addr_t                      *addrs;//指向存储IP地址的数组的指针，host信息(对应的是 ngx_url_t->addrs )    ngx_uint_t                       naddrs;//与第一个参数配合使用，数组元素个数(对应的是 ngx_url_t->naddrs )    ngx_uint_t                       weight;    ngx_uint_t                       max_fails;    time_t                           fail_timeout;    unsigned                         down:1;    unsigned                         backup:1;} ngx_http_upstream_server_t;

typedef struct ngx_http_upstream_srv_conf_s  ngx_http_upstream_srv_conf_t;struct ngx_http_upstream_srv_conf_s {    ngx_http_upstream_peer_t         peer;    void                           **srv_conf;//在 ngx_http_upstream()函数中被设置，指向的是本层的srv_conf    ngx_array_t                     *servers;  /*array of ngx_http_upstream_server_t */    ngx_uint_t                       flags;//调用函数时ngx_http_upstream_add() 指定的标记    ngx_str_t                        host;//在函数 ngx_http_upstream_add() 中设置（e.g. upstream backend中的backend）    u_char                          *file_name;//"/usr/local/nginx/conf/nginx.conf"    ngx_uint_t                       line;//proxy在配置文件中的行号    in_port_t                        port;//使用的端口号（ngx_http_upstream_add()函数中添加, 指向ngx_url_t-->port，通常在函数ngx_parse_inet_url()中解析）    in_port_t                        default_port;//默认使用的端口号（ngx_http_upstream_add()函数中添加, 指向ngx_url_t-->default_port）    ngx_uint_t                       no_port;  /* unsigned no_port:1 */};

typedef struct {    //使用负载均衡的类型，默认采用 ngx_http_upstream_init_round_robin（）    ngx_http_upstream_init_pt        init_upstream;    //使用的负载均衡类型的初始化函数    ngx_http_upstream_init_peer_pt   init;    //us->peer.data = peers; 指向的是 ngx_http_upstream_rr_peers_t（函数 ngx_http_upstream_init_round_robin()中设置）    void                            *data;} ngx_http_upstream_peer_t;

/*
 * Type of the per-request balancer initialiser stored in
 * ngx_http_upstream_peer_t.init: receives the request and the upstream
 * configuration and returns an ngx_int_t status.
 */
typedef ngx_int_t (*ngx_http_upstream_init_peer_pt)(ngx_http_request_t *r,
    ngx_http_upstream_srv_conf_t *us);

如果us->servers为空（即该upstream是由proxy_pass等指令隐式定义的，没有显式的server列表），则利用函数ngx_inet_resolve_host依据us参数中的host和port进行解析，并将结果保存在一个ngx_url_t类型的变量中：

typedef struct {    ngx_str_t                 url;//保存IP地址+端口信息（e.g. 192.168.124.129:8011 或 money.163.com）    ngx_str_t                 host;//保存IP地址信息    ngx_str_t                 port_text;//保存port字符串    ngx_str_t                 uri;//uri部分，在函数ngx_parse_inet_url()中设置    in_port_t                 port;//端口，e.g. listen指令中指定的端口（listen 192.168.124.129:8011）    in_port_t                 default_port;//默认端口（当no_port字段为真时，将默认端口赋值给port字段， 默认端口通常是80）    int                       family;//address family, AF_xxx    unsigned                  listen:1;//是否为指监听类的设置    unsigned                  uri_part:1;    unsigned                  no_resolve:1;//根据情况决定是否解析域名（将域名解析到IP地址）    unsigned                  one_addr:1;//等于1时，仅有一个IP地址    unsigned                  no_port:1;//标识url中没有显示指定端口(为1时没有指定)    unsigned                  wildcard:1;//标识是否使用通配符（e.g. listen *:8000;）    socklen_t                 socklen;//sizeof(struct sockaddr_in)    u_char                    sockaddr[NGX_SOCKADDRLEN];//sockaddr_in结构指向它    ngx_addr_t               *addrs;//数组大小是naddrs字段；每个元素对应域名的IP地址信息(struct sockaddr_in)，在函数中赋值（ngx_inet_resolve_host()）    ngx_uint_t                naddrs;//url对应的IP地址个数,IP格式的地址将默认为1    char                     *err;//错误信息字符串} ngx_url_t;

函数ngx_http_upstream_init_round_robin会创建后端服务器列表，并且将非后备服务器与后备服务器分开，各自组织成单独的列表。每一个后端服务器用一个结构体ngx_http_upstream_rr_peer_t与之对应（ngx_http_upstream_round_robin.h）：

typedef struct {    struct sockaddr                *sockaddr;//后端服务器地址    socklen_t                       socklen;//后端服务器地址长度    ngx_str_t                       name;//后端名称    ngx_int_t                       current_weight;//当前权重，nginx会在运行过程中调整此权重    ngx_int_t                       effective_weight;    ngx_int_t                       weight;//配置的权重    ngx_uint_t                      fails;//已尝试失败次数    time_t                          accessed;//检测失败时间，用于计算超时    time_t                          checked;    ngx_uint_t                      max_fails;//最大失败次数    time_t                          fail_timeout;//多长时间内出现max_fails次失败便认为后端down掉了    ngx_uint_t                      down;          /* unsigned  down:1; *///指定某后端是否挂了#if (NGX_HTTP_SSL)    ngx_ssl_session_t              *ssl_session;   /* local to a process */#endif} ngx_http_upstream_rr_peer_t;

列表最前面需要带有一些head信息，用结构体ngx_http_upstream_rr_peers_t与之对应：

typedef struct ngx_http_upstream_rr_peers_s  ngx_http_upstream_rr_peers_t;struct ngx_http_upstream_rr_peers_s {    ngx_uint_t                      number;//队列中服务器数量 /* ngx_mutex_t                    *mutex; */    ngx_uint_t                      total_weight;//所有服务器总权重    unsigned                        single:1;//为1表示后端服务器总共只有一台，用于优化，此时不需要再做选择    unsigned                        weighted:1;//为1表示总的权重值等于服务器数量    ngx_str_t                      *name;    ngx_http_upstream_rr_peers_t   *next;//后备服务器列表挂载在这个字段下    ngx_http_upstream_rr_peer_t     peer[1];};

ngx_http_upstream_init_round_robin函数具体分析：

/*
 * Configuration-time initialisation of the round-robin balancer for one
 * upstream: builds the peer list(s) that later per-request selection uses.
 *
 * cf - configuration context; memory comes from cf->pool, errors go to cf->log
 * us - the upstream being initialised (an element of the upstreams array
 *      of ngx_http_upstream_main_conf_t)
 *
 * Returns NGX_OK on success.  Returns NGX_ERROR when an explicit upstream
 * has no non-backup address, when an implicit upstream has no port, when
 * host resolution fails, or when an allocation fails.
 */
ngx_int_t
ngx_http_upstream_init_round_robin(ngx_conf_t *cf,
    ngx_http_upstream_srv_conf_t *us)
{
    ngx_url_t                      u;
    ngx_uint_t                     i, j, n, w;
    ngx_http_upstream_server_t    *server;
    ngx_http_upstream_rr_peers_t  *peers, *backup;

    /* install the per-request initialisation callback */
    us->peer.init = ngx_http_upstream_init_round_robin_peer;

    /* the upstream has an explicit server array */
    if (us->servers) {
        server = us->servers->elts;

        n = 0;
        w = 0;

        /* first pass: count addresses and total weight of non-backup servers */
        for (i = 0; i < us->servers->nelts; i++) {
            /* backup servers are handled separately below */
            if (server[i].backup) {
                continue;
            }

            /* number of addresses */
            n += server[i].naddrs;
            /* total weight: every address of a server carries the server's weight */
            w += server[i].naddrs * server[i].weight;
        }

        if (n == 0) {
            ngx_log_error(NGX_LOG_EMERG, cf->log, 0,
                          "no servers in upstream \"%V\" in %s:%ui",
                          &us->host, us->file_name, us->line);
            return NGX_ERROR;
        }

        /*
         * allocate the non-backup list: the header already contains peer[0],
         * so only n - 1 additional entries are added
         */
        peers = ngx_pcalloc(cf->pool, sizeof(ngx_http_upstream_rr_peers_t)
                              + sizeof(ngx_http_upstream_rr_peer_t) * (n - 1));
        if (peers == NULL) {
            return NGX_ERROR;
        }

        /* fill in the header of the non-backup list */
        peers->single = (n == 1);
        peers->number = n;
        peers->weighted = (w != n);
        peers->total_weight = w;
        peers->name = &us->host;

        n = 0;

        /* fill in one peer entry per address of every non-backup server */
        for (i = 0; i < us->servers->nelts; i++) {
            for (j = 0; j < server[i].naddrs; j++) {
                if (server[i].backup) {
                    continue;
                }

                peers->peer[n].sockaddr = server[i].addrs[j].sockaddr;
                peers->peer[n].socklen = server[i].addrs[j].socklen;
                peers->peer[n].name = server[i].addrs[j].name;
                peers->peer[n].max_fails = server[i].max_fails;
                peers->peer[n].fail_timeout = server[i].fail_timeout;
                peers->peer[n].down = server[i].down;
                peers->peer[n].weight = server[i].weight;
                peers->peer[n].effective_weight = server[i].weight;
                peers->peer[n].current_weight = 0;
                n++;
            }
        }

        /* attach the non-backup list to the upstream */
        us->peer.data = peers;

        /* backup servers */
        /* second pass: count addresses and total weight of backup servers */
        n = 0;
        w = 0;

        for (i = 0; i < us->servers->nelts; i++) {
            if (!server[i].backup) {
                continue;
            }
            /* number of backup addresses */
            n += server[i].naddrs;
            /* total weight of the backup servers */
            w += server[i].naddrs * server[i].weight;
        }

        /* no backup servers: the non-backup list is all there is */
        if (n == 0) {
            return NGX_OK;
        }

        /* allocate the backup list, sized the same way as the primary one */
        backup = ngx_pcalloc(cf->pool, sizeof(ngx_http_upstream_rr_peers_t)
                              + sizeof(ngx_http_upstream_rr_peer_t) * (n - 1));
        if (backup == NULL) {
            return NGX_ERROR;
        }

        /* with a backup list present there is more than one backend in total */
        peers->single = 0;
        /* fill in the header of the backup list */
        backup->single = 0;
        backup->number = n;
        backup->weighted = (w != n);
        backup->total_weight = w;
        backup->name = &us->host;

        n = 0;

        /* fill in one peer entry per address of every backup server */
        for (i = 0; i < us->servers->nelts; i++) {
            for (j = 0; j < server[i].naddrs; j++) {
                if (!server[i].backup) {
                    continue;
                }

                backup->peer[n].sockaddr = server[i].addrs[j].sockaddr;
                backup->peer[n].socklen = server[i].addrs[j].socklen;
                backup->peer[n].name = server[i].addrs[j].name;
                backup->peer[n].weight = server[i].weight;
                backup->peer[n].effective_weight = server[i].weight;
                backup->peer[n].current_weight = 0;
                backup->peer[n].max_fails = server[i].max_fails;
                backup->peer[n].fail_timeout = server[i].fail_timeout;
                backup->peer[n].down = server[i].down;
                n++;
            }
        }

        /* chain the backup list behind the non-backup list */
        peers->next = backup;

        return NGX_OK;
    }

    /*
     * us->servers is NULL, e.g. the backend address was given directly
     * in a proxy_pass-like directive
     */
    /* an upstream implicitly defined by proxy_pass, etc. */

    if (us->port == 0) {
        ngx_log_error(NGX_LOG_EMERG, cf->log, 0,
                      "no port in upstream \"%V\" in %s:%ui",
                      &us->host, us->file_name, us->line);
        return NGX_ERROR;
    }

    ngx_memzero(&u, sizeof(ngx_url_t));

    u.host = us->host;
    u.port = us->port;

    /* resolve the host into u.addrs / u.naddrs */
    if (ngx_inet_resolve_host(cf->pool, &u) != NGX_OK) {
        if (u.err) {
            ngx_log_error(NGX_LOG_EMERG, cf->log, 0,
                          "%s in upstream \"%V\" in %s:%ui",
                          u.err, &us->host, us->file_name, us->line);
        }

        return NGX_ERROR;
    }

    n = u.naddrs;

    /* one peer per resolved address; same sizing rule as above */
    peers = ngx_pcalloc(cf->pool, sizeof(ngx_http_upstream_rr_peers_t)
                              + sizeof(ngx_http_upstream_rr_peer_t) * (n - 1));
    if (peers == NULL) {
        return NGX_ERROR;
    }

    /* every peer gets weight 1, so the total weight equals the peer count */
    peers->single = (n == 1);
    peers->number = n;
    peers->weighted = 0;
    peers->total_weight = n;
    peers->name = &us->host;

    for (i = 0; i < u.naddrs; i++) {
        peers->peer[i].sockaddr = u.addrs[i].sockaddr;
        peers->peer[i].socklen = u.addrs[i].socklen;
        peers->peer[i].name = u.addrs[i].name;
        peers->peer[i].weight = 1;
        peers->peer[i].effective_weight = 1;
        peers->peer[i].current_weight = 0;
        /* fixed values for implicit upstreams; peers->peer[i].down stays 0 from ngx_pcalloc */
        peers->peer[i].max_fails = 1;
        peers->peer[i].fail_timeout = 10;
    }

    us->peer.data = peers;

    /* implicitly defined upstream has no backup servers */

    return NGX_OK;
}

完成全局的初始化后，当有一个客户端请求到来时，将会执行针对本轮（此请求）的初始化操作，（针对一个客户端请求，nginx会进行多次尝试选择，尝试全部失败后才返回502错误，所以注意一轮选择与一次选择的区别）。

前面介绍的函数ngx_http_upstream_init_round_robin中，设置了回调函数us->peer.init，它的调用位置是ngx_http_upstream_init_request：

static voidngx_http_upstream_init_request(ngx_http_request_t *r){...if (uscf->peer.init(r, uscf) != NGX_OK) {        ngx_http_upstream_finalize_request(r, u,                                           NGX_HTTP_INTERNAL_SERVER_ERROR);        return;    }    ngx_http_upstream_connect(r, u);}

在每个请求选择后端服务器前调用此函数，下面分析一下函数ngx_http_upstream_init_round_robin_peer

//函数：//功能：针对每个请求选择后端服务器前做一些初始化工作ngx_int_tngx_http_upstream_init_round_robin_peer(ngx_http_request_t *r,    ngx_http_upstream_srv_conf_t *us){    ngx_uint_t                         n;    ngx_http_upstream_rr_peer_data_t  *rrp;    rrp = r->upstream->peer.data;    if (rrp == NULL) {        rrp = ngx_palloc(r->pool, sizeof(ngx_http_upstream_rr_peer_data_t));        if (rrp == NULL) {            return NGX_ERROR;        }        r->upstream->peer.data = rrp;    }    rrp->peers = us->peer.data;//后端服务器表    rrp->current = 0;    //n取值为：非后备服务器和后备服务器列表中个数较大的那个值    n = rrp->peers->number;    if (rrp->peers->next && rrp->peers->next->number > n) {        n = rrp->peers->next->number;    }    //如果n小于一个指针变量所能表示的范围    if (n <= 8 * sizeof(uintptr_t)) {//直接使用已有的指针类型的data变量做位图（tried是位图，用来标识在一轮选择中，各个后端服务器是否已经被选择过）        rrp->tried = &rrp->data;        rrp->data = 0;    } else {//否则从内存池中申请空间        n = (n + (8 * sizeof(uintptr_t) - 1)) / (8 * sizeof(uintptr_t));        rrp->tried = ngx_pcalloc(r->pool, n * sizeof(uintptr_t));        if (rrp->tried == NULL) {            return NGX_ERROR;        }    }    //回调函数设置    r->upstream->peer.get = ngx_http_upstream_get_round_robin_peer;    r->upstream->peer.free = ngx_http_upstream_free_round_robin_peer;    r->upstream->peer.tries = rrp->peers->number;#if (NGX_HTTP_SSL)    r->upstream->peer.set_session =                               ngx_http_upstream_set_round_robin_peer_session;    r->upstream->peer.save_session =                               ngx_http_upstream_save_round_robin_peer_session;#endif    return NGX_OK;}

对后端服务器进行一次选择

对后端服务器进行选择时调用函数ngx_http_upstream_get_round_robin_peer

ngx_int_t377 ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data)378 {379     ngx_http_upstream_rr_peer_data_t  *rrp = data;380 381     time_t                         now;382     uintptr_t                      m;383     ngx_int_t                      rc;384     ngx_uint_t                     i, n;385     ngx_connection_t              *c;386     ngx_http_upstream_rr_peer_t   *peer;387     ngx_http_upstream_rr_peers_t  *peers;388 389     ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pc->log, 0,390                    "get rr peer, try: %ui", pc->tries);391 392     now = ngx_time();393 394     /* ngx_lock_mutex(rrp->peers->mutex); */395 396     /*  未实现的陈旧代码，不用去管他    */397     if (rrp->peers->last_cached) {398 399         /* cached connection */400         ...... 415     }416 417     pc->cached = 0;418     pc->connection = NULL;419 420     /*  判断是否只有一台后端服务器  */421     if (rrp->peers->single) {422         peer = &rrp->peers->peer[0];423 424     } else {425 426         /* there are several peers */427                 /*  判断是否是第一次选择,第一次选择的机器数量就是后端服务器的数量*/            /*  表示在连接一个远端服务器时，当前连接出现异常失败后可以重试的次数，                也就是允许的最多失败次数,第一次链接时候可以重试的次数就是主机数  */428         if (pc->tries == rrp->peers->number) {  /*  number是后端服务器的个数   */429 430             /* it's a first try - get a current peer */431 432             i = pc->tries;433 434             for ( ;; ) {                    /*  返回权值最大的服务器下标，rrp->current是经过选择的后端服务器的下标   */435                 rrp->current = ngx_http_upstream_get_peer(rrp->peers);  /*  get_peer是加权轮选的具体实现  */436                 /*  437                 ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,438                                "get rr peer, current: %ui %i",439                                rrp->current,440                                rrp->peers->peer[rrp->current].current_weight);441                    /*  如果机器数大于了32,那么就返回的是该后端服务器在位图中的第几个int块中，小于32就返回0    */442                 n = rrp->current / (8 * sizeof(uintptr_t));                   
 /*  m代表的是该后端服务器在位图中的第几位 */443                 m = (uintptr_t) 1 << rrp->current % (8 * sizeof(uintptr_t));444                     /*  对rrp->tried的具体使用    */                    /*  判断tried位图中该机器是否可用，如果tried[n]为0则表示可用   */                    /*  位图标记过的就不要再去选择了，处于down机状态的也被排除 */445                 if (!(rrp->tried[n] & m)) {446                     peer = &rrp->peers->peer[rrp->current];447                     448                     if (!peer->down) {      /*  非down */ 449                                                         /*  一段时间内的最大失败次数进行判断    */450                         if (peer->max_fails == 0451                             || peer->fails < peer->max_fails)   /*  fails是已经失败的次数*/452                         {453                             break;  454                         }455 456                         if (now - peer->checked > peer->fail_timeout) {457                             peer->checked = now;458                             break;459                         }460                                                     /*  有问题的服务器，将权重设为0，让他先休息一会    */461                         peer->current_weight = 0;462 463                     } else {    /*  down设置位图标记???   
*/464                         rrp->tried[n] |= m;     /*  设置位图标记  */465                     }466                                             /*  如果执行到这里说明没有执行break,表示检验不通过  */                        /*  tries表示该连接失败，可以重试机器数-1            */467                     pc->tries--;    468                 }469                                     /*  如果没有可以重试的机器了则错误   */470                 if (pc->tries == 0) {471                     goto failed;472                 }473 474                 if (--i == 0) {475                     ngx_log_error(NGX_LOG_ALERT, pc->log, 0,476                                   "round robin upstream stuck on %ui tries",477                                   pc->tries);478                     goto failed;479                 }480             }481             /*  break直接跳出来，当前权重减一，时时改变    */ 482             peer->current_weight--;483 484         } else {485                 /*  非第一次进行选择,不是使用轮询，而是利用current进行遍历了    */486             i = pc->tries;487 488             for ( ;; ) {                    /*rrp->current此时是之前返回的权值最大的服务器下标+1(如果是第二次的话)*/489                 n = rrp->current / (8 * sizeof(uintptr_t));490                 m = (uintptr_t) 1 << rrp->current % (8 * sizeof(uintptr_t));491 492                 if (!(rrp->tried[n] & m)) {493 494                     peer = &rrp->peers->peer[rrp->current];495                                             /*  与上面的判断类似    */496                     if (!peer->down) {497 498                         if (peer->max_fails == 0499                             || peer->fails < peer->max_fails)500                         {501                             break;502                         }503 504                         if (now - peer->checked > peer->fail_timeout) {505                             peer->checked = now;506                             break;507                         }508 509                         peer->current_weight = 0;510 511                     } else {512                         rrp->tried[n] |= m;513   
                  }514 515                     pc->tries--;516                 }517 518                 rrp->current++;     /*  没有释放，所以要在这里自增 */519                     /*  超过主机数量，就要从头开始 */520                 if (rrp->current >= rrp->peers->number) {521                     rrp->current = 0;522                 }523                                     /*  可以尝试的主机数为0    */524                 if (pc->tries == 0) {525                     goto failed;526                 }527 528                 if (--i == 0) {529                     ngx_log_error(NGX_LOG_ALERT, pc->log, 0,530                                   "round robin upstream stuck on %ui tries",531                                   pc->tries);532                     goto failed;533                 }534             }535                             /*  权重值减少一  */536             peer->current_weight--;537         }538         /*  无论是第一次还是第二次，都要将选择了的进行标记   */ 539         rrp->tried[n] |= m;540     }541 542     pc->sockaddr = peer->sockaddr;543     pc->socklen = peer->socklen;544     pc->name = &peer->name;545 546     /* ngx_unlock_mutex(rrp->peers->mutex); */547 548     if (pc->tries == 1 && rrp->peers->next) {549         pc->tries += rrp->peers->next->number;550 551         n = rrp->peers->next->number / (8 * sizeof(uintptr_t)) + 1;552         for (i = 0; i < n; i++) {553              rrp->tried[i] = 0;554         }555     }556 557     return NGX_OK;558     /*  使用后备服务器(如果有的话)对错误情况进行处理   */559 failed:560 561     peers = rrp->peers;562             /*  如果非后备服务器都出错了，            此时如果有后备服务器就切换到后备服务器            如果连后备服务器都搞不定就返回NGX_BUSY           */563     if (peers->next) {564 565         /* ngx_unlock_mutex(peers->mutex); */566 567         ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pc->log, 0, "backup servers");568 569         rrp->peers = peers->next;570         pc->tries = rrp->peers->number;571                     /*  rrp->peers->number是后备服务器的数量   */572         n = rrp->peers->number / (8 * sizeof(uintptr_t)) + 1;573         for 
(i = 0; i < n; i++) {574              rrp->tried[i] = 0; /*  位图清0  */575         }576             /*                  对后备服务器执行函数ngx_http_upstream_get_round_robin_peer                对后备服务器进行相关非后备服务器的类似操作                如果连后备服务器都失败则rc==NGX_BUSY            */577         rc = ngx_http_upstream_get_round_robin_peer(pc, rrp);578              579         if (rc != NGX_BUSY) {580             return rc;581         }582 583         /* ngx_lock_mutex(peers->mutex); */584     }585 586     /* all peers failed, mark them as live for quick recovery */587 588     for (i = 0; i < peers->number; i++) {589         peers->peer[i].fails = 0;590     }591 592     /* ngx_unlock_mutex(peers->mutex); */593 594     pc->name = peers->name;595 596     return NGX_BUSY;597 }

后端服务器权值计算在函数ngx_http_upstream_get_peer中
下面分析此函数（有两种版本后面解释两种方法）这是第一种

static ngx_uint_t601 ngx_http_upstream_get_peer(ngx_http_upstream_rr_peers_t *peers)602 {603     ngx_uint_t                    i, n, reset = 0;604     ngx_http_upstream_rr_peer_t  *peer;605 606     peer = &peers->peer[0];607 608     for ( ;; ) {609 610         for (i = 0; i < peers->number; i++) {611                             /*  已经休息的服务器不用计算    */612             if (peer[i].current_weight <= 0) {613                 continue;614             }615 616             n = i;617 618             while (i < peers->number - 1) {619 620                 i++;621                                     /*                          如果都小于0,则都跳过了，此时i等于peers->number-1，                        在执行for中的i++，i变为peers->number                    */622                 if (peer[i].current_weight <= 0) {623                     continue;       624                 }625                     /*  权重的核心计算                           初始状态中peer[n].current_weight等于peer[n].weight                        peer[i].current_weight等于peer[i].weight                        乘以1000的目的是避免浮点运算，直接将除数放大1000倍，也就是间接的将精度提升到小数点后三位                        由于是比较大小，所以同时提高1000倍不会影响结果。                    */626                 if (peer[n].current_weight * 1000 / peer[i].current_weight627                     > peer[n].weight * 1000 / peer[i].weight)628                 {629                     return n;630                 }631 632                 n = i;633             }634 635             if (peer[i].current_weight > 0) {636                 n = i;637             }638             /*  如果权值都小于0,此处不会被执行    */ 639             return n;640         }   /*  for结束   */641            /*  当所有权值都小于0的时候，将他们进行重置，重置为配置文件中的权值  */642         if (reset++) {643             return 0;644         }645         /*  重置权值    */ 646         for (i = 0; i < peers->number; i++) {647             peer[i].current_weight = peer[i].weight;648         }649     }650 }

经典版算法

if (peer[i].current_weight <= 0) { continue; }

if (peer[n].current_weight * 1000 / peer[i].current_weight

> peer[n].weight * 1000 / peer[i].weight) { return n; }

else { n = i; }

我们举个例子来说明这个算法：{ a, b, c }三个服务器，weight值是{ 5, 1, 2 }，那么分配的过程参见下面这张表：

| selected server | current_weights | reason |
|---|---|---|
| c | { 5, 1, 2 } | 第二个if无法满足 |
| b | { 5, 1, 1 } | 1 / 1 > 1 / 2 |
| a | { 5, 0, 1 } | 5 / 1 > 5 / 2 |
| a | { 4, 0, 1 } | 4 / 1 > 5 / 2 |
| a | { 3, 0, 1 } | 3 / 1 > 5 / 2 |
| c | { 2, 0, 1 } | 第二个if无法满足 |
| a | { 2, 0, 0 } | 没得选了 |
| a | { 1, 0, 0 } | 没得选了 |

这么看效果还不错，但是如果仔细看会发现有缺陷。就是weight小的server分配不均。其实b在第四或者第五位被分配是比较好的。可能有人会说为什么要这样吹毛求疵呢。那我们设法将第六位被分配的c去掉，其实很简单，也就是weight设置成{ 5, 1, 1 }，那么分配序列就成了c, b, a, a, a, a, a，将这个算法的缺点放到最大。

下面介绍第二种

//按照当前各服务器权值进行选择static ngx_http_upstream_rr_peer_t *ngx_http_upstream_get_peer(ngx_http_upstream_rr_peer_data_t *rrp){    time_t                        now;    uintptr_t                     m;    ngx_int_t                     total;    ngx_uint_t                    i, n;    ngx_http_upstream_rr_peer_t  *peer, *best;    now = ngx_time();    best = NULL;    total = 0;    for (i = 0; i < rrp->peers->number; i++) {//计算当前服务器的标记位在位图中的位置        n = i / (8 * sizeof(uintptr_t));        m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t));//已经选择过，跳过        if (rrp->tried[n] & m) {            continue;        }//当前服务器对象        peer = &rrp->peers->peer[i];//当前服务器已宕机，排除        if (peer->down) {            continue;        }//根据指定一段时间内最大失败次数做判断        if (peer->max_fails            && peer->fails >= peer->max_fails            && now - peer->checked <= peer->fail_timeout)        {            continue;        }        peer->current_weight += peer->effective_weight;        total += peer->effective_weight;        if (peer->effective_weight < peer->weight) {            peer->effective_weight++;        }        if (best == NULL || peer->current_weight > best->current_weight) {            best = peer;        }    }    if (best == NULL) {        return NULL;    }    //所选择的服务器在服务器列表中的位置    i = best - &rrp->peers->peer[0];    rrp->current = i;    n = i / (8 * sizeof(uintptr_t));    m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t));    //位图相应位置置位    rrp->tried[n] |= m;    best->current_weight -= total;    best->checked = now;    return best;}

要理解这个函数的工作原理，先要区分表示服务的ngx_http_upstream_rr_peer_t结构体中的以下三个成员变量：

    ngx_int_t                       current_weight;    ngx_int_t                       effective_weight;    ngx_int_t                       weight;

effective_weight相当于质量(来源于配置的weight)，current_weight相当于重量。前者反映本质，一般是不变的。current_weight是运行时的动态权值，它的变化基于effective_weight。但是effective_weight在其对应的peer服务异常时，会被调低，当服务恢复正常时，effective_weight会逐渐恢复到实际值（配置的weight）。

下面我们结合具体的代码来看。

它们在函数ngx_http_upstream_init_round_robin中被初始化：

 for (i = 0; i < us->servers->nelts; i++) {            for (j = 0; j < server[i].naddrs; j++) {                if (server[i].backup) {                    continue;                }                peers->peer[n].weight = server[i].weight;                peers->peer[n].effective_weight = server[i].weight;                peers->peer[n].current_weight = 0;                n++;            }        }        /* backup servers */        for (i = 0; i < us->servers->nelts; i++) {            for (j = 0; j < server[i].naddrs; j++) {                if (!server[i].backup) {                    continue;                }                backup->peer[n].weight = server[i].weight;                backup->peer[n].effective_weight = server[i].weight;                backup->peer[n].current_weight = 0;                n++;            }        }     /* an upstream implicitly defined by proxy_pass, etc. */    for (i = 0; i < u.naddrs; i++) {        peers->peer[i].weight = 1;        peers->peer[i].effective_weight = 1;        peers->peer[i].current_weight = 0;    }

可以看到weight、effective_weight都是初始化为配置项中的weight值。current_weight初始化为0.

下面分析这三个变量在负载均衡过程中的变化。

weight的值在整个运行过程中不发生变化。

total变量记录了针对一个服务列表的一次轮询过程中轮询到的所有服务的effective_weight总和。在每一次针对服务列表的轮询之前会置为0。

遍历服务列表的过程中，每遍历到一个服务，会在该服务的current_weight上加上其对应的effective_weight。这个是累加。如果对同一个服务列表进行另一次轮询，那么会在前面计算的current_weight的基础之上再加上effective_weight。

轮询策略是取current_weight最大的服务器。每次取到后端服务（用best表示）后，都会把该对象peer的current_weight减去total的值。因为该服务刚被选中过，因此要降低权值。

关于effective_weight的变化，有两处，一个是在函数ngx_http_upstream_get_peer中：

        //服务正常，effective_weight 逐渐恢复正常            if (peer->effective_weight < peer->weight) {            peer->effective_weight++;        }

另一处是在释放后端服务的函数ngx_http_upstream_free_round_robin_peer中：（后面介绍这个函数）

        if (peer->max_fails) {             //服务发生异常时，调低effective_weight            peer->effective_weight -= peer->weight / peer->max_fails;        }

权重高的会优先被选中，而且被选中的频率也更高。权重低的也会由于权重逐渐增长获得被选中的机会

下面给出一个加权轮询的选择实例：

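（三个服务器{ a, b, c }，weight值为{ 5, 1, 2 }，每轮的total为8）

| selected server | current_weight before selected | current_weight after selected |
|---|---|---|
| a | { 5, 1, 2 } | { -3, 1, 2 } |
| c | { 2, 2, 4 } | { 2, 2, -4 } |
| a | { 7, 3, -2 } | { -1, 3, -2 } |
| a | { 4, 4, 0 } | { -4, 4, 0 } |
| b | { 1, 5, 2 } | { 1, -3, 2 } |
| a | { 6, -2, 4 } | { -2, -2, 4 } |
| c | { 3, -1, 6 } | { 3, -1, -2 } |
| a | { 8, 0, 0 } | { 0, 0, 0 } |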

释放后端服务器

释放后端服务器可能有两种情况

1.连接后端服务器并且正常处理当前客户端请求后释放后端服务器

2.如果在某一轮选择中，选择后的服务器连接失败或者处理请求时出现错误，需要重新进行选择

//函数：//功能：释放后端服务器voidngx_http_upstream_free_round_robin_peer(ngx_peer_connection_t *pc, void *data,    ngx_uint_t state){    ngx_http_upstream_rr_peer_data_t  *rrp = data;    time_t                       now;    ngx_http_upstream_rr_peer_t  *peer;    ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,                   "free rr peer %ui %ui", pc->tries, state);    /* TODO: NGX_PEER_KEEPALIVE */    //后端服务只有一个    if (rrp->peers->single) {        pc->tries = 0;        return;    }    peer = &rrp->peers->peer[rrp->current];    //在某一轮选择里，某次选择的服务器因连接失败或请求处理失败而需要重新进行选择    if (state & NGX_PEER_FAILED) {        now = ngx_time();        /* ngx_lock_mutex(rrp->peers->mutex); *///已尝试失败次数加一        peer->fails++;        peer->accessed = now;        peer->checked = now;//如果有最大失败次数限制        if (peer->max_fails) {    //服务发生异常时，调低effective_weight            peer->effective_weight -= peer->weight / peer->max_fails;        }        ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,                       "free rr peer failed: %ui %i",                       rrp->current, peer->effective_weight);//effective_weigh < 0将他置0，让他休息一轮        if (peer->effective_weight < 0) {            peer->effective_weight = 0;        }        /* ngx_unlock_mutex(rrp->peers->mutex); */    } else {        /* mark peer live if check passed */        if (peer->accessed < peer->checked) {            peer->fails = 0;        }    }    //ngx_peer_connection_t结构体中tries字段：    //表示在连接一个远端服务器时，当前连接出现异常失败后可以重试的次数，也就是允许失败的次数    if (pc->tries) {        pc->tries--;    }    /* ngx_unlock_mutex(rrp->peers->mutex); */}

整个加权轮询处理流程大致处理如下图所示

首先是全局初始化，由函数ngx_http_upstream_init_round_robin完成，它在函数ngx_http_upstream_init_main_conf中被调用，代码：

static char *ngx_http_upstream_init_main_conf(ngx_conf_t *cf, void *conf){    ...    for (i = 0; i < umcf->upstreams.nelts; i++) {        //全局初始化        init = uscfp[i]->peer.init_upstream ? uscfp[i]->peer.init_upstream:                                            ngx_http_upstream_init_round_robin;        if (init(cf, uscfp[i]) != NGX_OK) {            return NGX_CONF_ERROR;        }    }...}

收到客户请求之后，针对当前请求进行初始化，完成此功能的函数是ngx_http_upstream_init_round_robin_peer，它在函数ngx_http_upstream_init_request中被调用：

static voidngx_http_upstream_init_request(ngx_http_request_t *r){...if (uscf->peer.init(r, uscf) != NGX_OK) {        ngx_http_upstream_finalize_request(r, u,                                           NGX_HTTP_INTERNAL_SERVER_ERROR);        return;    }    ngx_http_upstream_connect(r, u);}

然后是针对每个请求选择后端服务器，实现此功能的函数是ngx_http_upstream_get_round_robin_peer。它在函数ngx_event_connect_peer中被调用：

//函数：连接后端upstreamngx_int_tngx_event_connect_peer(ngx_peer_connection_t *pc){...    //此处调用选择后端服务器功能函数ngx_http_upstream_get_round_robin_peer    rc = pc->get(pc, pc->data);    if (rc != NGX_OK) {        return rc;    }    s = ngx_socket(pc->sockaddr->sa_family, SOCK_STREAM, 0);...}

之后是测试连接ngx_http_upstream_test_connect。它在函数ngx_http_upstream_send_request被调用：

//函数：发送数据到后端upstreamstatic voidngx_http_upstream_send_request(ngx_http_request_t *r, ngx_http_upstream_t *u){...    if (!u->request_sent && ngx_http_upstream_test_connect(c) != NGX_OK) {        //测试连接失败        ngx_http_upstream_next(r, u, NGX_HTTP_UPSTREAM_FT_ERROR);        return;    }...}

如果测试成功，继续后续处理，并释放后端服务器。

如果测试失败，调用ngx_http_upstream_next函数，这个函数可能再次调用peer.get选择别的后端服务器进行连接。

static voidngx_http_upstream_next(ngx_http_request_t *r, ngx_http_upstream_t *u,    ngx_uint_t ft_type){...    if (u->peer.sockaddr) {        if (ft_type == NGX_HTTP_UPSTREAM_FT_HTTP_404) {            state = NGX_PEER_NEXT;        } else {            state = NGX_PEER_FAILED;        }        //释放后端服务器        u->peer.free(&u->peer, u->peer.data, state);        u->peer.sockaddr = NULL;    }...if (status) {        u->state->status = status;        if (u->peer.tries == 0 || !(u->conf->next_upstream & ft_type)) {#if (NGX_HTTP_CACHE)            if (u->cache_status == NGX_HTTP_CACHE_EXPIRED                && (u->conf->cache_use_stale & ft_type))            {                ngx_int_t  rc;                rc = u->reinit_request(r);                if (rc == NGX_OK) {                    u->cache_status = NGX_HTTP_CACHE_STALE;                    rc = ngx_http_upstream_cache_send(r, u);                }                ngx_http_upstream_finalize_request(r, u, rc);                return;            }#endif            //结束请求            ngx_http_upstream_finalize_request(r, u, status);            return;        }    }...//再次发起连接ngx_http_upstream_connect(r, u);}

函数ngx_http_upstream_connect中会调用ngx_event_connect_peer，进而调用ngx_http_upstream_get_round_robin_peer再次选择后端服务器。
本文参考资料

http://blog.csdn.net/xiajun07061225/article/details/9318871

http://ucshell.com/2014/05/05/nginx-loadbalance-round-robin/
http://blog.sina.com.cn/s/blog_7303a1dc01014i0j.html

0 0