用Mochiweb打造百万级Comet应用，第三部分（续）

来源：互联网发布：淘宝卖家问大家功能编辑：程序博客网时间：2024/05/21 10:04

提示：如有转载请注明作者独舞及出处

原文：A Million-user Comet Application with Mochiweb, Part 3

参考资料：Comet--基于 HTTP 长连接、无须在浏览器端安装插件的“服务器推”技术为“Comet”

MochiWeb--建立轻量级HTTP服务器的Erlang库

floodtest2.erl

% Load generator: opens many long-lived HTTP connections to the mochiweb
% server, spread across several local source addresses.
-module(floodtest2).
% Everything is exported; monitor/0 spawns monitor0/1 through ?MODULE, which
% only works for exported functions.
-compile(export_all).
-define(SERVERADDR, "10.1.2.3"). % where mochiweb is running
-define(SERVERPORT, 8000). % port handed to connect/5 together with ?SERVERADDR
% Entry point: run the flood test against the built-in address plan.
%
% The plan pairs local address 10.0.0.N (N = 1..17) with the id range
% (N-1)*62000+1 .. N*62000, i.e. the same table this bash one-liner prints:
% EACH=62000; for i in `seq 1 17`; do echo "{{10,0,0,$i}, $((($i-1)*$EACH+1)), $(($i*$EACH))}, "; done
% (choose some available address space). Interval is forwarded unchanged to
% start/2, whose result is returned.
run(Interval) ->
    IdsPerAddress = 62000,
    Plan = [
        {{10, 0, 0, N}, (N - 1) * IdsPerAddress + 1, N * IdsPerAddress}
     || N <- lists:seq(1, 17)
    ],
    start(Plan, Interval).
% Launch one connection-opening process per {LocalIp, FirstId, LastId} entry.
%
% Config   - non-empty list of {LocalIp, FirstId, LastId} tuples.
% Interval - desired delay in milliseconds; it is divided by the number of
%            entries so every per-address process waits proportionally less.
% Returns ok once all processes have been spawned.
start(Config, Interval) ->
    Monitor = monitor(),
    % `receive ... after` only accepts integer timeouts, so the quotient
    % (a float from `/`) is rounded before being handed to the workers.
    AdjustedInterval = round(Interval / length(Config)),
    % spawn/2 is spawn(Node, Fun); wrap the call in a closure and use spawn/1.
    lists:foreach(
        fun({Ip, Lower, Upper}) ->
            spawn(fun() -> start(Lower, Upper, Ip, AdjustedInterval, Monitor) end)
        end,
        Config
    ),
    ok.
% Open one connection for every id in LowerID..UpperID (inclusive), pausing
% Interval milliseconds between attempts.
%
% Each connection runs in its own process, bound to LocalIP, and requests
% "/test/<id>". Returns done once the range is exhausted; an empty range
% (LowerID > UpperID) returns done immediately.
start(LowerID, UpperID, _, _, _) when LowerID > UpperID -> done;
start(LowerID, UpperID, LocalIP, Interval, Monitor) ->
    % The id is an integer; it has to be converted before being appended.
    Path = "/test/" ++ integer_to_list(LowerID),
    % spawn/2 is spawn(Node, Fun), so the call is wrapped in a closure.
    spawn(fun() -> connect(?SERVERADDR, ?SERVERPORT, LocalIP, Path, Monitor) end),
    % The timeout must be an integer even if the caller passes a float.
    Delay = round(Interval),
    receive
    after Delay -> start(LowerID + 1, UpperID, LocalIP, Interval, Monitor)
    end.
% Open one connection from ClientIP to ServerAddr:ServerPort, send a GET for
% Path and read from the socket until it closes or fails.
%
% Sends `open` to Monitor once connected; do_recv/2 reports traffic and the
% close. A failed connect or send crashes this process (badmatch). The socket
% is closed on every exit path. Returns ok.
connect(ServerAddr, ServerPort, ClientIP, Path, Monitor) ->
    Opts = [binary, {packet, 0}, {ip, ClientIP}, {reuseaddr, true}, {active, false}],
    {ok, Sock} = gen_tcp:connect(ServerAddr, ServerPort, Opts),
    Monitor ! open,
    try
        % Request lines must end in CRLF; gen_tcp:send/2 accepts the iolist
        % returned by io_lib:format/2 directly.
        Req = io_lib:format("GET ~s\r\nHost: ~s\r\n\r\n", [Path, ServerAddr]),
        ok = gen_tcp:send(Sock, Req),
        do_recv(Sock, Monitor)
    after
        gen_tcp:close(Sock)
    end,
    ok.
% Read from Sock until it is closed or an error occurs.
%
% For every packet received, Monitor gets `chunk` and {bytes, Size}, and the
% payload and its size are printed. When reading stops, Monitor gets `closed`.
% Returns `closed` on a normal close, otherwise the result of io:format/2.
do_recv(Sock, Monitor) ->
    case gen_tcp:recv(Sock, 0) of
        {ok, B} ->
            % The monitor keeps a chunk counter; feed it alongside the byte count.
            Monitor ! chunk,
            Monitor ! {bytes, byte_size(B)},
            io:format("Recvd ~s~n", [binary_to_list(B)]),
            io:format("Recvd ~w bytes~n", [byte_size(B)]),
            do_recv(Sock, Monitor);
        {error, closed} ->
            Monitor ! closed,
            closed;
        Other ->
            Monitor ! closed,
            io:format("Other:~w~n", [Other])
    end.
% Start the statistics process and schedule a `report` message to it every
% 10000 ms. The process receives stats and reports how much data was received.
% Returns the pid of the statistics process.
monitor() ->
    InitialStats = {0, 0, 0, 0},
    Collector = spawn(?MODULE, monitor0, [InitialStats]),
    timer:send_interval(10000, Collector, report),
    Collector.
% Statistics loop. State is {Open, Closed, Chunks, Bytes}.
%
% Messages handled:
%   report     - print the current counters
%   open       - one more connection opened
%   closed     - one more connection closed
%   chunk      - one more chunk received
%   {bytes, B} - B more bytes received
% The loop continues after every message, including `report`, so periodic
% reporting keeps working for the whole run.
monitor0({Open, Closed, Chunks, Bytes} = S) ->
    receive
        report ->
            io:format("{Open, Closed, Chunks, Bytes} = ~w~n", [S]),
            monitor0(S);
        open ->
            monitor0({Open + 1, Closed, Chunks, Bytes});
        closed ->
            monitor0({Open, Closed + 1, Chunks, Bytes});
        chunk ->
            monitor0({Open, Closed, Chunks + 1, Bytes});
        {bytes, B} ->
            monitor0({Open, Closed, Chunks, Bytes + B})
    end.

作为一个初始的测试，我像第一部分描述的那样连接mochiweb应用 - 它简单的每隔10秒给每个客户端发送一条消息。

erl> c(floodtest2), floodtest2:run(20).

这很快就吃掉了我的内存。

像那样用gen_tcp打开很多连接会吃掉大量内存。在不做任何其他调整的情况下，我估计它需要约36GB内存才能正常工作。我没有兴趣去优化这个临时拼凑的erlang http客户端（在真实世界里，这将是1M个web浏览器），而手头内存多于32GB的机器只有我们的生产数据库服务器，我找不到一个足够好的理由为了这个测试让last.fm下线：）另外，它看起来始终只能打开约64,500个端口。

于是我决定改用值得信赖的 libevent，并且很高兴地发现它带有 HTTP API。新版本还提供了 evhttp_connection_set_local_address 函数，看起来很有希望。

这是采用libevent库用C编写的客户端：

#include <sys/types.h>
#include <sys/time.h>
#include <sys/queue.h>
#include <stdlib.h>
#include <err.h>
#include <event.h>
#include <evhttp.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <time.h>
#include <pthread.h>
#define BUFSIZE 4096 // capacity of the scratch buffer below
#define NUMCONNS 62000 // connections main() opens per local address
#define SERVERADDR "10.103.1.43" // host passed to evhttp_connection_new
#define SERVERPORT 8000 // port passed to evhttp_connection_new
#define SLEEP_MS 10 // pause between connection attempts, in milliseconds
char buf[BUFSIZE]; // scratch space chunkcb drains received data into
int bytes_recvd = 0; // running total of bytes read by chunkcb
int chunks_recvd = 0; // number of times chunkcb has run
int closed = 0; // number of times reqcb has run
int connected = 0; // requests issued so far; also the id used in the request path
// called per chunk received
//
// Drains everything currently held in the request's input buffer into the
// shared scratch buffer, adds the byte count to bytes_recvd and counts the
// chunk. Once at least NUMCONNS requests have been issued, a summary line is
// printed every 10000 chunks.
//   req - request whose input_buffer holds the newly arrived data
//   arg - unused
void chunkcb(struct evhttp_request * req, void * arg)
{
    int s;

    (void) arg;
    // A chunk may be larger than BUFSIZE, so keep reading until the buffer is
    // empty; a negative (error) return is never added to the total.
    while ((s = evbuffer_remove(req->input_buffer, buf, BUFSIZE)) > 0)
        bytes_recvd += s;
    chunks_recvd++;
    if (connected >= NUMCONNS && chunks_recvd % 10000 == 0)
        printf(">Chunks: %d\tBytes: %d\tClosed: %d\n", chunks_recvd, bytes_recvd, closed);
}
// Completion callback: invoked when a request finishes.
// Neither argument is inspected; the call only bumps the global `closed` counter.
void reqcb(struct evhttp_request * req, void * arg)
{
    closed += 1;
}
// Opens NUMCONNS connections from each of the 17 local addresses
// 10.224.0.1 .. 10.224.0.17 to SERVERADDR:SERVERPORT, requesting /test/<n>
// with n counting up from 1, then hands control to the event loop.
// Pending events are processed after each new request, and SLEEP_MS
// milliseconds pass between connection attempts. Exits with status 1 if a
// connection or request object cannot be allocated.
int main(int argc, char **argv)
{
    struct evhttp_connection *conn; // client connection, not a `struct evhttp` server
    struct evhttp_request *request;
    char addr[16];
    char path[32]; // eg: "/test/123"
    int i, octet;

    (void) argc;
    (void) argv;
    event_init();
    for (octet = 1; octet <= 17; octet++) {
        snprintf(addr, sizeof(addr), "10.224.0.%d", octet);
        for (i = 1; i <= NUMCONNS; i++) {
            conn = evhttp_connection_new(SERVERADDR, SERVERPORT);
            if (conn == NULL)
                errx(1, "evhttp_connection_new failed");
            evhttp_connection_set_local_address(conn, addr);
            // evhttp_set_timeout() configures a server; a client connection
            // needs evhttp_connection_set_timeout().
            evhttp_connection_set_timeout(conn, 864000); // 10 day timeout
            request = evhttp_request_new(reqcb, NULL);
            if (request == NULL)
                errx(1, "evhttp_request_new failed");
            request->chunk_cb = chunkcb;
            snprintf(path, sizeof(path), "/test/%d", ++connected);
            if (i % 100 == 0)
                printf("Req: %s\t->\t%s\n", addr, path);
            evhttp_make_request(conn, request, EVHTTP_REQ_GET, path);
            // Let already-open connections make progress while ramping up.
            event_loop(EVLOOP_NONBLOCK);
            if (connected % 200 == 0)
                printf("\nChunks: %d\tBytes: %d\tClosed: %d\n", chunks_recvd, bytes_recvd, closed);
            usleep(SLEEP_MS * 1000);
        }
    }
    event_dispatch();
    return 0;
}

大多数参数都用 #define 硬编码，因此需要通过编辑源码来配置它，然后重新编译。

编译运行：
$ gcc -o httpclient httpclient.c -levent
$ ./httpclient

这样还是不能打开多于64,500个端口，尽管它只用了很少的内存。

尽管我指定了本地地址，端口数量还是会受到限制：临时端口无论是由内核还是由TCP栈分配，总数都无法超过2^16。因此，为了能打开多于64,500个连接，你需要同时指定本地地址和本地端口，并自行管理它们。不幸的是，libevent 的 HTTP API 没有指定本地端口的选项。我为 libevent 打了补丁，加了一个合适的函数：
void evhttp_connection_set_local_port(struct evhttp_connection *evcon, u_short port);.

这个过程相当顺利：libevent 的代码写得很好，文档也相当友好。

安装我修改过的libevent之后，我就可以在 set_local_address 那一行下面添加如下代码：
evhttp_connection_set_local_port(evhttp_connection, 1024+i);

用它替换后，来自不同地址的多个连接就能使用同一个端口号（因为同时指定了本地地址）。我重新编译了客户端并让它运行了一段时间，以验证它能突破2^16的限制。

Netstat验证:
# netstat -n | awk '/^tcp/ {t[$NF]++}END{for(state in t){print state, t[state]}}'
TIME_WAIT 8
ESTABLISHED 118222

这显示多少端口在不同状态被打开。我们最后能够打开多于2^16个连接.

现在我们有了在一台机子上打开百万http连接的工具。看起来每个连接消耗约2KB内存，外加内核的开销。是时候用它对我们的mochiweb comet服务器做百万连接测试了。

C1024K测试-1 百万comet连接

这次测试我用了4台不同配置的服务器。这些机器的配置可能比测试所需的要高，但它们是现成的，将来会投入生产使用，正好可以拿来做一次很变态的测试。这四台服务器都在同一个千兆局域网上，中间经过3个交换机和一个路由器。

百万连接测试与第一、二部分的10k测试类似，主要区别是换了客户端（现在用C和libevent编写），并且我是在一个使用多台机器的、正式的分布式erlang环境中运行的。

服务器1 - 四核 2GHz CPU, 16GB 内存

启动订阅管理器
调入好友数据
启动路由器

服务器2 - 双通道四核 2.8GHz CPU, 32GB 内存

启动mochiweb应用

服务器3 - 四核 2GHz CPU, 16GB 内存

创建17个真实ip
安装打了补丁的libevent
运行客户端: ./httpclient 每秒建立100个连接直到1M

服务器4 - 双核 2GHz, 2GB内存

运行msggen程序, 向路由器发送大量的消息

在猛增到一百万连接期间我测量了mochiweb的内存用量，还有在剩下的时间里：

httpclient在每个连接之间加了10ms延时，因此打开一百万连接用了将近3个小时。打开1M连接后，mochiweb进程的常驻内存大约为25GB。运行的服务器都由Ganglia监控，它测量CPU、网络和内存用量并生成漂亮的图表：

你可以看到它需要大约38GB内存并且开始使用swap。我猜这个差值主要是被内核为保持这些打开的连接而消耗掉的。当我开始发送消息时就达到了顶点。

消息由1000个进程产生，每个进程平均每60ms发送一条消息，总共每秒大约16,666条消息：

erl> [ spawn( fun()->msggen:start(1000000, 10+random:uniform(100), 1000000) end) || I <- lists:seq(1,1000) ].

服务器 (server-4) 产生消息看起来如下图所示（Ganglia）:

每秒有10MB的消息发出 - 每秒16,666条消息. 典型的这些消息来自消息总线，应用服务器，或者已存在架构的一部分。

当我开始发送消息时，服务器1的负载(运行订阅管理器和路由器)一直低于1,CPU占用率从0增到5%。

服务器2的CPU (运行mochiweb应用, 有1M个连接) 增长的比较显著：

自然地，进程在需要处理消息时不得不离开休眠状态，内存用量会轻微增加。没有消息且所有连接都处于打开状态，是内存用量最理想的情况——可想而知，实际工作时需要更多内存。

保险起见，mochiweb机器需要40GB内存以支撑1M个活跃的comet连接：30GB用于mochiweb应用，剩下的10GB用于内核。换句话说，你需要为每个连接预留40KB。

在用大量连接做各种测试的过程中，我最终对sysctl.conf文件做了一些修改。这在一定程度上是试错的结果：我对内部原理了解得不够，无法就应该修改哪些值做出明智的决定。我的策略是等待问题发生，检查 /var/log/kern.log 看报告了什么神秘的错误，然后添加听起来合理的设置。这是上面测试使用的设置信息：

net.core.rmem_max = 33554432
net.core.wmem_max = 33554432
net.ipv4.tcp_rmem = 4096 16384 33554432
net.ipv4.tcp_wmem = 4096 16384 33554432
net.ipv4.tcp_mem = 786432 1048576 26777216
net.ipv4.tcp_max_tw_buckets = 360000
net.core.netdev_max_backlog = 2500
vm.min_free_kbytes = 65536
vm.swappiness = 0
net.ipv4.ip_local_port_range = 1024 65535

我很想学习更多关于linux tcp调优的知识，这样就能对这些设置做出更明智的决策。可以确定这些设置不是最优的，但至少它们足以应付1M的连接。这些测试运行在一个64bit的erlang虚拟机上，字长是8字节而不是4字节，这或许可以解释为什么内存用量比我在第二部分做c10k测试时高得多。

一个用Libevent实现的Erlang C-Node

在摆弄过libevent的HTTP API之后，针对一个用C写的HTTP服务器做同样的1M连接测试看起来完全合理，这样我们就有了比较的基础。

我猜打开内核的poll模式意味着erlang虚拟机能够使用epoll（或类似机制），但即便如此显然仍有处理连接的开销，把连接处理委派给用libevent实现的C程序或许能减轻这部分负担。我想尽量重用已有的Erlang代码，因此让我们尽可能少地用C——只用在连接处理和HTTP部分。我也一直在找一个试用Erlang C接口的理由，因此下面的程序把两者结合了起来。它是一个用C和libevent写的comet http服务器，用整数id标识用户（像我们的mochiweb应用一样），同时扮演一个Erlang C节点。

它连接到一个指定的erlang节点，监听形如 {123, <<"Hello user 123">>} 的消息，然后把 "Hello user 123" 分派给用户123（如果该用户已连接）。发给未连接用户的消息会被丢弃，就像前面的例子一样。

httpdcnode.c

#include <sys/types.h>
#include <sys/time.h>
#include <sys/queue.h>
#include <stdlib.h>
#include <err.h>
#include <event.h>
#include <evhttp.h>
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include "erl_interface.h"
#include "ei.h"
#include <pthread.h>
#define BUFSIZE 1024 // size of the message receive buffer in cnode_run
#define MAXUSERS (17*65536) // C1024K
// List of current http requests by uid:
// indexed directly by uid; a NULL entry means no open request is recorded
// for that uid.
struct evhttp_request * clients[MAXUSERS+1];
// Memory to store uids passed to the cleanup callback:
// request_handler registers &slots[uid] as the callback argument and cleanup
// reads the int back as the uid, so each slot is expected to hold its own index.
int slots[MAXUSERS+1];
// called when user disconnects
//
// Connection close callback: logs the uid and clears its entry in the global
// clients table so no further chunks are sent to the dead request.
//   evcon - the connection that closed (not inspected)
//   arg   - pointer to an int holding the uid, as registered by request_handler
void cleanup(struct evhttp_connection *evcon, void *arg)
{
    int *uidp = (int *) arg;

    (void) evcon;
    fprintf(stderr, "disconnected uid %d\n", *uidp);
    clients[*uidp] = NULL;
}
// handles http connections, sets them up for chunked transfer,
// extracts the user id and registers in the global connection table,
// also sends a welcome chunk.
//
// The uid is the decimal number following "/test/" in the request URI.
// Replies with HTTP_NOTFOUND when the URI has no positive uid and with
// HTTP_SERVUNAVAIL when the uid exceeds MAXUSERS; otherwise the reply is left
// open so later chunks can be pushed to it, and cleanup() is registered to
// run when the connection closes.
//   req - the incoming request
//   arg - unused
// NOTE(review): a second request for a uid that is already connected
// overwrites clients[uid]; when the first connection later closes, cleanup()
// clears the entry belonging to the newer one.
void request_handler(struct evhttp_request *req, void *arg)
{
    const char *uri = evhttp_request_uri(req);
    struct evbuffer *buf;
    long parsed = -1;
    int uid;

    (void) arg;
    buf = evbuffer_new();
    if (buf == NULL){
        err(1, "failed to create response buffer");
    }
    evhttp_add_header(req->output_headers, "Content-Type", "text/html; charset=utf-8");
    if(strncmp(uri, "/test/", 6) == 0){
        // strtol saturates on overflow, whereas atoi's behaviour is undefined;
        // out-of-range values are rejected by the checks below.
        parsed = strtol(uri + 6, NULL, 10);
    }
    if(parsed <= 0){
        evbuffer_add_printf(buf, "User id not found, try /test/123 instead");
        evhttp_send_reply(req, HTTP_NOTFOUND, "Not Found", buf);
        evbuffer_free(buf);
        return;
    }
    if(parsed > MAXUSERS){
        evbuffer_add_printf(buf, "Max uid allowed is %d", MAXUSERS);
        evhttp_send_reply(req, HTTP_SERVUNAVAIL, "We ran out of numbers", buf);
        evbuffer_free(buf);
        return;
    }
    uid = (int) parsed;
    evhttp_send_reply_start(req, HTTP_OK, "OK");
    // Send welcome chunk:
    evbuffer_add_printf(buf, "Welcome, Url: ‘%s’ Id: %d\n", uri, uid);
    evhttp_send_reply_chunk(req, buf);
    evbuffer_free(buf);
    // put reference into global uid->connection table:
    clients[uid] = req;
    // cleanup() reads the uid back through this pointer, so make sure the
    // slot really contains it.
    slots[uid] = uid;
    // set close callback
    evhttp_connection_set_closecb( req->evcon, cleanup, &slots[uid] );
}
// runs in a thread - the erlang c-node stuff
// expects msgs like {uid, msg} and sends a a ‘msg’ chunk to uid if connected
void cnode_run()
{
int fd; /* fd to Erlang node */
int got; /* Result of receive */
unsigned char buf[BUFSIZE]; /* Buffer for incoming message */
ErlMessage emsg; /* Incoming message */
ETERM *uid, *msg;
erl_init(NULL, 0);
if (erl_connect_init(1, "secretcookie", 0) == -1)
erl_err_quit("erl_connect_init");
if ((fd = erl_connect("httpdmaster@localhost")) < 0)
erl_err_quit("erl_connect");
fprintf(stderr, "Connected to httpdmaster@localhost/n/r");
struct evbuffer *evbuf;
while (1) {
got = erl_receive_msg(fd, buf, BUFSIZE, &emsg);
if (got == ERL_TICK) {
continue;
} else if (got == ERL_ERROR) {
fprintf(stderr, "ERL_ERROR from erl_receive_msg./n");
break;
} else {
if (emsg.type == ERL_REG_SEND) {
// get uid and body data from eg: {123, <<"Hello">>}
uid = erl_element(1, emsg.msg);
msg = erl_element(2, emsg.msg);
int userid = ERL_INT_VALUE(uid);
char *body = (char *) ERL_BIN_PTR(msg);
int body_len = ERL_BIN_SIZE(msg);
// Is this userid connected?
if(clients[userid]){
fprintf(stderr, "Sending %d bytes to uid %d/n", body_len, userid);
evbuf = evbuffer_new();
evbuffer_add(evbuf, (const void*)body, (size_t) body_len);
evhttp_send_reply_chunk(clients[userid], evbuf);
evbuffer_free(evbuf);
}else{
fprintf(stderr, "Discarding %d bytes to uid %d - user not connected/n",
body_len, userid);
// noop
}
erl_free_term(emsg.msg);
erl_free_term(uid);
erl_free_term(msg);
}
}
}
// if we got here, erlang connection died.
// this thread is supposed to run forever