使用nginx, memcached, libevent, mysql实现url排重

来源:互联网 发布:慕清明知乎 编辑:程序博客网 时间:2024/06/07 06:21

0. 思路
用nginx在前端做反向代理, 把请求转到C写的程序url_query, 这个程序去memcached里取url, 取不到则返回HTTP 404 (请求的uri为空或过长时直接关掉connection).
此时nginx会fallback到另一个C写的程序url_set, 该程序去查mysql数据库, 如果有则存入memcached, 同时返回HTTP 200, 否则返回HTTP 404.

这样写的好处就是url_query挂掉不影响服务, memcached挂掉也不影响服务.
而且可以在不同的服务器上开多个url_query, url_set, memcached.

1. 数据表

-- Table of known URLs; url_set looks rows up by exact match on `url`.
-- NOTE(review): indexing the full VARCHAR(1024) column may exceed the index
-- key length limit of some storage engine / charset combinations -- confirm.
CREATE TABLE `url` (
    `url`  VARCHAR(1024),
    `date` DATE,
    KEY `url` (`url`)
);

2. config.h

123456789101112131415161718
/*
 * config.h - compile-time settings shared by url_query.c and url_set.c.
 */
#ifndef URL_DEDUP_CONFIG_H
#define URL_DEDUP_CONFIG_H

/* Upper bound (exclusive) on the length of an accepted request URI. */
#define MAXLEN 2048

/* Address url_query listens on. */
#define QUERY_HOST "127.0.0.1"
#define QUERY_PORT 8111

/* Address url_set listens on. */
#define SET_HOST "127.0.0.1"
#define SET_PORT 8222

/* memcached server used by both programs. */
#define MEMCACHE_HOST "127.0.0.1"
#define MEMCACHE_PORT 11211

/*
 * Arguments passed to mysql_real_connect() by url_set.
 * NOTE(review): some MySQL client headers also define MYSQL_PORT; confirm
 * that including this header before <mysql/mysql.h> does not cause a
 * conflicting redefinition on the target system.
 */
#define MYSQL_HOST "127.0.0.1"
#define MYSQL_PORT 0
#define MYSQL_USER "user"
#define MYSQL_PASS "password"
#define MYSQL_DATABASE "isdup"
#define MYSQL_SOCK NULL
#define MYSQL_FLAGS 0

#endif /* URL_DEDUP_CONFIG_H */

3. url_query.c

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
/*
 * url_query.c - HTTP service that answers whether a URL is present in
 * memcached.
 *
 * The request URI with its first character skipped is used as the memcached
 * key. A hit is answered with HTTP 200, a miss with HTTP 404.
 */
#include "config.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <event.h>
#include <evhttp.h>
#include <libmemcached/memcached.h>

/* Single memcached handle shared by all requests. */
static memcached_st *mc = NULL;

/*
 * Create the global memcached handle and register the configured server.
 * Exits the process if the handle cannot be allocated, because every later
 * lookup would dereference it.
 */
void init_memcache(void) {
    mc = memcached_create(NULL);
    if (mc == NULL) {
        fprintf(stderr, "memcached_create\n");
        exit(EXIT_FAILURE);
    }

    memcached_return rc = memcached_server_add(mc, MEMCACHE_HOST, MEMCACHE_PORT);
    if (rc != MEMCACHED_SUCCESS) {
        /* Keep running: lookups will simply miss and be answered with 404. */
        fprintf(stderr, "memcached_server_add: %s\n", memcached_strerror(mc, rc));
    }
}

/*
 * Generic evhttp callback: look the requested URL up in memcached.
 *
 * req - incoming request; its uri field supplies the key.
 * arg - unused.
 */
void query_handler(struct evhttp_request *req, void *arg) {
    (void)arg;
    const char *uri = req->uri;

    /* Empty or over-long URI: drop the connection without replying. */
    if (uri == NULL || strnlen(uri, MAXLEN) == MAXLEN || *uri == '\0') {
        evhttp_connection_free(req->evcon);
        return;
    }

    /* Skip the first character of the URI (the leading '/'). */
    const char *key = uri + 1;
    size_t value_len = 0;
    uint32_t flags = 0;
    memcached_return ret;

    char *value = memcached_get(mc, key, strlen(key), &value_len, &flags, &ret);
    if (value != NULL) {
        evhttp_send_reply(req, HTTP_OK, "OK", NULL);
        free(value); /* memcached_get returns a malloc'ed copy */
    } else {
        evhttp_send_reply(req, HTTP_NOTFOUND, "NOT FOUND", NULL);
    }
}

int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    /* init_memcache() creates the handle; creating one here too would leak it. */
    init_memcache();

    event_init();
    struct evhttp *httpd = evhttp_start(QUERY_HOST, QUERY_PORT);
    if (httpd == NULL) {
        fprintf(stderr, "evhttp_start: cannot listen on %s:%d\n", QUERY_HOST, QUERY_PORT);
        memcached_free(mc);
        return EXIT_FAILURE;
    }

    evhttp_set_gencb(httpd, query_handler, NULL);
    event_dispatch();

    evhttp_free(httpd);
    memcached_free(mc);
    return 0;
}

4. url_set.c

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
/*
 * url_set.c - HTTP service that checks MySQL for a URL and, when found,
 * stores it in memcached.
 *
 * The request URI with its first character skipped is looked up in the
 * `url` table. A match is written to memcached and answered with HTTP 200;
 * no match is answered with HTTP 404; a database failure with HTTP 503.
 */
#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <event.h>
#include <evhttp.h>
#include <libmemcached/memcached.h>
#include <mysql/mysql.h>

enum {
    /* mysql_real_escape_string() needs up to 2 * length + 1 bytes. */
    ESCAPED_MAX = 2 * MAXLEN + 1,
    /* Escaped URL plus room for the fixed part of the SELECT statement. */
    QUERY_MAX = ESCAPED_MAX + 64
};

static memcached_st *mc = NULL;
static MYSQL *mysql = NULL;
static char escaped[ESCAPED_MAX];
static char buf[QUERY_MAX];

/*
 * Create the global memcached handle and register the configured server.
 * Exits the process if the handle cannot be allocated.
 */
void init_memcache(void) {
    mc = memcached_create(NULL);
    if (mc == NULL) {
        fprintf(stderr, "memcached_create\n");
        exit(-1);
    }

    memcached_return rc = memcached_server_add(mc, MEMCACHE_HOST, MEMCACHE_PORT);
    if (rc != MEMCACHED_SUCCESS) {
        /* Caching is best-effort; MySQL lookups still work without it. */
        fprintf(stderr, "memcached_server_add: %s\n", memcached_strerror(mc, rc));
    }
}

/* Connect the global MySQL handle; exits the process on failure. */
void init_mysql(void) {
    if ((mysql = mysql_init(NULL)) == NULL) {
        fprintf(stderr, "mysql_init\n");
        exit(-1);
    }

    if (mysql_real_connect(
                mysql,
                MYSQL_HOST,
                MYSQL_USER,
                MYSQL_PASS,
                MYSQL_DATABASE,
                MYSQL_PORT,
                MYSQL_SOCK,
                MYSQL_FLAGS
    ) == NULL) {
        fprintf(stderr, "mysql_connect[%d]: %s\n", mysql_errno(mysql), mysql_error(mysql));
        exit(-1);
    }
}

/* Reply 503 so a database problem is not reported as "URL not found". */
static void reply_unavailable(struct evhttp_request *req) {
    evhttp_send_reply(req, HTTP_SERVUNAVAIL, "SERVICE UNAVAILABLE", NULL);
}

/*
 * Generic evhttp callback: look the requested URL up in MySQL and cache it
 * in memcached when it exists.
 *
 * req - incoming request; its uri field supplies the URL.
 * arg - unused.
 */
void set_handler(struct evhttp_request *req, void *arg) {
    (void)arg;
    const char *uri = req->uri;

    /* Empty or over-long URI: drop the connection without replying. */
    if (uri == NULL || strnlen(uri, MAXLEN) == MAXLEN || *uri == '\0') {
        evhttp_connection_free(req->evcon);
        return;
    }

    /* Skip the first character of the URI (the leading '/'). */
    uri++;
    size_t uri_len = strlen(uri);

    /* The URI is client-controlled: escape it before embedding it in SQL. */
    unsigned long esc_len =
        mysql_real_escape_string(mysql, escaped, uri, (unsigned long)uri_len);
    if (esc_len == (unsigned long)-1) {
        fprintf(stderr, "mysql_real_escape_string: %s\n", mysql_error(mysql));
        reply_unavailable(req);
        return;
    }

    int n = snprintf(buf, sizeof buf,
                     "SELECT url FROM url WHERE url = '%s' limit 1", escaped);
    if (n < 0 || (size_t)n >= sizeof buf) {
        fprintf(stderr, "query too long\n");
        reply_unavailable(req);
        return;
    }

    if (mysql_query(mysql, buf) != 0) {
        fprintf(stderr, "mysql_query[%d]: %s\n", mysql_errno(mysql), mysql_error(mysql));
        reply_unavailable(req);
        return;
    }

    MYSQL_RES *res = mysql_store_result(mysql);
    if (res == NULL) {
        fprintf(stderr, "mysql_store_result[%d]: %s\n", mysql_errno(mysql), mysql_error(mysql));
        reply_unavailable(req);
        return;
    }

    my_ulonglong rows = mysql_num_rows(res);
    mysql_free_result(res); /* release the result set on every request */

    if (rows) {
        fprintf(stderr, "num_rows: %llu\n", (unsigned long long)rows);

        /* Best-effort cache fill; a memcached failure must not fail the reply. */
        memcached_return rc = memcached_set(mc, uri, uri_len, "1", 1, 0, 0);
        if (rc != MEMCACHED_SUCCESS) {
            fprintf(stderr, "memcached_set: %s\n", memcached_strerror(mc, rc));
        }
        evhttp_send_reply(req, HTTP_OK, "OK", NULL);
    } else {
        evhttp_send_reply(req, HTTP_NOTFOUND, "NOT FOUND", NULL);
    }
}

int main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    init_memcache();
    init_mysql();

    event_init();
    struct evhttp *httpd = evhttp_start(SET_HOST, SET_PORT);
    if (httpd == NULL) {
        fprintf(stderr, "evhttp_start: cannot listen on %s:%d\n", SET_HOST, SET_PORT);
        mysql_close(mysql);
        memcached_free(mc);
        return EXIT_FAILURE;
    }

    evhttp_set_gencb(httpd, set_handler, NULL);
    event_dispatch();

    evhttp_free(httpd);
    mysql_close(mysql);
    memcached_free(mc);
    return 0;
}

5. nginx 配置

#user  nobody;
worker_processes  1;

#error_log  logs/error.log;
#error_log  logs/error.log  notice;
#error_log  logs/error.log  info;

#pid        logs/nginx.pid;


events {
    worker_connections  1024;
}


http {
    include       mime.types;
    default_type  application/octet-stream;

    # Cache lookup service (url_query).
    upstream url_query {
        server 127.0.0.1:8111;
    }

    # Database lookup service (url_set).
    upstream url_set {
        server 127.0.0.1:8222;
    }

    server {
        listen       80;
        server_name  localhost;

        location / {
            proxy_pass http://url_query;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For "$remote_addr";

            # Without this, a 404 returned by the upstream is passed straight
            # to the client and error_page below only sees errors generated
            # by nginx itself (502/504), so a cache miss would never reach
            # url_set.
            proxy_intercept_errors on;
            error_page 404 502 503 504 = @fallback;
        }

        # Reached when url_query misses or is unavailable; the status
        # returned by url_set is passed to the client as-is.
        location @fallback {
            proxy_pass http://url_set;
        }
    }
}

6. 测试
数据表里有一条url为aaa的记录.

[lyxint@null isdup]$ curl -I http://127.0.0.1/http://lyxint.com/
HTTP/1.1 404 NOT FOUND
Server: nginx/0.9.3
Date: Sun, 31 Jul 2011 03:54:56 GMT
Connection: keep-alive

[lyxint@null isdup]$ curl -I http://127.0.0.1/aaa
HTTP/1.1 200 OK
Server: nginx/0.9.3
Date: Sun, 31 Jul 2011 03:55:15 GMT
Connection: keep-alive

没有做压力测试, 用ab测起来效果倒是不错, 不过测试用的不是随机的url, 不具备说服力.

7. 注意
这两个C程序写得很简单, 错误处理什么的很少, 还需要改进. 当然, 这只是一个demo
由于url存在memcached的key里, 而memcached的key最大长度写死为250, 为防止url长度超过250, 编译memcached的时候需要把memcached.h中的#define KEY_MAX_LENGTH 250改大一点.


转自http://lyxint.com/archives/205

原创粉丝点击