NGX打印日志时对特殊字符的转码

来源:互联网 发布:单片机仿真需要程序吗 编辑:程序博客网 时间:2024/06/05 14:09
  • 问题:
    [root@3WR ~]# curl -svo /dev/null test/ -x 127.0.0.1:9711
    控制台(curl -v)输出的 User-Agent 与 access.log 中记录的 http_user_agent 不一致

1.控制台输出结果中打印0.9.

User-Agent: curl/7.15.5 (x86_64-redhat-linux-gnu) libcurl/7.15.5 OpenSSL/0.9. zlib/1.2.3 libidn/0.6.5

2. access.log 的输出结果中打印的是 0.9.8\x7F

127.0.0.1 - - [01/Jul/2015:13:01:14 +0800] "GET http://localhost/ HTTP/1.1" 200 30 "-" "curl/7.15.5 (x86_64-redhat-linux-gnu) libcurl/7.15.5 OpenSSL/0.9.8\x7F zlib/1.2.3 libidn/0.6.5"


  • 原因:
    先看下ASCII码表
| 八进制 | 十六进制 | 十进制 | 字符 |
|--------|----------|--------|------|
| 177    | 7F       | 127    | del  |

原因是 User-Agent 中实际包含一个 \x7F 字符, 终端在显示时把它解释成【删除】操作, 于是本来应该显示的 0.9.8 被删掉前一个字符 8, 在控制台上变成了 0.9. (数据本身并没有变化, 只是显示效果不同)
跟踪看下为什么access.log打印出了\x7F

Breakpoint 6, ngx_http_log_variable (r=0x7587e0,
    buf=0x7436b8 "curl/7.15.5 (x86_64-redhat-linux-gnu) libcurl/7.15.5 OpenSSL/0.9.8\\x7F zlib/1.2.3 libidn/0.6.5\"\n07t",
    op=0x740410) at src/http/modules/ngx_http_log_module.c:893
893     value = ngx_http_get_indexed_variable(r, op->data);
(gdb) p *value
/* escape = 1,内容是"0.9.8\177",\177和\x7F是一个东西 都是符号del */
$16 = {len = 91, valid = 1, no_cacheable = 0, not_found = 0, escape = 1,
  data = 0x76c13c "curl/7.15.5 (x86_64-redhat-linux-gnu) libcurl/7.15.5 OpenSSL/0.9.8\177 zlib/1.2.3 libidn/0.6.5"}
(gdb) bt
#0  ngx_http_log_variable (r=Unhandled dwarf expression opcode 0xf3) at src/http/modules/ngx_http_log_module.c:900
#1  0x0000000000447892 in ngx_http_log_handler (r=0x7587e0) at src/http/modules/ngx_http_log_module.c:331
#2  0x000000000043d35e in ngx_http_log_request (r=0x7587e0) at src/http/ngx_http_request.c:3399
#3  0x000000000043e5a7 in ngx_http_free_request (r=0x7587e0, rc=0) at src/http/ngx_http_request.c:3346
#4  0x000000000043f40b in ngx_http_set_keepalive (r=0x7587e0) at src/http/ngx_http_request.c:2789
#5  ngx_http_finalize_connection (r=0x7587e0) at src/http/ngx_http_request.c:2459
#6  0x000000000044018b in ngx_http_finalize_request (r=0x7587e0, rc=<value optimized out>)
    at src/http/ngx_http_request.c:2360
#7  0x000000000043afd9 in ngx_http_core_content_phase (r=0x7587e0, ph=Unhandled dwarf expression opcode 0xf3) at src/http/ngx_http_core_module.c:1408
#8  0x0000000000435f93 in ngx_http_core_run_phases (r=0x7587e0) at src/http/ngx_http_core_module.c:888
#9  0x00000000004360a2 in ngx_http_handler (r=Unhandled dwarf expression opcode 0xf3) at src/http/ngx_http_core_module.c:871
#10 0x000000000043e3bb in ngx_http_process_request (r=0x7587e0) at src/http/ngx_http_request.c:1828
#11 0x000000000044106c in ngx_http_process_request_headers (rev=Unhandled dwarf expression opcode 0xf3) at src/http/ngx_http_request.c:1259
#12 0x00000000004415df in ngx_http_process_request_line (rev=0x7f7c35c971b0) at src/http/ngx_http_request.c:940
#13 0x0000000000441ef9 in ngx_http_wait_request_handler (rev=0x7f7c35c971b0) at src/http/ngx_http_request.c:472
#14 0x00000000004326e8 in ngx_epoll_process_events (cycle=0x73ec90, timer=Unhandled dwarf expression opcode 0xf3) at src/event/modules/ngx_epoll_module.c:683
#15 0x0000000000429aaa in ngx_process_events_and_timers (cycle=0x73ec90) at src/event/ngx_event.c:249
#16 0x0000000000430da0 in ngx_worker_process_cycle (cycle=0x73ec90, data=Unhandled dwarf expression opcode 0xf3) at src/os/unix/ngx_process_cycle.c:807
#17 0x000000000042f4cb in ngx_spawn_process (cycle=0x73ec90, proc=0x430cb1 <ngx_worker_process_cycle>, data=0x0,
    name=0x4e7d83 "worker process", respawn=-4) at src/os/unix/ngx_process.c:198
#18 0x00000000004301e8 in ngx_start_worker_processes (cycle=0x73ec90, n=1, type=-4)
    at src/os/unix/ngx_process_cycle.c:362
#19 0x0000000000431d9d in ngx_master_process_cycle (cycle=0x73ec90) at src/os/unix/ngx_process_cycle.c:249
#20 0x0000000000412d71 in main (argc=Unhandled dwarf expression opcode 0xf3) at src/core/nginx.c:412
(gdb) n
904         return (u_char *) ngx_http_log_escape(buf, value->data, value->len);  //ngx在此函数内做了转码
  • 源码面前,了无秘密
/*
 * Escape a byte string for writing into the access log.
 *
 * Bytes flagged in the bitmap below (control characters 0x00-0x1F, '"',
 * '\\', DEL 0x7F and every byte >= 0x80) are written as the four
 * characters "\xHH", where HH is the upper-case hexadecimal value of the
 * byte.  All other bytes are copied unchanged.
 *
 * dst   output buffer, or NULL to only count; the caller must provide
 *       room for size + 3 * (number of escaped bytes) bytes
 * src   input bytes (not required to be NUL-terminated)
 * size  number of bytes to read from src
 *
 * Returns the number of bytes that need escaping when dst is NULL;
 * otherwise the position just past the last byte written to dst,
 * converted to uintptr_t.
 */
static uintptr_t
ngx_http_log_escape(u_char *dst, u_char *src, size_t size)
{
    ngx_uint_t      n;

    /* hexadecimal digit table */
    static const u_char   hex[] = "0123456789ABCDEF";

    /*
     * Bitmap over all 256 byte values, 32 per word: a set bit means the
     * byte must be escaped, a clear bit means it is copied verbatim.
     */
    static const uint32_t   escape[] = {
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */

                    /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
        0x00000004, /* 0000 0000 0000 0000  0000 0000 0000 0100 */

                    /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
        0x10000000, /* 0001 0000 0000 0000  0000 0000 0000 0000 */

                    /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
        0x80000000, /* 1000 0000 0000 0000  0000 0000 0000 0000 */

        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
    };

    if (dst == NULL) {

        /* find the number of the characters to be escaped */

        n = 0;

        while (size) {
            /*
             * 1U, not 1: bytes whose low five bits are all set (e.g. DEL,
             * 0x7F) select bit 31, and shifting a signed 1 into the sign
             * bit is undefined behaviour.
             */
            if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
                n++;
            }

            src++;
            size--;
        }

        return (uintptr_t) n;
    }

    while (size) {
        /*
         * *src >> 5 (divide by 32) selects the bitmap word holding this
         * byte, *src & 0x1f selects the bit inside that word; a non-zero
         * result of the AND means the byte has to be escaped.
         */
        if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
            *dst++ = '\\';
            *dst++ = 'x';
            /* one byte is 8 bits; each 4-bit half becomes one hex digit */
            *dst++ = hex[*src >> 4];    /* high nibble */
            *dst++ = hex[*src & 0xf];   /* low nibble */
            src++;

        } else {
            /* no escaping needed: copy the byte as is */
            *dst++ = *src++;
        }

        size--;
    }

    return (uintptr_t) dst;
}
0 0