Linux内核netfilter子系统ulogd项目性能调优记录

来源:互联网 发布:深度linux 启动器 编辑:程序博客网 时间:2024/05/19 07:27
  1. 使用ULOGD打SYSLOG到SYSLOG-NG,当NFLOG拿到的数据包在6K左右时,CPU有两个核心占了15%左右

  2. 使用修改过的ULOGD直接打TCP,CPU只有一个核心占15%左右(修改版在https://github.com/InsZVA/ulogd-with-syslog-tcp)

  3. 使用perf top查看,

   8.89%  libc-2.12.so                  [.] vfprintf                                                                                                                                            3.49%  libc-2.12.so                  [.] _IO_default_xsputn                                                                                                                                  2.10%  [kernel]                      [k] find_busiest_group                                                                                                                                  1.94%  libc-2.12.so                  [.] _IO_vfscanf                                                                                                                                         1.70%  libc-2.12.so                  [.] __strchrnul                                                                                                                                         1.66%  libpython2.7.so.1.0           [.] PyEval_EvalFrameEx                                                                                                                                  1.64%  libc-2.12.so                  [.] _itoa_word                                                                                                                                          1.45%  [kernel]                      [k] schedule                                                                                                                                            1.13%  [kernel]                      [k] ixgbe_clean_rx_irq                                                                                                                                  1.08%  ulogd                         [.] ulogd_propagate_results                                                                                                                             1.00%  [kernel]                      [k] _spin_lock                                                                                                                                          0.98%  [kernel]                      [k] ipt_do_table                                                                                                                                        0.97%  [kernel]                      [k] net_rx_action                                                                                                                                       0.83%  [kernel]                      [k] memcpy                                                                                                                                              0.76%  [kernel]                      [k] _spin_unlock_irqrestore                                                                                                                             0.72%  [kernel]                      [k] menu_select                                                                                                                                         0.71%  libc-2.12.so                  [.] _IO_str_init_static_internal                                                                                                                        0.70%  [kernel]                      [k] irq_entries_start                                                                                                                                   0.65%  libc-2.12.so                  [.] _IO_vsprintf                                                                                                                                        0.59%  [kernel]                      [k] hrtimer_interrupt                                                                                                                                   0.57%  [kernel]                      [k] __hrtimer_start_range_ns                                                                                                                            0.55%  [kernel]                      [k] ixgbe_poll                                                                                                                                          0.54%  [kernel]                      [k] rb_erase                                                                                                                                            0.54%  libc-2.12.so                  [.] _IO_old_init                                                                                                                                        0.53%  [kernel]                      [k] find_next_bit                                                                                                                                       0.53%  [kernel]                      [k] getnstimeofday                                                                                                                                      0.49%  [kernel]                      [k] _spin_lock_irqsave                                                                                                                                  0.47%  [kernel]                      [k] ktime_get_real                                                                                                                                      0.47%  [kernel]                      [k] tick_nohz_stop_sched_tick                                                                                                                           0.46%  libpython2.7.so.1.0           [.] lookdict_string                                                                                                                                     0.46%  [kernel]                      [k] nf_conntrack_in                                                                                                                                     0.45%  traffic_server                [.] NetHandler::mainNetEvent                                                                                                                            0.45%  [kernel]                      [k] native_read_tsc                                                                                                                                     0.45%  [kernel]                      [k] kfree                                                                                                                                               0.43%  libc-2.12.so                  [.] sprintf                                                                                                                                             0.42%  [kernel]                      [k] __audit_syscall_exit                                                                                                                                0.42%  [kernel]                      [k] __switch_to                                                                                                                                         0.40%  [kernel]                      [k] ip_route_input                                                                                                                                      0.40%  [kernel]                      [k] __remove_hrtimer                                                                                                                                    0.38%  [kernel]                      [k] cpumask_next_and                                                                                                                                    0.37%  libc-2.12.so                  [.] __offtime                                                                                                                                           0.37%  libc-2.12.so                  [.] free                 

大量CPU消耗在标准输入输出库函数中,那么看来应该是解析字符串的问题了。

stack=log3:NFLOG,base1:BASE,ifi1:IFINDEX,ip2str1:IP2STR,print1:PRINTPKT,sys1:SYSLOGTCP

写了个代码测sprintf的性能:

#include <stdio.h>#include <stdlib.h>#include <sys/time.h>#define NULL 0#define TIME_ELAPSED(codeToTime) do{ \    struct timeval beginTime, endTime; \    gettimeofday(&beginTime, NULL); \    {codeToTime;} \    gettimeofday(&endTime, NULL); \    long secTime  = endTime.tv_sec - beginTime.tv_sec; \    long usecTime = endTime.tv_usec - beginTime.tv_usec; \    printf("[%s(%d)]Elapsed Time: SecTime = %lds, UsecTime = %ldus!\n", __FILE__, __LINE__, secTime, usecTime); \}while(0)uint16_t randu16() {    return (uint16_t)rand();}uint32_t randu32() {    return (uint32_t)rand();}char* f_puts(char* buf, const char* data) {    while((*buf++ = *data++));    return --buf;}char* f_putn(char* buf, int n) {    while (n) {        int digit = n % 10;        *buf++ = (char)('0' + digit);        n /= 10;    }    return --buf;}int optimized() {    char buf[1024];    char* buf_cur = buf;    buf_cur = f_puts(buf_cur, "PROTO=TCP SPT=");    buf_cur = f_putn(buf_cur, randu16());    buf_cur = f_puts(buf_cur, " DPT=");    buf_cur = f_putn(buf_cur, randu16());    buf_cur = f_puts(buf_cur, " SEQ=");    buf_cur = f_putn(buf_cur, randu32());    buf_cur = f_puts(buf_cur, " ACK=");    buf_cur = f_putn(buf_cur, randu32());    buf_cur = f_puts(buf_cur, " WINDOW=");    buf_cur = f_putn(buf_cur, randu16());    buf_cur = f_puts(buf_cur, " URG ACK PSH RST SYN FIN URGP=");    buf_cur = f_putn(buf_cur, randu16());    buf_cur = f_puts(buf_cur, " ");    return (int)(buf_cur - buf);}int work() {    char buf[1024];    char* buf_cur = buf;    buf_cur += sprintf(buf_cur, "PROTO=TCP ");    buf_cur += sprintf(buf_cur, "SPT=%u DPT=%u ",                       randu16(),                       randu16());    /* FIXME: config */    buf_cur += sprintf(buf_cur, "SEQ=%u ACK=%u ",                       randu32(),                       randu32());    buf_cur += sprintf(buf_cur, "WINDOW=%u ",                       randu16());    buf_cur += sprintf(buf_cur, "URG ");    buf_cur += sprintf(buf_cur, "ACK ");    buf_cur += sprintf(buf_cur, "PSH ");    buf_cur += sprintf(buf_cur, "RST ");    buf_cur += sprintf(buf_cur, "SYN ");    buf_cur += sprintf(buf_cur, "FIN ");    buf_cur += sprintf(buf_cur, "URGP=%u ",                       randu16());    return (int)(buf_cur - buf);}int main() {    TIME_ELAPSED(        for (int i = 0; i < 1000; i++)                work();    );    TIME_ELAPSED(            for (int i = 0; i < 1000; i++)                optimized();    );    return 0;}

结果:

[main.c(99)]Elapsed Time: SecTime = 0s, UsecTime = 1158us![main.c(103)]Elapsed Time: SecTime = 0s, UsecTime = 159us!

注意:

千万当心itoa、sprinf等库函数,性能可能低的可怕。

0 0