logstash 中正则grok

来源:互联网 发布:北京网络大学 编辑:程序博客网 时间:2024/06/07 10:49

调试正则的工具: http://grokdebug.herokuapp.com/

注意:add 的 field 或者 tag,或者解析时字段的命名,一定不能是关键字,如 type

解析例子:

# Logstash pipeline: receives nginx access logs and Java/PHP application logs
# from Beats, parses them with grok, and ships them to Elasticsearch (nginx
# events are additionally POSTed to an HTTP endpoint).
#
# The pipeline must be kept one-setting-per-line: a '#' comment runs to the end
# of the physical line, so collapsing the file onto a single line comments out
# everything that follows the first '#'.

input {
    # Each Beats listener tags its events with a "myid" field; the filter and
    # output sections branch on that field.
    beats {
        add_field => {"myid"=>"nginx"}
        port => 5043
    }
    beats {
        add_field => {"myid"=>"java"}
        port =>5044
    }
}

filter {
    if [myid] == "nginx" {
        # Parse one nginx access-log line into named fields.
        grok {
            match => {
                "message" => "^(?<domain>%{IP:ip}|(?:%{NOTSPACE:subsite}\.)?(?<site>[-a-zA-Z0-9]+?).com|%{NOTSPACE:unknown}) %{IPORHOST:dayuip} - (?<user>[a-zA-Z\.\@\-\+_%]+) \[%{HTTPDATE:timestamp}\] \"%{WORD:verb} (?<request_path>(?<biz>\/[^/?]*)%{URIPATH:}?)(?:%{URIPARAM:request_param})? HTTP/%{NUMBER:httpversion}\" %{NUMBER:response} (?:%{NUMBER:bytes}|-) (?:%{BASE10NUM:request_duration}|-) (?:\"(?:%{URI:referrer}|-)\"|%{QS:referrer}) %{QS:agent} \"(?:%{IPORHOST:clientip}(?:[^\"]*)|-)\" %{QS:uidgot} %{QS:uidset} \"(?:[^\" ]* )*(?<upstream>[^ \"]*|-)\"$"
            }
        }
        # Use the request time captured by grok as the event timestamp.
        date {
            locale => "en"
            timezone => "Asia/Shanghai"
            match => [ "timestamp", "dd/MMM/yyyy:HH:mm:ss Z" ]
        }
        # grok captures are strings; convert the numeric ones.
        mutate {
            convert => { "bytes" => "integer" "request_duration" => "float"}
        }
    }

    if [myid] == "java" {
        if [source] =~ /.+-phplog.log/  {
            # PHP log entries carry proj/iid/file/ex/type inside the message itself.
            # The [type] tag is captured as "logtype" rather than "type".
            grok {
                match => {
                    "message"  => "\[entry\]\[ts\](?<ts>.*)\[/ts\]\[lv\](?<lv>.*)\[/lv\]\[th\](?<th>.*)\[/th\]\[lg\](?<lg>.*)\[/lg\]\[cl\](?<cl>.*)\[/cl\]\[m\](?<m>.*)\[/m\]\[ln\](?<ln>.*)\[/ln\]\[bsid\](?<bsid>.*)\[/bsid\]\[esid\](?<esid>.*)\[/esid\]\[txt\](?<txt>.*)\[/txt\]\[proj\](?<proj>.*)\[/proj\]\[iid\](?<iid>.*)\[/iid\]\[file\](?<file>.*)\[/file\]\[ex\](?<ex>.*)\[/ex\]\[type\](?<logtype>.*)\[/type\]\[/entry\]"
                }
            }
            mutate {
                # Remove fields that are not needed
                remove_field => ["type","logtype"]
            }
        } else {
            # Java log entries: same envelope, with an optional [cmid] section and
            # no proj/iid/file/type sections.
            grok {
                match => {
                    "message" => "\[entry\]\[ts\](?<ts>.*)\[/ts\]\[lv\](?<lv>.*)\[/lv\]\[th\](?<th>.*)\[/th\]\[lg\](?<lg>.*)\[/lg\]\[cl\](?<cl>.*)\[/cl\]\[m\](?<m>.*)\[/m\]\[ln\](?<ln>.*)\[/ln\]\[bsid\](?<bsid>.*)\[/bsid\]\[esid\](?<esid>.*)\[/esid\](\[cmid\](?<cmid>.*)\[/cmid\])?\[txt\](?<txt>.*)\[/txt\]\[ex\](?<ex>.*)\[/ex\]\[/entry\]"
                }
            }
            # Derive proj and iid from the log file name instead.
            grok {
                match => {
                    "source" => "(?<proj>[^/]+)-(?<iid>\w+)-\w+\.log"
                }
            }
        }

        # NOTE(review): in the phplog branch a "file" field was already captured
        # from the message; confirm how rename behaves when the target exists.
        mutate {
            rename => {
                "source" => "file"
                "offset" => "seq"
            }
        }
        mutate {
            # Remove fields that are not needed
            # ("offset" and "source" were already renamed by the mutate above.)
            remove_field => ["input_type","count","tags","message","@version","beat","fields","offset","source"]
        }
        # NOTE(review): the '$' between date and time is part of the pattern as
        # written — presumably the application logs use it as a separator; confirm.
        date {
            match => ["ts",'yyyy-MM-dd$HH:mm:ss.SSS','yyyy-MM-dd$HH:mm:ss.SSSZ']
        }
    } # endif_javalog
}

output {
    if [myid] == "nginx" {
        elasticsearch {
            hosts => ["192.168.5.201:9200"]
            index => "log-nginx-%{+YYYY.MM.dd}"
        }
        # Also POST every nginx event as JSON to the endpoint below.
        http {
            format=>"json"
            http_method=>"post"
            # Disabled alternative endpoint:
            # url => "http://192.168.1.68:8990/api/v1/metrics"
            url => "http://agg.we.com/api/v1/acclog"
        }
    }

    if [myid] == "java" {
        # Route by originating host: zy-java1 goes to the "uat" index,
        # JAVA1 and JAVA2 share the same index.
        if [host] == "zy-java1"
        {
            elasticsearch {
                hosts => ["192.168.5.201:9200"]
                index => "log-java-call-uat-%{+YYYY.MM.dd}"
            }
        }
        if [host] == "JAVA1"
        {
            elasticsearch {
                hosts => ["192.168.5.201:9200"]
                index => "log-java-call-%{+YYYY.MM.dd}"
            }
        }
        if [host] == "JAVA2"
        {
            elasticsearch {
                hosts => ["192.168.5.201:9200"]
                index => "log-java-call-%{+YYYY.MM.dd}"
            }
        }
    }
}
原创粉丝点击