动态正则匹配
来源:互联网 发布:美国警察知乎 编辑:程序博客网 时间:2024/06/16 01:43
需求:
1、写一个动态正则;
2、只要写出日志的 Schema,就可以得到匹配该日志的正则表达式。
package com.donews.util

import java.util.regex.Pattern

import scala.collection.mutable.ArrayBuffer

/**
 * Created by yuhui on 2016/8/5.
 */

/**
 * Builds a regular expression from a log-format schema and uses it to split log lines into fields.
 *
 * A schema is a space-separated list of tokens. Each token becomes exactly one capture group:
 *
 *  - `$name`            -> `([\S]+)`            (one run of non-blank characters)
 *  - `$a:$b`            -> `([\S]+\:[\S]+)`     (several variables glued by literal separators)
 *  - `[$name]`, `"$n"`  -> `(\[.+\])`, `(".+")` (variable wrapped in literal text; may contain blanks)
 *  - `-`                -> `(\W+)`              (token made only of non-word characters)
 *  - `HTTP`             -> `(HTTP)`             (literal token without any variable)
 *
 * Example line:
 * {{{
 * www.donews.com 123.125.71.72 - - [28/Nov/2016:11:08:50 +0800] "GET /media/201408/2834414.shtm HTTP/1.1" 200 11296 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" China 22 Beijing
 * }}}
 * Schema, version 1:
 * {{{
 * $domain $ip - $remote_user [$timestamp] "$http_url" $status $body_bytes_sent "$http_referer" "$http_user_agent" $country $region $city
 * }}}
 *
 * Example line:
 * {{{
 * www.donews.com 123.125.71.72 - - [28/Nov/2016:11:08:50 +0800] "GET /media/201408/2834414.shtm HTTP/1.1" 200 11296 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" "-" "China" "22" "Beijing"
 * }}}
 * Schema, version 2:
 * {{{
 * $domain $ip - $remote_user [$timestamp] "$http_url" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$e_ip" "$country" "$region" "$city"
 * }}}
 *
 * Example line:
 * {{{
 * www.donews.com 123.125.71.72 - - [28/Nov/2016:11:08:50 +0800] "GET /media/201408/2834414.shtm HTTP/1.1" "http://www.donews.com/media/201408/2834414.shtm" 200 11296 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)" "-" "China" "22" "Beijing"
 * }}}
 * Schema, version 3:
 * {{{
 * $domain $http_x_forwarded_for - $remote_user [$timestamp] "$http_url" "$url" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$e_ip" "$country" "$region" "$city"
 * }}}
 */
object DynamicRegex {

  /** Schema most recently assigned by [[main]]; kept as a public field for existing callers. */
  var cmd = ""

  /** Regular expression source used by [[lineToGroup]]; assign the result of [[tran]] to it. */
  var regex = ""

  /** Matches a token that consists solely of non-word characters, e.g. `-`. */
  private val PunctuationOnly = Pattern.compile("^(\\W+)$")

  /** Matches one schema variable such as `$status`. */
  private val Variable = Pattern.compile("\\$\\w+")

  /** Capture group used for a plain variable and as the fallback for tokens that cannot be parsed. */
  private val AnyField = "([\\S]+)"

  /** Last pattern compiled from `regex`, so that lineToGroup does not recompile it for every line. */
  @volatile private var compiled: Option[Pattern] = None

  /**
   * Translates a schema into an anchored regular expression with one capture group per token.
   *
   * Empty tokens (empty schema, repeated spaces) are skipped; an empty schema yields `^$`.
   *
   * @param cmd space-separated schema, e.g. `$ip [$timestamp] "$http_url"`
   * @return the regular expression source, starting with `^` and ending with `$`
   */
  def tran(cmd: String): String =
    cmd.split(" ").filter(_.nonEmpty).map(tokenToGroup).mkString("^", "\\s", "$")

  /** Chooses the capture group for a single non-empty schema token. */
  private def tokenToGroup(key: String): String = {
    val firstDollar = key.indexOf("$")
    if (PunctuationOnly.matcher(key).find()) "(\\W+)"
    // A token without any variable is matched literally instead of crashing on indexOf == -1.
    else if (firstDollar < 0) "(" + escape(key) + ")"
    else if (firstDollar == 0) leadingVariableGroup(key)
    else wrappedVariableGroup(key, firstDollar)
  }

  /**
   * Group for a token that starts with a variable: every variable becomes `[\S]+` and the text
   * between two variables is matched literally. Text after the last variable is absorbed by the
   * final `[\S]+`.
   */
  private def leadingVariableGroup(key: String): String = {
    val m = Variable.matcher(key)
    val body = new java.lang.StringBuilder
    var previousEnd = -1
    while (m.find()) {
      if (previousEnd >= 0) body.append(escape(key.substring(previousEnd, m.start())))
      body.append("[\\S]+")
      previousEnd = m.end()
    }
    if (body.length == 0) AnyField else "(" + body.toString + ")"
  }

  /**
   * Group for a token whose first variable is preceded by literal text, e.g. `[$timestamp]`:
   * the literal prefix and the literal text after the last variable are kept, everything in
   * between is `.+` because such fields may contain blanks.
   */
  private def wrappedVariableGroup(key: String, firstDollar: Int): String = {
    val m = Variable.matcher(key)
    var lastEnd = -1
    while (m.find()) lastEnd = m.end()
    // No well-formed variable at all (e.g. "a$-"): still emit a group so later fields stay aligned.
    if (lastEnd < 0) AnyField
    else "(" + escape(key.substring(0, firstDollar)) + ".+" + escape(key.substring(lastEnd)) + ")"
  }

  /**
   * Escapes literal text for use inside a regular expression.
   *
   * Every character except letters, digits and the double quote is prefixed with a backslash.
   * Letters and digits are left alone because a backslash in front of them would form a different
   * regex construct (`\d`, `\1`, ...) or be rejected.
   *
   * @param original literal text
   * @return the escaped text
   */
  def escape(original: String): String = {
    val out = new java.lang.StringBuilder
    original.foreach { ch =>
      if (ch != '"' && !Character.isLetterOrDigit(ch)) out.append('\\')
      out.append(ch)
    }
    out.toString
  }

  /**
   * Applies the current `regex` to a log line and collects all capture groups.
   *
   * @param line one log line
   * @return the captured fields in order; empty when the line does not match
   */
  def lineToGroup(line: String): ArrayBuffer[String] = {
    val m = currentPattern().matcher(line)
    val groups = ArrayBuffer[String]()
    while (m.find()) {
      for (i <- 1 to m.groupCount()) groups += m.group(i)
    }
    groups
  }

  /** Returns the compiled form of `regex`, recompiling only when `regex` has been reassigned. */
  private def currentPattern(): Pattern = {
    val source = regex
    compiled match {
      case Some(p) if p.pattern() == source => p
      case _ =>
        val fresh = Pattern.compile(source)
        compiled = Some(fresh)
        fresh
    }
  }

  /** Demo: prints the regex generated for the version-3 schema, then each field of a sample line. */
  def main(args: Array[String]): Unit = {
    cmd = "$domain $http_x_forwarded_for - $remote_user [$timestamp] \"$http_url\" \"$url\" $status $body_bytes_sent \"$http_referer\" \"$http_user_agent\" \"$e_ip\" \"$country\" \"$region\" \"$city\""
    regex = tran(cmd)
    println(regex)
    val log = "www.donews.com 123.125.71.72 - - [28/Nov/2016:11:08:50 +0800] \"GET /media/201408/2834414.shtm HTTP/1.1\" \"http://www.donews.com/media/201408/2834414.shtm\" 200 11296 \"-\" \"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)\" \"-\" \"China\" \"22\" \"Beijing\""
    lineToGroup(log).foreach(x => println(x))
  }
}
输出结果:
^([\S]+)\s([\S]+)\s(\W+)\s([\S]+)\s(\[.+\])\s(".+")\s(".+")\s([\S]+)\s([\S]+)\s(".+")\s(".+")\s(".+")\s(".+")\s(".+")\s(".+")$
www.donews.com
123.125.71.72
-
-
[28/Nov/2016:11:08:50 +0800]
"GET /media/201408/2834414.shtm HTTP/1.1"
"http://www.donews.com/media/201408/2834414.shtm"
200
11296
"-"
"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
"-"
"China"
"22"
"Beijing"
0 0
- 动态正则匹配
- 【动态规划】正则表达式匹配
- 动态规划——正则表达式匹配
- String、动态规划——正则表达式匹配
- php正则匹配动态页面文章标题<h1>
- 第10题 正则表达式匹配(动态规划)
- 正则匹配
- 正则匹配
- 正则匹配
- 正则匹配
- 正则匹配
- 正则匹配
- 正则匹配
- 正则匹配
- 正则匹配
- 正则匹配
- 正则匹配
- 正则匹配
- 暗恋的男神交了女朋友,那女人尖嘴猴腮
- pear phar pecl区别
- javascript 原生焦点轮播图
- PHP学习-[查看数组和变量,SeeVar($var);输出数组元素换行&&可折叠]
- 说个老爸的糗事,以前家里买了台老式
- 动态正则匹配
- iOS NSFileManager共享数据的坑
- 2017学习记录,一起进步
- js 中json对象转字符串
- SDK环境变量设置
- POJ 2559 Largest Rectangle in a Histogram 单调栈学习
- 如何建立一个基于事件驱动的全自动化交易系统
- 图片加载,避免oom篇(1)
- telnet如何岁memcached进行操作。