Spark-项目中分析日志的核心代码
来源:互联网 发布:手机淘宝注册 编辑:程序博客网 时间:2024/06/05 01:14
代码
LogRecord 类:
/**
 * Holds the nine fields of a single access-log line.
 *
 * Every field is stored as a plain `String`; this class performs no numeric or
 * date conversion.
 *
 * NOTE(review): the parser shown further down this file builds and returns a type
 * named `AccessLogRecord` with the same nine string fields. Presumably that is this
 * class under its original name -- confirm and unify the two names.
 *
 * @param clientIpAddress       client IP address field
 * @param rfc1413ClientIdentity RFC 1413 client identity field
 * @param remoteUser            remote user field
 * @param dateTime              timestamp field, `[day/month/year:hour:minute:second zone]`
 * @param request               request field
 * @param httpStatusCode        HTTP status code field
 * @param bytesSent             bytes-sent field
 * @param referer               referer field
 * @param userAgent             user-agent field
 */
case class LogRecord(
  clientIpAddress: String,
  rfc1413ClientIdentity: String,
  remoteUser: String,
  dateTime: String, // [day/month/year:hour:minute:second zone]
  request: String,
  httpStatusCode: String,
  bytesSent: String,
  referer: String,
  userAgent: String
)
LogParser 解析类
import java.util.regex.Pattern
import java.text.SimpleDateFormat
import java.util.Locale
import scala.util.control.Exception._
import java.util.regex.Matcher
import scala.util.{Try, Success, Failure}

/**
 * Regex-based parser that turns one access-log line into an `AccessLogRecord`.
 *
 * The class is `Serializable`; the compiled pattern is built once per instance.
 *
 * NOTE(review): `AccessLogRecord` is not declared in the visible part of this file
 * (a structurally identical `LogRecord` is). Confirm which name is intended.
 */
@SerialVersionUID(100L)
class LogParser extends Serializable {

  // Building blocks of the log-line regex, one capture group per record field.
  private val ddd = "\\d{1,3}"
  private val ip = s"($ddd\\.$ddd\\.$ddd\\.$ddd)?" // optional: this group may not participate in a match
  private val client = "(\\S+)"
  private val user = "(\\S+)"
  private val dateTime = "(\\[.+?\\])"
  private val request = "\"(.*?)\""
  private val status = "(\\d{3})"
  private val bytes = "(\\S+)"
  private val referer = "\"(.*?)\""
  private val agent = "\"(.*?)\""
  private val regex = s"$ip $client $user $dateTime $request $status $bytes $referer $agent"
  private val p = Pattern.compile(regex)

  /**
   * Parses one log line.
   *
   * The pattern is applied with `find`, so it may match anywhere inside `record`.
   *
   * @param record raw log line
   * @return `Some(record)` when the pattern is found in the line, `None` otherwise
   */
  def parseRecord(record: String): Option[AccessLogRecord] = {
    val matcher = p.matcher(record)
    if (matcher.find) Some(buildAccessLogRecord(matcher)) else None
  }

  /**
   * Same as [[parseRecord]], but returns `AccessLogParser.nullObjectAccessLogRecord`
   * (all fields empty) instead of `None` when the line does not match.
   *
   * @param record raw log line
   * @return the parsed record, or the all-empty null object on failure
   */
  def parseRecordReturningNullObjectOnFailure(record: String): AccessLogRecord =
    parseRecord(record).getOrElse(AccessLogParser.nullObjectAccessLogRecord)

  /**
   * Copies capture groups 1 to 9 of a successful match into a record.
   *
   * `Matcher.group` returns `null` for a group that did not take part in the match,
   * which happens for the optional IP group (e.g. when the line starts with a host
   * name). Such groups become `""` so that no `null` reaches callers, matching the
   * convention of the null-object record.
   */
  private def buildAccessLogRecord(matcher: Matcher): AccessLogRecord = {
    def group(index: Int): String = Option(matcher.group(index)).getOrElse("")
    AccessLogRecord(
      group(1),
      group(2),
      group(3),
      group(4),
      group(5),
      group(6),
      group(7),
      group(8),
      group(9))
  }
}

/**
 * Helpers for the individual fields of a parsed record.
 *
 * Example log line:
 * 94.102.63.11 - - [21/Jul/2009:02:48:13 -0700] "GET / HTTP/1.1" 200 18209 "http://acme.com/foo.php" "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)"
 */
object AccessLogParser {

  /** Record with every field set to the empty string; returned when a line cannot be parsed. */
  val nullObjectAccessLogRecord: AccessLogRecord = AccessLogRecord("", "", "", "", "", "", "", "", "")

  // Captures everything between '[' and the first space; the rest (the zone) is matched but not captured.
  // Compiled once: Pattern instances are immutable and safe to share.
  private val datePattern = Pattern.compile("\\[(.*?) .+]")

  /**
   * Splits a request field such as `GET / HTTP/1.1` on single spaces.
   *
   * @param request the request field of a record
   * @return `Some((first, second, third))` when the split yields exactly three parts,
   *         `None` otherwise
   */
  def parseRequestField(request: String): Option[Tuple3[String, String, String]] =
    request.split(" ") match {
      case Array(first, second, third) => Some((first, second, third))
      case _                           => None
    }

  /**
   * Parses the date-time field of a record, e.g. `[21/Jul/2009:02:48:13 -0700]`.
   *
   * Only the text before the first space is parsed; the zone offset is ignored.
   * NOTE(review): because the format carries no zone, `SimpleDateFormat` interprets
   * the value in the JVM default time zone -- confirm that this is what callers expect.
   *
   * @param field the bracketed date-time field
   * @return the parsed date, or `None` when the field does not match or cannot be parsed
   */
  def parseDateField(field: String): Option[java.util.Date] = {
    val dateMatcher = datePattern.matcher(field)
    if (dateMatcher.find) {
      val dateString = dateMatcher.group(1)
      // HH is 0-23; kk is 1-24
      // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
      val dateFormat = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss", Locale.ENGLISH)
      // Try only catches non-fatal errors, so a ParseException becomes None
      // while fatal JVM errors still propagate.
      Try(dateFormat.parse(dateString)).toOption
    } else {
      None
    }
  }
}
总结
日志分析是经常做的事情,大数据下的日志分析也是一个常用技术。
0 0
- Spark-项目中分析日志的核心代码
- Spark日志分析项目Demo(1)--Flume-ng的安装
- Spark日志分析项目Demo(3)--Spark入口和DataFrame
- 逆向分析之核心代码的分析
- Spark日志分析项目Demo(5)--自定义Accumulator
- Spark日志分析项目Demo(9)--常规性能调优
- Spark日志分析项目Demo(10) --JVM调优
- spark 日志分析
- Spark日志分析案例
- srs 日志分析---spark
- Spark 分析Apache日志
- Spark做日志分析
- Spark大数据分析框架的核心部件
- 基于spark之上的即席分析-日志分析场景
- 基于spark之上的即席分析-日志分析场景
- 逆向分析之核心代码的定位
- Spark的核心概念
- 关于《机器学习实战》中创建决策树的核心代码分析
- Android 开源项目浅读-------SwipeMenuListView-重写,第六章,缓慢收回
- IO流
- 在 Xcode 7 中安装 Alcatraz,再次安装vvdocumenter
- WampServer 2.5设置外网访问/局域网手机访问(403 Forbidden错误解决方法)
- Qt quick 实现bootstrap界面之标签控件
- Spark-项目中分析日志的核心代码
- Java 静态变量,静态函数,静态代码块,构造代码块
- Meanshift--聚类算法
- Ubuntu14.10 更新源
- 浅谈null
- hjr教程-汇编(二):指令操作地址和数据
- Java千百问_07JVM架构(001)_java内存模型是什么样的
- Vector源码注释,粗略理解
- 深入理解JVM03--内存分配与回收策略