Spark-项目中分析日志的核心代码

来源:互联网 发布:手机淘宝注册 编辑:程序博客网 时间:2024/06/05 01:14

代码

LogRecord 类:

/**
 * One parsed access-log line, with every field kept as the raw `String`
 * taken from the log text (no numeric or date conversion is done here).
 *
 * NOTE(review): the parser code in this file constructs and returns a type
 * called `AccessLogRecord` with the same nine string fields; presumably that
 * is this class under its original name -- confirm and align the two names.
 *
 * @param clientIpAddress       first field of the log line
 * @param rfc1413ClientIdentity second field of the log line
 * @param remoteUser            third field of the log line
 * @param dateTime              timestamp, formatted as
 *                              `[day/month/year:hour:minute:second zone]`
 * @param request               the quoted request field
 * @param httpStatusCode        status code, kept as text
 * @param bytesSent             byte count, kept as text
 * @param referer               the quoted referer field
 * @param userAgent             the quoted user-agent field
 */
case class LogRecord(
    clientIpAddress: String,
    rfc1413ClientIdentity: String,
    remoteUser: String,
    dateTime: String, // [day/month/year:hour:minute:second zone]
    request: String,
    httpStatusCode: String,
    bytesSent: String,
    referer: String,
    userAgent: String
)

LogParser 解析类

import java.util.regex.Pattern
import java.text.SimpleDateFormat
import java.util.Locale
import scala.util.control.Exception._
import java.util.regex.Matcher
import scala.util.{Try, Success, Failure}

/**
 * Regex-based parser that turns one access-log line into an `AccessLogRecord`.
 *
 * The class is `Serializable` (with a fixed `serialVersionUID`) so that an
 * instance can be serialized together with the code that uses it.
 *
 * Matching uses `Matcher.find`, so the pattern may match anywhere inside the
 * given line rather than having to cover it from start to end.
 */
@SerialVersionUID(100L)
class LogParser extends Serializable {

  // Building blocks of the line pattern; each parenthesised piece is one capture group.
  private val ddd = "\\d{1,3}"
  private val ip = s"($ddd\\.$ddd\\.$ddd\\.$ddd)?" // optional: group 1 may not participate
  private val client = "(\\S+)"
  private val user = "(\\S+)"
  private val dateTime = "(\\[.+?\\])"
  private val request = "\"(.*?)\""
  private val status = "(\\d{3})"
  private val bytes = "(\\S+)"
  private val referer = "\"(.*?)\""
  private val agent = "\"(.*?)\""
  private val regex = s"$ip $client $user $dateTime $request $status $bytes $referer $agent"
  private val p = Pattern.compile(regex)

  /**
   * Parses a single log line.
   *
   * @param record one raw line of the access log
   * @return `Some(record)` when the line pattern is found in `record`, otherwise `None`
   */
  def parseRecord(record: String): Option[AccessLogRecord] = {
    val matcher = p.matcher(record)
    if (matcher.find) Some(buildAccessLogRecord(matcher)) else None
  }

  /**
   * Same as [[parseRecord]], but never returns an empty result: lines that do
   * not match yield `AccessLogParser.nullObjectAccessLogRecord` (all fields
   * empty strings).
   *
   * @param record one raw line of the access log
   * @return the parsed record, or the all-empty null-object record
   */
  def parseRecordReturningNullObjectOnFailure(record: String): AccessLogRecord =
    parseRecord(record).getOrElse(AccessLogParser.nullObjectAccessLogRecord)

  /**
   * Copies the nine capture groups of a successful match into a record.
   *
   * Group 1 (the IP address) is optional in the pattern, so `group(1)` returns
   * `null` when it did not take part in the match; that case is mapped to ""
   * so no `null` field escapes, consistent with the null-object record.
   */
  private def buildAccessLogRecord(matcher: Matcher): AccessLogRecord =
    AccessLogRecord(
      Option(matcher.group(1)).getOrElse(""),
      matcher.group(2),
      matcher.group(3),
      matcher.group(4),
      matcher.group(5),
      matcher.group(6),
      matcher.group(7),
      matcher.group(8),
      matcher.group(9))
}

/**
 * Helpers for the individual fields of a parsed record.
 *
 * Example log line:
 * 94.102.63.11 - - [21/Jul/2009:02:48:13 -0700] "GET / HTTP/1.1" 200 18209 "http://acme.com/foo.php" "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)"
 */
object AccessLogParser {

  /** Record returned for unparseable lines: every field is the empty string. */
  val nullObjectAccessLogRecord: AccessLogRecord =
    AccessLogRecord("", "", "", "", "", "", "", "", "")

  // Captures everything between '[' and the first space, i.e. the timestamp
  // without its trailing zone part. Compiled once instead of on every call.
  private val datePattern = Pattern.compile("\\[(.*?) .+]")

  /**
   * Splits a request field such as `GET / HTTP/1.1` on single spaces.
   *
   * @param request the request field of a record
   * @return the three parts as a tuple when the split yields exactly three
   *         pieces, otherwise `None`
   */
  def parseRequestField(request: String): Option[(String, String, String)] =
    request.split(" ") match {
      case Array(first, second, third) => Some((first, second, third))
      case _                           => None
    }

  /**
   * Converts a bracketed timestamp field such as `[21/Jul/2009:02:48:13 -0700]`
   * into a `java.util.Date`.
   *
   * NOTE(review): only the part before the first space is parsed, so the zone
   * offset is discarded and the value is interpreted in the JVM's default time
   * zone -- confirm this is what callers expect.
   *
   * @param field the date/time field of a record, including the brackets
   * @return the parsed date, or `None` when the field does not have the
   *         bracketed shape or the date text cannot be parsed
   */
  def parseDateField(field: String): Option[java.util.Date] = {
    val dateMatcher = datePattern.matcher(field)
    if (dateMatcher.find) {
      // HH is 0-23; kk is 1-24
      // A new formatter per call: SimpleDateFormat instances are not safe to share between threads.
      val dateFormat = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss", Locale.ENGLISH)
      Try(dateFormat.parse(dateMatcher.group(1))).toOption
    } else {
      None
    }
  }
}

总结

日志分析是经常做的事情,大数据下的日志分析也是一个常用技术。

0 0
原创粉丝点击