Production! Hands-on! Using Hadoop to analyze the Apache logs of a telecom payment system!
1. Input data
The input is Apache access-log lines in Common Log Format: client IP, identity, user, timestamp, request line, HTTP status code, and response size in bytes.
1.2.3.4 - - [20/Feb/2016:00:05:11 +0800] "POST /zhifubao/zhifu HTTP/1.1" 200 1286
1.2.3.4 - - [20/Feb/2016:00:05:14 +0800] "POST /pay/zf HTTP/1.1" 200 96
2.2.3.4 - - [20/Feb/2016:00:05:15 +0800] "POST /zhifubao/zhifu HTTP/1.1" 200 1290
2.2.3.4 - - [20/Feb/2016:00:05:18 +0800] "POST /pay2/pay.do HTTP/1.1" 200 32
1.2.4.4 - - [20/Feb/2016:00:05:22 +0800] "POST /zhifubao/zhifu HTTP/1.1" 200 1285
1.2.3.5 - - [20/Feb/2016:00:05:23 +0800] "POST /zhifubao/zhifu HTTP/1.1" 200 1291
1.3.3.4 - - [20/Feb/2016:00:05:25 +0800] "POST /pay2/pay.do HTTP/1.1" 200 1976
2. A regular expression that extracts the IP, the date, and the payment URI
The test class below builds the pattern up step by step; the commented-out blocks record each intermediate pattern and the groups it captured.
package com.dt.java.test;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class RhzfApacheLog {

    public static void main(String[] args) {
        // An earlier attempt, kept commented out:
        // String pattern ="^(\\S+) (\\S+) (\\S+)\\[([\\w/]+)([\\w:/]+)\\s([+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+) \" (\\d{3}) (\\d+)";

        /* String pattern = "^(\\S+) (\\S+) (\\S+)";
           m.group()  : 11.5.41.3 - -
           m.group(1) : 11.5.41.3
           m.group(2) : - */

        /* String pattern = "^(\\S+) (\\S+) (\\S+) \\[([\\w/]+)([\\w:/]+)\\s([+\\-]\\d{4})\\]";
           m.group()  : 11.7.11.3 - - [18/Feb/2016:00:00:55 +0800]
           m.group(1) : 11.7.11.3
           m.group(2) : -
           m.group(3) : -
           m.group(4) : 18/Feb/2016
           m.group(5) : :00:00:55
           m.group(6) : +0800
           number of capture groups: groupCount() = 6 */

        /* String pattern = "^(\\S+) (\\S+) (\\S+) \\[([\\w/]+)([\\w:/]+)\\s([+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\"";
           m.group()  : 11.5.41.3 - - [18/Feb/2016:00:00:55 +0800] "POST /zhifubao/pay HTTP/1.1"
           m.group(1) : 11.5.41.3
           m.group(2) : -
           m.group(3) : -
           m.group(4) : 18/Feb/2016
           m.group(5) : :00:00:55
           m.group(6) : +0800
           m.group(7) : POST
           m.group(8) : /zhifubao/pay
           m.group(9) : HTTP/1.1
           number of capture groups: groupCount() = 9 */

        /* String pattern = "^(\\S+) (\\S+) (\\S+) \\[([\\w/]+)([\\w:/]+)\\s([+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)";
           m.group()  : 11.5.41.3 - - [18/Feb/2016:00:00:55 +0800] "POST /zhifubao/pay HTTP/1.1" 200 1285
           m.group(1) : 11.5.41.3
           m.group(2) : -
           m.group(3) : -
           m.group(4) : 18/Feb/2016
           m.group(5) : :00:00:55
           m.group(6) : +0800
           m.group(7) : POST
           m.group(8) : /zhifubao/pay
           m.group(9) : HTTP/1.1
           m.group(10): 200
           m.group(11): 1285
           number of capture groups: groupCount() = 11 */

        // Final pattern; note the space before \[ that separates the third field from the timestamp.
        String pattern = "^(\\S+) (\\S+) (\\S+) \\[([\\w/]+)([\\w:/]+)\\s([+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)";
        Pattern p = Pattern.compile(pattern);
        String s = "11.5.41.3 - - [18/Feb/2016:00:00:55 +0800] \"POST /zhifubao/pay HTTP/1.1\" 200 1285";
        Matcher m = p.matcher(s);
        while (m.find()) {
            System.out.println("m.group():" + m.group());     // the whole match
            System.out.println("m.group(1):" + m.group(1));   // client IP
            System.out.println("m.group(2):" + m.group(2));   // identity
            System.out.println("m.group(3):" + m.group(3));   // user
            System.out.println("m.group(4):" + m.group(4));   // date
            System.out.println("m.group(5):" + m.group(5));   // time
            System.out.println("m.group(6):" + m.group(6));   // time zone
            System.out.println("m.group(7):" + m.group(7));   // HTTP method
            System.out.println("m.group(8):" + m.group(8));   // request URI (payment interface)
            System.out.println("m.group(9):" + m.group(9));   // protocol
            System.out.println("m.group(10):" + m.group(10)); // status code
            System.out.println("m.group(11):" + m.group(11)); // response bytes
            System.out.println();
        }
        System.out.println("number of capture groups: groupCount()=" + m.groupCount());
    }
}
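As a side note (not part of the original post), the same expression can be written with named capture groups, available since Java 7, which makes it more obvious which group holds which field. The class name and group names below are illustrative only:

package com.dt.java.test;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Illustrative sketch only: the same log pattern with named capture groups.
public class RhzfApacheLogNamedGroups {

    private static final Pattern LOG = Pattern.compile(
            "^(?<ip>\\S+) (\\S+) (\\S+) \\[(?<date>[\\w/]+)(?<time>[\\w:/]+)\\s(?<zone>[+\\-]\\d{4})\\] "
            + "\"(?<method>\\S+) (?<uri>\\S+) (?<proto>\\S+)\" (?<status>\\d{3}) (?<bytes>\\d+)");

    public static void main(String[] args) {
        String s = "1.2.3.4 - - [20/Feb/2016:00:05:11 +0800] \"POST /zhifubao/zhifu HTTP/1.1\" 200 1286";
        Matcher m = LOG.matcher(s);
        if (m.find()) {
            // Same key the MapReduce job below builds: IP||date||payment URI
            System.out.println(m.group("ip") + "||" + m.group("date") + "||" + m.group("uri"));
        }
    }
}

Running this prints 1.2.3.4||20/Feb/2016||/zhifubao/zhifu, the same key format the MapReduce job in section 4 emits.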
3. Output
Each line of the reducer output is the key (IP||date||payment URI) followed by the number of requests for that combination; a small sketch for splitting these lines back apart follows the sample below.
1.1.1.2||18/Feb/2016||/zhifubao/zhifu 50000
1.1.1.2||19/Feb/2016||/zhifubao/zhifu 60000
1.1.1.3||18/Feb/2016||/zhifubao/zhifu 70000
1.1.1.3||19/Feb/2016||/zhifubao/zhifu 50000
1.1.1.1||18/Feb/2016||/zhifubao/zhifu 60000
1.1.1.1||19/Feb/2016||/zhifubao/zhifu 70000
1.1.1.2||18/Feb/2016||/zhifubao/zhifu 70000
1.1.1.2||19/Feb/2016||/zhifubao/zhifu 50000
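By default TextOutputFormat separates the key and the value with a tab, so a downstream consumer can split each result line back into its parts. A minimal sketch, with an illustrative class name and sample line (neither is from the original post):

package com.dt.java.test;

// Illustrative sketch only: split one reducer output line back into IP, date, URI, and count.
public class RhzfOutputLineParse {

    public static void main(String[] args) {
        String line = "1.1.1.2||18/Feb/2016||/zhifubao/zhifu\t50000";
        String[] kv = line.split("\t");           // key and count, tab-separated by TextOutputFormat
        String[] parts = kv[0].split("\\|\\|");   // IP, date, payment URI
        long count = Long.parseLong(kv[1]);
        System.out.println("IP=" + parts[0] + " date=" + parts[1]
                + " uri=" + parts[2] + " requests=" + count);
    }
}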
4. Source code
The complete MapReduce job: the mapper applies the same regular expression to each log line and emits (IP||date||payment URI, 1); the reducer sums the counts per key. An optional combiner variant is sketched after the listing.
package com.dtspark.hadoop.hellomapreduce;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class RhzfApacheURLLog {

    public static class DataMapper
            extends Mapper<LongWritable, Text, Text, LongWritable> {

        // Same pattern as in section 2: IP, identity, user, date, time, zone,
        // method, URI, protocol, status code, response bytes.
        private String pattern = "^(\\S+) (\\S+) (\\S+) \\[([\\w/]+)([\\w:/]+)\\s([+\\-]\\d{4})\\] \"(\\S+) (\\S+) (\\S+)\" (\\d{3}) (\\d+)";
        private Pattern p = Pattern.compile(pattern);
        private LongWritable resultValue = new LongWritable(1);
        private Text text = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            System.out.println("Map Method Invoked!!!");
            String line = value.toString();
            String result = handleLine(line);
            if (result != null && result.length() > 0) {
                text.set(result);
                context.write(text, resultValue); // emit (IP||date||payment URI, 1)
            }
        }

        private String handleLine(String line) {
            String handResult = null;
            if (line.length() > 0) {
                Matcher m = p.matcher(line);
                while (m.find()) {
                    String mip = m.group(1);     // client IP
                    String mdate = m.group(4);   // date
                    String malipay = m.group(8); // payment URI
                    handResult = mip.trim() + "||" + mdate.trim() + "||" + malipay.trim();
                    System.out.println("m.group(1):" + m.group(1));
                    System.out.println("m.group(4):" + m.group(4));
                    System.out.println("m.group(8):" + m.group(8));
                }
            }
            return handResult;
        }
    }

    public static class DataReducer
            extends Reducer<Text, LongWritable, Text, LongWritable> {

        private LongWritable totalresultValue = new LongWritable(1);

        @Override
        public void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            System.out.println("Reduce Method Invoked!!!");
            long total = 0;
            for (LongWritable item : values) {
                total += item.get();
            }
            totalresultValue.set(total);
            context.write(key, totalresultValue); // emit (IP||date||payment URI, total count)
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("Usage: RhzfApacheURLLog <in> [<in>...] <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "RhzfApacheURLLog");
        job.setJarByClass(RhzfApacheURLLog.class);
        job.setMapperClass(DataMapper.class);
        job.setReducerClass(DataReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        for (int i = 0; i < otherArgs.length - 1; ++i) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
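Because the reducer only sums LongWritable counts, the same class can also serve as a combiner so that each map task pre-aggregates its output before the shuffle. This is an optional optimization sketch, not part of the original job; only one line in the driver changes:

// Sketch of the only change in main() (optional, not in the original driver):
// reuse DataReducer as a combiner so each map task pre-sums its (key, 1) pairs
// locally before data is shuffled. Safe here because addition is associative and
// commutative and the combiner's input/output types match the map output types.
job.setMapperClass(DataMapper.class);
job.setCombinerClass(DataReducer.class);   // added line
job.setReducerClass(DataReducer.class);

After packaging the class into a jar, the job is submitted in the usual way, for example: hadoop jar rhzf-url-log.jar com.dtspark.hadoop.hellomapreduce.RhzfApacheURLLog /logs/in /logs/out (the jar name and HDFS paths are placeholders; the output directory must not already exist).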