hadoop 处理日志
来源:互联网 发布:linux程序设计第4版pdf 编辑:程序博客网 时间:2024/05/01 13:17
hadoop 处理日志
127.0.0.1 - - [03/Jul/2014:23:36:38 +0800] "GET /course/detail/3.htm HTTP/1.0" 200 38435 0.038
182.131.89.195 - - [03/Jul/2014:23:37:43 +0800] "GET / HTTP/1.0" 301 - 0.000
127.0.0.1 - - [03/Jul/2014:23:38:27 +0800] "POST /service/notes/addViewTimes_23.htm HTTP/1.0" 200 2 0.003
127.0.0.1 - - [03/Jul/2014:23:39:03 +0800] "GET /html/notes/20140617/779.html HTTP/1.0" 200 69539 0.046
127.0.0.1 - - [03/Jul/2014:23:43:00 +0800] "GET /html/notes/20140318/24.html HTTP/1.0" 200 67171 0.049
127.0.0.1 - - [03/Jul/2014:23:43:59 +0800] "POST /service/notes/addViewTimes_779.htm HTTP/1.0" 200 1 0.003
127.0.0.1 - - [03/Jul/2014:23:45:51 +0800] "GET / HTTP/1.0" 200 70044 0.060
127.0.0.1 - - [03/Jul/2014:23:46:17 +0800] "GET /course/list/73.htm HTTP/1.0" 200 12125 0.010
127.0.0.1 - - [03/Jul/2014:23:46:58 +0800] "GET /html/notes/20140609/542.html HTTP/1.0" 200 94971 0.077
127.0.0.1 - - [03/Jul/2014:23:48:31 +0800] "POST /service/notes/addViewTimes_24.htm HTTP/1.0" 200 2 0.003
127.0.0.1 - - [03/Jul/2014:23:48:34 +0800] "POST /service/notes/addViewTimes_542.htm HTTP/1.0" 200 2 0.003
127.0.0.1 - - [03/Jul/2014:23:49:31 +0800] "GET /notes/index-top-3.htm HTTP/1.0" 200 53494 0.041
127.0.0.1 - - [03/Jul/2014:23:50:55 +0800] "GET /html/notes/20140609/544.html HTTP/1.0" 200 183694 0.076
127.0.0.1 - - [03/Jul/2014:23:53:32 +0800] "POST /service/notes/addViewTimes_544.htm HTTP/1.0" 200 2 0.004
127.0.0.1 - - [03/Jul/2014:23:54:53 +0800] "GET /html/notes/20140620/900.html HTTP/1.0" 200 151770 0.054
127.0.0.1 - - [03/Jul/2014:23:57:42 +0800] "GET /html/notes/20140620/872.html HTTP/1.0" 200 52373 0.034
127.0.0.1 - - [03/Jul/2014:23:58:17 +0800] "POST /service/notes/addViewTimes_900.htm HTTP/1.0" 200 2 0.003
127.0.0.1 - - [03/Jul/2014:23:58:51 +0800] "GET / HTTP/1.0" 200 70044 0.057
/**
 * Mapper that turns each access-log line into a {@code ("METHOD /path", 1)} pair.
 *
 * <p>Lines that are too short, that contain neither a GET nor a POST request, or
 * whose request has no trailing {@code HTTP/<version>} token produce no output.
 */
public class LogMapper extends Mapper<LongWritable, Text, Text, IntWritable>{
    /** Request methods that are extracted; lines with other methods are skipped. */
    private static final String[] METHODS = {"GET", "POST"};

    /** Lines must be longer than this many characters to be considered at all. */
    private static final int MIN_LINE_LENGTH = 20;

    /** Marks the end of the request target; matches any protocol version. */
    private static final String PROTOCOL_MARKER = " HTTP/";

    /** Constant count emitted for every matched request. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Reused output key; set to the extracted request before every write. */
    private final Text requestKey = new Text();

    /**
     * Emits {@code (request, 1)} for the log line in {@code value} when a request
     * can be extracted from it; otherwise emits nothing.
     *
     * @param key     input key supplied by the framework (not used here)
     * @param value   one line of the access log
     * @param context sink for the {@code (request, 1)} pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String request = log(value.toString().trim());
        if (request.length() > 0) {
            requestKey.set(request);
            context.write(requestKey, ONE);
        }
    }

    /**
     * Extracts the request method and target (for example
     * {@code "GET /course/detail/3.htm"}) from one access-log line.
     *
     * <p>The request starts at the earliest occurrence of a supported method name
     * and ends just before the {@code " HTTP/"} protocol token that follows it, so
     * any protocol version is accepted and a method name appearing later inside the
     * URL is not mistaken for the request method.
     *
     * @param line one access-log line; may be {@code null}
     * @return the trimmed {@code "METHOD target"} text, or {@code ""} when the line
     *         is {@code null}, not longer than 20 characters, contains no GET/POST,
     *         or has no protocol token after the method
     */
    static String log(String line){
        if (line == null || line.length() <= MIN_LINE_LENGTH) {
            return "";
        }

        // Pick the earliest method token. As in the original check, a token at
        // index 0 is not treated as a match.
        int start = -1;
        for (String method : METHODS) {
            int idx = line.indexOf(method);
            if (idx > 0 && (start < 0 || idx < start)) {
                start = idx;
            }
        }
        if (start < 0) {
            return "";
        }

        // Search only after the method so the end can never precede the start.
        int end = line.indexOf(PROTOCOL_MARKER, start);
        if (end < 0) {
            return "";
        }
        return line.substring(start, end).trim();
    }

    /**
     * Manual check of {@link #log(String)}: prints the request extracted from a
     * sample log line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String line = "127.0.0.1 - - [03/Jul/2014:23:36:38 +0800] \"GET /course/detail/3.htm HTTP/1.0\" 200 38435 0.038";
        System.out.println(log(line));
    }
}
/**
 * Reducer that adds up all integer values received for a key and writes a single
 * {@code (key, total)} pair.
 */
public class LogReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    /**
     * Sums {@code values} and emits the total under the unchanged {@code key}.
     *
     * @param key     the key shared by all of {@code values}
     * @param values  the integer values to add together
     * @param context sink for the {@code (key, total)} pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int total = 0;
        for (IntWritable count : values) {
            total = total + count.get();
        }
        IntWritable result = new IntWritable(total);
        context.write(key, result);
    }
}
/**
 * Driver that configures and runs the "log_job" MapReduce job, using
 * {@code LogMapper} as the mapper and {@code LogReducer} as combiner and reducer.
 */
public class JobMain {
    /**
     * Runs the job and exits the JVM with status 0 on success, 1 on job failure,
     * or 2 when the arguments are missing.
     *
     * <p>An existing output path is deleted recursively before the job starts.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws Exception if job setup, file-system access, or job execution fails
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Usage: JobMain <input path> <output path>");
            System.exit(2);
        }

        Configuration configuration = new Configuration();
        // NOTE(review): this constructor is deprecated in Hadoop 2.x in favor of
        // Job.getInstance(conf, name); kept so older releases still work.
        Job job = new Job(configuration, "log_job");
        job.setJarByClass(JobMain.class);

        job.setMapperClass(LogMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // The reducer only sums, and its input and output types match the map
        // output, so it can also pre-aggregate on the map side.
        job.setCombinerClass(LogReducer.class);
        job.setReducerClass(LogReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));

        Path path = new Path(args[1]);
        // Resolve the file system from the path itself so that an output URI on a
        // non-default file system is checked and deleted on the right one.
        FileSystem fs = path.getFileSystem(configuration);
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        FileOutputFormat.setOutputPath(job, path);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
- HADOOP 处理 NGINX 日志
- hadoop 处理日志
- 实践:使用 Apache Hadoop 处理日志
- 实践:使用 Apache Hadoop 处理日志
- 实践:使用 Apache Hadoop 处理日志
- 基于Hadoop的日志收集框架---Chukwa的处理流程
- 003 利用hadoop+hive离线处理日志-方案分析
- Hadoop自定义类型处理手机上网日志(笔记5)
- hadoop日志
- Hadoop日志
- Hadoop日志
- hadoop 日志
- hadoop日志
- hadoop日志
- Hadoop之——使用hadoop自定义类型处理手机上网日志
- 实践:使用 Apache Hadoop 处理日志使用典型 Linux 系统上的 Hadoop 从日志中提取有用数据
- 日志处理
- 日志处理
- STAF/STAX 安装配置
- MapReduce Design Patterns(7、输入输出模式)(十三)
- 使用 Google V8 引擎开发可定制的应用程序
- WCF技术简单入门
- jsp,java项目邮箱注册发送邮件
- hadoop 处理日志
- 【第七章·查找技术】思维导图
- poj1915 简单bfs
- svn ubuntu
- Lombok(1.14.8)的简单示例
- easyui-accordion 设置默认选项卡
- ★★★同时支持iexplorer和firefox的javascript复制拷贝信息到剪贴板方法
- 虚拟机界面大小自适应设置
- 《数据结构》实验五 树和二叉树