hadoop日志分析程序
来源:互联网 发布:主机屋的数据库名称 编辑:程序博客网 时间:2024/05/09 06:16
业务描述:设定inputpath和outputpath,根据访问日志分析某一个应用访问某一个API的总次数和总流量,统计后分别输出到两个文件中。
package org.apache.hadoop.examples;
import java.io.IOException;
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class LogAnalysiser {
public static class MapClass extends
Mapper<Object, Text, Text, LongWritable> {
public void map(Object key, Text value, Context context)
throws IOException, InterruptedException {
String line = value.toString();
if (line == null || line.equals(""))
return;
String[] words = line.split(",");
if (words == null || words.length < 8)
return;
String appid = words[1];
String apiName = words[2];
LongWritable recbytes = new LongWritable(Long.parseLong(words[7]));
Text word = new Text();
word.set(new StringBuffer("flow::").append(appid).append("::")
.append(apiName).toString());
context.write(word, recbytes); // 输出流量的统计结果,通过flow::作为前缀来标示。
word.clear();
word.set(new StringBuffer("count::").append(appid).append("::")
.append(apiName).toString());
context.write(word, new LongWritable(1));// 输出次数的统计结果,通过count::作为前缀来标示
}
}
public static class PartitionerClass extends
Partitioner<Text, LongWritable> {
public int getPartition(Text key, LongWritable value, int numPartitions) {
if (numPartitions >= 2)// Reduce 个数,判断流量还是次数的统计分配到不同的Reduce
if (key.toString().startsWith("flow::"))
return 0;
else
return 1;
else
return 0;
}
}
public static class ReduceClass extends
Reducer<Text, LongWritable, Text, LongWritable> {
public void reduce(Text key, Iterable<LongWritable> values,
Context context) throws IOException, InterruptedException {
//
Text newkey = new Text();
newkey.set(key.toString().substring(
key.toString().indexOf("::") + 2));
LongWritable result = new LongWritable();
long sum = 0;
int counter = 0;
for (LongWritable val : values)// 累加同一个key的统计结果
{
sum += val.get();
// 担心处理太久,JobTracker长时间没有收到报告会认为TaskTracker已经失效,因此定时报告一下
counter = counter + 1;
if (counter == 1000) {
counter = 0;
}
}
result.set(sum);
context.write(newkey, result);// 输出最后的汇总结果
}
}
public static class CombinerClass extends
Reducer<Text, LongWritable, Text, LongWritable> {
public void reduce(Text key, Iterable<LongWritable> values,
Context context) throws IOException, InterruptedException {
LongWritable result = new LongWritable();
long sum = 0;
for (LongWritable val : values)// 累加同一个key的统计结果
{
sum += val.get();
}
result.set(sum);
}
}
public static void main(String[] args) throws Exception {
if (args == null || args.length < 2) {
System.out.println("need inputpath and outputpath");
return;
}
String inputpath = args[0];
String outputpath = args[1];
//文件名
String shortin = args[0];
String shortout = args[1];
if (shortin.indexOf(File.separator) >= 0)
shortin = shortin.substring(shortin.lastIndexOf(File.separator));
if (shortout.indexOf(File.separator) >= 0)
shortout = shortout.substring(shortout.lastIndexOf(File.separator));
//
SimpleDateFormat formater = new SimpleDateFormat("yyyy-MM-dd");
shortout = new StringBuffer(shortout).append("-").append(
formater.format(new Date())).toString();
if (!shortin.startsWith("/"))
shortin = "/" + shortin;
if (!shortout.startsWith("/"))
shortout = "/" + shortout;
shortin = "/user/root" + shortin;
shortout = "/user/root" + shortout;
File inputdir = new File(inputpath);
File outputdir = new File(outputpath);
if (!inputdir.exists() || !inputdir.isDirectory()) {
System.out.println("inputpath not exist or isn't dir!");
return;
}
if (!outputdir.exists()) {
new File(outputpath).mkdirs();
}
Configuration conf = new Configuration();
Job job = new Job(conf, "analysis job");
job.setJarByClass(LogAnalysiser.class);
FileSystem fileSys = FileSystem.get(conf);
fileSys.copyFromLocalFile(new Path(inputpath), new Path(shortin));// 将本地文件系统的文件拷贝到HDFS中
job.setJobName("analysisjob");
job.setOutputKeyClass(Text.class);// 输出的key类型,在OutputFormat会检查
job.setOutputValueClass(LongWritable.class); // 输出的value类型,在OutputFormat会检查
job.setMapperClass(MapClass.class);
job.setCombinerClass(CombinerClass.class);
job.setReducerClass(ReduceClass.class);
job.setPartitionerClass(PartitionerClass.class);
job.setNumReduceTasks(2);
FileInputFormat.setInputPaths(job, shortin); // hdfs中的输入路径
FileOutputFormat.setOutputPath(job, new Path(shortout));// hdfs中输出路径
Date startTime = new Date();
System.out.println("Job started: " + startTime);
Date end_time = new Date();
System.out.println("Job ended: " + end_time);
System.out.println("The job took "
+ (end_time.getTime() - startTime.getTime()) / 1000
+ " seconds.");
// 删除输入和输出的临时文件
fileSys.copyToLocalFile(new Path(shortout), new Path(outputpath));
fileSys.delete(new Path(shortin), true);
fileSys.delete(new Path(shortout), true);
}
}
原帖转自:http://www.javaeye.com/topic/647269
- hadoop日志分析程序
- hadoop 日志分析程序
- 使用hadoop编写日志分析MR程序
- 使用hadoop编写日志分析MR程序
- Hadoop小程序:分析日志文件
- Hadoop-03-第二个MapReduce程序--模拟分析购物日志
- hadoop日志简单分析
- Hadoop的日志分析
- hadoop日志分析
- hadoop日志分析
- hadoop+web日志分析
- Hadoop日志分析系统
- Hadoop 日志文件分析
- Hadoop 日志分析
- hadoop日志分析系统
- hadoop之日志分析
- 深入剖析Hadoop程序日志
- 深入剖析HADOOP程序日志
- C++知识点辨析
- C语言中产生随机数
- 配置文件验证管理员登陆 学习总结
- ASP.NET MVC 实践之路 之十八 利用ASP.NET MVC处理文件上传与下载
- test
- hadoop日志分析程序
- 短信的监听
- 郭德刚相声大全
- WebQQ协议分析(10)——聊天(3)
- JAVA 程序员情书
- Android开机自动启动程序设置
- C# 代理 事件 详解
- IDLE的运行、编辑与调试
- Oracle开发专题之:分析函数总结