Hadoop log analysis program


Business description: given an inputpath and an outputpath, analyze the access log to compute, for each application and each API it calls, the total number of calls and the total traffic, then write the two statistics to two separate output files.
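For illustration only, assuming a comma-separated log layout in which field 1 is the appid, field 2 the API name, and field 7 the transferred byte count (the mapper below relies only on these three fields and on each record having at least 8 fields), one input line might look like this:

2010-05-20 10:15:01,app001,getUserInfo,192.168.1.10,200,GET,12,2048

For this line the job adds 2048 to the traffic total and 1 to the call total of the key app001::getUserInfo; the two totals end up in the two separate output files.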

package org.apache.hadoop.examples;

import java.io.IOException;
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class LogAnalysiser {

    public static class MapClass extends
            Mapper<Object, Text, Text, LongWritable> {

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {

            String line = value.toString();
            if (line == null || line.equals(""))
                return;

            String[] words = line.split(",");
            if (words == null || words.length < 8)
                return;

            String appid = words[1];
            String apiName = words[2];

            LongWritable recbytes = new LongWritable(Long.parseLong(words[7]));
            Text word = new Text();

            word.set(new StringBuffer("flow::").append(appid).append("::")
                    .append(apiName).toString());
            context.write(word, recbytes); // emit the traffic statistic, marked by the "flow::" prefix
            word.clear();

            word.set(new StringBuffer("count::").append(appid).append("::")
                    .append(apiName).toString());
            context.write(word, new LongWritable(1)); // emit the call-count statistic, marked by the "count::" prefix
        }
    }

    public static class PartitionerClass extends
            Partitioner<Text, LongWritable> {

        public int getPartition(Text key, LongWritable value, int numPartitions) {
            // With at least two reducers, route the flow and count statistics to different reducers.
            if (numPartitions >= 2)
                if (key.toString().startsWith("flow::"))
                    return 0;
                else
                    return 1;
            else
                return 0;
        }
    }

    public static class ReduceClass extends
            Reducer<Text, LongWritable, Text, LongWritable> {

        public void reduce(Text key, Iterable<LongWritable> values,
                Context context) throws IOException, InterruptedException {

            // Strip the "flow::" / "count::" prefix from the key.
            Text newkey = new Text();
            newkey.set(key.toString().substring(
                    key.toString().indexOf("::") + 2));
            LongWritable result = new LongWritable();

            long sum = 0;
            int counter = 0;
            for (LongWritable val : values) { // accumulate the values of the same key
                sum += val.get();

                // A long-running reduce may be marked dead if the JobTracker hears nothing
                // from the TaskTracker for too long, so report progress periodically.
                counter = counter + 1;
                if (counter == 1000) {
                    context.progress();
                    counter = 0;
                }
            }
            result.set(sum);
            context.write(newkey, result); // emit the final aggregated result
        }
    }

    public static class CombinerClass extends
            Reducer<Text, LongWritable, Text, LongWritable> {

        public void reduce(Text key, Iterable<LongWritable> values,
                Context context) throws IOException, InterruptedException {

            LongWritable result = new LongWritable();
            long sum = 0;

            for (LongWritable val : values) { // pre-aggregate the values of the same key
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result); // re-emit the partial sum so the reducers receive it
        }
    }

    public static void main(String[] args) throws Exception {

        if (args == null || args.length < 2) {
            System.out.println("need inputpath and outputpath");
            return;
        }

        String inputpath = args[0];
        String outputpath = args[1];

        // Derive the HDFS file names from the local paths.
        String shortin = args[0];
        String shortout = args[1];

        if (shortin.indexOf(File.separator) >= 0)
            shortin = shortin.substring(shortin.lastIndexOf(File.separator));
        if (shortout.indexOf(File.separator) >= 0)
            shortout = shortout.substring(shortout.lastIndexOf(File.separator));

        // Append the current date to the output name.
        SimpleDateFormat formater = new SimpleDateFormat("yyyy-MM-dd");
        shortout = new StringBuffer(shortout).append("-")
                .append(formater.format(new Date())).toString();

        if (!shortin.startsWith("/"))
            shortin = "/" + shortin;
        if (!shortout.startsWith("/"))
            shortout = "/" + shortout;

        shortin = "/user/root" + shortin;
        shortout = "/user/root" + shortout;

        File inputdir = new File(inputpath);
        File outputdir = new File(outputpath);

        if (!inputdir.exists() || !inputdir.isDirectory()) {
            System.out.println("inputpath not exist or isn't dir!");
            return;
        }

        if (!outputdir.exists()) {
            new File(outputpath).mkdirs();
        }

        Configuration conf = new Configuration();
        Job job = new Job(conf, "analysis job");
        job.setJarByClass(LogAnalysiser.class);

        FileSystem fileSys = FileSystem.get(conf);
        fileSys.copyFromLocalFile(new Path(inputpath), new Path(shortin)); // copy the local input files to HDFS

        job.setJobName("analysisjob");
        job.setOutputKeyClass(Text.class);            // output key type, checked by the OutputFormat
        job.setOutputValueClass(LongWritable.class);  // output value type, checked by the OutputFormat
        job.setMapperClass(MapClass.class);
        job.setCombinerClass(CombinerClass.class);
        job.setReducerClass(ReduceClass.class);
        job.setPartitionerClass(PartitionerClass.class);

        job.setNumReduceTasks(2);
        FileInputFormat.setInputPaths(job, shortin);              // input path in HDFS
        FileOutputFormat.setOutputPath(job, new Path(shortout));  // output path in HDFS

        Date startTime = new Date();
        System.out.println("Job started: " + startTime);

        job.waitForCompletion(true); // submit the job and block until it finishes

        Date end_time = new Date();
        System.out.println("Job ended: " + end_time);

        System.out.println("The job took "
                + (end_time.getTime() - startTime.getTime()) / 1000
                + " seconds.");

        // Copy the result back to the local output directory, then delete the temporary HDFS input and output.
        fileSys.copyToLocalFile(new Path(shortout), new Path(outputpath));
        fileSys.delete(new Path(shortin), true);
        fileSys.delete(new Path(shortout), true);
    }
}
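To run the program, a minimal sketch (the jar name and the two local directories are placeholders; the class expects a local input directory as the first argument and a local output directory as the second):

hadoop jar loganalysiser.jar org.apache.hadoop.examples.LogAnalysiser /data/apilogs /data/apiresult

The job copies the input to /user/root on HDFS, runs with two reducers so that the flow and count statistics land in separate output files, and copies the dated result directory back to the local output path.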

 

Original post: http://www.javaeye.com/topic/647269

 

 

 
