Hadoop MapReduce 简单案例--求平均值

来源:互联网 发布:易知资本 编辑:程序博客网 时间:2024/05/21 22:51

代码:

import java.io.IOException;  import java.util.StringTokenizer;  import org.apache.hadoop.conf.Configuration;  import org.apache.hadoop.fs.Path;  import org.apache.hadoop.io.LongWritable;  import org.apache.hadoop.io.Text;  import org.apache.hadoop.mapreduce.Job;  import org.apache.hadoop.mapreduce.Mapper;  import org.apache.hadoop.mapreduce.Reducer;  import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;  import org.apache.hadoop.util.GenericOptionsParser;  public class Sum {            public static class SumMapper extends              Mapper<Object, Text, Text, LongWritable> {          private Text word = new Text("sum");          private static LongWritable numValue = new LongWritable(1);          public void map(Object key, Text value, Context context)                  throws IOException, InterruptedException {              StringTokenizer itr = new StringTokenizer(value.toString());              long sum = 0,sum2=0;  //sum为每次读入的数,sun2为每行的个数            while (itr.hasMoreTokens()) {                  String s = itr.nextToken();                  long val = Long.parseLong(s);                  sum += val;                  sum2++;            }              numValue.set(sum/sum2);            word.set("average");            context.write(word, numValue);          }      }      public static class SumReducer extends              Reducer<Text, LongWritable, Text, LongWritable> {          private LongWritable result = new LongWritable();          private Text ave = new Text("average");          public void reduce(Text key, Iterable<LongWritable> values,                  Context context) throws IOException, InterruptedException {              long sum = 0,count=0;  //sum为每行计算出来的平均值,count为行数            for (LongWritable val : values) {                  long v = val.get();                  sum += v;                  count++;            }              result.set(sum/count);              context.write(ave, result);          }      }      public static void main(String[] args) throws Exception {          Configuration conf = new Configuration();          String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();          if (otherArgs.length != 2) {              System.err.println("Usage: numbersum <in> <out>");              System.exit(2);          }          long startTime=System.currentTimeMillis();//计算时间        Job job = new Job(conf, "number sum");           job.setJarByClass(Sum.class);          job.setMapperClass(SumMapper.class);          job.setReducerClass(SumReducer.class);          job.setOutputKeyClass(Text.class);          job.setOutputValueClass(LongWritable.class);          FileInputFormat.addInputPath(job, new Path(otherArgs[0]));          FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));          job.waitForCompletion(true) ;        long endTime=System.currentTimeMillis();        System.out.println("time="+(endTime-startTime));        System.exit(0);      }  }  


原创粉丝点击