MapReduce编程模板

来源:互联网 发布:sql注入 跑管理员 编辑:程序博客网 时间:2024/06/12 21:49

import java.io.IOException;

 

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

 

/**

 * MapReuce

 *

 * @authorbeifeng

 *

 */

public classModuleMapReduce extends Configuredimplements Tool {

 

   // step 1: Map Class

   /**

    *

    * public class Mapper<KEYIN,VALUEIN, KEYOUT, VALUEOUT>

    */

   // TODO

   public static class ModuleMapper extends

        Mapper<LongWritable,Text, Text, IntWritable> {

 

      @Override

      public void setup(Context context) throws IOException,

           InterruptedException{

        // Nothing

      }

 

      @Override

      public void map(LongWritable key,Text value, Context context)

           throws IOException,InterruptedException {

        // TODO

      }

 

      @Override

      public void cleanup(Contextcontext) throwsIOException,

           InterruptedException{

        // Nothing

      }

 

   }

 

   // step 2: Reduce Class

   /**

    *

    * public classReducer<KEYIN,VALUEIN,KEYOUT,VALUEOUT>

    */

   // TODO

   public static class ModuleReducer extends

        Reducer<Text,IntWritable, Text, IntWritable> {

 

      @Override

      protected void setup(Context context) throws IOException,

           InterruptedException{

        // Nothing

      }

 

      @Override

      public void reduce(Text key,Iterable<IntWritable> values,

           Contextcontext) throwsIOException, InterruptedException {

        // TODO

      }

 

      @Override

      protected void cleanup(Contextcontext) throwsIOException,

           InterruptedException{

        // Nothing

      }

 

   }

 

   // step 3: Driver ,component job

   public int run(String[] args) throws Exception {

      // 1: get confifuration

      Configurationconfiguration = getConf();

 

      // 2: create Job

      Jobjob = Job.getInstance(configuration, //

           this.getClass().getSimpleName());

      // run jar

      job.setJarByClass(this.getClass());

 

      // 3: set job

      // input -> map -> reduce -> output

      // 3.1: input

      PathinPath = newPath(args[0]);

      FileInputFormat.addInputPath(job,inPath);

 

      // 3.2: map

      job.setMapperClass(ModuleMapper.class);

      // TODO

      job.setMapOutputKeyClass(Text.class);

      job.setMapOutputValueClass(IntWritable.class);

     

//****************************Shuffle*********************************

      // 1) partitioner

//    job.setPartitionerClass(cls);

      // 2) sort

//    job.setSortComparatorClass(cls);

      // 3) optional,combiner

//    job.setCombinerClass(cls);

      // 4) group

//    job.setGroupingComparatorClass(cls);

     

//****************************Shuffle*********************************

 

      // 3.3: reduce

      job.setReducerClass(ModuleReducer.class);

      // TODO

      job.setOutputKeyClass(Text.class);

      job.setOutputValueClass(IntWritable.class);

     

      // set reduce number

//    job.setNumReduceTasks(2);

     

     

     

      // 3.4: output

      PathoutPath = newPath(args[1]);

      FileOutputFormat.setOutputPath(job,outPath);

      // 4: submit job

      boolean isSuccess =job.waitForCompletion(true);

      return isSuccess ? 0 : 1;

   }

  

   // step 4: run program

   public static void main(String[] args)throws Exception {

      // 1: get confifuration

      Configurationconfiguration = newConfiguration();

      int status = ToolRunner.run(configuration,//

           new ModuleMapReduce(),//

           args);

 

      System.exit(status);

   }

}

 


MapReduce 的编程格式相对固定,但需要根据具体需求编写相应的 map 和 reduce 部分,以实现业务逻辑。

原创粉丝点击