026_默认的MapReduce Driver(最小驱动问题)

来源:互联网 发布:梦幻邮箱数据 编辑:程序博客网 时间:2024/05/20 21:49
1、 最小配置的MapReduce Driver

读取输入文件中的内容,输出到指定目录的输出文件中,此时文件中的内容为:

Key---输入文件每行内容的起始位置(即该行相对于文件开头的字节偏移量,类型为LongWritable)。

Value---输入文件每行的原始内容。

输出文件中的内容就是:key+\t+value.

 package org.dragon.hadoop.mapreduce.app.minDriver;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce driver with the minimal configuration: only the jar class, the
 * input path and the output path are set on the job; no mapper, reducer or
 * format classes are configured here.
 *
 * <p>Per the original author's notes, the job reads the input files and
 * writes them to the output directory, where each output record is
 * {@code key + "\t" + value}:
 * <ul>
 *   <li>key - the starting position of each line in the input file;</li>
 *   <li>value - the original content of that line.</li>
 * </ul>
 *
 * <p>Usage: {@code MinimalDriverMapReduce [<input path> <output path>]}.
 * When fewer than two arguments are given, the built-in default HDFS
 * locations are used.
 *
 * @author ZhuXY
 * (created 2016-3-13 21:24:49)
 */
public class MinimalDriverMapReduce {

    /** Input location used when no paths are passed on the command line. */
    private static final String DEFAULT_INPUT_PATH =
            "hdfs://hadoop-master.dragon.org:9000/wc/mininput/";

    /** Output location used when no paths are passed on the command line. */
    private static final String DEFAULT_OUTPUT_PATH =
            "hdfs://hadoop-master.dragon.org:9000/wc/minoutput";

    /*
     * Mapper Class: intentionally none; no mapper is set on the job.
     */

    /*
     * Reducer Class: intentionally none; no reducer is set on the job.
     */

    /*
     * Driver Code
     */

    /**
     * Configures and submits the job, then exits the JVM with status 0 when
     * the job succeeds and 1 otherwise.
     *
     * @param args optional {@code <input path> <output path>}; if fewer than
     *             two values are supplied the default HDFS paths are used
     * @throws IOException            propagated from job creation, path setup or submission
     * @throws ClassNotFoundException propagated from {@code waitForCompletion}
     * @throws InterruptedException   propagated from {@code waitForCompletion}
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        // Honor caller-supplied paths instead of always overwriting them;
        // fall back to the tutorial's fixed locations otherwise.
        boolean hasPaths = args != null && args.length >= 2;
        String inputPath = hasPaths ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = hasPaths ? args[1] : DEFAULT_OUTPUT_PATH;

        // get conf
        Configuration conf = new Configuration();

        // create job, named after this driver class
        Job job = new Job(conf, MinimalDriverMapReduce.class.getSimpleName());

        // set job
        job.setJarByClass(MinimalDriverMapReduce.class);

        // 1) set input
        FileInputFormat.addInputPath(job, new Path(inputPath));

        // 2) set map: nothing set

        // 3) set reduce: nothing set

        // 4) set output
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // submit job and block until it finishes (true = print progress)
        boolean isSuccess = job.waitForCompletion(true);

        // return status
        System.exit(isSuccess ? 0 : 1);
    }
}

2、查看默认的配置

  主要在这个类中:

3、Map与reduce的默认输入输出类型。

4、显式写出最小配置所使用的全部默认设置

导包:

 1 package org.dragon.hadoop.mapreduce.app.minDriver; 2  3 import java.io.IOException; 4  5 import org.apache.hadoop.conf.Configuration; 6 import org.apache.hadoop.fs.Path; 7 import org.apache.hadoop.io.LongWritable; 8 import org.apache.hadoop.io.Text; 9 import org.apache.hadoop.mapreduce.Job;10 import org.apache.hadoop.mapreduce.Mapper;11 import org.apache.hadoop.mapreduce.Reducer;12 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;13 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;14 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;15 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;16 import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
View import Code

 

真正的代码:

 1 * @author ZhuXY   2  * @time   2016-3-13 下午9:45:02 3  * 4  */ 5  6 /** 7  * MapReduce Minimal Driver默认配置 8  *  9  * @author ZhuXY10  * 11  */12 public class TotalDefaultMinimalDriverMP {13     /*14      * Mapper Class15      */16 17     /*18      * Reducer Class19      */20 21     /*22      * Driver Code23      */24 25     public static void main(String[] args) throws IOException,26             ClassNotFoundException, InterruptedException {27         args = new String[] {28                 "hdfs://hadoop-master.dragon.org:9000/wc/mininput/",29                 "hdfs://hadoop-master.dragon.org:9000/wc/minoutput" };30 31         // step 1:get conf32         Configuration conf = new Configuration();33 34         // step 2:create job35         Job job = new Job(conf, MinimalDriverMapReduce.class.getSimpleName());36 37         // step 3:set job38         // 1) set run jar class39         job.setJarByClass(MinimalDriverMapReduce.class);40 41         // 2) set input format42         job.setInputFormatClass(TextInputFormat.class);                //可省43 44         // 3) set input path45         FileInputFormat.addInputPath(job, new Path(args[0]));46 47         // 4) set mapper class48         job.setMapperClass(Mapper.class);                //可省49 50         // 5)set map input key/value class51         job.setMapOutputKeyClass(LongWritable.class);                //可省52         job.setMapOutputValueClass(Text.class);                //可省53 54         // 6) set partitioner class55         job.setPartitionerClass(HashPartitioner.class);                //可省56 57         // 7) set reducer number58         job.setNumReduceTasks(1);//default 1                //可省59         // 8)set sort comparator class60         job.setSortComparatorClass(LongWritable.Comparator.class);                //可省61 62         // 9) set group comparator class63         job.setGroupingComparatorClass(LongWritable.Comparator.class);                //可省64 65         // 10) set combiner class66         
//job.setCombinerClass(null);默认是null,但是此处不能写                //可省67 68         // 11) set reducer class69         job.setReducerClass(Reducer.class);                //可省70 71         // 12) set output format72         job.setOutputFormatClass(TextOutputFormat.class);                //可省73 74         // 13) job output key/value class75         job.setOutputKeyClass(LongWritable.class);                //可省76         job.setOutputValueClass(Text.class);                //可省77 78         // 14) job output path79         FileOutputFormat.setOutputPath(job, new Path(args[1]));80 81         // step 4: submit job82         boolean isSuccess = job.waitForCompletion(true);83 84         // step 5: return status85         System.exit(isSuccess ? 0 : 1);86     }87 }
0 0
原创粉丝点击