MapReduce Programming: Partitioner
The Partitioner decides which ReduceTask processes each record emitted by a MapTask.
Default implementation: the hash of the key modulo the number of ReduceTasks.
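The default behavior can be sketched in plain Java. This is a simplified stand-in, not Hadoop's actual HashPartitioner class (which hashes the Text key's serialized bytes rather than a String), but the formula is the same:

```java
// Minimal sketch of the default partitioning formula:
// partition = (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks
// Plain String keys are used here instead of Hadoop's Text for illustration.
public class DefaultPartitionSketch {

    // Masking with Integer.MAX_VALUE clears the sign bit,
    // so the result is never negative.
    static int getPartition(String key, int numReduceTasks) {
        return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
    }

    public static void main(String[] args) {
        // The same key always maps to the same partition, so every
        // "xiaomi" record is summed by a single ReduceTask.
        System.out.println("xiaomi  -> partition " + getPartition("xiaomi", 4));
        System.out.println("huawei  -> partition " + getPartition("huawei", 4));
        System.out.println("iphone8 -> partition " + getPartition("iphone8", 4));
    }
}
```

Because the mapping depends only on the key's hash, keys are spread across reducers arbitrarily; a custom Partitioner, as in the example below, is how you control exactly which reducer gets which key.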
Example implementation
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce programming: Partitioner
 * The Partitioner decides which ReduceTask processes each record emitted by a MapTask.
 * Default implementation: the hash of the key modulo the number of ReduceTasks.
 *
 * Input data (cat partitioner):
 * xiaomi 200
 * huawei 100
 * iphone8 50
 * xiaomi 200
 * huawei 100
 * iphone8 50
 * xiaomi 200
 * huawei 100
 * iphone8 50
 * xiaomi 200
 * huawei 100
 * iphone8 50
 * nokia 20
 */
public class PartitionerApp {

    /**
     * Map: read the input file
     */
    public static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // each incoming line of data
            String line = value.toString();
            // split on the space delimiter
            String[] words = line.split(" ");
            // emit the map result through the context: (brand, count)
            context.write(new Text(words[0]), new LongWritable(Long.parseLong(words[1])));
        }
    }

    /**
     * Reduce: aggregation
     */
    public static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (LongWritable value : values) {
                // sum the counts for this key
                sum += value.get();
            }
            // emit the final total
            context.write(key, new LongWritable(sum));
        }
    }

    /**
     * Custom Partitioner: route each brand to its own partition.
     * The compared strings must match the input keys exactly; any key
     * that matches none of them falls through to partition 3.
     */
    public static class MyPartitioner extends Partitioner<Text, LongWritable> {

        @Override
        public int getPartition(Text key, LongWritable value, int numPartitions) {
            if (key.toString().equals("xiaomi")) {
                return 0;
            }
            if (key.toString().equals("huawei")) {
                return 1;
            }
            if (key.toString().equals("iphone8")) {
                return 2;
            }
            return 3;
        }
    }

    /**
     * Driver: packages all the information about the MapReduce job
     */
    public static void main(String[] args) throws Exception {
        // create the Configuration
        Configuration configuration = new Configuration();

        // clean up the output directory if it already exists
        Path outputPath = new Path(args[1]);
        FileSystem fileSystem = FileSystem.get(configuration);
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
            System.out.println("output path exists, deleted it");
        }

        // create the Job
        Job job = Job.getInstance(configuration, "wordcount");

        // set the job's main class
        job.setJarByClass(PartitionerApp.class);

        // set the job's input path
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        // map-side settings
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        // reduce-side settings
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // optionally set a Combiner; its logic would be identical to our Reducer
        // job.setCombinerClass(MyReducer.class);

        // set the job's Partitioner
        job.setPartitionerClass(MyPartitioner.class);
        // four reducers, one per partition
        job.setNumReduceTasks(4);

        // set the job's output path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

With the sample data above, each of the four reducers writes exactly one key: part-r-00000 contains xiaomi 800, part-r-00001 contains huawei 400, part-r-00002 contains iphone8 200, and part-r-00003 contains nokia 20.