A Partitioner usage example

The example below classifies each tab-separated input line by its field count ("long", "short", or "right") in the mapper, and then uses a custom Partitioner to route each class of records to its own reducer.

package org.apache.hadoop.examples;

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;

/**
 * Input is tab-separated text:
 * kaka    1       28
 * hua     0       26
 * chao    1
 * tao     1       22
 * mao     0       29      22
 */
// Example of using a custom Partitioner
public class MyPartitioner {

    // Map function: label each line by its number of fields
    public static class MyMap extends MapReduceBase implements
            Mapper<LongWritable, Text, Text, Text> {
        public void map(LongWritable key, Text value,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            String[] fields = value.toString().split("\t");
            Text word1 = new Text();
            Text word2 = new Text();
            if (fields.length > 3) {
                word1.set("long");
            } else if (fields.length < 3) {
                word1.set("short");
            } else {
                word1.set("right");
            }
            word2.set(value);
            output.collect(word1, word2);
        }
    }

    // Reduce function: pass each value through unchanged
    public static class MyReduce extends MapReduceBase implements
            Reducer<Text, Text, Text, Text> {
        public void reduce(Text key, Iterator<Text> values,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            while (values.hasNext()) {
                // Copy the Text object, since Hadoop reuses the instance it hands out
                output.collect(key, new Text(values.next()));
            }
        }
    }

    // The Partitioner interface extends JobConfigurable, so two methods must be overridden
    public static class MyPartitionerPar implements Partitioner<Text, Text> {
        /**
         * getPartition()
         * Input: the <key, value> pair and the number of reducers (numPartitions)
         * Output: the reducer number this record is assigned to (result)
         */
        @Override
        public int getPartition(Text key, Text value, int numPartitions) {
            int result = 0;
            if (key.toString().equals("long")) {
                result = 0 % numPartitions;
            } else if (key.toString().equals("short")) {
                result = 1 % numPartitions;
            } else if (key.toString().equals("right")) {
                result = 2 % numPartitions;
            }
            return result;
        }

        @Override
        public void configure(JobConf job) {
            // No configuration needed
        }
    }

    // Arguments: /home/hadoop/input/PartitionerExample /home/hadoop/output/Partitioner
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(MyPartitioner.class);
        conf.setJobName("MyPartitioner");

        // Three partitions are needed, so run three reducers
        conf.setNumReduceTasks(3);

        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(Text.class);

        // Set the partitioner class
        conf.setPartitionerClass(MyPartitionerPar.class);

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        // Set the mapper and reducer classes
        conf.setMapperClass(MyMap.class);
        conf.setReducerClass(MyReduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}
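The post uses the old org.apache.hadoop.mapred API. For readers on the newer org.apache.hadoop.mapreduce API, here is a minimal sketch of the same partitioning logic; the class name LengthPartitioner is an illustrative assumption, not part of the original code.

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// Sketch only: same key-to-reducer mapping as MyPartitionerPar above,
// written against the new-API Partitioner abstract class.
public class LengthPartitioner extends Partitioner<Text, Text> {
    @Override
    public int getPartition(Text key, Text value, int numPartitions) {
        String k = key.toString();
        if (k.equals("long")) {
            return 0 % numPartitions;
        } else if (k.equals("short")) {
            return 1 % numPartitions;
        } else if (k.equals("right")) {
            return 2 % numPartitions;
        }
        return 0; // any other key falls back to the first reducer, as in the original
    }
}

// In the new-API driver it would be wired up with (assuming an existing Job instance):
//   job.setPartitionerClass(LengthPartitioner.class);
//   job.setNumReduceTasks(3);

With three reducers and the sample input above, the "long" record (mao, four fields) lands in part-00000, the "short" record (chao, two fields) in part-00001, and the three "right" records (kaka, hua, tao) in part-00002.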