hadoop二次排序

来源：互联网发布：js关闭iframe页面编辑：程序博客网时间：2024/05/17 00:02
package example;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

/**
 * Secondary-sort example for temperature records (key: year, value: temperature).
 *
 * <p>Goal: output ordered by year ascending and, within a year, by temperature
 * descending. Hadoop only sorts by key, and only guarantees that the data each
 * reducer receives is ordered by key, so two things are needed:
 *
 * <ol>
 *   <li>Combine year and temperature into a custom composite key whose
 *       {@code compareTo} defines the desired order. Because "1970 + 30" and
 *       "1970 + 31" are different keys, a custom partitioner is used so that
 *       records are still routed by year only.</li>
 *   <li>For a global order, the job must run with a single reducer.</li>
 * </ol>
 */
public class SecondSort {

    /**
     * Composite key made of a year and a temperature.
     *
     * <p>Declared {@code static} and given a no-arg constructor because the
     * framework creates key instances reflectively and then fills them through
     * {@link #readFields(DataInput)}; a non-static inner class or null fields
     * would make that fail.
     */
    public static class CombineKey implements WritableComparable<CombineKey> {

        private final Text year;
        private final IntWritable temperature;

        /** Creates an empty key to be populated by {@link #readFields(DataInput)}. */
        public CombineKey() {
            this(new Text(), new IntWritable());
        }

        /**
         * Creates a key that wraps the given writables (they are stored, not copied).
         *
         * @param year        the year part of the key
         * @param temperature the temperature part of the key
         */
        public CombineKey(Text year, IntWritable temperature) {
            this.year = year;
            this.temperature = temperature;
        }

        /**
         * Restores both fields, in the same order {@link #write(DataOutput)} emits them.
         *
         * @param in the stream to read from
         * @throws IOException if either field cannot be read
         */
        @Override
        public void readFields(DataInput in) throws IOException {
            year.readFields(in);
            temperature.readFields(in);
        }

        /**
         * Serializes the year followed by the temperature.
         *
         * @param out the stream to write to
         * @throws IOException if either field cannot be written
         */
        @Override
        public void write(DataOutput out) throws IOException {
            year.write(out);
            temperature.write(out);
        }

        /**
         * Orders keys by year ascending, then by temperature descending.
         *
         * <p>The year is compared with {@code Text}'s own ordering; this matches
         * numeric order as long as all years have the same number of digits.
         *
         * @param combineKey the key to compare against
         * @return a negative value, zero, or a positive value as this key sorts
         *         before, equal to, or after {@code combineKey}
         */
        @Override
        public int compareTo(CombineKey combineKey) {
            int byYear = year.compareTo(combineKey.year);
            if (byYear != 0) {
                return byYear;
            }
            // Arguments are swapped to get descending temperature; Integer.compare
            // avoids the overflow a plain subtraction could produce.
            return Integer.compare(combineKey.temperature.get(), temperature.get());
        }

        /** Two keys are equal when both year and temperature are equal. */
        @Override
        public boolean equals(Object other) {
            if (this == other) {
                return true;
            }
            if (!(other instanceof CombineKey)) {
                return false;
            }
            CombineKey that = (CombineKey) other;
            return year.equals(that.year) && temperature.get() == that.temperature.get();
        }

        /** Hash consistent with {@link #equals(Object)}. */
        @Override
        public int hashCode() {
            return 31 * year.hashCode() + temperature.get();
        }

        /** @return the year part of this key */
        public Text getYear() {
            return year;
        }

        /** @return the temperature part of this key */
        public IntWritable getTemperature() {
            return temperature;
        }
    }

    /**
     * Partitions by the year part of the composite key only, so that all
     * records of one year go to the same reducer regardless of temperature.
     *
     * <p>Declared {@code static} so the framework can instantiate it reflectively.
     */
    public static class KeyPartioner extends Partitioner<CombineKey, IntWritable> {

        /** Reused delegate that hashes the year; avoids allocating one per record. */
        private final HashPartitioner<Text, IntWritable> yearPartitioner =
                new HashPartitioner<Text, IntWritable>();

        /**
         * @param key           the composite key; only its year is used
         * @param value         the record value, passed through to the delegate
         * @param numPartitions the number of partitions
         * @return the partition index chosen by hashing the year
         */
        @Override
        public int getPartition(CombineKey key, IntWritable value, int numPartitions) {
            return yearPartitioner.getPartition(key.getYear(), value, numPartitions);
        }
    }

    /**
     * Configures and runs the job.
     *
     * <p>The mapper, the reducer, the input/output formats and the paths are
     * omitted in this example, as is the single-reducer setting the global
     * ordering relies on.
     *
     * @param args command-line arguments (unused)
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a configured class cannot be loaded
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String args[])
            throws IOException, ClassNotFoundException, InterruptedException {
        Job job = new Job();
        job.setJarByClass(SecondSort.class);
        job.setMapOutputKeyClass(CombineKey.class);
        job.setPartitionerClass(KeyPartioner.class);
        job.waitForCompletion(true);
    }
}
0 0