Hadoop中自定义排序,分区,分组

来源:互联网 发布:猪八戒局域网考试软件 编辑:程序博客网 时间:2024/06/05 20:44

–Reducer

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Pass-through reducer: every value received for a group is written back out
 * unchanged, paired with the key object handed to {@code reduce}.
 */
public class HotReduce extends Reducer<KeyPair, Text, KeyPair, Text> {

    /**
     * Writes one output record per incoming value.
     *
     * @param kp      key passed in for this group
     * @param i       values belonging to the group
     * @param context task context used to emit the output records
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    protected void reduce(KeyPair kp, Iterable<Text> i, Context context)
            throws IOException, InterruptedException {
        final java.util.Iterator<Text> records = i.iterator();
        while (records.hasNext()) {
            final Text record = records.next();
            context.write(kp, record);
        }
    }
}

–Mapper

import java.io.IOException;  import java.text.ParseException;  import java.text.SimpleDateFormat;  import java.util.Calendar;  import java.util.Date;    import org.apache.hadoop.io.LongWritable;  import org.apache.hadoop.io.Text;  import org.apache.hadoop.mapreduce.Mapper;  public class HotMapper extends Mapper<LongWritable, Text, KeyPair, Text>{          @Override          protected void map(LongWritable key, Text value,Context context)                          throws IOException, InterruptedException {                  String line=value.toString();                  String[] ss=line.split("\t");                  if (ss.length==2){                          int year=Integer.parseInt(ss[0].substring(0, 4));                          int hot=Integer.parseInt(ss[1].substring(0, ss[1].indexOf("°C")));                          KeyPair kp=new KeyPair();                          kp.setYear(year);                          kp.setHot(hot);                          context.write(kp, value);                  }          }  }  

–KeyPair 自定义封装类

import java.io.DataInput;  import java.io.DataOutput;  import java.io.IOException;  import org.apache.hadoop.io.WritableComparable;  public class KeyPair implements WritableComparable<KeyPair> {          private int year;          private int hot;          public int getYear() {                  return year;          }          public void setYear(int year) {                  this.year = year;          }          public int getHot() {                  return hot;          }          public void setHot(int hot) {                  this.hot = hot;          }          @Override  反序列化        public void readFields(DataInput in) throws IOException {                  this.year=in.readInt();                  this.hot=in.readInt();          }          @Override  序列化        public void write(DataOutput out) throws IOException {                  out.writeInt(year);                  out.writeInt(hot);          }          @Override  对比        public int compareTo(KeyPair o) {                                   int result=Integer.compare(year, o.getYear());                  if (result!=0){                          return result;                  }                  return Integer.compare(hot, o.hot);          }          @Override          public String toString() {                  return year+"\t"+hot;          }          @Override          public int hashCode() {                  return new Integer(year+hot).hashCode();          }  } 

–HotPartition
//自定义分区

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Custom partitioner: routes records by the year of the key so that all
 * records of one year land in the same partition.
 */
public class HotPartition extends Partitioner<KeyPair, Text> {

    /**
     * Computes the partition index from the key's year.
     *
     * @param key   composite key; only its year is used
     * @param value record value (ignored)
     * @param num   number of partitions
     * @return an index in {@code [0, num)}
     */
    @Override
    public int getPartition(KeyPair key, Text value, int num) {
        // Clearing the sign bit keeps the result non-negative even when the year is
        // negative or the multiplication overflows; for every non-negative product
        // the value is identical to the plain (year * 127 % num).
        return ((key.getYear() * 127) & Integer.MAX_VALUE) % num;
    }
}

–HotSort 自定义排序

import org.apache.hadoop.io.WritableComparable;  import org.apache.hadoop.io.WritableComparator;  public class HotSort extends WritableComparator{          public HotSort() {                  super(KeyPair.class, true);  //排序时使用自定义分装的类进行排序        }          @Override          public int compare(WritableComparable a, WritableComparable b) {                  KeyPair o1=(KeyPair) a;                  KeyPair o2=(KeyPair) b;                  int res=Integer.compare(o1.getYear(), o2.getYear());                  if (res!=0){                          return res;                  }                  return -Integer.compare(o1.getHot(),o2.getHot());//降序排序          }  }  

–HotGroup
自定义分组

import org.apache.hadoop.io.WritableComparable;  import org.apache.hadoop.io.WritableComparator;  public class HotGroup extends WritableComparator{          public HotGroup() {                  super(KeyPair.class, true);          }          @Override          public int compare(WritableComparable a, WritableComparable b) {                  KeyPair o1=(KeyPair) a;                  KeyPair o2=(KeyPair) b;                  return Integer.compare(o1.getYear(),o2.getYear());          }  } public class Job{    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {        Configuration configuration = new Configuration();        Job job = new Job(configuration,"partation");        job.setJarByClass(Job.class);        job.setMapperClass(HotMapper.class);        job.setReducerClass(HotReducer.class);        job.setOutputKeyClass(KeyPair.class);        job.setOutputValueClass(Text.class);        job.setGroupingComparatorClass(FirstPartation.class);        job.setNumReduceTasks(3);        job.setSortComparatorClass(SortHot.class);        job.setGroupingComparatorClass(GroupHot.class);        FileInputFormat.addInputPath(job, new Path("/user/root/books"));        FileOutputFormat.setOutputPath(job, new Path("/user/root/bookout"));        System.exit(job.waitForCompletion(true)?0:1);    }}
0 0
原创粉丝点击