SecondarySort二次排序代码

来源:互联网 发布:mac下载os x yosemite 编辑:程序博客网 时间:2024/05/01 15:59

package com.zhiyou.bd17.mr1014;import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.NullWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.WritableComparable;import org.apache.hadoop.io.WritableComparator;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Partitioner;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;//二次排序public class SecondarySort {//自定义封装类型,封装二次排序的第一个字段和第二个字段//自定义排序规则:第一个字段不同按照第一个字段排序,第一个字段相同按照第二个排序public static class TwoFields implements WritableComparable<TwoFields>{private String firstFied;private int secondField;public String getFirstFied() {return firstFied;}public void setFirstFied(String firstFied) {this.firstFied = firstFied;}public int getSecondField() {return secondField;}public void setSecondField(int secondField) {this.secondField = secondField;}//序列化public void write(DataOutput out) throws IOException {out.writeUTF(firstFied);out.writeInt(secondField);}//反序列化public void readFields(DataInput in) throws IOException {this.firstFied = in.readUTF();this.secondField = in.readInt();}//比较方法:先比较第一个字段,第一个字段相同的再用第二个字段的比较结果public int compareTo(TwoFields o) {if (this.firstFied.equals(o.firstFied)) {return this.secondField - o.secondField;}else {return this.firstFied.compareTo(o.firstFied);}}}// 自定义分区,用来将第一个字段相同的key值分区到同一个reducer节点上public static class TwoFieldPartitoner extends Partitioner<TwoFields, NullWritable>{// 返回值是一个int数字,这个数字是reducer的标号@Overridepublic int getPartition(TwoFields key, NullWritable value, int numPartitions) {int reducerNo = (key.firstFied.hashCode()&Integer.MAX_VALUE) % numPartitions;return reducerNo;}}//定义分组比较器,让不同key值的第一个字段相同的kv 调用同一个 reducer方法public static class GroupToReducerComparetor extends WritableComparator {//构造方法里面要向父类传递比较器要比较的数据类型public GroupToReducerComparetor() {//1.比较器比较的类型参数//2.是否实例化对象super(TwoFields.class,true);}//重写compare方法自定义排序规则(根据第一字段分组)@Overridepublic int compare(WritableComparable a, WritableComparable b) {TwoFields ca = (TwoFields) a;TwoFields cb = (TwoFields) b;return ca.getFirstFied().compareTo(cb.getFirstFied());}}//定义mappublic static class SecondarySortMap extends Mapper<Text, Text, TwoFields, NullWritable> {private final NullWritable oValue = NullWritable.get();@Overrideprotected void map(Text key, Text value, Mapper<Text, Text, TwoFields, NullWritable>.Context context)throws IOException, InterruptedException {//将两个字段中的数据封装到一个twoFields对象中TwoFields twoFields = new TwoFields();twoFields.setFirstFied(key.toString());twoFields.setSecondField(Integer.valueOf(value.toString()));context.write(twoFields, oValue);}}//定义reducerpublic static class SecondarySortReducer extends Reducer<TwoFields, NullWritable, Text, Text> {private Text oKey = new Text();private Text oValue = new Text();@Overrideprotected void reduce(TwoFields key, Iterable<NullWritable> values,Reducer<TwoFields, NullWritable, Text, Text>.Context context) throws IOException, InterruptedException {//输出一组内容for (NullWritable value : values) {oKey.set(key.firstFied);oValue.set(String.valueOf(key.secondField));context.write(oKey, oValue);}//一组内容输出完,添加一条分割线oKey.set("-------------");oValue.set("");context.write(oKey, oValue);}}//设置并启动jobpublic static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {Configuration configuration = new Configuration();Job job = Job.getInstance(configuration);job.setJarByClass(SecondarySort.class);job.setJobName("二次排序");job.setMapperClass(SecondarySortMap.class);job.setReducerClass(SecondarySortReducer.class);job.setMapOutputKeyClass(TwoFields.class);job.setMapOutputValueClass(NullWritable.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(Text.class);Path inputPath = new Path("/bd17/secondaryorder");Path outputDir = new Path("/bd17/output/secondaryorder1");outputDir.getFileSystem(configuration).delete(outputDir,true);FileInputFormat.addInputPath(job, inputPath);FileOutputFormat.setOutputPath(job, outputDir);// 把文件内容以kv的形式读取出来发送给mapjob.setInputFormatClass(KeyValueTextInputFormat.class);//设置partitionjob.setPartitionerClass(TwoFieldPartitoner.class);//设置分组比较器job.setGroupingComparatorClass(GroupToReducerComparetor.class);System.exit(job.waitForCompletion(true)?0:1);}}


源数据:




处理结果:



























原创粉丝点击