MapReduce的自定义排序

来源:互联网 发布:2016年8月非农数据信息 编辑:程序博客网 时间:2024/05/22 00:25
package com.bigdata.hadoop.mapred;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Custom (secondary) sort example for MapReduce.
 *
 * Input data in hello3 (tab-separated pairs):
 *   3 2
 *   3 3
 *   3 1
 *   2 1
 *   2 2
 *   1 1
 * Expected output, sorted ascending by the first column AND by the
 * second column within equal first columns:
 *   1 1
 *   2 1
 *   2 2
 *   3 1
 *   3 2
 *   3 3
 *
 * The framework's shuffle only sorts on K2, never on V2, so both
 * original columns are packed into a composite key {@link NewK2} and
 * the value is {@link NullWritable}.
 *
 * @author yinhao
 */
public class SortApp {

    /** HDFS path of the input file. */
    private static final String INPUT_PATH = "hdfs://hadoop1:9000/dir1/hello3";
    /** HDFS path of the output directory (deleted before each run). */
    private static final String OUTPUT_PATH = "hdfs://hadoop1:9000/dir1/sort_out";

    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration();
        Job job = new Job(configuration, SortApp.class.getSimpleName());

        // Remove any previous output so the job does not fail with
        // FileAlreadyExistsException on re-runs.
        final FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_PATH), configuration);
        fileSystem.delete(new Path(OUTPUT_PATH), true);

        job.setJarByClass(SortApp.class);
        FileInputFormat.setInputPaths(job, INPUT_PATH);

        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(NewK2.class);
        job.setMapOutputValueClass(NullWritable.class);

        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(NewK2.class);
        job.setOutputValueClass(NullWritable.class);

        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        job.waitForCompletion(true);
    }

    /**
     * Parses each tab-separated "first\tsecond" line into a composite
     * {@link NewK2} key; the value carries no information.
     */
    public static class MyMapper extends Mapper<LongWritable, Text, NewK2, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, NewK2, NullWritable>.Context context)
                throws IOException, InterruptedException {
            final String line = value.toString();
            final String[] splited = line.split("\t");
            context.write(
                    new NewK2(Long.parseLong(splited[0]), Long.parseLong(splited[1])),
                    NullWritable.get());
        }
    }

    /**
     * Identity reducer: emits each (already sorted) composite key once
     * per distinct key group.
     */
    public static class MyReducer extends Reducer<NewK2, NullWritable, NewK2, NullWritable> {
        @Override
        protected void reduce(NewK2 key, Iterable<NullWritable> values,
                Reducer<NewK2, NullWritable, NewK2, NullWritable>.Context context)
                throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    /**
     * Composite key holding both columns so the shuffle sorts on the
     * pair (first asc, then second asc).
     *
     * Because instances are used as MapReduce keys, {@code equals} and
     * {@code hashCode} are overridden consistently with
     * {@code compareTo}: the default HashPartitioner relies on
     * {@code hashCode} to route equal keys to the same reducer.
     */
    public static class NewK2 implements WritableComparable<NewK2> {
        long first;
        long second;

        /** No-arg constructor required by Writable deserialization. */
        public NewK2() {
        }

        public NewK2(long first, long second) {
            this.first = first;
            this.second = second;
        }

        @Override
        public void write(DataOutput out) throws IOException {
            out.writeLong(first);
            out.writeLong(second);
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            this.first = in.readLong();
            this.second = in.readLong();
        }

        /**
         * Orders by {@code first} ascending, then {@code second}
         * ascending.
         *
         * Explicit comparisons are used instead of subtracting the two
         * longs and casting to int: the subtraction overflows for
         * operands far apart (e.g. Long.MIN_VALUE vs. a positive key)
         * and the int cast can flip the sign, which would silently
         * corrupt the sort order.
         */
        @Override
        public int compareTo(NewK2 o) {
            if (this.first != o.first) {
                return this.first < o.first ? -1 : 1;
            }
            if (this.second != o.second) {
                return this.second < o.second ? -1 : 1;
            }
            return 0;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof NewK2)) {
                return false;
            }
            NewK2 other = (NewK2) obj;
            return this.first == other.first && this.second == other.second;
        }

        @Override
        public int hashCode() {
            // Standard long-field hash (Long.hashCode semantics),
            // combined with 31 as in Arrays.hashCode.
            int result = (int) (first ^ (first >>> 32));
            return 31 * result + (int) (second ^ (second >>> 32));
        }

        @Override
        public String toString() {
            return this.first + "\t" + this.second;
        }
    }
}

0 0
原创粉丝点击