Custom Sorting in MapReduce Programming



Input data:

[root@baolibin hadoop]# hadoop fs -text /input/haha
Warning: $HADOOP_HOME is deprecated.
2       1
3       2
1       3



Code:

package hadoop_2_6_0;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SortText {

    // Custom key type: a pair of longs, ordered by the sum of the two fields.
    public static class NewkWritable implements WritableComparable<NewkWritable> {
        long first;
        long second;

        public NewkWritable() {
        }

        public NewkWritable(long first, long second) {
            this.set(first, second);
        }

        public void set(long first, long second) {
            this.first = first;
            this.second = second;
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            this.first = in.readLong();
            this.second = in.readLong();
        }

        @Override
        public void write(DataOutput out) throws IOException {
            out.writeLong(first);
            out.writeLong(second);
        }

        @Override
        public int compareTo(NewkWritable o) {
            // return (int) ((this.first + this.second) - (o.first + o.second)); // ascending by sum
            // Sort in descending order of the sum.
            return (int) ((o.first + o.second) - (this.first + this.second));
        }

        @Override
        public String toString() {
            return first + "";
        }
    }

    public static class MyMapper extends Mapper<LongWritable, Text, NewkWritable, LongWritable> {
        NewkWritable k2 = new NewkWritable();
        LongWritable V2 = new LongWritable();

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, NewkWritable, LongWritable>.Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] splited = line.split("\t");
            k2.set(Long.parseLong(splited[0]), Long.parseLong(splited[1]));
            V2.set(Long.parseLong(splited[1]));
            // k2's toString() only prints first, so the printed result looks as expected.
            context.write(k2, V2);
        }
    }

    public static class MyReducer extends Reducer<NewkWritable, LongWritable, NewkWritable, LongWritable> {
        @Override
        protected void reduce(NewkWritable k2, Iterable<LongWritable> v2s,
                Reducer<NewkWritable, LongWritable, NewkWritable, LongWritable>.Context context)
                throws IOException, InterruptedException {
            Iterator<LongWritable> iterator = v2s.iterator();
            if (iterator.hasNext()) {
                LongWritable v2 = iterator.next();
                context.write(k2, v2);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        // String INPUT_PATH = args[0];
        // String OUTPUT_PATH = args[1];
        String INPUT_PATH = "/input/haha";
        String OUTPUT_PATH = "/sort_out2";
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, SortText.class.getSimpleName());
        job.setJarByClass(SortText.class);
        // 1.1 input
        FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.1.100:9000" + INPUT_PATH));
        job.setInputFormatClass(TextInputFormat.class);
        // 1.2 map
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(NewkWritable.class);
        job.setMapOutputValueClass(LongWritable.class);
        // 1.3 partition, 1.4 sort/group, 1.5 combine: defaults
        // 2.2 reduce
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(NewkWritable.class);
        job.setOutputValueClass(LongWritable.class);
        // 2.3 output
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.100:9000" + OUTPUT_PATH));
        job.setOutputFormatClass(TextOutputFormat.class);
        job.waitForCompletion(true);
    }
}
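Assuming the class has been packaged into a jar (the jar file name below is illustrative, not from the original post), the job can be submitted in the usual way:

[root@baolibin hadoop]# hadoop jar SortText.jar hadoop_2_6_0.SortText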



Result:

[root@baolibin hadoop]# hadoop fs -text /sort_out2/part-r*
Warning: $HADOOP_HOME is deprecated.
3       2
1       3
2       1


Analysis:

As the output shows, the lines are sorted in descending order of the sum of the two numbers on each line: 3+2=5, then 1+3=4, then 2+1=3.

A custom data type class is defined that implements the WritableComparable interface and provides the comparison method.
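One detail the original code does not cover: with more than one reducer, the default HashPartitioner calls hashCode() on the map output key, so a custom key class would normally also override hashCode() consistently with its ordering. A minimal sketch for NewkWritable, assuming partitioning should follow the same sum used by compareTo:

@Override
public int hashCode() {
    // Keys that compare equal (same sum) hash the same,
    // so they are sent to the same reducer.
    return (int) (first + second);
}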


Writing the comparison this way is not safe:

return (int) ((o.first+o.second)-(this.first+this.second));

Subtracting the two sums and casting the long result to int can overflow and yield a wrong ordering, but this example only sorts a small amount of data, so it is good enough here.
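A safer alternative (not in the original post) is to delegate to Long.compare, which avoids the overflow introduced by subtracting the sums and casting to int:

@Override
public int compareTo(NewkWritable o) {
    // Descending order of the sum of both fields; Long.compare never
    // overflows the way the subtract-and-cast version can.
    return Long.compare(o.first + o.second, this.first + this.second);
}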


The map method splits each line on the tab character and picks out the fields to be compared:

k2.set(Long.parseLong(splited[0]), Long.parseLong(splited[1]));
V2.set(Long.parseLong(splited[1]));
context.write(k2, V2);


The reduce method only needs to take the value from each group and write it out:

Iterator<LongWritable> iterator = v2s.iterator();
if (iterator.hasNext()) {
    LongWritable v2 = iterator.next();
    context.write(k2, v2);
}
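Note that because grouping also uses compareTo, two input lines whose sums are equal end up in the same reduce call, and the code above only writes the first of them. If every line should appear in the output, one option (an assumption about the desired behavior, not part of the original example) is to loop over all values; Hadoop refills the key object as the values are iterated, so each write pairs the value with the key it came from:

for (LongWritable v2 : v2s) {
    // One output record per input line in this group.
    context.write(k2, v2);
}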


