利用MapReduce进行排序

来源：互联网　发布：数据透视表总计图标　编辑：程序博客网　时间：2024/06/05 09:39

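一 排序原理

MapReduce 在 shuffle 阶段会按 key 对 map 的输出进行排序，因此 map 只需把每个整数作为 key 输出即可。自定义的 Partitioner 按数值区间把 key 分配给各个 reducer，使编号较小的分区只包含较小的数；reduce 再按收到 key 的顺序，为每个 key 的每一次出现输出一行“序号 数值”。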

二 测试样例

输入

file1：

2

32

654

32

15

756

65223

file2：

5956

22

650

92

file3：

26

54

6

输出：

1 2

2 6

3 15

4 22

5 26

6 32

7 32

8 54

9 92

10 650

11 654

12 756

13 5956

14 65223

三 代码

Sort.java

import java.io.IOException;

import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.Partitioner;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.util.GenericOptionsParser;

public class Sort {

public static class Map extends

Mapper<Object, Text, IntWritable, IntWritable> {

private static IntWritable data = new IntWritable();

public void map(Object key, Text value, Context context)

throws IOException, InterruptedException {

String line = value.toString();

data.set(Integer.parseInt(line));

context.write(data, new IntWritable(1));

}

}

public static class Reduce extends

Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

private static IntWritable linenum = new IntWritable(1);

public void reduce(IntWritable key, Iterable<IntWritable> values,

Context context) throws IOException, InterruptedException {

for (IntWritable val : values) {

context.write(linenum, key);

linenum = new IntWritable(linenum.get() + 1);

}

}

}

/**
 * Range partitioner: splits {@code [0, MAX_NUMBER]} into {@code numPartitions}
 * equal-width, consecutive ranges so that every key in partition {@code p} is
 * smaller than every key in partition {@code p + 1}.
 */
public static class Partition extends Partitioner<IntWritable, IntWritable> {

    /** Largest key the ranges are sized for (the maximum of the sample input). */
    private static final int MAX_NUMBER = 65223;

    /**
     * Returns the index of the range that contains {@code key}.
     *
     * @param key           the number being sorted
     * @param value         unused
     * @param numPartitions number of reduce tasks
     * @return a partition index in {@code [0, numPartitions - 1]}
     */
    @Override
    public int getPartition(IntWritable key, IntWritable value, int numPartitions) {
        // Width of each range; the +1 makes MAX_NUMBER itself fall inside the last range.
        int bound = MAX_NUMBER / numPartitions + 1;
        int keynumber = key.get();

        if (keynumber < 0) {
            // Negative keys sort before everything else, so they belong to the first partition.
            return 0;
        }
        // Keys above MAX_NUMBER are clamped into the last partition to keep the global order.
        return Math.min(keynumber / bound, numPartitions - 1);
    }
}

/**
 * Configures and runs the sort job.
 *
 * <p>Exits with status 2 when the arguments are wrong, 0 when the job
 * succeeds and 1 when it fails.
 *
 * @param args generic Hadoop options followed by the input path and the output path
 * @throws Exception if the job cannot be set up or submitted
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Strip generic Hadoop options (-D, -fs, ...) and keep the job's own arguments.
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: Sort <in> <out>");
        System.exit(2);
    }

    // NOTE: newer Hadoop releases deprecate this constructor in favour of
    // Job.getInstance(conf, "Sort"); it is kept for compatibility with older releases.
    Job job = new Job(conf, "Sort");
    job.setJarByClass(Sort.class);

    job.setMapperClass(Map.class);
    job.setPartitionerClass(Partition.class);
    job.setReducerClass(Reduce.class);

    // Map output and final output share the same key/value types.
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

}

阅读全文

0 0