二次排序代码
来源:互联网 发布:山寨币源码 编辑:程序博客网 时间:2024/04/30 00:48
1、输入数据:
2、代码:
3、打包执行命令:
4、结果输出文件目录:
- [hadoop@hadoop ~]$ hdfs dfs -text /user/hadoop/secondarysort.txt
- 3 5
- 5 89
- 7 63
- 5 56
- 3 9
- 3 1
- 7 26
- 7 45
- 7 4
- 5 18
- 5 23
- 7 63
- 3 24
- [hadoop@hadoop ~]$
2、代码:
- package secondarySort;
- import java.io.DataInput;
- import java.io.DataOutput;
- import java.io.IOException;
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.io.IntWritable;
- import org.apache.hadoop.io.LongWritable;
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.io.WritableComparable;
- import org.apache.hadoop.io.WritableComparator;
- import org.apache.hadoop.mapreduce.Job;
- import org.apache.hadoop.mapreduce.Mapper;
- import org.apache.hadoop.mapreduce.Partitioner;
- import org.apache.hadoop.mapreduce.Reducer;
- import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
- import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
- import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
- import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
- public class SecondarySort_Demo {
- //自己定义的key类应该实现WritableComparable接口
- public static class IntPair implements WritableComparable<IntPair>{
- int first;
- int second;
- public void set(int left,int right){
- first=left;
- second=right;
- }
- public int getFirst(){
- return first;
- }
- public int getSecond(){
- return second;
- }
- //序列化,将IntPair转化成使用流传送的二进制
- public void write(DataOutput out) throws IOException {
- out.writeInt(first);
- out.writeInt(second);
- }
- //反序列化,从流中的二进制转换成IntPair
- public void readFields(DataInput in) throws IOException {
- first=in.readInt();
- second=in.readInt();
- }
- //key的比较
- public int compareTo(IntPair o) {
- if(first!=o.first){
- return first<o.first ? -1:1;
- }else if(second!=o.second){
- return second<o.second ? -1:1;
- }else{
- return 0;
- }
- }
- //新定义类应该重写的两个方法
- public int hashCode(){
- return first*157+second;
- }
- public boolean equals(Object right){
- if(right==null)
- return false;
- if(this==right)
- return true;
- if(right instanceof IntPair){
- IntPair r=(IntPair) right;
- return r.first==first&&r.second==second;
- }else {
- return false;
- }
- }
- }
- /**
- * 分区函数类。根据first确定Partition。
- */
- public static class FirstPartitioner extends Partitioner<IntPair, IntWritable>{
- @Override
- public int getPartition(IntPair key, IntWritable value,
- int numPartitions) {
- return Math.abs(key.getFirst()*127) % numPartitions;
- }
- }
- /**
- * 分组函数类。只要first相同就属于同一个组。
- */
- /*//第一种方法,实现接口RawComparator
- public static class GroupingCpmparator implements RawComparator<IntPair>{
- public int compare(IntPair o1, IntPair o2) {
- int l=o1.getFirst();
- int r=o2.getFirst();
- return l == r ? 0:(l<r ? -1:1);
- }
- //一个字节一个字节的比,直到找到一个不相同的字节,然后比这个字节的大小作为两个字节流的大小比较结果。
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- return WritableComparator.compareBytes(b1, s1, Integer.SIZE/8, b2, s2, Integer.SIZE/8);
- }
- }*/
- //第二种方法,继承WritableComparator
- public static class GroupingComparator extends WritableComparator{
- protected GroupingComparator(){
- super(IntPair.class,true);
- }
- @SuppressWarnings("rawtypes")
- //Compare two WritableComparables.
- public int compare(WritableComparable w1,WritableComparable w2){
- IntPair ip1=(IntPair) w1;
- IntPair ip2=(IntPair) w2;
- int l=ip1.getFirst();
- int r=ip2.getFirst();
- return l==r?1:(l<r?-1:1);
- }
- }
- // 自定义map
- public static class MyMap extends Mapper<LongWritable, Text, IntPair, IntWritable>{
- private final IntPair intkey=new IntPair();
- private final IntWritable intvalue=new IntWritable();
- @Override
- protected void map(LongWritable key, Text value,Context context)
- throws IOException, InterruptedException {
- String line=value.toString();
- String[] splited=line.split("\t");
- intkey.set(Integer.parseInt(splited[0]), Integer.parseInt(splited[1]));
- intvalue.set(Integer.parseInt(splited[1]));
- context.write(intkey, intvalue);
- }
- }
- // 自定义reduce
- public static class MyReduce extends Reducer<IntPair, IntWritable, Text, IntWritable>{
- private final Text left =new Text();
- // private static final Text SEPARATOR =new Text("========================");
- @Override
- protected void reduce(IntPair k2, Iterable<IntWritable> v2s,Context context)
- throws IOException, InterruptedException {
- // context.write(SEPARATOR, null);
- left.set(Integer.toString(k2.getFirst()));
- for (IntWritable val : v2s) {
- context.write(left, val);
- }
- }
- }
- public static void main(String[] args) throws Exception{
- Configuration conf = new Configuration();
- // 实例化一道作业
- Job job=Job.getInstance(conf, SecondarySort_Demo.class.getSimpleName());
- job.setJarByClass(SecondarySort_Demo.class);
- // Mapper类型
- job.setMapperClass(MyMap.class);
- // 不再需要Combiner类型,因为Combiner的输出类型<Text, IntWritable>对Reduce的输入类型<IntPair, IntWritable>不适用
- //job.setCombinerClass(Reduce.class);
- // Reducer类型
- job.setReducerClass(MyReduce.class);
- // 分区函数
- job.setPartitionerClass(FirstPartitioner.class);
- // 分组函数
- job.setGroupingComparatorClass(GroupingComparator.class);
- // map 输出Key的类型
- job.setMapOutputKeyClass(IntPair.class);
- // map输出Value的类型
- job.setMapOutputValueClass(IntWritable.class);
- // rduce输出Key的类型,是Text,因为使用的OutputFormatClass是TextOutputFormat
- job.setOutputKeyClass(Text.class);
- // rduce输出Value的类型
- job.setOutputValueClass(IntWritable.class);
- // 将输入的数据集分割成小数据块splites,同时提供一个RecordReder的实现。
- job.setInputFormatClass(TextInputFormat.class);
- // 提供一个RecordWriter的实现,负责数据输出。
- job.setOutputFormatClass(TextOutputFormat.class);
- // 输入hdfs路径
- FileInputFormat.setInputPaths(job, args[0]);
- // 输出hdfs路径
- FileOutputFormat.setOutputPath(job, new Path(args[1]));
- // 提交job
- // System.exit(job.waitForCompletion(true) ? 0 : 1);
- job.waitForCompletion(true);
- }
- }
3、打包执行命令:
- hadoop jar secondarysort.jar /user/hadoop/secondarysort.txt /user/hadoop/output
4、结果输出文件目录:
- [hadoop@hadoop ~]$ hdfs dfs -ls /user/hadoop/output
- Found 77 items
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/_SUCCESS
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00000
- -rw-r--r-- 3 hadoop supergroup 17 2015-08-30 17:22 /user/hadoop/output/part-r-00001
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00002
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00003
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00004
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00005
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00006
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00007
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00008
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00009
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00010
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00011
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00012
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00013
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00014
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00015
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00016
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00017
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00018
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00019
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00020
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00021
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00022
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00023
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00024
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00025
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00026
- -rw-r--r-- 3 hadoop supergroup 20 2015-08-30 17:22 /user/hadoop/output/part-r-00027
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00028
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00029
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00030
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00031
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00032
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00033
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00034
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00035
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00036
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00037
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00038
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00039
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00040
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00041
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00042
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00043
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00044
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00045
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00046
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00047
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00048
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00049
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00050
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00051
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00052
- -rw-r--r-- 3 hadoop supergroup 24 2015-08-30 17:22 /user/hadoop/output/part-r-00053
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00054
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00055
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00056
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00057
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00058
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00059
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00060
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00061
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00062
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00063
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00064
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00065
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00066
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00067
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00068
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00069
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00070
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00071
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00072
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00073
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00074
- -rw-r--r-- 3 hadoop supergroup 0 2015-08-30 17:22 /user/hadoop/output/part-r-00075
5、排序后的数据:
- [hadoop@hadoop ~]$ hdfs dfs -text /user/hadoop/output/part-r-00001
- 3 1
- 3 5
- 3 9
- 3 24
- [hadoop@hadoop ~]$ hdfs dfs -text /user/hadoop/output/part-r-00027
- 5 18
- 5 23
- 5 56
- 5 89
- [hadoop@hadoop ~]$ hdfs dfs -text /user/hadoop/output/part-r-00053
- 7 4
- 7 26
- 7 45
- 7 63
- 7 63
- [hadoop@hadoop ~]$
0 0
- 二次排序代码
- SecondarySort二次排序代码
- 2-1、二次排序代码
- 二次排序的实现代码
- 二次排序
- 二次排序
- 二次排序
- hadoop 二次排序
- InverseMapper.class二次排序
- Hadoop二次排序
- Hadoop二次排序
- Hadoop二次排序
- hadoop二次排序
- hadoop之二次排序
- hadoop二次排序一
- hadoop二次排序二
- hadoop二次排序三
- Hadoop二次排序
- 剑指Offer_34_找出字符串中第一个只出现一次的字符
- Mac安装Django
- 对MVC的理解
- Git终端命令笔记
- ionic 搜索提示
- 二次排序代码
- 递归模式的思考(一)
- Android Studio plugin is too old,please...
- 查找(2)——哈希查找
- npm和compass开始一个项目
- 007 easy
- 根据前台设计数据库--搜索页篇
- 苹果CEO乔布斯在斯坦福大学的演讲稿[中英]
- 3、Ext组件Panel面板使用