自定义Writable类型 两个数的排序
来源:互联网 发布:ubuntu网络配置命令 编辑:程序博客网 时间:2024/05/21 05:18
使用Writable 和 WritableComparable接口的区别 :
Writable 是对 value 的封装,可以把多个值封装到同一个类中
WritableComparable继承自Writable 和 Comparable , 比Writable 多了个 compareTo方法 , 可以在compareTo方法中对数据的key进行排序处理
package com.cyh;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce job that sorts input lines of the form {@code "<first> <second>"}.
 *
 * <p>Each line becomes a {@link DataBean} map-output key; the ordering of the
 * output is therefore defined by {@link DataBean#compareTo(DataBean)}, which
 * orders by the first number descending and then by the second number
 * descending.
 */
public class NumberSort {

    /**
     * Composite map-output key holding two numbers. The key compares the first
     * number and then the second number to determine the sort order.
     */
    public static class DataBean implements WritableComparable<DataBean> {

        long first;
        long second;

        /** No-arg constructor; the fields are later filled by {@link #readFields(DataInput)}. */
        public DataBean() {
        }

        /**
         * @param first  the first number of the pair
         * @param second the second number of the pair
         */
        public DataBean(long first, long second) {
            this.first = first;
            this.second = second;
        }

        public long getFirst() {
            return first;
        }

        public void setFirst(long first) {
            this.first = first;
        }

        public long getSecond() {
            return second;
        }

        public void setSecond(long second) {
            this.second = second;
        }

        /** Writes {@code first} then {@code second} as two longs. */
        @Override
        public void write(DataOutput out) throws IOException {
            out.writeLong(first);
            out.writeLong(second);
        }

        /** Reads the two longs in the same order {@link #write(DataOutput)} emits them. */
        @Override
        public void readFields(DataInput in) throws IOException {
            first = in.readLong();
            second = in.readLong();
        }

        /**
         * Orders keys by {@code first} descending, then by {@code second}
         * descending.
         *
         * @return a negative value when this key sorts before {@code o}, a
         *         positive value when it sorts after, and 0 when both numbers
         *         are equal
         */
        @Override
        public int compareTo(DataBean o) {
            // Arguments are swapped on purpose: the larger number sorts first.
            int byFirst = Long.compare(o.first, first);
            return byFirst != 0 ? byFirst : Long.compare(o.second, second);
        }

        /** Two keys are equal exactly when {@link #compareTo(DataBean)} returns 0. */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof DataBean)) {
                return false;
            }
            DataBean other = (DataBean) obj;
            return first == other.first && second == other.second;
        }

        /**
         * Hash derived only from the two field values, so equal keys hash
         * identically in every JVM; the identity hash inherited from
         * {@code Object} does not give that guarantee.
         */
        @Override
        public int hashCode() {
            int result = (int) (first ^ (first >>> 32));
            result = 31 * result + (int) (second ^ (second >>> 32));
            return result;
        }

        @Override
        public String toString() {
            return first + " " + second;
        }
    }

    /**
     * Parses each input line into two numbers and emits
     * {@code (DataBean(first, second), second)}.
     */
    public static class Map extends Mapper<LongWritable, Text, DataBean, LongWritable> {

        // Reused across map() calls instead of allocating per record.
        private final DataBean outKey = new DataBean();
        private final LongWritable outValue = new LongWritable();

        /**
         * @param key     input key supplied by the input format (not used here)
         * @param value   one input line, expected to hold two whitespace-separated integers
         * @param context sink for the emitted key/value pair
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                return; // blank line: nothing to sort
            }

            String[] fields = line.split("\\s+");
            if (fields.length < 2) {
                context.getCounter("NumberSort", "MALFORMED_LINES").increment(1);
                return;
            }

            long first;
            long second;
            try {
                first = Long.parseLong(fields[0]);
                second = Long.parseLong(fields[1]);
            } catch (NumberFormatException e) {
                // Skip the bad record but keep it visible through a counter.
                context.getCounter("NumberSort", "MALFORMED_LINES").increment(1);
                return;
            }

            // Both numbers form the key; the second number is also the value.
            outKey.setFirst(first);
            outKey.setSecond(second);
            outValue.set(second);
            context.write(outKey, outValue);
        }
    }

    /**
     * Emits one {@code (first, second)} output record for every value grouped
     * under a key.
     */
    public static class Re extends Reducer<DataBean, LongWritable, Text, LongWritable> {

        // Reused across reduce() calls instead of allocating per record.
        private final Text outKey = new Text();

        /**
         * @param key     the composite key; only its first number is written as the output key
         * @param values  the second numbers; there are several when both the first and the
         *                second number of multiple input lines are equal
         * @param context sink for the emitted records
         */
        @Override
        protected void reduce(DataBean key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            outKey.set(String.valueOf(key.getFirst()));
            for (LongWritable val : values) {
                context.write(outKey, val);
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path and {@code args[1]} the output path;
     *             when fewer than two arguments are given, {@code /input/data} and
     *             {@code /output/<yyyyMMddHHmmss>} are used
     * @throws Exception if the job cannot be set up or submitted
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            // Timestamped default so repeated runs do not reuse an output directory.
            String time = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
            args = new String[] {"/input/data", "/output/" + time};
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // 1. Set the class used to locate the job jar.
        job.setJarByClass(NumberSort.class);

        // 2. Configure the map phase.
        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(DataBean.class);
        job.setMapOutputValueClass(LongWritable.class);

        // 3. Configure the reduce phase.
        job.setReducerClass(Re.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        // 4. Set the input and output paths.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // 5. Run the job and report failure through the process exit code.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
初始数据
10 10
10 9
10 8
1 3
2 3
1 4
5 9
6 9
6 9
6 9
4 4
4 5
4 4
4 5
排序后
10 10
10 9
10 8
6 9
6 9
6 9
5 9
4 5
4 5
4 4
4 4
2 3
1 4
1 3
阅读全文
0 0
- 自定义Writable类型 两个数的排序
- 自定义Writable类型
- Hadoop自定义Writable实现二次排序
- 自定义Writable
- 实现Writable类型的ListWritable,SetWritable
- Java自定义类型的排序
- 自定义数组类型的排序
- Hadoop如何实现自定义的Writable
- 自定义Writable类中Comparator的编码
- Hadoop中自定义Writable类型(FFT中复数)
- Hadoop Serialization -- hadoop序列化详解 (3)【ObjectWritable,集合Writable以及自定义的Writable】
- Hadoop Serialization -- hadoop序列化详解 (3)【ObjectWritable,集合Writable以及自定义的Writable】
- Hadoop Writable基本类型
- C++,vector 自定义类型的排序
- 自定义类型struct的排序-sort函数
- vector中针对自定义类型的排序
- 自定义Hadoop Writable
- Hadoop 自定义Writable NullPointerException
- ActiveMQ(六):spring+ActiveMQ+线程池实现简单的分布式,多线程,多任务的异步任务处理系统
- HTML网页中table居中和表格内容居中
- Unable to open debugger port (127.0.0.1:43657)
- Linux进程间通信--消息队列
- 10-23正则表达式规则
- 自定义Writable类型 两个数的排序
- TP页面常用标签
- bzoj 1854 游戏 并查集+贪心 解题报告
- c语言(二)函数
- 从1到n中整数1出现的次数
- AR在各行业的应用
- PDT Standard
- [LeetCode]Reverse Integer
- 线程、多线程与线程池总结