Hadoop中自定义排序,分区,分组
来源:互联网 发布:猪八戒局域网考试软件 编辑:程序博客网 时间:2024/06/05 20:44
–Reducer
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Pass-through reducer: every value received for a group is written back out,
 * paired with the key object handed to {@link #reduce}.
 */
public class HotReduce extends Reducer<KeyPair, Text, KeyPair, Text> {

    /**
     * Emits each value of the group unchanged, in iteration order.
     *
     * @param kp      key passed in for this group
     * @param i       values belonging to the group
     * @param context sink the (key, value) pairs are written to
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    protected void reduce(KeyPair kp, Iterable<Text> i, Context context)
            throws IOException, InterruptedException {
        java.util.Iterator<Text> values = i.iterator();
        while (values.hasNext()) {
            Text current = values.next();
            context.write(kp, current);
        }
    }
}
–Mapper
import java.io.IOException; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; public class HotMapper extends Mapper<LongWritable, Text, KeyPair, Text>{ @Override protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException { String line=value.toString(); String[] ss=line.split("\t"); if (ss.length==2){ int year=Integer.parseInt(ss[0].substring(0, 4)); int hot=Integer.parseInt(ss[1].substring(0, ss[1].indexOf("°C"))); KeyPair kp=new KeyPair(); kp.setYear(year); kp.setHot(hot); context.write(kp, value); } } }
–KeyPair 自定义封装类
import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import org.apache.hadoop.io.WritableComparable; public class KeyPair implements WritableComparable<KeyPair> { private int year; private int hot; public int getYear() { return year; } public void setYear(int year) { this.year = year; } public int getHot() { return hot; } public void setHot(int hot) { this.hot = hot; } @Override 反序列化 public void readFields(DataInput in) throws IOException { this.year=in.readInt(); this.hot=in.readInt(); } @Override 序列化 public void write(DataOutput out) throws IOException { out.writeInt(year); out.writeInt(hot); } @Override 对比 public int compareTo(KeyPair o) { int result=Integer.compare(year, o.getYear()); if (result!=0){ return result; } return Integer.compare(hot, o.hot); } @Override public String toString() { return year+"\t"+hot; } @Override public int hashCode() { return new Integer(year+hot).hashCode(); } }
–HotPartition
//自定义分区
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * Custom partitioner that routes records by the year of their {@link KeyPair},
 * so every record of one year lands in the same partition.
 */
public class HotPartition extends Partitioner<KeyPair, Text> {

    /**
     * Computes the partition index from the key's year.
     *
     * @param key   composite key; only its year is used
     * @param value record value (not used)
     * @param num   number of partitions
     * @return an index in {@code [0, num)}
     */
    @Override
    public int getPartition(KeyPair key, Text value, int num) {
        // Clear the sign bit before the modulo: a negative year (or an int
        // overflow in the multiplication) would otherwise produce a negative
        // index, which is not a valid partition number. For non-negative,
        // non-overflowing products the result is unchanged.
        return ((key.getYear() * 127) & Integer.MAX_VALUE) % num;
    }
}
–HotSort 自定义排序
import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableComparator; public class HotSort extends WritableComparator{ public HotSort() { super(KeyPair.class, true); //排序时使用自定义分装的类进行排序 } @Override public int compare(WritableComparable a, WritableComparable b) { KeyPair o1=(KeyPair) a; KeyPair o2=(KeyPair) b; int res=Integer.compare(o1.getYear(), o2.getYear()); if (res!=0){ return res; } return -Integer.compare(o1.getHot(),o2.getHot());//降序排序 } }
–HotGroup
自定义分组
import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableComparator; public class HotGroup extends WritableComparator{ public HotGroup() { super(KeyPair.class, true); } @Override public int compare(WritableComparable a, WritableComparable b) { KeyPair o1=(KeyPair) a; KeyPair o2=(KeyPair) b; return Integer.compare(o1.getYear(),o2.getYear()); } } public class Job{ public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { Configuration configuration = new Configuration(); Job job = new Job(configuration,"partation"); job.setJarByClass(Job.class); job.setMapperClass(HotMapper.class); job.setReducerClass(HotReducer.class); job.setOutputKeyClass(KeyPair.class); job.setOutputValueClass(Text.class); job.setGroupingComparatorClass(FirstPartation.class); job.setNumReduceTasks(3); job.setSortComparatorClass(SortHot.class); job.setGroupingComparatorClass(GroupHot.class); FileInputFormat.addInputPath(job, new Path("/user/root/books")); FileOutputFormat.setOutputPath(job, new Path("/user/root/bookout")); System.exit(job.waitForCompletion(true)?0:1); }}
0 0
- Hadoop中自定义排序,分区,分组
- Hadoop 自定义排序,自定义分区,自定义分组
- hadoop自定义排序、分组、分区(温度统计)
- hadoop 排序、分区、分组实例
- 自定义分区、数据类型、排序、分组
- hadoop自定义排序,分组排序
- hadoop二次排序、分组、排序和分区
- hadoop-之二次排序&分组&分区
- HADOOP(2)__Mapreduce分区、排序、分组
- hadoop 自定义分组排序,求相同key中value最小值
- 「 Hadoop」mapreduce对温度数据进行自定义排序、分组、分区等
- mapreduce,自定义分区,分组,排序实现join
- MapReduce的自定义排序、分区和分组
- 图文并茂展示hadoop 中wordcount的 输入,map处理,排序,分区,分组,combine,copy,再排序,分组,redece处理
- hadoop提交作业自定义排序和分组
- mapreduce学习笔记-二次排序(自定义数据类型,自定义分区分组)
- hadoop分组与分区
- Hadoop分区与分组
- Py第十六问 ImportError: DLL load failed: %1 is not a valid Win32 application
- TCP BBR及MPTCP的一些想法
- 欢迎使用CSDN-markdown编辑器
- Unity3d 使用Texturepacker制作NGUI图集
- chrome本地保存账号密码获取思路
- Hadoop中自定义排序,分区,分组
- 排序算法学习
- 对Android 软键盘向下的监听
- javaEE之标签和过滤器
- 远程连接Linux (Ubuntu配置SSH服务) 端口22
- RecycleView的使用细则
- iOS自学笔记之UISwitch和UITextField
- PHP5.6以后开启curl支持的办法
- 三线性插值在HOG中的应用