刘刚 mapside 实现
来源:互联网 发布:linux创建新用户命令 编辑:程序博客网 时间:2024/05/17 02:51
package MapJoin;import java.io.BufferedReader;import java.io.FileReader;import java.io.IOException;import java.net.URI;import java.util.HashMap;import java.util.Map;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;import org.apache.hadoop.util.GenericOptionsParser;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;public class MapSideJoin extends Configured implements Tool {public static class MapClass extends Mapper<LongWritable, Text, Text, Text> {private Map<String, String> customersMap = new HashMap<String, String>();private Text oKey = new Text();private String[] order;// 此方法会在map方法执行之前执行@Overrideprotected void setup(Context context) throws IOException,InterruptedException {super.setup(context);BufferedReader in = null;try {// 从当前作业中获取要缓存的文件String Customer = null;URI[] pathuri = context.getCacheFiles();String pathString = pathuri[0].toString();Path path = new Path(pathString);in = new BufferedReader(new FileReader(path.toString()));while (null != (Customer = in.readLine())) {String[] cust = Customer.split(",");customersMap.put(cust[0], cust[1] + "," + cust[2]);}} catch (IOException e) {e.printStackTrace();} finally {try {if (in != null) {in.close();}} catch (IOException e) {e.printStackTrace();}}}public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException {order = value.toString().split(",");String orderTail = order[1] + "," + order[2] + "," + order[3];oKey.set(order[0] + "," + customersMap.get(order[0]) + ","+ orderTail);context.write(oKey, new Text());}}public int run(String[] args) throws Exception {Job job = new Job(getConf(), "MultiTableJoin");job.setJobName("MultiTableJoin");job.setJarByClass(MapSideJoin.class);job.setMapperClass(MapClass.class);job.setInputFormatClass(TextInputFormat.class);job.setOutputFormatClass(TextOutputFormat.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(Text.class);job.addCacheFile(new URI("hdfs://lx-zhujiming:9000/user/......./tmp/input/Customers.csv"));job.addCacheFile(new URI("/home/......../esktop/Customers.csv"));String[] otherArgs = new GenericOptionsParser(job.getConfiguration(),args).getRemainingArgs();FileInputFormat.addInputPath(job, new Path(otherArgs[0]));FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));return job.waitForCompletion(true) ? 0 : 1;}public static void main(String[] args) throws Exception {int res = ToolRunner.run(new Configuration(), new MapSideJoin(), args);System.exit(res);}}
0 0
- 刘刚 mapside 实现
- 浅释IPv6 刘 刚
- sql实现获取刚获得的插入ID
- android实现联网小例子,刚学习与大家分享
- 按钮实现刚点击就切换状态(即UIControlEventTouchDown)
- SqlServer中实现返回刚插入记录的ID
- Android---刚进程序时的导航页面实现
- JS 实现刚进入页面时刷新一次
- 刚发现set的底层是通过map实现的
- 刚来!
- 刚回来
- 刚来!
- 刚到!
- 刚来!
- 刚到
- 刚来
- 刚来到
- 刚来
- List在Python中的使用
- iOS多线程编程(一)——RunLoop基本概念
- JAVA 多线程静态同步函数的锁是class 对象
- 机房收费系统调错总结(二)
- 关于cell的循环利用
- 刘刚 mapside 实现
- [剑指Offer] 从尾到头打印链表
- 关于布局的学习
- 第十四周阅读程序——3
- ACM天梯赛练习L1-007. 念数字
- leetcode 177. Nth Highest Salary
- 压缩跟踪Compressive Tracking源码理解
- libnet、libnids、libpcap轻松搭建Linux网络入侵检测系统
- mysql explain分析sql语句的执行计划