MapReduce Join
Map Side Join
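In a map-side join the small table is shipped to every map task through the distributed cache and loaded into an in-memory HashMap in setup(); each record of the big table is then joined against that map during the map phase, so no shuffle or reduce stage is needed at all. The listing below implements this for the two weather-station tables shown in its header comment.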
package MapJoin;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/*
 * Table1 (small table, tab-separated: station id -> station name)
 * 011990-99999 SIHCCAJAVRI
 * 012650-99999 TYNSET-HANSMOEN
 *
 * Table2 (big table, tab-separated: station id, timestamp, reading)
 * 012650-99999 194903241200 111
 * 012650-99999 194903241800 78
 * 011990-99999 195005150700 0
 * 011990-99999 195005151200 22
 * 011990-99999 195005151800 -11
 */
public class MapJoin {

    static class JoinMapper extends Mapper<LongWritable, Text, Text, Text> {

        private Map<String, String> table1Map = new HashMap<String, String>();

        // Load the small table from the distributed cache into an in-memory HashMap.
        @Override
        protected void setup(Context context) throws IOException {
            URI[] paths = context.getCacheFiles();
            Configuration conf = context.getConfiguration();
            FileSystem fs = FileSystem.get(conf);
            BufferedReader br = new BufferedReader(new InputStreamReader(
                    fs.open(new Path(paths[0].toString()))));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    String[] vals = line.split("\\t");
                    if (vals.length == 2) {
                        table1Map.put(vals[0], vals[1]);
                    }
                }
            } finally {
                br.close();
            }
        }

        // Scan the big table in the map phase and join each record against the HashMap.
        @Override
        protected void map(LongWritable key, Text val, Context context)
                throws IOException, InterruptedException {
            String[] vals = val.toString().split("\\t");
            if (vals.length == 3) {
                // Look up every record's foreign key in the in-memory table.
                String table1Vals = table1Map.get(vals[0]);
                table1Vals = (table1Vals == null) ? "" : table1Vals;
                context.write(new Text(vals[0]),
                        new Text(table1Vals + "\t" + vals[1] + "\t" + vals[2]));
            }
        }
    }

    public static void main(String[] args) throws IOException,
            ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 3) {
            System.err.println("Parameter number is wrong, please enter three parameters:"
                    + " <big table hdfs input> <small table hdfs input> <hdfs output>");
            System.exit(-1);
        }
        Job job = Job.getInstance(conf, "MapJoin");
        job.setJarByClass(MapJoin.class);
        job.setMapperClass(JoinMapper.class);
        job.setNumReduceTasks(0); // map-only job: the join is done entirely in the mapper
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        job.addCacheFile(new Path(otherArgs[1]).toUri());
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
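A sketch of how the job might be submitted, assuming the compiled jar is named mapjoin.jar and the three paths are hypothetical placeholders for the big table, the small table and the output directory:

hadoop jar mapjoin.jar MapJoin.MapJoin /data/table2 /data/table1 /out/mapjoin

With the sample records shown in the header comment, each Table2 record picks up its station name from Table1, so the map-only output consists of tab-separated lines of the form station id, station name, timestamp, reading, for example:

012650-99999	TYNSET-HANSMOEN	194903241200	111
011990-99999	SIHCCAJAVRI	195005150700	0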
Reduce Side Join
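In a reduce-side join both tables go through the normal map and shuffle path: the mapper emits the join key as the MapReduce key and the rest of the record as the value, the framework groups all records that share a key, and the reducer merges the grouped values into one joined line. This handles tables of any size but pays the full cost of the shuffle.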
package ReduceJoin;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

/*
 * user.csv:
 * "ID","NAME","SEX"
 * "1","user1","0"
 * "2","user2","0"
 * "3","user3","0"
 * "4","user4","1"
 * "5","user5","0"
 * "6","user6","0"
 * "7","user7","1"
 * "8","user8","0"
 * "9","user9","0"
 *
 * order.csv:
 * "USER_ID","NAME"
 * "1","order1"
 * "2","order2"
 * "3","order3"
 * "4","order4"
 * "7","order7"
 * "8","order8"
 * "9","order9"
 */
public class ReduceJoin {

    public static class MapClass extends Mapper<LongWritable, Text, Text, Text> {

        // Reuse Text objects across map() calls to avoid creating
        // a new object for every record.
        private Text outKey = new Text();
        private Text outValue = new Text();
        private String[] keyValue = null;

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // value holds one whole input line, so the join key has to be
            // parsed out of it: the first column is the foreign key, the rest
            // of the line is kept as the value.
            keyValue = value.toString().split(",", 2);
            outKey.set(keyValue[0]);
            outValue.set(keyValue[1]);
            context.write(outKey, outValue);
        }
    }

    public static class Reduce extends Reducer<Text, Text, Text, Text> {

        private Text outValue = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Each value comes from a different input file that shares this key,
            // i.e. this is the set of map output values grouped by the join key.
            StringBuilder valueStr = new StringBuilder();
            for (Text val : values) {
                valueStr.append(val);
                valueStr.append(",");
            }
            outValue.set(valueStr.deleteCharAt(valueStr.length() - 1).toString());
            context.write(key, outValue);
        }
    }

    public static void main(String[] args) throws IllegalArgumentException,
            IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "MyJoin");
        job.setJarByClass(ReduceJoin.class);
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);
        // job.setCombinerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        // args[0]: input directory containing both user.csv and order.csv
        // args[1]: output directory
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
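Given the user.csv and order.csv samples in the header comment, and assuming both files sit in the single input directory passed as args[0], the reducer concatenates all values that share a join key, so the output would look roughly like:

"1"	"user1","0","order1"
"2"	"user2","0","order2"
"5"	"user5","0"
"ID"	"NAME","SEX"

Two caveats follow directly from the code: the CSV header rows are not skipped, so they are joined under the literal keys "ID" and "USER_ID", and MapReduce does not guarantee the relative order of the user and order values under one key. A common refinement is to tag each value with its source table in the mapper and use a secondary sort so the reducer can tell the two sides apart.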