mapreduce编程:单表自连接
来源:互联网 发布:淘宝装修图片 编辑:程序博客网 时间:2024/05/01 21:55
程序:
package my.hadoopstudy;

import java.util.*;
import java.io.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;

/**
 * Single-table self-join implemented as one MapReduce job.
 *
 * <p>Each input line is a {@code child parent} pair separated by a single space.
 * The job joins the table with itself on "person" and writes one
 * {@code grandchild grandparent} pair for every two-step relation it finds.
 */
public class oneTableConnect {

    /** Tag prefixed to a value that is a CHILD of the record's key. */
    private static final char CHILD_TAG = '0';

    /** Tag prefixed to a value that is a PARENT of the record's key. */
    private static final char PARENT_TAG = '1';

    /**
     * Emits every (child, parent) row twice, once keyed by each column, so that
     * the reducer for a given person sees both that person's parents and that
     * person's children.
     */
    public static class tableMapper extends Mapper<Text, Text, Text, Text> {

        // Reused for every emitted value; context.write() serializes it immediately.
        private final Text tagged = new Text();

        /**
         * @param key     the child name (text before the separator)
         * @param value   the parent name (text after the separator)
         * @param context job context used to emit the two tagged records
         */
        @Override
        public void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            // A line without a separator arrives with an empty value. Joining on an
            // empty name would fabricate relations, so skip such malformed rows.
            if (key.getLength() == 0 || value.getLength() == 0) {
                return;
            }

            // Keyed by the child: the value is one of the key's parents.
            tagged.set(PARENT_TAG + value.toString());
            context.write(key, tagged);

            // Keyed by the parent: the value is one of the key's children.
            tagged.set(CHILD_TAG + key.toString());
            context.write(value, tagged);
        }
    }

    /**
     * For each person, pairs every child of that person with every parent of that
     * person, producing (grandchild, grandparent) records.
     */
    public static class tableReducer extends Reducer<Text, Text, Text, Text> {

        // Reused output holders; context.write() serializes them immediately.
        private final Text grandChild = new Text();
        private final Text grandParent = new Text();

        /**
         * @param key     the person on which the two copies of the table are joined
         * @param values  tagged names emitted by {@link tableMapper}; the first
         *                character is the tag, the remainder is the name
         * @param context job context used to emit the joined pairs
         */
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            List<String> gChildren = new ArrayList<String>();
            List<String> gParents = new ArrayList<String>();

            for (Text val : values) {
                // Copy to a String: the value is only read here, never retained as Text.
                String record = val.toString();
                String name = record.substring(1);
                if (record.charAt(0) == CHILD_TAG) {
                    // The key's child.
                    gChildren.add(name);
                } else {
                    // The key's parent.
                    gParents.add(name);
                }
            }

            // Cross product: every child of the key is a grandchild of every parent of the key.
            for (String child : gChildren) {
                grandChild.set(child);
                for (String parent : gParents) {
                    grandParent.set(parent);
                    context.write(grandChild, grandParent);
                }
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: oneTableConnect <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        // Split each input line into key/value at a single space instead of the default separator.
        conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", " ");

        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, "tableConnect");
        job.setJarByClass(oneTableConnect.class);
        job.setMapperClass(tableMapper.class);
        job.setReducerClass(tableReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
输入:
输出:
0 0
- mapreduce编程:单表自连接
- MapReduce编程-自连接
- Spark-MapReduce编程-自连接(Scala)
- MapReduce处理表的自连接
- MapReduce编程(五) 单表关联
- MapReduce单表关联
- MapReduce实现单表关联
- MapReduce实现单表关联
- MapReduce单表关联实验
- MapReduce实现单表关联
- MapReduce实战【单表关联】
- MapReduce实例----单表关联
- MapReduce编程实例:连接(Join)
- Hadoop MapReduce 在某一列上自连接(self join)
- MapReduce表连接之半连接SemiJoin
- MapReduce程序之实现单表关联
- mapreduce操作单表关联数据
- Hadoop MapReduce单表关联程序
- 为何安卓程序用Java开发
- Openstack Keystone 认证流程(四)--Filter流水线
- 辣妈萌宝APP--打开辣妈萌宝进行记录
- Java垃圾回收机制
- is not in the sudoers file 解决方案
- mapreduce编程:单表自连接
- Oracle的edit命令
- Java算法题目:一个5位数,判断它是不是回文数。即12321是回文数,个位与万位相同,十位与千位相同。
- Oracle Purge和drop的区别
- arcgis10.2.2桌面版具体的安装步骤过程
- spring注解
- java中switch语句的测试
- 遇到多个构造器参数时要考虑用构建器——Effective Java 读书笔记
- C++“输入输出流与文件系统”