Hadoop MapReduce多表关联程序
来源:互联网 发布:图片视频制作软件 编辑:程序博客网 时间:2024/06/05 20:19
package com.hadoop.sample; import java.io.IOException; import java.util.Iterator; import java.util.StringTokenizer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; public class MTJoin { private static int time = 0; public static class Map extends Mapper<Object,Text,Text,Text>{ //在map中先区分输入行属于左表还是右表,然后对两列值进行分割, //保存连接列在key值,剩余列和左右表标志在value中,最后输出 public void map(Object key,Text value,Context context) throws IOException,InterruptedException{ String line = value.toString(); int i = 0; //输入文件首行,不处理 if(line.contains("factoryname")==true||line.contains("addressID")==true){ return; } //找出数据中的分割点 while(line.charAt(i)>='9'||line.charAt(i)<='0'){ i++; } if(line.charAt(i)>='9'||line.charAt(i)<='0'){ //左表 int j = i-1; while(line.charAt(j)!=' ') j--; String[] values = {line.substring(0, j),line.substring(i)}; context.write(new Text(values[1]), new Text("1+"+values[0])); }else{//右表 int j = i+1; while(line.charAt(j)!=' ') j++; String[] values = {line.substring(0, i+1),line.substring(j)}; context.write(new Text(values[0]), new Text("2+"+values[1])); } } } public static class Reduce extends Reducer<Text,Text,Text,Text>{ //reduce解析map输出,将value中数据按照左右表分别保存,然后求笛卡尔积,输出 public void reduce(Text key,Iterable<Text> values,Context context) throws IOException,InterruptedException{ if(time == 0){//输入文件第一行 context.write(new Text("factoryname"),new Text("addressname")); time++; } int factorynum = 0; String factory[] = new String[10]; int adressnum = 0; String adress[] = new String[10]; Iterator iter = values.iterator(); while(iter.hasNext()){ String record = iter.next().toString(); int len = record.length(); int i = 2; char type = record.charAt(0); String factoryname = new String(); String adressname = new String(); if(type == '1'){//左表 factory[factorynum] = record.substring(2); factorynum++; }else{//右表 adress[adressnum] = record.substring(2); } } if(factorynum!=0&&adressnum!=0){//笛卡尔积 for(int m=0;m<factorynum;m++){ for(int n=0;n<adressnum;n++){ context.write(new Text(factory[m]), new Text(adress[n])); } } } } } /** * @param args */ public static void main(String[] args) throws Exception{ // TODO Auto-generated method stub Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf,args).getRemainingArgs(); if(otherArgs.length != 2){ System.err.println("Usage WordCount <int> <out>"); System.exit(2); } Job job = new Job(conf,"word count"); job.setJarByClass(MTJoin.class); job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); } }
0 0
- Hadoop MapReduce多表关联程序
- hadoop mapreduce多表关联
- Hadoop MapReduce单表关联程序
- mapreduce多表关联
- MapReduce程序之实现单表关联
- MapReduce多表关联实验
- mapreduce编程:多表关联
- MapReduce实例----多表关联
- Hadoop MapReduce排序程序
- <hadoop> mapreduce程序分块
- hadoop多表关联
- hadoop多表关联
- hadoop 多表关联
- Hadoop 单表关联 多表关联
- Mapreduce多表关联与词频统计程序运行结果输出到Hbase的两个途径
- MapReduce编程之实现多表关联
- Hadoop2.8.0<Mapreduce实现多表关联>
- MapReduce单表关联
- Hadoop MapReduce单表关联程序
- 互斥锁和自旋锁
- JavaScript中的内置对象
- 训练2 4题
- response.sendRedirect()与request.getRequestDispatcher().forward()区别
- Hadoop MapReduce多表关联程序
- Some Basic Concepts in Image Steganography
- Animation and Transitions in Qt Quick | Qt Quick 5
- sdau-2 1013 1014
- 数素数,PAT,1013
- acm 2 1024 Sequence one
- 计算机视觉和图像处理常用的一些标准图片
- 利用Java heap dump查找、分析问题
- wso2esb 4.9.0中无法启动Axis2Server的解决办法