hadoop学习--多表关联

来源：互联网发布：软土路基沉降观测数据编辑：程序博客网时间：2024/06/05 09:09

本例从多个表中提取出所需要的信息。

输入是2个文件，一个表示工厂表，包含工厂名和地址编号；另一个表示地址表，包含地址名和地址编号。根据2个表的信息输出工厂名-地址名表。

factory.txt:

factoryname addressID
Beijing Red Star 1
Shenzhan Thunder 3
Guangzhou Honda 2
Beijing Rising 1
Guangzhou Development Bank 2
Tencent 3
Bank of Beijing 1

address.txt:

addressID addressname
1 Beijing
2 Guangzhou
3 Shenzhen
5 Hangzhou

输出：

factoryname	addressname
Beijing Red Star	Beijing
Beijing Rising	Beijing
Bank of Beijing	Beijing
Guangzhou Honda	Guangzhou
Guangzhou Development Bank	Guangzhou
Shenzhan Thunder	Shenzhen
Tencent	Shenzhen

1、设计思路

在map阶段，对于每个输入以addressID为key进行保存；

来自factory.txt则存为:

<1,2:Beijing Red Star>

<3,2:Shenzhan Thunder>

...

来自address.txt则存为

<1,1:Beijing>

<2,1:Guangzhou>

...

这里的value开头的1： 2：用来区分来自不同的表，将在reduce中用到。

在reduce阶段

对于相同的key，保存相应的factoryname和addressname。具体细节可参考上一篇单表关联部分。

2、程序代码

[java] view plaincopy

import java.io.IOException;
import java.util.StringTokenizer;
import java.util.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class MTjoin {
public static class TokenizerMapper
extends Mapper<Object, Text, Text, Text>{
String tabletype = new String();
String childname = new String();
String parentname = new String();
public void map(Object key, Text value, Context context
) throws IOException, InterruptedException {
String line = value.toString();
int i = 0;
if(line.contains("factoryname") == true || line.contains("addressID") == true)
{
return;
}
while(line.charAt(i) > '9' || line.charAt(i) < '0')
{
i++;
}
if(i == 0) //address
{
int j = i + 1;
while(line.charAt(j) != ' ')j++;
String[] values = {line.substring(0,j),line.substring(j+1)};
context.write(new Text(values[0]),new Text("1:" + values[1]));
}
else //name
{
int j = i - 1;
while(line.charAt(j) != ' ')j--;
String[] values = {line.substring(0,j),line.substring(i)};
context.write(new Text(values[1]),new Text("2:" + values[0]));
}
}
}
public static class IntSumReducer
extends Reducer<Text,Text,Text,Text> {
private IntWritable result = new IntWritable();
int count = 0;
public void reduce(Text key, Iterable<Text> values,
Context context
) throws IOException, InterruptedException {
if(count == 0)
{
context.write(new Text("factoryname"),new Text("addressname"));
count++;
}
int factorynum = 0;
int addressnum = 0;
String[] factoryname = new String[10];
String[] addressname = new String[10];
String strrecord = new String();
String[] strArr = new String[3];
Iterator ite = values.iterator();
while(ite.hasNext())
{
strrecord = ite.next().toString();
if(strrecord.length()<=0)
{
continue;
}
char type = strrecord.charAt(0);
if(type == '1')
{
addressname[addressnum++] = strrecord.substring(2);
}
else if(type == '2')
{
factoryname[factorynum++] = strrecord.substring(2);
}
}
if(factorynum != 0 && addressnum != 0)
{
for(int i = 0;i < factorynum;i++)
{
for(int j = 0; j < addressnum;j++)
{
context.write(new Text(factoryname[i]),new Text(addressname[j]));
}
}
}
}
}
/**
 * Configures and runs the MTjoin job.
 *
 * <p>Expects exactly three arguments after the generic Hadoop options:
 * two input paths (the factory table and the address table) and one
 * output path. Exits with status 2 on a wrong argument count, 0 if the
 * job succeeds and 1 if it fails.
 *
 * @param args command-line arguments, including any generic Hadoop options
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        // The job needs two inputs and one output; the message must say so.
        System.err.println("Usage: MTjoin <in1> <in2> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "MTjoin");
    job.setJarByClass(MTjoin.class);
    job.setMapperClass(TokenizerMapper.class);
    // No combiner: the reducer emits <factory, address> pairs and a header
    // row, which are not valid mapper output for a second reduce pass.
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Both tables feed the same mapper.
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}

0 0