多表关联

来源:互联网 发布:公司职位划分 知乎 编辑:程序博客网 时间:2024/06/05 16:29

输入两个文件,一个代表工厂表,包含工厂名列和地址编号列;另一个代表地址表,包含地址名列和地址编号列。要求从输入数据中找出工厂名和地址名的对应关系,输出工厂名-地址名表。

输入样例:

factory:

factoryname addressID

Beijing Red Star 1

Shenzhen Thunder 3

Guangzhou Honda 2

Beijing Rising 1

Guangzhou Development Bank 2

Tencent 3

Bank of Beijing 1

address:

addressID addressname

1 Beijing

2 Guangzhou

3 Shenzhen

4 Xian

样例输出:

factoryname addressname

Bank of Beijing Beijing

Beijing Red Star Beijing

Beijing Rising Beijing

Guangzhou Development Bank Guangzhou

Guangzhou Honda Guangzhou

Shenzhen Thunder Shenzhen

Tencent Shenzhen

package mapreduce.test;import java.io.IOException;import java.util.Iterator;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class MTjoin {public static int time =0 ;//在map中先区分输入行属于左表还是右表,然后对两列值进行分割//连接列保存在key值,剩余列和左右表标志保存在value中,最后输出public static class Map extends Mapper<Object, Text, Text, Text>{@Overrideprotected void map(Object key, Text value, Context context)throws IOException, InterruptedException {String line = value.toString();int i =0;//输入文件的首行不处理if(line.contains("factoryname")==true || line.contains("addressID")==true){return;}//找出数据中的分割点while(line.charAt(i)>= '9' || line.charAt(i)<='0'){i++;}if(line.charAt(0)>='9' || line.charAt(0)<='0'){//左表int j= i-1;while(line.charAt(j) != ' ') j--;String[] values = {line.substring(0,j),line.substring(i)};context.write(new Text(values[1]), new Text("1+"+values[0]));}else{int j=i+1;while(line.charAt(j) != ' ') j++;String[] values = {line.substring(0,i+1),line.substring(j)};context.write(new Text(values[0]), new Text("2+"+values[1]));}}}//reduce解析map输出,将value中数据按照左右表分别保存,然后求笛卡尔积输出public static class Reduce extends Reducer<Text, Text, Text, Text>{protected void reduce(Text key, Iterable<Text> values,Context context)throws IOException, InterruptedException {if(time==0){context.write(new Text("factoryname"), new Text("addressname"));time++;}int factorynum=0;String factory[] = new String[10];int addressnum = 0;String address[] = new String[10];Iterator ite = values.iterator();while(ite.hasNext()){String record= ite.next().toString();int len = record.length();int i =2;char type = record.charAt(0);String factoryname = new String();String addressname = new String();if(type=='1'){//左表factory[factorynum] = 
record.substring(2);factorynum++;}else{//右表address[addressnum] = record.substring(2);addressnum++;}}if(factorynum !=0 && addressnum !=0){for(int m=0;m<factorynum;m++){for(int n=0;n<addressnum;n++){context.write(new Text(factory[m]), new Text(address[n]));}}}}}public static void main(String[] args) throws Exception {Configuration conf = new Configuration();Job job = new Job(conf,"multiple talbe join");job.setJarByClass(MTjoin.class);job.setMapperClass(Map.class);job.setReducerClass(Reduce.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(Text.class);FileInputFormat.addInputPath(job, new Path(args[0]));FileOutputFormat.setOutputPath(job, new Path(args[1]));System.exit(job.waitForCompletion(true) ? 0 : 1);}}


0 0