mapreduce编程:多表关联

来源:互联网 发布:梯度下降算法 3维图像 编辑:程序博客网 时间:2024/05/29 09:18
package my.hadoopstudy;  import java.util.*;import java.io.*;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapred.TextInputFormat;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;public class TableConnect {  public static class tableMapper extends Mapper<Text, Text, Text, Text> {  @Override  public void map(Text key, Text value, Context context) throws IOException, InterruptedException  {       context.write(key,value);  } }   public static class tableReducer extends Reducer<Text, Text, Text, Text> {  @Override  public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException  {   List<String> gChildren=new ArrayList<String>();   List<String> gParents =new ArrayList<String>();   for(Text val : values)   {    if(val.toString().charAt(0)  == '1')// the key's child.    {       gChildren.add(val.toString().substring(1));    }    else//the key's parent.    {       gParents.add(val.toString().substring(1));    }   }   for(int i=0; i<gChildren.size(); i++)    for(int j=0;j<gParents.size();j++)    {       context.write(new Text(gChildren.get(i)), new Text(gParents.get(j)));    }  }  }  public static void main(String[] args) throws Exception {  Configuration conf = new Configuration();  conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", " ");    Job job = new Job(conf, "tableConnect");  job.setJarByClass(TableConnect.class);    job.setMapperClass(tableMapper.class);  job.setReducerClass(tableReducer.class);    job.setOutputKeyClass(Text.class);  job.setOutputValueClass(Text.class);    job.setInputFormatClass(KeyValueTextInputFormat.class);  job.setOutputFormatClass(TextOutputFormat.class);    FileInputFormat.addInputPath(job, new Path(args[0]));  FileOutputFormat.setOutputPath(job, new Path(args[1]));    System.exit( job.waitForCompletion(true) ? 0 : 1); }}

输入数据,文本1:

代号        公司

a 1Beijing Red Star
c 1Shenzhen Thunder
b 1Guangzhou Honda
a 1Beijing Rising
b 1Guangzhou Development Bank
c 1Tencent
a 1Back of Beijing



输入数据,文本2:

代号       地方

a 2Beijing
b 2Guangzhou
c 2Shenzhen
d 2Xian


输出:



0 0
原创粉丝点击