Hadoop中单表连接

来源:互联网 发布:fifaol3韩服数据库中文 编辑:程序博客网 时间:2024/04/29 17:28
import java.io.IOException;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.util.Iterator;import java.util.StringTokenizer;class Map extends Mapper<Object, Text, Text, Text>{    public void map(Object key, Text value, Context context)            throws IOException, InterruptedException{                        StringTokenizer str = new StringTokenizer(value.toString());            String flag = new String();            String childName = new String();            String parentName = new String();            String[] values = new String[2];                        int i = 0;            while( str.hasMoreTokens() ){                 values[i] = str.nextToken();                 i++;            }            if ( values[0].compareTo("child") != 0){                childName = values[0];                parentName = values[1];                            flag = "1";                context.write(new Text(parentName), new Text(flag+" "+childName));                flag="2";                context.write(new Text(childName), new Text(flag+" "+parentName));        }    }}    class Reduce extends Reducer<Text, Text, Text, Text>{    private static int time = 0;    public void reduce(Text key, Iterable<Text> values,Context context)        throws IOException, InterruptedException{        if (time == 0){            context.write(new Text("grandchild"), new Text("grandparent"));            time++;        }        Iterator<Text> ite = values.iterator();        int grandchildNum = 0;        int grandparentNum = 0;        String[] grandchild = new String[10];        String[] grandparent = new String[10];        while (ite.hasNext()){            String  str = ite.next().toString();            if (str.charAt(0) == '2'){    
            grandparent[grandparentNum] = str.substring(2);                grandparentNum++;            }            else{                grandchild[grandchildNum] = str.substring(2);                grandchildNum++;            }        }            if(grandchildNum != 0 && grandparentNum != 0){                 for (int m = 0; m < grandchildNum; m++) {                        for (int n = 0; n < grandparentNum; n++) {                            context.write(new Text(grandchild[m]), new Text(grandparent[n]));                        }                 }            }    }}    public class STjoin{    public static void main(String[] args) throws Exception{        Job job = new Job();        job.setJarByClass(STjoin.class);        job.setMapperClass(Map.class);        job.setReducerClass(Reducer.class);                FileInputFormat.addInputPath(job, new Path(args[0]));        FileOutputFormat.setOutputPath(job, new Path(args[1]));                job.setOutputKeyClass(Text.class);        job.setOutputValueClass(Text.class);                System.exit(job.waitForCompletion(true)?0:1);            }}

0 0
原创粉丝点击