利用DistributedCache实现map端连接 Join On Map Side

来源:互联网 发布:美工孙伟博时代小区 编辑:程序博客网 时间:2024/05/17 08:06

publicclassMapJoinByCache {

 // movies.dat id,name,type

//rating.datuid,mid,rate,time

//joinon map:movies.dat join rating.dat on mid

    publicstaticclassMapJoinerextendsMapper<LongWritable,Text,Text,Text>

    {  

        staticMap<String,String>movies=newHashMap<String,String>();

        publicvoidsetup(Context context) {           

            try {

                FileReader reader =new FileReader("movies.dat");

                BufferedReader br =newBufferedReader(reader);

                Strings1 = null;

                while ((s1= br.readLine()) !=null)

                {

                    System.out.println(s1);

                    String[]splits= s1.split("::");                   

                    StringmovieId=splits[0];

                    StringmovieName =splits[1];

                    movies.put(movieId,movieName);                

                }

                br.close();

                reader.close();

            }catch (Exception e) {

                e.printStackTrace();

            }

        }

        private TextoutKey=newText();

        private TextoutVal=newText();    

        publicvoidmap(LongWritable key,Text value,Context context)throwsIOException, InterruptedException

        {

            if(value!=null||value.toString()!=null)

            {

                String[]splits = value.toString().split("::"); 

                StringmovieId =splits[1];

                StringmovieName= movies.get(movieId);

                outKey.set(movieId);

                outVal.set(movieName+"::"+value.toString());

                context.write(outKey,outVal);

            }

        }

    }

    publicstaticclassDirectReducerextendsReducer<Text,Text,NullWritable,Text>

    {

        NullWritableoutKey=NullWritable.get();

        publicvoidreduce(Text key,Iterable<Text> values,Context context)throwsIOException, InterruptedException

        {

            for(Textvalue :values)

            {

                context.write(outKey,value);

            }

        }

    }

    publicstaticvoidmain(String[] args)throwsURISyntaxException, IOException, InterruptedException, ClassNotFoundException {

        Configurationconf =new Configuration();

        String[]otherArgs = new GenericOptionsParser(conf,args).getRemainingArgs();

        DistributedCache.createSymlink(conf);

        DistributedCache.addCacheFile(new URI("hdfs://mylinux:9000/data/exam/movie/movies.dat#movies.dat"),conf);     

        Jobjob=newJob(conf);

        job.setJobName("Joinon Map Side");

        job.setJarByClass(MapJoinByCache.class);

        job.setMapperClass(MapJoiner.class);

        job.setReducerClass(DirectReducer.class);

        FileInputFormat.addInputPath(job,new Path(otherArgs[0]));

        job.setMapOutputKeyClass(Text.class);

        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(NullWritable.class);

        job.setOutputValueClass(Text.class);

        FileOutputFormat.setOutputPath(job,new Path(otherArgs[1]));       

        System.exit(job.waitForCompletion(true) ? 0: 1);

}
0 0
原创粉丝点击