利用DistributedCache实现map端连接 Join On Map Side

来源：互联网　发布：美工孙伟博时代小区　编辑：程序博客网　时间：2024/05/17 08:06

publicclassMapJoinByCache {

// movies.dat id,name,type

//rating.datuid,mid,rate,time

//joinon map:movies.dat join rating.dat on mid

publicstaticclassMapJoinerextendsMapper<LongWritable,Text,Text,Text>

{

staticMap<String,String>movies=newHashMap<String,String>();

publicvoidsetup(Context context) {

try {

FileReader reader =new FileReader("movies.dat");

BufferedReader br =newBufferedReader(reader);

Strings1 = null;

while ((s1= br.readLine()) !=null)

{

System.out.println(s1);

String[]splits= s1.split("::");

StringmovieId=splits[0];

StringmovieName =splits[1];

movies.put(movieId,movieName);

}

br.close();

reader.close();

}catch (Exception e) {

e.printStackTrace();

}

private TextoutKey=newText();

private TextoutVal=newText();

publicvoidmap(LongWritable key,Text value,Context context)throwsIOException, InterruptedException

{

if(value!=null||value.toString()!=null)

{

String[]splits = value.toString().split("::");

StringmovieId =splits[1];

StringmovieName= movies.get(movieId);

outKey.set(movieId);

outVal.set(movieName+"::"+value.toString());

context.write(outKey,outVal);

}

publicstaticclassDirectReducerextendsReducer<Text,Text,NullWritable,Text>

{

NullWritableoutKey=NullWritable.get();

publicvoidreduce(Text key,Iterable<Text> values,Context context)throwsIOException, InterruptedException

{

for(Textvalue :values)

{

context.write(outKey,value);

}

publicstaticvoidmain(String[] args)throwsURISyntaxException, IOException, InterruptedException, ClassNotFoundException {

Configurationconf =new Configuration();

String[]otherArgs = new GenericOptionsParser(conf,args).getRemainingArgs();

DistributedCache.createSymlink(conf);

DistributedCache.addCacheFile(new URI("hdfs://mylinux:9000/data/exam/movie/movies.dat#movies.dat"),conf);

Jobjob=newJob(conf);

job.setJobName("Joinon Map Side");

job.setJarByClass(MapJoinByCache.class);

job.setMapperClass(MapJoiner.class);

job.setReducerClass(DirectReducer.class);

FileInputFormat.addInputPath(job,new Path(otherArgs[0]));

job.setMapOutputKeyClass(Text.class);

job.setMapOutputValueClass(Text.class);

job.setOutputKeyClass(NullWritable.class);

job.setOutputValueClass(Text.class);

FileOutputFormat.setOutputPath(job,new Path(otherArgs[1]));

System.exit(job.waitForCompletion(true) ? 0: 1);

}

0 0