利用DistributedCache实现map端连接 Join On Map Side
来源:互联网 发布:美工孙伟博时代小区 编辑:程序博客网 时间:2024/05/17 08:06
publicclassMapJoinByCache {
// movies.dat id,name,type
// rating.dat uid,mid,rate,time
// join on map: movies.dat join rating.dat on mid
publicstaticclassMapJoinerextendsMapper<LongWritable,Text,Text,Text>
{
staticMap<String,String>movies=newHashMap<String,String>();
publicvoidsetup(Context context) {
try {
FileReader reader =new FileReader("movies.dat");
BufferedReader br =newBufferedReader(reader);
Strings1 = null;
while ((s1= br.readLine()) !=null)
{
System.out.println(s1);
String[]splits= s1.split("::");
StringmovieId=splits[0];
StringmovieName =splits[1];
movies.put(movieId,movieName);
}
br.close();
reader.close();
}catch (Exception e) {
e.printStackTrace();
}
}
private TextoutKey=newText();
private TextoutVal=newText();
publicvoidmap(LongWritable key,Text value,Context context)throwsIOException, InterruptedException
{
if(value!=null||value.toString()!=null)
{
String[]splits = value.toString().split("::");
StringmovieId =splits[1];
StringmovieName= movies.get(movieId);
outKey.set(movieId);
outVal.set(movieName+"::"+value.toString());
context.write(outKey,outVal);
}
}
}
publicstaticclassDirectReducerextendsReducer<Text,Text,NullWritable,Text>
{
NullWritableoutKey=NullWritable.get();
publicvoidreduce(Text key,Iterable<Text> values,Context context)throwsIOException, InterruptedException
{
for(Textvalue :values)
{
context.write(outKey,value);
}
}
}
/**
 * Configures and runs the map-side join job.
 *
 * <p>After generic Hadoop options are stripped, the remaining arguments are
 * {@code <input path> <output path>}. The movie table is registered as a cache
 * file; the {@code #movies.dat} URI fragment together with
 * {@code DistributedCache.createSymlink} is what lets the mapper open it under
 * the plain name {@code movies.dat}.
 *
 * @param args command-line arguments: generic options followed by input and output paths
 * @throws URISyntaxException     if the cache file URI is malformed
 * @throws IOException            if job setup or submission fails
 * @throws InterruptedException   if interrupted while waiting for the job
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public static void main(String[] args) throws URISyntaxException, IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException below.
        System.err.println("Usage: MapJoinByCache <input path> <output path>");
        System.exit(2);
    }

    // The cache file must be registered on conf before the Job is created from it.
    DistributedCache.createSymlink(conf);
    DistributedCache.addCacheFile(new URI("hdfs://mylinux:9000/data/exam/movie/movies.dat#movies.dat"), conf);

    Job job = new Job(conf);
    job.setJobName("Join on Map Side");
    job.setJarByClass(MapJoinByCache.class);
    job.setMapperClass(MapJoiner.class);
    job.setReducerClass(DirectReducer.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
- 利用DistributedCache实现map端连接 Join On Map Side
- spark实现Map-side Join和Reduce-side Join
- Map side join
- hive map side join
- hive map-side join
- map-side-join /Reduce-side-join
- 基于hadoop2.2的map端表关联(map side join)mapreduce实现
- Hive Map Side Join解析
- MapReduce的Map side join
- MapReduce map side join实例
- 在Spark中实现map-side join和reduce-side join
- 在Spark中实现map-side join和reduce-side join
- Apache Spark探秘:实现Map-side Join和Reduce-side Join
- Apache Spark探秘:实现Map-side Join和Reduce-side Join
- hadoop join之map side join
- Hadoop 多表 join:map side join 范例
- Hadoop 多表 join:map side join 范例
- Hadoop 多表 join:map side join 范例
- FindWindowEx
- MapReduce过程中的序列化与反序列化
- FindWindow
- 众多开源项目直接拿来用
- Horizon学习笔记
- 利用DistributedCache实现map端连接 Join On Map Side
- Extracting Table Data from Word Document using Aspose Words
- 6、ubuntu下强大的图像处理软件
- 关联target在ant配置文件中的应用
- Android向上推送式菜单,两个视图在同一布局内同屏显示。
- 使用Rwordseg进行分词
- spirng3.0 3
- javaEE jdbc批处理
- FPS