MapReduce练习:DataJoin联结练习
来源:互联网 发布:中昌数据什么时候开盘 编辑:程序博客网 时间:2024/06/16 00:18
练习题如下:需要输出每个顾客的订单详情。
涉及到的文件:
Customers
1,Stephanie Leung,555-555-5555
2,Edward Kim,123-456-7890
3,Jose Madrize,281-330-8004
4,Davia Stork,408-555-0000
Orders
3,A,12.95,02-Jun-2008
1,B,88.25,20-May-2008
2,C,32.00,30-Nov-2007
3,D,25.02,22-Jan-2009
具体程序如下:
package Self;import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.contrib.utils.join.DataJoinMapperBase;import org.apache.hadoop.contrib.utils.join.DataJoinReducerBase;import org.apache.hadoop.contrib.utils.join.TaggedMapOutput;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.Writable;import org.apache.hadoop.mapred.FileInputFormat;import org.apache.hadoop.mapred.FileOutputFormat;import org.apache.hadoop.mapred.JobClient;import org.apache.hadoop.mapred.JobConf;import org.apache.hadoop.mapred.OutputFormat;import org.apache.hadoop.mapred.TextInputFormat;import org.apache.hadoop.mapred.TextOutputFormat;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;public class DataJoinTest extends Configured implements Tool {public static class MapperClass extends DataJoinMapperBase{@Overrideprotected Text generateGroupKey(TaggedMapOutput value) {String[] lines=value.toString().split(",");return new Text(lines[0]);}@Overrideprotected Text generateInputTag(String inputFile) {return new Text(inputFile);}@Overrideprotected TaggedMapOutput generateTaggedMapOutput(Object object) {TaggedValue taggedValue=new TaggedValue((Text) object);taggedValue.setTag(this.inputTag);return taggedValue;}}public static class TaggedValue extends TaggedMapOutput{private Writable data;public TaggedValue(Writable data){this.data=data;this.tag=new Text("");}public void readFields(DataInput dataInput) throws IOException {this.data.readFields(dataInput);this.tag.readFields(dataInput);}public void write(DataOutput out) throws IOException {this.data.write(out);this.tag.write(out);}@Overridepublic Writable getData() {return this.data;}}public static class ReducerClass extends DataJoinReducerBase{@Overrideprotected TaggedMapOutput combine(Object[] tags, Object[] values) {if(tags.length<2) return null;String 
joinStr="";for (int i=0;i<values.length;i++) {if(i>0) joinStr+=",";String[] lines=((TaggedMapOutput)values[i]).getData().toString().split(",",2);joinStr+=lines[1];}TaggedValue taggedValue=new TaggedValue(new Text(joinStr));taggedValue.setTag((Text)tags[0]);return taggedValue;}}public int run(String[] args) throws Exception {Configuration configuration=getConf();JobConf job=new JobConf(configuration, DataJoinTest.class);job.setJobName("DataJoinTest");Path in=new Path(args[0]);Path out=new Path(args[1]);FileInputFormat.setInputPaths(job, in);FileOutputFormat.setOutputPath(job, out);job.setMapperClass(MapperClass.class);job.setReducerClass(ReducerClass.class);job.setInputFormat(TextInputFormat.class);job.setOutputFormat(TextOutputFormat.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(TaggedValue.class);job.set("mapred.textoutputformat.separator",",");JobClient.runJob(job);return 0;}public static void main(String[] args) throws Exception {int code=ToolRunner.run(new Configuration(), new DataJoinTest(),args);System.exit(code);}}
阅读全文
0 0
- MapReduce练习:DataJoin联结练习
- MapReduce练习(一)
- MapReduce练习(二)
- MapReduce练习(三)
- MapReduce代码实例练习
- MapReduce练习一:多MapReduce链接
- MapReduce编程练习(一)
- flume+mapreduce实战小练习
- MapReduce 练习一 找爷孙关系
- MapReduce 练习二 找朋友
- MapReduce Join联结实现
- MapReduce Join联结实现
- MapReduce Join联结实现
- MapReduce,DataJoin,链接多数据源
- MapReduce,DataJoin,多表连接查询
- MapReduce DataJoin 链接多数据源
- 练习
- “++”“--”练习
- 实践mahout推荐引擎
- setTimeout,异步,原理
- EasyTalk 项目介绍
- Diplomas and Certificates (Codeforces
- SVM+HOG对图像进行多分类(OpenCV实现)
- MapReduce练习:DataJoin联结练习
- [iOS [转] CAEmitterLayers属性解释] 鱼缸气泡效果(非完美)
- 爬取动态的网页。
- 一次完整的HTTP请求过程
- 教你如何迅速秒杀掉:99%的海量数据处理面试题
- 为什么选择Nginx
- iOS 长按复制文本
- Object类与reflect包
- Tensorflow入门解读