MapReduce练习:DataJoin联结练习

来源:互联网 发布:中昌数据什么时候开盘 编辑:程序博客网 时间:2024/06/16 00:18

练习题如下:需要输出每个顾客的订单详情。

涉及到的文件:

Customers

1,Stephanie Leung,555-555-5555
2,Edward Kim,123-456-7890
3,Jose Madrize,281-330-8004
4,Davia Stork,408-555-0000


Orders

3,A,12.95,02-Jun-2008
1,B,88.25,20-May-2008
2,C,32.00,30-Nov-2007
3,D,25.02,22-Jan-2009

具体程序如下:

package Self;import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.contrib.utils.join.DataJoinMapperBase;import org.apache.hadoop.contrib.utils.join.DataJoinReducerBase;import org.apache.hadoop.contrib.utils.join.TaggedMapOutput;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.Writable;import org.apache.hadoop.mapred.FileInputFormat;import org.apache.hadoop.mapred.FileOutputFormat;import org.apache.hadoop.mapred.JobClient;import org.apache.hadoop.mapred.JobConf;import org.apache.hadoop.mapred.OutputFormat;import org.apache.hadoop.mapred.TextInputFormat;import org.apache.hadoop.mapred.TextOutputFormat;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;public class DataJoinTest extends Configured implements Tool {public static class MapperClass extends DataJoinMapperBase{@Overrideprotected Text generateGroupKey(TaggedMapOutput value) {String[] lines=value.toString().split(",");return new Text(lines[0]);}@Overrideprotected Text generateInputTag(String inputFile) {return new Text(inputFile);}@Overrideprotected TaggedMapOutput generateTaggedMapOutput(Object object) {TaggedValue taggedValue=new  TaggedValue((Text) object);taggedValue.setTag(this.inputTag);return taggedValue;}}public  static class TaggedValue extends TaggedMapOutput{private Writable data;public TaggedValue(Writable data){this.data=data;this.tag=new Text("");}public void readFields(DataInput dataInput) throws IOException {this.data.readFields(dataInput);this.tag.readFields(dataInput);}public void write(DataOutput out) throws IOException {this.data.write(out);this.tag.write(out);}@Overridepublic Writable getData() {return this.data;}}public static class ReducerClass extends DataJoinReducerBase{@Overrideprotected TaggedMapOutput combine(Object[] tags, Object[] values) {if(tags.length<2) return null;String 
joinStr="";for (int i=0;i<values.length;i++) {if(i>0) joinStr+=",";String[] lines=((TaggedMapOutput)values[i]).getData().toString().split(",",2);joinStr+=lines[1];}TaggedValue taggedValue=new TaggedValue(new Text(joinStr));taggedValue.setTag((Text)tags[0]);return taggedValue;}}public int run(String[] args) throws Exception {Configuration configuration=getConf();JobConf job=new JobConf(configuration, DataJoinTest.class);job.setJobName("DataJoinTest");Path in=new Path(args[0]);Path out=new Path(args[1]);FileInputFormat.setInputPaths(job, in);FileOutputFormat.setOutputPath(job, out);job.setMapperClass(MapperClass.class);job.setReducerClass(ReducerClass.class);job.setInputFormat(TextInputFormat.class);job.setOutputFormat(TextOutputFormat.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(TaggedValue.class);job.set("mapred.textoutputformat.separator",",");JobClient.runJob(job);return 0;}public static void main(String[] args) throws Exception {int code=ToolRunner.run(new Configuration(), new DataJoinTest(),args);System.exit(code);}}



原创粉丝点击