MR Multiple


A MapReduce MultipleInputs example: two input paths, each with its own InputFormat, RecordReader, and Mapper, are fed into a single job and joined by key in one Reducer.

package mr.mulit;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

// Driver: wires two input paths, each with its own InputFormat and Mapper, into one job.
public class MultipleJob extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        // Use the configuration injected by ToolRunner so generic options (-D, -files, ...) apply.
        Configuration conf = getConf();
        Job job = new Job(conf, "MultipleJob");
        job.setJarByClass(MultipleJob.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Each input path gets its own InputFormat and Mapper.
        MultipleInputs.addInputPath(job, new Path(args[0]), FirstInputFormat.class, FirstMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), SecondInputFormat.class, SecondMapper.class);

        // Records from both mappers are grouped by key in a single reducer.
        job.setReducerClass(MultipleReducer.class);

        // Remove the output directory if it already exists, otherwise the job fails.
        Path path = new Path(args[2]);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        FileOutputFormat.setOutputPath(job, path);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new MultipleJob(), args));
    }
}
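The post does not show how the job is launched; a hypothetical invocation would look like the line below (the jar name and HDFS paths are placeholders, not from the source). The first two arguments are the two input directories, the third is the output directory that the driver deletes if it already exists.

hadoop jar multiple-job.jar mr.mulit.MultipleJob /input/first /input/second /output/multiple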

package mr.mulit;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class FirstClass implements Writable {

    private String value;

    public FirstClass() {
    }

    public FirstClass(String value) {
        this.value = value;
    }

    public String getValue() {
        return value;
    }

    public void setValue(String value) {
        this.value = value;
    }

    @Override
    public String toString() {
        return "FirstClass [value=" + value + "]";
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(this.value);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.value = in.readUTF();
    }
}
package mr.mulit;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class FirstInputFormat extends FileInputFormat<Text, FirstClass> {

    @Override
    public RecordReader<Text, FirstClass> createRecordReader(InputSplit split,
            TaskAttemptContext context) throws IOException, InterruptedException {
        return new FirstRecordReader();
    }
}
package mr.mulit;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class FirstMapper extends Mapper<Text, FirstClass, Text, Text> {

    @Override
    protected void map(Text key, FirstClass value,
            Mapper<Text, FirstClass, Text, Text>.Context context)
            throws IOException, InterruptedException {
        context.write(key, new Text(value.toString()));
    }
}

package mr.mulit;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

// Wraps a LineRecordReader and turns each tab-separated line into a (Text, FirstClass) pair.
public class FirstRecordReader extends RecordReader<Text, FirstClass> {

    private LineRecordReader lineRecordReader = null;
    private Text key = null;
    private FirstClass value = null;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        close();
        lineRecordReader = new LineRecordReader();
        lineRecordReader.initialize(split, context);
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        if (!lineRecordReader.nextKeyValue()) {
            key = null;
            value = null;
            return false;
        }
        // Split on the tab character: field 0 is the join key, field 1 is the value.
        String line = lineRecordReader.getCurrentValue().toString();
        String[] arr = line.split("\t");
        key = new Text(arr[0]);
        value = new FirstClass(arr[1]);
        return true;
    }

    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        return key;
    }

    @Override
    public FirstClass getCurrentValue() throws IOException, InterruptedException {
        return value;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        return lineRecordReader.getProgress();
    }

    @Override
    public void close() throws IOException {
        if (null != lineRecordReader) {
            lineRecordReader.close();
            lineRecordReader = null;
        }
        key = null;
        value = null;
    }
}
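Given the tab split above, a line in the first input file presumably looks like the sample below (made up for illustration; <TAB> marks the tab character): field 0 is the join key, field 1 becomes the FirstClass value.

u001<TAB>hello

FirstRecordReader turns that line into the pair (u001, FirstClass [value=hello]), and FirstMapper forwards it as a Text pair with the same content.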

package mr.mulit;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class MultipleReducer extends Reducer<Text, Text, Text, Text> {

    @Override
    protected void reduce(Text key, Iterable<Text> values,
            Reducer<Text, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        // Values from both inputs arrive grouped under the shared key; emit them all.
        for (Text text : values) {
            context.write(key, text);
        }
    }
}


package mr.mulit;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class SecondClass implements Writable {

    private String userName;
    private int classNum;

    public SecondClass() {
    }

    public SecondClass(String userName, int classNum) {
        this.userName = userName;
        this.classNum = classNum;
    }

    public String getUserName() {
        return userName;
    }

    public void setUserName(String userName) {
        this.userName = userName;
    }

    public int getClassNum() {
        return classNum;
    }

    public void setClassNum(int classNum) {
        this.classNum = classNum;
    }

    @Override
    public String toString() {
        return "SecondClass [userName=" + userName + ", classNum=" + classNum + "]";
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(this.userName);
        out.writeInt(this.classNum);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.userName = in.readUTF();
        this.classNum = in.readInt();
    }
}

package mr.mulit;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class SecondInputFormat extends FileInputFormat<Text, SecondClass> {

    @Override
    public RecordReader<Text, SecondClass> createRecordReader(InputSplit split,
            TaskAttemptContext context) throws IOException, InterruptedException {
        return new SecondRecordReader();
    }
}

package mr.mulit;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class SecondMapper extends Mapper<Text, SecondClass, Text, Text> {

    @Override
    protected void map(Text key, SecondClass value,
            Mapper<Text, SecondClass, Text, Text>.Context context)
            throws IOException, InterruptedException {
        context.write(key, new Text(value.toString()));
    }
}
package mr.mulit;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

// Wraps a LineRecordReader and turns each tab-separated line into a (Text, SecondClass) pair.
public class SecondRecordReader extends RecordReader<Text, SecondClass> {

    private LineRecordReader lineRecordReader = null;
    private Text key = null;
    private SecondClass value = null;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        close();
        lineRecordReader = new LineRecordReader();
        lineRecordReader.initialize(split, context);
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        if (!lineRecordReader.nextKeyValue()) {
            key = null;
            value = null;
            return false;
        }
        // Split on the tab character: field 0 is the join key, fields 1 and 2 fill SecondClass.
        String line = lineRecordReader.getCurrentValue().toString();
        String[] arr = line.split("\t");
        key = new Text(arr[0]);
        value = new SecondClass(arr[1], Integer.parseInt(arr[2]));
        return true;
    }

    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        return key;
    }

    @Override
    public SecondClass getCurrentValue() throws IOException, InterruptedException {
        return value;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        return lineRecordReader.getProgress();
    }

    @Override
    public void close() throws IOException {
        if (null != lineRecordReader) {
            lineRecordReader.close();
            lineRecordReader = null;
        }
        key = null;
        value = null;
    }
}
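By the same logic, a second-input line presumably has three tab-separated fields (again a made-up sample): the join key, a user name, and a class number.

u001<TAB>Tom<TAB>3

SecondRecordReader yields (u001, SecondClass [userName=Tom, classNum=3]). Since both mappers emit each record's toString() under the shared key, the reducer output for u001 would be something like the two lines below, one per source, separated from the key by TextOutputFormat's default tab:

u001<TAB>FirstClass [value=hello]
u001<TAB>SecondClass [userName=Tom, classNum=3]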



