MR Multiple
来源:互联网 发布:java产生随机数的代码 编辑:程序博客网 时间:2024/04/29 12:56
MapReduce MultipleInputs example: one job reading two input paths, each with its own InputFormat and Mapper
package mr.mulit;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;public class MultipleJob extends Configuration implements Tool{@Overridepublic Configuration getConf() {return new Configuration();}@Overridepublic void setConf(Configuration conf) {}@Overridepublic int run(String[] args) throws Exception {Configuration conf = new Configuration();Job job = new Job(conf, "MultipleJob");job.setJarByClass(MultipleJob.class);job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(Text.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(Text.class);MultipleInputs.addInputPath(job, new Path(args[0]), FirstInputFormat.class, FirstMapper.class);MultipleInputs.addInputPath(job, new Path(args[1]), SecondInputFormat.class, SecondMapper.class);Path path = new Path(args[2]);FileSystem fs = FileSystem.get(conf);if(fs.exists(path)){fs.delete(path,true);}FileOutputFormat.setOutputPath(job, path);return job.waitForCompletion(true) ? 0 : 1;}public static void main(String[] args) throws Exception {ToolRunner.run(new MultipleJob(), args);}}
package mr.mulit;import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import org.apache.hadoop.io.Writable;public class FirstClass implements Writable {private String value;public FirstClass() {}public FirstClass(String value) {this.value = value;}public String getValue() {return value;}public void setValue(String value) {this.value = value;}@Overridepublic String toString() {return "FirstClass [value=" + value + "]";}@Overridepublic void write(DataOutput out) throws IOException {out.writeUTF(this.value);}@Overridepublic void readFields(DataInput in) throws IOException {this.value = in.readUTF();}}
package mr.mulit;import java.io.IOException;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.InputSplit;import org.apache.hadoop.mapreduce.RecordReader;import org.apache.hadoop.mapreduce.TaskAttemptContext;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;public class FirstInputFormat extends FileInputFormat<Text, FirstClass> {@Overridepublic RecordReader<Text, FirstClass> createRecordReader(InputSplit split,TaskAttemptContext context) throws IOException,InterruptedException {return new FirstRecordReader();}}
package mr.mulit;import java.io.IOException;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Mapper;public class FirstMapper extends Mapper<Text, FirstClass, Text, Text> {@Overrideprotected void map(Text key, FirstClass value,Mapper<Text, FirstClass, Text, Text>.Context context)throws IOException, InterruptedException {context.write(key, new Text(value.toString()));}}
package mr.mulit;import java.io.IOException;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.InputSplit;import org.apache.hadoop.mapreduce.RecordReader;import org.apache.hadoop.mapreduce.TaskAttemptContext;import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;public class FirstRecordReader extends RecordReader<Text, FirstClass> {private LineRecordReader lineRecordReader = null;private Text key = null;private FirstClass value = null;@Overridepublic void initialize(InputSplit split, TaskAttemptContext context)throws IOException, InterruptedException {close();lineRecordReader = new LineRecordReader();lineRecordReader.initialize(split, context);}@Overridepublic boolean nextKeyValue() throws IOException, InterruptedException {if(!lineRecordReader.nextKeyValue()){key = null;value =null;return false;}else{Text val = lineRecordReader.getCurrentValue();String line = val.toString();String[] arr = line.split("t");key = new Text(arr[0]);value = new FirstClass(arr[1]);return true;}}@Overridepublic Text getCurrentKey() throws IOException, InterruptedException {return key;}@Overridepublic FirstClass getCurrentValue() throws IOException,InterruptedException {return value;}@Overridepublic float getProgress() throws IOException, InterruptedException {// TODO Auto-generated method stubreturn lineRecordReader.getProgress();}@Overridepublic void close() throws IOException {if(null != lineRecordReader){lineRecordReader.close();lineRecordReader =null;}key = null;value =null;}}
package mr.mulit;import java.io.IOException;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Reducer;public class MultipleReducer extends Reducer<Text, Text, Text, Text> {@Overrideprotected void reduce(Text key, Iterable<Text> values,Reducer<Text, Text, Text, Text>.Context context) throws IOException,InterruptedException {// TODO Auto-generated method stubfor (Text text : values) {context.write(key, text);}}}
package mr.mulit;import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import org.apache.hadoop.io.Writable;public class SecondClass implements Writable {private String userName;private int classNum;public SecondClass() {}public SecondClass(String userName, int classNum) {this.userName = userName;this.classNum = classNum;}public String getUserName() {return userName;}public void setUserName(String userName) {this.userName = userName;}public int getClassNum() {return classNum;}public void setClassNum(int classNum) {this.classNum = classNum;}@Overridepublic String toString() {return "SecondClass [userName=" + userName + ", classNum=" + classNum+ "]";}@Overridepublic void write(DataOutput out) throws IOException {out.writeUTF(this.userName);out.writeInt(this.classNum);}@Overridepublic void readFields(DataInput in) throws IOException {this.userName = in.readUTF();this.classNum = in.readInt();}}
package mr.mulit;import java.io.IOException;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.InputSplit;import org.apache.hadoop.mapreduce.RecordReader;import org.apache.hadoop.mapreduce.TaskAttemptContext;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;public class SecondInputFormat extends FileInputFormat<Text, SecondClass> {@Overridepublic RecordReader<Text, SecondClass> createRecordReader(InputSplit split,TaskAttemptContext context) throws IOException,InterruptedException {return new SecondRecordReader();}}
package mr.mulit;import java.io.IOException;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Mapper;public class SecondMapper extends Mapper<Text, SecondClass, Text, Text> {@Overrideprotected void map(Text key, SecondClass value,Mapper<Text, SecondClass, Text, Text>.Context context)throws IOException, InterruptedException {context.write(key, new Text(value.toString()));}}
package mr.mulit;import java.io.IOException;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.InputSplit;import org.apache.hadoop.mapreduce.RecordReader;import org.apache.hadoop.mapreduce.TaskAttemptContext;import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;public class SecondRecordReader extends RecordReader<Text, SecondClass> {private LineRecordReader lineRecordReader = null;private Text key = null;private SecondClass value = null;@Overridepublic void initialize(InputSplit split, TaskAttemptContext context)throws IOException, InterruptedException {close();lineRecordReader = new LineRecordReader();lineRecordReader.initialize(split, context);}@Overridepublic boolean nextKeyValue() throws IOException, InterruptedException {if(!lineRecordReader.nextKeyValue()){key = null;value =null;return false;}else{Text val = lineRecordReader.getCurrentValue();String line = val.toString();String[] arr = line.split("t");key = new Text(arr[0]);value = new SecondClass(arr[1],Integer.parseInt(arr[2]));return true;}}@Overridepublic Text getCurrentKey() throws IOException, InterruptedException {return key;}@Overridepublic SecondClass getCurrentValue() throws IOException,InterruptedException {return value;}@Overridepublic float getProgress() throws IOException, InterruptedException {// TODO Auto-generated method stubreturn lineRecordReader.getProgress();}@Overridepublic void close() throws IOException {if(null != lineRecordReader){lineRecordReader.close();lineRecordReader =null;}key = null;value =null;}}
0 0
- MR Multiple
- MR
- MR-2.输出格式(OutputFormat)Multiple outputs多目录输出
- Multiple
- Mr. Bean
- Mr. Huang1
- Mr. Huang2
- Mr.lonely
- Mr.ing
- MR原理
- MR框架结构
- Mr-sniper
- MR原理
- streaming mr
- mr计数器
- MR优化
- gora-mr
- hbase-MR
- 使用二维码——第三方库ZBar和libqrencode的使用
- 安卓实习期间整理知识点(七)
- CSS中强大的EM
- Redis源码解析(六):redis之闲话java客户端
- TestOpenGL
- MR Multiple
- Java实现中文算数验证码(算数运算+-*/)
- Data Structures and Algorithm Analysis in C 学习之List
- LeetCode_Swap Nodes in Pairs
- Android判断对象是否为空工具类
- hdu 1757 A Simple Math Problem(矩阵快速幂)
- 解决phpExcel导出乱码且Content-Type失效的问题
- 最大子序列和的问题的解
- Redis介绍