hadoop的InputFormat简单demo
来源:互联网 发布:喷神james 知乎 编辑:程序博客网 时间:2024/06/05 07:59
1.序列化对象
package com.lijie.inutformat;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * Hadoop-serializable value object holding five float scores (a..e).
 *
 * <p>Used as the map-output value of the score demo job. Implements
 * {@link WritableComparable} so it can also serve as a key: the ordering
 * compares the five fields lexicographically (a first, e last) and is
 * consistent with {@link #equals(Object)}, which both use the
 * {@code floatToIntBits} semantics (NaN equals NaN, +0.0 differs from -0.0).
 *
 * <p>Field order in {@link #write(DataOutput)} and
 * {@link #readFields(DataInput)} must stay identical (a, b, c, d, e) —
 * Hadoop serialization is purely positional.
 */
public class ScorePair implements WritableComparable<ScorePair> {

    private float a;
    private float b;
    private float c;
    private float d;
    private float e;

    /** No-arg constructor required by Hadoop's reflective deserialization. */
    public ScorePair() {
        super();
    }

    public ScorePair(float a, float b, float c, float d, float e) {
        super();
        this.a = a;
        this.b = b;
        this.c = c;
        this.d = d;
        this.e = e;
    }

    public float getA() {
        return a;
    }

    public void setA(float a) {
        this.a = a;
    }

    public float getB() {
        return b;
    }

    public void setB(float b) {
        this.b = b;
    }

    public float getC() {
        return c;
    }

    public void setC(float c) {
        this.c = c;
    }

    public float getD() {
        return d;
    }

    public void setD(float d) {
        this.d = d;
    }

    public float getE() {
        return e;
    }

    public void setE(float e) {
        this.e = e;
    }

    /**
     * Resets all five fields at once; lets the RecordReader reuse a single
     * instance per split instead of allocating one per record.
     */
    public void set(float a, float b, float c, float d, float e) {
        this.a = a;
        this.b = b;
        this.c = c;
        this.d = d;
        this.e = e;
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + Float.floatToIntBits(a);
        result = prime * result + Float.floatToIntBits(b);
        result = prime * result + Float.floatToIntBits(c);
        result = prime * result + Float.floatToIntBits(d);
        result = prime * result + Float.floatToIntBits(e);
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        ScorePair other = (ScorePair) obj;
        if (Float.floatToIntBits(a) != Float.floatToIntBits(other.a))
            return false;
        if (Float.floatToIntBits(b) != Float.floatToIntBits(other.b))
            return false;
        if (Float.floatToIntBits(c) != Float.floatToIntBits(other.c))
            return false;
        if (Float.floatToIntBits(d) != Float.floatToIntBits(other.d))
            return false;
        if (Float.floatToIntBits(e) != Float.floatToIntBits(other.e))
            return false;
        return true;
    }

    /** Deserializes the five fields in the exact order written by {@link #write}. */
    @Override
    public void readFields(DataInput in) throws IOException {
        a = in.readFloat();
        b = in.readFloat();
        c = in.readFloat();
        d = in.readFloat();
        e = in.readFloat();
    }

    /** Serializes the five fields in a fixed order (a, b, c, d, e). */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeFloat(a);
        out.writeFloat(b);
        out.writeFloat(c);
        out.writeFloat(d);
        out.writeFloat(e);
    }

    /**
     * Lexicographic field-by-field comparison, consistent with equals.
     *
     * <p>Bug fix: the original always returned 0, which breaks the
     * {@code Comparable} contract ("consistent with equals") and would make
     * every instance compare equal when used as a Hadoop key or in sorted
     * collections. {@link Float#compare(float, float)} matches the
     * {@code floatToIntBits}-based equality used above for NaN and signed zero.
     */
    @Override
    public int compareTo(ScorePair o) {
        int cmp = Float.compare(a, o.a);
        if (cmp != 0)
            return cmp;
        cmp = Float.compare(b, o.b);
        if (cmp != 0)
            return cmp;
        cmp = Float.compare(c, o.c);
        if (cmp != 0)
            return cmp;
        cmp = Float.compare(d, o.d);
        if (cmp != 0)
            return cmp;
        return Float.compare(e, o.e);
    }
}
2.InputFormat类
package com.lijie.inutformat;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.LineReader;

/**
 * Custom InputFormat producing ({@code Text} "name\tid", {@link ScorePair})
 * pairs from whitespace-separated 7-column score files.
 *
 * <p>Splitting is disabled so each file is read by exactly one mapper —
 * the simple line reader below has no logic to resync at a mid-file offset.
 */
public class ScoreInputFormat extends FileInputFormat<Text, ScorePair> {

    /** Files are never split; see class comment. */
    @Override
    protected boolean isSplitable(JobContext context, Path filename) {
        return false;
    }

    @Override
    public RecordReader<Text, ScorePair> createRecordReader(
            InputSplit arg0, TaskAttemptContext arg1)
            throws IOException, InterruptedException {
        return new ScoreRecordReader();
    }
}

/**
 * Reads one text line per record. Expects exactly 7 whitespace-separated
 * columns: two key columns (presumably name and student id — confirm with the
 * data files) followed by five float scores.
 */
class ScoreRecordReader extends RecordReader<Text, ScorePair> {

    private LineReader in;
    private FSDataInputStream fileIn;
    private Text lineKey;
    private ScorePair lineValue;
    private Text line;
    /** Byte offset where this split starts (0 for unsplittable files). */
    private long start;
    /** Byte offset one past the end of this split. */
    private long end;

    @Override
    public void close() throws IOException {
        // Closing the LineReader also closes the wrapped FSDataInputStream.
        if (in != null) {
            in.close();
        }
    }

    @Override
    public Text getCurrentKey() throws IOException, InterruptedException {
        return lineKey;
    }

    @Override
    public ScorePair getCurrentValue() throws IOException, InterruptedException {
        return lineValue;
    }

    /**
     * Fraction of the split consumed so far.
     *
     * <p>Bug fix: the original always returned 0, so the framework/UI showed
     * no map progress. Progress is now derived from the underlying stream
     * position relative to the split boundaries.
     */
    @Override
    public float getProgress() throws IOException, InterruptedException {
        if (start == end) {
            return 0.0f;
        }
        return Math.min(1.0f, (fileIn.getPos() - start) / (float) (end - start));
    }

    @Override
    public void initialize(InputSplit arg0, TaskAttemptContext arg1)
            throws IOException, InterruptedException {
        FileSplit split = (FileSplit) arg0;
        Configuration conf = arg1.getConfiguration();
        Path path = split.getPath();
        FileSystem fs = path.getFileSystem(conf);
        start = split.getStart();
        end = start + split.getLength();
        fileIn = fs.open(path);
        in = new LineReader(fileIn, conf);
        // Reused across records to avoid per-line allocations.
        line = new Text();
        lineKey = new Text();
        lineValue = new ScorePair();
    }

    /**
     * Advances to the next line; returns false at end of input.
     *
     * @throws IOException if a line does not have exactly 7 columns
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        // LineReader.readLine returns the number of bytes consumed; 0 means EOF.
        int lineSize = in.readLine(line);
        if (lineSize == 0)
            return false;
        String[] split = line.toString().split("\\s+");
        if (split.length != 7) {
            throw new IOException("数据错误!");
        }
        float a, b, c, d, e;
        a = Float.parseFloat(split[2].trim());
        b = Float.parseFloat(split[3].trim());
        c = Float.parseFloat(split[4].trim());
        d = Float.parseFloat(split[5].trim());
        e = Float.parseFloat(split[6].trim());
        // Key = first two columns joined by a tab; value = the five scores.
        lineKey.set(split[0] + "\t" + split[1]);
        lineValue.set(a, b, c, d, e);
        return true;
    }
}
3.mapreduce程序
package com.lijie.inutformat;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Driver for the score demo job: reads score files via
 * {@link ScoreInputFormat} and emits, per key, the sum and average of the
 * five float scores.
 */
public class ScoreMapReduce extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        String[] path = {"hdfs://lijie:9000/score/*", "hdfs://lijie:9000/score/out"};
        int run = ToolRunner.run(new Configuration(), new ScoreMapReduce(), path);
        System.exit(run);
    }

    /** Identity mapper: the InputFormat already produces the desired pairs. */
    public static class ScoreMap extends Mapper<Text, ScorePair, Text, ScorePair> {
        @Override
        protected void map(Text key, ScorePair value, Context context)
                throws IOException, InterruptedException {
            context.write(key, value);
        }
    }

    /** Emits "sum:<s>\tavg:<a>" per key. */
    public static class ScoreReduce extends Reducer<Text, ScorePair, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<ScorePair> values, Context context)
                throws IOException, InterruptedException {
            // NOTE(review): only the first value is used — assumes each key
            // occurs on exactly one input line; verify against the data files.
            ScorePair value = values.iterator().next();
            // sum of the five scores
            float sum = value.getA() + value.getB() + value.getC()
                    + value.getD() + value.getE();
            // average over the five scores
            float avg = sum / 5;
            context.write(key, new Text("sum:" + sum + "\t" + "avg:" + avg));
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param arg arg[0] = input glob, arg[1] = output directory
     * @return 0 on success, 1 on failure (was previously always 0 — fixed)
     */
    @Override
    public int run(String[] arg) throws Exception {
        // Bug fix: use the Configuration injected by ToolRunner (picks up -D
        // options) instead of building a fresh one, per the Tool contract.
        Configuration conf = getConf();
        Path path = new Path(arg[1]);
        FileSystem fs = path.getFileSystem(conf);
        // Bug fix: check existence, not isDirectory — a stale output *file*
        // would previously survive and fail the job at submit time.
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        // Job.getInstance replaces the deprecated new Job(conf, name).
        Job job = Job.getInstance(conf, "score");
        job.setJarByClass(ScoreMapReduce.class);
        job.setMapperClass(ScoreMap.class);
        job.setReducerClass(ScoreReduce.class);
        job.setInputFormatClass(ScoreInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(arg[0]));
        FileOutputFormat.setOutputPath(job, path);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(ScorePair.class);
        // Bug fix: propagate job success/failure to the caller / exit code.
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
0 0
- hadoop的InputFormat简单demo
- 自定义hadoop的inputformat
- 自定义hadoop的InputFormat
- 关于Hadoop的InputFormat类
- 关于Hadoop的InputFormat类
- Hadoop的InputFormat和OutputFormat
- Hadoop开发常用的InputFormat和OutputFormat
- Hadoop开发常用的InputFormat和OutputFormat
- Hadoop开发常用的InputFormat和OutputFormat
- Hadoop开发常用的InputFormat和OutputFormat
- Hadoop中常用的InputFormat,OutPutFormat类
- Hadoop开发常用的InputFormat和OutputFormat
- Hadoop开发常用的InputFormat和OutputFormat
- Hadoop开发常用的InputFormat和OutputFormat
- Hadoop开发常用的InputFormat和OutputFormat
- Hadoop中常用的InputFormat、OutputFormat
- hadoop的序列化简单demo
- Hadoop InputFormat浅析
- 沙场9
- 常见排序算法之归并排序
- Android App的架构设计:从VM、MVC、MVP到MVVM
- spark streaming programming guide 快速开始(二)
- 第七周项目4——队列数组
- hadoop的InputFormat简单demo
- 沙场12
- android 项目常用工具方法
- oracle数据库对象
- 单片机按键去抖原理
- 欢迎使用CSDN-markdown编辑器
- 简单的 websocket
- 实现把一个音频文件的视频抽取出来
- SPSS-数据处理-数据变量