<hadoop> Passing custom objects through the map and reduce functions of a Hadoop cluster


Map and reduce can only pass data between them as KEY/VALUE pairs. A basic word-count example using Hadoop's built-in Writable types looks like this:

@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    // Convert the Text value into a String
    String line = value.toString();
    // Split the line into words
    String[] words = line.split(" ");
    // Emit a <word, 1> pair for each word
    for (String word : words) {
        context.write(new Text(word), new IntWritable(1));
    }
}
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
    int result = 0;
    // Sum the counts collected for this word
    for (IntWritable value : values) {
        result += value.get();
    }
    // Emit the final <word, total> pair
    context.write(key, new IntWritable(result));
}
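For reference, here is a minimal driver sketch that wires the two functions above into a job. The class names WordCountMapper, WordCountReducer, and WordCountDriver are assumptions for illustration, since the article does not show the enclosing classes:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word count");
        job.setJarByClass(WordCountDriver.class);
        // Hypothetical names for the classes holding the map() and reduce() methods above
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // Declare the key/value types the job emits
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}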

When a more complex data type needs to be passed, a custom object can be used as the key or value instead. Because Hadoop must serialize data to move it between tasks, the object has to provide a suitable serialization mechanism: the custom class must implement Hadoop's Writable interface.

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Created by hadoop on 17-2-18.
 * A custom data type must implement the Writable interface
 * in order to be transmitted through Hadoop.
 */
public class MyData implements Writable {
    private String a;
    private int b;

    // A no-argument constructor is required so Hadoop can
    // instantiate the object during deserialization
    public MyData() {
    }

    public MyData(String a, int b) {
        this.a = a;
        this.b = b;
    }

    public String getA() {
        return a;
    }

    public void setA(String a) {
        this.a = a;
    }

    public int getB() {
        return b;
    }

    public void setB(int b) {
        this.b = b;
    }

    /*
     * Serialization: write the fields in a fixed order
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        // writeUTF and readUTF form a matched pair for Strings
        dataOutput.writeUTF(a);
        dataOutput.writeInt(b);
    }

    /*
     * Deserialization: read the fields in the same order they were written
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.a = dataInput.readUTF();
        this.b = dataInput.readInt();
    }
}
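To show the type in use, here is a minimal sketch of a mapper that emits MyData as its map output value. The input format (comma-separated "name,count" lines) and the class name MyDataMapper are assumptions for illustration. Note that a custom type used as a key must implement WritableComparable rather than plain Writable, because keys are sorted during the shuffle.

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

// Hypothetical mapper that parses "name,count" lines and emits
// <name, MyData> pairs; MyData crosses the network through its
// write()/readFields() methods
public class MyDataMapper extends Mapper<LongWritable, Text, Text, MyData> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split(",");
        context.write(new Text(fields[0]), new MyData(fields[0], Integer.parseInt(fields[1])));
    }
}

The driver would then declare the map output value type with job.setMapOutputValueClass(MyData.class).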