Hadoop Study Notes (8) --- Built-in and Custom Data Types


For example, suppose we want to take the data below and, for all records sharing the same phone number, sum the values at positions 5, 6, 7, and 8:

1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 24 27 2481 24681 200
1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 24 27 2481 24681 200
1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 24 27 2481 24681 200
1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 24 27 2481 24681 200
1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 24 27 2481 24681 200

We will write a custom type for the value; it must implement the Writable interface:

package cn.edu.bjut.model;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class DataWriteable implements Writable {

    private long a, b, c, d;

    // A no-argument constructor is required: Hadoop instantiates the type
    // reflectively before calling readFields().
    public DataWriteable() {
        super();
    }

    public DataWriteable(String a, String b, String c, String d) {
        super();
        this.a = Long.parseLong(a);
        this.b = Long.parseLong(b);
        this.c = Long.parseLong(c);
        this.d = Long.parseLong(d);
    }

    // Deserialization: read the fields back in exactly the order write() emitted them.
    @Override
    public void readFields(DataInput in) throws IOException {
        this.a = in.readLong();
        this.b = in.readLong();
        this.c = in.readLong();
        this.d = in.readLong();
    }

    // Serialization: write the fields to the binary output stream.
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(a);
        out.writeLong(b);
        out.writeLong(c);
        out.writeLong(d);
    }

    @Override
    public String toString() {
        return a + "\t" + b + "\t" + c + "\t" + d;
    }

    public long getA() { return a; }
    public void setA(long a) { this.a = a; }
    public long getB() { return b; }
    public void setB(long b) { this.b = b; }
    public long getC() { return c; }
    public void setC(long c) { this.c = c; }
    public long getD() { return d; }
    public void setD(long d) { this.d = d; }
}
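Before wiring this class into a job, it can help to verify that write() and readFields() are symmetric. Below is a minimal stand-alone round-trip sketch (the DataWriteableTest class is mine, not part of the original code): it serializes an instance to an in-memory stream and reads it back, which is essentially what Hadoop does when moving values between map and reduce.

package cn.edu.bjut.model;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Hypothetical helper: round-trip a DataWriteable through its own
// serialization to confirm the fields survive intact.
public class DataWriteableTest {

    public static void main(String[] args) throws IOException {
        DataWriteable original = new DataWriteable("24", "27", "2481", "24681");

        // Serialize to an in-memory byte stream.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance via the no-arg constructor.
        DataWriteable copy = new DataWriteable();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

        System.out.println(copy);  // expected: 24	27	2481	24681
    }
}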

The driver class with the main method is written like this:

package cn.edu.bjut.model;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class NumCount {

    static final String INPUT_DIR = "hdfs://172.21.15.189:9000/input";
    static final String OUTPUT_DIR = "hdfs://172.21.15.189:9000/output";

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Delete the output directory if it already exists, or the job will fail.
        Path path = new Path(OUTPUT_DIR);
        FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_DIR), conf);
        if (fileSystem.exists(path)) {
            fileSystem.delete(path, true);
        }

        Job job = new Job(conf, "NumCount");
        FileInputFormat.setInputPaths(job, INPUT_DIR);  // set the input path
        FileOutputFormat.setOutputPath(job, path);      // set the output path
        job.setMapperClass(MyMapper.class);             // set the custom mapper class
        job.setReducerClass(MyReducer.class);           // set the custom reducer class
        job.setOutputKeyClass(Text.class);              // set the output key type
        job.setOutputValueClass(DataWriteable.class);   // set the output value type
        job.waitForCompletion(true);                    // submit and wait for completion
    }

    /**
     * Custom mapper: emits the phone number (field 1) as the key and
     * fields 4-7 packed into a DataWriteable as the value.
     * @author Gary
     */
    static class MyMapper extends Mapper<LongWritable, Text, Text, DataWriteable> {

        @Override
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, DataWriteable>.Context context)
                throws IOException, InterruptedException {
            String[] all = value.toString().split(" ");
            DataWriteable dataWriteable = new DataWriteable(all[4], all[5], all[6], all[7]);
            context.write(new Text(all[1]), dataWriteable);
        }
    }

    /**
     * Custom reducer: sums the four fields across all records
     * with the same phone number.
     * @author Gary
     */
    static class MyReducer extends Reducer<Text, DataWriteable, Text, DataWriteable> {

        @Override
        protected void reduce(Text key, Iterable<DataWriteable> values,
                Reducer<Text, DataWriteable, Text, DataWriteable>.Context context)
                throws IOException, InterruptedException {
            long a = 0L, b = 0L, c = 0L, d = 0L;
            for (DataWriteable dataWriteable : values) {
                a += dataWriteable.getA();
                b += dataWriteable.getB();
                c += dataWriteable.getC();
                d += dataWriteable.getD();
            }
            DataWriteable dataWriteable = new DataWriteable(a + "", b + "", c + "", d + "");
            context.write(key, dataWriteable);
        }
    }
}
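Note that DataWriteable is only ever used as a value here. If a custom type needs to serve as a key, it must implement WritableComparable instead, because keys are sorted during the shuffle. Below is an illustrative sketch (the PhoneKey class and its field are my own example, not from the original post) showing the extra pieces a key type needs:

package cn.edu.bjut.model;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

// Illustrative sketch: a custom type usable as a MapReduce key.
// Keys must implement WritableComparable so the framework can sort them.
public class PhoneKey implements WritableComparable<PhoneKey> {

    private String phone = "";

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(phone);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        phone = in.readUTF();
    }

    // Defines the sort order of keys during the shuffle phase.
    @Override
    public int compareTo(PhoneKey other) {
        return phone.compareTo(other.phone);
    }

    // Keep hashCode consistent with equals/compareTo so the default
    // HashPartitioner routes equal keys to the same reducer.
    @Override
    public int hashCode() {
        return phone.hashCode();
    }

    @Override
    public boolean equals(Object obj) {
        return obj instanceof PhoneKey && phone.equals(((PhoneKey) obj).phone);
    }

    public String getPhone() { return phone; }
    public void setPhone(String phone) { this.phone = phone; }
}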

Running the program produces the final result:

13726230503	120	135	12405	123405