Hadoop Custom Data Types
1. A custom data type is defined by implementing the WritableComparable interface.
2. Input data format: each record has three fields: class ID, name, student ID. The sample lines below carry two tab-separated records each:
1 小明 10 3 大明 11
1 小王 12 4 大王 13
1 小张 14 6 大张 15
3. Output data format: all students of the same class appear together on one line. The Mapper emits the class ID as the key, so the shuffle delivers every record of a class to the same reduce call:
1	id:14 name:小张	id:12 name:小王	id:10 name:小明
3	id:11 name:大明
4	id:13 name:大王
6	id:15 name:大张
4. Define the User class
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * A custom Hadoop data type: serialized via write()/readFields(),
 * sortable via compareTo().
 * @author DELL_pc
 * @date March 15, 2017
 */
public class User implements WritableComparable<User> {
    int id;
    String name;
    int classid;

    // Hadoop instantiates Writables reflectively, so the no-arg
    // constructor is required.
    public User() {
    }

    public User(int classid, String name, int id) {
        this.id = id;
        this.name = name;
        this.classid = classid;
    }

    public int getId() {
        return id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public int getClassid() {
        return classid;
    }

    public void setClassid(int classid) {
        this.classid = classid;
    }

    // Serialization: the field order here must mirror readFields() exactly.
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeInt(id);
        out.writeUTF(name);
        out.writeInt(classid);
    }

    // Deserialization: read the fields back in the order they were written.
    @Override
    public void readFields(DataInput in) throws IOException {
        id = in.readInt();
        name = in.readUTF();
        classid = in.readInt();
    }

    // Order by class ID first, then by student ID; a consistent ordering
    // is required by the compareTo contract for sorting to work.
    @Override
    public int compareTo(User o) {
        int cmp = Integer.compare(classid, o.classid);
        return cmp != 0 ? cmp : Integer.compare(id, o.id);
    }

    @Override
    public String toString() {
        return "User [id=" + id + ", name=" + name + ", classid=" + classid + "]";
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + classid;
        result = prime * result + id;
        result = prime * result + ((name == null) ? 0 : name.hashCode());
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null || getClass() != obj.getClass())
            return false;
        User other = (User) obj;
        return classid == other.classid && id == other.id
                && (name == null ? other.name == null : name.equals(other.name));
    }
}
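Because write() and readFields() must mirror each other, a quick round-trip check is worth doing before wiring the type into a job. Below is a minimal sketch using Hadoop's DataOutputBuffer/DataInputBuffer utilities; the UserRoundTrip class name is my own, not part of the original post.

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;

public class UserRoundTrip {
    public static void main(String[] args) throws Exception {
        User original = new User(1, "小明", 10);

        // Serialize with write(), exactly as the framework would.
        DataOutputBuffer out = new DataOutputBuffer();
        original.write(out);

        // Deserialize into a fresh instance with readFields().
        DataInputBuffer in = new DataInputBuffer();
        in.reset(out.getData(), out.getLength());
        User copy = new User();
        copy.readFields(in);

        // The copy must equal the original; this only holds if the
        // field order in write() and readFields() matches.
        System.out.println(original.equals(copy));  // expected: true
    }
}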
5. Define the Mapper
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Emits (class ID, User) pairs so the shuffle groups students by class.
 * @author DELL_pc
 * @date March 15, 2017
 */
public class MyMapper extends Mapper<LongWritable, Text, IntWritable, User> {
    IntWritable cla = new IntWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Records on a line are tab-separated; fields within a record
        // are whitespace-separated: classid, name, id.
        StringTokenizer records = new StringTokenizer(value.toString(), "\t");
        while (records.hasMoreTokens()) {
            StringTokenizer fields = new StringTokenizer(records.nextToken());
            while (fields.hasMoreTokens()) {
                int classid = Integer.valueOf(fields.nextToken());
                String name = fields.nextToken();
                int id = Integer.valueOf(fields.nextToken());
                User user = new User(classid, name, id);
                cla.set(classid);
                context.write(cla, user);
            }
        }
    }
}
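To see how the tokenizing in map() carves up one input line, here is a standalone sketch that runs without Hadoop (ParseDemo is a hypothetical name introduced for illustration):

import java.util.StringTokenizer;

public class ParseDemo {
    public static void main(String[] args) {
        // One input line: two records separated by a tab,
        // fields within a record separated by spaces.
        String line = "1 小明 10\t3 大明 11";
        StringTokenizer records = new StringTokenizer(line, "\t");
        while (records.hasMoreTokens()) {
            StringTokenizer fields = new StringTokenizer(records.nextToken());
            int classid = Integer.parseInt(fields.nextToken());
            String name = fields.nextToken();
            int id = Integer.parseInt(fields.nextToken());
            System.out.printf("classid=%d name=%s id=%d%n", classid, name, id);
        }
    }
}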
6. Define the Reducer
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Concatenates all students of one class into a single output line.
 * @author DELL_pc
 * @date March 15, 2017
 */
public class MyReducer extends Reducer<IntWritable, User, IntWritable, Text> {
    Text text = new Text();

    @Override
    protected void reduce(IntWritable key, Iterable<User> users, Context context)
            throws IOException, InterruptedException {
        StringBuilder buffer = new StringBuilder();
        int classid = 0;
        IntWritable intWrit = new IntWritable();
        // Hadoop reuses one User instance across this loop, so we only
        // read its fields here rather than keeping references to it.
        for (User user : users) {
            classid = user.getClassid();
            buffer.append("id:" + user.getId() + " name:" + user.getName() + "\t");
        }
        intWrit.set(classid);
        text.set(buffer.toString());
        context.write(intWrit, text);
    }
}
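One caveat worth spelling out for custom value types: inside the reduce loop, Hadoop reuses a single User instance for every value, so storing references to it leaves you with N copies of the last record. The sketch below simulates that reuse outside Hadoop (ReuseDemo is a hypothetical name); if the values must outlive the loop, copy the fields into a fresh object.

import java.util.ArrayList;
import java.util.List;

public class ReuseDemo {
    public static void main(String[] args) {
        // Simulate the framework reusing one User instance across
        // the values of a single reduce() call.
        User shared = new User();
        int[][] rows = { {1, 10}, {1, 12}, {1, 14} };
        String[] names = { "小明", "小王", "小张" };

        List<User> aliased = new ArrayList<>();
        List<User> copied = new ArrayList<>();
        for (int i = 0; i < rows.length; i++) {
            shared.setClassid(rows[i][0]);
            shared.setName(names[i]);
            shared.setId(rows[i][1]);
            aliased.add(shared);  // every element points at the same object
            copied.add(new User(shared.getClassid(), shared.getName(), shared.getId()));
        }
        System.out.println(aliased); // three identical rows: the last record, repeated
        System.out.println(copied);  // the three distinct records
    }
}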
7. Define the driver (main function)
/**
 * Job driver: wires the custom User type into the MapReduce job.
 * @author DELL_pc
 * @date March 15, 2017
 */
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class UserDemo implements Tool {
    private Configuration conf = null;

    @Override
    public Configuration getConf() {
        return this.conf;
    }

    @Override
    public void setConf(Configuration that) {
        this.conf = that;
        this.conf.set("fs.defaultFS", "hdfs://192.168.59.130:8020");
    }

    @Override
    public int run(String[] arg0) throws Exception {
        Configuration conf = this.conf;
        Job job = Job.getInstance(conf, "usercount");

        // Input
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.addInputPath(job, new Path("/user/lei/wordcount/"));

        // Map: key is the class ID, value is the custom User type
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(User.class);

        // Reduce
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        // Output (the directory must not already exist)
        FileOutputFormat.setOutputPath(job, new Path("/user/lei/wordcount/out"));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // Run the job and report wall-clock time
        long startdate = System.currentTimeMillis();
        ToolRunner.run(new UserDemo(), args);
        long enddate = System.currentTimeMillis();
        long costdate = enddate - startdate;
        System.out.println("Elapsed time: " + costdate + "ms");
    }
}
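Hardcoding the paths and fs.defaultFS is fine for a fixed test cluster, but ToolRunner already passes the leftover command-line arguments into run(), so the paths can come from the caller. A hedged variant under that assumption (the setJarByClass call is my addition; it matters once the job is submitted to a real cluster from a jar):

// A variant of run() that takes the input and output paths from the
// command line instead of hardcoding them.
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf(), "usercount");
    job.setJarByClass(UserDemo.class);  // ships the containing jar to the cluster

    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.addInputPath(job, new Path(args[0]));    // e.g. /user/lei/wordcount/

    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(User.class);

    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));  // must not already exist

    return job.waitForCompletion(true) ? 0 : 1;
}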
8. Counter output. Note Map input records=3 (the three input lines), Map output records=6 (the six parsed student records), and Reduce input groups=4 (the four distinct class IDs):
INFO - Counters: 38
	File System Counters
		FILE: Number of bytes read=656
		FILE: Number of bytes written=470578
		FILE: Number of read operations=0
		FILE: Number of large read operations=0
		FILE: Number of write operations=0
		HDFS: Number of bytes read=162
		HDFS: Number of bytes written=144
		HDFS: Number of read operations=15
		HDFS: Number of large read operations=0
		HDFS: Number of write operations=4
	Map-Reduce Framework
		Map input records=3
		Map output records=6
		Map output bytes=120
		Map output materialized bytes=138
		Input split bytes=116
		Combine input records=0
		Combine output records=0
		Reduce input groups=4
		Reduce shuffle bytes=138
		Reduce input records=6
		Reduce output records=4
		Spilled Records=12
		Shuffled Maps =1
		Failed Shuffles=0
		Merged Map outputs=1
		GC time elapsed (ms)=45
		CPU time spent (ms)=0
		Physical memory (bytes) snapshot=0
		Virtual memory (bytes) snapshot=0
		Total committed heap usage (bytes)=473956352
	Shuffle Errors
		BAD_ID=0
		CONNECTION=0
		IO_ERROR=0
		WRONG_LENGTH=0
		WRONG_MAP=0
		WRONG_REDUCE=0
	File Input Format Counters
		Bytes Read=81
	File Output Format Counters
		Bytes Written=144