MapReduce三次排序

来源:互联网 发布:ubuntu修改命令提示符 编辑:程序博客网 时间:2024/05/22 15:34

题目:
20170308,小强,小牛斗地主,22,360手机助手,0.2版本,北京
20170308,小强,小牛斗地主,14,360手机助手,0.3版本,北京
20170308,小强,小牛斗地主,13,360手机助手,0.3版本,北京
20170308,小强,小牛斗地主,16,360手机助手,0.4版本,北京
20170308,小强,小牛斗地主,18,360手机助手,0.4版本,北京
20170308,小强,小牛斗地主,19,360手机助手,0.4版本,北京
20170308,小强,小牛斗地主,15,360手机助手,0.4版本,北京
20170309,tom,小牛斗地主,8,360手机助手,0.1版本,北京
20170309,tom,小牛斗地主,5,360手机助手,0.1版本,北京
20170309,tom,小牛斗地主,6,360手机助手,0.1版本,北京
20170309,tom,小牛斗地主,10,360手机助手,0.2版本,北京
20170309,tom,小牛斗地主,12,360手机助手,0.2版本,北京
20170309,tom,小牛斗地主,11,360手机助手,0.3版本,北京
20170309,tom,小牛斗地主,9,360手机助手,0.2版本,北京
20170309,tom,小牛斗地主,23,360手机助手,0.2版本,北京
20170309,tom,小牛斗地主,22,360手机助手,0.2版本,北京
20170309,tom,小牛斗地主,14,360手机助手,0.3版本,北京
20170309,tom,小牛斗地主,13,360手机助手,0.3版本,北京
20170309,tom,小牛斗地主,16,360手机助手,0.4版本,北京
20170309,tom,小牛斗地主,18,360手机助手,0.4版本,北京
20170309,tom,小牛斗地主,19,360手机助手,0.5版本,北京
20170309,tom,小牛斗地主,15,360手机助手,0.4版本,北京
字段信息:
用户ID,用户名,游戏名,小时,数据来源,游戏版本,用户所在地
id, name, game, hour, source, version, city
题目要求:
对同一用户的记录按小时排序后,凡是版本号与上一条记录不同的记录,都在其末尾追加一个字段:上一条记录的版本号(只在同一用户内比较)
例如:
20170308,小强,小牛斗地主,10,360手机助手,0.2版本,北京
20170308,小强,小牛斗地主,13,360手机助手,0.3版本,北京,0.2版本
20170308,小强,小牛斗地主,14,360手机助手,0.3版本,北京
20170308,小强,小牛斗地主,15,360手机助手,0.4版本,北京,0.3版本
用户“小强”在10点钟是0.2版本,但是到了13点变成了0.3版本,那么就在13点钟这条记录的后面追加一个字段值0.2版本,也就是上个版本的版本号
至于为什么从10点直接跳到了13点,是因为11点和12点的数据没有收集到。

import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import cn.ssy.versions.homework.MyGroup;import cn.ssy.versions.homework.VersionsBean1;public class VersionsMR1 {    public static void main(String[] args) throws Exception {        Configuration conf = new Configuration();        Job job = Job.getInstance(conf);        job.setJarByClass(VersionsMR1.class);        job.setMapperClass(VersionsMRMapper.class);        job.setReducerClass(VersionsMRReducer.class);        job.setGroupingComparatorClass(MyGroup.class);        job.setMapOutputKeyClass(VersionsBean1.class);        job.setMapOutputValueClass(Text.class);        job.setOutputKeyClass(NullWritable.class);        job.setOutputValueClass(Text.class);        // 多文件输入        FileInputFormat.addInputPath(job, new Path("D:/versions/input"));        Path outputPath = new Path("D:/versions/output2");        if (FileSystem.get(conf).exists(outputPath)) {            FileSystem.get(conf).delete(outputPath, true);        }        FileOutputFormat.setOutputPath(job, outputPath);        Boolean bool = job.waitForCompletion(true);        System.exit(bool ? 
0 : 1);    }    public static class VersionsMRMapper extends Mapper<LongWritable, Text, VersionsBean1, Text>{        @Override        protected void map(LongWritable key, Text value, Context context)                throws IOException, InterruptedException {        String[] splits = value.toString().split(",");        String id = splits[0];        String version =splits[5];        String hour = splits[3];        //String outvalue =splits[1]+","+splits[2]+","+splits[4]+","+splits[5]+","+splits[6];        context.write(new VersionsBean1(id, version, hour), new Text(value.toString()));        }    }    public static class VersionsMRReducer extends Reducer<VersionsBean1, Text, NullWritable, Text>{        @Override        protected void reduce(VersionsBean1 key, Iterable<Text> value, Context context)                throws IOException, InterruptedException {            String a= key.getVersion();                 for(Text t:value){                                  if(key.getVersion().compareTo(a)!=0){                    context.write(NullWritable.get(), new Text(t+","+a));                     a= key.getVersion();                }else{                    context.write(NullWritable.get(), new Text(t));                }            }           }    }  }import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import org.apache.hadoop.io.WritableComparable;public class VersionsBean1 implements WritableComparable<VersionsBean1>{    private String id;    private String version;    private String hour;    public String getHour() {        return hour;    }    public void setHour(String hour) {        this.hour = hour;    }    public String getId() {        return id;    }    public void setId(String id) {        this.id = id;    }    public String getVersion() {        return version;    }    public void setVersion(String version) {        this.version = version;    }    @Override    public String toString() {        return id +"\t"+ version+"\t"+hour;    }    
public VersionsBean1() {        super();    }    public VersionsBean1(String id, String version,String hour) {        super();        this.id = id;        this.version = version;        this.hour =hour;    }    @Override    public void write(DataOutput out) throws IOException {        out.writeUTF(id);        out.writeUTF(version);        out.writeUTF(hour);    }    @Override    public void readFields(DataInput in) throws IOException {        this.id = in.readUTF();        this.version = in.readUTF();        this.hour = in.readUTF();    }    @Override    public int compareTo(VersionsBean1 o) {        int h=Integer.parseInt(this.hour)-Integer.parseInt(o.hour);        if(this.id.compareTo(o.getId())!=0){            return this.id.compareTo(o.getId());        }else if(h!=0){            return h;        }else if(this.version.compareTo(o.getVersion())!=0){            return this.version.compareTo(o.getVersion());        }        return 0;    }   }import org.apache.hadoop.io.WritableComparable;import org.apache.hadoop.io.WritableComparator;public class MyGroup extends WritableComparator{    protected  MyGroup(){        super(VersionsBean.class,true);    }    @Override    public int compare(WritableComparable a, WritableComparable b) {        VersionsBean sa = (VersionsBean) a;        VersionsBean sb = (VersionsBean) b;        return sa.getId().compareTo(sb.getId());    }}

输出结果:
20170308,小强,小牛斗地主,5,360手机助手,0.1版本,北京
20170308,小强,小牛斗地主,7,360手机助手,0.1版本,北京
20170308,小强,小牛斗地主,8,360手机助手,0.1版本,北京
20170308,小强,小牛斗地主,9,360手机助手,0.2版本,北京,0.1版本
20170308,小强,小牛斗地主,10,360手机助手,0.2版本,北京
20170308,小强,小牛斗地主,13,360手机助手,0.3版本,北京,0.2版本
20170308,小强,小牛斗地主,14,360手机助手,0.3版本,北京
20170308,小强,小牛斗地主,15,360手机助手,0.4版本,北京,0.3版本
20170308,小强,小牛斗地主,16,360手机助手,0.4版本,北京
20170308,小强,小牛斗地主,18,360手机助手,0.4版本,北京
20170308,小强,小牛斗地主,19,360手机助手,0.4版本,北京
20170308,小强,小牛斗地主,22,360手机助手,0.2版本,北京,0.4版本
20170308,小强,小牛斗地主,23,360手机助手,0.2版本,北京
20170309,tom,小牛斗地主,5,360手机助手,0.1版本,北京
20170309,tom,小牛斗地主,6,360手机助手,0.1版本,北京
20170309,tom,小牛斗地主,8,360手机助手,0.1版本,北京
20170309,tom,小牛斗地主,9,360手机助手,0.2版本,北京,0.1版本
20170309,tom,小牛斗地主,10,360手机助手,0.2版本,北京
20170309,tom,小牛斗地主,11,360手机助手,0.3版本,北京,0.2版本
20170309,tom,小牛斗地主,12,360手机助手,0.2版本,北京,0.3版本
20170309,tom,小牛斗地主,13,360手机助手,0.3版本,北京,0.2版本
20170309,tom,小牛斗地主,14,360手机助手,0.3版本,北京
20170309,tom,小牛斗地主,15,360手机助手,0.4版本,北京,0.3版本
20170309,tom,小牛斗地主,16,360手机助手,0.4版本,北京
20170309,tom,小牛斗地主,18,360手机助手,0.4版本,北京
20170309,tom,小牛斗地主,19,360手机助手,0.5版本,北京,0.4版本
20170309,tom,小牛斗地主,22,360手机助手,0.2版本,北京,0.5版本
20170309,tom,小牛斗地主,23,360手机助手,0.2版本,北京

0 0
原创粉丝点击