Getting Started with Hadoop: Several Demos of the MapReduce Process
1. A simple word-count demo to walk through the MapReduce flow
package com.demo.wordcount;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * @Description: word-count MapReduce job
 * @author: songqinghu
 * @date: 2017-08-24 18:12
 * Version: 1.0
 */
public class WordCount {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // when run on a cluster node this picks up the config files under $HADOOP_HOME
        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCount.class); // locate and ship the jar

        // map & reduce settings
        job.setMapperClass(WordCountMap.class);        // mapper class
        job.setReducerClass(WordCountReduce.class);    // reducer class
        job.setMapOutputKeyClass(Text.class);          // mapper output key
        job.setMapOutputValueClass(IntWritable.class); // mapper output value
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // input/output paths
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean flag = job.waitForCompletion(true);
        System.out.println("the job exe is :" + flag);
    }
}

class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] words = line.split(" ");
        for (String word : words) {
            context.write(new Text(word), new IntWritable(1));
        }
    }
}

class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text text, Iterable<IntWritable> iter, Context context)
            throws IOException, InterruptedException {
        // accumulate the total count for this word
        int count = 0;
        for (IntWritable num : iter) {
            count = count + num.get();
        }
        // done accumulating, emit the result
        context.write(text, new IntWritable(count));
    }
}
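To try the demo, package it as a jar and submit it to the cluster. A minimal run sketch, assuming the class is packaged as wordcount.jar and the input text already sits in HDFS (the jar name and both paths are placeholders):

    hadoop jar wordcount.jar com.demo.wordcount.WordCount /wordcount/input /wordcount/output
    hdfs dfs -cat /wordcount/output/part-r-00000

Note that the output directory must not exist beforehand: FileOutputFormat refuses to overwrite an existing path and fails the job at submission time.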
2. Handling custom serialization in the MapReduce flow
package com.demo.flowsum;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * @Description: traffic totals per phone number (custom Writable as the value type)
 * @author: songqinghu
 * @date: 2017-08-24 20:04
 * Version: 1.0
 */
public class FlowSum {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(FlowSum.class);

        job.setMapperClass(FlowSumMapper.class);
        job.setReducerClass(FlowSumReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Flow.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Flow.class);

        // input/output paths
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean flag = job.waitForCompletion(true);
        System.out.println("the job exe is :" + flag);
    }
}

class FlowSumMapper extends Mapper<LongWritable, Text, Text, Flow> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] words = line.split("\t");
        // words[1] is the phone number; the third- and second-from-last fields
        // are the upstream and downstream byte counts
        context.write(new Text(words[1]),
                new Flow(Integer.parseInt(words[words.length - 3]),
                         Integer.parseInt(words[words.length - 2])));
    }
}

class FlowSumReducer extends Reducer<Text, Flow, Text, Flow> {

    @Override
    protected void reduce(Text text, Iterable<Flow> iters, Context context)
            throws IOException, InterruptedException {
        int uFlow = 0;
        int dFlow = 0;
        for (Flow flow : iters) {
            uFlow = flow.getuFlow() + uFlow;
            dFlow = flow.getdFlow() + dFlow;
        }
        context.write(text, new Flow(uFlow, dFlow));
    }
}

// implements Hadoop's serialization interface
class Flow implements Writable {

    private int uFlow; // upstream
    private int dFlow; // downstream
    private int sFlow; // total

    // no-arg constructor required: the framework instantiates Flow via reflection
    public Flow() {}

    public Flow(int uFlow, int dFlow) {
        super();
        this.uFlow = uFlow;
        this.dFlow = dFlow;
        this.sFlow = uFlow + dFlow;
    }

    public int getuFlow() { return uFlow; }
    public void setuFlow(int uFlow) { this.uFlow = uFlow; }
    public int getdFlow() { return dFlow; }
    public void setdFlow(int dFlow) { this.dFlow = dFlow; }
    public int getsFlow() { return sFlow; }
    public void setsFlow(int sFlow) { this.sFlow = sFlow; }

    @Override
    public void write(DataOutput out) throws IOException {
        // serialize: the field order here must match readFields()
        out.writeInt(uFlow);
        out.writeInt(dFlow);
        out.writeInt(sFlow);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // deserialize, in the same order as write()
        this.uFlow = in.readInt();
        this.dFlow = in.readInt();
        this.sFlow = in.readInt();
    }

    @Override
    // how the value is rendered in the final output files
    public String toString() {
        return uFlow + "\t" + dFlow + "\t" + sFlow;
    }
}
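The mapper assumes tab-separated log lines in which field 1 (zero-based) is the phone number and the third- and second-from-last fields carry the upstream and downstream byte counts. A hypothetical input line, purely to illustrate the field positions (the real log format may differ):

    1363157985066	13726230503	00-FD-07-A4-72-B8	120.196.100.82	24	27	2481	24681	200

For this line the mapper emits the key 13726230503 with a Flow of (2481, 24681); after the reducer sums all records for that number, Flow.toString() renders the output record as 13726230503	2481	24681	27162.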
3. Applying custom partitioning logic to the mapper output
package com.demo.flowpart;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FlowPart {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(FlowPart.class);

        job.setMapperClass(FlowSumMapper.class);
        job.setReducerClass(FlowSumReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Flow.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Flow.class);

        // input/output paths
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // route mapper output through the custom partitioner, one reduce task per partition
        job.setPartitionerClass(FlowPartition.class);
        job.setNumReduceTasks(5);

        boolean flag = job.waitForCompletion(true);
        System.out.println("the job exe is :" + flag);
    }
}

class FlowSumMapper extends Mapper<LongWritable, Text, Text, Flow> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String[] words = line.split("\t");
        context.write(new Text(words[1]),
                new Flow(Integer.parseInt(words[words.length - 3]),
                         Integer.parseInt(words[words.length - 2])));
    }
}

class FlowSumReducer extends Reducer<Text, Flow, Text, Flow> {

    @Override
    protected void reduce(Text text, Iterable<Flow> iters, Context context)
            throws IOException, InterruptedException {
        int uFlow = 0;
        int dFlow = 0;
        for (Flow flow : iters) {
            uFlow = flow.getuFlow() + uFlow;
            dFlow = flow.getdFlow() + dFlow;
        }
        context.write(text, new Flow(uFlow, dFlow));
    }
}

// implements Hadoop's serialization interface
class Flow implements Writable {

    private int uFlow; // upstream
    private int dFlow; // downstream
    private int sFlow; // total

    // no-arg constructor required: the framework instantiates Flow via reflection
    public Flow() {}

    public Flow(int uFlow, int dFlow) {
        super();
        this.uFlow = uFlow;
        this.dFlow = dFlow;
        this.sFlow = uFlow + dFlow;
    }

    public int getuFlow() { return uFlow; }
    public void setuFlow(int uFlow) { this.uFlow = uFlow; }
    public int getdFlow() { return dFlow; }
    public void setdFlow(int dFlow) { this.dFlow = dFlow; }
    public int getsFlow() { return sFlow; }
    public void setsFlow(int sFlow) { this.sFlow = sFlow; }

    @Override
    public void write(DataOutput out) throws IOException {
        // serialize: the field order here must match readFields()
        out.writeInt(uFlow);
        out.writeInt(dFlow);
        out.writeInt(sFlow);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // deserialize, in the same order as write()
        this.uFlow = in.readInt();
        this.dFlow = in.readInt();
        this.sFlow = in.readInt();
    }

    @Override
    // how the value is rendered in the final output files
    public String toString() {
        return uFlow + "\t" + dFlow + "\t" + sFlow;
    }
}
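The driver above differs from the previous demo only in its last two settings: it registers a custom Partitioner and raises the number of reduce tasks to 5, one per partition. The partitioner itself sits in a separate source file of the same package: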
package com.demo.flowpart;

import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * @Description: routes mapper output to reducers by phone-number prefix
 * @author: songqinghu
 * @date: 2017-08-24 20:41
 * Version: 1.0
 */
public class FlowPartition extends Partitioner<Text, Flow> {

    private static Map<String, Integer> iphonePri = new HashMap<String, Integer>();

    static {
        iphonePri.put("136", 0);
        iphonePri.put("137", 1);
        iphonePri.put("138", 2);
        iphonePri.put("139", 3);
    }

    @Override
    public int getPartition(Text key, Flow value, int numPartitions) {
        String iphoneNum = key.toString();
        String num = iphoneNum.substring(0, 3); // first three digits of the phone number
        if (iphonePri.containsKey(num)) {
            return iphonePri.get(num);
        }
        return 4; // everything else falls through to the last partition
    }
}
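With this partitioner, numbers starting with 136-139 land in reducers 0-3 and all other prefixes fall through to reducer 4, so the driver's setNumReduceTasks(5) matches exactly. Two things to watch: if getPartition ever returned an index greater than or equal to the number of reduce tasks, the map tasks would fail with an "Illegal partition" error; and with a single reduce task Hadoop bypasses the custom partitioner entirely. A minimal run sketch, assuming the job is packaged as flowpart.jar (the jar name and both paths are placeholders):

    hadoop jar flowpart.jar com.demo.flowpart.FlowPart /flow/input /flow/output

The output directory should then contain five result files, part-r-00000 through part-r-00004, one per partition.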