Hadoop的MapReduce的Java驱动代码

来源:互联网 发布:美人胚子知乎 编辑:程序博客网 时间:2024/04/30 05:00

统计文件中单词出现的次数,文件text

这里写图片描述

1、Java代码:WcMapper类继承Mapper,执行map阶段的业务代码

package com.jxl.mr;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Word-count mapper: splits each input line into whitespace-delimited
 * tokens and emits a (word, 1) pair for every token.
 */
public class WcMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // Reused output objects: context.write() serializes the pair
    // immediately, so reusing the writables avoids allocating a new
    // Text and IntWritable for every token.
    private final Text word = new Text();
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * Called once for each line of the input split.
     *
     * @param key     byte offset of this line within the file
     * @param value   the text of the line
     * @param context sink for the (word, 1) map output pairs
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        final StringTokenizer tokenizer = new StringTokenizer(value.toString());
        while (tokenizer.hasMoreTokens()) {
            word.set(tokenizer.nextToken());
            context.write(word, ONE); // map output
        }
    }
}

2、Java代码:WcReduce类继承Reducer,执行reduce阶段的业务代码

package com.jxl.mr;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Word-count reducer: sums the per-word counts emitted by the mapper
 * and writes one (word, total) pair per distinct word.
 */
public class WcReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    // Reused output value: context.write() serializes immediately,
    // so one IntWritable can be recycled across all keys.
    private final IntWritable result = new IntWritable();

    /**
     * Called once per distinct word with all of its counts.
     *
     * @param key     the word
     * @param values  the counts emitted for this word (all 1s here)
     * @param context sink for the (word, total) reduce output
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable count : values) {
            sum += count.get();
        }
        result.set(sum);
        context.write(key, result); // reduce output
    }
}

3、Java代码:JobRun类是驱动程序,负责把Job提交给JobTracker执行

package com.jxl.mr;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class JobRun {    public static void main(String[] args) {        System.err.println("Job开始执行");        final Configuration config = new Configuration();        config.set("mapred.job.tracker", "centos-node6:9001");        try {            final Job job = new Job(config);            job.setJarByClass(JobRun.class);            job.setMapperClass(WcMapper.class);            job.setReducerClass(WcReduce.class);            job.setMapOutputKeyClass(Text.class);            job.setMapOutputValueClass(IntWritable.class);            //job.setNumReduceTasks(1);//任务执行次数            //mapreduce 输入数据所在的目录或者文件            FileInputFormat.addInputPath(job, new Path("/hello/input/wc/text"));            //mr执行之后的输出数据目录            FileOutputFormat.setOutputPath(job, new Path("/hello/output/wc/"));            //执行完退出,不可缺少            System.exit(job.waitForCompletion(true) ? 0 : 1);            System.err.println("Job执行完成");        } catch (Exception e) {            e.printStackTrace();        }    }}

4、打包成wc.jar包,执行以下命令

    ./hadoop jar /wc.jar com.jxl.mr.JobRun

这里写图片描述

5、浏览器访问:http://centos-node6:50030/jobtracker.jsp

这里写图片描述