hadoop MapReduce例子

来源:互联网 发布:vb登录界面模板下载 编辑:程序博客网 时间:2024/06/05 18:17
import java.io.IOException;import java.util.StringTokenizer;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Mapper;public class WcMapper extends Mapper<LongWritable, Text, Text, IntWritable>{    /*      * 每次调用map方法会传入split 中的一行数据     * key 该行数据所在文件的位置下标     * value 该行数据     */    @Override    protected void map(LongWritable key, Text value,            Context context)            throws IOException, InterruptedException {        // TODO Auto-generated method stub        StringTokenizer st = new StringTokenizer(value.toString());        while(st.hasMoreTokens()){            context.write(new Text(st.nextToken()), new IntWritable(1));        }    }}

该过程对应这个阶段，其中 map 方法是可控的
这里写图片描述

import java.io.IOException;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Reducer;public class WcReducer extends Reducer<Text, IntWritable, Text, IntWritable>{        /*         *reduce 阶段,         *key 键值,传入         *iterable 传入的值,同一键值下有多个         */        @Override        protected void reduce(Text key, Iterable<IntWritable> iterable,                Context contex)                throws IOException, InterruptedException {            int sum = 0;            for(IntWritable i:iterable){                sum++;            }            contex.write(key, new IntWritable(sum));        }}

对应下图阶段
这里写图片描述
程序的main入口

import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class JobRun {    public static void main(String[] args) {        Configuration configuration = new Configuration();        configuration.set("mapred.job.tracker", "192.168.6.132:9001");//配置mapred.job.tracker        configuration.set("fs.default.name", "192.168.6.132:9000");//配置fs.default.name        configuration.set("mapred.jar", "C:/Users/gmr/Desktop/wc.jar");//配置jar文件位置,就不用特地移到到服务器上跑,程序自动转移        try {            //下面设置程序的各个执行及输入输出的文件模块            Job job = new Job(configuration);//            job.setJarByClass(JobRun.class);//设置程序执行入口            job.setMapperClass(WcMapper.class);//设置mapper的执行类            job.setReducerClass(WcReducer.class);//设置reduce的执行类            job.setMapOutputKeyClass(Text.class);//设置输出的键的类型            job.setMapOutputValueClass(IntWritable.class);//设置输出的值的类型//            job.setNumReduceTasks(1);//设置reduce任务的个数,默认就是一,可不写            FileInputFormat.addInputPath(job, new Path("/opt/usr/input/wc"));//设置文件输入目录,path里面放目录,不是文件            FileOutputFormat.setOutputPath(job, new Path("/opt/usr/output/wc"));//设置输出目录            System.exit(job.waitForCompletion(true)?0:1);//等待job任务执行完成        } catch (Exception e) {            // TODO Auto-generated catch block            e.printStackTrace();        }    }}
0 0
原创粉丝点击