Hadoop MultipleInputs.addInputPath: Reading Multiple Input Paths


MultipleInputs.addInputPath

Purpose

Lets you specify multiple input paths, each bound to its own Mapper implementation.

Usage

MultipleInputs.addInputPath(Job job, Path path, Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass)
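Each call registers one (path, InputFormat, Mapper) triple on the job; at run time Hadoop routes every input split to the mapper registered for the path it came from. A minimal sketch of the call sequence (job, in1, in2, MapA, and MapB stand in for your own job and classes):

    // Bind each input directory to its own InputFormat and Mapper.
    MultipleInputs.addInputPath(job, in1, TextInputFormat.class, MapA.class);
    MultipleInputs.addInputPath(job, in2, TextInputFormat.class, MapB.class);
    // No job.setMapperClass(...) call is needed: this overload installs a
    // DelegatingMapper internally, and setting a mapper yourself would override it.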

Example

We use word count as the example.
F:\hadooptest\wordcount\input1 contains a word.txt whose words are separated by whitespace:

aa bb cc
dd ee ff aa  bb  ff

F:\hadooptest\wordcount\input2 contains a word.txt whose words are separated by ##:

aa##bb##cc
ee##gg##kk

Code

package com.myhadoop.multiple;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Created by kaishun on 2017/7/31.
 */
public class TestMultipleInputs {

    // Mapper for input1: words separated by whitespace
    public static class MapA extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] strs = value.toString().split("\\s+");
            for (String s : strs) {
                word.set(s);
                context.write(word, one);
            }
        }
    }

    // Mapper for input2: words separated by ##
    public static class MapB extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] strs = value.toString().split("##");
            for (String s : strs) {
                word.set(s);
                context.write(word, one);
            }
        }
    }

    // One reducer sums the counts emitted by both mappers
    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJobName("MultipleWordCount");
        job.setJarByClass(TestMultipleInputs.class);
        // Multiple inputs, each bound to its own mapper
        MultipleInputs.addInputPath(job, new Path("F:\\hadooptest\\wordcount\\input1"), TextInputFormat.class, MapA.class);
        MultipleInputs.addInputPath(job, new Path("F:\\hadooptest\\wordcount\\input2"), TextInputFormat.class, MapB.class);
        job.setNumReduceTasks(1);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // All intermediate output goes to a single reducer
        job.setReducerClass(Reduce.class);
        FileOutputFormat.setOutputPath(job, new Path(args[0]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
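If every path can share the same mapper, MultipleInputs also offers a three-argument overload that registers only the InputFormat per path and falls back to the job-wide mapper. A sketch under that assumption, reusing the job object from above:

    // Both directories use the same mapper; only the per-path InputFormat is registered.
    MultipleInputs.addInputPath(job, new Path("F:\\hadooptest\\wordcount\\input1"), TextInputFormat.class);
    MultipleInputs.addInputPath(job, new Path("F:\\hadooptest\\wordcount\\input2"), TextInputFormat.class);
    job.setMapperClass(MapA.class);  // one mapper handles records from both paths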

Output

aa  3
bb  3
cc  2
dd  1
ee  2
ff  2
gg  1
kk  1