利用MultipleOutputs控制reduce输出路径
来源:互联网 发布:比较有名仓储软件 编辑:程序博客网 时间:2024/05/22 05:26
package com.mr.test;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.MultipleOutputs;

/**
 * Word count that routes each reducer record to a per-key named output file
 * via the old-API {@link MultipleOutputs} (output files named {@code <key>-r-NNNNN}
 * instead of the default {@code part-NNNNN}).
 *
 * <p>Assumes every distinct word in the input matches one of the named outputs
 * registered in {@link #main(String[])} ("test1".."test5"); a key outside that
 * set makes {@code getCollector} fail at runtime.
 */
public class WordCount {

    /** Tokenizes each input line and emits (word, 1) pairs. */
    public static class MyMap implements Mapper<LongWritable, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private final Text word = new Text();

        public void map(LongWritable key, Text value,
                        OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            StringTokenizer tokenizer = new StringTokenizer(value.toString());
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                output.collect(word, one);
            }
        }

        public void configure(JobConf conf) {
            // no per-task configuration needed
        }

        public void close() throws IOException {
            // nothing to release
        }
    }

    /**
     * Sums counts per word and writes each result to the named output whose
     * name equals the word, instead of the default part file.
     *
     * <p>NOTE: this class must NOT be used as a combiner — it bypasses the
     * framework's {@code OutputCollector}, so as a combiner it would emit
     * named-output files during the map phase (e.g. {@code test1-m-00000})
     * and starve the real reduce of input.
     */
    public static class MyReduce implements Reducer<Text, IntWritable, Text, IntWritable> {

        // MultipleOutputs is a raw (non-generic) class in the old mapred API.
        private MultipleOutputs mos;

        public void reduce(Text key, Iterator<IntWritable> values,
                           OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            // Route to the named output matching this key; the regular
            // OutputCollector is intentionally unused, so part-NNNNN stays empty.
            mos.getCollector(key.toString(), reporter).collect(key, new IntWritable(sum));
            // Multi-part variant (named output + sub-name):
            // mos.getCollector(key.toString(), "test", reporter).collect(key, new IntWritable(sum));
        }

        public void configure(JobConf conf) {
            mos = new MultipleOutputs(conf);
        }

        public void close() throws IOException {
            // Required: flushes and closes all named-output record writers.
            mos.close();
        }
    }

    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: WordCount <input path> <output path>");
            System.exit(2);
        }

        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(MyMap.class);
        // Deliberately no combiner: MyReduce writes through MultipleOutputs,
        // so running it map-side would emit test*-m-* files and drop the
        // data meant for the reduce phase.
        conf.setReducerClass(MyReduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        System.out.println("args1:" + args[0]);
        System.out.println("args2:" + args[1]);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        // Named-output key/value classes must match what the reducer actually
        // collects (Text key, IntWritable value) — the original declared
        // LongWritable/Text, which does not match the emitted records.
        for (String name : new String[] {"test1", "test2", "test3", "test4", "test5"}) {
            MultipleOutputs.addNamedOutput(conf, name, TextOutputFormat.class,
                    Text.class, IntWritable.class);
        }

        JobClient.runJob(conf);
    }
}
input.txt:
test1
test2
test3
test4
test5
output:
-rw-r--r-- 2 test supergroup 0 2014-04-20 11:23 /chukwa/output/0419-10/_SUCCESS
drwxr-xr-x - test supergroup 0 2014-04-20 11:23 /chukwa/output/0419-10/_logs
-rw-r--r-- 2 test supergroup 42 2014-04-20 11:23 /chukwa/output/0419-10/part-00000.lzo
-rw-r--r-- 2 test supergroup 58 2014-04-20 11:23 /chukwa/output/0419-10/test1-m-00000.lzo
-rw-r--r-- 2 test supergroup 58 2014-04-20 11:23 /chukwa/output/0419-10/test2-m-00000.lzo
-rw-r--r-- 2 test supergroup 58 2014-04-20 11:23 /chukwa/output/0419-10/test3-m-00000.lzo
-rw-r--r-- 2 test supergroup 58 2014-04-20 11:23 /chukwa/output/0419-10/test4-m-00001.lzo
-rw-r--r-- 2 test supergroup 58 2014-04-20 11:23 /chukwa/output/0419-10/test5-m-00001.lzo
- 利用MultipleOutputs控制reduce输出路径
- Hadoop多路径输出(MultipleOutputs)
- Hadoop控制输出文件命名 - MultipleOutputs
- 在MapReduce中利用MultipleOutputs输出多个文件
- 在MapReduce中利用MultipleOutputs输出多个文件
- mapreduce程序reduce输出控制
- Hadoop:mapreduce程序reduce输出控制
- 【Hadoop】利用MultipleOutputs,MultiOutputFormat实现以不同格式输出到多个文件
- mr分类输出(MultipleOutputs)
- MultipleOutputs(一) Renaming Part Files in Hadoop Map Reduce
- hadoop 输出MultipleOutputs学习及应用情境
- MR->OutputFormat->多文件名输出格式 MultipleOutputs
- MapReduce处理输出多文件格式(MultipleOutputs)
- mapreduce多目录输出(MultipleOutputFormat和MultipleOutputs)
- Hadoop的MultipleOutputs进行多文件输出
- 案例十三:多格式文件输出MultipleOutputs
- 案例十二:多文件输出MultipleOutputs
- Hadoop MapReduce 修改输出文件名 MultipleOutputs
- java web 监听器详解
- 为什么我要选择erlang+go进行服务器架构(1)
- hdu 1024 Max Sum Plus Plus(dp)
- 匿名社交软件,为何就成洪水猛兽?
- windows和linux双系统修改启动项顺序
- 利用MultipleOutputs控制reduce输出路径
- Unity3D:角色拾取技术
- python 笔记1
- android代码的混淆
- Android生命周期详解
- 信息入口的新闻客户端如何盈利?
- JAVA数组常用方法
- 修改tomcat启动和停止时间
- Linux——安装ipython