hadoop多输出
来源:互联网 发布:工商银行重庆分行数据 编辑:程序博客网 时间:2024/05/19 10:10
在新API中,org.apache.hadoop.mapreduce.lib.output.MultipleOutputs 整合了旧API中 MultipleOutputFormat 与 MultipleOutputs 这两个类的功能。
MultipleOutputs 的两种用法:
1,以不同的路径(文件名)输出:
MultipleOutputs.write(Key key, Value value, String baseOutputPath)
2,以多种格式(命名输出)输出:
MultipleOutputs.write(namedOutput, key, value, baseOutputPath);
此时还需要调用
MultipleOutputs.addNamedOutput(job, namedOutput,outputFormatClass,keyClass, valueClass)
例子
package example;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.lib.MultipleTextOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
/**
 * 多路径输出示例:reduce 阶段通过 MultipleOutputs 按 key 将结果写到不同的输出路径。
 * @author lijl
 */
public class MultiOutputFileMR {
static class MultiOutputFileMapper extends Mapper<LongWritable, Text, Text, Text>{
public void map(LongWritable key,Text value,Context context){
try {
String[] str = value.toString().split("\\|");
context.write(new Text(str[0]), new Text(str[1]));
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
static class MultiOutputFileReducer extends Reducer<Text, Text, Text, Text>{
MultipleOutputs<Text, Text> collector = null;
@Override
protected void cleanup(Context context) throws IOException,
InterruptedException {
collector.close();
}
@Override
protected void setup(Context context) throws IOException,
InterruptedException {
collector = new MultipleOutputs<Text, Text>(context);
}
public void reduce(Text key,Iterable<Text> values,Context context){
try {
for(Text value:values){
// collector.write( key, value,"/sina_yq/path7/a");
// collector.write("a1", key, value);
// collector.write("a2", key, value);
// collector.write("a3", key, value);
// collector.write("a4", key, value,"/sina_yq/path7/");
collector.write( key, value,"/sina_yq/path7/"+key.toString());
}
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
static class MultiOutPutTestFormat extends MultipleTextOutputFormat<Text, Text>{
protected String generateFileNameForKeyValue(Text key,Text value, String name) {
return key.toString();
}
}
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
Job job = new Job(conf,"MultiPathFileInput");
job.setJarByClass(MultiOutputFileMR.class);
// FileInputFormat.addInputPath(job, new Path("hdfs://RS5-112:9000/cs/path1"));
// FileInputFormat.addInputPath(job, new Path("hdfs://RS5-112:9000/cs/path2"));
FileInputFormat.addInputPaths(job, "hdfs://RS5-112:9000/sina_yq/path1,hdfs://RS5-112:9000/cs/path2");
FileOutputFormat.setOutputPath(job, new Path("hdfs://RS5-112:9000/cs/path7"));
// MultipleOutputs.addNamedOutput(job, "a1", TextOutputFormat.class, Text.class, Text.class);
// MultipleOutputs.addNamedOutput(job, "a2", TextOutputFormat.class, Text.class, Text.class);
// MultipleOutputs.addNamedOutput(job, "a3", TextOutputFormat.class, Text.class, Text.class);
// MultipleOutputs.addNamedOutput(job, "a4", TextOutputFormat.class, Text.class, Text.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setMapperClass(MultiOutputFileMapper.class);
job.setReducerClass(MultiOutputFileReducer.class);
job.setNumReduceTasks(1);
System.exit(job.waitForCompletion(true)?0:1);
}
}
- hadoop多输出
- hadoop多文件输出
- hadoop多文件输出
- [Hadoop]MapReduce多输出
- Hadoop reduce多个输出
- hadoop streaming 多路输出
- hadoop多目录输出1
- Hadoop 实现多文件输出
- Hadoop的多目录输出
- Hadoop多路径输出(MultipleOutputs)
- Hadoop多个输出案例
- (5)Hadoop 多文件输出
- [Hadoop系列]Hadoop的MapReduce中多文件输出
- [Hadoop系列]Hadoop的MapReduce中多文件输出
- hadoop文件输出控制,多路径输出到不同文件
- hadoop文件输出控制,多路径输出到不同文件
- hadoop的reducer输出多个文件
- Hadoop的MapReduce中多文件输出
- 齐次坐标与仿射变换
- 求一个书的立方
- IOC注解
- Math.round用法
- 检测data空间大小的工具类
- hadoop多输出
- 随便写写 (加密,解密,文件读写打开操作)
- 我的第三十个上机报告(Fibnacci)
- 苹果掌门人库克以近亿美元年薪当选美国最贵
- 调用函数解分段函数
- js数组操作
- 养生培训——奔三的人必看!
- 后缀数组模板
- 【android开发】下载文件工具类