自定义outputformat
来源:互联网 发布:linux ssh localhost 编辑:程序博客网 时间:2024/06/05 22:56
/**
 * When a map task or reduce task emits its final output, it first calls
 * OutputFormat's getRecordWriter method to obtain a RecordWriter, and then
 * calls RecordWriter.write(k, v) to write each record out.
 *
 * NOTE(review): this first copy of the class is truncated in the scraped page
 * (the outer class's closing brace is missing); a complete copy follows below.
 *
 * @author
 *
 */
public class LogEnhanceOutputFormat extends FileOutputFormat<Text, NullWritable> {
@Override
public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
FileSystem fs = FileSystem.get(context.getConfiguration());
// NOTE(review): hard-coded local paths; every task attempt re-creates (overwrites)
// the same two files — confirm the job runs with a single task.
Path enhancePath = new Path("D:/temp/en/log.dat");
Path tocrawlPath = new Path("D:/temp/crw/url.dat");
FSDataOutputStream enhancedOs = fs.create(enhancePath);
FSDataOutputStream tocrawlOs = fs.create(tocrawlPath);
return new EnhanceRecordWriter(enhancedOs, tocrawlOs);
}
/**
 * A custom RecordWriter that routes each record to one of two output streams
 * based on the record's content.
 *
 * @author
 *
 */
static class EnhanceRecordWriter extends RecordWriter<Text, NullWritable> {
FSDataOutputStream enhancedOs = null;
FSDataOutputStream tocrawlOs = null;
public EnhanceRecordWriter(FSDataOutputStream enhancedOs, FSDataOutputStream tocrawlOs) {
super();
this.enhancedOs = enhancedOs;
this.tocrawlOs = tocrawlOs;
}
@Override
public void write(Text key, NullWritable value) throws IOException, InterruptedException {
String result = key.toString();
// Records marked "tocrawl" go to the to-crawl URL list file (/logenhance/tocrawl/url.dat).
// NOTE(review): getBytes() uses the platform default charset — consider UTF-8.
// NOTE(review): no newline is appended; presumably the key already contains '\n' — confirm.
if (result.contains("tocrawl")) {
tocrawlOs.write(result.getBytes());
} else {
// All other records are enhanced log lines (/logenhance/enhancedlog/log.dat).
enhancedOs.write(result.getBytes());
}
}
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
if (tocrawlOs != null) {
tocrawlOs.close();
}
if (enhancedOs != null) {
enhancedOs.close();
}
}
}
* maptask或者reducetask在最终输出时,先调用OutputFormat的getRecordWriter方法拿到一个RecordWriter
* 然后再调用RecordWriter的write(k,v)方法将数据写出
*
* @author
*
*/
public class LogEnhanceOutputFormat extends FileOutputFormat<Text, NullWritable> {
@Override
public RecordWriter<Text, NullWritable> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
FileSystem fs = FileSystem.get(context.getConfiguration());
Path enhancePath = new Path("D:/temp/en/log.dat");
Path tocrawlPath = new Path("D:/temp/crw/url.dat");
FSDataOutputStream enhancedOs = fs.create(enhancePath);
FSDataOutputStream tocrawlOs = fs.create(tocrawlPath);
return new EnhanceRecordWriter(enhancedOs, tocrawlOs);
}
/**
* 构造一个自己的recordwriter
*
* @author
*
*/
static class EnhanceRecordWriter extends RecordWriter<Text, NullWritable> {
FSDataOutputStream enhancedOs = null;
FSDataOutputStream tocrawlOs = null;
public EnhanceRecordWriter(FSDataOutputStream enhancedOs, FSDataOutputStream tocrawlOs) {
super();
this.enhancedOs = enhancedOs;
this.tocrawlOs = tocrawlOs;
}
@Override
public void write(Text key, NullWritable value) throws IOException, InterruptedException {
String result = key.toString();
// 如果要写出的数据是待爬的url,则写入待爬清单文件 /logenhance/tocrawl/url.dat
if (result.contains("tocrawl")) {
tocrawlOs.write(result.getBytes());
} else {
// 如果要写出的数据是增强日志,则写入增强日志文件 /logenhance/enhancedlog/log.dat
enhancedOs.write(result.getBytes());
}
}
@Override
public void close(TaskAttemptContext context) throws IOException, InterruptedException {
if (tocrawlOs != null) {
tocrawlOs.close();
}
if (enhancedOs != null) {
enhancedOs.close();
}
}
}
}
// Driver-side configuration, set on the client when submitting the job.
// To route different content to different target paths, plug in the custom OutputFormat.
job.setOutputFormatClass(LogEnhanceOutputFormat.class);
FileInputFormat.setInputPaths(job, new Path("D:/srcdata/webloginput/"));
// Even though we use a custom OutputFormat, it extends FileOutputFormat,
// which still emits a _SUCCESS marker file, so an output path must be set here as well.
FileOutputFormat.setOutputPath(job, new Path("D:/temp/output/"));
// Map-only job: no reducer is needed.
job.setNumReduceTasks(0);
0 0
- 自定义OutPutFormat
- 自定义OutputFormat
- 自定义outputformat
- 自定义OutputFormat--Hadoop
- MapReduce 自定义outputFormat
- hadoop 自定义inputformat和outputformat
- Mapreduce编程三 自定义outputformat
- hadoop自定义InputFormat,OutputFormat输入输出类型
- mapreduce系列(9)--自定义OutputFormat
- Mapreduce之自定义OutputFormat应用-日志增强
- Hadoop自定义 inputformat 和outputformat 实现图像的读写
- Hadoop入门之自定义groupingcomparator和outputformat的使用
- MR之自定义outputformat输出方式代码演示
- InputFormat&OutputFormat
- 在Hadoop的streaming中使用自定义的inputformat和outputformat
- 在Hadoop的streaming中使用自定义的inputformat和outputformat
- hadoop学习;自定义Input/OutputFormat;类引用mapreduce.mapper;三种模式
- hadoop学习;自定义Input/OutputFormat;类引用mapreduce.mapper;三种模式
- gitlab配置126邮箱作为SMTP服务器
- iOS-App版本升级时数据库的迁移更新
- 【指导】配置vim编辑器格式限制
- 织梦dedecms漏洞修复大全注入漏洞
- P1049 装箱问题
- 自定义outputformat
- 01 背包找装满方案数 洛谷 p1164 小a点菜
- java之IO流总结
- 登峰杯第二届全国中学生数学建模大赛参赛感想
- 个人喜欢的sublime主题Boxy
- js函数以及js函数表达式
- Dynatrace分布式跟踪之RPC over Dubbo
- HDU 1848 Fibonacci again and again(博弈sg函数)
- 419. Battleships in a Board