ChainMapper和ChainReducer
来源:互联网 发布:西安财经行知怎么样 编辑:程序博客网 时间:2024/05/16 12:21
The ChainMapper class allows you to use multiple Mapper classes within a single Map task.
The ChainReducer class allows you to chain multiple Mapper classes after a Reducer within the Reducer task.
http://www.oratea.net/?p=371
通过ChainMapper可以将多个map类合并成一个map任务。
下面的这个例子没什么实际意思,但是很好的演示了ChainMapper的作用。
源文件
100 tom 90
101 mary 85
102 kate 60
map00的结果,过滤掉100的记录
101 mary 85
102 kate 60
map01的结果,过滤掉101的记录
102 kate 60
reduce结果
102 kate 60
package org.myorg;
import java.io.IOException;
import java.util.*;
import java.lang.String;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;
import org.apache.hadoop.mapred.lib.*;
public class WordCount
{
public static class Map00extends MapReduceBaseimplements Mapper
{
publicvoid map(Text key, Text value, OutputCollector output, Reporter reporter)throws IOException
{
Text ft =new Text(“100″);
if(!key.equals(ft))
{
output.collect(key, value);
}
}
}
public static class Map01extends MapReduceBaseimplements Mapper
{
publicvoid map(Text key, Text value, OutputCollector output, Reporter reporter)throws IOException
{
Text ft =new Text(“101″);
if(!key.equals(ft))
{
output.collect(key, value);
}
}
}
public static class Reduceextends MapReduceBaseimplements Reducer
{
publicvoid reduce(Text key, Iterator values, OutputCollector output, Reporter reporter)throws IOException
{
while(values.hasNext())
{
output.collect(key, values.next());
}
}
}
public static void main(String[] args)throws Exception
{
JobConf conf =new JobConf(WordCount.class);
conf.setJobName(“wordcount00″);
conf.setInputFormat(KeyValueTextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
ChainMapper cm =new ChainMapper();
JobConf mapAConf =new JobConf(false);
cm.addMapper(conf, Map00.class, Text.class, Text.class, Text.class, Text.class,true, mapAConf);
JobConf mapBConf =new JobConf(false);
cm.addMapper(conf, Map01.class, Text.class, Text.class, Text.class, Text.class,true, mapBConf);
conf.setReducerClass(Reduce.class);
conf00.setOutputKeyClass(Text.class);
conf00.setOutputValueClass(Text.class);
FileInputFormat.setInputPaths(conf, new Path(args[0]));
FileOutputFormat.setOutputPath(conf, new Path(args[1]));
JobClient.runJob(conf);
}
}
另外一个例子,代码很多,其实很简单,Conn几个类都是相同的
http://yixiaohuamax.iteye.com/blog/684244
package com.oncedq.code;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.SimpleDateFormat;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.jobcontrol.Job;
import org.apache.hadoop.mapred.jobcontrol.JobControl;
import org.apache.hadoop.mapred.lib.ChainMapper;
import com.oncedq.code.util.DateUtil;
public class ProcessSample {
public static class ExtractMappperextends MapReduceBaseimplements
Mapper<LongWritable, Text, LongWritable, Conn1> {
@Override
publicvoid map(LongWritable arg0, Text arg1,
OutputCollector<LongWritable, Conn1> arg2, Reporter arg3)
throws IOException {
String line = arg1.toString();
String[] strs = line.split(";");
Conn1 conn1 =new Conn1();
conn1.orderKey = Long.parseLong(strs[0]);
conn1.customer = Long.parseLong(strs[1]);
conn1.state = strs[2];
conn1.price = Double.parseDouble(strs[3]);
conn1.orderDate = DateUtil.getDateFromString(strs[4],"yyyy-MM-dd");
LongWritable lw =new LongWritable(conn1.orderKey);
arg2.collect(lw, conn1);
}
}
private static class Conn1implements WritableComparable<Conn1> {
publiclong orderKey;
publiclong customer;
public String state;
publicdouble price;
public java.util.Date orderDate;
@Override
publicvoid readFields(DataInput in)throws IOException {
orderKey = in.readLong();
customer = in.readLong();
state = Text.readString(in);
price = in.readDouble();
orderDate = DateUtil.getDateFromString(Text.readString(in),
"yyyy-MM-dd");
}
@Override
publicvoid write(DataOutput out)throws IOException {
out.writeLong(orderKey);
out.writeLong(customer);
Text.writeString(out, state);
out.writeDouble(price);
Text.writeString(out, DateUtil.getDateStr(orderDate, "yyyy-MM-dd"));
}
@Override
publicint compareTo(Conn1 arg0) {
// TODO Auto-generated method stub
return0;
}
}
public static class Filter1Mapperextends MapReduceBaseimplements
Mapper<LongWritable, Conn1, LongWritable, Conn2> {
@Override
publicvoid map(LongWritable inKey, Conn1 c2,
OutputCollector<LongWritable, Conn2> collector, Reporter report)
throws IOException {
if (c2.state.equals("F")) {
Conn2 inValue =new Conn2();
inValue.customer = c2.customer;
inValue.orderDate = c2.orderDate;
inValue.orderKey = c2.orderKey;
inValue.price = c2.price;
inValue.state = c2.state;
collector.collect(inKey, inValue);
}
}
}
private static class Conn2implements WritableComparable<Conn1> {
publiclong orderKey;
publiclong customer;
public String state;
publicdouble price;
public java.util.Date orderDate;
@Override
publicvoid readFields(DataInput in)throws IOException {
orderKey = in.readLong();
customer = in.readLong();
state = Text.readString(in);
price = in.readDouble();
orderDate = DateUtil.getDateFromString(Text.readString(in),
"yyyy-MM-dd");
}
@Override
publicvoid write(DataOutput out)throws IOException {
out.writeLong(orderKey);
out.writeLong(customer);
Text.writeString(out, state);
out.writeDouble(price);
Text.writeString(out, DateUtil.getDateStr(orderDate, "yyyy-MM-dd"));
}
@Override
publicint compareTo(Conn1 arg0) {
// TODO Auto-generated method stub
return0;
}
}
public static class RegexMapperextends MapReduceBaseimplements
Mapper<LongWritable, Conn2, LongWritable, Conn3> {
@Override
publicvoid map(LongWritable inKey, Conn2 c3,
OutputCollector<LongWritable, Conn3> collector, Reporter report)
throws IOException {
c3.state = c3.state.replaceAll("F","Find");
Conn3 c2 =new Conn3();
c2.customer = c3.customer;
c2.orderDate = c3.orderDate;
c2.orderKey = c3.orderKey;
c2.price = c3.price;
c2.state = c3.state;
collector.collect(inKey, c2);
}
}
private static class Conn3implements WritableComparable<Conn1> {
publiclong orderKey;
publiclong customer;
public String state;
publicdouble price;
public java.util.Date orderDate;
@Override
publicvoid readFields(DataInput in)throws IOException {
orderKey = in.readLong();
customer = in.readLong();
state = Text.readString(in);
price = in.readDouble();
orderDate = DateUtil.getDateFromString(Text.readString(in),
"yyyy-MM-dd");
}
@Override
publicvoid write(DataOutput out)throws IOException {
out.writeLong(orderKey);
out.writeLong(customer);
Text.writeString(out, state);
out.writeDouble(price);
Text.writeString(out, DateUtil.getDateStr(orderDate, "yyyy-MM-dd"));
}
@Override
publicint compareTo(Conn1 arg0) {
// TODO Auto-generated method stub
return0;
}
}
public static class LoadMapperextends MapReduceBaseimplements
Mapper<LongWritable, Conn3, LongWritable, Conn3> {
@Override
publicvoid map(LongWritable arg0, Conn3 arg1,
OutputCollector<LongWritable, Conn3> arg2, Reporter arg3)
throws IOException {
arg2.collect(arg0, arg1);
}
}
/**
 * Wires the four mappers into a single map-only chain job and runs it
 * under JobControl. Input is read from "orderData"; output is written
 * to "orderDataOutput".
 */
public static void main(String[] args) {
    JobConf job = new JobConf(ProcessSample.class);
    job.setJobName("ProcessSample");
    job.setNumReduceTasks(0); // map-only pipeline: no reducer is configured
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    // One private configuration per chained mapper (empty, but required).
    JobConf mapper1 = new JobConf();
    JobConf mapper2 = new JobConf();
    JobConf mapper3 = new JobConf();
    JobConf mapper4 = new JobConf();

    // BUG FIX: addMapper is static; invoke it through the class instead of
    // a throwaway ChainMapper instance.
    ChainMapper.addMapper(job, ExtractMappper.class, LongWritable.class, Text.class,
            LongWritable.class, Conn1.class, true, mapper1);
    ChainMapper.addMapper(job, Filter1Mapper.class, LongWritable.class, Conn1.class,
            LongWritable.class, Conn2.class, true, mapper2);
    ChainMapper.addMapper(job, RegexMapper.class, LongWritable.class, Conn2.class,
            LongWritable.class, Conn3.class, true, mapper3);
    ChainMapper.addMapper(job, LoadMapper.class, LongWritable.class, Conn3.class,
            LongWritable.class, Conn3.class, true, mapper4);

    FileInputFormat.setInputPaths(job, new Path("orderData"));
    FileOutputFormat.setOutputPath(job, new Path("orderDataOutput"));

    try {
        Job job1 = new Job(job);
        JobControl jc = new JobControl("test");
        jc.addJob(job1);
        jc.run();
    } catch (IOException e) {
        // TODO(review): propagate or log the failure instead of printing.
        e.printStackTrace();
    }
}
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.SimpleDateFormat;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.jobcontrol.Job;
import org.apache.hadoop.mapred.jobcontrol.JobControl;
import org.apache.hadoop.mapred.lib.ChainMapper;
import com.oncedq.code.util.DateUtil;
public class ProcessSample {
public static class ExtractMappperextends MapReduceBaseimplements
Mapper<LongWritable, Text, LongWritable, Conn1> {
@Override
publicvoid map(LongWritable arg0, Text arg1,
OutputCollector<LongWritable, Conn1> arg2, Reporter arg3)
throws IOException {
String line = arg1.toString();
String[] strs = line.split(";");
Conn1 conn1 =new Conn1();
conn1.orderKey = Long.parseLong(strs[0]);
conn1.customer = Long.parseLong(strs[1]);
conn1.state = strs[2];
conn1.price = Double.parseDouble(strs[3]);
conn1.orderDate = DateUtil.getDateFromString(strs[4],"yyyy-MM-dd");
LongWritable lw =new LongWritable(conn1.orderKey);
arg2.collect(lw, conn1);
}
}
private static class Conn1implements WritableComparable<Conn1> {
publiclong orderKey;
publiclong customer;
public String state;
publicdouble price;
public java.util.Date orderDate;
@Override
publicvoid readFields(DataInput in)throws IOException {
orderKey = in.readLong();
customer = in.readLong();
state = Text.readString(in);
price = in.readDouble();
orderDate = DateUtil.getDateFromString(Text.readString(in),
"yyyy-MM-dd");
}
@Override
publicvoid write(DataOutput out)throws IOException {
out.writeLong(orderKey);
out.writeLong(customer);
Text.writeString(out, state);
out.writeDouble(price);
Text.writeString(out, DateUtil.getDateStr(orderDate, "yyyy-MM-dd"));
}
@Override
publicint compareTo(Conn1 arg0) {
// TODO Auto-generated method stub
return0;
}
}
public static class Filter1Mapperextends MapReduceBaseimplements
Mapper<LongWritable, Conn1, LongWritable, Conn2> {
@Override
publicvoid map(LongWritable inKey, Conn1 c2,
OutputCollector<LongWritable, Conn2> collector, Reporter report)
throws IOException {
if (c2.state.equals("F")) {
Conn2 inValue =new Conn2();
inValue.customer = c2.customer;
inValue.orderDate = c2.orderDate;
inValue.orderKey = c2.orderKey;
inValue.price = c2.price;
inValue.state = c2.state;
collector.collect(inKey, inValue);
}
}
}
private static class Conn2implements WritableComparable<Conn1> {
publiclong orderKey;
publiclong customer;
public String state;
publicdouble price;
public java.util.Date orderDate;
@Override
publicvoid readFields(DataInput in)throws IOException {
orderKey = in.readLong();
customer = in.readLong();
state = Text.readString(in);
price = in.readDouble();
orderDate = DateUtil.getDateFromString(Text.readString(in),
"yyyy-MM-dd");
}
@Override
publicvoid write(DataOutput out)throws IOException {
out.writeLong(orderKey);
out.writeLong(customer);
Text.writeString(out, state);
out.writeDouble(price);
Text.writeString(out, DateUtil.getDateStr(orderDate, "yyyy-MM-dd"));
}
@Override
publicint compareTo(Conn1 arg0) {
// TODO Auto-generated method stub
return0;
}
}
public static class RegexMapperextends MapReduceBaseimplements
Mapper<LongWritable, Conn2, LongWritable, Conn3> {
@Override
publicvoid map(LongWritable inKey, Conn2 c3,
OutputCollector<LongWritable, Conn3> collector, Reporter report)
throws IOException {
c3.state = c3.state.replaceAll("F","Find");
Conn3 c2 =new Conn3();
c2.customer = c3.customer;
c2.orderDate = c3.orderDate;
c2.orderKey = c3.orderKey;
c2.price = c3.price;
c2.state = c3.state;
collector.collect(inKey, c2);
}
}
private static class Conn3implements WritableComparable<Conn1> {
publiclong orderKey;
publiclong customer;
public String state;
publicdouble price;
public java.util.Date orderDate;
@Override
publicvoid readFields(DataInput in)throws IOException {
orderKey = in.readLong();
customer = in.readLong();
state = Text.readString(in);
price = in.readDouble();
orderDate = DateUtil.getDateFromString(Text.readString(in),
"yyyy-MM-dd");
}
@Override
publicvoid write(DataOutput out)throws IOException {
out.writeLong(orderKey);
out.writeLong(customer);
Text.writeString(out, state);
out.writeDouble(price);
Text.writeString(out, DateUtil.getDateStr(orderDate, "yyyy-MM-dd"));
}
@Override
publicint compareTo(Conn1 arg0) {
// TODO Auto-generated method stub
return0;
}
}
public static class LoadMapperextends MapReduceBaseimplements
Mapper<LongWritable, Conn3, LongWritable, Conn3> {
@Override
publicvoid map(LongWritable arg0, Conn3 arg1,
OutputCollector<LongWritable, Conn3> arg2, Reporter arg3)
throws IOException {
arg2.collect(arg0, arg1);
}
}
/**
 * Wires the four mappers into a single map-only chain job and runs it
 * under JobControl. Input is read from "orderData"; output is written
 * to "orderDataOutput".
 */
public static void main(String[] args) {
    JobConf job = new JobConf(ProcessSample.class);
    job.setJobName("ProcessSample");
    job.setNumReduceTasks(0); // map-only pipeline: no reducer is configured
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    // One private configuration per chained mapper (empty, but required).
    JobConf mapper1 = new JobConf();
    JobConf mapper2 = new JobConf();
    JobConf mapper3 = new JobConf();
    JobConf mapper4 = new JobConf();

    // BUG FIX: addMapper is static; invoke it through the class instead of
    // a throwaway ChainMapper instance.
    ChainMapper.addMapper(job, ExtractMappper.class, LongWritable.class, Text.class,
            LongWritable.class, Conn1.class, true, mapper1);
    ChainMapper.addMapper(job, Filter1Mapper.class, LongWritable.class, Conn1.class,
            LongWritable.class, Conn2.class, true, mapper2);
    ChainMapper.addMapper(job, RegexMapper.class, LongWritable.class, Conn2.class,
            LongWritable.class, Conn3.class, true, mapper3);
    ChainMapper.addMapper(job, LoadMapper.class, LongWritable.class, Conn3.class,
            LongWritable.class, Conn3.class, true, mapper4);

    FileInputFormat.setInputPaths(job, new Path("orderData"));
    FileOutputFormat.setOutputPath(job, new Path("orderDataOutput"));

    try {
        Job job1 = new Job(job);
        JobControl jc = new JobControl("test");
        jc.addJob(job1);
        jc.run();
    } catch (IOException e) {
        // TODO(review): propagate or log the failure instead of printing.
        e.printStackTrace();
    }
}
- ChainMapper和ChainReducer
- ChainMapper和ChainReducer
- ChainMapper和ChainReducer
- 链式MapReduce:ChainMapper和ChainReducer
- 如何使用Hadoop的ChainMapper和ChainReducer
- 如何使用Hadoop的ChainMapper和ChainReducer
- 如何使用Hadoop的ChainMapper和ChainReducer
- Hadoop2.X的ChainMapper和ChainReducer
- ChainMapper/ChainReducer实现原理
- Hadoop的ChainMapper/ChainReducer
- MapReduce基础开发之十二ChainMapper和ChainReducer使用
- MapReduce练习二:ChainMapper和ChainReducer的使用
- ChainMapper/ChainReducer 的实现原理
- ChainMapper/ChainReducer的实现原理
- MR简单串联(ChainMapper/ChainReducer)
- ChainMapper/ChainReducer实现原理及案例分析
- hadoop 2.0中ChainMapper与ChainReducer的使用
- [Hadoop] Hadoop 链式任务 : ChainMapper and ChainReducer的使用
- Database Configuration and I/O Issues(2)
- 读《Objective-C基础教程》学习笔记
- Task和Activity相关
- PHP连接mysql的简单登录
- Struts1.x的validator框架使用
- ChainMapper和ChainReducer
- 事务: spring+hibernate实现事务回滚及其他
- JQuery.LazyLoad 实现图片懒加载
- J2EE开发使用cookie
- JNDI——J2EE规范(一)
- Android 体系结构
- HillTop链接分析算法详解
- 上传文件,is_uploaded_file($_FILES['imgfile']['tmp_name']) 一直返回false
- JAXB(.xsd文件到java类的映射)