The standard MapReduce process


The DataBean that wraps the value is described in the earlier post on serialization and deserialization.

Drawbacks of DataBean:

1. It implements only Writable, which has no compareTo method, so it can wrap a value but cannot be used as a key or take part in sorting.

2. Values are assigned in one shot through the constructor, so a new object has to be created for every assignment.

Drawbacks of the simple MapReduce job (shown first below):

1. In the Mapper, context.write(new Text(tel), bean); allocates a new Text object for every record written.

2. In the Reducer, DataBean bean = new DataBean("", up_sum, down_sum); context.write(key, bean); likewise allocates a new object for every record written, just as in the Mapper; since DataBean can only be given values through new, there is no other way. (A sketch of such a bean follows.)
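The DataBean class itself is not shown in this post (see the serialization/deserialization article). The following is only a minimal sketch of what such a value-only Writable bean might look like; the field name telNo is assumed, while the getters match the calls in the code below.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class DataBean implements Writable {

    private String telNo;
    private long upPayLoad;
    private long downPayLoad;

    // Hadoop instantiates value objects via reflection when deserializing,
    // so a no-arg constructor is required.
    public DataBean() {
    }

    // All fields are assigned in one shot through this constructor, which is
    // why the mapper and reducer below have to call new for every record.
    public DataBean(String telNo, long upPayLoad, long downPayLoad) {
        this.telNo = telNo;
        this.upPayLoad = upPayLoad;
        this.downPayLoad = downPayLoad;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(telNo);
        out.writeLong(upPayLoad);
        out.writeLong(downPayLoad);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.telNo = in.readUTF();
        this.upPayLoad = in.readLong();
        this.downPayLoad = in.readLong();
    }

    public long getUpPayLoad() {
        return upPayLoad;
    }

    public long getDownPayLoad() {
        return downPayLoad;
    }

    @Override
    public String toString() {
        return upPayLoad + "\t" + downPayLoad;
    }
}

Because the bean implements only Writable rather than WritableComparable, it has no compareTo method; it can serve as a value but not as a key, and it cannot take part in sorting.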

First, a simple MapReduce job:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class DataCount {

    public static class DCMapper extends Mapper<LongWritable, Text, Text, DataBean> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // accept: read one line of input
            String line = value.toString();
            // split on tab; field 1 is the phone number (tel),
            // fields 8 and 9 are the up/down payload
            String[] fields = line.split("\t");
            String tel = fields[1];
            long up = Long.parseLong(fields[8]);
            long down = Long.parseLong(fields[9]);
            DataBean bean = new DataBean(tel, up, down);
            // send: a new Text and a new DataBean are allocated for every record
            context.write(new Text(tel), bean);
        }
    }

    public static class DCReducer extends Reducer<Text, DataBean, Text, DataBean> {

        @Override
        protected void reduce(Text key, Iterable<DataBean> values, Context context)
                throws IOException, InterruptedException {
            long up_sum = 0;
            long down_sum = 0;
            for (DataBean bean : values) {
                up_sum += bean.getUpPayLoad();
                down_sum += bean.getDownPayLoad();
            }
            // again a new DataBean is allocated per output record
            DataBean bean = new DataBean("", up_sum, down_sum);
            context.write(key, bean);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(DataCount.class);

        job.setMapperClass(DCMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DataBean.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        job.setReducerClass(DCReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DataBean.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
}

 

 

The standard MapReduce process:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SumStep {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(SumStep.class);

        job.setMapperClass(SumMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(InfoBean.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));

        job.setReducerClass(SumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(InfoBean.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }

    public static class SumMapper extends Mapper<LongWritable, Text, Text, InfoBean> {

        // one key object and one value object, reused for every record
        private InfoBean bean = new InfoBean();
        private Text k = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // split
            String line = value.toString();
            String[] fields = line.split("\t");
            // get the useful fields
            String account = fields[0];
            double income = Double.parseDouble(fields[1]);
            double expenses = Double.parseDouble(fields[2]);
            // refill the reused objects instead of allocating new ones
            k.set(account);
            bean.set(account, income, expenses);
            context.write(k, bean);
        }
    }

    public static class SumReducer extends Reducer<Text, InfoBean, Text, InfoBean> {

        private InfoBean bean = new InfoBean();

        @Override
        protected void reduce(Text key, Iterable<InfoBean> v2s, Context context)
                throws IOException, InterruptedException {
            double in_sum = 0;
            double out_sum = 0;
            for (InfoBean bean : v2s) {
                in_sum += bean.getIncome();
                out_sum += bean.getExpenses();
            }
            // reuse the single output bean instead of new-ing one per key
            bean.set("", in_sum, out_sum);
            context.write(key, bean);
        }
    }
}
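The InfoBean referenced above is also defined in the serialization article rather than in this post. The sketch below assumes it has exactly the fields implied by set(), getIncome() and getExpenses(); the real class may carry more.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class InfoBean implements Writable {

    private String account;
    private double income;
    private double expenses;

    // No constructor is declared, so the implicit no-arg constructor is what
    // Hadoop uses when it creates instances via reflection.

    // set() replaces the one-shot constructor: the same bean instance can be
    // refilled and written again for every record, which is exactly what
    // SumMapper and SumReducer rely on.
    public void set(String account, double income, double expenses) {
        this.account = account;
        this.income = income;
        this.expenses = expenses;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(account);
        out.writeDouble(income);
        out.writeDouble(expenses);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        this.account = in.readUTF();
        this.income = in.readDouble();
        this.expenses = in.readDouble();
    }

    public double getIncome() {
        return income;
    }

    public double getExpenses() {
        return expenses;
    }

    @Override
    public String toString() {
        return income + "\t" + expenses;
    }
}

Reusing one Text and one InfoBean per task is safe because context.write serializes the key and value as soon as it is called, and it avoids allocating two fresh objects for every record. Hadoop applies the same trick on the input side: the value object handed out while iterating v2s in the reducer is reused between iterations, so it should not be stored beyond the current loop step.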
