MapReduce 编程模板
来源:互联网 发布:mac如何彻底关机 编辑:程序博客网 时间:2024/05/19 18:10
一. MapReduce编程模板类结构图
二. MapReduce模板代码骨架
package com.hadoop.senior.mapreduce;import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Partitioner;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;/** * MapReuce * * @author * */public class ModuleMapReduce extends Configured implements Tool {// step 1: Map Class/** * * public class Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> */// TODOpublic static class ModuleMapper extendsMapper<LongWritable, Text, Text, IntWritable> {@Overridepublic void setup(Context context) throws IOException,InterruptedException {// Nothing}@Overridepublic void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException {// TODO}@Overridepublic void cleanup(Context context) throws IOException,InterruptedException {// Nothing}}// step 2: Reduce Class/** * * public class Reducer<KEYIN,VALUEIN,KEYOUT,VALUEOUT> */// TODOpublic static class ModuleReducer extendsReducer<Text, IntWritable, Text, IntWritable> {@Overrideprotected void setup(Context context) throws IOException,InterruptedException {// Nothing}@Overridepublic void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {// TODO}@Overrideprotected void cleanup(Context context) throws IOException,InterruptedException {// Nothing}}// step 3: Driver ,component jobpublic int run(String[] args) throws Exception {// 1: get confifurationConfiguration configuration = getConf();// 2: create JobJob job = Job.getInstance(configuration, //this.getClass().getSimpleName());// run jarjob.setJarByClass(this.getClass());// 3: set job// input -> map -> reduce -> output// 3.1: inputPath inPath = new Path(args[0]);FileInputFormat.addInputPath(job, inPath);// 3.2: mapjob.setMapperClass(ModuleMapper.class);// TODOjob.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(IntWritable.class);//****************************Shuffle*********************************// 1) partitioner//job.setPartitionerClass(cls);// 2) sort//job.setSortComparatorClass(cls);// 3) optional,combiner//job.setCombinerClass(cls);// 4) group//job.setGroupingComparatorClass(cls);//****************************Shuffle*********************************// 3.3: reducejob.setReducerClass(ModuleReducer.class);// TODOjob.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);// set reduce number//job.setNumReduceTasks(2);// 3.4: outputPath outPath = new Path(args[1]);FileOutputFormat.setOutputPath(job, outPath);// 4: submit jobboolean isSuccess = job.waitForCompletion(true);return isSuccess ? 0 : 1;}// step 4: run programpublic static void main(String[] args) throws Exception {// 1: get confifurationConfiguration configuration = new Configuration();//set compress//configuration.set("mapreduce.map.output.compress", "true");//configuration.set("mapreduce.map.output.compress.codec", "org.apache.hadoop.io.compress.SnappyCodec");// int status = new WordCountMapReduce().run(args);int status = ToolRunner.run(configuration,//new ModuleMapReduce(),//args);System.exit(status);}}
阅读全文
0 0
- MapReduce 编程模板
- MapReduce编程模板
- hadoop 中的mapreduce编程模板
- 新版MapReduce的API编程简单模板
- MapReduce模板
- Mapreduce模板
- MapReduce编程
- MapReduce编程
- MapReduce编程
- MapReduce编程
- MapReduce编程
- MapReduce编程
- mapreduce编程
- MapReduce编程
- MapReduce模板程序
- MapReduce模板程序
- MapReduce模板2
- mapreduce 模板代码
- 575 Distribute Candies 分发糖果问题
- 连接池
- Cache-Control: no-cache和no-store
- 网络获取tablayout数据
- el表达式比较
- MapReduce 编程模板
- PHP学习中的问题总结
- C++的顶层const和底层const的理解
- Java设计模式透析之 —— 单例(Singleton)
- 贝叶斯决策理论
- 类的静态成员
- 石子合并(一) 时间限制:1000 ms | 内存限制:65535 KB 难度:3 描述 有N堆石子排成一排,每堆石子有一定的数量。现要将N堆石子并成为一堆。合并的过程只能每次将相邻的两堆
- poj 1014 Dividing 【多重背包】
- java实现堆栈 打印英文字母表