MapReduce 的 Partitioner、GroupComparator、KeyComparator 与分布式缓存使用示例
来源:互联网 发布:游戏编程的软件 编辑:程序博客网 时间:2024/05/17 05:53
package org.test.CommonDep;/* * 主要功能是针对输入为年份tab温度格式的数据,返回每年的最高汽温 * 1,partitioner分区,将同一年份的数据放一起。key=年份 温度 value=空 * 2,KeyComparator,key比较算法,让数据先按年份升序排序,如果年份相同,按温度降序。 * 3,GroupComparator,同一年份的数据为一组。 * 4,输出为每个分组里面的第一个数据 * 5,分布式缓存的使用 * *//***author:carl.zhang*email:18510665908@163.com */import java.io.BufferedReader;import java.io.FileInputStream;import java.io.IOException;import java.io.InputStreamReader;import java.net.URI;import java.nio.Buffer;import java.util.HashMap;import java.util.regex.Pattern;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.examples.SecondarySort.IntPair;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.NullWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.WritableComparable;import org.apache.hadoop.io.WritableComparator;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Partitioner;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.filecache.DistributedCache;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.FileSplit;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;import org.apache.hadoop.util.GenericOptionsParser;public class PartitionGroupingDemo{enum MYCounter{MISSING,O} public static final Pattern DELIMITER = Pattern.compile("[\t,]"); public static class PartitionGroupingMapper extends Mapper<LongWritable, Text, Text, NullWritable>{ HashMap<String,String> cache=new HashMap<String,String>(); public void setup(Context context) throws IOException { FileSplit fileSplit=(FileSplit) context.getInputSplit(); Configuration conf=context.getConfiguration(); String name=conf.get("whoami"); } private Text k=new Text(); public void 
map(LongWritable key, Text value, Context context ) throws IOException, InterruptedException { String []valueArray = PartitionGroupingDemo.DELIMITER.split(value.toString().trim()); if (value.toString().trim().length()==0 || valueArray.length!=2 || value.toString().length()==0) return; if (valueArray[0].length()==0 || valueArray[1].length()==0) return; k.set(valueArray[0]+" "+valueArray[1]); context.write(k,NullWritable.get()); } } public static class PartitionGroupingReducer extends Reducer<Text,NullWritable,Text,NullWritable> { public void cleanup(Context context) { } HashMap<String,String> cache=new HashMap<String,String>(); public void setup(Context context) throws IOException { /*单项的配置信息*/ BufferedReader br=new BufferedReader(new InputStreamReader(new FileInputStream("symLink"))); String pair=null; while(null!=(pair=br.readLine())) { cache.put(pair.split("\t")[0], pair.split("\t")[1]); } br.close(); } public void reduce(Text key, Iterable<NullWritable> values, Context context ) throws IOException, InterruptedException { String k=null; if (cache.get(key.toString().split(" ")[0]) != null) k=cache.get(key.toString().split(" ")[0])+" "+key.toString().split(" ")[1]; else k=key.toString(); context.write(new Text(k), NullWritable.get()); } } public static class FirstPartitioner extends Partitioner<Text,NullWritable> {@Overridepublic int getPartition(Text arg0, NullWritable arg1, int arg2) {// TODO Auto-generated method stubString []keyArray=arg0.toString().split(" ");return Math.abs(Integer.parseInt(keyArray[0].trim())*127)%arg2;} } public static class KeyComparator extends WritableComparator { protected KeyComparator() { super(Text.class, true); } @Override public int compare(WritableComparable a, WritableComparable b) { String []keyArray1=((Text)a).toString().split(" "); String []keyArray2=((Text)b).toString().split(" "); int w1Left=Integer.parseInt(keyArray1[0].trim()); int w1Right=Integer.parseInt(keyArray1[1].trim()); int 
w2Left=Integer.parseInt(keyArray2[0].trim()); int w2Right=Integer.parseInt(keyArray2[1].trim()); int result=0; if (w1Left>w2Left) result=1; else if (w1Left<w2Left) result=-1; else if (w1Right>w2Right) result=-1; else if (w1Right<w2Right) result=1; else result=0; return result; } } public static class GroupComparator extends WritableComparator { protected GroupComparator() { super(Text.class,true); } @Override public int compare(WritableComparable a,WritableComparable b) { String []keyArray1=((Text)a).toString().split(" "); String []keyArray2=((Text)b).toString().split(" "); int w1Left=Integer.parseInt(keyArray1[0].trim()); int w1Right=Integer.parseInt(keyArray1[1].trim()); int w2Left=Integer.parseInt(keyArray2[0].trim()); int w2Right=Integer.parseInt(keyArray2[1].trim()); int result=0; if (w1Left>w2Left) result=1; else if (w1Left<w2Left) result=-1; return result; } }@SuppressWarnings("deprecation")public static int run(String[] args) throws Exception {// TODO Auto-generated method stub Configuration conf=new Configuration(); //conf.set("mapreduce.framework.name", "yarn"); //conf.set("yarn.resourcemanager.address", "192.168.10.225:8032"); conf.set("mapreduce.job.jar","/export/workspace/CommonDep/CommonScheduler.jar" ); /*配置数据可以直接通过configuration传递*/ conf.set("whoami", "carlzhang"); /*分布式缓存使用*/ String cacheFile="hdfs://192.168.10.225:9000/input/cacheFiles/yeartocode.conf"; Path inPath=new Path(cacheFile); String pathLink=inPath.toUri().toString()+"#symLink"; DistributedCache.addCacheFile(new URI(pathLink), conf); DistributedCache.createSymlink(conf); String []remaingArgs=new GenericOptionsParser(conf,args).getRemainingArgs(); Job job=new Job(conf); job.setJobName("PartitionGroupingDemo"); job.setJarByClass(PartitionGroupingDemo.class); job.setNumReduceTasks(2); job.setMapperClass(PartitionGroupingMapper.class); job.setReducerClass(PartitionGroupingReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); 
job.setPartitionerClass(FirstPartitioner.class); job.setSortComparatorClass(KeyComparator.class); job.setGroupingComparatorClass(GroupComparator.class); /*避免输出空文件*/ LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); job.setInputFormatClass(TextInputFormat.class); FileInputFormat.setInputPaths(job, new Path(args[0])); TextOutputFormat.setOutputPath(job,new Path(args[1])); return job.waitForCompletion(true) ? 0 : 1;}}
0 0
- mapreduce 的partitioner,GroupComparator,KeyComparator,分布式缓存使用示例
- MapReduce的GroupComparator
- MapReduce中的分布式缓存使用
- mapReduce使用分布式缓存机制
- Mapreduce中分布式缓存的使用注意点
- MapReduce之Partitioner的理解
- mapreduce分布式缓存
- Hadoop1.*中MapReduce的partitioner源码分析
- 分布式缓存的使用
- mapreduce的缓存(addCacheFile)使用
- Hadoop MapReduce进阶 使用分布式缓存进行replicated join
- Hadoop MapReduce进阶 使用分布式缓存进行replicated join
- Hadoop MapReduce进阶 使用分布式缓存进行replicated join
- Hadoop MapReduce进阶 使用分布式缓存进行replicated join
- Hadoop MapReduce进阶 使用分布式缓存进行replicated join
- Hadoop MapReduce进阶 使用分布式缓存进行replicated join
- 18.MapReduce中的分布式缓存
- 实验9 MapReduce-分布式缓存
- iframe
- (原创)分享一个实用的圆形ImageView
- Strcpy函数的实现
- 微信扫码绑定及登陆流程.png
- Android:网络层的封装(使用volley)
- mapreduce 的partitioner,GroupComparator,KeyComparator,分布式缓存使用示例
- AFNetworking封装
- 第一行代码-第5章 广播机制
- UC/OS的46个系统函数2
- 设置页面过时时间及重定向 点叉关闭或alt+F4时
- 开发规范:前端代码命名语义化推荐实践
- html怎么引用字体包
- mySql 为查询的结果集增加一个序号字段
- 二、LinQ使用方法