mapreduce代码整理之sort
来源:互联网 编辑:程序博客网 时间:2024/06/02 06:52
本篇文章主要运用 MapReduce 框架自带的 shuffle 排序机制对整数数据进行排序
import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.compress.CompressionCodec;import org.apache.hadoop.io.compress.GzipCodec;import org.apache.hadoop.mapred.JobConf;import org.apache.hadoop.mapred.lib.HashPartitioner;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Partitioner;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.GenericOptionsParser; public class Sort { //map将输入中的value化成IntWritable类型,作为输出的key public static class Map extends Mapper<Object,Text,IntWritable,IntWritable>{ private static IntWritable data=new IntWritable(); //实现map函数 public void map(Object key,Text value,Context context) throws IOException,InterruptedException{ String line=value.toString(); data.set(Integer.parseInt(line)); context.write(data, new IntWritable(1)); } } //reduce将输入中的key复制到输出数据的key上, //然后根据输入的value-list中元素的个数决定key的输出次数 //用全局linenum来代表key的位次 public static class Reduce extends Reducer<IntWritable,IntWritable,IntWritable,IntWritable>{ private static IntWritable linenum = new IntWritable(1); //实现reduce函数 public void reduce(IntWritable key,Iterable<IntWritable> values,Context context) throws IOException,InterruptedException{ for(IntWritable val:values){ context.write(linenum, key); linenum = new IntWritable(linenum.get()+1); } } } public static class MyPartition extends Partitioner<IntWritable,IntWritable>{ @Override public int getPartition(IntWritable key,IntWritable value,int numPartitions){ if(key.get()>=1&&key.get()<=11){ return 1; } if(key.get()>=12&&key.get()<=21){ return 2; } if(key.get()>=22&&key.get()<=31){ return 3; } if(key.get()>=32&&key.get()<=41){ return 4; } if(key.get()>=42&&key.get()<=51){ return 5; } 
if(key.get()>=52&&key.get()<=61){ return 6; } return 0;/*System.out.print(numPartitions);int MaxNumber = 100;int bound = MaxNumber / numPartitions +1;int keynumber = key.get();for(int i = 0;i<numPartitions;i++){if(keynumber<bound*i&&keynumber>=bound*(i-1))return i-1;}return 0;*/ } } public static void main(String[] args) throws Exception{ Configuration conf = new Configuration(); //conf.setBoolean("mapred.compress.map.output",true); //conf.set("mapred.compress.map.output", "true");//代码实现map输出压缩减少网络传输压力 //conf.set("mapred.map.output.compression.codec","org.apache.hadoop.io.compress.DefaultCodec"); Job job = new Job(conf, "Data Sort"); job.setJarByClass(Sort.class); //设置Map和Reduce处理类 job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); // job.setPartitionerClass(MyPartition.class); // job.setNumReduceTasks(10); //设置输出类型 job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); //设置输入和输出目录 FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); //FileOutputFormat.setCompressOutput(job,true);//代码实现输出压缩 //FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); System.exit(job.waitForCompletion(true) ? 0 : 1); }}
0 0
- mapreduce代码整理之sort
- mapreduce代码整理之wordcount
- mapreduce代码整理之MyInputFormat(自定义InputFormat)
- mapreduce代码整理之mywritable(自定义writable)
- mapreduce代码整理之简单的kmeans聚类
- mapreduce代码整理之实现压缩输出减少网络传输
- MapReduce之join和sort
- MapReduce Sort
- 代码整理之ViewPager
- MapReduce中的shuffle&sort
- mapreduce shuffle merge sort
- MapReduce(partation,sort,combiner)
- mapreduce之倒排索引代码
- MapReduce之WordCount、FlowCount代码汇总
- 《代码之道》段落整理
- Mapreduce实例-sort全排序
- Hadoop MapReduce Shuffle and Sort
- sort代码
- virtual box安装centeros
- sqoop
- ZOJ
- libcurl使用简介
- PAT-A1006
- mapreduce代码整理之sort
- 用数组实现线性表
- 彻底理解JavaScript原型
- 如何判断linux下物理cpu数,核数以及是否支持超线程
- 减少HTTP请求之合并图片详解(大型网站优化技术)
- Integral Channel Features
- android学习-去掉test编译
- Linux core 文件介绍
- freeimage转char*