mapreduce代码整理之sort
来源:互联网 编辑:程序博客网 时间:2024/06/02 06:52
本篇文章主要运用 MapReduce 框架自带的 shuffle 排序机制对整数数据进行排序
import java.io.IOException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.compress.CompressionCodec;import org.apache.hadoop.io.compress.GzipCodec;import org.apache.hadoop.mapred.JobConf;import org.apache.hadoop.mapred.lib.HashPartitioner;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Partitioner;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.GenericOptionsParser; public class Sort { //map将输入中的value化成IntWritable类型,作为输出的key public static class Map extends Mapper<Object,Text,IntWritable,IntWritable>{ private static IntWritable data=new IntWritable(); //实现map函数 public void map(Object key,Text value,Context context) throws IOException,InterruptedException{ String line=value.toString(); data.set(Integer.parseInt(line)); context.write(data, new IntWritable(1)); } } //reduce将输入中的key复制到输出数据的key上, //然后根据输入的value-list中元素的个数决定key的输出次数 //用全局linenum来代表key的位次 public static class Reduce extends Reducer<IntWritable,IntWritable,IntWritable,IntWritable>{ private static IntWritable linenum = new IntWritable(1); //实现reduce函数 public void reduce(IntWritable key,Iterable<IntWritable> values,Context context) throws IOException,InterruptedException{ for(IntWritable val:values){ context.write(linenum, key); linenum = new IntWritable(linenum.get()+1); } } } public static class MyPartition extends Partitioner<IntWritable,IntWritable>{ @Override public int getPartition(IntWritable key,IntWritable value,int numPartitions){ if(key.get()>=1&&key.get()<=11){ return 1; } if(key.get()>=12&&key.get()<=21){ return 2; } if(key.get()>=22&&key.get()<=31){ return 3; } if(key.get()>=32&&key.get()<=41){ return 4; } if(key.get()>=42&&key.get()<=51){ return 5; } 
if(key.get()>=52&&key.get()<=61){ return 6; } return 0;/*System.out.print(numPartitions);int MaxNumber = 100;int bound = MaxNumber / numPartitions +1;int keynumber = key.get();for(int i = 0;i<numPartitions;i++){if(keynumber<bound*i&&keynumber>=bound*(i-1))return i-1;}return 0;*/ } } public static void main(String[] args) throws Exception{ Configuration conf = new Configuration(); //conf.setBoolean("mapred.compress.map.output",true); //conf.set("mapred.compress.map.output", "true");//代码实现map输出压缩减少网络传输压力 //conf.set("mapred.map.output.compression.codec","org.apache.hadoop.io.compress.DefaultCodec"); Job job = new Job(conf, "Data Sort"); job.setJarByClass(Sort.class); //设置Map和Reduce处理类 job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); // job.setPartitionerClass(MyPartition.class); // job.setNumReduceTasks(10); //设置输出类型 job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); //设置输入和输出目录 FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); //FileOutputFormat.setCompressOutput(job,true);//代码实现输出压缩 //FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); System.exit(job.waitForCompletion(true) ? 0 : 1); }}
0 0
- mapreduce代码整理之sort
- mapreduce代码整理之wordcount
- mapreduce代码整理之MyInputFormat(自定义InputFormat)
- mapreduce代码整理之mywritable(自定义writable)
- mapreduce代码整理之简单的kmeans聚类
- mapreduce代码整理之实现压缩输出减少网络传输
- MapReduce之join和sort
- MapReduce Sort
- 代码整理之ViewPager
- MapReduce中的shuffle&sort
- mapreduce shuffle merge sort
- MapReduce(partation,sort,combiner)
- mapreduce之倒排索引代码
- MapReduce之WordCount、FlowCount代码汇总
- 《代码之道》段落整理
- Mapreduce实例-sort全排序
- Hadoop MapReduce Shuffle and Sort
- sort代码
- virtual box安装centeros
- sqoop
- ZOJ
- libcurl使用简介
- PAT-A1006
- mapreduce代码整理之sort
- 用数组实现线性表
- 彻底理解JavaScript原型
- 如何判断linux下物理cpu数,核数以及是否支持超线程
- 减少HTTP请求之合并图片详解(大型网站优化技术)
- Integral Channel Features
- android学习-去掉test编译
- Linux core 文件介绍
- freeimage转char*