MapReduce 求 TopK 最大值

来源:互联网 发布:手机如何申请淘宝直播 编辑:程序博客网 时间:2024/05/01 10:17
package suanfa;import java.io.IOException;import java.net.URI;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.NullWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;public class TopK {public static final String INPUT_PATH="hdfs://192.168.0.9:9000/seq100w.txt";public static final String OUTPUT_PATH="hdfs://192.168.0.9:9000/maxseq";public static void main(String[] args) throws Exception {Configuration conf = new Configuration();Job job = new Job(conf,TopK.class.getSimpleName());//设置输入路径FileInputFormat.addInputPath(job, new Path(INPUT_PATH));//设置输入格式化job.setInputFormatClass(TextInputFormat.class);//设置自定义mapjob.setMapperClass(MyMapper.class);//设置map输出类型job.setMapOutputKeyClass(LongWritable.class);job.setMapOutputValueClass(NullWritable.class);//分区job.setPartitionerClass(HashPartitioner.class);//设置reduce任务job.setNumReduceTasks(1);//排序、分组//规约//设置自定义reduce类job.setReducerClass(MyReduce.class);//设置reduce输出类型job.setOutputKeyClass(LongWritable.class);job.setOutputValueClass(NullWritable.class);//删除已存在的路径FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);Path path=new Path(OUTPUT_PATH);if(fileSystem.exists(path)){fileSystem.delete(path,true);}//设置输出路径FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));//设置输出格式化类job.setOutputFormatClass(TextOutputFormat.class);//提交任务job.waitForCompletion(true);}static class MyMapper extends Mapper<LongWritable, Text, LongWritable, NullWritable>{public long 
max=Long.MIN_VALUE;@Overrideprotected void map(LongWritable key, Text value,Context context)throws IOException, InterruptedException {long temp=Long.parseLong(value.toString());if(temp>max){max=temp;}}@Overrideprotected void cleanup(Context context)throws IOException, InterruptedException {context.write(new LongWritable(max), NullWritable.get());}}static class MyReduce extends Reducer<LongWritable, NullWritable, LongWritable, NullWritable>{public long max=Long.MIN_VALUE;@Overrideprotected void reduce(LongWritable k2, Iterable<NullWritable> v2s,Context context)throws IOException, InterruptedException {long temp=k2.get();if(temp>max){max=temp;}}@Overrideprotected void cleanup(Context context)throws IOException, InterruptedException {context.write(new LongWritable(max), NullWritable.get());}}}


0 0
原创粉丝点击