3-1、topk代码

来源:互联网 编辑:程序博客网 时间:2024/05/17 01:00


1、生成随机数代码:

package test;

import java.util.Random;

/**
 * Test-data generator for the top-k job: writes 100,000 random integers
 * in the range [1, 100000] to stdout, one per line.
 */
public class RandomTest {

    public static void main(String[] args) {
        // Reuse one Random instance for the whole run. The original code
        // created a new Random on every loop iteration, which is wasteful
        // and can weaken randomness when successive instances are seeded
        // from nearly identical system times.
        Random random = new Random();
        for (int i = 0; i < 100000; i++) {
            // nextInt(100000) yields [0, 99999]; +1 shifts to [1, 100000].
            int number = random.nextInt(100000) + 1;
            System.out.println(number);
        }
    }
}



2、生成部分数据如下所示:
[hadoop@hadoop ~]$ tail -10 top_k.txt
1628786786639425960875245389881409920206832792706
[hadoop@hadoop ~]$



3、代码:
package test;

import java.io.IOException;
import java.util.Map;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

/**
 * MapReduce job that emits the k largest numeric keys found in the input,
 * in descending order. Each mapper keeps a local top-k in a TreeMap and
 * flushes it in cleanup(); the single reducer merges those candidates and
 * writes the global top-k.
 */
public class Topk {

    /** Number of largest values to keep. */
    private static final int k = 10;

    public static class MyMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

        // Per-mapper running top-k. TreeMap keeps keys in ascending order,
        // so firstKey() is always the smallest retained value and is the
        // one evicted when the map grows past k.
        // NOTE(review): duplicate values collapse into a single TreeMap
        // entry, so repeated numbers are under-represented — confirm the
        // input is effectively distinct or that this is acceptable.
        private final TreeMap<Long, String> topK = new TreeMap<Long, String>();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            // Input lines carry the number in the first tab-separated field
            // (a line with no tab yields the whole line as field 0).
            String[] fields = line.split("\t");
            topK.put(Long.parseLong(fields[0]), line);
            if (topK.size() > k) {
                topK.remove(topK.firstKey()); // evict current minimum
            }
        }

        @Override
        protected void cleanup(Context context)
                throws IOException, InterruptedException {
            // Emit the surviving local top-k once all input is consumed.
            for (Map.Entry<Long, String> e : topK.entrySet()) {
                context.write(new LongWritable(e.getKey()), new Text(e.getValue()));
            }
        }
    }

    public static class MyReducer extends Reducer<LongWritable, Text, LongWritable, NullWritable> {

        // Global top-k accumulated across all reduce() calls (the job runs
        // with a single reducer, so this map sees every candidate key).
        private final TreeMap<Long, String> topK = new TreeMap<Long, String>();

        @Override
        protected void reduce(LongWritable k2, Iterable<Text> v2s, Context context)
                throws IOException, InterruptedException {
            topK.put(k2.get(), v2s.iterator().next().toString());
            if (topK.size() > k) {
                topK.remove(topK.firstKey()); // evict current minimum
            }
        }

        @Override
        protected void cleanup(Context context)
                throws IOException, InterruptedException {
            // Write the result in descending order. The original code copied
            // the keys into a hard-coded new Long[10] (ignoring the constant
            // k) and then indexed i = k-1 .. 0 unconditionally, which throws
            // NullPointerException whenever fewer than k distinct values
            // exist. descendingKeySet() removes both the fixed-size array
            // and the crash.
            for (Long num : topK.descendingKeySet()) {
                context.write(new LongWritable(num), NullWritable.get());
            }
        }
    }

    /**
     * Configures and submits the job.
     *
     * @param args args[0] = input path, args[1] = output path
     * @throws Exception on job-submission failure
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: Topk <input path> <output path>");
            System.exit(2);
        }
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, Topk.class.getSimpleName());
        job.setJarByClass(Topk.class);
        // A single reducer is required so one TreeMap sees every candidate.
        job.setNumReduceTasks(1);
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(NullWritable.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Propagate job success/failure to the shell; the original ignored
        // the boolean returned by waitForCompletion.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}




4、代码运行后输出在HDFS上面的目录:
[hadoop@hadoop ~]$ hdfs dfs -ls /user/hadoop/output_topk/output3
Found 2 items
-rw-r--r--   3 hadoop supergroup          0 2015-08-30 21:09 /user/hadoop/output_topk/output3/_SUCCESS
-rw-r--r--   3 hadoop supergroup         80 2015-08-30 21:09 /user/hadoop/output_topk/output3/part-r-00000



5、查看top10的数据:
[hadoop@hadoop ~]$ hdfs dfs -text /user/hadoop/output_topk/output3/part-r-00000
9999706999959499994249999199999720899965139995640999551599939779991946




0 0
原创粉丝点击