mapreduce top K实现
来源:互联网 发布:tcl王牌液晶网络电视 编辑:程序博客网 时间:2024/06/09 04:41
主题思想:
通过 TreeMap 数据结构在每个 Map 端预先计算局部 Top K,再由 Reduce 端汇总各个 Map 的结果,得到全局 Top K(注意:TreeMap 以数值为键,重复的数值只会保留一份)。
public class TopK extends Configured implements Tool { public static class TopKMapper extends Mapper<Object, Text, NullWritable, LongWritable> { public static final int K = 100; private TreeMap<Long, Long> tm = new TreeMap<Long, Long>(); @Override protected void map(Object key, Text value, Context context) throws IOException, InterruptedException { try { long k = Integer.parseInt(value.toString().substring(0, 9)); tm.put(k, k); if (tm.size() > K) { tm.remove(tm.firstKey()); } } catch (Exception e) { context.getCounter("TopK", "errorlog").increment(1); } } @Override protected void cleanup(Context context) throws IOException, InterruptedException { for (Long text : tm.values()) { context.write(NullWritable.get(), new LongWritable(text)); } } } public static class TopKReducer extends Reducer<NullWritable, LongWritable, NullWritable, LongWritable> { public static final int K = 100; private TreeMap<Long, Long> mt = new TreeMap<Long, Long>(); @Override protected void reduce(NullWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException { for (LongWritable value : values) { mt.put(value.get(), value.get()); if (mt.size() > K) { mt.remove(mt.firstKey()); } } for (Long val : mt.descendingKeySet()) { context.write(NullWritable.get(), new LongWritable(val)); } } } @Override public int run(String[] args) throws Exception { Configuration conf = getConf(); Job job = new Job(conf, "TopKNum"); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(LongWritable.class); job.setMapperClass(TopKMapper.class); job.setReducerClass(TopKReducer.class); job.setJarByClass(TopK.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); return job.waitForCompletion(true) ? 
0 : 1; } public static void main(String[] args) throws IOException, InterruptedException { try { if (args.length < 2) { System.err.println("ERROR: Parameter format length "); System.exit(0); } int ret = ToolRunner.run(new TopK(), args); System.exit(ret); } catch (Exception e) { e.printStackTrace(); } } }
- mapreduce top K实现
- mapreduce实现Top K
- mapreduce Top K算法
- hadoop mapreduce 解决 top K问题
- Top-K in MapReduce Hadoop Framework
- Top K算法的实现
- java实现top K排序
- 利用priority_queue实现top K
- MapReduce 实现k-means聚类
- k-means的MapReduce实现
- Pig、Hive、MapReduce 解决分组 Top K 问题
- Pig、Hive、MapReduce 解决分组 Top K 问题
- MapReduce解决在海量数据中求Top K
- Pig、Hive、MapReduce 解决分组 Top K 问题
- Pig、Hive、MapReduce 解决分组 Top K 问题
- MapReduce功能实现三---Top N
- 转 -- Top K算法问题的实现
- Top K算法问题的实现
- ASIHTTP 终结者
- Windows 2012安装.NET Framework3.5(0x800F0907)
- 面试问题集锦二
- css box-shadow ie6-8
- 敏捷开发绩效管理之十一:如何提高人员可用率?
- mapreduce top K实现
- 单链表的逆置
- 10条网站易用性设计改进技巧
- Unity 一段简单的AI
- sicily 1011. Lenny's Lucky Lotto
- Android单元测试
- JavaScript基础-Array
- 计算机会议排名:Computer Science Conference Rankings 转帖
- jQuery插件开发的模式和结构