mapreduce实现Top K
来源:互联网 发布:甲醛挥发温度曲线 知乎 编辑:程序博客网 时间:2024/06/05 20:06
package oldapi;
import java.io.IOException;
import java.util.ArrayList;
import java.util.TreeSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
// Use MapReduce to find the K largest numbers in a massive data set
/**
 * MapReduce job that selects the K largest distinct integers from an input
 * that holds one integer per line.
 *
 * <p>Each map task keeps only its own K largest values and emits them once,
 * from {@code cleanup}. Every record is emitted under the same
 * {@link NullWritable} key, so all candidates reach a single {@code reduce}
 * call, which selects the K largest among them and writes them largest first.
 *
 * <p>Candidates are held in a {@link TreeSet}, so a value that occurs several
 * times in the input is counted once.
 */
public class Top_k_new {

    /**
     * Adds {@code value} to {@code top} and, if the set then holds more than
     * {@code limit} elements, evicts the smallest one.
     */
    private static void offer(TreeSet<Integer> top, int value, int limit) {
        top.add(value);
        if (top.size() > limit) {
            top.pollFirst();
        }
    }

    /**
     * Mapper that tracks the K largest integers seen by this task and emits
     * them only when the task finishes.
     */
    public static class MapClass extends Mapper<LongWritable, Text, NullWritable, IntWritable> {
        /** Number of largest values to keep. */
        public static final int K = 5;

        /** The K largest distinct values seen so far by this map task. */
        private final TreeSet<Integer> set = new TreeSet<Integer>();

        /**
         * Parses one input line as an integer and offers it to the local top-K set.
         * Blank lines are skipped; nothing is written to {@code context} here.
         *
         * @param key     key supplied by the input format (unused)
         * @param value   one line of input, expected to contain a single integer
         * @param context task context (unused; output happens in {@code cleanup})
         * @throws NumberFormatException if a non-blank line is not a valid integer
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) {
                // A trailing newline or an empty line must not fail the whole task.
                return;
            }
            offer(set, Integer.parseInt(line), K);
        }

        /**
         * Emits this task's top-K candidates in ascending order and clears the set.
         */
        @Override
        protected void cleanup(Context context) throws IOException,
                InterruptedException {
            IntWritable out = new IntWritable();
            for (int candidate : set) {
                out.set(candidate);
                context.write(NullWritable.get(), out);
            }
            set.clear();
        }
    }

    /**
     * Reducer that merges the candidates from all map tasks into the final top K.
     */
    public static class Reduce extends Reducer<NullWritable, IntWritable, NullWritable, IntWritable> {
        /** Number of largest values to output; kept equal to {@link MapClass#K}. */
        public static final int K = MapClass.K;

        /**
         * Selects the K largest distinct values among {@code values} and writes
         * them in descending order.
         *
         * <p>Forwarding every value unchanged would output up to K numbers per
         * map task, so the selection has to be repeated here.
         *
         * @param key     the shared {@link NullWritable} key
         * @param values  candidates emitted by the map tasks
         * @param context task context that receives the final result
         */
        @Override
        public void reduce(NullWritable key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            TreeSet<Integer> top = new TreeSet<Integer>();
            for (IntWritable val : values) {
                // Store the primitive value, not the IntWritable reference.
                offer(top, val.get(), K);
            }
            IntWritable out = new IntWritable();
            for (int winner : top.descendingSet()) {
                out.set(winner);
                context.write(NullWritable.get(), out);
            }
        }
    }

    // Job parameters
    public static final String HOST_PATH = "hdfs://test:9000";
    public static final String INPUT_PATH = HOST_PATH + "/number";
    public static final String OUTPUT_PATH = HOST_PATH + "/out";

    /**
     * Configures and runs the job, then exits with status 0 on success and 1 on failure.
     *
     * @param args command-line arguments (unused; paths come from the constants above)
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // NOTE(review): this constructor is kept because the target Hadoop version
        // is unknown; if the cluster API offers Job.getInstance(conf, name), prefer it.
        Job job = new Job(conf, "TopKNum");
        // Lets Hadoop locate the jar holding the mapper/reducer classes on the cluster.
        job.setJarByClass(Top_k_new.class);
        FileInputFormat.setInputPaths(job, INPUT_PATH);
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
        job.setMapperClass(MapClass.class);
        // No combiner is set: each mapper already reduces its output to K values.
        job.setReducerClass(Reduce.class);
        // A global top K needs every candidate to be seen by one reducer.
        job.setNumReduceTasks(1);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(IntWritable.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
- mapreduce top K实现
- mapreduce实现Top K
- mapreduce Top K算法
- hadoop mapreduce 解决 top K问题
- Top-K in MapReduce Hadoop Framework
- Top K算法的实现
- java实现top K排序
- 利用priority_queue实现top K
- MapReduce 实现k-means聚类
- k-means的MapReduce实现
- Pig、Hive、MapReduce 解决分组 Top K 问题
- Pig、Hive、MapReduce 解决分组 Top K 问题
- MapReduce解决在海量数据中求Top K
- Pig、Hive、MapReduce 解决分组 Top K 问题
- Pig、Hive、MapReduce 解决分组 Top K 问题
- MapReduce功能实现三---Top N
- 转 -- Top K算法问题的实现
- Top K算法问题的实现
- Android 线程优先级设置方法
- asp.net 打印控件使用方法
- LINUX 内核中物理内存分配函数(kernel api)分析
- HDU - 1575 Tr A
- 第8章 Linux磁盘与文件系统管理
- mapreduce实现Top K
- C++重载函数
- UART0串口编程系列之前奏篇
- Devexpress Silverlight GridControl Banded列样式
- 汇编入门学习笔记 (二)—— 寄存器(内存访问)、栈
- intellij idea 使用总结
- iOS开发者必备的五大编程类工具
- ExtJs时间日期
- I/O流之--FileOutputStream 和FileInputStream