PageRank Hadoop MapReduce
来源:互联网 发布:js insertbefore 方法 编辑:程序博客网 时间:2024/06/10 02:12
links.txt
链接关系
A B C D
B A D
C C
D B C
part-r-00000 初始概率分布向量
a=0.8
A a 0.25
B a 0.25
C a 0.25
D a 0.25
PageRankMapReduce
package org.bigdata.pagerank;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.bigdata.util.HadoopCfg;
import org.bigdata.util.HadoopUtil;

/**
 * Iterative PageRank on Hadoop MapReduce.
 *
 * <p>Each iteration is one MapReduce job. The driver loads the rank of every
 * node from the previous iteration's output ({@link #initRand}), ships those
 * ranks to the tasks through the job configuration, and the job writes the
 * updated ranks as {@code node <TAB> rank} lines.
 *
 * @author wwhhf
 */
public class PageRankMapReduce {

    private static final String JOB_NAME = "PageRank";

    /** Records whose input file name starts with this prefix are treated as link records. */
    private static final String LINKS = "links";

    /**
     * Prefix of the job-configuration keys that carry one rank per node to the
     * map and reduce tasks. Tasks may run in JVMs other than the driver's, so
     * they cannot rely on the driver-side static map.
     */
    private static final String RANK_KEY_PREFIX = "pagerank.rank.";

    /** Weight of the rank mass received over in-links (the article's a = 0.8). */
    private static final double DAMPING = 0.8;

    /** Number of iterations run by {@link #main}. */
    private static final int ITERATIONS = 5;

    /** Driver-side ranks loaded by {@link #initRand}, keyed by node name. */
    private static final Map<String, Double> rand = new HashMap<String, Double>();

    /**
     * Loads node ranks from a tab-separated file ({@code node <TAB> rank}) into
     * the driver-side rank table. Existing entries are overwritten, never
     * removed. Blank lines are skipped.
     *
     * @param pathin   directory passed to {@code HadoopUtil.lslFile}
     * @param filename file name passed to {@code HadoopUtil.lslFile}
     * @throws IOException if the helper fails to read the file
     */
    public static void initRand(String pathin, String filename)
            throws IOException {
        // NOTE(review): lslFile is assumed to return the file's lines; confirm in HadoopUtil.
        List<String> lines = HadoopUtil.lslFile(pathin, filename);
        for (String line : lines) {
            if (line.trim().isEmpty()) {
                continue;
            }
            String[] terms = line.split("\t");
            rand.put(terms[0], Double.valueOf(terms[1]));
        }
    }

    /**
     * Copies every driver-side rank into the configuration so that tasks can
     * read it back with {@link #rankOf}.
     *
     * <p>Values are stored with {@link Double#toString(double)}, which
     * round-trips exactly through {@link Double#parseDouble(String)}.
     * NOTE(review): this puts one configuration entry per node, which is fine
     * for small graphs; large graphs would need a side file instead.
     */
    private static void publishRanks(Configuration cfg) {
        for (Map.Entry<String, Double> entry : rand.entrySet()) {
            cfg.set(RANK_KEY_PREFIX + entry.getKey(),
                    Double.toString(entry.getValue()));
        }
    }

    /**
     * Returns the rank published for {@code node}.
     *
     * @throws IllegalStateException if no rank was published for the node
     */
    private static double rankOf(Configuration conf, String node) {
        String raw = conf.get(RANK_KEY_PREFIX + node);
        if (raw == null) {
            throw new IllegalStateException(
                    "No rank available for node '" + node + "'");
        }
        return Double.parseDouble(raw);
    }

    /**
     * Distributes the rank of a source node evenly over its out-links.
     *
     * <p>The key is used as the name of the file the record came from, and the
     * value is a space-separated record {@code source dest1 dest2 ...}.
     */
    private static class PageRankMapper extends
            Mapper<Text, Text, Text, DoubleWritable> {

        @Override
        protected void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            // Rank files are also registered as job input; only link files matter here.
            if (!key.toString().startsWith(LINKS)) {
                return;
            }
            String record = value.toString();
            if (record.trim().isEmpty()) {
                return;
            }

            String[] nodes = record.split(" ");
            String source = nodes[0];
            double rank = rankOf(context.getConfiguration(), source);

            // Emit the source with zero mass so it reaches the reducer even
            // when no other node links to it.
            context.write(new Text(source), new DoubleWritable(0.0));

            int outDegree = nodes.length - 1;
            for (int i = 1; i < nodes.length; i++) {
                context.write(new Text(nodes[i]),
                        new DoubleWritable(rank / outDegree));
            }
        }
    }

    /**
     * Combines the mass received by a node into its new rank:
     * {@code a * sum + (1 - a) * e}, where {@code e} is the rank the node had
     * in the input rank file of this iteration.
     */
    private static class PageRankReducer extends
            Reducer<Text, DoubleWritable, Text, DoubleWritable> {

        @Override
        protected void reduce(Text key, Iterable<DoubleWritable> values,
                Context context) throws IOException, InterruptedException {
            double sum = 0.0;
            for (DoubleWritable value : values) {
                sum += value.get();
            }
            // NOTE(review): e is refreshed from the previous iteration's output
            // rather than being a fixed teleport vector; confirm this is intended.
            double previous = rankOf(context.getConfiguration(), key.toString());
            context.write(key, new DoubleWritable(
                    DAMPING * sum + (1 - DAMPING) * previous));
        }
    }

    /**
     * Configures and runs one PageRank job.
     *
     * @return {@code true} if the job completed successfully; {@code false} if
     *         it failed or could not be set up (the cause is printed)
     */
    private static boolean runJob(String linksin, String pathin, String pathout)
            throws ClassNotFoundException, InterruptedException {
        try {
            Configuration cfg = HadoopCfg.getConfiguration();
            // Must happen before Job.getInstance, which takes its own copy of cfg.
            publishRanks(cfg);

            Job job = Job.getInstance(cfg);
            job.setJobName(JOB_NAME);
            job.setJarByClass(PageRankMapReduce.class);
            // NOTE(review): FileNameInputFormat is presumed to supply the input
            // file name as the record key; the mapper depends on that.
            job.setInputFormatClass(FileNameInputFormat.class);

            // mapper
            job.setMapperClass(PageRankMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(DoubleWritable.class);

            // reducer
            job.setReducerClass(PageRankReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(DoubleWritable.class);

            FileInputFormat.addInputPath(job, new Path(pathin));
            FileInputFormat.addInputPath(job, new Path(linksin));
            FileOutputFormat.setOutputPath(job, new Path(pathout));

            return job.waitForCompletion(true);
        } catch (IllegalStateException | IllegalArgumentException
                | IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Runs one PageRank iteration. Ranks must have been loaded with
     * {@link #initRand} beforehand.
     *
     * <p>Setup and I/O failures are reported on standard error and not
     * rethrown.
     *
     * @param linksin directory holding the link files
     * @param pathin  directory holding the current ranks
     * @param pathout output directory for the updated ranks
     * @throws ClassNotFoundException if thrown by the job
     * @throws InterruptedException   if the wait for the job is interrupted
     */
    public static void solve(String linksin, String pathin, String pathout)
            throws ClassNotFoundException, InterruptedException {
        if (!runJob(linksin, pathin, pathout)) {
            System.err.println(JOB_NAME + " job failed for output " + pathout);
        }
    }

    /**
     * Runs {@value #ITERATIONS} PageRank iterations, feeding each iteration's
     * output directory into the next one.
     *
     * @throws IOException if the ranks cannot be read or an iteration fails
     */
    public static void main(String[] args) throws ClassNotFoundException,
            InterruptedException, IOException {
        String path = "/pagerank";
        String links_pathin = "/pagerank_links";
        String filename = "part-r-00000";
        String tmp_pathin = path;
        for (int i = 1; i <= ITERATIONS; i++) {
            initRand(tmp_pathin, filename);
            String tmp_pathout = path + i;
            System.out.println(links_pathin + " " + tmp_pathin + " "
                    + tmp_pathout);
            if (!runJob(links_pathin, tmp_pathin, tmp_pathout)) {
                // The next iteration would read a missing or partial output directory.
                throw new IOException("PageRank iteration " + i
                        + " failed; aborting before reading " + tmp_pathout);
            }
            tmp_pathin = tmp_pathout;
        }
    }
}
0 0
- PageRank Hadoop MapReduce
- Hadoop-MapReduce下的PageRank 矩阵分块算法
- PageRank的MapReduce实现
- PageRank及其MapReduce实现
- Pagerank 的mapreduce
- MapReduce实例之PageRank
- PageRank算法实现------MapReduce
- 使用MapReduce实现Pagerank算法
- MapReduce实现的PageRank原理
- PageRank算法的MapReduce实现
- pagerank算法的MapReduce实现
- 使用MapReduce实现PageRank算法
- PageRank算法及MapReduce实现
- MapReduce&hadoop
- Hadoop MapReduce
- Hadoop MapReduce
- Hadoop MapReduce
- Hadoop Mapreduce
- oracle数据库的静默(Quiesce)状态
- 长微博图片制作(无水印)
- Android小技巧
- C++重载重写重定义
- word2vec (一) 简介与训练过程概要
- PageRank Hadoop MapReduce
- hdoj-2099-整除的尾数
- 如何制作纯 ASCII 文本流程图
- Python安装模块出错(ImportError: No module named setuptools)解决方法
- Hibernate实现数据库增、删、改、查
- 【汇编】算符与伪指令笔记
- openjudge7624 山区建小学
- CS231n笔记3--Gradient Descent与Backward Propagation
- leetcode 092 Reverse Linked List II