实现pageRank
来源:互联网 发布:linux查看系统字体设置 编辑:程序博客网 时间:2024/05/20 13:06
package com.scala
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
/**
 * PageRank implemented in Scala on top of Spark RDDs.
 *
 * Computes the PageRank of URLs from an input file. Input file should
 * be in format of:
 * URL neighbor URL
 * URL neighbor URL
 * URL neighbor URL
 * ...
 * where URL and their neighbors are separated by space(s).
 *
 * Usage: PageRank [file] [iter]
 *  - file: path of the edge list; defaults to "page.txt"
 *  - iter: number of iterations; defaults to 20
 *
 * Blank lines and lines with fewer than two tokens are skipped instead of
 * failing the job.
 *
 * Note: every iteration rebuilds the ranks from the contributions received
 * through inbound links only, so a URL without any inbound link is absent
 * from the result once at least one iteration has run.
 */
object PageRank {
  /** Input path used when no command-line argument is supplied. */
  private val DefaultInput = "page.txt"

  /** Iteration count used when no second command-line argument is supplied. */
  private val DefaultIterations = 20

  /**
   * Runs the job on a single-threaded local master and prints one
   * "<url> has rank: <rank>." line per ranked URL.
   *
   * @param args optional `[file] [iter]`; see the object documentation
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInput)
    // Validate the iteration count before a SparkContext exists, so a bad
    // argument cannot leave a context running.
    val iters = args.lift(1).fold(DefaultIterations) { raw =>
      scala.util.Try(raw.toInt).getOrElse {
        System.err.println("Usage: PageRank [file] [iter]")
        sys.exit(1)
      }
    }

    val sparkConf = new SparkConf().setAppName("PageRank").setMaster("local[1]")
    val ctx = new SparkContext(sparkConf)
    try {
      val lines = ctx.textFile(inputPath)

      // Build the adjacency list from the edge data, e.g. (1,(2,3,4,5)) (2,(1,5)) ...
      // Trimming first keeps leading whitespace from producing an empty URL,
      // and the length filter drops blank or single-token lines.
      val links = lines
        .map(_.trim.split("\\s+"))
        .filter(_.length >= 2)
        .map(parts => (parts(0), parts(1)))
        .distinct()
        .groupByKey()
        .cache() // reused by the join in every iteration
      links.foreach(println)

      // Every source URL starts with rank 1.0: (1,1.0) (2,1.0) ...
      var ranks = links.mapValues(_ => 1.0)
      ranks.foreach(println)

      for (_ <- 1 to iters) {
        // Joined records look like (1,((2,3,4,5), 1.0)); each URL splits its
        // current rank evenly among its neighbors.
        val contribs = links.join(ranks).values.flatMap { case (urls, rank) =>
          val size = urls.size
          urls.map(url => (url, rank / size))
        }
        // Damping factor 0.85: new rank = 0.15 + 0.85 * sum of contributions.
        ranks = contribs.reduceByKey(_ + _).mapValues(0.15 + 0.85 * _)
      }

      ranks.collect().foreach { case (url, rank) =>
        println(s"$url has rank: $rank.")
      }
    } finally {
      // Release the context even when the job fails part-way through.
      ctx.stop()
    }
  }
}
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
/**
 * PageRank implemented in Scala on top of Spark RDDs.
 *
 * Computes the PageRank of URLs from an input file. Input file should
 * be in format of:
 * URL neighbor URL
 * URL neighbor URL
 * URL neighbor URL
 * ...
 * where URL and their neighbors are separated by space(s).
 *
 * Usage: PageRank [file] [iter]
 *  - file: path of the edge list; defaults to "page.txt"
 *  - iter: number of iterations; defaults to 20
 *
 * Blank lines and lines with fewer than two tokens are skipped instead of
 * failing the job.
 *
 * Note: every iteration rebuilds the ranks from the contributions received
 * through inbound links only, so a URL without any inbound link is absent
 * from the result once at least one iteration has run.
 */
object PageRank {
  /** Input path used when no command-line argument is supplied. */
  private val DefaultInput = "page.txt"

  /** Iteration count used when no second command-line argument is supplied. */
  private val DefaultIterations = 20

  /**
   * Runs the job on a single-threaded local master and prints one
   * "<url> has rank: <rank>." line per ranked URL.
   *
   * @param args optional `[file] [iter]`; see the object documentation
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.headOption.getOrElse(DefaultInput)
    // Validate the iteration count before a SparkContext exists, so a bad
    // argument cannot leave a context running.
    val iters = args.lift(1).fold(DefaultIterations) { raw =>
      scala.util.Try(raw.toInt).getOrElse {
        System.err.println("Usage: PageRank [file] [iter]")
        sys.exit(1)
      }
    }

    val sparkConf = new SparkConf().setAppName("PageRank").setMaster("local[1]")
    val ctx = new SparkContext(sparkConf)
    try {
      val lines = ctx.textFile(inputPath)

      // Build the adjacency list from the edge data, e.g. (1,(2,3,4,5)) (2,(1,5)) ...
      // Trimming first keeps leading whitespace from producing an empty URL,
      // and the length filter drops blank or single-token lines.
      val links = lines
        .map(_.trim.split("\\s+"))
        .filter(_.length >= 2)
        .map(parts => (parts(0), parts(1)))
        .distinct()
        .groupByKey()
        .cache() // reused by the join in every iteration
      links.foreach(println)

      // Every source URL starts with rank 1.0: (1,1.0) (2,1.0) ...
      var ranks = links.mapValues(_ => 1.0)
      ranks.foreach(println)

      for (_ <- 1 to iters) {
        // Joined records look like (1,((2,3,4,5), 1.0)); each URL splits its
        // current rank evenly among its neighbors.
        val contribs = links.join(ranks).values.flatMap { case (urls, rank) =>
          val size = urls.size
          urls.map(url => (url, rank / size))
        }
        // Damping factor 0.85: new rank = 0.15 + 0.85 * sum of contributions.
        ranks = contribs.reduceByKey(_ + _).mapValues(0.15 + 0.85 * _)
      }

      ranks.collect().foreach { case (url, rank) =>
        println(s"$url has rank: $rank.")
      }
    } finally {
      // Release the context even when the job fails part-way through.
      ctx.stop()
    }
  }
}
0 0
- 实现pageRank
- java 实现pagerank
- pagerank算法java实现
- PageRank的MapReduce实现
- PageRank算法并行实现
- PageRank的CUDA实现
- PageRank及其MapReduce实现
- pagerank的matlab实现
- Python 实现pagerank
- java 实现PageRank算法
- PageRank算法并行实现
- python实现pagerank
- PageRank简单实现
- Python实现PageRank算法
- Spark GraphX实现PageRank
- PageRank算法实现------MapReduce
- spark实现PageRank
- PageRank计算方法的SQL实现
- java中equals和==的区别
- 常用算法文章收集
- 欢迎使用CSDN-markdown编辑器
- Codevs 4768 跳石头 NOIP2015 DAY2 T1
- leetcode 88. Merge Sorted Array
- 实现pageRank
- 【一天一道LeetCode】#80. Remove Duplicates from Sorted Array II
- delphi多线程TThread详解
- HDU 2073 无限的路
- ecshop入门知识
- 涨姿势!阿里巴巴几轮面试的点滴经历
- composer嵌入框架以及创建
- 微服务、单体应用以及NoOps
- Android - 开源框架 - xUtils - BitmapUtils模块