PageRank 算法

来源:互联网 发布:什么叫函数式编程 编辑:程序博客网 时间:2024/06/05 05:22
package com.sdcet

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Created by Administrator on 2017/2/10.
  *
  * Runs a fixed number of PageRank iterations over a small hard-coded link
  * graph of four pages (A, B, C, D), prints the resulting ranks and writes
  * them to HDFS as text.
  */
object PageRank {

  /**
    * Program entry point.
    *
    * Builds the link graph and the initial ranks (1.0 per page), then performs
    * 10 iterations of: each page splits its current rank evenly among the pages
    * it links to, the contributions per destination are summed, and the new
    * rank is `0.15 + 0.85 * sum`.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    System.setProperty("hadoop.home.dir", "E:\\winutils-hadoop-2.6.4\\hadoop-2.6.4")
    val conf = new SparkConf().setAppName("PageRank").setMaster("local")
    val sc = new SparkContext(conf)

    try {
      // page -> pages it links to. Page names are case-sensitive join keys, so
      // every destination must match a source key exactly ("D", not "d");
      // otherwise the contribution goes to a phantom page and "D" drops out.
      val links: RDD[(String, Array[String])] = sc.parallelize(
        Array(
          ("A", Array("D")),
          ("B", Array("A")),
          ("C", Array("A", "B")),
          ("D", Array("A", "C"))
        ),
        2
      ).cache() // joined again on every iteration

      var rank: RDD[(String, Double)] = sc.parallelize(
        Array(("A", 1.0), ("B", 1.0), ("C", 1.0), ("D", 1.0)),
        2
      )

      for (_ <- 1 to 10) {
        // Each page distributes its rank evenly over its outgoing links.
        val contribs = links.join(rank, 2).flatMap {
          case (_, (outLinks, pageRank)) =>
            outLinks.map(dest => (dest, pageRank / outLinks.length))
        }
        rank = contribs.reduceByKey(_ + _, 2).mapValues(0.15 + 0.85 * _)
      }

      // The final ranks are consumed twice (print + save); cache them so the
      // 10-iteration lineage is not recomputed for the second action.
      rank.cache()

      // Printing the RDD itself would only show its toString, so bring the
      // (tiny) result set to the driver and print the actual values.
      rank.collect().sortBy(_._1).foreach { case (page, score) =>
        println(s"rank: $page -> $score")
      }

      rank.saveAsTextFile("hdfs://hadoop1:9000/pageRank")
    } finally {
      // Always release the Spark context, even if a job fails.
      sc.stop()
    }
  }
}
0 0