KwikCluster算法实现

来源：互联网　发布：欧米伽网络招聘骗局　编辑：程序博客网　时间：2024/06/06 18:41

KwikCluster算法是一种典型的Correlation Clustering算法，实现如下：

package clustering.CorrelationClustering

import scala.collection.mutable
import scala.io.Source

/**
  * Created by fhqplzj on 16-12-17 at 12:54 PM.
  */

/**
  * KwikCluster correlation clustering over a dense adjacency matrix.
  *
  * The input file contains one edge per line in the form `i j value`, where
  * `i` and `j` are 1-based vertex ids and `value` is the integer edge label.
  * [[run]] treats a label of `1` as a positive edge. Blank lines are ignored
  * and fields may be separated by any run of whitespace.
  *
  * @param path path of the edge-list file
  * @param n    number of vertices
  */
class KwikCluster(path: String, n: Int) {

  /**
    * Symmetric `n x n` adjacency matrix built from the edge file. Entries that
    * are not listed in the file stay `0`. [[run]] overwrites the positive edges
    * it consumes with `2`.
    */
  val graph: Array[Array[Int]] = {
    val result = Array.ofDim[Int](n, n)
    val source = Source.fromFile(path)
    // Always release the file handle, even when a line fails to parse.
    try {
      source.getLines().map(_.trim).filter(_.nonEmpty).foreach { line =>
        val parts = line.split("\\s+")
        // Vertex ids are 1-based in the file and 0-based in the matrix.
        val i = parts(0).toInt - 1
        val j = parts(1).toInt - 1
        val value = parts(2).toInt
        // Store both directions; a later line for the same pair overwrites an earlier one.
        result(i)(j) = value
        result(j)(i) = value
      }
    } finally {
      source.close()
    }
    result
  }

  /**
    * Runs the clustering and prints one cluster per line to standard output,
    * as space-separated, ascending, 1-based vertex ids.
    *
    * The pivot of each round is `head` of the set of still-unclustered
    * vertices, i.e. whatever the hash set yields first; no explicit
    * randomisation is performed here.
    *
    * This method mutates [[graph]]: every positive edge between a pivot and a
    * vertex it absorbs is re-labelled `2`. Because of that, calling `run()` a
    * second time on the same instance does not see those edges as positive
    * any more.
    */
  def run(): Unit = {
    // Vertices that have not been assigned to a cluster yet.
    val remaining = mutable.HashSet.empty[Int]
    for (vertex <- 0 until n) remaining += vertex

    while (remaining.nonEmpty) {
      // Take the first element as the pivot of this round.
      val pivot = remaining.head

      // Neighbours joined to the pivot by a positive edge that are still unclustered.
      val members = graph(pivot).zipWithIndex.collect {
        case (label, neighbour) if label == 1 && remaining.contains(neighbour) => neighbour
      }

      // The pivot itself is now clustered.
      remaining -= pivot

      // Mark the consumed edges and drop the absorbed vertices from the pool.
      members.foreach { neighbour =>
        graph(pivot)(neighbour) = 2
        graph(neighbour)(pivot) = 2
        remaining -= neighbour
      }

      // Print the cluster using the file's 1-based vertex ids.
      println((members ++ Array(pivot)).sorted.map(_ + 1).mkString(" "))
    }
  }
}

object KwikCluster {

  /** Edge-list file used when no path is given on the command line. */
  private val DefaultPath =
    "/home/fhqplzj/IdeaProjects/DocumentClustering/src/main/scala/clustering/CorrelationClustering/data.txt"

  /** Vertex count used when none is given on the command line. */
  private val DefaultVertexCount = 6

  /**
    * Entry point.
    *
    * @param args optional `[path] [vertexCount]`; missing arguments fall back to
    *             the built-in defaults above
    */
  def main(args: Array[String]): Unit = {
    val path = if (args.length > 0) args(0) else DefaultPath
    val vertexCount = if (args.length > 1) args(1).toInt else DefaultVertexCount
    val kwikCluster = new KwikCluster(path, vertexCount)
    kwikCluster.run()
  }
}

数据：

1 2 1
1 3 1
2 3 1
2 4 1
4 5 1
4 6 1
1 5 -1
5 6 -1
3 6 -1

具体参考论文：Parallel Correlation Clustering on Big Graphs

0 0