simhash实现

来源:互联网 发布:淘宝联盟没有提现按钮 编辑:程序博客网 时间:2024/04/28 01:29
import com.clearspring.analytics.hash.MurmurHash

/**
  * Created by fhqplzj on 17-3-1 at 6:07 PM.
  *
  * SimHash fingerprinting of weighted string features.
  */
object Sim {

  /** Width of the fingerprint in bits (one bit per position of a `Long`). */
  private val HashBits = 64

  /**
    * Computes a 64-bit SimHash fingerprint for a set of weighted features.
    *
    * Each feature is hashed with `MurmurHash.hash64`. For every bit position
    * the feature's weight is added to a per-bit tally when that bit of the
    * hash is set and subtracted when it is clear. The fingerprint has bit `i`
    * set when the tally for position `i` is greater than or equal to zero.
    *
    * Features and weights are paired positionally with `zip`, so if the two
    * arrays differ in length the surplus elements of the longer one are
    * ignored. With no pairs at all every tally stays at zero and the result
    * is `-1L` (all bits set).
    *
    * @param features feature strings to fingerprint
    * @param weights  weight of the feature at the same index
    * @return the 64-bit fingerprint
    */
  def simHash(features: Array[String], weights: Array[Int]): Long = {
    val hist = Array.ofDim[Int](HashBits)
    features.zip(weights).foreach { case (feature, weight) =>
      val hash = MurmurHash.hash64(feature)
      for (i <- 0 until HashBits) {
        // The mask must be a Long. `1 << i` is an Int shift: the shift count
        // is taken modulo 32 and the result is sign-extended when combined
        // with a Long, so bits 32..63 would never be tested individually.
        if ((hash & (1L << i)) == 0L) {
          hist(i) -= weight
        } else {
          hist(i) += weight
        }
      }
    }
    // Assemble the fingerprint, again using a Long mask for every position.
    (0 until HashBits).foldLeft(0L) { (acc, i) =>
      if (hist(i) >= 0) acc | (1L << i) else acc
    }
  }

  /** Demo entry point: prints the fingerprint of three equally weighted words. */
  def main(args: Array[String]): Unit = {
    val features = "zhao jun haha".split(" ")
    val weights = Array.fill(features.length)(1)
    println(simHash(features, weights))
  }
}

0 0
原创粉丝点击