Spark成长之路(7)-Hypothesis testing

来源:互联网 发布:delphi登陆淘宝联盟 编辑:程序博客网 时间:2024/05/20 23:39

Hypothesis testing

样例

import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.ml.stat.ChiSquareTest
import org.apache.spark.sql.SparkSession

/**
 * Minimal example of hypothesis testing with Spark ML's `ChiSquareTest`.
 *
 * Builds a small in-memory DataFrame of (label, features) rows, runs
 * `ChiSquareTest.test` on the "features" column against the "label" column,
 * and prints the three result columns of the first result row.
 */
object HypothesisTestingExample {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder.appName("HypothesisTestingExample").getOrCreate()

    // Always release the session, even if the test or the printing throws.
    try {
      // Keep the console output readable by suppressing INFO-level logging.
      spark.sparkContext.setLogLevel("WARN")

      // Each row is (label, feature vector).
      val data = Seq(
        (0.0, Vectors.dense(0.5, 10.0)),
        (0.0, Vectors.dense(1.5, 20.0)),
        (1.0, Vectors.dense(1.5, 30.0)),
        (0.0, Vectors.dense(3.5, 30.0)),
        (0.0, Vectors.dense(3.5, 40.0)),
        (1.0, Vectors.dense(3.5, 40.0))
      )

      // Brings the implicit encoders required by `toDF` into scope.
      import spark.implicits._
      val df = data.toDF("label", "features")

      // Only the first result row is read; its columns are accessed by position:
      // 0 = pValues (Vector), 1 = degreesOfFreedom (Seq[Int]), 2 = statistics (Vector).
      val chi = ChiSquareTest.test(df, "features", "label").head

      println(s"pValues = ${chi.getAs[Vector](0)}")
      println(s"degreesOfFreedom = ${chi.getSeq[Int](1).mkString("[", ",", "]")}")
      println(s"statistics = ${chi.getAs[Vector](2)}")
    } finally {
      spark.stop()
    }
  }
}

结果

pValues = [0.6872892787909721,0.6822703303362126]
degreesOfFreedom = [2,3]
statistics = [0.75,1.5]