spark mllib机器学习之二 DecisionTree

来源:互联网 发布:mac blueray play 编辑:程序博客网 时间:2024/04/28 14:24


数据格式(LIBSVM 格式,每行为:标签 特征索引:特征值 ...):

1 1:2 2:3 3:4
2 1:1 2:2 3:3
1 1:1 2:3 3:3
1 1:3 2:1 3:3
1 1:4 2:6 3:7
2 1:1 2:5 3:5
1 1:3 2:3 3:3
1 1:3 2:2 3:3
1 1:4 2:3 3:4
2 1:2 2:6 3:6
1 1:1 2:7 3:3
1 1:4 2:1 3:2
1 1:3 2:3 3:7
2 1:5 2:5 3:5


package com.agm.clssify



import org.apache.spark.mllib.tree.DecisionTree
import org.apache.spark.mllib.tree.model.DecisionTreeModel
import org.apache.spark.mllib.util.MLUtils
import java.io.File
import java.io.PrintWriter
import java.io.File
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors
import org.apache.log4j.{Level, Logger}
/**
 * Example driver: trains an MLlib decision-tree classifier on a LIBSVM file,
 * reports the error rate on a held-out split, then saves and reloads the model.
 *
 * Input and output locations are hard-coded Windows paths under
 * `F:\testData\spark`.
 */
object c45 {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Silence everything below ERROR for loggers under "org".
    Logger.getLogger("org").setLevel(Level.ERROR)
    /*
    System.getProperties().put("hadoop.home.dir", path);
    new File("./bin").mkdirs();
    new File("./bin/winutils.exe").createNewFile();
    */
    val conf = new SparkConf().setAppName("Simple Application") // name the application
    conf.setMaster("local")
    val sc = new SparkContext(conf)

    // Always release the SparkContext, even if loading, training or saving fails.
    try {
      println("be")
      val data = MLUtils.loadLibSVMFile(sc, "F:\\testData\\spark\\svm.txt")

      // 80% of the rows for training, 20% for evaluation.
      val Array(trainData, testData) = data.randomSplit(Array(0.8, 0.2))
      testData.foreach(println)
      println("sdaf")
      testData.foreach(f => println(f.features))

      // NOTE(review): the sample data only uses labels 1 and 2; presumably
      // numClasses just has to exceed the largest label, so 3 would also work — confirm.
      val numClasses = 4
      // Empty map: no feature is declared categorical.
      val categoricalFeaturesInfo = Map[Int, Int]()
      val impurity = "gini"
      val maxDepth = 10
      val maxBins = 32

      val model = DecisionTree.trainClassifier(
        trainData, numClasses, categoricalFeaturesInfo, impurity, maxDepth, maxBins)

      // Pair each test point's true label with the model's prediction.
      val labelAndPreds = testData.map { point =>
        (point.label, model.predict(point.features))
      }

      // A tiny input can leave the 20% split empty; dividing by zero would
      // print a meaningless NaN, so report that case explicitly.
      val testCount = testData.count()
      if (testCount == 0L) {
        println("Test Error = n/a (test split is empty)")
      } else {
        val misclassified = labelAndPreds.filter { case (label, pred) => label != pred }.count()
        val testErr = misclassified.toDouble / testCount
        println("Test Error =" + testErr)
      }
      println("Learned classification tree model:\n" + model.toDebugString)

      // NOTE(review): save presumably fails if the target directory already
      // exists from a previous run — confirm and clean up beforehand if needed.
      model.save(sc, "F:\\testData\\spark\\myDecisionTreeClassificationModel")
      // Reload the model to check that the saved copy can be read back.
      val sameModel = DecisionTreeModel.load(sc, "F:\\testData\\spark\\myDecisionTreeClassificationModel")
    } finally {
      sc.stop()
    }
  }
}
0 0