spark mllib机器学习之二 DecisionTree

来源：互联网　发布：mac blueray play　编辑：程序博客网　时间：2024/04/28 14:24

数据格式（LibSVM 格式，每行为“标签 特征索引:特征值 …”）：

1 1:2 2:3 3:4
2 1:1 2:2 3:3
1 1:1 2:3 3:3
1 1:3 2:1 3:3
1 1:4 2:6 3:7
2 1:1 2:5 3:5
1 1:3 2:3 3:3
1 1:3 2:2 3:3
1 1:4 2:3 3:4
2 1:2 2:6 3:6
1 1:1 2:7 3:3
1 1:4 2:1 3:2
1 1:3 2:3 3:7
2 1:5 2:5 3:5

package com.agm.clssify

import org.apache.spark.mllib.tree.DecisionTree
import org.apache.spark.mllib.tree.model.DecisionTreeModel
import org.apache.spark.mllib.util.MLUtils
import java.io.File
import java.io.PrintWriter
import java.io.File
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors
import org.apache.log4j.{Level, Logger}
/**
 * Demo application: trains a Spark MLlib decision-tree classifier on a
 * LibSVM-formatted file, reports the error rate on a held-out split, prints
 * the learned tree, and round-trips the model through save/load.
 */
object c45 {

  /**
   * Runs the whole train / evaluate / persist pipeline on a local Spark master.
   *
   * The input file and the model output directory are hard-coded below.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Silence everything below ERROR for loggers under "org" so the demo output stays readable.
    Logger.getLogger("org").setLevel(Level.ERROR)
    /*
    Disabled snippet kept for reference — presumably a workaround for a missing
    winutils.exe when running Hadoop/Spark on Windows (TODO confirm before re-enabling):
    System.getProperties().put("hadoop.home.dir", new File(".").getCanonicalPath());
    new File("./bin").mkdirs();
    new File("./bin/winutils.exe").createNewFile();
    */
    val conf = new SparkConf().setAppName("Simple Application") // name the application
    conf.setMaster("local")
    val sc = new SparkContext(conf)
    // Always release the context, even when loading, training or saving throws.
    try {
      println("be")
      val data = MLUtils.loadLibSVMFile(sc, "F:\\testData\\spark\\svm.txt")

      // 80% of the rows go to training, the remaining 20% to evaluation.
      val splits = data.randomSplit(Array(0.8, 0.2))
      val (trainData, testData) = (splits(0), splits(1))
      testData.foreach(println)
      println("sdaf")
      testData.foreach(f => println(f.features))

      // NOTE(review): per the MLlib docs, class labels must lie in [0, numClasses);
      // confirm 4 is the intended upper bound for this data set.
      val numClasses = 4
      // Empty map: every feature is treated as continuous.
      val categoricalFeaturesInfo = Map[Int, Int]()
      val impurity = "gini"
      val maxDepth = 10
      val maxBins = 32

      val model = DecisionTree.trainClassifier(
        trainData, numClasses, categoricalFeaturesInfo, impurity, maxDepth, maxBins)

      // Pair each held-out label with the model's prediction for that row.
      val labelAndPreds = testData.map { point =>
        val prediction = model.predict(point.features)
        (point.label, prediction)
      }

      // With a small input the random 20% split can be empty; dividing by zero
      // would print NaN, so report that case explicitly instead.
      val testCount = testData.count()
      if (testCount == 0L) {
        println("Test Error = n/a (the test split is empty)")
      } else {
        val testErr = labelAndPreds.filter(r => r._1 != r._2).count().toDouble / testCount
        println("Test Error =" + testErr)
      }
      println("Learned classification tree model:\n" + model.toDebugString)

      // NOTE(review): save presumably fails if the target directory already exists
      // (e.g. on a second run) — confirm and clean it up beforehand if needed.
      model.save(sc, "F:\\testData\\spark\\myDecisionTreeClassificationModel")
      val sameModel = DecisionTreeModel.load(sc, "F:\\testData\\spark\\myDecisionTreeClassificationModel")
      // Show the reloaded model so the save/load round trip is visibly exercised.
      println("Reloaded model: " + sameModel)
    } finally {
      sc.stop()
    }
  }
}

0 0