spark mllib机器学习之二 DecisionTree
来源:互联网 发布:mac blueray play 编辑:程序博客网 时间:2024/04/28 14:24
数据格式(LibSVM 格式,每行为:标签 特征序号:特征值 ...,特征序号从 1 开始):
1 1:2 2:3 3:4
2 1:1 2:2 3:3
1 1:1 2:3 3:3
1 1:3 2:1 3:3
1 1:4 2:6 3:7
2 1:1 2:5 3:5
1 1:3 2:3 3:3
1 1:3 2:2 3:3
1 1:4 2:3 3:4
2 1:2 2:6 3:6
1 1:1 2:7 3:3
1 1:4 2:1 3:2
1 1:3 2:3 3:7
2 1:5 2:5 3:5
package com.agm.clssify
import org.apache.spark.mllib.tree.DecisionTree
import org.apache.spark.mllib.tree.model.DecisionTreeModel
import org.apache.spark.mllib.util.MLUtils
import java.io.File
import java.io.PrintWriter
import java.io.File
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors
import org.apache.log4j.{Level, Logger}
/**
 * Example driver: trains an MLlib decision-tree classifier on a LibSVM-format
 * file, reports the error rate on a held-out split, prints the learned tree,
 * and round-trips the model through save/load.
 */
object c45 {

  /**
   * Runs the whole train / evaluate / persist pipeline on a local Spark master.
   *
   * Input and output locations are hard-coded Windows paths under
   * `F:\testData\spark`.
   *
   * @param args command-line arguments; not used
   */
  def main(args: Array[String]): Unit = {
    // Silence everything below ERROR for loggers under "org" (Spark, Hadoop, ...).
    Logger.getLogger("org").setLevel(Level.ERROR)
    /*
    Disabled workaround: points hadoop.home.dir at the working directory and
    creates a placeholder bin/winutils.exe (presumably for running on Windows
    without a Hadoop installation -- confirm before re-enabling).

    val path = new File(".").getCanonicalPath()
    System.getProperties().put("hadoop.home.dir", path);
    new File("./bin").mkdirs();
    new File("./bin/winutils.exe").createNewFile();
    */
    val conf = new SparkConf().setAppName("Simple Application") // name of the application
    conf.setMaster("local")
    val sc = new SparkContext(conf)
    try {
      println("be")
      val data = MLUtils.loadLibSVMFile(sc, "F:\\testData\\spark\\svm.txt")

      // 80% training / 20% test. The split is random and unseeded, so results
      // differ from run to run.
      val splits = data.randomSplit(Array(0.8, 0.2))
      // The test split is traversed several times below; cache it so it is not
      // recomputed from the input file on every action.
      val (trainData, testData) = (splits(0), splits(1).cache())

      testData.foreach(println)
      println("sdaf")
      testData.foreach(f => println(f.features))

      // MLlib expects labels in {0, ..., numClasses - 1}; the value must be
      // larger than the greatest label present in the input file.
      val numClasses = 4
      // Empty map: every feature is treated as continuous.
      val categoricalFeaturesInfo = Map[Int, Int]()
      val impurity = "gini"
      val maxDepth = 10
      val maxBins = 32
      val model = DecisionTree.trainClassifier(
        trainData, numClasses, categoricalFeaturesInfo, impurity, maxDepth, maxBins)

      // Pair each test point's true label with the model's prediction.
      val labelAndPreds = testData.map { point =>
        val prediction = model.predict(point.features)
        (point.label, prediction)
      }

      // On a tiny input the random 20% split can come out empty; report that
      // explicitly instead of printing NaN from a 0/0 division.
      val testCount = testData.count()
      if (testCount == 0) {
        println("Test Error = n/a (random split produced an empty test set)")
      } else {
        val misclassified = labelAndPreds.filter { case (label, predicted) => label != predicted }.count()
        val testErr = misclassified.toDouble / testCount
        println("Test Error =" + testErr)
      }
      println("Learned classification tree model:\n" + model.toDebugString)

      // NOTE(review): save is expected to fail if the target directory already
      // exists, so a second run needs the directory removed first -- confirm.
      model.save(sc, "F:\\testData\\spark\\myDecisionTreeClassificationModel")
      // Reload the persisted model to check that it can be read back; the
      // loaded instance is not used further.
      val sameModel = DecisionTreeModel.load(sc, "F:\\testData\\spark\\myDecisionTreeClassificationModel")
    } finally {
      // Always release the local Spark runtime, even when a step above throws.
      sc.stop()
    }
  }
}
0 0
- spark mllib机器学习之二 DecisionTree
- Spark MLlib之机器学习(二)
- 二 Spark机器学习MLlib: LogisticRegression
- spark mllib源码分析之DecisionTree与GBDT
- Spark MLlib之机器学习(一)
- Spark MLlib之机器学习(三)
- spark mllib机器学习之三 FPGrowth
- spark mllib机器学习之四 kmeans
- spark mllib机器学习之五 LinearRegressionWithSGD
- spark mllib机器学习之六 ALS
- spark mllib机器学习之七 TFIDF
- spark之MLlib机器学习-Kmeans
- spark之MLlib机器学习-线性回归
- Spark MLlib机器学习之朴素贝叶斯小试牛刀
- Spark学习之基于MLlib的机器学习
- Spark机器学习库mllib之协同过滤
- spark MLlib、ML机器学习之Logistic回归
- Spark MLlib学习笔记之二——Spark Mllib矩阵向量
- session机制和cookie机制的理解
- Caffe学习系列(2):数据层及参数
- 让我们从机器学习谈起
- 知道这20个正则表达式,能让你少写1,000行代码
- Android学习之路(一)-Activity的生命周期和启动模式-1
- spark mllib机器学习之二 DecisionTree
- [JQuery]原生态Javascript与JQuery调用Ajax之比较
- LeetCode--152. Maximum Product Subarray
- 冒泡动画
- 内联函数 inline
- Lecture 3: Testing
- 机器学习算法的随机数据生成
- Scrollview 生成长截图
- NMI(标准化互信息) python实现