【scala】【spark】基于随机梯度下降的简单线性回归编程实现:Linear Regression - SGD
来源:互联网 发布:梦幻西游175魔化生数据 编辑:程序博客网 时间:2024/05/08 02:05
基于 Spark + Scala 编程实现基于随机梯度下降的简单线性回归算法;
数据集来自于 Spark 源码包;
调用 org.jblas.DoubleMatrix 包;
实现过程避免直接调用 Spark.MLlib 或 Spark.ml ,旨在深入理解“线性回归 - 随机梯度下降法”的基本原理。
package org.lily.optimization.test

import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.MLUtils
import org.jblas.DoubleMatrix
import scala.collection.mutable.ArrayBuffer

/**
 * Simple linear regression trained by stochastic gradient descent (SGD),
 * implemented directly with org.jblas.DoubleMatrix rather than Spark MLlib's
 * built-in regressors, so the mechanics of the algorithm stay visible.
 *
 * Training data is the sample_linear_regression_data.txt file shipped with
 * the Spark source distribution, in LIBSVM format.
 */
object LinearRegressionSGDDemo {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("simpletest")
    val sc = new SparkContext(conf)

    // Load training data in LIBSVM format.
    val sourcedata = MLUtils.loadLibSVMFile(sc, "D:/data/mllib/sample_linear_regression_data.txt")

    // Hyper-parameters.
    val addIntercept: Boolean = true // whether to fit an intercept term
    val alpha = 0.01                 // learning rate
    val numIterations = 2000         // maximum number of SGD steps
    var loss = 10.0                  // current loss; the loop stops once it drops to <= 1.0

    // Convert each LabeledPoint to (label, featureArray), prepending a
    // constant 1.0 feature when an intercept is fitted.
    val data =
      if (addIntercept) {
        // data : RDD[(Double, Array[Double])]
        sourcedata.map(x => (x.label, 1.0 +: x.features.toArray))
      } else {
        sourcedata.map(x => (x.label, x.features.toArray))
      }

    // Initialise the weight vector to zeros (plus one slot for the intercept).
    val numFeatures = sourcedata.first().features.toArray.length // e.g. 10 for the sample data
    val initialWeights = new Array[Double](numFeatures)
    val initialWeightsWithIntercept =
      if (addIntercept) 0.0 +: initialWeights else initialWeights

    val numExamples = data.count().toInt // number of training examples (501 for the sample data)

    var weights = new DoubleMatrix(initialWeightsWithIntercept.length, 1, initialWeightsWithIntercept: _*)
    println("initial weights: " + weights)

    // Collect to the driver; acceptable for this small demo data set only.
    val label = data.map(x => x._1).collect()    // Array[Double]
    val features = data.map(x => x._2).collect() // Array[Array[Double]]

    // FIX: create the RNG once instead of instantiating `new util.Random`
    // inside every iteration — re-seeding each step is wasteful and can
    // degrade the pseudo-random sequence.
    val rng = new util.Random

    var hypothesis = 0.0
    var midError = 0.0
    for (k <- 0 until numIterations if loss > 1.0) {
      // SGD step: pick one random example in [0, numExamples).
      val i = rng.nextInt(numExamples)
      val variable = new DoubleMatrix(features(i).length, 1, features(i): _*)
      hypothesis = variable.dot(weights)
      midError = label(i) - hypothesis
      // Gradient step for squared error: w += alpha * (y - w.x) * x
      weights = weights.add(variable.mul(alpha * midError))
      println("The current weights: " + weights)

      // Full-data loss (0.5 * mean squared error) drives early stopping.
      var cacheLoss = 0.0
      for (j <- 0 until numExamples) {
        val multiplier = new DoubleMatrix(features(j).length, 1, features(j): _*)
        // FIX: compute the dot product once per example instead of twice.
        val residual = label(j) - weights.dot(multiplier)
        cacheLoss += residual * residual
      }
      loss = 0.5 * cacheLoss / numExamples
      println("The current loss: " + loss)
    }

    sc.stop()
  }
}
参考文章1:http://blog.csdn.net/yangguo_2011/article/details/33859337
参考文章2:http://blog.csdn.net/springlustre/article/details/48828507
1 0
- 【scala】【spark】基于随机梯度下降的简单线性回归编程实现:Linear Regression - SGD
- 【scala】简单线性回归的随机梯度下降算法实现:Linear Regression - SGD
- 简单线性回归的随机梯度下降算法实现:Linear Regression - SGD
- 【Breeze】【Scala】基于梯度下降的简单逻辑回归编程实现 Logistic Regression - GD
- 线性回归、梯度下降(Linear Regression、Gradient Descent)
- 线性回归、梯度下降(Linear Regression、Gradient Descent)
- 线性回归、梯度下降(Linear Regression、Gradient Descent)
- Gradient Descent for Linear Regression,线性回归的梯度下降算法
- linear regression for classification +随机梯度下降+多分类之logistic回归+多分类之线性分类投票法
- 线性回归、梯度下降、逻辑回归(Linear Regression、Gradient Descent、Logistic Regression)
- 基于Tensorflow实现基本的线性回归(Linear regression)
- 基于matlab的梯度下降法实现线性回归
- 线性回归 + 随机梯度下降 + JAVA LIR + Spark LIR
- 利用梯度下降法实现简单的线性回归
- 基于spark用线性回归(linear regression)进行数据预测
- SGD(随机梯度下降)
- 随机梯度下降 (SGD)
- 机器学习笔记01:线性回归(Linear Regression)和梯度下降(Gradient Decent)
- 前台页面 列表动态显示 加 子页面 脚本
- sed 浅谈
- FAST
- MHX 存档修改工具 ver0.9.0.1
- Android 事件拦截机制
- 【scala】【spark】基于随机梯度下降的简单线性回归编程实现:Linear Regression - SGD
- Android应用启动优化:一种DelayLoad的实现和原理(上篇)(转载)
- Makefile工具
- Java中的基础-----static关键字的作用
- public class与class的区别
- XMLHttpRequest2的进步之处
- 聊题“谈、闲、想、省”
- Android开发-API指南-<instrumentation>
- Canvas绘制风向盘