While writing this post I looked through a number of online references and found that reference [1] gives the most systematic treatment, so I recommend reading it. It does, however, contain a few errors, which I briefly point out here; if a derivation there does not go through, my summary below may help.
------------------------------------ Preface
Background:
(1) In dose-response studies in medicine, efficacy and side effects show certain trends as the dose increases: for example, a higher dose often means higher efficacy, but also higher toxicity.
(2) One goal is to evaluate a drug's toxicity at different dose levels and to recommend a dose that is both safe and effective for patients; this dose is called the Maximum Tolerated Dose (MTD).
(3) As the dose increases, the drug's toxicity is non-decreasing. The MTD is defined as the highest dose level whose toxicity probability does not exceed the target toxicity level.
(4) However, the toxicity probabilities estimated from the observed toxicity rates at each dose level may not form a non-decreasing function of the dose level, so we can apply isotonic regression to restore monotonicity (see the sketch after this list).
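To make item (4) concrete, below is a minimal sketch of the pool-adjacent-violators (PAVA) idea. It is my own illustration on a hypothetical set of observed toxicity rates, not the Spark code analyzed later: whenever the observed rate drops at a higher dose, the violating neighbours are pooled into their average, which restores the non-decreasing order.

// Minimal PAVA sketch on hypothetical per-dose toxicity rates (equal weights).
// Illustration only; the Spark implementation is analyzed further below.
object PavaSketch {
  def pava(y: Array[Double]): Array[Double] = {
    // Each block keeps (mean, count); merge while the last two blocks violate the order.
    val blocks = scala.collection.mutable.ArrayBuffer.empty[(Double, Int)]
    for (v <- y) {
      blocks += ((v, 1))
      while (blocks.length > 1 && blocks(blocks.length - 2)._1 > blocks.last._1) {
        val (m2, n2) = blocks.remove(blocks.length - 1)
        val (m1, n1) = blocks.remove(blocks.length - 1)
        blocks += (((m1 * n1 + m2 * n2) / (n1 + n2), n1 + n2))
      }
    }
    // Expand each block back to one fitted value per observation.
    blocks.flatMap { case (m, n) => Array.fill(n)(m) }.toArray
  }

  def main(args: Array[String]): Unit = {
    // Hypothetical observed toxicity rates at 5 increasing dose levels.
    val observed = Array(0.05, 0.10, 0.08, 0.20, 0.18)
    println(pava(observed).mkString(", "))
  }
}

Pooling the violating pairs (0.10, 0.08) and (0.20, 0.18) gives the non-decreasing sequence 0.05, 0.09, 0.09, 0.19, 0.19.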
L2 isotonic regression
The L2 isotonic regression algorithm
For the detailed definitions and propositions, see reference [1].
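For orientation, the weighted L2 isotonic regression problem that the Spark code below solves can be stated as follows (standard formulation; the notation here is mine): given observations y_1, ..., y_n sorted by their features and weights w_i > 0, find fitted values minimizing the weighted squared error under a monotonicity constraint:

\min_{\hat{y}_1,\dots,\hat{y}_n} \; \sum_{i=1}^{n} w_i \, (y_i - \hat{y}_i)^2
\quad \text{s.t.} \quad \hat{y}_1 \le \hat{y}_2 \le \dots \le \hat{y}_n

For antitonic regression (isotonic = false in the code below) the inequality direction is reversed; as the run method shows, Spark reduces this case to the isotonic one by negating the labels.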
Spark source code analysis (see the appendix for the full diagram)
// Spark MLlib source for isotonic regression (package org.apache.spark.mllib.regression).
// The model stores the boundaries of the fitted step function and the prediction
// associated with each boundary; `isotonic` marks whether the predictions are
// non-decreasing (true) or non-increasing (false).
@Since("1.3.0")
class IsotonicRegressionModel @Since("1.3.0") (
    @Since("1.3.0") val boundaries: Array[Double],
    @Since("1.3.0") val predictions: Array[Double],
    @Since("1.3.0") val isotonic: Boolean) extends Serializable with Saveable {

  private val predictionOrd = if (isotonic) Ordering[Double] else Ordering[Double].reverse

  // Boundaries must be sorted ascending; predictions must be monotone in the
  // direction given by `isotonic`.
  require(boundaries.length == predictions.length)
  assertOrdered(boundaries)
  assertOrdered(predictions)(predictionOrd)
- @Since("1.4.0")
- def this(boundaries: java.lang.Iterable[Double],
- predictions: java.lang.Iterable[Double],
- isotonic: java.lang.Boolean) = {
- this(boundaries.asScala.toArray, predictions.asScala.toArray, isotonic)
- }
-
-
- private def assertOrdered(xs: Array[Double])(implicit ord: Ordering[Double]): Unit = {
- var i = 1
- val len = xs.length
- while (i < len) {
- require(ord.compare(xs(i - 1), xs(i)) <= 0,
- s"Elements (${xs(i - 1)}, ${xs(i)}) are not ordered.")
- i += 1
- }
- }
  // Predicts labels for an RDD of features.
  @Since("1.3.0")
  def predict(testData: RDD[Double]): RDD[Double] = {
    testData.map(predict)
  }

  // Java-friendly version of the RDD-based predict.
  @Since("1.3.0")
  def predict(testData: JavaDoubleRDD): JavaDoubleRDD = {
    JavaDoubleRDD.fromRDD(predict(testData.rdd.retag.asInstanceOf[RDD[Double]]))
  }
  // Predicts a single label: values outside the boundary range are clamped to the
  // first/last prediction, an exact boundary hit returns the stored prediction, and
  // a value between two boundaries is linearly interpolated.
  @Since("1.3.0")
  def predict(testData: Double): Double = {

    def linearInterpolation(x1: Double, y1: Double, x2: Double, y2: Double, x: Double): Double = {
      y1 + (y2 - y1) * (x - x1) / (x2 - x1)
    }

    val foundIndex = binarySearch(boundaries, testData)
    val insertIndex = -foundIndex - 1

    // foundIndex >= 0 means an exact match; otherwise insertIndex is the position
    // where testData would be inserted to keep boundaries sorted.
    if (insertIndex == 0) {
      predictions.head
    } else if (insertIndex == boundaries.length) {
      predictions.last
    } else if (foundIndex < 0) {
      linearInterpolation(
        boundaries(insertIndex - 1),
        predictions(insertIndex - 1),
        boundaries(insertIndex),
        predictions(insertIndex),
        testData)
    } else {
      predictions(foundIndex)
    }
  }

  private[mllib] def boundaryVector: Vector = Vectors.dense(boundaries)

  private[mllib] def predictionVector: Vector = Vectors.dense(predictions)

  @Since("1.4.0")
  override def save(sc: SparkContext, path: String): Unit = {
    IsotonicRegressionModel.SaveLoadV1_0.save(sc, path, boundaries, predictions, isotonic)
  }

  override protected def formatVersion: String = "1.0"
}
@Since("1.4.0")
object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {

  import org.apache.spark.mllib.util.Loader._

  private object SaveLoadV1_0 {

    def thisFormatVersion: String = "1.0"

    def thisClassName: String = "org.apache.spark.mllib.regression.IsotonicRegressionModel"

    // (boundary, prediction) pairs persisted as Parquet.
    case class Data(boundary: Double, prediction: Double)

    def save(
        sc: SparkContext,
        path: String,
        boundaries: Array[Double],
        predictions: Array[Double],
        isotonic: Boolean): Unit = {
      val sqlContext = SQLContext.getOrCreate(sc)

      // Metadata (class name, format version, isotonic flag) is saved as JSON text.
      val metadata = compact(render(
        ("class" -> thisClassName) ~ ("version" -> thisFormatVersion) ~
          ("isotonic" -> isotonic)))
      sc.parallelize(Seq(metadata), 1).saveAsTextFile(metadataPath(path))

      sqlContext.createDataFrame(
        boundaries.toSeq.zip(predictions).map { case (b, p) => Data(b, p) }
      ).write.parquet(dataPath(path))
    }

    def load(sc: SparkContext, path: String): (Array[Double], Array[Double]) = {
      val sqlContext = SQLContext.getOrCreate(sc)
      val dataRDD = sqlContext.read.parquet(dataPath(path))

      checkSchema[Data](dataRDD.schema)
      val dataArray = dataRDD.select("boundary", "prediction").collect()
      val (boundaries, predictions) = dataArray.map { x =>
        (x.getDouble(0), x.getDouble(1))
      }.toList.sortBy(_._1).unzip
      (boundaries.toArray, predictions.toArray)
    }
  }

  @Since("1.4.0")
  override def load(sc: SparkContext, path: String): IsotonicRegressionModel = {
    implicit val formats = DefaultFormats
    val (loadedClassName, version, metadata) = loadMetadata(sc, path)
    val isotonic = (metadata \ "isotonic").extract[Boolean]
    val classNameV1_0 = SaveLoadV1_0.thisClassName
    (loadedClassName, version) match {
      case (className, "1.0") if className == classNameV1_0 =>
        val (boundaries, predictions) = SaveLoadV1_0.load(sc, path)
        new IsotonicRegressionModel(boundaries, predictions, isotonic)
      case _ => throw new Exception(
        s"IsotonicRegressionModel.load did not recognize model with (className, format version):" +
          s"($loadedClassName, $version). Supported:\n" +
          s"  ($classNameV1_0, 1.0)"
      )
    }
  }
}
// The IsotonicRegression runner: setIsotonic chooses isotonic (non-decreasing, the
// default) or antitonic (non-increasing) regression, and run() fits a model with a
// parallelized pool-adjacent-violators algorithm (PAVA).
@Since("1.3.0")
class IsotonicRegression private (private var isotonic: Boolean) extends Serializable {

  // Constructs IsotonicRegression with the default parameter (isotonic = true).
  @Since("1.3.0")
  def this() = this(true)

  @Since("1.3.0")
  def setIsotonic(isotonic: Boolean): this.type = {
    this.isotonic = isotonic
    this
  }

  // Fits a model on an RDD of (label, feature, weight) tuples. The antitonic case is
  // reduced to the isotonic one by negating labels before running PAVA and negating
  // the fitted predictions afterwards.
  @Since("1.3.0")
  def run(input: RDD[(Double, Double, Double)]): IsotonicRegressionModel = {
    val preprocessedInput = if (isotonic) {
      input
    } else {
      input.map(x => (-x._1, x._2, x._3))
    }

    val pooled = parallelPoolAdjacentViolators(preprocessedInput)

    val predictions = if (isotonic) pooled.map(_._1) else pooled.map(-_._1)
    val boundaries = pooled.map(_._2)

    new IsotonicRegressionModel(boundaries, predictions, isotonic)
  }
  // Java-friendly version of run.
  @Since("1.3.0")
  def run(input: JavaRDD[(JDouble, JDouble, JDouble)]): IsotonicRegressionModel = {
    run(input.rdd.retag.asInstanceOf[RDD[(Double, Double, Double)]])
  }

  // Sequential PAVA on an array of (label, feature, weight) tuples already sorted by
  // feature: adjacent points that violate monotonicity are pooled into their weighted
  // average, and the result is compressed so that each run of equal predictions is
  // represented only by its end points.
  private def poolAdjacentViolators(
      input: Array[(Double, Double, Double)]): Array[(Double, Double, Double)] = {

    if (input.isEmpty) {
      return Array.empty
    }

    // Replaces the labels in [start, end] by their weighted average.
    def pool(input: Array[(Double, Double, Double)], start: Int, end: Int): Unit = {
      val poolSubArray = input.slice(start, end + 1)

      val weightedSum = poolSubArray.map(lp => lp._1 * lp._3).sum
      val weight = poolSubArray.map(_._3).sum

      var i = start
      while (i <= end) {
        input(i) = (weightedSum / weight, input(i)._2, input(i)._3)
        i = i + 1
      }
    }

    var i = 0
    val len = input.length
    while (i < len) {
      var j = i

      // Find the monotonicity-violating sequence starting at i.
      while (j < len - 1 && input(j)._1 > input(j + 1)._1) {
        j = j + 1
      }

      // If there is no violation, move to the next point; otherwise pool the violating
      // sequence and back-track, because pooling may create a new violation with
      // earlier points.
      if (i == j) {
        i = i + 1
      } else {
        while (i >= 0 && input(i)._1 > input(i + 1)._1) {
          pool(input, i, j)
          i = i - 1
        }

        i = j
      }
    }

    // Compression: for each run of points with the same prediction, keep the left-most
    // point carrying the accumulated weight and, if the run spans more than one feature
    // value, a zero-weight point at the right boundary.
    val compressed = ArrayBuffer.empty[(Double, Double, Double)]

    var (curLabel, curFeature, curWeight) = input.head
    var rightBound = curFeature
    def merge(): Unit = {
      compressed += ((curLabel, curFeature, curWeight))
      if (rightBound > curFeature) {
        compressed += ((curLabel, rightBound, 0.0))
      }
    }
    i = 1
    while (i < input.length) {
      val (label, feature, weight) = input(i)
      if (label == curLabel) {
        curWeight += weight
        rightBound = feature
      } else {
        merge()
        curLabel = label
        curFeature = feature
        curWeight = weight
        rightBound = curFeature
      }
      i += 1
    }
    merge()

    compressed.toArray
  }

  // Parallel PAVA: sort by feature, run PAVA independently inside each partition
  // (glom turns a partition into one array), then collect the per-partition results
  // to the driver, re-sort, and run one final sequential PAVA pass.
  private def parallelPoolAdjacentViolators(
      input: RDD[(Double, Double, Double)]): Array[(Double, Double, Double)] = {
    val parallelStepResult = input
      .sortBy(x => (x._2, x._1))
      .glom()
      .flatMap(poolAdjacentViolators)
      .collect()
      .sortBy(x => (x._2, x._1))
    poolAdjacentViolators(parallelStepResult)
  }
}
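Before the full experiment, here is a small sketch of how predict(Double) behaves, for example pasted into spark-shell. The boundary and prediction values below are made up purely for illustration; the behaviour follows directly from the predict method above.

import org.apache.spark.mllib.regression.IsotonicRegressionModel

// Hypothetical boundaries/predictions, only to illustrate predict(Double).
val model = new IsotonicRegressionModel(
  boundaries = Array(1.0, 2.0, 4.0),
  predictions = Array(0.1, 0.3, 0.7),
  isotonic = true)

model.predict(0.5)   // 0.1  -> below the first boundary: predictions.head
model.predict(2.0)   // 0.3  -> exact boundary match: predictions(foundIndex)
model.predict(3.0)   // 0.5  -> between 2.0 and 4.0: linear interpolation
model.predict(5.0)   // 0.7  -> above the last boundary: predictions.last

Values outside the boundary range are clamped to the first or last prediction, an exact boundary hit returns the stored prediction, and anything in between is linearly interpolated.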
Spark experiment
import org.apache.spark.mllib.regression.{IsotonicRegression, IsotonicRegressionModel}
import org.apache.spark.{SparkConf, SparkContext}

object IsotonicRegressionExample {
  def main(args: Array[String]) {
    val conf = new SparkConf().setAppName("IsotonicRegressionExample").setMaster("local")
    val sc = new SparkContext(conf)

    val data = sc.textFile("C:\\Users\\alienware\\IdeaProjects\\sparkCore\\data\\mllib\\sample_isotonic_regression_data.txt")

    // Create label, feature, weight tuples from input data with weight set to default value 1.0.
    val parsedData = data.map { line =>
      val parts = line.split(',').map(_.toDouble)
      (parts(0), parts(1), 1.0)
    }

    // Split data into training (60%) and test (40%) sets.
    val splits = parsedData.randomSplit(Array(0.6, 0.4), seed = 11L)
    val training = splits(0)
    val test = splits(1)

    // Create isotonic regression model from training data.
    // Isotonic parameter defaults to true so it is only shown for demonstration.
    val model = new IsotonicRegression().setIsotonic(true).run(training)

    // Create tuples of predicted and real labels.
    val predictionAndLabel = test.map { point =>
      val predictedLabel = model.predict(point._2)
      (predictedLabel, point._1)
    }
    // predictionAndLabel.foreach(println) prints:
    /*
     * (0.16868944399999988,0.31208567)   (0.16868944399999988,0.35900051)
     * (0.16868944399999988,0.03926568)   (0.16868944399999988,0.12952575)
     * (0.16868944399999988,0.0)          (0.16868944399999988,0.01376849)
     * (0.16868944399999988,0.13105558)   (0.19545421571428565,0.13717491)
     * (0.19545421571428565,0.19020908)   (0.19545421571428565,0.19581846)
     * (0.31718510999999966,0.29576747)   (0.5322114566666667,0.4854666)
     * (0.5368859433333334,0.49209587)    (0.5602243760000001,0.5017848)
     * (0.5701674724126985,0.58286588)    (0.5801105688253968,0.64660887)
     * (0.5900536652380952,0.65782764)    (0.5900536652380952,0.63029067)
     * (0.5900536652380952,0.63233044)    (0.5900536652380952,0.33299337)
     * (0.5900536652380952,0.36206017)    (0.5900536652380952,0.56348802)
     * (0.5900536652380952,0.48393677)    (0.5900536652380952,0.46965834)
     * (0.5900536652380952,0.45843957)    (0.5900536652380952,0.47118817)
     * (0.5900536652380952,0.51555329)    (0.5900536652380952,0.56297807)
     * (0.6881693,0.65119837)             (0.7135390099999999,0.66598674)
     * (0.861295255,0.91330954)           (0.903875573,0.90719021)
     * (0.9275879659999999,0.93115757)    (0.9275879659999999,0.91942886)
     */

    // Calculate mean squared error between predicted and real labels.
    val meanSquaredError = predictionAndLabel.map { case (p, l) => math.pow((p - l), 2) }.mean()
    println("Mean Squared Error = " + meanSquaredError)
    // Mean Squared Error = 0.010049744711808193

    // Save and load model.
    model.save(sc, "target/tmp/myIsotonicRegressionModel")
    val sameModel = IsotonicRegressionModel.load(sc, "target/tmp/myIsotonicRegressionModel")
  }
}
References
[1] http://wenku.baidu.com/link?url=rbcbI3L7M83F62Aey_kyGZk7kwuJxr5ZW61EqFH5T45umsdZOCrAbfpl8a1yuMyzObd1_kG-kQ9DPcSTl7wnoX6UyNN_gT5bBYh_p1yMgD7