Spark的StorageLevel注解
来源:互联网 发布:红猫网络加速器 编辑:程序博客网 时间:2024/05/16 09:58
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.storage

import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput}
import java.util.concurrent.ConcurrentHashMap

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.memory.MemoryMode
import org.apache.spark.util.Utils

/**
 * :: DeveloperApi ::
 * Flags for controlling the storage of an RDD. Each StorageLevel records whether to use memory,
 * or ExternalBlockStore, whether to drop the RDD to disk if it falls out of memory or
 * ExternalBlockStore, whether to keep the data in memory in a serialized format, and whether
 * to replicate the RDD partitions on multiple nodes.
 *
 * The [[org.apache.spark.storage.StorageLevel$]] singleton object contains some static constants
 * for commonly useful storage levels. To create your own storage level object, use the
 * factory method of the singleton object (`StorageLevel(...)`).
 *
 * The primary constructor is private: outside this file, instances come from the companion
 * object's constants and `apply` factories, or from the public no-arg constructor that exists
 * for deserialization.
 */
@DeveloperApi
class StorageLevel private(
    private var _useDisk: Boolean,
    private var _useMemory: Boolean,
    private var _useOffHeap: Boolean,
    private var _deserialized: Boolean,
    private var _replication: Int = 1)
  extends Externalizable {

  // TODO: Also add fields for caching priority, dataset ID, and flushing.

  /**
   * Builds a level from its packed integer form (see `toInt`):
   * bit 8 = disk, bit 4 = memory, bit 2 = off-heap, bit 1 = deserialized.
   */
  private def this(flags: Int, replication: Int) =
    this((flags & 8) != 0, (flags & 4) != 0, (flags & 2) != 0, (flags & 1) != 0, replication)

  /** No-arg constructor; the fields are expected to be overwritten by `readExternal`. */
  def this() = this(false, true, false, false)  // For deserialization

  def useDisk: Boolean = _useDisk
  def useMemory: Boolean = _useMemory
  def useOffHeap: Boolean = _useOffHeap
  def deserialized: Boolean = _deserialized
  def replication: Int = _replication

  // The replication factor must stay below 40 so that `hashCode` (toInt * 41 + replication)
  // keeps the flag bits and the replication factor from overlapping.
  assert(replication < 40, "Replication restricted to be less than 40 for calculating hash codes")

  // Off-heap storage cannot hold deserialized objects.
  if (useOffHeap) {
    require(!deserialized, "Off-heap storage level does not support deserialized storage")
  }

  /** Memory mode implied by this level: off-heap when `useOffHeap` is set, on-heap otherwise. */
  private[spark] def memoryMode: MemoryMode = {
    if (useOffHeap) MemoryMode.OFF_HEAP
    else MemoryMode.ON_HEAP
  }

  /** Returns a new instance carrying the same five settings as this one. */
  override def clone(): StorageLevel = {
    new StorageLevel(useDisk, useMemory, useOffHeap, deserialized, replication)
  }

  /** Two levels are equal when all four flags and the replication factor match. */
  override def equals(other: Any): Boolean = other match {
    case s: StorageLevel =>
      s.useDisk == useDisk &&
      s.useMemory == useMemory &&
      s.useOffHeap == useOffHeap &&
      s.deserialized == deserialized &&
      s.replication == replication
    case _ =>
      false
  }

  /** A level is valid when it uses memory or disk and its replication factor is positive. */
  def isValid: Boolean = (useMemory || useDisk) && (replication > 0)

  /**
   * Packs the four boolean flags into an integer bit mask:
   * disk = 8, memory = 4, off-heap = 2, deserialized = 1.
   * For example, a disk-only level yields binary 1000.
   */
  def toInt: Int =
    (if (_useDisk) 8 else 0) |
      (if (_useMemory) 4 else 0) |
      (if (_useOffHeap) 2 else 0) |
      (if (_deserialized) 1 else 0)

  /** Serialization hook: writes the packed flags as one byte, then the replication as one byte. */
  override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException {
    out.writeByte(toInt)
    out.writeByte(_replication)
  }

  /**
   * Deserialization hook: reads back the two bytes produced by `writeExternal` and overwrites
   * this instance's fields with them.
   */
  // NOTE(review): the constructor's assert/require checks are not re-run on the values read here.
  override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException {
    val flags = in.readByte()
    _useDisk = (flags & 8) != 0
    _useMemory = (flags & 4) != 0
    _useOffHeap = (flags & 2) != 0
    _deserialized = (flags & 1) != 0
    _replication = in.readByte()
  }

  /** Replaces a freshly deserialized instance with the cached instance equal to it. */
  @throws(classOf[IOException])
  private def readResolve(): Object = StorageLevel.getCachedStorageLevel(this)

  /** Lists the enabled flags followed by the replica count, e.g. `StorageLevel(disk, 2 replicas)`. */
  override def toString: String = {
    val disk = if (useDisk) "disk" else ""
    val memory = if (useMemory) "memory" else ""
    val heap = if (useOffHeap) "offheap" else ""
    val deserialize = if (deserialized) "deserialized" else ""

    val output =
      Seq(disk, memory, heap, deserialize, s"$replication replicas").filter(_.nonEmpty)
    s"StorageLevel(${output.mkString(", ")})"
  }

  /**
   * Hash code is `toInt * 41 + replication`. Because replication is asserted to be below 40,
   * non-negative replication values never spill into the multiples of 41 that carry the flag
   * bits, so such levels with different flags or replication get different hash codes.
   */
  override def hashCode(): Int = toInt * 41 + replication

  /** Human-readable summary, e.g. `Disk Memory Serialized 1x Replicated`. */
  def description: String = {
    val diskPart = if (useDisk) "Disk " else ""
    val memoryPart =
      if (!useMemory) ""
      else if (useOffHeap) "Memory (off heap) "
      else "Memory "
    val formatPart = if (deserialized) "Deserialized " else "Serialized "
    s"$diskPart$memoryPart$formatPart${replication}x Replicated"
  }
}


/**
 * Various [[org.apache.spark.storage.StorageLevel]] defined and utility functions for creating
 * new storage levels.
 *
 * As the companion object of the class, this plays the role that static members and static
 * methods play in Java.
 */
object StorageLevel {
  val NONE = new StorageLevel(false, false, false, false)
  val DISK_ONLY = new StorageLevel(true, false, false, false)
  val DISK_ONLY_2 = new StorageLevel(true, false, false, false, 2)
  val MEMORY_ONLY = new StorageLevel(false, true, false, true)
  val MEMORY_ONLY_2 = new StorageLevel(false, true, false, true, 2)
  val MEMORY_ONLY_SER = new StorageLevel(false, true, false, false)
  val MEMORY_ONLY_SER_2 = new StorageLevel(false, true, false, false, 2)
  val MEMORY_AND_DISK = new StorageLevel(true, true, false, true)
  val MEMORY_AND_DISK_2 = new StorageLevel(true, true, false, true, 2)
  val MEMORY_AND_DISK_SER = new StorageLevel(true, true, false, false)
  val MEMORY_AND_DISK_SER_2 = new StorageLevel(true, true, false, false, 2)
  val OFF_HEAP = new StorageLevel(true, true, true, false, 1)

  /**
   * :: DeveloperApi ::
   * Return the StorageLevel object with the specified name.
   *
   * @param s name of one of the constants defined on this object (exact, case-sensitive match)
   * @throws IllegalArgumentException if `s` does not name one of those constants
   */
  @DeveloperApi
  def fromString(s: String): StorageLevel = s match {
    case "NONE" => NONE
    case "DISK_ONLY" => DISK_ONLY
    case "DISK_ONLY_2" => DISK_ONLY_2
    case "MEMORY_ONLY" => MEMORY_ONLY
    case "MEMORY_ONLY_2" => MEMORY_ONLY_2
    case "MEMORY_ONLY_SER" => MEMORY_ONLY_SER
    case "MEMORY_ONLY_SER_2" => MEMORY_ONLY_SER_2
    case "MEMORY_AND_DISK" => MEMORY_AND_DISK
    case "MEMORY_AND_DISK_2" => MEMORY_AND_DISK_2
    case "MEMORY_AND_DISK_SER" => MEMORY_AND_DISK_SER
    case "MEMORY_AND_DISK_SER_2" => MEMORY_AND_DISK_SER_2
    case "OFF_HEAP" => OFF_HEAP
    case _ => throw new IllegalArgumentException(s"Invalid StorageLevel: $s")
  }

  /**
   * :: DeveloperApi ::
   * Create a new StorageLevel object.
   */
  @DeveloperApi
  def apply(
      useDisk: Boolean,
      useMemory: Boolean,
      useOffHeap: Boolean,
      deserialized: Boolean,
      replication: Int): StorageLevel = {
    getCachedStorageLevel(
      new StorageLevel(useDisk, useMemory, useOffHeap, deserialized, replication))
  }

  /**
   * :: DeveloperApi ::
   * Create a new StorageLevel object without setting useOffHeap.
   */
  @DeveloperApi
  def apply(
      useDisk: Boolean,
      useMemory: Boolean,
      deserialized: Boolean,
      replication: Int = 1): StorageLevel = {
    getCachedStorageLevel(new StorageLevel(useDisk, useMemory, false, deserialized, replication))
  }

  /**
   * :: DeveloperApi ::
   * Create a new StorageLevel object from its integer representation.
   */
  @DeveloperApi
  def apply(flags: Int, replication: Int): StorageLevel = {
    getCachedStorageLevel(new StorageLevel(flags, replication))
  }

  /**
   * :: DeveloperApi ::
   * Read StorageLevel object from ObjectInput stream.
   */
  @DeveloperApi
  def apply(in: ObjectInput): StorageLevel = {
    val obj = new StorageLevel()
    obj.readExternal(in)
    getCachedStorageLevel(obj)
  }

  /** Concurrent map from each storage level to its cached instance (key and value are equal). */
  private[spark] val storageLevelCache = new ConcurrentHashMap[StorageLevel, StorageLevel]()

  /**
   * Caches `level` if no equal level is cached yet, and returns the cached instance.
   *
   * `putIfAbsent` returns the previously mapped value, or null when `level` was just inserted,
   * so one atomic map operation is enough and the result is never null.
   */
  private[spark] def getCachedStorageLevel(level: StorageLevel): StorageLevel = {
    Option(storageLevelCache.putIfAbsent(level, level)).getOrElse(level)
  }
}
0 0
- spark的StorageLevel注解
- spark的StorageLevel源码分析
- spark 2.1 StorageLevel
- spark Streaming 存储级别定义StorageLevel
- Spark源码之persist方法,cache方法以及StorageLevel
- 注解的注解--MyAnnotation2
- Spark---Spark开头的杂项
- 【Spark】Spark的Shuffle机制
- 注解的入门,什么是注解
- spring注解 springmvc的注解
- 注解的理解、自定义注解
- 注解之注解的基本概念
- 了解注解以及注解的注解---元注解
- spark学习十五 spark的容错分析
- Spark源码阅读笔记:Spark的Task
- spark-02-学习spark需要的阶段
- 【Spark】Spark的Standalone模式安装部署
- spark下的分词--spark+jieba
- 欢迎使用CSDN-markdown编辑器
- 初学java(一)
- 刀剑神域序列之争(OS篇)OP【Catch the Moment】LiSA
- POJ-3244 Difference between Triplets
- 在 Win10 系统下安装 JDK 及配置环境变量的方法
- spark的StorageLevel注解
- Asp.net mvc 知多少(十)
- Android 编程技巧之 ----- 自定义 View 踩坑总结
- 高精度基础之减法格式1
- echart实现ajax动态数据加载(前台js代码)
- Java设计模式之单例(Singleton)模式解析
- POJ 1019 Number Sequence (数学+预处理 循环递增序列第k位数字)
- python2和python3 都有如何使用 pip
- 修理牛棚