Spark Core 2.0: Partition and CheckpointRDDPartition
/**
 * An identifier for a partition in an RDD.
 */
trait Partition extends Serializable {
  /**
   * Get the partition's index within its parent RDD
   */
  def index: Int

  // A better default implementation of HashCode
  override def hashCode(): Int = index

  override def equals(other: Any): Boolean = super.equals(other)
}
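Any concrete RDD supplies its own Partition implementation to carry per-split metadata; the trait only requires a stable index. A minimal sketch, assuming a hypothetical RDD that needs a per-partition random seed (the class name and the seed field are illustrative, not part of Spark):

import org.apache.spark.Partition

// Hypothetical Partition for an RDD that samples with a per-partition seed.
// Only `index` is required by the trait; `seed` is extra metadata that the
// owning RDD would read back inside its compute() method.
class RandomSeedPartition(override val index: Int, val seed: Long) extends Partition

Note that because hashCode is defined as the index, partitions hash cheaply and deterministically; equality, however, falls back to reference equality, so two partitions with the same index but from different RDDs are not equal.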
/**
 * An RDD partition used to recover checkpointed data.
 */
private[spark] class CheckpointRDDPartition(val index: Int) extends Partition
/**
 * An RDD that recovers checkpointed data from storage.
 */
private[spark] abstract class CheckpointRDD[T: ClassTag](sc: SparkContext)
  extends RDD[T](sc, Nil) {

  // CheckpointRDD should not be checkpointed again
  override def doCheckpoint(): Unit = { }
  override def checkpoint(): Unit = { }
  override def localCheckpoint(): this.type = this

  // Note: There is a bug in MiMa that complains about `AbstractMethodProblem`s in the
  // base [[org.apache.spark.rdd.RDD]] class if we do not override the following methods.
  // scalastyle:off
  protected override def getPartitions: Array[Partition] = ???
  override def compute(p: Partition, tc: TaskContext): Iterator[T] = ???
  // scalastyle:on
}
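For context, this abstract class is what Spark reads checkpointed data back through; reliable checkpointing is driven from the user API as shown below, and the first action materializes a concrete subclass of CheckpointRDD. A sketch, with an illustrative checkpoint directory:

import org.apache.spark.{SparkConf, SparkContext}

object CheckpointDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName("checkpoint-demo").setMaster("local[2]"))
    sc.setCheckpointDir("/tmp/spark-checkpoints") // illustrative path

    val rdd = sc.parallelize(1 to 1000).map(_ * 2)
    rdd.checkpoint()            // mark for reliable checkpointing
    rdd.count()                 // first action writes partitions to the checkpoint dir
    println(rdd.isCheckpointed) // true: lineage is now truncated at the checkpoint
    sc.stop()
  }
}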
/**
 * A dummy CheckpointRDD that exists to provide informative error messages during failures.
 *
 * This is simply a placeholder because the original checkpointed RDD is expected to be
 * fully cached. Only if an executor fails or if the user explicitly unpersists the original
 * RDD will Spark ever attempt to compute this CheckpointRDD. When this happens, however,
 * we must provide an informative error message.
 *
 * @param sc the active SparkContext
 * @param rddId the ID of the checkpointed RDD
 * @param numPartitions the number of partitions in the checkpointed RDD
 */
private[spark] class LocalCheckpointRDD[T: ClassTag](
    sc: SparkContext,
    rddId: Int,
    numPartitions: Int)
  extends CheckpointRDD[T](sc) {

  def this(rdd: RDD[T]) {
    this(rdd.context, rdd.id, rdd.partitions.length)
  }

  protected override def getPartitions: Array[Partition] = {
    (0 until numPartitions).toArray.map { i => new CheckpointRDDPartition(i) }
  }

  /**
   * Throw an exception indicating that the relevant block is not found.
   *
   * This should only be called if the original RDD is explicitly unpersisted or if an
   * executor is lost. Under normal circumstances, however, the original RDD (our child)
   * is expected to be fully cached and so all partitions should already be computed and
   * available in the block storage.
   */
  override def compute(partition: Partition, context: TaskContext): Iterator[T] = {
    throw new SparkException(
      s"Checkpoint block ${RDDBlockId(rddId, partition.index)} not found! Either the executor " +
      s"that originally checkpointed this partition is no longer alive, or the original RDD is " +
      s"unpersisted. If this problem persists, you may consider using `rdd.checkpoint()` " +
      s"instead, which is slower than local checkpointing but more fault-tolerant.")
  }
}
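The error path in compute() above is only reachable once the cached copy disappears. A sketch of the normal local-checkpoint flow, with the failure condition noted in comments (app name and values are illustrative):

import org.apache.spark.{SparkConf, SparkContext}

object LocalCheckpointDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName("local-checkpoint-demo").setMaster("local[2]"))

    val data = sc.parallelize(1 to 100).map(_ + 1)
    data.localCheckpoint() // persists the RDD and truncates its lineage
    data.count()           // action materializes the local checkpoint

    // If the user now called data.unpersist(), or an executor holding the
    // blocks died, any later recomputation would fall through to
    // LocalCheckpointRDD.compute and raise the
    // "Checkpoint block ... not found" SparkException shown above.
    sc.stop()
  }
}

This is the trade-off the exception message spells out: local checkpointing avoids writing to reliable storage, so it is faster, but it cannot survive the loss of the executors holding the blocks; rdd.checkpoint() is slower but fault-tolerant.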