Spark Core 2.0: Partition and CheckpointRDDPartition

来源:互联网 发布:网络布线怎么报价单 编辑:程序博客网 时间:2024/05/01 06:42


/**
 * An identifier for a partition in an RDD.
 *
 * Implementations only have to supply [[index]]; hashing is derived from it.
 */
trait Partition extends Serializable {

  /**
   * Get the partition's index within its parent RDD.
   *
   * @return the index of this partition
   */
  def index: Int

  /**
   * Use the partition index as the hash code.
   *
   * @return the value of [[index]]
   */
  override def hashCode(): Int = index

  /**
   * Delegates to the inherited `equals` implementation.
   *
   * @param other the object to compare against
   * @return whatever the inherited `equals` returns for `other`
   */
  override def equals(other: Any): Boolean = super.equals(other)
}

/**
 * An RDD partition used to recover checkpointed data.
 *
 * @param index the position of this partition, exposed as the `index` member of [[Partition]]
 */
private[spark] class CheckpointRDDPartition(val index: Int)
  extends Partition

/**
 * An RDD that recovers checkpointed data from storage.
 *
 * It is constructed with an empty dependency list (`Nil`), and all checkpointing
 * entry points are overridden as no-ops so that it is never checkpointed itself.
 *
 * @param sc the SparkContext passed through to the [[RDD]] constructor
 * @tparam T the element type of the RDD
 */
private[spark] abstract class CheckpointRDD[T: ClassTag](sc: SparkContext)
  extends RDD[T](sc, Nil) {

  // CheckpointRDD should not be checkpointed again
  override def doCheckpoint(): Unit = { }
  override def checkpoint(): Unit = { }
  override def localCheckpoint(): this.type = this

  // Note: There is a bug in MiMa that complains about `AbstractMethodProblem`s in the
  // base [[org.apache.spark.rdd.RDD]] class if we do not override the following methods.
  // The `???` bodies are placeholders; concrete subclasses must override both methods.
  // scalastyle:off
  protected override def getPartitions: Array[Partition] = ???
  override def compute(p: Partition, tc: TaskContext): Iterator[T] = ???
  // scalastyle:on

}

/**
 * A dummy CheckpointRDD that exists to provide informative error messages during failures.
 *
 * This is simply a placeholder because the original checkpointed RDD is expected to be
 * fully cached. Only if an executor fails or if the user explicitly unpersists the original
 * RDD will Spark ever attempt to compute this CheckpointRDD. When this happens, however,
 * we must provide an informative error message.
 *
 * @param sc the active SparkContext
 * @param rddId the ID of the checkpointed RDD
 * @param numPartitions the number of partitions in the checkpointed RDD
 * @tparam T the element type of the RDD
 */
private[spark] class LocalCheckpointRDD[T: ClassTag](
    sc: SparkContext,
    rddId: Int,
    numPartitions: Int)
  extends CheckpointRDD[T](sc) {

  /**
   * Build the placeholder from an existing RDD, reusing its context, ID and partition count.
   *
   * @param rdd the RDD whose context, `id` and `partitions.length` are copied
   */
  def this(rdd: RDD[T]) =
    this(rdd.context, rdd.id, rdd.partitions.length)

  /**
   * Create one [[CheckpointRDDPartition]] for every index in `[0, numPartitions)`.
   *
   * @return the partitions, ordered by index
   */
  protected override def getPartitions: Array[Partition] =
    Array.tabulate[Partition](numPartitions)(i => new CheckpointRDDPartition(i))

  /**
   * Throw an exception indicating that the relevant block is not found.
   *
   * This should only be called if the original RDD is explicitly unpersisted or if an
   * executor is lost. Under normal circumstances, however, the original RDD (our child)
   * is expected to be fully cached and so all partitions should already be computed and
   * available in the block storage.
   *
   * @param partition the partition whose block could not be found
   * @param context the task context (unused)
   * @throws SparkException always
   */
  override def compute(partition: Partition, context: TaskContext): Iterator[T] = {
    throw new SparkException(
      s"Checkpoint block ${RDDBlockId(rddId, partition.index)} not found! Either the executor " +
      "that originally checkpointed this partition is no longer alive, or the original RDD is " +
      "unpersisted. If this problem persists, you may consider using `rdd.checkpoint()` " +
      "instead, which is slower than local checkpointing but more fault-tolerant.")
  }
}


0 0
原创粉丝点击
热门问题 老师的惩罚 人脸识别 我在镇武司摸鱼那些年 重生之率土为王 我在大康的咸鱼生活 盘龙之生命进化 天生仙种 凡人之先天五行 春回大明朝 姑娘不必设防,我是瞎子 肆意难撩 女尊之夫郎他娇软又呆萌 穿书后,小茶精在疯批怀里撒娇 拯救六界从谈恋爱开始 重生后前世宿敌说要养我 她的沙雕又暴露了 在虐文里绑定了男主系统 我在末世成了领主大人 玄学老祖上综艺后轰动全球了 我继承了老公的神位 仵作掌中娇 世子爷她不可能是女的 穿进红楼后,我成了人生赢家 假死后夫人她称霸黑莲界 奶萌小团宠她觉醒了种田系统 巨星的大佬青梅 总裁的小撩精又生气了 开荒种田:农门辣妻有空间 签到后,小白花她在娱乐圈翻红了 江月照 魂飞魄散的上古大仙在修真界诈尸 快穿:大佬她又抢了反派剧本 夫人她总想逃 穿成阴戾反派未来嫂嫂 上神大人他偏甜系啊 咸鱼后妈带崽在综艺当对照组 系统BUG让我成了舔包专业户 快穿:系统有的是力气和手段 当我的霸总老公有了读心术 穿成炮灰渣妻后我和反派开农场 前任小姐姐帅又飒 弄潮1990从厂长开始 光阴之外 心动侵占 重回逃荒开端,手握空间来逆袭 重生之网红教父 蛮荒求生:反派大佬被迫种田 玄学大佬在星际重建地府 总裁又惹火夫人了 农门空间:重生娇娘撩糙汉 重生后我弃了天运之子