Spark 2.1 RDD persist process

来源:互联网 发布:移动数据 编辑:程序博客网 时间:2024/05/20 18:46
  /**
   * Persist this RDD using the default storage level, `MEMORY_ONLY`.
   *
   * Shorthand for calling `persist(StorageLevel.MEMORY_ONLY)`.
   *
   * @return whatever the single-argument `persist` overload returns for `MEMORY_ONLY`
   */
  def persist(): this.type = {
    persist(StorageLevel.MEMORY_ONLY)
  }
  /**
   * Set this RDD's storage level to persist its values across operations after the first time
   * it is computed. This can only be used to assign a new storage level if the RDD does not
   * have a storage level set yet. Local checkpointing is an exception.
   *
   * @param newLevel the storage level requested by the caller
   * @return the result of the private two-argument `persist` overload
   */
  def persist(newLevel: StorageLevel): this.type = {
    if (isLocallyCheckpointed) {
      // This means the user previously called localCheckpoint(), which should have already
      // marked this RDD for persisting. Here we should override the old storage level with
      // one that is explicitly requested by the user (after adapting it to use disk).
      persist(LocalRDDCheckpointData.transformStorageLevel(newLevel), allowOverride = true)
    } else {
      // Regular path: an already-assigned level must not be silently replaced.
      persist(newLevel, allowOverride = false)
    }
  }

persist(newLevel: StorageLevel, allowOverride: Boolean)

/**
 * Mark this RDD for persisting using the specified level.
 *
 * @param newLevel the target storage level
 * @param allowOverride whether to override any existing level with the new one
 * @return this RDD, after its `storageLevel` has been set to `newLevel`
 * @throws UnsupportedOperationException if a level other than `StorageLevel.NONE` is already
 *                                       assigned, it differs from `newLevel`, and
 *                                       `allowOverride` is false
 */
private def persist(newLevel: StorageLevel, allowOverride: Boolean): this.type = {
  // TODO: Handle changes of StorageLevel
  if (storageLevel != StorageLevel.NONE && newLevel != storageLevel && !allowOverride) {
    throw new UnsupportedOperationException(
      "Cannot change storage level of an RDD after it was already assigned a level")
  }
  // If this is the first time this RDD is marked for persisting, register it
  // with the SparkContext for cleanups and accounting. Do this only once.
  // This check must run before `storageLevel` is overwritten below.
  if (storageLevel == StorageLevel.NONE) {
    sc.cleaner.foreach(_.registerRDDForCleanup(this))
    sc.persistRDD(this)
  }
  storageLevel = newLevel
  this
}
  /**
   * Register an RDD to be persisted in memory and/or disk storage.
   *
   * Records `rdd` in `persistentRdds`, keyed by `rdd.id`.
   *
   * @param rdd the RDD to register
   */
  private[spark] def persistRDD(rdd: RDD[_]): Unit = {
    persistentRdds(rdd.id) = rdd
  }