Spark 2.1 CallerContext


object CallerContext

private[util] object CallerContext extends Logging {
  val callerContextSupported: Boolean = {
    SparkHadoopUtil.get.conf.getBoolean("hadoop.caller.context.enabled", false) && {
      try {
        // `Utils.classForName` will make `ReplSuite` fail with `ClassCircularityError` in
        // master Maven build, so do not use it before resolving SPARK-17714.
        // scalastyle:off classforname
        Class.forName("org.apache.hadoop.ipc.CallerContext")
        Class.forName("org.apache.hadoop.ipc.CallerContext$Builder")
        // scalastyle:on classforname
        true
      } catch {
        case _: ClassNotFoundException =>
          false
        case NonFatal(e) =>
          logWarning("Fail to load the CallerContext class", e)
          false
      }
    }
  }
}
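The check above only passes when `hadoop.caller.context.enabled` is true in the Hadoop configuration and the Hadoop 2.8+ `CallerContext` classes are on the classpath. A minimal sketch of how an application could turn the feature on from the Spark side (not part of the original code; it relies on Spark copying `spark.hadoop.*` entries into the Hadoop Configuration returned by `SparkHadoopUtil.get.conf`):

import org.apache.spark.{SparkConf, SparkContext}

object EnableCallerContextExample {
  def main(args: Array[String]): Unit = {
    // "spark.hadoop." prefixed keys are propagated into the Hadoop conf,
    // so this makes callerContextSupported evaluate to true when the
    // Hadoop 2.8+ CallerContext classes are available.
    val conf = new SparkConf()
      .setAppName("caller-context-demo")
      .set("spark.hadoop.hadoop.caller.context.enabled", "true")
    val sc = new SparkContext(conf)
    // ... run jobs; HDFS audit logs and the Yarn RM audit log will then
    // carry the Spark caller context for each operation.
    sc.stop()
  }
}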

class CallerContext

/**
 * An utility class used to set up Spark caller contexts to HDFS and Yarn. The `context` will be
 * constructed by parameters passed in.
 * When Spark applications run on Yarn and HDFS, its caller contexts will be written into Yarn RM
 * audit log and hdfs-audit.log. That can help users to better diagnose and understand how
 * specific applications impacting parts of the Hadoop system and potential problems they may be
 * creating (e.g. overloading NN). As HDFS mentioned in HDFS-9184, for a given HDFS operation, it's
 * very helpful to track which upper level job issues it.
 *
 * @param from who sets up the caller context (TASK, CLIENT, APPMASTER)
 *
 * The parameters below are optional:
 * @param appId id of the app this task belongs to
 * @param appAttemptId attempt id of the app this task belongs to
 * @param jobId id of the job this task belongs to
 * @param stageId id of the stage this task belongs to
 * @param stageAttemptId attempt id of the stage this task belongs to
 * @param taskId task id
 * @param taskAttemptNumber task attempt id
 */
private[spark] class CallerContext(
    from: String,
    appId: Option[String] = None,
    appAttemptId: Option[String] = None,
    jobId: Option[Int] = None,
    stageId: Option[Int] = None,
    stageAttemptId: Option[Int] = None,
    taskId: Option[Long] = None,
    taskAttemptNumber: Option[Int] = None) extends Logging {

  val appIdStr = if (appId.isDefined) s"_${appId.get}" else ""
  val appAttemptIdStr = if (appAttemptId.isDefined) s"_${appAttemptId.get}" else ""
  val jobIdStr = if (jobId.isDefined) s"_JId_${jobId.get}" else ""
  val stageIdStr = if (stageId.isDefined) s"_SId_${stageId.get}" else ""
  val stageAttemptIdStr = if (stageAttemptId.isDefined) s"_${stageAttemptId.get}" else ""
  val taskIdStr = if (taskId.isDefined) s"_TId_${taskId.get}" else ""
  val taskAttemptNumberStr =
    if (taskAttemptNumber.isDefined) s"_${taskAttemptNumber.get}" else ""

  val context = "SPARK_" + from + appIdStr + appAttemptIdStr +
    jobIdStr + stageIdStr + stageAttemptIdStr + taskIdStr + taskAttemptNumberStr

  /**
   * Set up the caller context [[context]] by invoking Hadoop CallerContext API of
   * [[org.apache.hadoop.ipc.CallerContext]], which was added in hadoop 2.8.
   */
  def setCurrentContext(): Unit = {
    if (CallerContext.callerContextSupported) {
      try {
        // `Utils.classForName` will make `ReplSuite` fail with `ClassCircularityError` in
        // master Maven build, so do not use it before resolving SPARK-17714.
        // scalastyle:off classforname
        val callerContext = Class.forName("org.apache.hadoop.ipc.CallerContext")
        val builder = Class.forName("org.apache.hadoop.ipc.CallerContext$Builder")
        // scalastyle:on classforname
        val builderInst = builder.getConstructor(classOf[String]).newInstance(context)
        val hdfsContext = builder.getMethod("build").invoke(builderInst)
        callerContext.getMethod("setCurrent", callerContext).invoke(null, hdfsContext)
      } catch {
        case NonFatal(e) =>
          logWarning("Fail to set Spark caller context", e)
      }
    }
  }
}
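To see how the pieces fit together, here is a minimal usage sketch (the application and id values are hypothetical, not taken from the original post): a task-side caller fills in the optional ids, the class concatenates them into the `context` string, and `setCurrentContext()` hands that string to HDFS/Yarn through the reflective Hadoop API shown above.

// Hypothetical ids, for illustration only.
val callerContext = new CallerContext(
  from = "TASK",
  appId = Some("application_1484081960234_0001"),
  appAttemptId = Some("1"),
  jobId = Some(0),
  stageId = Some(1),
  stageAttemptId = Some(0),
  taskId = Some(14L),
  taskAttemptNumber = Some(0))

// The concatenated context string becomes:
// "SPARK_TASK_application_1484081960234_0001_1_JId_0_SId_1_0_TId_14_0"
// and is what shows up in hdfs-audit.log for operations issued by this task.
callerContext.setCurrentContext()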