Spark 2.1 CallSite

来源:互联网 发布:淘宝日系原单店铺推荐 编辑:程序博客网 时间:2024/06/03 05:44

CallSite Definition

CallSite represents a place in user code. It can have a short and a long form.

/**
 * CallSite represents a place in user code. It can have a short and a long form.
 *
 * @param shortForm the compact description of the call site
 * @param longForm  the detailed description of the call site
 */
private[spark] case class CallSite(shortForm: String, longForm: String)

/** Companion of [[CallSite]]: string keys for the two forms and a shared empty instance. */
private[spark] object CallSite {

  /** Key string associated with the short form (its consumers are not visible here). */
  val SHORT_FORM: String = "callSite.short"

  /** Key string associated with the long form (its consumers are not visible here). */
  val LONG_FORM: String = "callSite.long"

  /** A CallSite whose short and long forms are both the empty string. */
  val empty: CallSite = CallSite("", "")
}

To create a CallSite object, just call Utils.getCallSite().

// Holds the CallSite returned by Utils.getCallSite(), invoked with its default skipClass
// filter, at the moment this val is initialized.
private val creationSite: CallSite = Utils.getCallSite()

getCallSite

/**
 * When called inside a class in the spark package, returns the name of the user code class
 * (outside the spark package) that called into Spark, as well as which Spark method they called.
 * This is used, for example, to tell users where in their code each RDD got created.
 *
 * The current thread's stack trace is read from the most recent frame outwards. The leading run
 * of frames accepted by `skipClass` is treated as "inside Spark"; the last frame of that run is
 * the Spark method reported to the user. The first frame rejected by `skipClass` is treated as
 * the user's code and supplies the file name and line number.
 *
 * @param skipClass Function that is used to exclude non-user-code classes.
 * @return a CallSite whose short form is `<spark method> at <user file>:<line>` (or the fixed
 *         string "Spark JDBC Server Query" when the user file is HiveSessionImpl.java), and
 *         whose long form is the last Spark frame followed by the user frames, truncated to
 *         `spark.callstack.depth` entries (default 20) and joined with newlines. Pieces that
 *         cannot be determined are reported as `<unknown>` (line number 0).
 */
def getCallSite(skipClass: String => Boolean = sparkInternalExclusionFunction): CallSite = {
  val unknown = "<unknown>"

  // When running under some profilers, the current stack trace might contain some bogus
  // frames. Ignore any frame that cannot be examined, as well as the getStackTrace frame itself.
  val frames = Thread.currentThread.getStackTrace().filter { ste =>
    ste != null && ste.getMethodName != null && !ste.getMethodName.contains("getStackTrace")
  }

  // Leading frames accepted by skipClass are Spark-internal; everything from the first
  // rejected frame onwards belongs to the caller's side of the stack.
  val (sparkFrames, userFrames) = frames.span(ste => skipClass(ste.getClassName))

  // The last (shallowest) contiguous Spark frame is the API entry point the user invoked.
  val lastSparkFrame = sparkFrames.lastOption
  val lastSparkMethod = lastSparkFrame.map { ste =>
    if (ste.getMethodName == "<init>") {
      // Spark method is a constructor; report its simple class name instead.
      ste.getClassName.substring(ste.getClassName.lastIndexOf('.') + 1)
    } else {
      ste.getMethodName
    }
  }.getOrElse(unknown)

  // File and line are only taken from the first user frame, and only when it has a file name;
  // a negative line number is reported as 0.
  val firstUserFrame = userFrames.headOption.filter(_.getFileName != null)
  val firstUserFile = firstUserFrame.map(_.getFileName).getOrElse(unknown)
  val firstUserLine = firstUserFrame.map(_.getLineNumber).filter(_ >= 0).getOrElse(0)

  // Put last Spark method on top of the stack trace, followed by every remaining frame.
  val topOfStack = lastSparkFrame.map(_.toString).getOrElse(unknown)
  val callStack = topOfStack +: userFrames.map(_.toString)

  // Call-site capture is diagnostic only, so a malformed depth property falls back to the
  // default instead of throwing NumberFormatException from every caller.
  val callStackDepth =
    scala.util.Try(System.getProperty("spark.callstack.depth", "20").toInt).getOrElse(20)

  val shortForm =
    if (firstUserFile == "HiveSessionImpl.java") {
      // To be more user friendly, show a nicer string for queries submitted from the JDBC
      // server.
      "Spark JDBC Server Query"
    } else {
      s"$lastSparkMethod at $firstUserFile:$firstUserLine"
    }
  val longForm = callStack.take(callStackDepth).mkString("\n")

  CallSite(shortForm, longForm)
}

sparkInternalExclusionFunction

  /**
   * Matches classes of the internal Spark API that should be skipped when finding the call
   * site of a method: a capitalized class name directly under `org.apache.spark`, optionally
   * nested in the `api.java`, `util`, `rdd` and/or `broadcast` sub-packages (in that order).
   * Compiled once here rather than on every call of the exclusion function.
   */
  private val SPARK_CORE_CLASS_REGEX =
    """^org\.apache\.spark(\.api\.java)?(\.util)?(\.rdd)?(\.broadcast)?\.[A-Z]""".r

  /** Matches any class name that starts with `org.apache.spark.sql`. */
  private val SPARK_SQL_CLASS_REGEX = """^org\.apache\.spark\.sql.*""".r

  /** Class-name prefix used to recognise Scala classes. */
  private val SCALA_CORE_CLASS_PREFIX = "scala"

  /**
   * Default filtering function for finding call sites using `getCallSite`.
   *
   * @param className fully qualified class name taken from a stack frame
   * @return true if the class is a Spark internal class or a Scala class and should therefore
   *         be excluded; false otherwise
   */
  private def sparkInternalExclusionFunction(className: String): Boolean = {
    val isSparkClass = SPARK_CORE_CLASS_REGEX.findFirstIn(className).isDefined ||
      SPARK_SQL_CLASS_REGEX.findFirstIn(className).isDefined
    // NOTE(review): this is a plain prefix test, so any class name beginning with "scala"
    // is treated as a Scala class, not only those in the `scala.` package.
    val isScalaClass = className.startsWith(SCALA_CORE_CLASS_PREFIX)
    // If the class is a Spark internal class or a Scala class, then exclude.
    isSparkClass || isScalaClass
  }

Thread.getStackTrace()

/**     * Returns an array of stack trace elements representing the stack dump     * of this thread.  This method will return a zero-length array if     * this thread has not started, has started but has not yet been     * scheduled to run by the system, or has terminated.     * If the returned array is of non-zero length then the first element of     * the array represents the top of the stack, which is the most recent     * method invocation in the sequence.  The last element of the array     * represents the bottom of the stack, which is the least recent method     * invocation in the sequence.     *     * <p>If there is a security manager, and this thread is not     * the current thread, then the security manager's     * <tt>checkPermission</tt> method is called with a     * <tt>RuntimePermission("getStackTrace")</tt> permission     * to see if it's ok to get the stack trace.     *     * <p>Some virtual machines may, under some circumstances, omit one     * or more stack frames from the stack trace.  In the extreme case,     * a virtual machine that has no stack trace information concerning     * this thread is permitted to return a zero-length array from this     * method.     *     * @return an array of <tt>StackTraceElement</tt>,     * each represents one stack frame.     *     * @throws SecurityException     *        if a security manager exists and its     *        <tt>checkPermission</tt> method doesn't allow     *        getting the stack trace of thread.     
* @see SecurityManager#checkPermission     * @see RuntimePermission     * @see Throwable#getStackTrace     *     * @since 1.5     */    public StackTraceElement[] getStackTrace() {        if (this != Thread.currentThread()) {            // check for getStackTrace permission            SecurityManager security = System.getSecurityManager();            if (security != null) {                security.checkPermission(                    SecurityConstants.GET_STACK_TRACE_PERMISSION);            }            // optimization so we do not call into the vm for threads that            // have not yet started or have terminated            if (!isAlive()) {                return EMPTY_STACK_TRACE;            }            StackTraceElement[][] stackTraceArray = dumpThreads(new Thread[] {this});            StackTraceElement[] stackTrace = stackTraceArray[0];            // a thread that was alive during the previous isAlive call may have            // since terminated, therefore not having a stacktrace.            if (stackTrace == null) {                stackTrace = EMPTY_STACK_TRACE;            }            return stackTrace;        } else {            // Don't need JVM help for current thread            return (new Exception()).getStackTrace();        }    }
0 1