Spark Learning 29: Source code walkthrough, starting from the start-all.sh script


1. First, a high-level look at the startup scripts
(figure: overview of the startup scripts)

2. start-all.sh

if [ -z "${SPARK_HOME}" ]; then  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"fi# Load the Spark configuration. "${SPARK_HOME}/sbin/spark-config.sh"# Start Master"${SPARK_HOME}/sbin"/start-master.sh# Start Workers"${SPARK_HOME}/sbin"/start-slaves.sh~                                        

3. spark-config.sh: mainly the Python (PySpark) configuration; it sets SPARK_CONF_DIR and adds the PySpark classes to PYTHONPATH

[root@biluos sbin]# vim spark-config.sh

# symlink and absolute path should rely on SPARK_HOME to resolve
if [ -z "${SPARK_HOME}" ]; then
  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
fi

export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}"

# Add the PySpark classes to the PYTHONPATH:
if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then
  export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
  export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:${PYTHONPATH}"
  export PYSPARK_PYTHONPATH_SET=1
fi
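Two shell idioms carry this script: `${VAR:-default}` picks a fallback when the variable is unset or empty (used for SPARK_CONF_DIR), and the PYSPARK_PYTHONPATH_SET flag makes the PYTHONPATH prepends happen only once even if the file is sourced repeatedly. A small sketch of both; CONF_DIR, MYPATH and MYPATH_SET are illustrative names, not Spark's.

#!/usr/bin/env bash
# 1) ${VAR:-default}: use $CONF_DIR if set and non-empty, otherwise fall back.
CONF_DIR="${CONF_DIR:-"$HOME/conf"}"
echo "conf dir: $CONF_DIR"

# 2) A "set once" guard: only prepend to a PATH-like variable the first time the
#    script is sourced, so repeated sourcing does not keep growing the value.
if [ -z "${MYPATH_SET}" ]; then
  export MYPATH="/opt/tool/lib:${MYPATH}"
  export MYPATH_SET=1
fi
echo "mypath: $MYPATH"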

4. start-master.sh

[root@biluos sbin]# vim start-master.sh

# [ -z STRING ] is true if the length of STRING is zero.
# Here we check whether Spark's home directory has been configured.
if [ -z "${SPARK_HOME}" ]; then
  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
fi

# NOTE: This exact class name is matched downstream by SparkSubmit.
# Any changes need to be reflected there.
# The class used to start the Master node.
CLASS="org.apache.spark.deploy.master.Master"

# "$@" is the list of all arguments passed to the script.
if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
  echo "Usage: ./sbin/start-master.sh [options]"
  pattern="Usage:"
  pattern+="\|Using Spark's default log4j profile:"
  pattern+="\|Registered signal handlers for"

  "${SPARK_HOME}"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
  exit 1
fi

ORIGINAL_ARGS="$@"

. "${SPARK_HOME}/sbin/spark-config.sh"

. "${SPARK_HOME}/bin/load-spark-env.sh"

# SPARK_MASTER_PORT defaults to 7077.
if [ "$SPARK_MASTER_PORT" = "" ]; then
  SPARK_MASTER_PORT=7077
fi

# SPARK_MASTER_HOST is the master's host/IP; `hostname -f` returns the host name, e.g. biluos.com
if [ "$SPARK_MASTER_HOST" = "" ]; then
  case `uname` in
      (SunOS)
          SPARK_MASTER_HOST="`/usr/sbin/check-hostname | awk '{print $NF}'`"
          ;;
      (*)
          SPARK_MASTER_HOST="`hostname -f`"
          ;;
  esac
fi

# This sets the port of the Spark web monitoring UI.
if [ "$SPARK_MASTER_WEBUI_PORT" = "" ]; then
  echo "The Spark web UI port defaults to 8080; a different port can be given when starting manually"
  SPARK_MASTER_WEBUI_PORT=8080
fi

echo "ORIGINAL_ARGS (startup arguments): "$ORIGINAL_ARGS

# Finally, spark-daemon.sh start org.apache.spark.deploy.master.Master 1 starts the master node.
"${SPARK_HOME}/sbin"/spark-daemon.sh start $CLASS 1 \
  --host $SPARK_MASTER_HOST --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT \
  $ORIGINAL_ARGS
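Because start-master.sh only fills in these variables when they are empty, SPARK_MASTER_HOST, SPARK_MASTER_PORT and SPARK_MASTER_WEBUI_PORT can be overridden from the environment (or in conf/spark-env.sh) before the script runs. A small usage sketch; the host name and port values below are examples only.

# Override the defaults before starting the Master; values are illustrative.
export SPARK_MASTER_HOST=master01.example.com   # instead of `hostname -f`
export SPARK_MASTER_PORT=7078                   # instead of the default 7077
export SPARK_MASTER_WEBUI_PORT=8081             # instead of the default 8080

"${SPARK_HOME}/sbin/start-master.sh"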

5. start-slaves.sh (the listing shown is sbin/slaves.sh, the helper start-slaves.sh calls to run a command on every host listed in conf/slaves)

[root@biluos sbin]# vim slaves.sh

# Check if --config is passed as an argument. It is an optional parameter.
# Exit if the argument is not a directory.
if [ "$1" == "--config" ]
then
  shift
  conf_dir="$1"
  if [ ! -d "$conf_dir" ]
  then
    echo "ERROR : $conf_dir is not a directory"
    echo $usage
    exit 1
  else
    export SPARK_CONF_DIR="$conf_dir"
  fi
  shift
fi

. "${SPARK_HOME}/bin/load-spark-env.sh"

if [ "$HOSTLIST" = "" ]; then
  if [ "$SPARK_SLAVES" = "" ]; then
    if [ -f "${SPARK_CONF_DIR}/slaves" ]; then
      HOSTLIST=`cat "${SPARK_CONF_DIR}/slaves"`
    else
      HOSTLIST=localhost
    fi
  else
    HOSTLIST=`cat "${SPARK_SLAVES}"`
  fi
fi

# By default disable strict host key checking
if [ "$SPARK_SSH_OPTS" = "" ]; then
  SPARK_SSH_OPTS="-o StrictHostKeyChecking=no"
fi

for slave in `echo "$HOSTLIST"|sed "s/#.*$//;/^$/d"`; do
  if [ -n "${SPARK_SSH_FOREGROUND}" ]; then
    ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
      2>&1 | sed "s/^/$slave: /"
  else
    ssh $SPARK_SSH_OPTS "$slave" $"${@// /\\ }" \
      2>&1 | sed "s/^/$slave: /" &
  fi
  if [ "$SPARK_SLAVE_SLEEP" != "" ]; then
    sleep $SPARK_SLAVE_SLEEP
  fi
done

wait
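The core of slaves.sh is an ssh fan-out: read the host list, strip comments and blank lines, run the given command on every host in the background, and prefix each output line with the host it came from. A minimal self-contained sketch of the same pattern; the hosts.txt file name and the script itself are illustrative, not part of Spark.

#!/usr/bin/env bash
# Sketch of the fan-out pattern slaves.sh implements.
HOSTFILE="${HOSTFILE:-hosts.txt}"            # one host per line; '#' starts a comment
SSH_OPTS="-o StrictHostKeyChecking=no"

# sed strips comments ("s/#.*$//") and blank lines ("/^$/d"), just as slaves.sh does.
for host in $(sed "s/#.*$//;/^$/d" "$HOSTFILE"); do
  # Run the command given on our command line on the remote host, in the background,
  # prefixing every output line with the host name.
  ssh $SSH_OPTS "$host" "$@" 2>&1 | sed "s/^/$host: /" &
done

# Wait for every background ssh to finish before exiting.
wait

Invoked as, say, `./fanout.sh uptime`, the sketch prints one "host: ..." line per entry in hosts.txt, which is exactly how start-slaves.sh distributes the start-slave.sh invocation across the cluster.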

6. load-spark-env.sh

[root@biluos bin]# vim load-spark-env.sh

# See the License for the specific language governing permissions and
# limitations under the License.
#
# This script loads spark-env.sh if it exists, and ensures it is only loaded once.
# spark-env.sh is loaded from SPARK_CONF_DIR if set, or within the current directory's
# conf/ subdirectory.

# Figure out where Spark is installed
if [ -z "${SPARK_HOME}" ]; then
  source "$(dirname "$0")"/find-spark-home
fi

echo "Spark environment loaded? SPARK_ENV_LOADED="$SPARK_ENV_LOADED

if [ -z "$SPARK_ENV_LOADED" ]; then
  export SPARK_ENV_LOADED=1

  # Returns the parent of the directory this script lives in.
  parent_dir="${SPARK_HOME}"

  user_conf_dir="${SPARK_CONF_DIR:-"$parent_dir"/conf}"

  # [ -f FILE ] is true if FILE exists and is a regular file.
  if [ -f "${user_conf_dir}/spark-env.sh" ]; then
    # Promote all variable declarations to environment (exported) variables
    set -a
    . "${user_conf_dir}/spark-env.sh"
    set +a
  fi
fi

# Setting SPARK_SCALA_VERSION if not already set.
# [ -z STRING ] is true if the length of STRING is zero.
# If the Scala version has not been set, pick it here.
if [ -z "$SPARK_SCALA_VERSION" ]; then

  ASSEMBLY_DIR2="${SPARK_HOME}/assembly/target/scala-2.11"
  ASSEMBLY_DIR1="${SPARK_HOME}/assembly/target/scala-2.10"

  # [ -d FILE ] is true if FILE exists and is a directory.
  if [[ -d "$ASSEMBLY_DIR2" && -d "$ASSEMBLY_DIR1" ]]; then
    echo -e "Presence of build for both scala versions(SCALA 2.10 and SCALA 2.11) detected." 1>&2
    echo -e 'Either clean one of them or, export SPARK_SCALA_VERSION=2.11 in spark-env.sh.' 1>&2
    exit 1
  fi

  if [ -d "$ASSEMBLY_DIR2" ]; then
    export SPARK_SCALA_VERSION="2.11"
  else
    export SPARK_SCALA_VERSION="2.10"
  fi
fi
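The `set -a` / `set +a` pair is what lets spark-env.sh use plain VAR=value assignments and still have them visible to the processes Spark launches later: while allexport is on, every variable assigned by the sourced file is automatically exported. A minimal sketch of the effect; the file /tmp/env.sh and the GREETING variable are illustrative.

#!/usr/bin/env bash
# Create a throwaway "env file" that only assigns a variable, without exporting it.
cat > /tmp/env.sh <<'EOF'
GREETING="hello from env.sh"
EOF

set -a              # turn on allexport
. /tmp/env.sh       # GREETING is now an exported environment variable
set +a              # turn allexport back off

# A child process inherits GREETING even though env.sh never called `export`.
bash -c 'echo "child sees: $GREETING"'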