Spark 2.1: executor topology information

来源:互联网 发布:淘宝实拍保护怎么申请 编辑:程序博客网 时间:2024/05/21 22:31

BlockManagerMasterEndpoint returns a block manager id that carries topology information.
In its register method, it calls topologyMapper.getTopologyForHost(idWithoutTopologyInfo.host) and builds a new id from the result:

// Rebuild the block manager id, attaching the topology resolved for its host.
val id = {
  val topologyInfo = topologyMapper.getTopologyForHost(idWithoutTopologyInfo.host)
  BlockManagerId(
    idWithoutTopologyInfo.executorId,
    idWithoutTopologyInfo.host,
    idWithoutTopologyInfo.port,
    topologyInfo)
}

The default class of topologyMapper is DefaultTopologyMapper; a different class can be selected with the spark.storage.replication.topologyMapper setting.

  /**
   * Topology mapper used to look up the topology information of a host.
   *
   * The implementation class name is read from `spark.storage.replication.topologyMapper`,
   * with [[DefaultTopologyMapper]] as the fallback. The class is instantiated reflectively
   * through its constructor taking a single `SparkConf` argument.
   */
  private val topologyMapper = {
    val topologyMapperClassName = conf.get(
      "spark.storage.replication.topologyMapper", classOf[DefaultTopologyMapper].getName)
    val clazz = Utils.classForName(topologyMapperClassName)
    // Reflection yields an untyped instance, so it has to be cast to TopologyMapper here.
    val mapper =
      clazz.getConstructor(classOf[SparkConf]).newInstance(conf).asInstanceOf[TopologyMapper]
    logInfo(s"Using $topologyMapperClassName for getting topology information")
    mapper
  }

DefaultTopologyMapper assumes all nodes are in the same rack: it returns None for every host.

/**
 * ::DeveloperApi::
 * Default [[TopologyMapper]] that supplies no topology information for any host.
 *
 * @param conf SparkConf passed through to [[TopologyMapper]]
 */
@DeveloperApi
class DefaultTopologyMapper(conf: SparkConf) extends TopologyMapper(conf) with Logging {

  /**
   * Logs the request at debug level and reports no topology for the host.
   *
   * @param hostname host whose topology is requested
   * @return always `None`
   */
  override def getTopologyForHost(hostname: String): Option[String] = {
    logDebug(s"Got a request for $hostname")
    None
  }
}

TopologyMapper

/**
 * ::DeveloperApi::
 * TopologyMapper provides topology information for a given host
 * @param conf SparkConf to get required properties, if needed
 */
@DeveloperApi
abstract class TopologyMapper(conf: SparkConf) {
  /**
   * Gets the topology information given the host name
   *
   * @param hostname Hostname
   * @return topology information for the given hostname. One can use a 'topology delimiter'
   *         to make this topology information nested.
   *         For example : '/myrack/myhost', where '/' is the topology delimiter,
   *         'myrack' is the topology identifier, and 'myhost' is the individual host.
   *         This function only returns the topology information without the hostname.
   *         This information can be used when choosing executors for block replication
   *         to discern executors from a different rack than a candidate executor, for example.
   *
   *         An implementation can choose to use empty strings or None in case topology info
   *         is not available. This would imply that all such executors belong to the same rack.
   */
  def getTopologyForHost(hostname: String): Option[String]
}

FileBasedTopologyMapper

/**
 * A simple file based topology mapper. This expects topology information provided as a
 * [[java.util.Properties]] file. The name of the file is obtained from SparkConf property
 * `spark.storage.replication.topologyFile`. To use this topology mapper, set the
 * `spark.storage.replication.topologyMapper` property to
 * [[org.apache.spark.storage.FileBasedTopologyMapper]]
 * @param conf SparkConf object
 */
@DeveloperApi
class FileBasedTopologyMapper(conf: SparkConf) extends TopologyMapper(conf) with Logging {

  /** Path of the topology file, read from `spark.storage.replication.topologyFile`. */
  val topologyFile = conf.getOption("spark.storage.replication.topologyFile")
  // Fail at construction time if the file was not configured.
  require(topologyFile.isDefined, "Please specify topology file via " +
    "spark.storage.replication.topologyFile for FileBasedTopologyMapper.")

  /** Host name to topology entries loaded from the topology file. */
  // `.get` is safe here: the `require` above guarantees the option is defined.
  val topologyMap = Utils.getPropertiesFromFile(topologyFile.get)

  /**
   * Looks up the topology entry for the given host in the loaded topology file.
   *
   * Logs the mapping at debug level when an entry exists, and a warning when it does not.
   *
   * @param hostname host whose topology is requested
   * @return the entry found for `hostname`, or `None` if the file has no entry for it
   */
  override def getTopologyForHost(hostname: String): Option[String] = {
    val topology = topologyMap.get(hostname)
    topology match {
      case Some(info) => logDebug(s"$hostname -> $info")
      case None => logWarning(s"$hostname does not have any topology information")
    }
    topology
  }
}
0 0
原创粉丝点击