Lecture 226: A Detailed Look at the SortShuffle Implementation in Spark's Pluggable Shuffle Framework



When a ShuffleMapTask runs under sort-based shuffle, records are sorted by the partition ID that each record's key maps to. Sorting is expensive, and to avoid the cost that Hadoop MapReduce pays on the map side (its mapper always sorts records by key first), Spark does not sort the keys that fall into the same partition, which greatly improves performance. So how does sortByKey get a key-sorted result? That sorting is performed on the reduce side.
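To make the contrast concrete, here is a minimal sketch (not Spark's code; the object and method names are made up for illustration) of the two orderings involved: the map side compares records only by the partition their key maps to, while the key ordering that sortByKey asks for is applied per partition on the reduce side.

// Conceptual sketch only; simplified types, not Spark's classes.
object OrderingSketch {
  // Map side: order records by partition ID alone -- keys inside a partition stay unsorted.
  def mapSideOrdering[K, V](partitionOf: K => Int): Ordering[(K, V)] =
    Ordering.by[(K, V), Int] { case (k, _) => partitionOf(k) }

  // Reduce side: only here (e.g. for sortByKey) are the keys of one partition sorted.
  def reduceSideSort[K: Ordering, V](partitionRecords: Iterator[(K, V)]): Iterator[(K, V)] =
    partitionRecords.toSeq.sortBy(_._1).iterator
}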
There are many ShuffleMapTasks on the map side, but each task produces only one data file for the whole process, together with a file index. If memory is insufficient, the sorted data is spilled to external storage, and the spill files are later merge-sorted. Because everything ends up in a single file, an index is used: a task in the next stage reads only the byte range of the partition it needs according to that index.
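The index file itself is just a sequence of byte offsets into the single data file. Below is a minimal sketch of how a downstream task could locate its partition with it, assuming the layout used by IndexShuffleBlockResolver (one 8-byte offset per partition plus a trailing end offset); this is an illustration, not Spark's own reader code.

import java.io.{DataInputStream, File, FileInputStream}

// Simplified sketch: the index file is assumed to hold (numPartitions + 1) longs,
// so partition i occupies bytes [offsets(i), offsets(i + 1)) of the data file.
def partitionByteRange(indexFile: File, reduceId: Int): (Long, Long) = {
  val in = new DataInputStream(new FileInputStream(indexFile))
  try {
    in.skipBytes(reduceId * 8)   // each offset is an 8-byte long
    val start = in.readLong()
    val end = in.readLong()
    (start, end)                 // byte range of this reducer's data in the data file
  } finally {
    in.close()
  }
}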


ExternalSorter: in essence it keeps an in-memory buffer, an array of key-value pairs organized by partition. Every incoming key-value pair is inserted into this array; when the array outgrows its limit, the in-memory data is written out to a file in an external directory, and the spill file records which partition IDs it contains and how much data is stored for them. At the end everything is sorted: the data written to the external files is merge-sorted, which can mean opening many files at once (on the order of 10 to 100), and an index file is generated. This is where writeIndexFileAndCommit is called, and finally a MapStatus is produced.
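The following is a deliberately simplified sketch of this buffer-spill-merge idea (illustrative only: SimpleSpillingBuffer is a made-up name, String/Int record types keep the I/O trivial, and the real ExternalSorter tracks memory through the task memory manager, supports aggregation, and merge-sorts the spilled runs instead of re-sorting).

import java.io.{File, PrintWriter}
import scala.collection.mutable.ArrayBuffer
import scala.io.Source

// Illustrative only: buffer (partitionId, key, value) records, spill a
// partition-sorted run to a temp file when the buffer is full, and bring
// everything back in partition-ID order at the end.
class SimpleSpillingBuffer(partitionOf: String => Int, maxInMemory: Int) {
  private val buffer = ArrayBuffer.empty[(Int, String, Int)]
  private val spills = ArrayBuffer.empty[File]

  def insert(key: String, value: Int): Unit = {
    buffer += ((partitionOf(key), key, value))
    if (buffer.size >= maxInMemory) spill()
  }

  private def spill(): Unit = {
    val file = File.createTempFile("spill-", ".txt")
    val out = new PrintWriter(file)
    try buffer.sortBy(_._1).foreach { case (p, k, v) => out.println(s"$p\t$k\t$v") }
    finally out.close()
    buffer.clear()
    spills += file
  }

  /** All records ordered by partition ID, as they would land in the single map-output file. */
  def partitionedRecords(): Seq[(Int, String, Int)] = {
    val spilled = spills.flatMap { f =>
      Source.fromFile(f).getLines().map { line =>
        val Array(p, k, v) = line.split("\t")
        (p.toInt, k, v.toInt)
      }
    }
    // The real implementation merge-sorts the sorted runs; re-sorting is enough for a sketch.
    (buffer ++ spilled).sortBy(_._1)
  }
}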

/** Write a bunch of records to this task's output */
override def write(records: Iterator[Product2[K, V]]): Unit = {
  sorter = if (dep.mapSideCombine) {
    require(dep.aggregator.isDefined, "Map-side combine without Aggregator specified!")
    new ExternalSorter[K, V, C](
      context, dep.aggregator, Some(dep.partitioner), dep.keyOrdering, dep.serializer)
  } else {
    // In this case we pass neither an aggregator nor an ordering to the sorter, because we don't
    // care whether the keys get sorted in each partition; that will be done on the reduce side
    // if the operation being run is sortByKey.
    new ExternalSorter[K, V, V](
      context, aggregator = None, Some(dep.partitioner), ordering = None, dep.serializer)
  }
  sorter.insertAll(records)

  // Don't bother including the time to open the merged output file in the shuffle write time,
  // because it just opens a single file, so is typically too fast to measure accurately
  // (see SPARK-3570).
  val output = shuffleBlockResolver.getDataFile(dep.shuffleId, mapId)
  val tmp = Utils.tempFileWith(output)
  try {
    val blockId = ShuffleBlockId(dep.shuffleId, mapId, IndexShuffleBlockResolver.NOOP_REDUCE_ID)
    val partitionLengths = sorter.writePartitionedFile(blockId, tmp)
    shuffleBlockResolver.writeIndexFileAndCommit(dep.shuffleId, mapId, partitionLengths, tmp)
    mapStatus = MapStatus(blockManager.shuffleServerId, partitionLengths)
  } finally {
    if (tmp.exists() && !tmp.delete()) {
      logError(s"Error while deleting temp file ${tmp.getAbsolutePath}")
    }
  }
}


Let's look at SortShuffleManager in Spark 2.1.x: its getReader method always returns a new BlockStoreShuffleReader, while getWriter returns a SortShuffleWriter when the handle is a BaseShuffleHandle (and an UnsafeShuffleWriter or a BypassMergeSortShuffleWriter for the other two handle types).

override def getReader[K, C](
    handle: ShuffleHandle,
    startPartition: Int,
    endPartition: Int,
    context: TaskContext): ShuffleReader[K, C] = {
  new BlockStoreShuffleReader(
    handle.asInstanceOf[BaseShuffleHandle[K, _, C]], startPartition, endPartition, context)
}

/** Get a writer for a given partition. Called on executors by map tasks. */
override def getWriter[K, V](
    handle: ShuffleHandle,
    mapId: Int,
    context: TaskContext): ShuffleWriter[K, V] = {
  numMapsForShuffle.putIfAbsent(
    handle.shuffleId, handle.asInstanceOf[BaseShuffleHandle[_, _, _]].numMaps)
  val env = SparkEnv.get
  handle match {
    case unsafeShuffleHandle: SerializedShuffleHandle[K @unchecked, V @unchecked] =>
      new UnsafeShuffleWriter(
        env.blockManager,
        shuffleBlockResolver.asInstanceOf[IndexShuffleBlockResolver],
        context.taskMemoryManager(),
        unsafeShuffleHandle,
        mapId,
        context,
        env.conf)
    case bypassMergeSortHandle: BypassMergeSortShuffleHandle[K @unchecked, V @unchecked] =>
      new BypassMergeSortShuffleWriter(
        env.blockManager,
        shuffleBlockResolver.asInstanceOf[IndexShuffleBlockResolver],
        bypassMergeSortHandle,
        mapId,
        context,
        env.conf)
    case other: BaseShuffleHandle[K @unchecked, V @unchecked, _] =>
      new SortShuffleWriter(shuffleBlockResolver, other, mapId, context)
  }
}
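Which of these three handles a shuffle gets (and therefore which writer) is decided earlier, when the ShuffleDependency is registered. The following is a condensed sketch of SortShuffleManager.registerShuffle as it looks in Spark 2.1; the exact conditions live in SortShuffleWriter.shouldBypassMergeSort and SortShuffleManager.canUseSerializedShuffle, so treat this as an outline rather than a verbatim copy of the source.

// Condensed sketch of SortShuffleManager.registerShuffle (Spark 2.1), not the verbatim source.
override def registerShuffle[K, V, C](
    shuffleId: Int,
    numMaps: Int,
    dependency: ShuffleDependency[K, V, C]): ShuffleHandle = {
  if (SortShuffleWriter.shouldBypassMergeSort(SparkEnv.get.conf, dependency)) {
    // Few reduce partitions and no map-side combine: write one file per partition
    // and concatenate them, skipping the sort entirely.
    new BypassMergeSortShuffleHandle[K, V](
      shuffleId, numMaps, dependency.asInstanceOf[ShuffleDependency[K, V, V]])
  } else if (SortShuffleManager.canUseSerializedShuffle(dependency)) {
    // Serialized (tungsten-sort) shuffle: sort record pointers over serialized data.
    new SerializedShuffleHandle[K, V](
      shuffleId, numMaps, dependency.asInstanceOf[ShuffleDependency[K, V, V]])
  } else {
    // Everything else falls through to the SortShuffleWriter path shown above.
    new BaseShuffleHandle(shuffleId, numMaps, dependency)
  }
}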

Now let's look at ShuffledRDD: its compute method reads the shuffled data via shuffleManager.getReader.

override def compute(split: Partition, context: TaskContext): Iterator[(K, C)] = {
  val dep = dependencies.head.asInstanceOf[ShuffleDependency[K, V, C]]
  SparkEnv.get.shuffleManager.getReader(dep.shuffleHandle, split.index, split.index + 1, context)
    .read()
    .asInstanceOf[Iterator[(K, C)]]
}
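To tie the pieces together, here is a minimal driver program (an illustrative example, not from the lesson; the object name ShuffleDemo is made up) in which reduceByKey and sortByKey each create a ShuffledRDD: the map stage writes through the sort-based shuffle path (one data file plus an index file per task), and the result stage's tasks run compute, which goes through getReader / BlockStoreShuffleReader and, for sortByKey, sorts by key on the reduce side.

import org.apache.spark.{SparkConf, SparkContext}

// Illustrative driver program exercising the shuffle path described above.
object ShuffleDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("ShuffleDemo").setMaster("local[2]"))

    val pairs = sc.parallelize(Seq(("b", 2), ("a", 1), ("b", 3), ("a", 4)), numSlices = 2)

    // reduceByKey -> ShuffledRDD with map-side combine (an aggregator is passed to ExternalSorter).
    val summed = pairs.reduceByKey(_ + _)

    // sortByKey -> ShuffledRDD with a range partitioner; keys are sorted on the reduce side.
    val sorted = pairs.sortByKey()

    println(summed.collect().toList)  // e.g. List((a,5), (b,5))
    println(sorted.collect().toList)  // keys ascending, e.g. List((a,1), (a,4), (b,2), (b,3))
    sc.stop()
  }
}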


