spark学习-41-Spark的块传输服务BlockTransferService
来源:互联网 发布:动漫和动画的区别 知乎 编辑:程序博客网 时间:2024/06/05 08:24
1.blockTransferService默认为NettyBlockTransferService,它使用Netty提供的异步事件驱动的网络应用框架,提供web服务及客户端,获取远程节点上的Block集合。
2.块传输服务BlockTransferService是在SparkEnv初始化的时候创建的
// ===== Create the block transfer service (BlockTransferService) =====
/*
 * The block transfer service is built here as a NettyBlockTransferService, i.e. the
 * Netty-based implementation used to fetch blocks from remote nodes.
 *
 * NOTE(review): the original note says the property spark.shuffle.blockTransferService can be
 * set to select NioBlockTransferService instead. This statement constructs the Netty
 * implementation unconditionally, so confirm whether that option exists in this version.
 */
val blockTransferService = new NettyBlockTransferService(conf, securityManager, bindAddress, advertiseAddress, blockManagerPort, numUsableCores)
3.NettyBlockTransferService继承自BlockTransferService,BlockTransferService中有如下方法
package org.apache.spark.network

import java.io.{Closeable, File}
import java.nio.ByteBuffer

import scala.concurrent.{Future, Promise}
import scala.concurrent.duration.Duration
import scala.reflect.ClassTag

import org.apache.spark.internal.Logging
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.shuffle.{BlockFetchingListener, ShuffleClient}
import org.apache.spark.storage.{BlockId, StorageLevel}
import org.apache.spark.util.ThreadUtils

/**
 * Base class for services that move blocks between nodes.
 *
 * Declares the abstract lifecycle and transfer operations, and layers two blocking
 * convenience methods ([[fetchBlockSync]] and [[uploadBlockSync]]) on top of the
 * asynchronous ones.
 */
private[spark]
abstract class BlockTransferService extends ShuffleClient with Closeable with Logging {

  /**
   * Initialize the transfer service with the BlockDataManager that is used to fetch
   * local blocks or put local blocks.
   *
   * @param blockDataManager manager used for reading and writing local blocks
   */
  def init(blockDataManager: BlockDataManager): Unit

  /**
   * Tear down the transfer service.
   */
  def close(): Unit

  /**
   * Port number the service is listening on; available only after [[init]] is invoked.
   */
  def port: Int

  /**
   * Host name the service is listening on; available only after [[init]] is invoked.
   */
  def hostName: String

  /**
   * Asynchronously fetch a sequence of blocks from a remote node; available only after
   * [[init]] is invoked.
   *
   * The API takes a sequence so that an implementation can batch requests. It does not
   * return a future, so that an implementation can invoke onBlockFetchSuccess as soon as
   * the data of one block is fetched instead of waiting for all blocks to be fetched.
   *
   * @param host         remote host to fetch from
   * @param port         remote port to fetch from
   * @param execId       id of the remote executor
   * @param blockIds     ids of the blocks to fetch
   * @param listener     callback notified of each block's success or failure
   * @param shuffleFiles passed through to the implementation; [[fetchBlockSync]] passes null
   */
  override def fetchBlocks(
      host: String,
      port: Int,
      execId: String,
      blockIds: Array[String],
      listener: BlockFetchingListener,
      shuffleFiles: Array[File]): Unit

  /**
   * Upload a single block to a remote node; available only after [[init]] is invoked.
   *
   * @return a future for the upload
   */
  def uploadBlock(
      hostname: String,
      port: Int,
      execId: String,
      blockId: BlockId,
      blockData: ManagedBuffer,
      level: StorageLevel,
      classTag: ClassTag[_]): Future[Unit]

  /**
   * A special case of [[fetchBlocks]]: it fetches exactly one block and blocks the calling
   * thread until the listener has reported success or failure.
   *
   * It is also only available after [[init]] is invoked.
   *
   * @return a buffer holding a copy of the fetched block's bytes
   */
  def fetchBlockSync(host: String, port: Int, execId: String, blockId: String): ManagedBuffer = {
    // Completed by the listener below; the calling thread waits on its future.
    val fetched = Promise[ManagedBuffer]()

    val singleBlockListener = new BlockFetchingListener {
      override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = {
        // Copy the received bytes into a freshly allocated buffer and hand that copy back.
        val copy = ByteBuffer.allocate(data.size.toInt)
        copy.put(data.nioByteBuffer())
        copy.flip()
        fetched.success(new NioManagedBuffer(copy))
      }

      override def onBlockFetchFailure(blockId: String, exception: Throwable): Unit = {
        fetched.failure(exception)
      }
    }

    fetchBlocks(host, port, execId, Array(blockId), singleBlockListener, shuffleFiles = null)
    ThreadUtils.awaitResult(fetched.future, Duration.Inf)
  }

  /**
   * Upload a single block to a remote node; available only after [[init]] is invoked.
   *
   * Same as [[uploadBlock]], except that this method blocks the calling thread until the
   * upload finishes.
   */
  def uploadBlockSync(
      hostname: String,
      port: Int,
      execId: String,
      blockId: BlockId,
      blockData: ManagedBuffer,
      level: StorageLevel,
      classTag: ClassTag[_]): Unit = {
    ThreadUtils.awaitResult(
      uploadBlock(hostname, port, execId, blockId, blockData, level, classTag),
      Duration.Inf)
  }
}
4.NettyBlockTransferService的实现代码如下
package org.apache.spark.network.netty

import java.io.File
import java.nio.ByteBuffer

import scala.collection.JavaConverters._
import scala.concurrent.{Future, Promise}
import scala.reflect.ClassTag

import org.apache.spark.{SecurityManager, SparkConf}
import org.apache.spark.network._
import org.apache.spark.network.buffer.ManagedBuffer
import org.apache.spark.network.client.{RpcResponseCallback, TransportClientBootstrap, TransportClientFactory}
import org.apache.spark.network.crypto.{AuthClientBootstrap, AuthServerBootstrap}
import org.apache.spark.network.server._
import org.apache.spark.network.shuffle.{BlockFetchingListener, OneForOneBlockFetcher, RetryingBlockFetcher}
import org.apache.spark.network.shuffle.protocol.UploadBlock
import org.apache.spark.network.util.JavaUtils
import org.apache.spark.serializer.JavaSerializer
import org.apache.spark.storage.{BlockId, StorageLevel}
import org.apache.spark.util.Utils

/**
 * A BlockTransferService that uses Netty to fetch a set of blocks at a time.
 *
 * @param conf            Spark configuration; also the source of the application id
 * @param securityManager used to decide whether authentication bootstraps are installed
 * @param bindAddress     address the transport server binds to
 * @param hostName        host name reported by this service
 * @param _port           port on which server start-up is first attempted
 * @param numCores        passed to SparkTransportConf when building the transport configuration
 */
private[spark] class NettyBlockTransferService(
    conf: SparkConf,
    securityManager: SecurityManager,
    bindAddress: String,
    override val hostName: String,
    _port: Int,
    numCores: Int)
  extends BlockTransferService {

  // TODO: Don't use Java serialization, use a more cross-version compatible serialization format.
  private val serializer = new JavaSerializer(conf)
  // Whether the security manager reports authentication as enabled.
  private val authEnabled = securityManager.isAuthenticationEnabled()
  private val transportConf = SparkTransportConf.fromSparkConf(conf, "shuffle", numCores)

  // All of the following are populated by init().
  private[this] var transportContext: TransportContext = _
  private[this] var server: TransportServer = _
  private[this] var clientFactory: TransportClientFactory = _
  private[this] var appId: String = _

  /**
   * Initializes the service:
   *  1. creates the RPC handler (NettyBlockRpcServer);
   *  2. builds the TransportContext;
   *  3. creates the TransportClientFactory used to open client connections;
   *  4. creates and binds the TransportServer.
   *
   * NOTE(review): the original article states the server port comes from the property
   * spark.blockManager.port (default 0, meaning a random port). That is not visible in this
   * class, which only receives `_port` -- confirm against the caller.
   *
   * @param blockDataManager handed to the RPC handler for access to local blocks
   */
  override def init(blockDataManager: BlockDataManager): Unit = {
    // 1. RPC handler serving requests against the local block data manager.
    val rpcHandler = new NettyBlockRpcServer(conf.getAppId, serializer, blockDataManager)

    // Authentication bootstraps are installed only when authentication is enabled. The
    // explicit Option element types keep the Java lists below typed by the bootstrap interfaces.
    val serverBootstrap: Option[TransportServerBootstrap] =
      if (authEnabled) Some(new AuthServerBootstrap(transportConf, securityManager)) else None
    val clientBootstrap: Option[TransportClientBootstrap] =
      if (authEnabled) {
        Some(new AuthClientBootstrap(transportConf, conf.getAppId, securityManager))
      } else {
        None
      }

    // 2. Transport context, 3. client factory, 4. server.
    transportContext = new TransportContext(transportConf, rpcHandler)
    clientFactory = transportContext.createClientFactory(clientBootstrap.toSeq.asJava)
    server = createServer(serverBootstrap.toList)
    appId = conf.getAppId
    logInfo(s"Server created on ${hostName}:${server.getPort}")
  }

  /** Creates and binds the TransportServer, possibly trying multiple ports. */
  private def createServer(bootstraps: List[TransportServerBootstrap]): TransportServer = {
    def startService(port: Int): (TransportServer, Int) = {
      val server = transportContext.createServer(bindAddress, port, bootstraps.asJava)
      (server, server.getPort)
    }

    Utils.startServiceOnPort(_port, startService, conf, getClass.getName)._1
  }

  /**
   * Fetches blocks from a remote node through a client obtained from the client factory
   * created in [[init]].
   *
   * When the configured maximum number of IO retries is positive, the fetch is wrapped in a
   * RetryingBlockFetcher; otherwise it is started directly. An exception thrown while starting
   * the fetch is logged and reported to `listener` as a failure of every requested block.
   */
  override def fetchBlocks(
      host: String,
      port: Int,
      execId: String,
      blockIds: Array[String],
      listener: BlockFetchingListener,
      shuffleFiles: Array[File]): Unit = {
    logTrace(s"Fetch blocks from $host:$port (executor id $execId)")
    try {
      val blockFetchStarter = new RetryingBlockFetcher.BlockFetchStarter {
        override def createAndStart(
            blockIds: Array[String],
            listener: BlockFetchingListener): Unit = {
          // Open a client connection to the remote node and pull the blocks through it.
          val client = clientFactory.createClient(host, port)
          new OneForOneBlockFetcher(client, appId, execId, blockIds.toArray, listener,
            transportConf, shuffleFiles).start()
        }
      }

      val maxRetries = transportConf.maxIORetries()
      if (maxRetries > 0) {
        // Note this Fetcher will correctly handle maxRetries == 0; we avoid it just in case there's
        // a bug in this code. We should remove the if statement once we're sure of the stability.
        new RetryingBlockFetcher(transportConf, blockFetchStarter, blockIds, listener).start()
      } else {
        blockFetchStarter.createAndStart(blockIds, listener)
      }
    } catch {
      case e: Exception =>
        logError("Exception while beginning fetchBlocks", e)
        blockIds.foreach(listener.onBlockFetchFailure(_, e))
    }
  }

  /** Port the transport server is bound to; requires [[init]] to have been called. */
  override def port: Int = server.getPort

  /**
   * Uploads one block to a remote node:
   *  1. creates a client connected to `hostname`:`port`;
   *  2. serializes the storage level and class tag with the Java serializer;
   *  3. converts the block's NIO buffer into a byte array;
   *  4. wraps appId, execId, blockId, the serialized metadata and the block bytes into an
   *     UploadBlock message and converts that message to a ByteBuffer;
   *  5. sends it with the client's sendRpc; the response callback completes the returned
   *     future with success or failure.
   *
   * @return a future completed by the RPC response callback
   */
  override def uploadBlock(
      hostname: String,
      port: Int,
      execId: String,
      blockId: BlockId,
      blockData: ManagedBuffer,
      level: StorageLevel,
      classTag: ClassTag[_]): Future[Unit] = {
    val result = Promise[Unit]()
    val client = clientFactory.createClient(hostname, port)

    // StorageLevel and ClassTag are serialized as bytes using our JavaSerializer.
    // Everything else is encoded using our binary protocol.
    val metadata = JavaUtils.bufferToArray(serializer.newInstance().serialize((level, classTag)))

    // Convert or copy nio buffer into array in order to serialize it.
    val array = JavaUtils.bufferToArray(blockData.nioByteBuffer())

    client.sendRpc(new UploadBlock(appId, execId, blockId.toString, metadata, array).toByteBuffer,
      new RpcResponseCallback {
        override def onSuccess(response: ByteBuffer): Unit = {
          logTrace(s"Successfully uploaded block $blockId")
          result.success(())
        }

        override def onFailure(e: Throwable): Unit = {
          logError(s"Error while uploading block $blockId", e)
          result.failure(e)
        }
      })

    result.future
  }

  /**
   * Closes the server and the client factory, skipping whichever was never created.
   *
   * The client factory is closed in a `finally` so that it is still released when closing
   * the server throws.
   */
  override def close(): Unit = {
    try {
      if (server != null) {
        server.close()
      }
    } finally {
      if (clientFactory != null) {
        clientFactory.close()
      }
    }
  }
}
阅读全文
0 0
- spark学习-41-Spark的块传输服务BlockTransferService
- Spark的注册服务
- Spark storage系列------2.Spark cache数据块的读取
- spark学习十五 spark的容错分析
- spark-02-学习spark需要的阶段
- 关于Spark和Spark的学习资料
- spark学习-18-Spark的Core理解
- spark学习-20-Spark的sample理解
- spark学习-21-Spark的groupByKey
- spark学习-36-Spark的ShuffleManager
- spark学习-37-Spark的SortShuffleManager
- spark学习-38-Spark的MemoryManager
- spark学习-39-Spark的StaticMemoryManager
- spark学习-40-Spark的UnifiedMemoryManager
- spark学习-43-Spark的BlockManager
- spark学习-49-Spark的job调度
- spark学习-58-Spark的EventLoggingListener
- spark shell的学习
- android studio设置主题、护眼色、字体、Logcat前景色背景色
- Hbase_配置说明
- Android实现类似C#ComBox功能用AutoCompleteTextView实现
- jsoup学习总结
- php杂记
- spark学习-41-Spark的块传输服务BlockTransferService
- 提高MySQL性能的7个技巧
- Win10安装问题 无法安装到这个磁盘 采用gpt分区形式 我们无法创建新的分区 也找不到现有分区 ThinkPad
- json、list、map、String简单转换方法
- android 面试笔记二
- oracle轻量级客户端链接plsql
- java编译时,报错误: 编码GBK的不可映射字符
- Ubuntu文件系统损坏修复
- 最近看的目标检测