spark 2.1 DiskBlockObjectWriter
call stack
A DiskBlockObjectWriter is created by the BlockManager.getDiskWriter method, which reads spark.shuffle.sync (default false) to decide whether each commit should force outstanding writes to disk:
def getDiskWriter(
    blockId: BlockId,
    file: File,
    serializerInstance: SerializerInstance,
    bufferSize: Int,
    writeMetrics: ShuffleWriteMetrics): DiskBlockObjectWriter = {
  val syncWrites = conf.getBoolean("spark.shuffle.sync", false)
  new DiskBlockObjectWriter(file, serializerManager, serializerInstance, bufferSize,
    syncWrites, writeMetrics, blockId)
}
trait ManualCloseOutputStream
/**
 * Guards against close calls, e.g. from a wrapping stream.
 * Call manualClose to close the stream that was extended by this trait.
 * Commit uses this trait to close object streams without paying the
 * cost of closing and opening the underlying file.
 */
private trait ManualCloseOutputStream extends OutputStream {
  abstract override def close(): Unit = {
    flush()
  }

  def manualClose(): Unit = {
    super.close()
  }
}
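The abstract override here is Scala's stackable-trait pattern: mixed into any concrete OutputStream, the trait intercepts close() so that wrapping streams (e.g. a compression stream) can "close" the writer without closing the underlying file. Below is a minimal standalone sketch of the same pattern; IgnoreClose and the demo names are made up for illustration and are not Spark code.

import java.io.{ByteArrayOutputStream, FilterOutputStream, OutputStream}

// Same trick as ManualCloseOutputStream: close() becomes flush(),
// and only manualClose() reaches the real close() of the stream below.
trait IgnoreClose extends OutputStream {
  abstract override def close(): Unit = flush()  // swallow close(); keep the stream usable
  def manualClose(): Unit = super.close()        // really close when asked explicitly
}

object IgnoreCloseDemo extends App {
  val underlying = new ByteArrayOutputStream()
  val out = new FilterOutputStream(underlying) with IgnoreClose
  out.write("hello".getBytes("UTF-8"))
  out.close()                            // flushes only; underlying stays open
  out.write(" world".getBytes("UTF-8"))  // still writable after "close"
  out.manualClose()                      // actually closes the chain
  println(underlying.toString("UTF-8"))  // prints "hello world"
}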
fields
/** The file channel, used for repositioning / truncating the file. */
private var channel: FileChannel = null
private var mcs: ManualCloseOutputStream = null
private var bs: OutputStream = null
private var fos: FileOutputStream = null
private var ts: TimeTrackingOutputStream = null
private var objOut: SerializationStream = null
private var initialized = false
private var streamOpen = false
private var hasBeenClosed = false

/**
 * Cursors used to represent positions in the file.
 *
 * xxxxxxxxxx|----------|-----|
 *           ^          ^     ^
 *           |          |    channel.position()
 *           |        reportedPosition
 *         committedPosition
 *
 * reportedPosition: Position at the time of the last update to the write metrics.
 * committedPosition: Offset after last committed write.
 * -----: Current writes to the underlying file.
 * xxxxx: Committed contents of the file.
 */
private var committedPosition = file.length()
private var reportedPosition = committedPosition

/**
 * Keep track of number of records written and also use this to periodically
 * output bytes written since the latter is expensive to do for each record.
 */
private var numRecordsWritten = 0
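To make the cursor diagram concrete, here is a short trace with hypothetical byte counts (the numbers are made up; the bookkeeping follows updateBytesWritten and commitAndGet shown below):

// The file already holds 100 committed bytes:
//   committedPosition = 100, reportedPosition = 100, channel.position() = 100
// After serializing ~40 bytes of records (buffered writes, not yet committed):
//   channel.position() = 140; committedPosition and reportedPosition still 100
// updateBytesWritten() reports the delta to the metrics:
//   incBytesWritten(140 - 100); reportedPosition = 140
// commitAndGet() closes the object stream, which may append framing bytes (say 6):
//   pos = 146; returns FileSegment(file, 100, 46); committedPosition = 146
//   incBytesWritten(146 - 140) accounts for the framing; reportedPosition = 146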
initialize
private def initialize(): Unit = {
  fos = new FileOutputStream(file, true)
  channel = fos.getChannel()
  ts = new TimeTrackingOutputStream(writeMetrics, fos)
  class ManualCloseBufferedOutputStream
    extends BufferedOutputStream(ts, bufferSize) with ManualCloseOutputStream
  mcs = new ManualCloseBufferedOutputStream
}
open

def open(): DiskBlockObjectWriter = {
  if (hasBeenClosed) {
    throw new IllegalStateException("Writer already closed. Cannot be reopened.")
  }
  if (!initialized) {
    initialize()
    initialized = true
  }
  bs = serializerManager.wrapStream(blockId, mcs)
  objOut = serializerInstance.serializeStream(bs)
  streamOpen = true
  this
}
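After open(), the writer holds the following stream stack (outermost first), assembled by initialize() and open() above:

objOut (SerializationStream, from serializerInstance.serializeStream)
  -> bs  (compression/encryption wrapper, from serializerManager.wrapStream)
  -> mcs (ManualCloseBufferedOutputStream, buffers bufferSize bytes; close() is a no-op)
  -> ts  (TimeTrackingOutputStream, charges write time to writeMetrics)
  -> fos (FileOutputStream opened in append mode)

channel = fos.getChannel() is kept separately so the writer can read position() and truncate() the file without going through the wrappers.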
closeResources
/**
 * Close and cleanup all resources.
 * Should call after committing or reverting partial writes.
 */
private def closeResources(): Unit = {
  if (initialized) {
    mcs.manualClose()
    channel = null
    mcs = null
    bs = null
    fos = null
    ts = null
    objOut = null
    initialized = false
    streamOpen = false
    hasBeenClosed = true
  }
}
close
/**
 * Commits any remaining partial writes and closes resources.
 */
override def close() {
  if (initialized) {
    Utils.tryWithSafeFinally {
      commitAndGet()
    } {
      closeResources()
    }
  }
}
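Utils.tryWithSafeFinally ensures closeResources() always runs without masking a failure from commitAndGet(). A simplified sketch of its behavior, adapted from org.apache.spark.util.Utils (not the verbatim implementation):

// Run `block`, always run `finallyBlock`; if both throw, attach the
// finally-block's exception as suppressed instead of hiding the original.
def tryWithSafeFinally[T](block: => T)(finallyBlock: => Unit): T = {
  var originalThrowable: Throwable = null
  try {
    block
  } catch {
    case t: Throwable =>
      originalThrowable = t
      throw t
  } finally {
    try {
      finallyBlock
    } catch {
      case t: Throwable if originalThrowable != null =>
        originalThrowable.addSuppressed(t)
      // if block succeeded, an exception from finallyBlock propagates normally
    }
  }
}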
commitAndGet
/**
 * Flush the partial writes and commit them as a single atomic block.
 * A commit may write additional bytes to frame the atomic block.
 *
 * @return file segment with previous offset and length committed on this call.
 */
def commitAndGet(): FileSegment = {
  if (streamOpen) {
    // NOTE: Because Kryo doesn't flush the underlying stream we explicitly flush both the
    // serializer stream and the lower level stream.
    objOut.flush()
    bs.flush()
    objOut.close()
    streamOpen = false

    if (syncWrites) {
      // Force outstanding writes to disk and track how long it takes
      val start = System.nanoTime()
      fos.getFD.sync()
      writeMetrics.incWriteTime(System.nanoTime() - start)
    }

    val pos = channel.position()
    val fileSegment = new FileSegment(file, committedPosition, pos - committedPosition)
    committedPosition = pos
    // In certain compression codecs, more bytes are written after streams are closed
    writeMetrics.incBytesWritten(committedPosition - reportedPosition)
    reportedPosition = committedPosition
    fileSegment
  } else {
    new FileSegment(file, committedPosition, 0)
  }
}
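Putting the pieces together, a shuffle writer typically writes the records for one partition, then calls commitAndGet() to obtain that partition's FileSegment without reopening the file, and reverts on failure. A hypothetical sketch (not real Spark source): blockManager, blockId, file, serializer, metrics and records are assumed to be in scope.

val writer = blockManager.getDiskWriter(blockId, file, serializer, 32 * 1024, metrics)
try {
  for ((key, value) <- records) {
    writer.write(key, value)
  }
  val segment = writer.commitAndGet()    // offset/length of the bytes just committed
  writer.close()
  println(s"committed ${segment.length} bytes at offset ${segment.offset}")
} catch {
  case t: Throwable =>
    writer.revertPartialWritesAndClose() // truncate back to the last committed position
    throw t
}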
revertPartialWritesAndClose
/**
 * Reverts writes that haven't been committed yet. Callers should invoke this function
 * when there are runtime exceptions. This method will not throw, though it may be
 * unsuccessful in truncating written data.
 *
 * @return the file that this DiskBlockObjectWriter wrote to.
 */
def revertPartialWritesAndClose(): File = {
  // Discard current writes. We do this by flushing the outstanding writes and then
  // truncating the file to its initial position.
  try {
    if (initialized) {
      writeMetrics.decBytesWritten(reportedPosition - committedPosition)
      writeMetrics.decRecordsWritten(numRecordsWritten)
      streamOpen = false
      closeResources()
    }

    val truncateStream = new FileOutputStream(file, true)
    try {
      truncateStream.getChannel.truncate(committedPosition)
      file
    } finally {
      truncateStream.close()
    }
  } catch {
    case e: Exception =>
      logError("Uncaught exception while reverting partial writes to file " + file, e)
      file
  }
}
write
/**
 * Writes a key-value pair.
 */
def write(key: Any, value: Any) {
  if (!streamOpen) {
    open()
  }

  objOut.writeKey(key)
  objOut.writeValue(value)
  recordWritten()
}

override def write(kvBytes: Array[Byte], offs: Int, len: Int): Unit = {
  if (!streamOpen) {
    open()
  }

  bs.write(kvBytes, offs, len)
}
recordWritten

/**
 * Notify the writer that a record worth of bytes has been written with OutputStream#write.
 */
def recordWritten(): Unit = {
  numRecordsWritten += 1
  writeMetrics.incRecordsWritten(1)

  if (numRecordsWritten % 16384 == 0) {
    updateBytesWritten()
  }
}
updateBytesWritten

/**
 * Report the number of bytes written in this writer's shuffle write metrics.
 * Note that this is only valid before the underlying streams are closed.
 */
private def updateBytesWritten() {
  val pos = channel.position()
  writeMetrics.incBytesWritten(pos - reportedPosition)
  reportedPosition = pos
}
flush

// For testing
private[spark] override def flush() {
  objOut.flush()
  bs.flush()
}