PartitionedPairBuffer

来源:互联网 发布:淘宝形象模特 编辑:程序博客网 时间:2024/05/01 18:38

PartitionedPairBuffer

/**
 * Append-only buffer of key-value pairs, each with a corresponding partition ID, that keeps track
 * of its estimated size in bytes.
 *
 * The buffer can support up to `1073741823 (2 ^ 30 - 1)` elements.
 */
private[spark] class PartitionedPairBuffer[K, V](initialCapacity: Int = 64)
  extends WritablePartitionedPairCollection[K, V] with SizeTracker {
  import PartitionedPairBuffer._

fields

// Validate the requested capacity before any storage is allocated: it must lie in
// [1, MAXIMUM_CAPACITY]. Both checks throw IllegalArgumentException (via `require`).
require(initialCapacity <= MAXIMUM_CAPACITY,
  s"Can't make capacity bigger than ${MAXIMUM_CAPACITY} elements")
require(initialCapacity >= 1, "Invalid initial capacity")

// Basic growable array data structure. We use a single array of AnyRef to hold both the keys
// and the values, so that we can sort them efficiently with KVArraySortDataFormat.
// Pair i lives in two adjacent slots: data(2 * i) holds the (partition, key) tuple and
// data(2 * i + 1) holds the value, which is why the array is twice the pair capacity.

// Number of pairs the current array can hold.
private var capacity = initialCapacity
// Number of pairs inserted so far.
private var curSize = 0
// Backing storage, two slots per pair.
private var data = new Array[AnyRef](2 * initialCapacity)
/**
 * Add an element into the buffer.
 *
 * The array is grown first when it is already full. The pair is then written into the next
 * two free slots, the element count is bumped, and `afterUpdate()` is invoked.
 *
 * @param partition partition ID stored alongside the key
 * @param key       key of the pair; stored together with `partition` as a tuple
 * @param value     value of the pair
 */
def insert(partition: Int, key: K, value: V): Unit = {
  if (curSize == capacity) growArray()

  // Each pair occupies two adjacent slots: (partition, key) first, then the value.
  val slot = 2 * curSize
  data(slot) = (partition, key.asInstanceOf[AnyRef])
  data(slot + 1) = value.asInstanceOf[AnyRef]

  curSize += 1
  afterUpdate()
}
/**
 * Double the size of the array because we've reached capacity.
 *
 * The new capacity is clamped to MAXIMUM_CAPACITY. Existing pairs are copied into the new
 * array, after which `resetSamples()` is called.
 *
 * @throws IllegalStateException if the buffer is already at MAXIMUM_CAPACITY
 */
private def growArray(): Unit = {
  if (capacity >= MAXIMUM_CAPACITY) {
    throw new IllegalStateException(s"Can't insert more than ${MAXIMUM_CAPACITY} elements")
  }

  // A negative product means Int overflow; either that or exceeding the limit clamps to the max.
  val doubled = capacity * 2
  val newCapacity =
    if (doubled >= 0 && doubled <= MAXIMUM_CAPACITY) doubled else MAXIMUM_CAPACITY

  // Two array slots per pair, so both the allocation and the copy length are doubled.
  val grown = new Array[AnyRef](2 * newCapacity)
  System.arraycopy(data, 0, grown, 0, 2 * capacity)
  data = grown
  capacity = newCapacity
  resetSamples()
}

partitionedDestructiveSortedIterator

 /**
  * Iterate through the data in a given order. For this class this is not really destructive:
  * the sort is applied to the first `curSize` pairs of `data` itself, and the returned iterator
  * reads that same array, so the pairs stay in the buffer (in their new order).
  *
  * @param keyComparator optional key ordering; when present it is combined with the partition
  *                      ID through `partitionKeyComparator`, otherwise `partitionComparator`
  *                      alone is used
  * @return an iterator over `((partition, key), value)` entries
  */
 override def partitionedDestructiveSortedIterator(keyComparator: Option[Comparator[K]])
   : Iterator[((Int, K), V)] = {
   val comparator = keyComparator.map(partitionKeyComparator).getOrElse(partitionComparator)
   new Sorter(new KVArraySortDataFormat[(Int, K), AnyRef]).sort(data, 0, curSize, comparator)
   // `iterator()` is declared with an empty parameter list, so call it with explicit parens;
   // relying on auto-application is deprecated in Scala 2.13 and rejected by Scala 3.
   iterator()
 }

iterator

  /**
   * Iterator over the buffered pairs in their current array order.
   *
   * The iterator reads `data` and `curSize` live on every call rather than taking a snapshot.
   */
  private def iterator(): Iterator[((Int, K), V)] = new Iterator[((Int, K), V)] {
    // Index of the next pair to hand out (pair index, not array slot).
    var cursor = 0

    override def hasNext: Boolean = cursor < curSize

    override def next(): ((Int, K), V) = {
      if (cursor >= curSize) {
        throw new NoSuchElementException
      }
      // Pair `cursor` sits in slots 2 * cursor (partition/key tuple) and 2 * cursor + 1 (value).
      val base = 2 * cursor
      val partitionAndKey = data(base).asInstanceOf[(Int, K)]
      val value = data(base + 1).asInstanceOf[V]
      cursor += 1
      (partitionAndKey, value)
    }
  }

object PartitionedPairBuffer

/** Constants shared by all `PartitionedPairBuffer` instances. */
private object PartitionedPairBuffer {
  /**
   * Largest number of pairs a buffer may hold: `Int.MaxValue / 2`, i.e. 2 ^ 30 - 1 = 1073741823.
   * The backing array uses two slots per pair, so twice this value still fits in an `Int`.
   */
  val MAXIMUM_CAPACITY: Int = Int.MaxValue / 2
}
原创粉丝点击