Akka(23): Stream:自定义流构件功能-Custom defined stream processing stages

    从总体上看:akka-stream是由数据源头Source,流通节点Flow和数据流终点Sink三个框架性的流构件(stream components)组成的。这其中:Source和Sink是stream的两个独立端点,而Flow处于stream的Source和Sink中间,可能由多个通道式的节点组成,每个节点代表某些数据流元素转化处理功能,它们的链接顺序则可能代表整体作业的流程。一个完整的数据流(可运行数据流)必须是一个闭合的数据流,即:从外表上看,数据流两头必须连接一个Source和一个Sink。我们可以直接把一个Sink连接到一个Source来获取一个最简单的可运行数据流,如下:

  // Attach a sink directly to a source: the smallest possible closed, runnable stream.
  val numbers = Source(1 to 10)
  val printEach = Sink.foreach[Int](println)
  numbers.runWith(printEach)

从另一个角度说明:akka-stream又包括数据流图Graph及运算器Materializer两个部分。Graph代表运算方案,Materializer负责准备环境并把运算方案Graph放置到Actor系统里去实际运算产生效果(effects)及获取运算结果。所以:akka-stream必须有一个Graph描述功能和流程。每个Graph又可以由一些代表更细小功能的子Graph组成。一个可运行数据流必须由一个闭合的数据流图(closed graph)来代表,而这个ClosedGraph又是由代表不同数据转化处理功能的子图(sub-graph)组成。定制数据流功能就是针对Graph按功能需要进行自定义。


/**
 * A Shape describes the inlets and outlets of a [[Graph]]. In keeping with the
 * philosophy that a Graph is a freely reusable blueprint, everything that
 * matters from the outside are the connections that can be made with it,
 * otherwise it is just a black box.
 */
abstract class Shape {
  /**
   * Scala API: get a list of all input ports
   */
  def inlets: immutable.Seq[Inlet[_]]

  /**
   * Scala API: get a list of all output ports
   */
  def outlets: immutable.Seq[Outlet[_]]

  /**
   * Create a copy of this Shape object, returning the same type as the
   * original; this constraint can unfortunately not be expressed in the
   * type system.
   */
  def deepCopy(): Shape
  // The remaining members are elided in this excerpt.
...
}


/**
 * A Source [[Shape]] has exactly one output and no inputs, it models a source
 * of data.
 */
final case class SourceShape[+T](out: Outlet[T @uncheckedVariance]) extends Shape {
  // No input ports, exactly one output port.
  override val inlets: immutable.Seq[Inlet[_]] = EmptyImmutableSeq
  override val outlets: immutable.Seq[Outlet[_]] = out :: Nil

  // The copy is built from a carbon copy of the single outlet.
  override def deepCopy(): SourceShape[T] = SourceShape(out.carbonCopy())
}

object SourceShape {
  /** Java API */
  def of[T](outlet: Outlet[T @uncheckedVariance]): SourceShape[T] =
    SourceShape(outlet)
}

/**
 * A Flow [[Shape]] has exactly one input and one output, it looks from the
 * outside like a pipe (but it can be a complex topology of streams within of
 * course).
 */
final case class FlowShape[-I, +O](in: Inlet[I @uncheckedVariance], out: Outlet[O @uncheckedVariance]) extends Shape {
  // Exactly one input port and one output port.
  override val inlets: immutable.Seq[Inlet[_]] = in :: Nil
  override val outlets: immutable.Seq[Outlet[_]] = out :: Nil

  // The copy is built from carbon copies of both ports.
  override def deepCopy(): FlowShape[I, O] = FlowShape(in.carbonCopy(), out.carbonCopy())
}

object FlowShape {
  /** Java API */
  def of[I, O](inlet: Inlet[I @uncheckedVariance], outlet: Outlet[O @uncheckedVariance]): FlowShape[I, O] =
    FlowShape(inlet, outlet)
}


//#bidi-shape
/**
 * A bidirectional flow of elements that consequently has two inputs and two
 * outputs, arranged like this:
 *
 * {{{
 *        +------+
 *  In1 ~>|      |~> Out1
 *        | bidi |
 * Out2 <~|      |<~ In2
 *        +------+
 * }}}
 */
final case class BidiShape[-In1, +Out1, -In2, +Out2](
  in1:  Inlet[In1 @uncheckedVariance],
  out1: Outlet[Out1 @uncheckedVariance],
  in2:  Inlet[In2 @uncheckedVariance],
  out2: Outlet[Out2 @uncheckedVariance]) extends Shape {
  //#implementation-details-elided
  // Ports are listed top direction first (in1/out1), then bottom direction (in2/out2).
  override val inlets: immutable.Seq[Inlet[_]] = in1 :: in2 :: Nil
  override val outlets: immutable.Seq[Outlet[_]] = out1 :: out2 :: Nil

  /**
   * Java API for creating from a pair of unidirectional flows.
   */
  def this(top: FlowShape[In1, Out1], bottom: FlowShape[In2, Out2]) = this(top.in, top.out, bottom.in, bottom.out)

  // The copy is built from carbon copies of all four ports.
  override def deepCopy(): BidiShape[In1, Out1, In2, Out2] =
    BidiShape(in1.carbonCopy(), out1.carbonCopy(), in2.carbonCopy(), out2.carbonCopy())
  //#implementation-details-elided
}
//#bidi-shape
object BidiShape {
  /** Builds a BidiShape from the ports of two existing FlowShapes (top and bottom direction). */
  def fromFlows[I1, O1, I2, O2](top: FlowShape[I1, O1], bottom: FlowShape[I2, O2]): BidiShape[I1, O1, I2, O2] =
    BidiShape(top.in, top.out, bottom.in, bottom.out)

  /** Java API */
  def of[In1, Out1, In2, Out2](
    in1:  Inlet[In1 @uncheckedVariance],
    out1: Outlet[Out1 @uncheckedVariance],
    in2:  Inlet[In2 @uncheckedVariance],
    out2: Outlet[Out2 @uncheckedVariance]): BidiShape[In1, Out1, In2, Out2] =
    BidiShape(in1, out1, in2, out2)
}


  /**
   * A custom [[Shape]] with two input ports and three output ports.
   *
   * @param in1  first input port
   * @param in2  second input port
   * @param out1 first output port
   * @param out2 second output port
   * @param out3 third output port
   */
  case class TwoThreeShape[I, I2, O, O2, O3](
      in1: Inlet[I],
      in2: Inlet[I2],
      out1: Outlet[O],
      out2: Outlet[O2],
      out3: Outlet[O3]) extends Shape {

    /** All input ports, in declaration order. */
    override def inlets: immutable.Seq[Inlet[_]] = List(in1, in2)

    /** All output ports, in declaration order. */
    override def outlets: immutable.Seq[Outlet[_]] = List(out1, out2, out3)

    /** A new shape of the same layout whose ports are carbon copies of this shape's ports. */
    override def deepCopy(): Shape =
      copy(
        in1 = in1.carbonCopy(),
        in2 = in2.carbonCopy(),
        out1 = out1.carbonCopy(),
        out2 = out2.carbonCopy(),
        out3 = out3.carbonCopy())
  }



/**
 * A GraphStage represents a reusable graph stream processing stage. A GraphStage consists of a [[Shape]] which describes
 * its input and output ports and a factory function that creates a [[GraphStageLogic]] which implements the processing
 * logic that ties the ports together.
 */
abstract class GraphStage[S <: Shape] extends GraphStageWithMaterializedValue[S, NotUsed] {
  // Pairs the logic produced by createLogic with NotUsed as the materialized value.
  final override def createLogicAndMaterializedValue(inheritedAttributes: Attributes): (GraphStageLogic, NotUsed) =
    (createLogic(inheritedAttributes), NotUsed)

  /** Factory for the processing logic; this is the one member a concrete stage has to implement. */
  @throws(classOf[Exception])
  def createLogic(inheritedAttributes: Attributes): GraphStageLogic
}


/**
 * Represents the processing logic behind a [[GraphStage]]. Roughly speaking, a subclass of [[GraphStageLogic]] is a
 * collection of the following parts:
 *  * A set of [[InHandler]] and [[OutHandler]] instances and their assignments to the [[Inlet]]s and [[Outlet]]s
 *    of the enclosing [[GraphStage]]
 *  * Possible mutable state, accessible from the [[InHandler]] and [[OutHandler]] callbacks, but not from anywhere
 *    else (as such access would not be thread-safe)
 *  * The lifecycle hooks [[preStart()]] and [[postStop()]]
 *  * Methods for performing stream processing actions, like pulling or pushing elements
 *
 * The stage logic is completed once all its input and output ports have been closed. This can be changed by
 * setting `setKeepGoing` to true.
 *
 * The `postStop` lifecycle hook on the logic itself is called once all ports are closed. This is the only tear down
 * callback that is guaranteed to happen, if the actor system or the materializer is terminated the handlers may never
 * see any callbacks to `onUpstreamFailure`, `onUpstreamFinish` or `onDownstreamFinish`. Therefore stage resource
 * cleanup should always be done in `postStop`.
 */
// The class body is elided in this excerpt.
abstract class GraphStageLogic private[stream] (val inCount: Int, val outCount: Int) {...}


/**
 * Collection of callbacks for an input port of a [[GraphStage]]
 */
trait InHandler {
  /**
   * Called when the input port has a new element available. The actual element can be retrieved via the
   * [[GraphStageLogic.grab()]] method.
   */
  @throws(classOf[Exception])
  def onPush(): Unit

  /**
   * Called when the input port is finished. After this callback no other callbacks will be called for this port.
   *
   * The default implementation completes the currently active stage.
   */
  @throws(classOf[Exception])
  def onUpstreamFinish(): Unit = GraphInterpreter.currentInterpreter.activeStage.completeStage()

  /**
   * Called when the input port has failed. After this callback no other callbacks will be called for this port.
   *
   * The default implementation fails the currently active stage with `ex`.
   */
  @throws(classOf[Exception])
  def onUpstreamFailure(ex: Throwable): Unit = GraphInterpreter.currentInterpreter.activeStage.failStage(ex)
}

/**
 * Collection of callbacks for an output port of a [[GraphStage]]
 */
trait OutHandler {
  /**
   * Called when the output port has received a pull, and therefore ready to emit an element, i.e. [[GraphStageLogic.push()]]
   * is now allowed to be called on this port.
   */
  @throws(classOf[Exception])
  def onPull(): Unit

  /**
   * Called when the output port will no longer accept any new elements. After this callback no other callbacks will
   * be called for this port.
   *
   * The default implementation completes the currently active stage.
   */
  @throws(classOf[Exception])
  def onDownstreamFinish(): Unit = {
    GraphInterpreter
      .currentInterpreter
      .activeStage
      .completeStage()
  }
}



/**
 * A custom source stage that emits the given strings in order and starts over
 * from the first one after the last has been emitted.
 *
 * If `chars` is empty there is nothing to cycle through, so the stage completes
 * on the first pull instead of failing with an index error.
 *
 * @param chars the strings to emit cyclically
 */
class AlphaSource(chars: Seq[String]) extends GraphStage[SourceShape[String]] {
  val outport = Outlet[String]("output")
  val shape = SourceShape(outport)

  override def createLogic(inheritedAttributes: Attributes): GraphStageLogic =
    new GraphStageLogic(shape) {
      // Snapshot into a Vector so indexed access stays cheap whatever Seq was passed in.
      private val letters = chars.toVector
      // Index of the next element to emit; only touched from the onPull callback.
      private var pos: Int = 0

      setHandler(outport, new OutHandler {
        override def onPull(): Unit =
          if (letters.isEmpty) completeStage()
          else {
            push(outport, letters(pos))
            // Wrap around to the beginning after the last element.
            pos = (pos + 1) % letters.length
          }
      })
    }
}


// A GraphStage is a GraphStageWithMaterializedValue whose materialized value type is fixed to NotUsed.
abstract class GraphStage[S <: Shape] extends GraphStageWithMaterializedValue[S, NotUsed] {...}

// A GraphStageWithMaterializedValue is itself a Graph with shape S and materialized value M.
abstract class GraphStageWithMaterializedValue[+S <: Shape, +M] extends Graph[S, M] {...}


  // The custom stage is an ordinary Graph with a SourceShape ...
  val sourceGraph: Graph[SourceShape[String],NotUsed] = new AlphaSource(Seq("A","B","C","D"))

  // ... so it can be lifted into the Source DSL; every element is then delayed by
  // one second, using the backpressure overflow strategy.
  val alphaSource =
    Source
      .fromGraph(sourceGraph)
      .delay(1.second, DelayOverflowStrategy.backpressure)

  // Print every element as it arrives.
  alphaSource.runForeach(println)


/**
 * A custom sink stage that prints every incoming string in upper case.
 *
 * A sink has no downstream to generate demand, so the stage requests the first
 * element itself in `preStart` and asks for the next one after each element.
 */
class UppercaseSink extends GraphStage[SinkShape[String]] {
  val inport = Inlet[String]("input")
  val shape = SinkShape(inport)

  override def createLogic(inheritedAttributes: Attributes): GraphStageLogic =
    new GraphStageLogic(shape) {
      setHandler(inport, new InHandler {
        override def onPush(): Unit = {
          val text = grab(inport)
          println(text.toUpperCase)
          // Request the next element.
          pull(inport)
        }
      })

      // Kick off the stream by requesting the first element.
      override def preStart(): Unit = pull(inport)
    }
}
































 /**
   * Emit a sequence of elements through the given outlet and continue with the given thunk
   * afterwards, suspending execution if necessary.
   * This action replaces the [[OutHandler]] for the given outlet if suspension
   * is needed and reinstalls the current handler upon receiving an `onPull()`
   * signal (before invoking the `andThen` function).
   */
  final protected def emitMultiple[T](out: Outlet[T], elems: Iterator[T], andThen: () ⇒ Unit): Unit =
    if (elems.hasNext) {
      if (isAvailable(out)) {
        // The outlet is available: push the first element right away.
        push(out, elems.next())
        if (elems.hasNext)
          // More elements remain: hand the rest to an EmittingIterator installed on the outlet.
          setOrAddEmitting(out, new EmittingIterator(out, elems, getNonEmittingHandler(out), andThen))
        else andThen()
      } else {
        // The outlet is not available: hand all elements to an EmittingIterator.
        setOrAddEmitting(out, new EmittingIterator(out, elems, getNonEmittingHandler(out), andThen))
      }
    } else andThen() // Nothing to emit: only the continuation runs.

下面我们就定制一个Flow GraphStage,利用read/emit让用户自定义的函数可以控制数据流元素的流动和筛选。对于Flow,需要同时关注输入端口上的上游数据推送状态以及输出端口上的下游读取请求状态:

/** Marker for any element that can travel through a [[FlowValve]]. */
trait Row

/** Decision returned by a [[FlowValve]] controller for one incoming [[Row]]. */
trait Move

/** Emit nothing for the current element and request the next one. */
case object Stand extends Move

/** Emit the given rows downstream in place of the current element. */
case class Next(rows: Iterable[Row]) extends Move

/**
 * A custom flow stage whose behaviour is decided per element by a user-supplied function.
 *
 * For every incoming row the controller returns either [[Next]] with the rows to
 * emit downstream, or any other [[Move]] (such as [[Stand]]) to drop the element
 * and ask upstream for the next one.
 *
 * @param controller decides what to emit for each incoming row
 */
class FlowValve(controller: Row => Move) extends GraphStage[FlowShape[Row,Row]] {
  val inport = Inlet[Row]("input")
  val outport = Outlet[Row]("output")
  val shape = FlowShape.of(inport,outport)

  override def createLogic(inheritedAttributes: Attributes): GraphStageLogic =
    new GraphStageLogic(shape) with InHandler with OutHandler {
      override def onPush(): Unit =
        controller(grab(inport)) match {
          // An empty Next would push nothing and pull nothing, leaving the stream
          // stuck with unanswered downstream demand, so only emit when there is
          // something to emit; otherwise fall through and pull.
          case Next(rows) if rows.nonEmpty => emitMultiple(outport, rows)
          case _ => pull(inport)
        }

      // Downstream demand is forwarded upstream as a request for one element.
      override def onPull(): Unit = pull(inport)

      setHandlers(inport, outport, this)
    }
}


 /** An order for `qty` burgers of kind `burger`. */
  case class Order(burger: String, qty: Int) extends Row

  /** One delivered burger, described by `msg`. */
  case class Burger(msg: String) extends Row

  /**
   * Controller for [[FlowValve]]: turns an [[Order]] with a positive quantity into
   * one [[Burger]] per unit ordered.
   *
   * Orders with a non-positive quantity, and rows that are not orders at all,
   * yield [[Stand]] so the valve skips them rather than failing the stream
   * with a `MatchError`.
   */
  def orderDeliver: Row => Move = {
    case Order(name, qty) if qty > 0 =>
      // One burger per unit, numbered from 1; built in a single pass.
      val burgers: Iterable[Burger] =
        (1 to qty).map(a => Burger(s"$name $a of ${qty}"))
      Next(burgers)
    case _ => Stand
  }

  val flowGraph: Graph[FlowShape[Row,Row],NotUsed] = new FlowValve(orderDeliver)
  val deliverFlow: Flow[Row,Row,NotUsed] = Flow.fromGraph(flowGraph)

  val orders = List(Order("cheeze",2),Order("beef",3),Order("pepper",1),Order("Rice",0)
                    ,Order("plain",1),Order("beef",2))

  // Run the orders through the valve and print every burger that comes out.
  Source(orders).via(deliverFlow).to(Sink.foreach(println)).run()



Burger(cheeze 1 of 2)
Burger(cheeze 2 of 2)
Burger(beef 1 of 3)
Burger(beef 2 of 3)
Burger(beef 3 of 3)
Burger(pepper 1 of 1)
Burger(plain 1 of 1)
Burger(beef 1 of 2)
Burger(beef 2 of 2)




import akka.NotUsed
import akka.actor._
import akka.stream.ActorMaterializer
import akka.stream.scaladsl._
import akka.stream.stage._
import akka.stream._
import scala.concurrent.duration._
import scala.collection.immutable.Iterable

/**
 * A custom source stage that emits the given strings in order and starts over
 * from the first one after the last has been emitted.
 *
 * If `chars` is empty there is nothing to cycle through, so the stage completes
 * on the first pull instead of failing with an index error.
 *
 * @param chars the strings to emit cyclically
 */
class AlphaSource(chars: Seq[String]) extends GraphStage[SourceShape[String]] {
  val outport = Outlet[String]("output")
  val shape = SourceShape(outport)

  override def createLogic(inheritedAttributes: Attributes): GraphStageLogic =
    new GraphStageLogic(shape) {
      // Snapshot into a Vector so indexed access stays cheap whatever Seq was passed in.
      private val letters = chars.toVector
      // Index of the next element to emit; only touched from the onPull callback.
      private var pos: Int = 0

      setHandler(outport, new OutHandler {
        override def onPull(): Unit =
          if (letters.isEmpty) completeStage()
          else {
            push(outport, letters(pos))
            // Wrap around to the beginning after the last element.
            pos = (pos + 1) % letters.length
          }
      })
    }
}

/**
 * A custom sink stage that prints every incoming string in upper case.
 *
 * A sink has no downstream to generate demand, so the stage requests the first
 * element itself in `preStart` and asks for the next one after each element.
 */
class UppercaseSink extends GraphStage[SinkShape[String]] {
  val inport = Inlet[String]("input")
  val shape = SinkShape(inport)

  override def createLogic(inheritedAttributes: Attributes): GraphStageLogic =
    new GraphStageLogic(shape) with InHandler {
      // Kick off the stream by requesting the first element.
      override def preStart(): Unit = pull(inport)

      override def onPush(): Unit = {
        println(grab(inport).toUpperCase)
        pull(inport)
      }

      setHandler(inport, this)
    }
}

/** Marker for any element that can travel through a [[FlowValve]]. */
trait Row

/** Decision returned by a [[FlowValve]] controller for one incoming [[Row]]. */
trait Move

/** Emit nothing for the current element and request the next one. */
case object Stand extends Move

/** Emit the given rows downstream in place of the current element. */
case class Next(rows: Iterable[Row]) extends Move

/**
 * A custom flow stage whose behaviour is decided per element by a user-supplied function.
 *
 * For every incoming row the controller returns either [[Next]] with the rows to
 * emit downstream, or any other [[Move]] (such as [[Stand]]) to drop the element
 * and ask upstream for the next one.
 *
 * @param controller decides what to emit for each incoming row
 */
class FlowValve(controller: Row => Move) extends GraphStage[FlowShape[Row,Row]] {
  val inport = Inlet[Row]("input")
  val outport = Outlet[Row]("output")
  val shape = FlowShape.of(inport,outport)

  override def createLogic(inheritedAttributes: Attributes): GraphStageLogic =
    new GraphStageLogic(shape) with InHandler with OutHandler {
      override def onPush(): Unit =
        controller(grab(inport)) match {
          // An empty Next would push nothing and pull nothing, leaving the stream
          // stuck with unanswered downstream demand, so only emit when there is
          // something to emit; otherwise fall through and pull.
          case Next(rows) if rows.nonEmpty => emitMultiple(outport, rows)
          case _ => pull(inport)
        }

      // Downstream demand is forwarded upstream as a request for one element.
      override def onPull(): Unit = pull(inport)

      setHandlers(inport, outport, this)
    }
}

/**
 * Demo entry point: runs the custom source into the custom sink, and a list of
 * orders through the custom flow, then waits for ENTER before shutting down.
 */
object GraphStages extends App {
  implicit val sys = ActorSystem("demoSys")
  implicit val ec = sys.dispatcher
  implicit val mat = ActorMaterializer(
    ActorMaterializerSettings(sys)
      .withInputBuffer(initialSize = 16, maxSize = 16)
  )

  val sourceGraph: Graph[SourceShape[String],NotUsed] = new AlphaSource(Seq("a","b","c","d"))
  val alphaSource = Source.fromGraph(sourceGraph).delay(1.second,DelayOverflowStrategy.backpressure)
  // alphaSource.runWith(Sink.foreach(println))

  val sinkGraph: Graph[SinkShape[String],NotUsed] = new UppercaseSink
  val upperSink = Sink.fromGraph(sinkGraph)
  alphaSource.runWith(upperSink)

  /** An order for `qty` burgers of kind `burger`. */
  case class Order(burger: String, qty: Int) extends Row

  /** One delivered burger, described by `msg`. */
  case class Burger(msg: String) extends Row

  /**
   * Controller for [[FlowValve]]: turns an [[Order]] with a positive quantity into
   * one [[Burger]] per unit ordered.
   *
   * Orders with a non-positive quantity, and rows that are not orders at all,
   * yield [[Stand]] so the valve skips them rather than failing the stream
   * with a `MatchError`.
   */
  def orderDeliver: Row => Move = {
    case Order(name, qty) if qty > 0 =>
      // One burger per unit, numbered from 1; built in a single pass.
      val burgers: Iterable[Burger] =
        (1 to qty).map(a => Burger(s"$name $a of ${qty}"))
      Next(burgers)
    case _ => Stand
  }

  val flowGraph: Graph[FlowShape[Row,Row],NotUsed] = new FlowValve(orderDeliver)
  val deliverFlow: Flow[Row,Row,NotUsed] = Flow.fromGraph(flowGraph)

  val orders = List(Order("cheeze",2),Order("beef",3),Order("pepper",1),Order("Rice",0)
                    ,Order("plain",1),Order("beef",2))

  // Run the orders through the valve and print every burger that comes out.
  Source(orders).via(deliverFlow).to(Sink.foreach(println)).run()

  // Source(1 to 10).runWith(Sink.foreach(println))

  // Keep the actor system alive until the user presses ENTER.
  scala.io.StdIn.readLine()
  sys.terminate()
}