Flink学习笔记 --- 研究 FlinkForward2017 源码

来源:互联网 发布:上海淘宝厂家 编辑:程序博客网 时间:2024/06/05 17:31

文件目录如下:

|-functions

|-BootstrapFunction

|-CounterFunction

|-FilterFunction

|-QualifierFunction

|-models

|-ControlEvent

|-CustomerEvent

|-FilteredEvent

|-ObjectMapperTrait ( 接口)

|-QualifiedEvent

|-serialization

|-ControlEventSchema

|-CustomerEventSchema

|-Constants

|-Job


[注]:下面代码的依赖并未写出,只写了主要核心代码的注释理解。


一、Job

object Job {

  /**
    * Builds and runs the streaming job.
    *
    * Three Kafka topics are consumed ("bootstrap", "events" and "controls").
    * Control events and customer events are joined in [[FilterFunction]],
    * merged with the replayed events produced by [[BootstrapFunction]],
    * qualified by [[QualifierFunction]], counted by [[CounterFunction]], and
    * the counter's output is written to the "bootstrap" topic, which is the
    * same topic the bootstrap source reads from.
    *
    * @param args command-line arguments (not read)
    */
  def main(args: Array[String]): Unit = {
    // Execution environment, fixed to a parallelism of 1.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Kafka settings shared by all consumers and by the producer.
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "192.168.1.81:6667")

    // Operator instances used by the pipeline below.
    val filterFunction = new FilterFunction
    val qualifierFunction = new QualifierFunction
    val counterFunction = new CounterFunction
    val bootstrapFunction = new BootstrapFunction

    // Bootstrap requests: drop records that failed to deserialize, unwrap
    // the Option, expand each request into filtered events and key the
    // result by the customer id of the event.
    val bootstrapSource = new FlinkKafkaConsumer09("bootstrap", new ControlEventSchema(), properties)
    val bootstrapStream = env
      .addSource(bootstrapSource)
      .filter(_.isDefined)
      .map(_.get)
      .flatMap(bootstrapFunction).name("Bootstrap Function")
      .keyBy((fe: FilteredEvent) => fe.event.customerId)

    // Kafka producer that receives the output of the counter.
    val bootstrapSink = new FlinkKafkaProducer09("bootstrap", new ControlEventSchema(), properties)

    // Customer events, keyed by customer id.
    val eventSource = new FlinkKafkaConsumer09("events", new CustomerEventSchema(), properties)
    val eventStream = env
      .addSource(eventSource)
      .filter(_.isDefined)
      .map(_.get)
      .keyBy((ce: CustomerEvent) => ce.customerId)

    // Control events, tagged "global" when they carry the global customer id
    // and "specific" otherwise.
    val controlSource = new FlinkKafkaConsumer09("controls", new ControlEventSchema(), properties)
    val controlStream = env
      .addSource(controlSource)
      .filter(_.isDefined)
      .map(_.get)
      .name("Control Source")
      .split((ce: ControlEvent) =>
        if (ce.customerId == Constants.GLOBAL_CUSTOMER_ID) List("global") else List("specific"))

    // Global controls are broadcast to every task.
    val globalControlStream = controlStream.select("global").broadcast

    // Customer-specific controls are keyed by customer id.
    val specificControlStream = controlStream
      .select("specific")
      .keyBy((ce: ControlEvent) => ce.customerId)

    // Connect the merged control stream with the customer events.
    val controlsWithEvents = globalControlStream
      .union(specificControlStream)
      .connect(eventStream)

    val filteredEvents = controlsWithEvents
      .flatMap(filterFunction).name("Filtering Function")

    // Merge the live filtered events with the replayed bootstrap events.
    val qualifiedEvents = filteredEvents
      .union(bootstrapStream)
      .flatMap(qualifierFunction).name("Qualifier Function")

    val countedEvents = qualifiedEvents
      .flatMap(counterFunction).name("Counter Function")

    // Write the counter output to Kafka.
    countedEvents.addSink(bootstrapSink)

    // Run the job.
    env.execute("BetterCloud Flink Forward")
  }
}



二、Constants


/** Shared constants for the job. */
object Constants {

  // Textual form of the global customer id.
  private val GlobalCustomerIdText = "deadbeef-dead-beef-dead-beefdeadbeef"

  /**
    * Customer id that marks a control event as global; the job routes
    * control events carrying this id to the "global" stream.
    */
  final val GLOBAL_CUSTOMER_ID: UUID = UUID.fromString(GlobalCustomerIdText)
}

三、serialization

ControlEventSchema

/**
  * Kafka (de)serialization schema for [[ControlEvent]], using UTF-8 encoded JSON.
  *
  * Deserialization is best-effort: a record that cannot be turned into a
  * [[ControlEvent]] yields `None` instead of failing the job.
  */
class ControlEventSchema extends DeserializationSchema[Option[ControlEvent]] with SerializationSchema[ControlEvent] {

  /**
    * The stream is unbounded, so no element ever marks its end.
    *
    * @param nextElement the element just deserialized (ignored)
    * @return always `false`
    */
  override def isEndOfStream(nextElement: Option[ControlEvent]): Boolean = false

  /**
    * Decodes a UTF-8 JSON record into a [[ControlEvent]].
    *
    * @param message raw record bytes
    * @return `Some(event)` on success; `None` when the record is `null`,
    *         is not valid JSON for a [[ControlEvent]], or decodes to `null`
    */
  override def deserialize(message: Array[Byte]): Option[ControlEvent] =
    Try {
      // Decoding happens inside the Try so that a null record is dropped
      // like any other malformed record instead of throwing.
      val jsonString = new String(message, java.nio.charset.StandardCharsets.UTF_8)
      ControlEvent.fromJson(jsonString)
    }.toOption.filter(_ != null) // the JSON literal `null` parses to a null reference

  /**
    * Encodes a [[ControlEvent]] as UTF-8 JSON.
    *
    * @param element the event to serialize
    * @return the UTF-8 bytes of the event's JSON representation
    */
  override def serialize(element: ControlEvent): Array[Byte] =
    // Encode with the same charset that deserialize decodes with; narrowing
    // each Char to a Byte would corrupt every non-ASCII character.
    ControlEvent.toJson(element).getBytes(java.nio.charset.StandardCharsets.UTF_8)

  /**
    * Type information for the elements produced by [[deserialize]].
    *
    * @return type information for `Option[ControlEvent]`
    */
  override def getProducedType: TypeInformation[Option[ControlEvent]] =
    // BasicTypeInfo.getInfoFor only knows basic types and yields null for
    // anything else, so let Flink extract the type information instead.
    TypeInformation.of(classOf[Option[ControlEvent]])
}


CustomerEventSchema


/**
  * Kafka (de)serialization schema for [[CustomerEvent]], using UTF-8 encoded JSON.
  *
  * Deserialization is best-effort: a record that cannot be turned into a
  * [[CustomerEvent]] yields `None` instead of failing the job.
  */
class CustomerEventSchema extends DeserializationSchema[Option[CustomerEvent]] with SerializationSchema[CustomerEvent] {

  /**
    * The stream is unbounded, so no element ever marks its end.
    *
    * @param nextElement the element just deserialized (ignored)
    * @return always `false`
    */
  override def isEndOfStream(nextElement: Option[CustomerEvent]): Boolean = false

  /**
    * Decodes a UTF-8 JSON record into a [[CustomerEvent]].
    *
    * @param message raw record bytes
    * @return `Some(event)` on success; `None` when the record is `null`,
    *         is not valid JSON for a [[CustomerEvent]], or decodes to `null`
    */
  override def deserialize(message: Array[Byte]): Option[CustomerEvent] =
    Try {
      // Decoding happens inside the Try so that a null record is dropped
      // like any other malformed record instead of throwing.
      val jsonString = new String(message, java.nio.charset.StandardCharsets.UTF_8)
      CustomerEvent.fromJson(jsonString)
    }.toOption.filter(_ != null) // the JSON literal `null` parses to a null reference

  /**
    * Encodes a [[CustomerEvent]] as UTF-8 JSON.
    *
    * @param element the event to serialize
    * @return the UTF-8 bytes of the event's JSON representation
    */
  override def serialize(element: CustomerEvent): Array[Byte] =
    // Encode with the same charset that deserialize decodes with; narrowing
    // each Char to a Byte would corrupt every non-ASCII character.
    CustomerEvent.toJson(element).getBytes(java.nio.charset.StandardCharsets.UTF_8)

  /**
    * Type information for the elements produced by [[deserialize]].
    *
    * @return type information for `Option[CustomerEvent]`
    */
  override def getProducedType: TypeInformation[Option[CustomerEvent]] =
    // BasicTypeInfo.getInfoFor only knows basic types and yields null for
    // anything else, so let Flink extract the type information instead.
    TypeInformation.of(classOf[Option[CustomerEvent]])
}

四、models

ControlEvent

package com.bettercloud.flinkforward.models

import java.util.UUID

/**
  * A control message describing an alert for a customer.
  *
  * @param customerId          customer the control applies to; the job treats
  *                            the value `Constants.GLOBAL_CUSTOMER_ID` as
  *                            "applies to everyone"
  * @param alertId             identifier of the alert
  * @param alertName           name of the alert
  * @param alertDescription    description of the alert
  * @param threshold           alert threshold (not read by any code shown
  *                            here -- TODO confirm where it is consumed)
  * @param jsonPath            JsonPath expression evaluated against an
  *                            event's payload by `QualifierFunction`
  * @param bootstrapCustomerId customer whose stored events
  *                            `BootstrapFunction` replays for this control
  */
case class ControlEvent(
  customerId: UUID,
  alertId: UUID,
  alertName: String,
  alertDescription: String,
  threshold: Int,
  jsonPath: String,
  bootstrapCustomerId: UUID
)

/** Companion providing JSON conversion for [[ControlEvent]]. */
object ControlEvent extends ObjectMapperTrait[ControlEvent]


CustomerEvent

/**
  * An event belonging to a customer.
  *
  * @param customerId customer that produced the event
  * @param payload    event body; `QualifierFunction` parses it as JSON
  */
case class CustomerEvent(customerId: UUID, payload: String)

/** Companion providing JSON conversion for [[CustomerEvent]]. */
object CustomerEvent extends ObjectMapperTrait[CustomerEvent] {

  /**
    * Builds a [[CustomerEvent]] from its JSON representation.
    *
    * @param s the JSON string to deserialize
    * @return the deserialized event
    */
  def apply(s: String): CustomerEvent = fromJson(s)
}


FilteredEvent

/**
  * A customer event paired with the control events selected for it.
  *
  * @param event    the customer event
  * @param controls the control events to evaluate against `event`
  */
case class FilteredEvent(
  event: CustomerEvent,
  controls: List[ControlEvent]
)


ObjectMapperTrait

/**
  * Mix-in that gives a companion object JSON (de)serialization for `T`
  * through a Jackson `ObjectMapper`.
  *
  * @tparam T the type converted to and from JSON
  */
trait ObjectMapperTrait[T] {

  /**
    * The Jackson mapper used for `T`: the Scala module is registered and
    * unknown JSON properties are ignored rather than rejected.
    */
  protected val mapper = {
    val configured = new ObjectMapper()
    configured.registerModule(DefaultScalaModule)
    configured.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES)
    configured
  }

  /**
    * Serializes the given value to a JSON string.
    *
    * @param clazz the value to serialize
    * @return its JSON representation
    */
  def toJson(clazz: T): String = mapper.writeValueAsString(clazz)

  /**
    * Deserializes a JSON string into a `T`.
    *
    * @param json the JSON string to deserialize
    * @param tag  class tag supplying the runtime class of `T`
    * @return the deserialized value
    */
  def fromJson(json: String)(implicit tag: ClassTag[T]): T = {
    val targetClass = tag.runtimeClass.asInstanceOf[Class[T]]
    mapper.readValue(json, targetClass)
  }
}


QualifiedEvent

/**
  * A customer event together with one control event it qualified for.
  *
  * @param event   the customer event
  * @param control the control event whose JsonPath matched the event
  */
case class QualifiedEvent(
  event: CustomerEvent,
  control: ControlEvent
)


五、functions

BootstrapFunction

/**
  * Replays stored customer events for a control event.
  *
  * For every incoming [[ControlEvent]] the classpath resource `/events.txt`
  * is read (one JSON-encoded [[CustomerEvent]] per line). Each event whose
  * customer id equals the control's `bootstrapCustomerId` is emitted as a
  * [[FilteredEvent]] carrying that single control.
  */
class BootstrapFunction extends FlatMapFunction[ControlEvent, FilteredEvent] {

  /**
    * @param value the control event requesting a replay
    * @param out   collector receiving one [[FilteredEvent]] per matching stored event
    * @throws IllegalStateException if the `/events.txt` resource is not on the classpath
    */
  override def flatMap(value: ControlEvent, out: Collector[FilteredEvent]): Unit = {
    // getResourceAsStream signals a missing resource with null; fail with a
    // clear message instead of an anonymous NullPointerException later on.
    val stream = Option(getClass.getResourceAsStream("/events.txt")).getOrElse(
      throw new IllegalStateException("Classpath resource /events.txt not found"))

    val source = Source.fromInputStream(stream)
    try {
      source
        .getLines
        .toList // parse every line before emitting anything
        .map(line => CustomerEvent(line))
        .filter(event => event.customerId == value.bootstrapCustomerId)
        .foreach(event => out.collect(FilteredEvent(event, List(value))))
    } finally {
      // Closing the Source also closes the underlying resource stream, which
      // would otherwise leak once per control event.
      source.close()
    }
  }
}

CounterFunction

/**
  * Counts qualified events per (customer, alert) pair.
  *
  * The first event seen for a pair emits a [[ControlEvent]] copied from the
  * qualifying control with `bootstrapCustomerId` set to the event's customer;
  * later events for the same pair only increase the count. Counts are kept
  * in the `counts` map held by this instance.
  */
class CounterFunction extends FlatMapFunction[QualifiedEvent, ControlEvent] {

  /** Number of qualified events seen so far, keyed by customer id followed by alert id. */
  var counts = scala.collection.mutable.HashMap[String, Int]()

  /**
    * @param value the qualified event to count
    * @param out   collector receiving a control event the first time a
    *              (customer, alert) pair is seen
    */
  override def flatMap(value: QualifiedEvent, out: Collector[ControlEvent]): Unit = {
    val key = s"${value.event.customerId}${value.control.alertId}"

    counts.get(key) match {
      case Some(previous) =>
        // Pair already known: increment and report.
        counts.put(key, previous + 1)
        println(s"Count for ${key}: ${counts(key)}")

      case None =>
        // First occurrence: start counting and emit the control event.
        counts.put(key, 1)
        out.collect(value.control.copy(bootstrapCustomerId = value.event.customerId))
        println(s"Bootstrap count for ${key}: ${counts(key)}")
    }
  }
}

FilterFunction


/**
  * Joins control events with customer events.
  *
  * Control events (input 1) are remembered in `configs`; each customer event
  * (input 2) is emitted together with the remembered controls that apply to
  * it. Events with no applicable control are dropped.
  */
class FilterFunction() extends RichCoFlatMapFunction[ControlEvent, CustomerEvent, FilteredEvent] {

  /** Controls received so far, at most one per (customerId, alertId) pair. */
  var configs = new mutable.ListBuffer[ControlEvent]()

  /**
    * Stores a control event, replacing any earlier one with the same
    * customer id and alert id.
    *
    * @param value the incoming control event
    * @param out   unused; control events produce no output
    */
  override def flatMap1(value: ControlEvent, out: Collector[FilteredEvent]): Unit = {
    // Only the control with BOTH the same customer and the same alert is
    // superseded. Requiring both ids to differ would also evict every other
    // alert of this customer and every other customer's copy of this alert.
    configs = configs.filterNot(x => x.customerId == value.customerId && x.alertId == value.alertId)
    configs.append(value)
  }

  /**
    * Emits the customer event with the controls that apply to it: those
    * registered for the event's own customer plus the global ones.
    *
    * @param value the incoming customer event
    * @param out   collector receiving the event when at least one control applies
    */
  override def flatMap2(value: CustomerEvent, out: Collector[FilteredEvent]): Unit = {
    // Compare against the event's customer id; comparing a control's id with
    // itself is always true and would match every stored control.
    val eventConfigs = configs.filter(x =>
      x.customerId == value.customerId || x.customerId == Constants.GLOBAL_CUSTOMER_ID)

    if (eventConfigs.nonEmpty) {
      out.collect(FilteredEvent(value, eventConfigs.toList))
    }
  }
}

QualifierFunction

/**
  * Evaluates each control's JsonPath against the event payload and emits a
  * [[QualifiedEvent]] for every control whose path yields a non-empty string.
  *
  * Failures are swallowed: an unparsable payload produces no output, and a
  * failure while evaluating one control only skips that control.
  */
class QualifierFunction extends FlatMapFunction[FilteredEvent, QualifiedEvent] {

  /**
    * @param value the event together with its candidate controls
    * @param out   collector receiving one [[QualifiedEvent]] per matching control
    */
  override def flatMap(value: FilteredEvent, out: Collector[QualifiedEvent]): Unit = {
    // Parse the payload once; a parse failure leaves nothing to evaluate.
    val parsedPayload = Try(JsonPath.parse(value.event.payload))

    parsedPayload.map { document =>
      for (control <- value.controls) {
        // Each control is evaluated in its own Try so that one failing path
        // does not prevent the remaining controls from being checked.
        Try {
          val matched: String = document.read(control.jsonPath)
          if (!matched.isEmpty) {
            out.collect(QualifiedEvent(value.event, control))
          }
        }
      }
    }
  }
}



原创粉丝点击