Flink学习笔记 --- 研究 FlinkForward2017 源码
来源:互联网 发布:上海淘宝厂家 编辑:程序博客网 时间:2024/06/05 17:31
文件目录如下:
|-functions
  |-BootstrapFunction
  |-CounterFunction
  |-FilterFunction
  |-QualifierFunction
|-models
  |-ControlEvent
  |-CustomerEvent
  |-FilteredEvent
  |-ObjectMapperTrait (接口)
  |-QualifiedEvent
|-serialization
  |-ControlEventSchema
  |-CustomerEventSchema
|-Constants
|-Job
[注]:下面代码的依赖并未写出,只写了主要核心代码的注释理解。
一、Job
/**
 * Entry point that wires the streaming topology together.
 *
 * Three Kafka topics are read ("bootstrap", "events", "controls"); the result of the
 * counting stage is written back to the "bootstrap" topic, so the bootstrap source
 * consumes what the counter emits.
 */
object Job {

  /**
   * Builds the topology and submits it under the name "BetterCloud Flink Forward".
   *
   * @param args command-line arguments; not read by this method
   */
  def main(args: Array[String]): Unit = {
    // Execution environment, forced to a single parallel instance.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Kafka connection settings (the broker address is hard-coded).
    val kafkaProps = new Properties()
    kafkaProps.setProperty("bootstrap.servers", "192.168.1.81:6667")

    // One instance of each user function used by the pipeline.
    val filterFunction = new FilterFunction
    val qualifierFunction = new QualifierFunction
    val counterFunction = new CounterFunction
    val bootstrapFunction = new BootstrapFunction

    // Historical replay: each control event read from "bootstrap" is expanded by
    // BootstrapFunction into FilteredEvents, then keyed by the event's customer id.
    val bootstrapStream = env
      .addSource(new FlinkKafkaConsumer09("bootstrap", new ControlEventSchema(), kafkaProps))
      .filter(parsed => parsed.isDefined) // drop messages that failed to deserialize
      .map(parsed => parsed.get)          // safe: only defined values pass the filter above
      .flatMap(bootstrapFunction).name("Bootstrap Function")
      .keyBy((filtered: FilteredEvent) => filtered.event.customerId)

    // Sink that writes control events back to the "bootstrap" topic.
    val bootstrapSink = new FlinkKafkaProducer09("bootstrap", new ControlEventSchema(), kafkaProps)

    // Live customer events, keyed by customer id.
    val eventStream = env
      .addSource(new FlinkKafkaConsumer09("events", new CustomerEventSchema(), kafkaProps))
      .filter(parsed => parsed.isDefined)
      .map(parsed => parsed.get)
      .keyBy((event: CustomerEvent) => event.customerId)

    // Control events, tagged "global" when they carry the global customer id and
    // "specific" otherwise.
    val controlStream = env
      .addSource(new FlinkKafkaConsumer09("controls", new ControlEventSchema(), kafkaProps))
      .filter(parsed => parsed.isDefined)
      .map(parsed => parsed.get)
      .name("Control Source")
      .split((control: ControlEvent) =>
        if (Constants.GLOBAL_CUSTOMER_ID == control.customerId) List("global")
        else List("specific"))

    // "global" control messages are broadcast.
    val globalControlStream = controlStream.select("global").broadcast

    // "specific" control messages are keyed by customer id.
    val specificControlStream = controlStream
      .select("specific")
      .keyBy((control: ControlEvent) => control.customerId)

    // Join both control streams with the customer events and filter them.
    val filteredStream = globalControlStream
      .union(specificControlStream)
      .connect(eventStream)
      .flatMap(filterFunction).name("Filtering Function")

    // Merge live and replayed events, qualify, count, and write to Kafka.
    filteredStream
      .union(bootstrapStream)
      .flatMap(qualifierFunction).name("Qualifier Function")
      .flatMap(counterFunction).name("Counter Function")
      .addSink(bootstrapSink)

    // Submit the job.
    env.execute("BetterCloud Flink Forward")
  }
}
二、Constants
/** Constants shared across the job. */
object Constants {

  /**
   * Customer id that marks a control event as global. `Job` compares each control
   * event's `customerId` against this value to route it to the "global" stream.
   */
  final val GLOBAL_CUSTOMER_ID: UUID =
    UUID.fromString("deadbeef-dead-beef-dead-beefdeadbeef")
}
三、serialization
ControlEventSchema
/**
 * Kafka schema that converts between raw message bytes and [[ControlEvent]].
 *
 * Messages are JSON text encoded as UTF-8 in both directions. Deserialization yields
 * an `Option` so that a malformed message becomes `None` instead of an exception.
 */
class ControlEventSchema
    extends DeserializationSchema[Option[ControlEvent]]
    with SerializationSchema[ControlEvent] {

  /**
   * Always returns `false`, so no element ever signals the end of the stream.
   *
   * @param nextElement the element just deserialized (ignored)
   */
  override def isEndOfStream(nextElement: Option[ControlEvent]): Boolean = false

  /**
   * Decodes the bytes as UTF-8 and parses the resulting JSON.
   *
   * @param message raw message bytes
   * @return `Some(event)` on success, `None` if parsing throws
   */
  override def deserialize(message: Array[Byte]): Option[ControlEvent] = {
    val jsonString = new String(message, "UTF-8")
    Try(ControlEvent.fromJson(jsonString)).toOption
  }

  /**
   * Renders the event as JSON and encodes it as UTF-8.
   *
   * The previous `map(_.toByte)` truncated each character to a single byte, which
   * corrupted any non-ASCII text and did not round-trip through `deserialize`.
   *
   * @param element the event to write
   * @return UTF-8 bytes of the JSON document
   */
  override def serialize(element: ControlEvent): Array[Byte] =
    ControlEvent.toJson(element).getBytes("UTF-8")

  /**
   * Type information for the values produced by `deserialize`.
   *
   * NOTE(review): `BasicTypeInfo` appears to describe only basic types; confirm this
   * lookup yields a usable `TypeInformation` for `Option[ControlEvent]` and not null.
   */
  override def getProducedType: TypeInformation[Option[ControlEvent]] = {
    BasicTypeInfo.getInfoFor(classOf[Option[ControlEvent]])
  }
}
CustomerEventSchema
/**
 * Kafka schema that converts between raw message bytes and [[CustomerEvent]].
 *
 * Messages are JSON text encoded as UTF-8 in both directions. Deserialization yields
 * an `Option` so that a malformed message becomes `None` instead of an exception.
 */
class CustomerEventSchema
    extends DeserializationSchema[Option[CustomerEvent]]
    with SerializationSchema[CustomerEvent] {

  /**
   * Always returns `false`, so no element ever signals the end of the stream.
   *
   * @param nextElement the element just deserialized (ignored)
   */
  override def isEndOfStream(nextElement: Option[CustomerEvent]): Boolean = false

  /**
   * Decodes the bytes as UTF-8 and parses the resulting JSON.
   *
   * @param message raw message bytes
   * @return `Some(event)` on success, `None` if parsing throws
   */
  override def deserialize(message: Array[Byte]): Option[CustomerEvent] = {
    val jsonString = new String(message, "UTF-8")
    Try(CustomerEvent.fromJson(jsonString)).toOption
  }

  /**
   * Renders the event as JSON and encodes it as UTF-8.
   *
   * The previous `map(_.toByte)` truncated each character to a single byte, which
   * corrupted any non-ASCII text and did not round-trip through `deserialize`.
   *
   * @param element the event to write
   * @return UTF-8 bytes of the JSON document
   */
  override def serialize(element: CustomerEvent): Array[Byte] =
    CustomerEvent.toJson(element).getBytes("UTF-8")

  /**
   * Type information for the values produced by `deserialize`.
   *
   * NOTE(review): `BasicTypeInfo` appears to describe only basic types; confirm this
   * lookup yields a usable `TypeInformation` for `Option[CustomerEvent]` and not null.
   */
  override def getProducedType: TypeInformation[Option[CustomerEvent]] = {
    BasicTypeInfo.getInfoFor(classOf[Option[CustomerEvent]])
  }
}
四、models
ControlEvent
package com.bettercloud.flinkforward.models

import java.util.UUID

/**
 * A control message describing one alert configuration.
 *
 * @param customerId          customer the control applies to; `Job` treats
 *                            `Constants.GLOBAL_CUSTOMER_ID` as "all customers"
 * @param alertId             identifier of the alert; combined with a customer id to
 *                            form the counting key in `CounterFunction`
 * @param alertName           alert name (not read by the code shown here)
 * @param alertDescription    alert description (not read by the code shown here)
 * @param threshold           alert threshold (not read by the code shown here)
 * @param jsonPath            JSON-path expression that `QualifierFunction` evaluates
 *                            against an event payload
 * @param bootstrapCustomerId customer whose historical events `BootstrapFunction`
 *                            replays for this control
 */
case class ControlEvent(
    customerId: UUID,
    alertId: UUID,
    alertName: String,
    alertDescription: String,
    threshold: Int,
    jsonPath: String,
    bootstrapCustomerId: UUID)

/** Companion that supplies `toJson` / `fromJson` for [[ControlEvent]]. */
object ControlEvent extends ObjectMapperTrait[ControlEvent]
CustomerEvent
/**
 * An event raised for one customer.
 *
 * @param customerId customer the event belongs to
 * @param payload    event body; `QualifierFunction` parses it as JSON
 */
case class CustomerEvent(
    customerId: UUID,
    payload: String)

/** Companion that supplies `toJson` / `fromJson` plus a JSON-string factory. */
object CustomerEvent extends ObjectMapperTrait[CustomerEvent] {

  /**
   * Builds a [[CustomerEvent]] from its JSON representation.
   *
   * @param s JSON document describing the event
   * @return the parsed event; whatever `fromJson` throws is propagated unchanged
   */
  def apply(s: String): CustomerEvent = fromJson(s)
}
FilteredEvent
/**
 * A customer event paired with the control configurations selected for it.
 *
 * @param event    the customer event
 * @param controls the controls attached to the event by `FilterFunction` or
 *                 `BootstrapFunction`
 */
case class FilteredEvent(
    event: CustomerEvent,
    controls: List[ControlEvent])
ObjectMapperTrait
/**
 * Mixin that gives a companion object JSON conversion for values of type `T`.
 *
 * @tparam T the type converted to and from JSON
 */
trait ObjectMapperTrait[T] {

  /**
   * Jackson mapper used for all conversions. It has the Scala module registered and
   * does not fail when the JSON contains properties unknown to `T`.
   */
  protected val mapper: ObjectMapper =
    new ObjectMapper()
      .registerModule(DefaultScalaModule)
      .disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES)

  /**
   * Serializes a value to a JSON string.
   *
   * @param clazz the value to serialize
   * @return the JSON text
   */
  def toJson(clazz: T): String = mapper.writeValueAsString(clazz)

  /**
   * Deserializes a JSON string into a `T`.
   *
   * @param json the JSON text to parse
   * @param tag  class tag that supplies the runtime class of `T`
   * @return the parsed value
   */
  def fromJson(json: String)(implicit tag: ClassTag[T]): T = {
    // The class tag only exposes Class[_]; narrow it to Class[T] for Jackson.
    val targetClass = tag.runtimeClass.asInstanceOf[Class[T]]
    mapper.readValue(json, targetClass)
  }
}
QualifiedEvent
/**
 * A customer event together with the single control it qualified for.
 *
 * @param event   the customer event
 * @param control the control whose JSON path produced a non-empty result for the event
 */
case class QualifiedEvent(
    event: CustomerEvent,
    control: ControlEvent)
五、functions
BootstrapFunction
/**
 * Replays historical customer events for a control event.
 *
 * For every incoming [[ControlEvent]] the classpath resource `/events.txt` is read,
 * one JSON-encoded [[CustomerEvent]] per line, and each event whose customer id equals
 * the control's `bootstrapCustomerId` is emitted as a [[FilteredEvent]] carrying that
 * control.
 */
class BootstrapFunction extends FlatMapFunction[ControlEvent, FilteredEvent] {

  /**
   * Emits the matching historical events for `value`.
   *
   * All lines are parsed before anything is emitted, so a malformed line fails the
   * call without partial output.
   *
   * @param value the control event that triggers the replay
   * @param out   collector receiving one [[FilteredEvent]] per matching line
   * @throws IllegalStateException if `/events.txt` is not on the classpath
   */
  override def flatMap(value: ControlEvent, out: Collector[FilteredEvent]): Unit = {
    // Fail with a clear message rather than an opaque NullPointerException.
    val stream = Option(getClass.getResourceAsStream("/events.txt")).getOrElse(
      throw new IllegalStateException("Classpath resource /events.txt not found"))

    // Decode as UTF-8 to match the Kafka schemas instead of the platform default.
    val source = Source.fromInputStream(stream, "UTF-8")

    val matching =
      try {
        source
          .getLines()
          .map(line => CustomerEvent(line))
          .filter(event => event.customerId == value.bootstrapCustomerId)
          .toList // materialize before the source is closed
      } finally {
        // Previously neither the source nor the stream was ever closed.
        source.close()
      }

    matching.foreach(event => out.collect(FilteredEvent(event, List(value))))
  }
}
CounterFunction
/**
 * Counts qualified events per (customer, alert) pair.
 *
 * The first time a pair is seen, a [[ControlEvent]] is emitted whose
 * `bootstrapCustomerId` is the event's customer id; later occurrences only increase
 * the counter. Counts live in a plain in-memory map on this instance.
 */
class CounterFunction extends FlatMapFunction[QualifiedEvent, ControlEvent] {

  /** Number of qualified events seen so far, keyed by customer id followed by alert id. */
  var counts = scala.collection.mutable.HashMap[String, Int]()

  /**
   * Updates the counter for the event's (customer, alert) pair and prints it.
   *
   * @param value the qualified event to count
   * @param out   receives one control event the first time a pair is seen
   */
  override def flatMap(value: QualifiedEvent, out: Collector[ControlEvent]): Unit = {
    val key = s"${value.event.customerId}${value.control.alertId}"

    // One lookup decides the branch; no Option.get is needed.
    counts.get(key) match {
      case Some(seen) =>
        val updated = seen + 1
        counts.put(key, updated)
        println(s"Count for ${key}: ${updated}")

      case None =>
        val c = value.control
        val initial = 1
        counts.put(key, initial)
        // Same control, with bootstrapCustomerId replaced by the event's customer.
        out.collect(ControlEvent(c.customerId, c.alertId, c.alertName, c.alertDescription,
          c.threshold, c.jsonPath, value.event.customerId))
        println(s"Bootstrap count for ${key}: ${initial}")
    }
  }
}
FilterFunction
/**
 * Joins control events with customer events.
 *
 * Control events (first input) update the list of known configurations; customer
 * events (second input) are emitted together with the configurations that apply to
 * them. Configurations live in a plain in-memory buffer on this instance.
 */
class FilterFunction() extends RichCoFlatMapFunction[ControlEvent, CustomerEvent, FilteredEvent] {

  /** Control configurations received so far. */
  var configs = new mutable.ListBuffer[ControlEvent]()

  /**
   * Stores a control event, replacing an earlier one for the same customer and alert.
   *
   * The previous predicate (`!= && !=`) evicted every config that shared either the
   * customer id or the alert id, so a customer could never hold two alerts.
   *
   * @param value the control event to store
   * @param out   unused; control events produce no output
   */
  override def flatMap1(value: ControlEvent, out: Collector[FilteredEvent]): Unit = {
    configs = configs.filterNot(existing =>
      existing.customerId == value.customerId && existing.alertId == value.alertId)
    configs.append(value)
  }

  /**
   * Emits the customer event together with the configurations that apply to it: those
   * of the same customer plus those carrying `Constants.GLOBAL_CUSTOMER_ID`. Nothing
   * is emitted when no configuration applies.
   *
   * The previous predicate compared `x.customerId` with itself, which is always true,
   * so every stored configuration was attached to every event.
   *
   * @param value the customer event
   * @param out   receives at most one [[FilteredEvent]]
   */
  override def flatMap2(value: CustomerEvent, out: Collector[FilteredEvent]): Unit = {
    val eventConfigs = configs.filter(config =>
      config.customerId == value.customerId ||
        config.customerId == Constants.GLOBAL_CUSTOMER_ID)

    if (eventConfigs.nonEmpty) {
      out.collect(FilteredEvent(value, eventConfigs.toList))
    }
  }
}
QualifierFunction
/**
 * Keeps the (event, control) pairs whose control JSON path yields a non-empty string
 * when evaluated against the event payload.
 */
class QualifierFunction extends FlatMapFunction[FilteredEvent, QualifiedEvent] {

  /**
   * Evaluates every control's `jsonPath` against the event payload and emits a
   * [[QualifiedEvent]] for each control that matches.
   *
   * A payload that cannot be parsed produces no output. A path whose evaluation
   * throws is treated as "no match" for that control only. Exceptions raised by
   * `out.collect` are no longer swallowed: previously the emit sat inside the same
   * `Try` as the path evaluation, so collector failures disappeared silently.
   *
   * @param value the filtered event with its candidate controls
   * @param out   receives one [[QualifiedEvent]] per matching control
   */
  override def flatMap(value: FilteredEvent, out: Collector[QualifiedEvent]): Unit = {
    Try(JsonPath.parse(value.event.payload)).foreach { ctx =>
      value.controls.foreach { control =>
        // Only the path evaluation is best-effort. The result is bound to a String as
        // before, so any failure while reading it counts as "no match".
        val matched = Try {
          val result: String = ctx.read(control.jsonPath)
          !result.isEmpty
        }.getOrElse(false)

        if (matched) {
          out.collect(QualifiedEvent(value.event, control))
        }
      }
    }
  }
}
阅读全文
0 0
- Flink学习笔记 --- 研究 FlinkForward2017 源码
- 《Flink学习笔记一》
- Flink学习笔记 --- Flink中Windows机制
- Flink学习笔记:1、Flink快速入门
- Flink学习笔记:2、Flink介绍
- Flink学习笔记 --- DataStream Transformations
- Flink学习笔记 --- Flink本地(Loacl模式)安装
- Flink学习笔记:3、Flink分布式模式(Standalone)
- Flink学习笔记 --- Basic Concepts整理笔记
- jQuery源码研究学习笔记(二)
- Flink Basic API Concepts 学习笔记&译文
- Flink学习笔记 --- DataSet Source and Sink
- Flink学习笔记 --- Intellij自动导入
- Flink学习笔记 --- 理解ConnectedStream 与 Union
- Flink学习笔记 --- 理解DataStream WordCount
- Flink学习笔记 --- 理解DataSet WordCount
- CJSON源码研究笔记
- CJSON源码研究笔记
- 初涉Linux(无代码)
- 机器学习实战_初识kNN算法_理解其python代码
- PAT a1050题解
- ‘XXXX’ was compiled with optimization
- Matlab常用概率密度函数、累计概率分布函数及其逆函数
- Flink学习笔记 --- 研究 FlinkForward2017 源码
- POJ #1186 方程的解数 | 中途相遇法 + Hash | 解题报告
- PAT a1051题解
- PAT a1052题解
- 自定义表格控件
- 信号槽如何传递参数
- Nginx80端口转发+域名——实现IP+端口隐藏
- 第2章 渲染流水线
- PAT a1053题解