第6课:Spark Streaming源码解读之Job动态生成和深度思考

来源:互联网 发布:java u003d 编辑:程序博客网 时间:2024/05/16 12:53

第6课:Spark Streaming源码解读之Job动态生成和深度思考

 

/**
 * This class schedules jobs to be run on Spark. It uses the JobGenerator to generate
 * the jobs and runs them using a thread pool.
 *
 * @param ssc the streaming context this scheduler reads its configuration, DStream graph
 *            and SparkContext from
 */
private[streaming]
class JobScheduler(val ssc: StreamingContext) extends Logging {

  // Use of ConcurrentHashMap.keySet later causes an odd runtime problem due to Java 7/8 diff
  // https://gist.github.com/AlainODea/1375759b8720a3f9f094
  private val jobSets: java.util.Map[Time, JobSet] = new ConcurrentHashMap[Time, JobSet]

  // Size of the job-executor pool; read from "spark.streaming.concurrentJobs", defaulting to 1.
  private val numConcurrentJobs = ssc.conf.getInt("spark.streaming.concurrentJobs", 1)
  private val jobExecutor =
    ThreadUtils.newDaemonFixedThreadPool(numConcurrentJobs, "streaming-job-executor")
  private val jobGenerator = new JobGenerator(this)
  val clock = jobGenerator.clock
  val listenerBus = new StreamingListenerBus()

  // These two are created only when scheduler starts.
  // eventLoop not being null means the scheduler has been started and not stopped
  var receiverTracker: ReceiverTracker = null
  // A tracker to track all the input stream information as well as processed record number
  var inputInfoTracker: InputInfoTracker = null

  private var eventLoop: EventLoop[JobSchedulerEvent] = null

  /**
   * Starts the scheduler. Does nothing if `eventLoop` is already non-null, i.e. if the
   * scheduler has been started before.
   *
   * In order, this method:
   *  1. creates and starts the event loop, which forwards events to `processEvent` and
   *     errors to `reportError`;
   *  1. registers every rate controller of the graph's input streams as a streaming listener;
   *  1. starts the listener bus on the SparkContext;
   *  1. creates `receiverTracker` and `inputInfoTracker`;
   *  1. starts the receiver tracker and then the job generator.
   *
   * The whole body runs inside `synchronized` on this scheduler instance.
   */
  def start(): Unit = synchronized {
    if (eventLoop != null) return // scheduler has already been started

    logDebug("Starting JobScheduler")
    eventLoop = new EventLoop[JobSchedulerEvent]("JobScheduler") {
      override protected def onReceive(event: JobSchedulerEvent): Unit = processEvent(event)

      override protected def onError(e: Throwable): Unit = reportError("Error in job scheduler", e)
    }
    eventLoop.start()

    // attach rate controllers of input streams to receive batch completion updates
    for {
      inputDStream <- ssc.graph.getInputStreams
      rateController <- inputDStream.rateController
    } ssc.addStreamingListener(rateController)

    listenerBus.start(ssc.sparkContext)
    receiverTracker = new ReceiverTracker(ssc)
    inputInfoTracker = new InputInfoTracker(ssc)
    // The receiver tracker is started before the job generator.
    receiverTracker.start()
    jobGenerator.start()
    logInfo("Started JobScheduler")
  }

  // NOTE(review): the excerpt ends here; the closing brace and any remaining members of
  // JobScheduler are not shown in this text.


 

 

0 0
原创粉丝点击