Structured Streaming — word counts from Kafka

来源:互联网 发布:幼儿看图学英语软件 编辑:程序博客网 时间:2024/06/06 03:33
// Subscribe to 1 topic.
// TODO: read the starting offsets from the database.
val words = {
  // Streaming reader over the Kafka source for topic "topic1".
  val kafkaRecords = spark.readStream
    .format("kafka")
    .option("kafka.bootstrap.servers", "host1:port1,host2:port2")
    .option("subscribe", "topic1")
    .load()

  // Keep only the record value, cast to a string, as a typed Dataset.
  kafkaRecords
    .selectExpr("CAST(value AS STRING)")
    .as[(String)]
}

// Count occurrences of each distinct value using the DataFrame API.
// `words` is built from `CAST(value AS STRING)`, so its only column is
// `value`; grouping by a non-existent "name" column would fail analysis.
val wordCounts = words.groupBy("value").count()

// Start the streaming query: on a 5-second processing-time trigger, emit the
// complete counts table and hand every row to a ForeachWriter that writes it
// to a per-partition file under /tmp/example.
val query = wordCounts.writeStream
  .trigger(ProcessingTime(5.seconds))
  .outputMode("complete")
  .foreach(new ForeachWriter[Row] {
    // Assigned in open(); remains null if open() did not get that far.
    var fileWriter: FileWriter = _

    // Ensures the partition directory exists, then opens its output file.
    // FileWriter is created without the append flag, so an existing file is
    // truncated each time the writer is opened.
    override def open(partitionId: Long, version: Long): Boolean = {
      FileUtils.forceMkdir(new File(s"/tmp/example/${partitionId}"))
      fileWriter = new FileWriter(new File(s"/tmp/example/${partitionId}/temp"))
      true
    }

    // Writes one row as a comma-separated line.
    // TODO: record the offsets here.
    override def process(value: Row): Unit = {
      val line = value.toSeq.mkString(",")
      // Terminate each row with a newline so consecutive rows do not run
      // together in the output file.
      fileWriter.append(line).append("\n")
    }

    // Closes the file if it was opened; guards against a null writer so a
    // failure before assignment in open() does not surface as an NPE here.
    override def close(errorOrNull: Throwable): Unit = {
      Option(fileWriter).foreach(_.close())
    }
  })
  .start()

原创粉丝点击