Kafka + Spark Streaming + HBase


First, the pom file:

<dependencies>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka_2.10</artifactId>
        <version>0.10.0.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>0.10.0.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.10</artifactId>
        <version>1.6.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_2.10</artifactId>
        <version>1.6.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-kafka_2.10</artifactId>
        <version>1.6.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>1.2.0</version>
    </dependency>
</dependencies>

Simulating a Kafka producer

Imports:
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}
import java.util.Properties

Producer code:

def main(args: Array[String]): Unit = {
  val topic = "user_events"
  val brokers = "server-name:9092" // placeholder for your broker address
  val props = new Properties()
  props.put("bootstrap.servers", brokers)
  props.put("acks", "0") // fire-and-forget: do not wait for broker acknowledgment
  props.put("retries", "0")
  props.put("batch.size", "16384")
  props.put("linger.ms", "1")
  props.put("buffer.memory", "33554432")
  props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
  props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
  val producer = new KafkaProducer[String, String](props)
  for (i <- 0 to 100) {
    // the ProducerRecord constructor can also take a key or an explicit partition; see below
    producer.send(new ProducerRecord[String, String](topic, Integer.toString(i)))
  }
  producer.close()
}
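As the comment notes, ProducerRecord has overloads that accept a key, or a key plus an explicit partition. A minimal sketch of both, using the same topic and producer as above (the key "user-1", the value "clicked", and partition 0 are illustrative values, not from the original post):

// Route by key: records with the same key always land on the same partition
producer.send(new ProducerRecord[String, String](topic, "user-1", "clicked"))
// Pin the record to partition 0 explicitly
producer.send(new ProducerRecord[String, String](topic, Integer.valueOf(0), "user-1", "clicked"))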

Spark Streaming + HBase

Imports:

import kafka.common.TopicAndPartition
import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka.KafkaUtils // required for createDirectStream
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, Put}
import org.apache.hadoop.hbase.util.Bytes

Processing code:

object KafkaSparkStreaming {
  // The HBase connection is created once, as a member of the object (see the note below)
  val hconf = HBaseConfiguration.create()
  hconf.set("hbase.zookeeper.quorum", "rozntgtest8")
  hconf.set("hbase.zookeeper.property.clientPort", "2181")
  hconf.set("mapreduce.task.timeout", "120000000")
  hconf.set("hbase.client.scanner.timeout.period", "600000000")
  hconf.set("hbase.rpc.timeout", "600000000")
  val conn: Connection = ConnectionFactory.createConnection(hconf)

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[1]").setAppName("UserClickCountStat")
    val ssc = new StreamingContext(conf, Seconds(5))
    val topics = Set("user_events")
    val brokers = "host-name:9092" // placeholder for your broker address
    // The direct stream only needs the broker list; decoding is handled by StringDecoder
    val kafkaParams = Map[String, String]("metadata.broker.list" -> brokers)
    val kafkaStream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topics)

    kafkaStream.foreachRDD(rdd => {
      // Create the target table on first use
      val admin = conn.getAdmin
      if (!admin.tableExists(TableName.valueOf("number"))) {
        val descriptor = new HTableDescriptor(TableName.valueOf("number")).addFamily(new HColumnDescriptor("info"))
        admin.createTable(descriptor)
      }
      rdd.foreachPartition(partition => {
        // Open the table once per partition rather than once per record
        val table = conn.getTable(TableName.valueOf("number"))
        partition.foreach(record => {
          // Each message value becomes a column qualifier under the fixed rowkey
          val put = new Put(Bytes.toBytes("rowkey"))
          put.addColumn(Bytes.toBytes("info"), Bytes.toBytes(record._2), Bytes.toBytes(record._2))
          table.put(put)
        })
        table.close()
      })
    })

    ssc.start()
    ssc.awaitTermination()
  }
}
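With both programs running, the producer's 101 values end up as column qualifiers in the info family of table number. Because the rowkey is the constant "rowkey", every record writes to the same row; you can confirm the result in the HBase shell with scan 'number'.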

One caveat: conn must be a member variable. When I made it a local variable during testing, it could not be serialized and the job failed with: task not serializable.
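The root cause is that HBase's Connection is not serializable, so it cannot travel inside a closure that Spark ships to executors; as a member of a Scala object it is instead re-created when the object initializes on each JVM. A common alternative (a minimal sketch, not from the original post) is to open the connection inside foreachPartition, so it never leaves the executor:

kafkaStream.foreachRDD(rdd => {
  rdd.foreachPartition(partition => {
    // This block runs on the executor, so nothing non-serializable
    // is captured by the closure shipped from the driver
    val hconf = HBaseConfiguration.create()
    hconf.set("hbase.zookeeper.quorum", "rozntgtest8")
    hconf.set("hbase.zookeeper.property.clientPort", "2181")
    val conn = ConnectionFactory.createConnection(hconf)
    val table = conn.getTable(TableName.valueOf("number"))
    partition.foreach(record => {
      val put = new Put(Bytes.toBytes("rowkey"))
      put.addColumn(Bytes.toBytes("info"), Bytes.toBytes(record._2), Bytes.toBytes(record._2))
      table.put(put)
    })
    table.close()
    conn.close()
  })
})

Opening a connection per partition trades some setup cost for safety; a lazily initialized per-executor singleton is the usual refinement when batches are small and frequent.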