spark-streaming kafka api(KafkaUtils.createDirectStream)使用

来源:互联网 发布:网络施工队 编辑:程序博客网 时间:2024/06/15 14:00

本程序参照spark-streaming kafka官方示例

注意:官方提供的spark-streaming-kafka调用接口,java和scala的使用方式是不同的

1.本次的程序为(以local[2]方式启动)

本次主要学习spark-streaming-kafka接口

KafkaUtils.createDirectStream

/**
 * Counts words in messages consumed from one or more Kafka topics, using the
 * direct stream created by `KafkaUtils.createDirectStream`.
 *
 * Usage: DirectKafkaWordCount <brokers> <topics>
 *   <brokers> is a list of one or more Kafka brokers (passed as `metadata.broker.list`)
 *   <topics>  is a comma-separated list of one or more Kafka topics to consume from
 *
 * The import statements are not part of this snippet: `SparkConf`,
 * `StreamingContext`, `Seconds`, `KafkaUtils`, `StringDecoder` and the
 * `StreamingExamples` helper all have to be in scope for it to compile.
 */
object DirectKafkaWordCount {

  /** Usage text printed to stderr when the argument list is not exactly `<brokers> <topics>`. */
  private val Usage: String =
    """
      |Usage: DirectKafkaWordCount <brokers> <topics>
      |  <brokers> is a list of one or more Kafka brokers
      |  <topics> is a list of one or more kafka topics to consume from
      |
      """.stripMargin

  /**
   * Entry point.
   *
   * @param args exactly two elements: the broker list and the comma-separated topic list
   */
  def main(args: Array[String]): Unit = {
    // Match on the exact shape: a plain `val Array(a, b) = args` throws a
    // MatchError as soon as a third argument is supplied.
    val (brokers, topics) = args match {
      case Array(brokerList, topicList) => (brokerList, topicList)
      case _ =>
        System.err.println(Usage)
        sys.exit(1)
    }

    StreamingExamples.setStreamingLogLevels()

    // Create context with 2 second batch interval.
    // The master is set explicitly to local[2] because this walkthrough is run in local mode.
    val sparkConf = new SparkConf()
      .setAppName("DirectKafkaWordCount")
      .setMaster("local[2]")
      .set("spark.executor.memory", "3g")
    val ssc = new StreamingContext(sparkConf, Seconds(2))

    // Create direct kafka stream with brokers and topics.
    // Blank entries and surrounding whitespace ("a, b" or "a,,b") are dropped so
    // that no empty topic name is handed to Kafka.
    val topicsSet = topics.split(",").map(_.trim).filter(_.nonEmpty).toSet
    val kafkaParams = Map[String, String]("metadata.broker.list" -> brokers)
    val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      ssc, kafkaParams, topicsSet)

    // Get the lines (the value of each key/value record), split them into
    // words, count the words per batch and print the counts.
    val lines = messages.map(_._2)
    val words = lines.flatMap(_.split(" "))
    val wordCounts = words.map(word => (word, 1L)).reduceByKey(_ + _)
    wordCounts.print()

    // Start the computation and block until the streaming context terminates.
    ssc.start()
    ssc.awaitTermination()
  }
}

2.本程序的maven工程的pom.xml文件为

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build for the DirectKafkaWordCount example project.
  Declares the Spark modules, the Kafka 0.8 streaming connector and the Kafka
  client library that the example is built against.
  NOTE(review): no Scala compiler plugin is configured in this POM; compiling the
  Scala sources with Maven alone would need one (for example scala-maven-plugin).
  Confirm how the project is actually built.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.reco</groupId>
    <artifactId>spark-examples</artifactId>
    <version>2.1.1</version>

    <properties>
        <scala.version>2.11.8</scala.version>
        <scala.binary.version>2.11</scala.binary.version>
        <!-- Spark release used by every org.apache.spark dependency below. Kept in its
             own property because project.version is Maven's built-in expression for
             this POM's own <version>, so the two can now change independently. -->
        <spark.version>2.1.1</spark.version>
        <jline.version>2.12.1</jline.version>
        <jline.groupid>jline</jline.groupid>
        <hbase.version>1.3.0</hbase.version>
        <kafka.version>0.8.2.2</kafka.version>
        <!-- Output directory used by maven-jar-plugin below; it was referenced but
             never defined in this POM. -->
        <jars.target.dir>${project.build.directory}/scala-${scala.binary.version}/jars</jars.target.dir>
    </properties>

    <dependencies>
        <!-- Prevent our dummy JAR from being included in Spark distributions or uploaded to YARN -->
        <dependency>
            <groupId>org.spark-project.spark</groupId>
            <artifactId>unused</artifactId>
            <version>1.0.0</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-mllib_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-graphx_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-flume_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
            <scope>compile</scope>
        </dependency>
        <!-- Provides KafkaUtils.createDirectStream for the Kafka 0.8 consumer API -->
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-8_${scala.binary.version}</artifactId>
            <version>${spark.version}</version>
            <scope>compile</scope>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-math3 -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-math3</artifactId>
            <version>3.6.1</version>
        </dependency>
        <dependency>
            <groupId>com.github.scopt</groupId>
            <artifactId>scopt_${scala.binary.version}</artifactId>
            <version>3.3.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_${scala.binary.version}</artifactId>
            <version>${kafka.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <!-- Can't update past this or we run into Hadoop incompatibility -->
            <version>16.0.1</version>
        </dependency>
    </dependencies>

    <build>
        <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
        <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
        <plugins>
            <!-- Deploy and install are skipped for this project -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-deploy-plugin</artifactId>
                <configuration>
                    <skip>true</skip>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-install-plugin</artifactId>
                <configuration>
                    <skip>true</skip>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <configuration>
                    <outputDirectory>${jars.target.dir}</outputDirectory>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <profiles>
        <profile>
            <id>kinesis-asl</id>
            <dependencies>
                <dependency>
                    <groupId>org.apache.spark</groupId>
                    <artifactId>spark-streaming-kinesis-asl_${scala.binary.version}</artifactId>
                    <version>${spark.version}</version>
                    <scope>provided</scope>
                </dependency>
            </dependencies>
        </profile>
    </profiles>
</project>


0 0