Flink学习笔记 --- 理解DataSet WordCount

来源:互联网 发布:mac改壁纸 编辑:程序博客网 时间:2024/06/01 09:15

pom.xml 文件:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Coordinates of the word-count example project. -->
  <groupId>zetyun</groupId>
  <artifactId>FlinkWordCounts</artifactId>
  <version>1.0-SNAPSHOT</version>
  <inceptionYear>2008</inceptionYear>

  <properties>
    <scala.version>2.11.0</scala.version>
    <!-- Single source of truth so every Flink module stays on the same release. -->
    <flink.version>1.3.0</flink.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-core -->
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-core</artifactId>
      <version>${flink.version}</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients_2.11 -->
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-clients_2.11</artifactId>
      <version>${flink.version}</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-scala_2.11 -->
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-scala_2.11</artifactId>
      <version>${flink.version}</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-scala_2.11 -->
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-streaming-scala_2.11</artifactId>
      <version>${flink.version}</version>
    </dependency>

    <!--
      Replaces flink-streaming-core:0.9.1-hadoop1. That artifact belongs to the
      0.9.x release line and must not be mixed with 1.3.0 modules; its successor
      is flink-streaming-java, kept here on the same version as the rest.
      https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java_2.11
    -->
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-streaming-java_2.11</artifactId>
      <version>${flink.version}</version>
    </dependency>
  </dependencies>
</project>


DataSet WordCount

package zetyun

// The wildcard import is required, not cosmetic: it brings into scope the
// implicit TypeInformation evidence that fromElements / flatMap / map demand.
// Without it the program fails to compile.
import org.apache.flink.api.scala._
import org.apache.flink.api.scala.ExecutionEnvironment

/**
  * Created by ryan on 17-7-19.
  *
  * Word count on the Flink DataSet (batch) API: counts how often each
  * lower-cased word occurs in a small in-memory sample and prints the
  * (word, count) pairs.
  */
object DataSetWordCount {

  /**
    * Program entry point.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    // set up the execution environment
    val env = ExecutionEnvironment.getExecutionEnvironment

    // get input data
    val text = env.fromElements(
      "To be, or not to be,--that is the question:--",
      "Whether 'tis nobler in the mind to suffer",
      "The slings and arrows of outrageous fortune",
      "Or to take arms against a sea of troubles,")

    val counts = text
      // Split each line on runs of non-word characters. A line that begins
      // with such a character yields a leading empty token, so drop empties
      // to avoid counting "" as a word.
      .flatMap { _.toLowerCase.split("\\W+").filter(_.nonEmpty) }
      .map { (_, 1) } // pair every word with an initial count: (word, 1)
      .groupBy(0)     // group by the word (tuple field 0)
      .sum(1)         // add up the counts (tuple field 1) within each group

    // emit result and print result
    counts.print()
  }
}