spark与elasticsearch整合
来源:互联网 发布:c语言常考题 编辑:程序博客网 时间:2024/06/01 10:20
<dependencies>
    <!-- Scala language runtime and compiler -->
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-library</artifactId>
        <version>${scala.version}</version>
        <scope>compile</scope>
    </dependency>
    <dependency>
        <groupId>org.scala-lang</groupId>
        <artifactId>scala-compiler</artifactId>
        <version>${scala.version}</version>
        <scope>compile</scope>
    </dependency>

    <!--<dependency>-->
    <!--<groupId>org.specs2</groupId>-->
    <!--<artifactId>specs2_${scala.binary.version}</artifactId>-->
    <!--<version>3.3.1</version>-->
    <!--<scope>test</scope>-->
    <!--</dependency>-->

    <!-- Testing -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.12</version>
        <scope>test</scope>
    </dependency>

    <!-- Spark (provided by the cluster at runtime) -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
        <scope>provided</scope>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-streaming-kafka_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_${scala.binary.version}</artifactId>
        <version>${spark.version}</version>
    </dependency>

    <!-- elasticsearch-hadoop: excludes its own Spark/Storm/Cascading pulls so the
         Spark artifacts declared above (matching ${scala.binary.version}) win -->
    <dependency>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch-hadoop</artifactId>
        <version>${elasticsearch.version}</version>
        <exclusions>
            <exclusion>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-core_2.10</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-sql_2.10</artifactId>
            </exclusion>
            <exclusion>
                <groupId>org.apache.storm</groupId>
                <artifactId>storm-core</artifactId>
            </exclusion>
            <exclusion>
                <groupId>cascading</groupId>
                <artifactId>cascading-hadoop</artifactId>
            </exclusion>
        </exclusions>
    </dependency>

    <!-- Misc clients used by the demos -->
    <dependency>
        <groupId>redis.clients</groupId>
        <artifactId>jedis</artifactId>
        <version>2.8.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.commons</groupId>
        <artifactId>commons-pool2</artifactId>
        <version>2.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>0.9.0.1</version>
    </dependency>
    <dependency>
        <groupId>org.codehaus.jettison</groupId>
        <artifactId>jettison</artifactId>
        <version>1.3.7</version>
    </dependency>
</dependencies>
Demo1:
package demo.spark.elasticsearch

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.elasticsearch.spark._

/**
 * Created by cao on 16-3-25.
 *
 * Demo1: writes two plain Scala Maps as documents into the Elasticsearch
 * index/type "spark/docs" via the elasticsearch-hadoop RDD API (saveToEs).
 */
object Demo1 {
  // Fixed: deprecated procedure syntax `def main(...) { }` -> explicit `: Unit =`.
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("ESDemo1")
    // Let elasticsearch-hadoop create the target index if it does not exist yet.
    conf.set("es.index.auto.create", "true")
    val sc = new SparkContext(conf)

    val numbers  = Map("one" -> 1, "two" -> 2, "three" -> 3)
    val airports = Map("arrival" -> "Otopeni", "SFO" -> "San Fran")

    // Each Map becomes one JSON document in index "spark", type "docs".
    sc.makeRDD(Seq(numbers, airports)).saveToEs("spark/docs")
  }
}
{"took":2,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":12,"max_score":1.0,"hits":[{"_index":"spark","_type":"docs","_id":"AVOukOOI0OVBGh8ft4am","_score":1.0,"_source":{"one":1,"two":2,"three":3}},{"_index":"spark","_type":"docs","_id":"AVOu-vRa0OVBGh8ft4a9","_score":1.0,"_source":{"one":1,"two":2,"three":3}},{"_index":"spark","_type":"docs","_id":"AVOu_kMq0OVBGh8ft4a_","_score":1.0,"_source":{"departure":"MUC","arrival":"OTP"}},{"_index":"spark","_type":"docs","_id":"AVOvAVuS0OVBGh8ft4bE","_score":1.0,"_source":{"one":1,"two":2,"three":3}},{"_index":"spark","_type":"docs","_id":"AVOujInV0OVBGh8ft4aj","_score":1.0,"_source":{"arrival":"Otopeni","SFO":"San Fran"}},{"_index":"spark","_type":"docs","_id":"AVOujInn0OVBGh8ft4ak","_score":1.0,"_source":{"one":1,"two":2,"three":3}},{"_index":"spark","_type":"docs","_id":"AVOumniH0OVBGh8ft4as","_score":1.0,"_source":{"departure":"MUC","arrival":"OTP"}},{"_index":"spark","_type":"docs","_id":"AVOumniH0OVBGh8ft4at","_score":1.0,"_source":{"departure":"OTP","arrival":"SFO"}},{"_index":"spark","_type":"docs","_id":"AVOu_kMq0OVBGh8ft4a-","_score":1.0,"_source":{"departure":"OTP","arrival":"SFO"}},{"_index":"spark","_type":"docs","_id":"AVOvAVuJ0OVBGh8ft4bD","_score":1.0,"_source":{"arrival":"Otopeni","SFO":"San Fran"}}]}}
package demo.spark.elasticsearch

/**
 * Created by cao on 16-3-26.
 *
 * Demo2: saves case-class instances to Elasticsearch through EsSpark.saveToEs.
 */
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.SQLContext._
import org.elasticsearch.spark.rdd.EsSpark
import org.elasticsearch.spark.sql._
import org.apache.spark.rdd.RDD._
import org.elasticsearch.spark._

// Fixed: Trip was declared inside main(). A case class defined in a method is an
// inner class whose synthesized constructor captures the enclosing scope, which
// breaks the reflection-based introspection elasticsearch-hadoop uses to
// serialize documents. Declaring it at top level gives it a plain no-capture
// constructor.
case class Trip(departure: String, arrival: String)

object Demo2 {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("Demo2"))

    val upcomingTrip = Trip("OTP", "SFO")
    val lastWeekTrip = Trip("MUC", "OTP")

    // Each Trip becomes one document in index "spark", type "docs".
    val rdd = sc.makeRDD(Seq(upcomingTrip, lastWeekTrip))
    EsSpark.saveToEs(rdd, "spark/docs")
  }
}
package demo.spark.elasticsearch

/**
 * Created by cao on 16-3-26.
 *
 * Demo3: builds a DataFrame from a local CSV-like text file and saves each
 * row as a Person document into the Elasticsearch index/type "spark/people".
 */
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.elasticsearch.spark.sql._
import org.apache.spark.rdd.RDD._
import org.elasticsearch.spark._

// Schema of one input line: name,surname,age
case class Person(name: String, surname: String, age: Int)

object Demo3 {
  def main(args: Array[String]): Unit = {
    // Fixed: app name was copy-pasted as "Demo4".
    val sc = new SparkContext(new SparkConf().setAppName("Demo3"))
    // SQLContext gives us the implicit RDD -> DataFrame conversion (toDF).
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    // NOTE(review): the file name "poeple.txt" looks like a typo for
    // "people.txt", but it is kept as-is — it may match the real file on disk.
    // p(2).trim.toInt will throw NumberFormatException on malformed rows.
    val people = sc.textFile("file:///home/cao/Desktop/poeple.txt")
      .map(_.split(","))
      .map(p => Person(p(0), p(1), p(2).trim.toInt))
      .toDF()

    people.saveToEs("spark/people")
  }
}
Demo4
package demo.spark.elasticsearch

/**
 * Created by cao on 16-3-26.
 *
 * Demo4: reads the "spark/people" index back as a DataFrame through the
 * org.elasticsearch.spark.sql data source, then queries it with Spark SQL.
 */
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.elasticsearch.spark.sql._
import org.apache.spark.rdd.RDD._
import org.elasticsearch.spark._

object Demo4 {
  // Fixed: deprecated procedure syntax -> explicit `: Unit =`.
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("Demo4"))
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    // "pushdown" lets the connector translate DataFrame filters into ES queries.
    val options = Map(
      "pushdown" -> "true",
      "es.nodes" -> "localhost",
      "es.port"  -> "9200"
    )

    val spark14DF = sqlContext.read
      .format("org.elasticsearch.spark.sql")
      .options(options)
      .load("spark/people")

    spark14DF.select("name", "age").collect().foreach(println(_))

    // Spark 1.x API; renamed to createOrReplaceTempView in Spark 2.x.
    spark14DF.registerTempTable("people")
    val results = sqlContext.sql("SELECT name FROM people")
    results.map(t => "Name:" + t(0)).collect().foreach(println)
  }
}
Demo5
package demo.spark.elasticsearch

/**
 * Created by cao on 16-3-26.
 *
 * Demo5: exposes the Elasticsearch index as a SQL temporary table via pure
 * SQL DDL (CREATE TEMPORARY TABLE ... USING org.elasticsearch.spark.sql).
 */
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

object Demo5 {
  def main(args: Array[String]): Unit = {
    // Fixed: app name was copy-pasted as "Demo4".
    val sc = new SparkContext(new SparkConf().setAppName("Demo5"))
    val sqlContext = new SQLContext(sc)

    // Map the table "myPeople" onto the ES resource spark/people.
    sqlContext.sql(
      "CREATE TEMPORARY TABLE myPeople " +
        "USING org.elasticsearch.spark.sql " +
        "OPTIONS ( resource 'spark/people', nodes 'localhost:9200')"
    )

    sqlContext.sql("select * from myPeople").collect.foreach(println)
  }
}
Demo6
package demo.spark.elasticsearch

/**
 * Created by cao on 16-3-26.
 *
 * Demo6: loads an Elasticsearch index as a DataFrame with the esDF helper,
 * optionally filtered by an ES query-string query.
 */
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
import org.elasticsearch.spark.sql._

object Demo6 {
  def main(args: Array[String]): Unit = {
    // Fixed: app name was copy-pasted as "Demo4".
    val sc = new SparkContext(new SparkConf().setAppName("Demo6"))
    val sqlContext = new SQLContext(sc)

    // Whole index as a DataFrame; schema is inferred from the ES mapping.
    val people = sqlContext.esDF("spark/people")
    println(people.schema.treeString)

    // Same index restricted by a URI-style query-string query.
    val wangs = sqlContext.esDF("spark/people", "?q=wang")
    wangs.show()
  }
}
0 0
- spark与elasticsearch整合
- spark-sql与elasticsearch整合&测试
- spark-sql与elasticsearch整合&测试
- spark-sql与elasticsearch整合&测试
- spark和elasticsearch的整合
- Flume与Elasticsearch整合
- 使用spark与ElasticSearch交互
- Hadoop与Spark整合
- Sping Data与Elasticsearch整合
- flume-ng 与elasticsearch整合
- [日志处理工作之九]整合Spark SQL和Elasticsearch
- spark与codis、kafka 整合
- spark sql与hive整合
- Spark-Streaming与Kafka整合
- Spark与HBase的整合
- elasticsearch系列-03(elasticsearch与java的整合)
- Spark与Mysql(JdbcRDD)整合开发(zh)
- tachyon与hdfs,以及spark整合
- c语言:单向循环链表的实现
- 多点触控图片 自定义View
- 从小事做起,记录点点滴滴。
- 设计模式之工厂三姐妹解读(二)
- 广工人福利,openwrt+gduth3c通过inode认证,妈妈再也不用担心我要用电脑开wifi了
- spark与elasticsearch整合
- eclipse 发布 签名android 程序
- Python入门 之 函数
- C++ 类成员的可访问范围
- 设计模式之工厂三姐妹解读(三)
- C++ 11 查看硬件线程个数等信息
- 内联成员函数和重载成员函数
- 解决Ubuntu 14.04 grub选择启动项 10秒等待时间
- GridView详解