Spark读写HBASE
来源:互联网 发布:设置数据库单用户模式 编辑:程序博客网 时间:2024/05/16 10:15
Scala 访问 HBase 通常有两种方式:一种是使用 Spark 直接读取 HBase 数据并转换为 RDD;另一种采用与 Java 类似的方式,通过 HTable 操作 HBase,获取数据后再自行处理。两者的区别在于:RDD 方式可以在多个节点上并行地从 HBase 读取数据,而 HTable 方式在客户端是串行读取的,仅 HBase 存储层面是分布式的。
1. 转换为RDD
package com.isesol.spark
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.spark._
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.filter._
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp
object hbasescan {

  /**
   * Reads the HBase table "bank" as a Spark RDD via HBaseContext, applying a
   * single-column value filter (cf:age < "1", lexicographic byte comparison),
   * then caches the RDD and prints its row count.
   *
   * Runs in local mode against a hard-coded ZooKeeper quorum.
   */
  def main(args: Array[String]) {
    val conf = new SparkConf()
    conf.setMaster("local").setAppName("this is for spark SQL")
    //conf.setSparkHome("d:\\spark_home")
    val hbaseconf = HBaseConfiguration.create()
    hbaseconf.set("hbase.zookeeper.quorum", "datanode01.isesol.com,datanode02.isesol.com,datanode03.isesol.com,datanode04.isesol.com,cmserver.isesol.com")
    hbaseconf.set("hbase.zookeeper.property.clientPort", "2181")
    // NOTE(review): "6" looks like an unusually small timeout value — confirm
    // the intended unit/key (ZooKeeper maxSessionTimeout is in milliseconds).
    hbaseconf.set("maxSessionTimeout", "6")
    val sc = new SparkContext(conf)
    try {
      println("start to read from hbase")
      val hbaseContext = new HBaseContext(sc, hbaseconf)
      val scan = new Scan()
      // Keep all cell versions; comparison below is on raw bytes.
      scan.setMaxVersions()
      //scan.setRowPrefixFilter(Bytes.toBytes("i51530048-1007-9223370552914159518"))
      scan.setCaching(100)
      val filter = new SingleColumnValueFilter(Bytes.toBytes("cf"), Bytes.toBytes("age"), CompareOp.LESS, Bytes.toBytes("1"))
      scan.setFilter(filter)
      val hbaserdd = hbaseContext.hbaseRDD(TableName.valueOf("bank"), scan)
      // cache() so the count (and any later actions) reuse the scan results.
      hbaserdd.cache()
      println(hbaserdd.count())
    } catch {
      // Report the actual cause instead of silently swallowing it.
      case ex: Exception =>
        println("can not connect hbase: " + ex)
        ex.printStackTrace()
    } finally {
      // Always release the Spark context, even when the scan fails.
      sc.stop()
    }
  }
}
2. 采用 HTable方式处理
// Serial (client-side) scan of t_device_fault_statistics via HTable.
val htable = new HTable(hbaseconf, "t_device_fault_statistics")
val scan1 = new Scan()
// Scan caching is measured in ROWS per RPC, not bytes: the original
// 3*1024*1024 would ask for ~3.1M rows per round trip and risk client OOM.
scan1.setCaching(1000)
val scaner = htable.getScanner(scan1)
try {
  // Drain the scanner with next()/null-check. The original mixed
  // scaner.iterator().hasNext() (a fresh iterator per loop test) with
  // scaner.next(), which is incorrect loop control.
  var result = scaner.next()
  while (result != null) {
    // getValue may return null when the cell is absent; Bytes.toString(null)
    // yields null, which prints as "null" — acceptable for this diagnostic dump.
    println(Bytes.toString(result.getRow) + "\t" +
      Bytes.toString(result.getValue("cf".getBytes, "fault_level2_name".getBytes)))
    result = scaner.next()
  }
} finally {
  // Release scanner and table even if the scan throws mid-iteration.
  scaner.close()
  htable.close()
}
1. 转换为RDD
package com.isesol.spark
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.hbase.mapred.TableOutputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
import org.apache.hadoop.hbase.client.Put
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.spark._
import org.apache.hadoop.hbase.client.Scan
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.filter._
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp
object hbasescan {

  /**
   * Reads the HBase table "bank" as a Spark RDD via HBaseContext, applying a
   * single-column value filter (cf:age < "1", lexicographic byte comparison),
   * then caches the RDD and prints its row count.
   *
   * Runs in local mode against a hard-coded ZooKeeper quorum.
   */
  def main(args: Array[String]) {
    val conf = new SparkConf()
    conf.setMaster("local").setAppName("this is for spark SQL")
    //conf.setSparkHome("d:\\spark_home")
    val hbaseconf = HBaseConfiguration.create()
    hbaseconf.set("hbase.zookeeper.quorum", "datanode01.isesol.com,datanode02.isesol.com,datanode03.isesol.com,datanode04.isesol.com,cmserver.isesol.com")
    hbaseconf.set("hbase.zookeeper.property.clientPort", "2181")
    // NOTE(review): "6" looks like an unusually small timeout value — confirm
    // the intended unit/key (ZooKeeper maxSessionTimeout is in milliseconds).
    hbaseconf.set("maxSessionTimeout", "6")
    val sc = new SparkContext(conf)
    try {
      println("start to read from hbase")
      val hbaseContext = new HBaseContext(sc, hbaseconf)
      val scan = new Scan()
      // Keep all cell versions; comparison below is on raw bytes.
      scan.setMaxVersions()
      //scan.setRowPrefixFilter(Bytes.toBytes("i51530048-1007-9223370552914159518"))
      scan.setCaching(100)
      val filter = new SingleColumnValueFilter(Bytes.toBytes("cf"), Bytes.toBytes("age"), CompareOp.LESS, Bytes.toBytes("1"))
      scan.setFilter(filter)
      val hbaserdd = hbaseContext.hbaseRDD(TableName.valueOf("bank"), scan)
      // cache() so the count (and any later actions) reuse the scan results.
      hbaserdd.cache()
      println(hbaserdd.count())
    } catch {
      // Report the actual cause instead of silently swallowing it.
      case ex: Exception =>
        println("can not connect hbase: " + ex)
        ex.printStackTrace()
    } finally {
      // Always release the Spark context, even when the scan fails.
      sc.stop()
    }
  }
}
2. 采用 HTable方式处理
// Serial (client-side) scan of t_device_fault_statistics via HTable.
val htable = new HTable(hbaseconf, "t_device_fault_statistics")
val scan1 = new Scan()
// Scan caching is measured in ROWS per RPC, not bytes: the original
// 3*1024*1024 would ask for ~3.1M rows per round trip and risk client OOM.
scan1.setCaching(1000)
val scaner = htable.getScanner(scan1)
try {
  // Drain the scanner with next()/null-check. The original mixed
  // scaner.iterator().hasNext() (a fresh iterator per loop test) with
  // scaner.next(), which is incorrect loop control.
  var result = scaner.next()
  while (result != null) {
    // getValue may return null when the cell is absent; Bytes.toString(null)
    // yields null, which prints as "null" — acceptable for this diagnostic dump.
    println(Bytes.toString(result.getRow) + "\t" +
      Bytes.toString(result.getValue("cf".getBytes, "fault_level2_name".getBytes)))
    result = scaner.next()
  }
} finally {
  // Release scanner and table even if the scan throws mid-iteration.
  scaner.close()
  htable.close()
}
阅读全文
0 0
- Spark读写HBASE
- spark读写hbase
- spark hbase读写
- Spark读写Hbase示例代码
- Spark读写Hbase示例代码
- Spark实战之读写HBase
- hbase-spark全新的spark读写hbase的方式
- spark-shell 读写hdfs 读写hbase 读写redis
- 如何使用scala+spark读写hbase?
- spark源码阅读一-spark读写hbase代码分析
- Spark读写Hbase的二种方式对比
- Spark读写Hbase的二种方式对比
- Spark连接HBase进行读写相关操作【CDH5.7.X】
- spark hbase
- spark hbase
- Spark&hbase
- spark hbase hbase-rdd
- Spark操作hbase
- httpclient中禁止301、302自动重定向
- Student s = new Student();在内存中做了哪些事情?
- ZOJ Problem Set
- Linux静态库和动态库区别
- sql语句整理1--统计当月签到次数最多
- Spark读写HBASE
- codeforces 813e 莫队算法
- Scala入门到精通——第十四节 Case Class与模式匹配(一)
- 使用springMVC + Spring进行web开发时,aop不生效的问题
- Unity3D中使用git
- nginx 做proxy 不转发 http header问题解决
- windows 安装mongo数据库&注册mongo服务
- CSS bem命名规范
- Android7.0 自定义控件addView(...)无效,View的绘制流程(onMeasure、onLayout等)完全没执行的解决办法。