Accessing HBase from Spark
The whole post is a spark-shell session that reads HBase tables through TableInputFormat and newAPIHadoopRDD:

import org.apache.hadoop.hbase.{HBaseConfiguration, HTableDescriptor}
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.spark.rdd.NewHadoopRDD

// Point the scan at the "tmp" table and build an RDD of (rowkey, Result) pairs
val conf = HBaseConfiguration.create()
conf.set(TableInputFormat.INPUT_TABLE, "tmp")
var hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
  classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
  classOf[org.apache.hadoop.hbase.client.Result])
hBaseRDD.count()

import scala.collection.JavaConverters._

// For each row, keep only the newest version of cf:val and return the raw
// (rowkey, value) byte arrays
hBaseRDD.map(tuple => tuple._2)
  .map(result => result.getColumn("cf".getBytes(), "val".getBytes()))
  .map { keyValues =>
    val latest = keyValues.asScala.reduceLeft { (a, b) =>
      if (a.getTimestamp > b.getTimestamp) a else b
    }
    (latest.getRow, latest.getValue)
  }
  .take(10)

// The same query, decoding rowkey and value into strings
hBaseRDD.map(tuple => tuple._2)
  .map(result => (result.getRow, result.getColumn("cf".getBytes(), "val".getBytes())))
  .map { row =>
    ( row._1.map(_.toChar).mkString,
      row._2.asScala.reduceLeft { (a, b) =>
        if (a.getTimestamp > b.getTimestamp) a else b
      }.getValue.map(_.toChar).mkString )
  }
  .take(10)

// Switch to the "test1" table. The RDD has to be rebuilt: newAPIHadoopRDD captures
// the configuration when the RDD is created, so mutating conf afterwards does not
// change an existing RDD.
conf.set(TableInputFormat.INPUT_TABLE, "test1")
hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
  classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
  classOf[org.apache.hadoop.hbase.client.Result])

// Read lf:app1, skipping rows that do not have the column
hBaseRDD.map(tuple => tuple._2)
  .map(result => (result.getRow, result.getColumn("lf".getBytes(), "app1".getBytes())))
  .filter(row => row._2.size > 0)
  .map { row =>
    ( row._1.map(_.toChar).mkString,
      row._2.asScala.reduceLeft { (a, b) =>
        if (a.getTimestamp > b.getTimestamp) a else b
      }.getValue.map(_.toInt).mkString )
  }
  .take(10)

// lf:app1 actually holds an 8-byte long, so decode it with ByteBuffer instead
import java.nio.ByteBuffer

hBaseRDD.map(tuple => tuple._2)
  .map(result => (result.getRow, result.getColumn("lf".getBytes(), "app1".getBytes())))
  .filter(row => row._2.size > 0)
  .map { row =>
    ( row._1.map(_.toChar).mkString,
      ByteBuffer.wrap(
        row._2.asScala.reduceLeft { (a, b) =>
          if (a.getTimestamp > b.getTimestamp) a else b
        }.getValue
      ).getLong )
  }
  .take(10)

// Restrict the scan to a single column on the server side, then rebuild the RDD
//conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, "lf")
conf.set(TableInputFormat.SCAN_COLUMNS, "lf:app1")
hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
  classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
  classOf[org.apache.hadoop.hbase.client.Result])

// With a single column in the scan, Result.value() returns that column's value directly
hBaseRDD.map(tuple => tuple._2)
  .map(result => ( result.getRow.map(_.toChar).mkString,
                   ByteBuffer.wrap(result.value).getLong ))
  .take(10)

// Last example: split composite rowkeys of the form "prefix|suffix" and group the
// suffixes by prefix (re-declaring conf and hBaseRDD is fine in the spark-shell)
val conf = HBaseConfiguration.create()
conf.set(TableInputFormat.INPUT_TABLE, "test1")
var hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
  classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
  classOf[org.apache.hadoop.hbase.client.Result])
var rows = hBaseRDD.map(tuple => tuple._2).map(result => result.getRow.map(_.toChar).mkString)
rows.map(row => row.split("\\|"))
  .map(r => if (r.length > 1) (r(0), r(1)) else (r(0), ""))
  .groupByKey
  .take(10)
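Note that Result.getColumn and the KeyValue type used above belong to the pre-0.96 HBase client API and were removed in later releases. As a minimal sketch, assuming HBase 0.96 or newer and the same test1 table with an 8-byte long in lf:app1 as above, the latest-version read can be written with getColumnCells, CellUtil and Bytes instead:

import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration}
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import scala.collection.JavaConverters._

val conf = HBaseConfiguration.create()
conf.set(TableInputFormat.INPUT_TABLE, "test1")
val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat],
  classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable],
  classOf[org.apache.hadoop.hbase.client.Result])

hBaseRDD.map(_._2).flatMap { result =>
  // getColumnCells replaces getColumn and returns a (possibly empty) List[Cell]
  val cells = result.getColumnCells(Bytes.toBytes("lf"), Bytes.toBytes("app1")).asScala
  if (cells.isEmpty) None
  else {
    val latest = cells.maxBy(_.getTimestamp)  // keep only the newest version
    Some((Bytes.toString(result.getRow), Bytes.toLong(CellUtil.cloneValue(latest))))
  }
}.take(10)

Bytes.toString and Bytes.toLong also replace the map(_.toChar) and ByteBuffer decoding above, which only works for single-byte characters in the rowkey. All of the snippets assume a spark-shell started with the HBase client jars on its classpath.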
This article is from the "点滴积累" blog; please retain the original source: http://tianxingzhe.blog.51cto.com/3390077/1717761