Spark 1.4: operating on HBase via RDDs
import java.io.FileInputStream
import java.util.Properties

import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration}
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkContext

object readDataFromHbase {

  def main(args: Array[String]): Unit = {
    // The properties file name can be overridden by the first command-line argument
    var propFileName = "hbaseConfig.properties"
    if (args.size > 0) {
      propFileName = args(0)
    }

    // Load Spark and HBase settings from the properties file
    val prop = new Properties
    val inStream = new FileInputStream(propFileName)
    prop.load(inStream)

    // Set up the Spark context
    val sparkMaster = prop.getProperty("hbase.spark.master")
    val sparkJobName = prop.getProperty("hbase.spark.job.name")
    val sc = new SparkContext(sparkMaster, sparkJobName)

    // Set up the HBase connection and the table to scan
    val hbaseConf = HBaseConfiguration.create()
    hbaseConf.set("hbase.rootdir", prop.getProperty("hbase.rootdir"))
    hbaseConf.set(TableInputFormat.INPUT_TABLE, prop.getProperty("hbase.table.name"))

    // Read the table as an RDD of (row key, Result) pairs via the new Hadoop API
    val hBaseRDD = sc.newAPIHadoopRDD(
      hbaseConf,
      classOf[TableInputFormat],
      classOf[ImmutableBytesWritable],
      classOf[Result]
    )

    // Take the latest cell of column cf:col and decode its value as UTF-8.
    // CellUtil.cloneValue copies only the value bytes; Cell.getValueArray()
    // would return the whole backing array (row key, family, qualifier, ...).
    val hBaseData = hBaseRDD
      .map(_._2)
      .map(res => res.getColumnLatestCell(Bytes.toBytes("cf"), Bytes.toBytes("col")))
      .map(c => CellUtil.cloneValue(c))
      .map(a => new String(a, "utf8"))

    // Note: foreach runs on the executors, so the output shows up in the executor logs
    hBaseData.foreach(println)
  }
}
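The job reads all of its settings from hbaseConfig.properties. The keys below are exactly the ones the code looks up; the values are placeholders of my own (not from the original post) and have to be adjusted to the actual cluster:

# Spark settings (placeholder values)
hbase.spark.master=spark://master:7077
hbase.spark.job.name=readDataFromHbase

# HBase settings (placeholder values)
hbase.rootdir=hdfs://master:9000/hbase
hbase.table.name=test_table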
<!-- HBase -->
<dependency>
  <groupId>org.apache.hbase</groupId>
  <artifactId>hbase</artifactId>
  <version>${hbase.version}</version>
</dependency>
<dependency>
  <groupId>org.apache.hbase</groupId>
  <artifactId>hbase-client</artifactId>
  <version>${hbase.version}</version>
</dependency>
<dependency>
  <groupId>org.apache.hbase</groupId>
  <artifactId>hbase-server</artifactId>
  <version>${hbase.version}</version>
</dependency>
<dependency>
  <groupId>org.apache.hbase</groupId>
  <artifactId>hbase-common</artifactId>
  <version>${hbase.version}</version>
</dependency>
<dependency>
  <groupId>org.apache.hbase</groupId>
  <artifactId>hbase-hadoop2-compat</artifactId>
  <version>${hbase.version}</version>
</dependency>
<dependency>
  <groupId>org.apache.hbase</groupId>
  <artifactId>hbase-hadoop-compat</artifactId>
  <version>${hbase.version}</version>
</dependency>
<dependency>
  <groupId>org.apache.hbase</groupId>
  <artifactId>hbase-protocol</artifactId>
  <version>${hbase.version}</version>
</dependency>
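These dependencies assume a ${hbase.version} property is defined elsewhere in the POM. A minimal sketch of that definition; the version value is a placeholder and should match the HBase version running on the cluster:

<properties>
  <!-- placeholder: set to the cluster's HBase version -->
  <hbase.version>1.0.1</hbase.version>
</properties>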