Spark 1.4: reading HBase via the RDD API

The program below reads an HBase table into a Spark RDD through newAPIHadoopRDD. The Spark master, job name, HBase root directory and table name are all taken from an external properties file, so nothing is hard-coded in the job itself.

import java.io.FileInputStream
import java.util.Properties

import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration}
import org.apache.hadoop.hbase.client.Result
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkContext

object readDataFromHbase {

  def main(args: Array[String]): Unit = {
    // The properties file defaults to hbaseConfig.properties and can be
    // overridden by the first command-line argument
    var propFileName = "hbaseConfig.properties"
    if (args.size > 0) {
      propFileName = args(0)
    }

    // Load properties
    val prop = new Properties
    val inStream = new FileInputStream(propFileName)
    prop.load(inStream)
    inStream.close()

    // Set up the Spark context
    val sparkMaster = prop.getProperty("hbase.spark.master")
    val sparkJobName = prop.getProperty("hbase.spark.job.name")
    val sc = new SparkContext(sparkMaster, sparkJobName)

    // Set up the HBase connection
    val hbaseConf = HBaseConfiguration.create()
    hbaseConf.set("hbase.rootdir", prop.getProperty("hbase.rootdir"))
    hbaseConf.set(TableInputFormat.INPUT_TABLE, prop.getProperty("hbase.table.name"))

    // Read the table as an RDD of (row key, Result) pairs
    val hBaseRDD = sc.newAPIHadoopRDD(
      hbaseConf,
      classOf[TableInputFormat],
      classOf[ImmutableBytesWritable],
      classOf[Result])

    // For each row, take the latest cell of column cf:col and decode its value
    // as UTF-8; rows that do not contain the column are skipped
    val hBaseData = hBaseRDD.map(_._2)
      .map(res => res.getColumnLatestCell(Bytes.toBytes("cf"), Bytes.toBytes("col")))
      .filter(_ != null)
      .map(c => CellUtil.cloneValue(c))
      .map(bytes => new String(bytes, "UTF-8"))

    hBaseData.foreach(println)
  }

}
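The four property keys read above must be present in the properties file. A minimal sketch, assuming a local master; the HDFS host, port and table name are placeholders and have to be adapted to your cluster:

# hbaseConfig.properties -- all values below are examples only
hbase.spark.master=local[2]
hbase.spark.job.name=readDataFromHbase
hbase.rootdir=hdfs://namenode:8020/hbase
hbase.table.name=test_table

The path to this file can be passed as the first argument when submitting the job, for example spark-submit --class readDataFromHbase your-job.jar hbaseConfig.properties (the jar name is a placeholder).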

The job needs the following HBase dependencies in the Maven POM:

    <!-- HBase -->
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase</artifactId>
      <version>${hbase.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-client</artifactId>
      <version>${hbase.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-server</artifactId>
      <version>${hbase.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-common</artifactId>
      <version>${hbase.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-hadoop2-compat</artifactId>
      <version>${hbase.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-hadoop-compat</artifactId>
      <version>${hbase.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hbase</groupId>
      <artifactId>hbase-protocol</artifactId>
      <version>${hbase.version}</version>
    </dependency>
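${hbase.version} is not defined in the snippet above; it must be declared in the POM's <properties> block and should match the HBase version deployed on the cluster. A sketch, with the version number as an assumed example only:

    <properties>
      <!-- example only: use the HBase version running on your cluster -->
      <hbase.version>0.98.12-hadoop2</hbase.version>
    </properties>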

