Reading data from HBase with MapReduce


A small example program demonstrating how to write a MapReduce job that reads from HBase:

Data already present in the HBase table:

hbase(main):009:0> scan 'test'
ROW                COLUMN+CELL
 row1              column=li:a, timestamp=1386254343222, value=liang
 row1              column=li:b, timestamp=1386253305942, value=\x00\x00\x00\x03
 row1              column=li:c, timestamp=1386256955011, value=liang
 row2              column=li:a, timestamp=1386255985261, value=liang
 row3              column=li:a, timestamp=1386256003938, value=lei
 row4              column=li:a, timestamp=1386256057937, value=lei
 row5              column=li:a, timestamp=1386256064945, value=lei
 row6              column=li:a, timestamp=1386256226767, value=lei
 row7              column=li:a, timestamp=1386256230868, value=lei
 row8              column=li:a, timestamp=1386256234817, value=lei
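For reference, a table with this layout can be created from the HBase shell. The puts below are only an illustrative sketch; the actual values and timestamps shown above came from earlier inserts:

create 'test', 'li'
put 'test', 'row1', 'li:a', 'liang'
put 'test', 'row2', 'li:a', 'liang'
put 'test', 'row3', 'li:a', 'lei'
scan 'test'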

The test code is as follows:

package com.hbase.create;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * A map-only MapReduce job that scans every row of the HBase table 'test'
 * and prints the value of column li:a.
 *
 * @author lloppo
 */
public class MapReduce_Hbase_DownLoad extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new MapReduce_Hbase_DownLoad(), args));
    }

    static class MyMapper extends TableMapper<Text, LongWritable> {
        @Override
        public void map(ImmutableBytesWritable row, Result value, Context context)
                throws IOException, InterruptedException {
            // Each Result holds all the cells of one row selected by the Scan;
            // here we only read the li:a column. (value.raw() would give every cell.)
            byte[] cell = value.getValue(Bytes.toBytes("li"), Bytes.toBytes("a"));
            if (cell != null) {
                System.out.println("value: " + Bytes.toString(cell));
            }
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration config = HBaseConfiguration.create();
        Job job = new Job(config, "ExampleRead");
        job.setJarByClass(MapReduce_Hbase_DownLoad.class);   // class that contains the mapper

        Scan scan = new Scan();
        scan.setCaching(500);        // the Scan default of 1 row per RPC is bad for MapReduce jobs
        scan.setCacheBlocks(false);  // don't fill the block cache with a one-off full scan

        TableMapReduceUtil.initTableMapperJob(
                "test",           // input HBase table name
                scan,             // Scan instance to control CF and attribute selection
                MyMapper.class,   // mapper class
                null,             // mapper output key (none: map-only job)
                null,             // mapper output value (none: map-only job)
                job);
        job.setOutputFormatClass(NullOutputFormat.class);  // the mapper emits nothing

        return job.waitForCompletion(true) ? 0 : 1;
    }
}
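Since the mapper only ever reads the li:a column, the Scan can also be narrowed so that only that column is shipped to the mappers. A minimal sketch (the helper class name is made up for illustration; the table layout and column names follow the example above):

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class NarrowScan {
    // Builds a Scan restricted to the single column the mapper actually reads.
    static Scan buildScan() {
        Scan scan = new Scan();
        scan.setCaching(500);        // fetch 500 rows per RPC instead of the default 1
        scan.setCacheBlocks(false);  // keep a one-off scan out of the region server block cache
        scan.addColumn(Bytes.toBytes("li"), Bytes.toBytes("a"));  // only return li:a cells
        return scan;
    }
}

To run the job, package it into a jar and submit it with hadoop jar, making sure the HBase jars are on the classpath (for example via the output of `hbase classpath`, or TableMapReduceUtil.addDependencyJars, depending on your setup).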


