java操作hbase

来源：互联网发布：没有违法淘宝规则编辑：程序博客网时间：2024/06/03 03:29

前提：HBase版本为 hbase0.94.18.tar.gz使用HBase的Java api，对HBase进行操作。1、连接hbase//设置配置信息public Configuration getConfig() {    Configuration conf = HBaseConfiguration.create();    conf.set("hbase.master", "192.168.206.21:60000");    conf.set("hbase.zookeeper.quorum", "192.168.206.22,192.168.206.23,192.168.206.24");    conf.set("hbase.zookeeper.property.clientport", "2181");    return conf;}2、在hbase中创建表// 创建表public boolean createTable(Configuration conf, String tableName) {    boolean result = false;    HBaseAdmin hBaseAdmin = null;    try {        hBaseAdmin = new HBaseAdmin(conf);        if (hBaseAdmin.tableExists(tableName)) {            hBaseAdmin.disableTable(tableName);            hBaseAdmin.deleteTable(tableName);        }        HTableDescriptor hTableDescriptor = new HTableDescriptor(tableName);        hTableDescriptor.addFamily(new HColumnDescriptor("columnFamily1"));        hBaseAdmin.createTable(hTableDescriptor);        result = true;    } catch (MasterNotRunningException e) {        e.printStackTrace();        result = false;    } catch (ZooKeeperConnectionException e) {        e.printStackTrace();        result = false;    } catch (IOException e) {        e.printStackTrace();        result = false;    } finally {        try {            hBaseAdmin.close();        } catch (IOException e) {            e.printStackTrace();        }    }    return result;}3、往HBase插入记录// 插入一条记录public boolean putRowIntoTable(Configuration conf,String tableName){    boolean result = false;    HTable hTable = null;    try {        hTable = new HTable(conf, tableName);        Put put = new Put("rowkey001".getBytes());        put.add("columnFamily1".getBytes(),"column1".getBytes(),"value1".getBytes());        put.add("columnFamily1".getBytes(),"column2".getBytes(),"value2".getBytes());        hTable.put(put);        result = true;    } catch (IOException e) {        e.printStackTrace();        result = false;    }finally {        try {            hTable.close();        } catch (IOException 
e) {            e.printStackTrace();        }    }    return result;}4、删除记录// 删除一条记录public boolean deleteRow(Configuration conf,String hTableName,String rowKey){    boolean result = false;    HTable hTable  = null;    try {        hTable = new HTable(conf,hTableName);        hTable.delete(new Delete(rowKey.getBytes()));        result = true;    } catch (IOException e) {        e.printStackTrace();        result = false;    }finally {        try {            hTable.close();        } catch (IOException e) {            e.printStackTrace();        }    }    return result;}5、删除表//删除表public boolean dropTable(Configuration conf, String tableName){    boolean result = false;    HBaseAdmin hBaseAdmin = null;    try {        hBaseAdmin = new HBaseAdmin(conf);        if(hBaseAdmin.tableExists(tableName)){            hBaseAdmin.disableTable(tableName);            hBaseAdmin.deleteTable(tableName);        }else{            System.err.println(tableName + " is not exist!");        }        result = true;    } catch (IOException e) {        e.printStackTrace();        result = false;    }finally {        try {            hBaseAdmin.close();        } catch (IOException e) {            e.printStackTrace();        }    }    return result;}

难度升级，再次使用Java api，对HBase进行查询。

HBase数据库，数据量 613,428,000条

Hbase java api

HBase表结构，为一表，一列族，一列，一值

Hbase java api

表名dnslog，columnfamily为info，列为c1,value.

rowkey组成：账号+”_”+时间

大牛笔记

需求：

获取zywy这个账号，在2014-11-11 10:10:10秒到 2014-12-11 10:10:10秒之间，最后发送的那条数据的内容。

先看代码：

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
// startrow and endrow
publicvoid startRowAndEndRow(Configuration conf,String tableName){
    DateFormat dFormat = newSimpleDateFormat("yyyyMMdd HH:mm:ss SSSS");
    String d1 = dFormat.format(newDate());
    String account = "zywy";
    HTable hTable = null;
    ResultScanner rs = null;
    try{
        hTable = newHTable(conf, tableName);
 
        FilterList filterList = newFilterList(FilterList.Operator.MUST_PASS_ALL);
 
        PrefixFilter filter1 = newPrefixFilter((account+"_").getBytes());
        filterList.addFilter(filter1);
 
        // RowKey最小日期过滤
        String minRowKey = "";
        minRowKey = account + "_"+ "20141111101010";
        System.out.println(minRowKey);
        RowFilter minRowFilter = newRowFilter(CompareFilter.CompareOp.GREATER_OR_EQUAL,newBinaryComparator(minRowKey.getBytes()));
        filterList.addFilter(minRowFilter);
 
        // RowKey最小日期过滤
        String maxRowKey = "";
        maxRowKey = account + "_"+ "20141211101010";
        System.out.println(minRowKey);
        RowFilter maxRowFilter = newRowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL,newBinaryComparator(maxRowKey.getBytes()));
        filterList.addFilter(maxRowFilter);
 
        //PageFilter pageFilter = new PageFilter(1);
        //filterList.addFilter(pageFilter);
 
        Scan scan = newScan();
        scan.setMaxVersions();
 
        scan.setFilter(filterList);
 
        //scan.setStartRow(maxRowKey.getBytes());
        //scan.setStopRow(minRowKey.getBytes());
 
        scan.setStartRow(minRowKey.getBytes());
        scan.setStopRow(maxRowKey.getBytes());
        scan.setCaching(1000);
        scan.setCacheBlocks(false);
 
        rs = hTable.getScanner(scan);
        ArrayList<KeyValue> resultTempList = newArrayList<KeyValue>();
 
        Result lastR = null;
        for(Result r : rs) {
        //  for (KeyValue kv : r.raw()) {
        //      resultTempList.add(kv);
        //      System.err.println("rowkey:" + new String(kv.getKey()));
        //      System.err.println("-------------------------------");
        //      System.err.println("columnFamily:" + new String(kv.getFamily()) +"===column:" + new String(kv.getQualifier()) + "===getValue:" + new String(kv.getValue()));
        //  }
            lastR = r;
        }
 
        for(KeyValue kv : lastR.raw()) {
            resultTempList.add(kv);
            System.err.println("rowkey:"+ newString(kv.getKey()));
            System.err.println("-------------------------------");
            System.err.println("columnFamily:"+ newString(kv.getFamily()) +"===column:"+ newString(kv.getQualifier()) + "===getValue:"+ newString(kv.getValue()));
        }
 
        System.err.println("*********************");
        System.err.println("resultTempList.size:"+ resultTempList.size());
        System.err.println("*********************");
 
        String d2 = dFormat.format(newDate());
        System.err.println("开始时间："+d1);
        System.err.println("结束时间："+d2);
    }catch(IOException e) {
        e.printStackTrace();
    }finally{
        rs.close();
        try{
            hTable.close();
        }catch(IOException e) {
            e.printStackTrace();
        }
    }
}

结果：

执行速度上，在一秒内完成。如果要返回多个结果，需要将keyvalue的for循环，放到result的for循环内（执行时间，10秒左右）。

在速度上，还是可以接受的。这么快的原因主要在rowkey设计方面，设计的合理性会影响查询时速度。

解释：

ResultScanner，扫描表，获取最终结果。

下面就是定义一些filter的使用。

PrefixFilter，前缀的筛选，也就是筛选rowkey以“zywy_”开头的记录。

RowFilter，也是根据rowkey筛选定义记录的范围 minRowKey 和 maxRowKey之间的数据。

PageFilter，这里的pageFilter没用到，这个也是很好的一个类。分页时使用。

scan.setMaxVersions(); 不带参数调用时，返回每个单元格的所有版本（不调用时默认只返回最新版本）。

scan.setStartRow(minRowKey.getBytes()) 和scan.setStopRow(maxRowKey.getBytes()) 就是设置rowkey的范围了（起始行包含在内，结束行不包含）。

Result lastR = null; 定义一个变量，用来存储最后一条记录。

HBase的RowKey是自动排好序的。rowkey是按照字节的字典序（对纯ASCII的rowkey来说就是ascii码顺序）进行排序的。所以最先扫描到的，是最老的数据。但是需求要求返回最新的一条记录。

这里如果像MySql等关系型数据库那样，来一个order by rowkey desc 换个顺序就爽了。可惜HBase没有。官网说明

1
2
3
4
5
6
6.3.3. 倒序时间戳
数据库处理中的一个常见问题是找到最近版本的值。采用倒序时间戳作为键的一部分，对这种情况有很大帮助。该技术在Tom White的《Hadoop: The Definitive Guide》(O'Reilly) 一书的HBase章节中也能找到：把 (Long.MAX_VALUE - timestamp) 追加到key的后面，如 [key][reverse_timestamp]。
 
表内[key]的最近的值可以用[key]进行 Scan 找到并获取第一个记录。由于 HBase 行键是排序的，该键排在任何比它老的行键的前面，所以必然是第一个。
 
该技术可以用于代替Section 6.4, “ 版本的数量 ” ，其目的是保存所有版本到“永远”(或一段很长时间) 。同时，采用同样的Scan技术，可以很快获取其他版本

我所使用的HBase版本为hbase-0.94.18.tar.gz算是比较老的版本了。

由于没有排序的功能，所以只能是从前往后循环一遍，将最后的变量，存到lastR中。

如果要是支持orderby操作，那么我可以加上pagefilter

1
2
// Limit the scan to a single row; this only yields the newest record
// once rows are returned newest-first.
PageFilter pageFilter = new PageFilter(1);
filterList.addFilter(pageFilter);

这样直接返回的就是最新的一条。

其实，在hbase-0.98+就支持了这个顺序反转的功能，通过添加如下代码，就可以实现了。官网说明

1
// setReversed is an instance method, so it is called on the Scan object (HBase 0.98+).
// NOTE(review): a reversed scan presumably needs the larger key as the start row - confirm in the Scan docs.
scan.setReversed(true);

阅读全文

0 0