使用filter(过滤器)按照条件查询hbase

来源:互联网 发布:linux curl get请求 编辑:程序博客网 时间:2024/05/16 01:18

1.1. hbase过滤器

1.1.1. FilterList

FilterList 代表一个过滤器列表,可以添加多个过滤器进行查询,多个过滤器之间的关系有:

与关系(符合所有):FilterList.Operator.MUST_PASS_ALL  

或关系(符合任一):FilterList.Operator.MUST_PASS_ONE

 

使用方法:

FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ONE);   

Scan s1 = new Scan();  

 filterList.addFilter(new SingleColumnValueFilter(Bytes.toBytes(“f1”),  Bytes.toBytes(“c1”),  CompareOp.EQUAL,Bytes.toBytes(“v1”) )  );  

filterList.addFilter(new SingleColumnValueFilter(Bytes.toBytes(“f1”),  Bytes.toBytes(“c2”),  CompareOp.EQUAL,Bytes.toBytes(“v2”) )  );  

 // 添加下面这一行后,则只返回指定的cell,同一行中的其他cell不返回  

 s1.addColumn(Bytes.toBytes(“f1”), Bytes.toBytes(“c1”));  

 s1.setFilter(filterList);  //设置filter

 ResultScanner ResultScannerFilterList = table.getScanner(s1);  //返回结果列表

1.1.2. 过滤器的种类

过滤器的种类:

列值过滤器—SingleColumnValueFilter

      过滤列值的相等、不等、范围等

列名前缀过滤器—ColumnPrefixFilter

      过滤指定前缀的列名

多个列名前缀过滤器—MultipleColumnPrefixFilter

       过滤多个指定前缀的列名

rowKey过滤器—RowFilter

      通过正则,过滤rowKey值。

1.1.3. 列值过滤器—SingleColumnValueFilter

SingleColumnValueFilter 列值判断

相等 (CompareOp.EQUAL ),

不等(CompareOp.NOT_EQUAL),

范围 (e.g., CompareOp.GREATER)…………

下面示例检查列值和字符串'values' 相等...

// Keep rows whose cFamily:column value equals the string "values".
SingleColumnValueFilter f = new SingleColumnValueFilter(
        Bytes.toBytes("cFamily"),
        Bytes.toBytes("column"),
        CompareFilter.CompareOp.EQUAL,
        Bytes.toBytes("values"));

s1.setFilter(f);

注意:如果某一行中不存在过滤器所指定的列,默认情况下该过滤器不会过滤掉这一行(该行仍会被返回)。如果希望排除这些缺少该列的行,需要调用 f.setFilterIfMissing(true)。

1.1.4. 列名前缀过滤器—ColumnPrefixFilter

过滤器ColumnPrefixFilter

ColumnPrefixFilter 用于匹配列名以指定前缀开头的列

// Keep only the columns whose qualifier starts with the prefix "values".
byte[] prefix = Bytes.toBytes("values");
s1.setFilter(new ColumnPrefixFilter(prefix));

1.1.5. 多个列名前缀过滤器—MultipleColumnPrefixFilter

MultipleColumnPrefixFilter 和 ColumnPrefixFilter 行为差不多,但可以指定多个前缀

// Keep the columns whose qualifier starts with any of the listed prefixes.
byte[][] prefixes = {
        Bytes.toBytes("value1"),
        Bytes.toBytes("value2")
};
s1.setFilter(new MultipleColumnPrefixFilter(prefixes));

1.1.6. rowKey过滤器—RowFilter

RowFilter rowkey过滤器

通常根据rowkey来指定范围时,使用scan扫描器的 setStartRow / setStopRow 方法比较好。

// Match row keys that start with 1234.
RegexStringComparator startsWith1234 = new RegexStringComparator("^1234");
s1.setFilter(new RowFilter(CompareFilter.CompareOp.EQUAL, startsWith1234));

package hbase.test;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.*;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.filter.*;import org.apache.hadoop.hbase.util.Bytes;import org.junit.Test;import java.text.DecimalFormat;import java.util.Iterator;import java.util.List;/** * 测试过滤器 */public class TestFilter {/** * rowkey过滤 */@Testpublic void testRowFilter() throws Exception {Configuration conf = HBaseConfiguration.create();Connection conn = ConnectionFactory.createConnection(conf);Table t = conn.getTable(TableName.valueOf("ns1:t2"));Scan scan = new Scan();//where rowkey <= row088//RowFilter filter = new RowFilter(CompareFilter.CompareOp.LESS_OR_EQUAL,new BinaryComparator(Bytes.toBytes("row088")));//where rowkey like '%88%'//RowFilter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,new RegexStringComparator("88"));//while rowkey like '%88' ;RowFilter filter = new RowFilter(CompareFilter.CompareOp.EQUAL,new RegexStringComparator("88$"));scan.setFilter(filter);ResultScanner rs = t.getScanner(scan) ;Iterator<Result> it = rs.iterator();while(it.hasNext()){Result r = it.next();outResult(r);}rs.close();}/** * family过滤 */@Testpublic void testFamilyFilter() throws Exception {Configuration conf = HBaseConfiguration.create();Connection conn = ConnectionFactory.createConnection(conf);Table t = conn.getTable(TableName.valueOf("ns1:t2"));Scan scan = new Scan();//以f开头的列族进行过滤FamilyFilter filter = new FamilyFilter(CompareFilter.CompareOp.EQUAL,new RegexStringComparator("^f"));scan.setFilter(filter);ResultScanner rs = t.getScanner(scan) ;Iterator<Result> it = rs.iterator();while(it.hasNext()){Result r = it.next();outResult(r);}rs.close();}/** * col过滤 */@Testpublic void testQualifierFilter() throws Exception {Configuration conf = HBaseConfiguration.create();Connection conn = ConnectionFactory.createConnection(conf);Table t = conn.getTable(TableName.valueOf("ns1:t2"));Scan scan = new Scan();//以f开头的列族进行过滤QualifierFilter filter = new 
QualifierFilter(QualifierFilter.CompareOp.EQUAL,new RegexStringComparator("^name$"));scan.setFilter(filter);ResultScanner rs = t.getScanner(scan) ;Iterator<Result> it = rs.iterator();while(it.hasNext()){Result r = it.next();outResult(r);}rs.close();}/** * 组合过滤 */@Testpublic void testFilterList() throws Exception {Configuration conf = HBaseConfiguration.create();Connection conn = ConnectionFactory.createConnection(conf);Table t = conn.getTable(TableName.valueOf("ns1:t2"));Scan scan = new Scan();//MUST_PASS_ALL : and//MUST_PASS_ONe :OrFilterList filter = new FilterList(FilterList.Operator.MUST_PASS_ALL) ;ValueFilter f1 = new ValueFilter(QualifierFilter.CompareOp.EQUAL,new RegexStringComparator("m8"));filter.addFilter(f1);RowFilter f2 = new RowFilter(CompareFilter.CompareOp.GREATER,new BinaryComparator(Bytes.toBytes("row8"))) ;filter.addFilter(f2);scan.setFilter(filter);ResultScanner rs = t.getScanner(scan) ;Iterator<Result> it = rs.iterator();while(it.hasNext()){Result r = it.next();outResult(r);}rs.close();}/** * value过滤 */@Testpublic void testValueFilter() throws Exception {Configuration conf = HBaseConfiguration.create();Connection conn = ConnectionFactory.createConnection(conf);Table t = conn.getTable(TableName.valueOf("ns1:t2"));Scan scan = new Scan();//以f开头的列族进行过滤ValueFilter filter = new ValueFilter(QualifierFilter.CompareOp.EQUAL,new RegexStringComparator("m8"));scan.setFilter(filter);ResultScanner rs = t.getScanner(scan) ;Iterator<Result> it = rs.iterator();while(it.hasNext()){Result r = it.next();outResult(r);}rs.close();}private void outResult(Result r) {System.out.println("=========================");List<Cell> cells = r.listCells();for (Cell cell : cells) {String rowkey = Bytes.toString(CellUtil.cloneRow(cell));String f = Bytes.toString(CellUtil.cloneFamily(cell));String col = Bytes.toString(CellUtil.cloneQualifier(cell));long ts = cell.getTimestamp();String value = Bytes.toString(CellUtil.cloneValue(cell));System.out.println(rowkey + "/" + f + ":" + col 
+ "/" + ts + "=" + value);}}


原创粉丝点击