hbase扫描优化-扫描缓存-caching-timeout-retries-batch

来源:互联网 发布:淘宝借钱逾期 编辑:程序博客网 时间:2024/06/05 21:51
扫描
-----------------
不应该全表扫描.

缓存和批处理
-----------------
扫描器缓存可以在一次RPC中返回多条记录。默认是关闭的。
从两个层面上控制扫描器缓存。
全局配置.(优先级最低)
<property>
<name>hbase.client.scanner.caching</name>
<value>2147483647</value>
</property>


1.表层面
HTable.setScannerCaching(int)
HConstants#HBASE_CLIENT_SCANNER_CACHING
2. 扫描器范围(优先级最高)
Scan scan = new Scan();
scan.setCaching(5) ;
HConstants#HBASE_CLIENT_SCANNER_CACHING //默认值
//以下为不同caching取值下全表扫描的耗时(毫秒)
//不调用scan.setCaching,使用默认值 ;//2036
scan.setCaching(1) ;//2632
scan.setCaching(Integer.MAX_VALUE) ;//1792
scan.setCaching(1000) ;//1626
scan.setCaching(-1) ;//1690
scan.setCaching(5000) ;//1752
scan.setCaching(200) ;//1586

//设置扫描超时,默认60000,超时之后重试
conf.set(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD,"1");
//设置重试次数(35次)

conf.set(HConstants.HBASE_CLIENT_RETRIES_NUMBER,"1");

1、测试caching

/**
 * Scans table {@code ns1:t2} with a scanner caching of 200 and prints the elapsed
 * wall-clock time in milliseconds.
 *
 * <p>No start/stop row is set, so the scan covers the whole table. The timer starts
 * before the connection is created and stops right after the scanner is closed.
 *
 * @throws Exception if connecting, opening the table or scanning fails
 */
@Test
public void testScan() throws Exception {
    long start = System.currentTimeMillis();
    // Configuration conf = HBaseConfiguration.create();
    // NOTE(review): `conf` is not declared in this snippet; presumably it is a field of
    // the enclosing test class -- confirm.
    // try-with-resources: the original never closed the connection or the table, and
    // leaked the scanner whenever the scan threw.
    try (Connection conn = ConnectionFactory.createConnection(conf);
         HTable table = (HTable) conn.getTable(TableName.valueOf("ns1:t2"))) {
        Scan scan = new Scan();
        // Number of rows requested from the region server per scanner RPC.
        scan.setCaching(200);
        // [startRow, stopRow)
        // scan.setStartRow(Bytes.toBytes("row500"));
        // scan.setStopRow(Bytes.toBytes("row520"));
        try (ResultScanner scanner = table.getScanner(scan)) {
            for (Result r : scanner) {
                // Printing was disabled in the original; rows are only drained here.
                // outResult(r);
            }
        }
        // Reported after the scanner is closed, as in the original.
        System.out.println(System.currentTimeMillis() - start);
    }
}

/**
 * Prints every cell of one scan result as
 * {@code rowkey/family:qualifier/timestamp=value}, preceded by a separator line.
 *
 * @param r the result to print; when it holds no cells only the separator is printed
 */
private void outResult(Result r) {
    System.out.println("=========================");
    List<Cell> cells = r.listCells();
    if (cells == null) {
        // Result.listCells() returns null for an empty Result; iterating it would NPE.
        return;
    }
    for (Cell cell : cells) {
        String rowkey = Bytes.toString(CellUtil.cloneRow(cell));
        String f = Bytes.toString(CellUtil.cloneFamily(cell));
        String col = Bytes.toString(CellUtil.cloneQualifier(cell));
        long ts = cell.getTimestamp();
        String value = Bytes.toString(CellUtil.cloneValue(cell));
        System.out.println(rowkey + "/" + f + ":" + col + "/" + ts + "=" + value);
    }
}

2、timeout-retries
/** * 添加扫描 */@Testpublic void testScanCache() throws Exception {//Configuration conf = HBaseConfiguration.create();System.out.println(conf.get(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD));System.out.println(conf.get(HConstants.HBASE_CLIENT_RETRIES_NUMBER));//设置扫描超时,默认60000,超时之后重试//conf.set(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD,"1");//设置重试次数(35次)//conf.set(HConstants.HBASE_CLIENT_RETRIES_NUMBER,"1");Connection conn = ConnectionFactory.createConnection(conf);HTable table = (HTable) conn.getTable(TableName.valueOf("ns1:t2"));System.out.println(table.getScannerCaching());Scan scan = new Scan();System.out.println(scan.getCaching()) ;scan.setCaching(5) ;//[startRow,stopRow)//scan.setStartRow(Bytes.toBytes("row500"));//scan.setStopRow(Bytes.toBytes("row520"));ResultScanner scanner = table.getScanner(scan);Iterator<Result> it = scanner.iterator();while (it.hasNext()) {Result r = it.next();outResult(r);}scanner.close();}private void outResult(Result r){System.out.println("=========================");List<Cell> cells = r.listCells();for(Cell cell : cells){String rowkey = Bytes.toString(CellUtil.cloneRow(cell));String f = Bytes.toString(CellUtil.cloneFamily(cell));String col = Bytes.toString(CellUtil.cloneQualifier(cell));long ts = cell.getTimestamp();String value = Bytes.toString(CellUtil.cloneValue(cell));System.out.println(rowkey+"/"+f+":"+col+"/"+ts + "=" + value);}}

3.batch
缓存是面向行的,它决定服务器一次RPC回传多少行数据;但如果单行数据过大,
可能造成client OOM,可以设置batch来解决。batch是面向列的。

scan.setBatch(2);//默认-1 全部返回.

/** * 添加扫描 */@Testpublic void testScanCacheBatch() throws Exception {//Configuration conf = HBaseConfiguration.create();Connection conn = ConnectionFactory.createConnection(conf);HTable table = (HTable) conn.getTable(TableName.valueOf("ns1:t2"));Scan scan = new Scan();System.out.println(scan.getBatch());//三行scan.setCaching(3) ;//2列scan.setBatch(1) ;//ResultScanner scanner = table.getScanner(scan);//Iterator<Result> it = scanner.iterator();//while (it.hasNext()) {//Result r = it.next();//outResult(r);//}//scanner.close();}private void outResult(Result r){System.out.println("=========================");List<Cell> cells = r.listCells();for(Cell cell : cells){String rowkey = Bytes.toString(CellUtil.cloneRow(cell));String f = Bytes.toString(CellUtil.cloneFamily(cell));String col = Bytes.toString(CellUtil.cloneQualifier(cell));long ts = cell.getTimestamp();String value = Bytes.toString(CellUtil.cloneValue(cell));System.out.println(rowkey+"/"+f+":"+col+"/"+ts + "=" + value);}}


原创粉丝点击