HBase Coprocessors in Practice: Notes


  • Coprocessor overview

Definition:

HBase provides a mechanism (framework) that lets part of the user's logic run where the data is stored, on the HBase server side. Coprocessors allow users to execute their own code on the HBase servers.

Classification:

Observer:

                  RegionObserver             hooks for client data-manipulation events: Get, Put, Delete, Scan, and so on

                  RegionServerObserver  handles events on the RegionServer itself

                  MasterObserver             hooks for DDL-type operations, such as creating, deleting, and altering tables

                  WALObserver                hooks for WAL-related operations

These interfaces can be used together in the same place and are executed in priority order, so users can build arbitrarily complex HBase feature layers on top of coprocessors. HBase exposes many events that can trigger observer methods; the events and methods have been part of the HBase API since version 0.92. Be aware that the API may change for various reasons, and the interfaces differ considerably between versions.
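As a concrete example of such a hook, here is a minimal RegionObserver sketch written against the 0.98-era API used in the rest of these notes. The class name, row key, and bypass policy are made up for illustration; it short-circuits Gets on one specific row:

package edu.observer;

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical hook: runs on the region server before every Get.
public class AccessHook extends BaseRegionObserver {
    private static final byte[] SECRET_ROW = Bytes.toBytes("secret"); // hypothetical row key

    @Override
    public void preGetOp(ObserverContext<RegionCoprocessorEnvironment> e,
            Get get, List<Cell> results) throws IOException {
        if (Bytes.equals(get.getRow(), SECRET_ROW)) {
            // Skip the default Get processing, so the client sees an empty result.
            e.bypass();
        }
    }
}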

[Figure: how RegionObserver works]


Endpoint


Endpoints are the interface to dynamic RPC plugins. Their implementation code is installed on the server side, where it can be invoked through HBase RPC. The client library provides convenient methods for calling these dynamic interfaces: at any time the client can call an endpoint, its implementation is executed remotely at the target region, and the result is returned to the client. Combining these powerful plugin interfaces, users can add entirely new features to HBase.

Writing the endpoint server side

1. Define a message format in a .proto file.

option java_package = "edu.endpoint";
option java_outer_classname = "Sum";
option java_generic_services = true;
option java_generate_equals_and_hash = true;
option optimize_for = SPEED;

message SumRequest {
    required string family = 1;
    required string column = 2;
}

message SumResponse {
    required int64 sum = 1 [default = 0];
}

service SumService {
    rpc getSum(SumRequest) returns (SumResponse);
}

2. Compile it with the protoc command to generate the Java code.

First, install protobuf on the Linux VM:

$ wget https://github.com/google/protobuf/archive/v2.6.1.zip
$ unzip v2.6.1.zip
$ cd protobuf-2.6.1
Source downloaded from GitHub (as opposed to a release tarball) requires running $ ./autogen.sh first to generate the configure script.
$ ./configure
$ make
$ make check
$ make install
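make install puts the libraries under /usr/local/lib; on many Linux distributions the dynamic-linker cache has to be refreshed afterwards, otherwise protoc fails to load libprotobuf at startup. A quick sanity check:

$ sudo ldconfig        # refresh the shared-library cache
$ protoc --version     # should print: libprotoc 2.6.1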

Generate the Java code from the .proto file:

protoc endpoint.proto --java_out=./

This writes edu/endpoint/Sum.java under the current directory, following the java_package and java_outer_classname options above.

In Eclipse, write SumEndPoint.java (package edu.endpoint):

package edu.endpoint;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.coprocessor.CoprocessorException;
import org.apache.hadoop.hbase.coprocessor.CoprocessorService;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.protobuf.ResponseConverter;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.util.Bytes;

import com.google.protobuf.RpcCallback;
import com.google.protobuf.RpcController;
import com.google.protobuf.Service;

import edu.endpoint.Sum.SumRequest;
import edu.endpoint.Sum.SumResponse;
import edu.endpoint.Sum.SumService;

public class SumEndPoint extends SumService implements Coprocessor, CoprocessorService {

    private RegionCoprocessorEnvironment env;

    @Override
    public void getSum(RpcController controller, SumRequest request,
            RpcCallback<SumResponse> done) {
        // Scan only the requested family/column, within this region only.
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes(request.getFamily()));
        scan.addColumn(Bytes.toBytes(request.getFamily()), Bytes.toBytes(request.getColumn()));

        SumResponse response = null;
        InternalScanner scanner = null;
        try {
            scanner = env.getRegion().getScanner(scan);
            List<Cell> results = new ArrayList<Cell>();
            boolean hasMore = false;
            long sum = 0L;
            do {
                hasMore = scanner.next(results);
                for (Cell cell : results) {
                    sum += Long.parseLong(new String(CellUtil.cloneValue(cell)));
                }
                results.clear(); // reuse the buffer for the next batch
            } while (hasMore);
            response = SumResponse.newBuilder().setSum(sum).build();
        } catch (IOException e) {
            // Report the failure back to the client through the controller.
            ResponseConverter.setControllerException(controller, e);
        } finally {
            if (scanner != null) {
                try {
                    scanner.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        done.run(response);
    }

    @Override
    public Service getService() {
        return this;
    }

    @Override
    public void start(CoprocessorEnvironment env) throws IOException {
        if (env instanceof RegionCoprocessorEnvironment) {
            this.env = (RegionCoprocessorEnvironment) env;
        } else {
            throw new CoprocessorException("must be loaded on a table region");
        }
    }

    @Override
    public void stop(CoprocessorEnvironment env) throws IOException {
        // nothing to clean up
    }
}

Writing the client that uses the endpoint to sum up a column of the table

In Eclipse, write Test.java (package edu.endpoint):

package edu.endpoint;

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.coprocessor.Batch;
import org.apache.hadoop.hbase.ipc.BlockingRpcCallback;

import com.google.protobuf.ServiceException;

import edu.endpoint.Sum.SumRequest;
import edu.endpoint.Sum.SumResponse;
import edu.endpoint.Sum.SumService;

public class Test {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "cluster1");
        HConnection conn = HConnectionManager.createConnection(conf);
        HTableInterface table = conn.getTable("sum_table");
        long h = 0L;
        final SumRequest request = SumRequest.newBuilder()
                .setFamily("d").setColumn("b").build();
        try {
            // null start/end row: invoke the endpoint on every region of the table.
            Map<byte[], Long> result = table.coprocessorService(
                    SumService.class, null, null,
                    new Batch.Call<SumService, Long>() {
                        @Override
                        public Long call(SumService aggregate) throws IOException {
                            BlockingRpcCallback<SumResponse> rpcCallback =
                                    new BlockingRpcCallback<SumResponse>();
                            aggregate.getSum(null, request, rpcCallback);
                            SumResponse response = rpcCallback.get();
                            return response.hasSum() ? response.getSum() : 0L;
                        }
                    });
            // Add up the per-region partial sums.
            for (Long sum : result.values()) {
                h += sum;
            }
            System.out.println("sum = " + h);
        } catch (ServiceException e) {
            e.printStackTrace();
        } catch (Throwable e) {
            e.printStackTrace();
        }
        table.close();
        conn.close();
    }
}
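Note that coprocessorService() invokes getSum() once per region whose key range overlaps the given start/end rows (null, null covers the whole table), so the returned map holds one partial sum per region; the loop over result.values() adds them into the final total on the client.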

Package the code into a jar, upload it to HDFS, and test.
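The notes don't show the loading step itself. A sketch of it from the HBase shell, assuming the jar was uploaded to a hypothetical HDFS path /user/hbase/endpoint.jar (the coprocessor attribute format is jar-path|class|priority|arguments):

hbase> disable 'sum_table'
hbase> alter 'sum_table', METHOD => 'table_att', 'coprocessor' => 'hdfs:///user/hbase/endpoint.jar|edu.endpoint.SumEndPoint|1001|'
hbase> enable 'sum_table'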

HBase coprocessor: Observer

Scenario

table1 (the table being written; t2 in the test below):

    rowkey r    columns d:b, d:c

table2 (the index table, index_ob_table): on every Put, the value of d:b becomes the rowkey and d:c is copied along:

    rowkey = value of d:b    column d:c

In Eclipse, write ObserverPut.java (package edu.observer):

package edu.observer;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;

public class ObserverPut extends BaseRegionObserver {

    @Override
    public void postPut(final ObserverContext<RegionCoprocessorEnvironment> e,
            final Put put, final WALEdit edit, final Durability durability)
            throws IOException {
        HTableInterface table =
                e.getEnvironment().getTable(TableName.valueOf("index_ob_table"));
        // The cells written to d:b become the row keys of the index table;
        // the d:c cells are copied over as the indexed values.
        List<Cell> cells = put.get("d".getBytes(), "b".getBytes());
        List<Cell> cells_c = put.get("d".getBytes(), "c".getBytes());
        Iterator<Cell> cellItor = cells.iterator();
        while (cellItor.hasNext()) {
            Cell tmp = cellItor.next();
            Put indexput = new Put(CellUtil.cloneValue(tmp));
            for (Cell c : cells_c) {
                indexput.add("d".getBytes(), "c".getBytes(), CellUtil.cloneValue(c));
            }
            table.put(indexput);
        }
        table.close();
    }
}
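As with the endpoint, the observer jar has to be attached to the data table before writes will trigger it. A sketch from the HBase shell, again assuming a hypothetical HDFS path for the jar:

hbase> disable 't2'
hbase> alter 't2', METHOD => 'table_att', 'coprocessor' => 'hdfs:///user/hbase/observer.jar|edu.observer.ObserverPut|1001|'
hbase> enable 't2'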
Then create a Test.java for testing (package edu.observer):

package edu.observer;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;

public class Test {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        HConnection conn = HConnectionManager.createConnection(conf);
        HTableInterface table = conn.getTable("t2");
        // Writing d:b and d:c should trigger ObserverPut.postPut on the region server.
        Put put = new Put("r".getBytes());
        put.add("d".getBytes(), "b".getBytes(), "b1".getBytes());
        put.add("d".getBytes(), "c".getBytes(), "c1".getBytes());
        table.put(put);
        table.close();
        conn.close();
    }
}
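If the observer fired, index_ob_table should now contain a row keyed by the value written to d:b. A quick check from the HBase shell, with the expected output sketched from the example values above:

hbase> scan 'index_ob_table'
ROW    COLUMN+CELL
 b1    column=d:c, timestamp=..., value=c1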
This experiment was done in a bit of a hurry, and a few problems remain unresolved.



