hbase 源代码解析(21) 自定义过滤器

来源:互联网 发布:付3000就帮办淘宝贷款 编辑:程序博客网 时间:2024/05/17 23:27
自定义 filter 需要继承抽象类 Filter,或者(更常见的做法)继承 FilterBase
  1. @InterfaceAudience.Public
  2. @InterfaceStability.Stable
  3. public abstract class Filter {
  4. // Return codes that filterKeyValue() hands back for each Cell
  5. public enum ReturnCode {
  6. INCLUDE, // include this Cell in the result
  7. INCLUDE_AND_NEXT_COL, // include this Cell, then skip ahead to the next column
  8. SKIP, // skip this KeyValue and go on to the next one
  9. NEXT_COL, // skip the rest of the current column
  10. NEXT_ROW, // skip the rest of the current row
  11. SEEK_NEXT_USING_HINT, // seek to the next matching position; the target comes from getNextCellHint() (getNextKeyHint() is the deprecated KeyValue variant)
  12. }
  13. protected transient boolean reversed;
  14. abstract public void reset() throws IOException;
  15. // Checks the row key; a row that does not qualify can be skipped right away, avoiding all further checks (e.g. a prefix filter)
  16. abstract public boolean filterRowKey(byte[] buffer, int offset, int length) throws IOException;
  17. // Lets the filter end the whole scan early
  18. abstract public boolean filterAllRemaining() throws IOException;
  19. // Per-Cell check
  20. abstract public ReturnCode filterKeyValue(final Cell v) throws IOException;
  21. abstract public Cell transformCell(final Cell v) throws IOException;
  22. @Deprecated // use Cell transformCell(final Cell)
  23. abstract public KeyValue transform(final KeyValue currentKV) throws IOException;
  24. // After the checks above, whatever cells are left for the current row are processed together (e.g. a dependent-column filter)
  25. abstract public void filterRowCells(List<Cell> kvs) throws IOException;
  26. abstract public boolean hasFilterRow();
  27. // Last row-level check on the data that survived everything above, e.g. a page filter checking whether the page is already full
  28. abstract public boolean filterRow() throws IOException;
  29. @Deprecated
  30. abstract public KeyValue getNextKeyHint(final KeyValue currentKV) throws IOException;
  31. abstract public Cell getNextCellHint(final Cell currentKV) throws IOException;
  32. abstract public boolean isFamilyEssential(byte[] name) throws IOException;
  33. abstract public byte[] toByteArray() throws IOException;
  34. public static Filter parseFrom(final byte [] pbBytes) throws DeserializationException {
  35. throw new DeserializationException(
  36. "parseFrom called on base Filter, but should be called on derived type");
  37. }
  38. abstract boolean areSerializedFieldsEqual(Filter other);
  39. public void setReversed(boolean reversed) {
  40. this.reversed = reversed;
  41. }
  42. public boolean isReversed() {
  43. return this.reversed;
  44. }
  45. }

流程如下:
 

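scan 或者 get 是调用的入口
基本流程就是下面这样,但是没有看到 filterKeyValue,如果你们找到了,告诉我一声。(补充:filterKeyValue 不在 nextInternal 里直接调用,而是在 populateResult → storeHeap.next → StoreScanner.next 的过程中,由 ScanQueryMatcher.match()(2.x 中为 UserScanQueryMatcher)对每个 cell 调用。)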
  1. private boolean nextInternal(List<Cell> results, ScannerContext scannerContext)
  2. throws IOException {
  3. while (true) {
  4.  
  5. boolean stopRow = isStopRow(currentRow, offset, length);
  6. boolean hasFilterRow = this.filter != null && this.filter.hasFilterRow();
  7. if (hasFilterRow) {
  8. if (LOG.isTraceEnabled()) {
  9. LOG.trace("filter#hasFilterRow is true which prevents partial results from being "
  10. + " formed. Changing scope of limits that may create partials");
  11. }
  12. scannerContext.setSizeLimitScope(LimitScope.BETWEEN_ROWS);
  13. scannerContext.setTimeLimitScope(LimitScope.BETWEEN_ROWS);
  14. }
  15. if (filterRowKey(currentRow, offset, length)) {
  16. incrementCountOfRowsFilteredMetric(scannerContext);
  17. // early check, see HBASE-16296
  18. // isFilterDoneInternal() is where filterAllRemaining() actually gets called (per the article; its body is not shown here)
  19. if (isFilterDoneInternal()) {
  20. return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
  21. }
  22. incrementCountOfRowsScannedMetric(scannerContext);
  23. // nextRow() calls filter.reset() internally (per the article; its body is not shown here)
  24. boolean moreRows = nextRow(scannerContext, currentRow, offset, length);
  25. if (!moreRows) {
  26. return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
  27. }
  28. results.clear();
  29. continue;
  30. }
  31. populateResult(results, this.storeHeap, scannerContext, currentRow, offset, length);
  32. Cell nextKv = this.storeHeap.peek();
  33. stopRow = nextKv == null ||
  34. isStopRow(nextKv.getRowArray(), nextKv.getRowOffset(), nextKv.getRowLength());
  35. final boolean isEmptyRow = results.isEmpty();
    1. FilterWrapper.FilterRowRetCode ret = FilterWrapper.FilterRowRetCode.NOT_CALLED;
  36. if (hasFilterRow) {
  37. // Per the article this calls filterRowCells(List) and filterRowCells(cell). NOTE(review): the second call is probably filterRow() - confirm against FilterWrapper
  38. ret = filter.filterRowCellsWithRet(results);
  39. long timeProgress = scannerContext.getTimeProgress();
  40. if (scannerContext.getKeepProgress()) {
  41. scannerContext.setProgress(initialBatchProgress, initialSizeProgress,
  42. initialTimeProgress);
  43. } else {
  44. scannerContext.clearProgress();
  45. }
  46. scannerContext.setTimeProgress(timeProgress);
  47. scannerContext.incrementBatchProgress(results.size());
  48. for (Cell cell : results) {
  49. scannerContext.incrementSizeProgress(CellUtil.estimatedHeapSizeOfWithoutTags(cell));
  50. }
  51. }
  52. if (isEmptyRow || ret == FilterWrapper.FilterRowRetCode.EXCLUDE || filterRow()) {
  53. incrementCountOfRowsFilteredMetric(scannerContext);
  54. results.clear();
  55. boolean moreRows = nextRow(scannerContext, currentRow, offset, length);
  56. if (!moreRows) {
  57. return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
  58. }
  59. if (!stopRow) continue;
  60. return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
  61. }
  62. if (this.joinedHeap != null) {
  63. boolean mayHaveData = joinedHeapMayHaveData(currentRow, offset, length);
  64. if (mayHaveData) {
  65. joinedContinuationRow = current;
  66. populateFromJoinedHeap(results, scannerContext);
  67. if (scannerContext.checkAnyLimitReached(LimitScope.BETWEEN_CELLS)) {
  68. return true;
  69. }
  70. }
  71. }
  72. } else {
  73. populateFromJoinedHeap(results, scannerContext);
  74. if (scannerContext.checkAnyLimitReached(LimitScope.BETWEEN_CELLS)) {
  75. return true;
  76. }
  77. }
  78. if (stopRow) {
  79. return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
  80. } else {
  81. return scannerContext.setScannerState(NextState.MORE_VALUES).hasMoreValues();
  82. }
  83. }
  84. }


就这样。

自定义完成后,打成 jar,需要 export HBASE_CLASSPATH(把该 jar 的路径加入 HBASE_CLASSPATH),
或者将 jar 放到 hbase 安装目录的 lib 下面,然后重启 hbase










原创粉丝点击