hbase 源代码分析 (8) delete 过程 详解

来源:互联网 发布:淘宝官网电脑版 编辑:程序博客网 时间:2024/06/05 06:20
上一章节:hbase 源代码分析 (7) put 过程 详解
http://blog.csdn.net/chenfenggang/article/details/75094161


delete的过程和put相似。

具体可以参考put过程

delete很直接,第一步就直接发送了请求。相比put、get、scan以及getRegionLocator等过程,这简直就是奇迹。所以我也很直接地把所有代码都贴出来了。
  // Client-side delete: wraps the given Delete in a RegionServerCallable keyed by
  // tableName and delete.getRow(), then runs it through rpcCallerFactory with retries.
  // Throws IOException. The Boolean produced by the callable is not used by this method.
  1. @Override
  2. public void delete(final Delete delete)
  3. throws IOException {
  4. RegionServerCallable<Boolean> callable = new RegionServerCallable<Boolean>(connection,
  5. tableName, delete.getRow()) {
  6. @Override
  7. public Boolean call(int callTimeout) throws IOException {
  // A new controller is created for each call and configured with the table-based
  // priority and the timeout passed in for this attempt.
  8. PayloadCarryingRpcController controller = rpcControllerFactory.newController();
  9. controller.setPriority(tableName);
  10. controller.setCallTimeout(callTimeout);
  11. try {
  // The request is built from the region name of the callable's current location
  // plus the Delete itself, and sent through the stub's mutate RPC.
  12. MutateRequest request = RequestConverter.buildMutateRequest(
  13. getLocation().getRegionInfo().getRegionName(), delete);
  14. MutateResponse response = getStub().mutate(controller, request);
  15. return Boolean.valueOf(response.getProcessed());
  16. } catch (ServiceException se) {
  // Convert the ServiceException via ProtobufUtil.getRemoteException and rethrow.
  17. throw ProtobufUtil.getRemoteException(se);
  18. }
  19. }
  20. };
  // rpcTimeout configures the caller; operationTimeout is passed to callWithRetries.
  21. rpcCallerFactory.<Boolean> newCaller(rpcTimeout).callWithRetries(callable,
  22. this.operationTimeout);
  23. }


服务端辗转几个回合之后,调用了下面这个主要的方法。它长得要命,将近400行,我就不删减了,留着你们细细看吧。

1)转到后面的类的几个重要过程。
  1. region.delete(delete);
  1. OperationStatus[] batchMutate = this.batchMutate(new Mutation[]{mutation});
这里面的几次跳转主要做了三件事:
1)钩子,很多协处理器(coprocessor)要处理。
2)认证,判断正确性。
3)分情况处理。
然后到这个类。

这个大方法主要看看里面的注释(原文中标红的部分)就行了。注释把什么都讲清楚了,我就轻松了。
  // Runs one mini-batch of the mutations held by batchOp, beginning at
  // batchOp.nextIndexToProcess, through the numbered STEP comments below.
  // Returns addedSize (the sum of the applyFamilyMapToMemstore results); returns 0L early
  // when no mutation was ready to write or when coprocessorHost.preBatchMutate returns true.
  // Throws IOException. The finally block always sets batchOp.nextIndexToProcess to
  // lastIndexExclusive, releases the locks taken here, and marks still-NOT_RUN entries
  // as FAILURE when the method did not reach "success = true".
  1. private long doMiniBatchMutation(BatchOperationInProgress<?> batchOp) throws IOException {
  2. boolean isInReplay = batchOp.isInReplay();
  3. // variable to note if all Put items are for the same CF -- metrics related
  4. boolean putsCfSetConsistent = true;
  5. //The set of columnFamilies first seen for Put.
  6. Set<byte[]> putsCfSet = null;
  7. // variable to note if all Delete items are for the same CF -- metrics related
  8. boolean deletesCfSetConsistent = true;
  9. //The set of columnFamilies first seen for Delete.
  10. Set<byte[]> deletesCfSet = null;
  11. long currentNonceGroup = HConstants.NO_NONCE, currentNonce = HConstants.NO_NONCE;
  12. WALEdit walEdit = new WALEdit(isInReplay);
  13. MultiVersionConcurrencyControl.WriteEntry writeEntry = null;
  14. long txid = 0;
  15. boolean doRollBackMemstore = false;
  16. boolean locked = false;
  17. /** Keep track of the locks we hold so we can release them in finally clause */
  18. List<RowLock> acquiredRowLocks = Lists.newArrayListWithCapacity(batchOp.operations.length);
  19. // reference family maps directly so coprocessors can mutate them if desired
  20. Map<byte[], List<Cell>>[] familyMaps = new Map[batchOp.operations.length];
  21. // We try to set up a batch in the range [firstIndex,lastIndexExclusive)
  22. int firstIndex = batchOp.nextIndexToProcess;
  23. int lastIndexExclusive = firstIndex;
  24. boolean success = false;
  25. int noOfPuts = 0, noOfDeletes = 0;
  26. WALKey walKey = null;
  27. long mvccNum = 0;
  28. long addedSize = 0;
  29. try {
  30. // ------------------------------------
  31. // STEP 1. Try to acquire as many locks as we can, and ensure
  32. // we acquire at least one.
  33. // ----------------------------------
  34. int numReadyToWrite = 0;
  35. long now = EnvironmentEdgeManager.currentTime();
  36. while (lastIndexExclusive < batchOp.operations.length) {
  37. Mutation mutation = batchOp.getMutation(lastIndexExclusive);
  38. boolean isPutMutation = mutation instanceof Put;
  39. Map<byte[], List<Cell>> familyMap = mutation.getFamilyCellMap();
  40. // store the family map reference to allow for mutations
  41. familyMaps[lastIndexExclusive] = familyMap;
  42. // skip anything that "ran" already
  43. if (batchOp.retCodeDetails[lastIndexExclusive].getOperationStatusCode()
  44. != OperationStatusCode.NOT_RUN) {
  45. lastIndexExclusive++;
  46. continue;
  47. }
  // Validation: each failure below records a status for this index and moves on to the
  // next mutation instead of aborting the whole batch.
  48. try {
  49. if (isPutMutation) {
  50. // Check the families in the put. If bad, skip this one.
  51. if (isInReplay) {
  52. removeNonExistentColumnFamilyForReplay(familyMap);
  53. } else {
  54. checkFamilies(familyMap.keySet());
  55. }
  56. checkTimestamps(mutation.getFamilyCellMap(), now);
  57. } else {
  58. prepareDelete((Delete) mutation);
  59. }
  60. checkRow(mutation.getRow(), "doMiniBatchMutation");
  61. } catch (NoSuchColumnFamilyException nscf) {
  62. LOG.warn("No such column family in batch mutation", nscf);
  63. batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus(
  64. OperationStatusCode.BAD_FAMILY, nscf.getMessage());
  65. lastIndexExclusive++;
  66. continue;
  67. } catch (FailedSanityCheckException fsce) {
  68. LOG.warn("Batch Mutation did not pass sanity check", fsce);
  69. batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus(
  70. OperationStatusCode.SANITY_CHECK_FAILURE, fsce.getMessage());
  71. lastIndexExclusive++;
  72. continue;
  73. } catch (WrongRegionException we) {
  74. LOG.warn("Batch mutation had a row that does not belong to this region", we);
  75. batchOp.retCodeDetails[lastIndexExclusive] = new OperationStatus(
  76. OperationStatusCode.SANITY_CHECK_FAILURE, we.getMessage());
  77. lastIndexExclusive++;
  78. continue;
  79. }
  80. // If we haven't got any rows in our batch, we should block to
  81. // get the next one.
  82. RowLock rowLock = null;
  // An IOException from getRowLock is only logged; rowLock then stays null and the
  // loop stops at the break below.
  83. try {
  84. rowLock = getRowLock(mutation.getRow(), true);
  85. } catch (IOException ioe) {
  86. LOG.warn("Failed getting lock in batch put, row="
  87. + Bytes.toStringBinary(mutation.getRow()), ioe);
  88. }
  89. if (rowLock == null) {
  90. // We failed to grab another lock
  91. break; // stop acquiring more rows for this batch
  92. } else {
  93. acquiredRowLocks.add(rowLock);
  94. }
  95. lastIndexExclusive++;
  96. numReadyToWrite++;
  97. if (isPutMutation) {
  98. // If Column Families stay consistent through out all of the
  99. // individual puts then metrics can be reported as a mutliput across
  100. // column families in the first put.
  101. if (putsCfSet == null) {
  102. putsCfSet = mutation.getFamilyCellMap().keySet();
  103. } else {
  104. putsCfSetConsistent = putsCfSetConsistent
  105. && mutation.getFamilyCellMap().keySet().equals(putsCfSet);
  106. }
  107. } else {
  108. if (deletesCfSet == null) {
  109. deletesCfSet = mutation.getFamilyCellMap().keySet();
  110. } else {
  111. deletesCfSetConsistent = deletesCfSetConsistent
  112. && mutation.getFamilyCellMap().keySet().equals(deletesCfSet);
  113. }
  114. }
  115. }
  116. // we should record the timestamp only after we have acquired the rowLock,
  117. // otherwise, newer puts/deletes are not guaranteed to have a newer timestamp
  118. now = EnvironmentEdgeManager.currentTime();
  119. byte[] byteNow = Bytes.toBytes(now);
  120. // Nothing to put/delete -- an exception in the above such as NoSuchColumnFamily?
  121. if (numReadyToWrite <= 0) return 0L;
  122. // We've now grabbed as many mutations off the list as we can
  123. // ------------------------------------
  124. // STEP 2. Update any LATEST_TIMESTAMP timestamps
  125. // ----------------------------------
  // The "!isInReplay" term in the loop condition means this whole loop is skipped in replay.
  126. for (int i = firstIndex; !isInReplay && i < lastIndexExclusive; i++) {
  127. // skip invalid
  128. if (batchOp.retCodeDetails[i].getOperationStatusCode()
  129. != OperationStatusCode.NOT_RUN) continue;
  130. Mutation mutation = batchOp.getMutation(i);
  131. if (mutation instanceof Put) {
  132. updateCellTimestamps(familyMaps[i].values(), byteNow);
  133. noOfPuts++;
  134. } else {
  135. prepareDeleteTimestamps(mutation, familyMaps[i], byteNow);
  136. noOfDeletes++;
  137. }
  138. rewriteCellTags(familyMaps[i], mutation);
  139. }
  // "locked" records that the updatesLock read lock is held, so the finally block can
  // release it if STEP 6 is never reached.
  140. lock(this.updatesLock.readLock(), numReadyToWrite);
  141. locked = true;
  142. // calling the pre CP hook for batch mutation
  143. if (!isInReplay && coprocessorHost != null) {
  144. MiniBatchOperationInProgress<Mutation> miniBatchOp =
  145. new MiniBatchOperationInProgress<Mutation>(batchOp.getMutationsForCoprocs(),
  146. batchOp.retCodeDetails, batchOp.walEditsFromCoprocessors, firstIndex, lastIndexExclusive);
  147. if (coprocessorHost.preBatchMutate(miniBatchOp)) return 0L;
  148. }
  149. // ------------------------------------
  150. // STEP 3. Build WAL edit
  151. // ----------------------------------
  152. Durability durability = Durability.USE_DEFAULT;
  153. for (int i = firstIndex; i < lastIndexExclusive; i++) {
  154. // Skip puts that were determined to be invalid during preprocessing
  155. if (batchOp.retCodeDetails[i].getOperationStatusCode() != OperationStatusCode.NOT_RUN) {
  156. continue;
  157. }
  158. Mutation m = batchOp.getMutation(i);
  159. Durability tmpDur = getEffectiveDurability(m.getDurability());
  // Keep the Durability with the largest ordinal seen across the batch; it is the one
  // handed to syncOrDefer in STEP 7.
  160. if (tmpDur.ordinal() > durability.ordinal()) {
  161. durability = tmpDur;
  162. }
  163. if (tmpDur == Durability.SKIP_WAL) {
  164. recordMutationWithoutWal(m.getFamilyCellMap());
  165. continue;
  166. }
  167. long nonceGroup = batchOp.getNonceGroup(i), nonce = batchOp.getNonce(i);
  168. // In replay, the batch may contain multiple nonces. If so, write WALEdit for each.
  169. // Given how nonces are originally written, these should be contiguous.
  170. // They don't have to be, it will still work, just write more WALEdits than needed.
  171. if (nonceGroup != currentNonceGroup || nonce != currentNonce) {
  172. if (walEdit.size() > 0) {
  173. assert isInReplay;
  174. if (!isInReplay) {
  175. throw new IOException("Multiple nonces per batch and not in replay");
  176. }
  177. // txid should always increase, so having the one from the last call is ok.
  178. // we use HLogKey here instead of WALKey directly to support legacy coprocessors.
  179. walKey = new ReplayHLogKey(this.getRegionInfo().getEncodedNameAsBytes(),
  180. this.htableDescriptor.getTableName(), now, m.getClusterIds(),
  181. currentNonceGroup, currentNonce, mvcc);
  182. txid = this.wal.append(this.htableDescriptor, this.getRegionInfo(), walKey,
  183. walEdit, true);
  184. walEdit = new WALEdit(isInReplay);
  185. walKey = null;
  186. }
  187. currentNonceGroup = nonceGroup;
  188. currentNonce = nonce;
  189. }
  190. // Add WAL edits by CP
  191. WALEdit fromCP = batchOp.walEditsFromCoprocessors[i];
  192. if (fromCP != null) {
  193. for (Cell cell : fromCP.getCells()) {
  194. walEdit.add(cell);
  195. }
  196. }
  197. addFamilyMapToWALEdit(familyMaps[i], walEdit);
  198. }
  199. // -------------------------
  200. // STEP 4. Append the final edit to WAL. Do not sync wal.
  201. // -------------------------
  // Cluster ids for the WAL key are taken from the mutation at firstIndex.
  202. Mutation mutation = batchOp.getMutation(firstIndex);
  203. if (isInReplay) {
  204. // use wal key from the original
  205. walKey = new ReplayHLogKey(this.getRegionInfo().getEncodedNameAsBytes(),
  206. this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now,
  207. mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc);
  208. long replaySeqId = batchOp.getReplaySequenceId();
  209. walKey.setOrigLogSeqNum(replaySeqId);
  210. }
  211. if (walEdit.size() > 0) {
  212. if (!isInReplay) {
  213. // we use HLogKey here instead of WALKey directly to support legacy coprocessors.
  214. walKey = new HLogKey(this.getRegionInfo().getEncodedNameAsBytes(),
  215. this.htableDescriptor.getTableName(), WALKey.NO_SEQUENCE_ID, now,
  216. mutation.getClusterIds(), currentNonceGroup, currentNonce, mvcc);
  217. }
  218. txid = this.wal.append(this.htableDescriptor, this.getRegionInfo(), walKey, walEdit, true);
  219. }
  220. // ------------------------------------
  221. // Acquire the latest mvcc number
  222. // ----------------------------------
  223. if (walKey == null) {
  224. // If this is a skip wal operation just get the read point from mvcc
  225. walKey = this.appendEmptyEdit(this.wal);
  226. }
  // Outside replay the mvcc number comes from the WAL key's write entry; in replay the
  // batch's replay sequence id is used instead and writeEntry stays null.
  227. if (!isInReplay) {
  228. writeEntry = walKey.getWriteEntry();
  229. mvccNum = writeEntry.getWriteNumber();
  230. } else {
  231. mvccNum = batchOp.getReplaySequenceId();
  232. }
  233. // ------------------------------------
  234. // STEP 5. Write back to memstore
  235. // Write to memstore. It is ok to write to memstore
  236. // first without syncing the WAL because we do not roll
  237. // forward the memstore MVCC. The MVCC will be moved up when
  238. // the complete operation is done. These changes are not yet
  239. // visible to scanners till we update the MVCC. The MVCC is
  240. // moved only when the sync is complete.
  241. // ----------------------------------
  242. for (int i = firstIndex; i < lastIndexExclusive; i++) {
  243. if (batchOp.retCodeDetails[i].getOperationStatusCode()
  244. != OperationStatusCode.NOT_RUN) {
  245. continue;
  246. }
  247. doRollBackMemstore = true; // If we have a failure, we need to clean what we wrote
  248. addedSize += applyFamilyMapToMemstore(familyMaps[i], mvccNum, isInReplay);
  249. }
  250. // -------------------------------
  251. // STEP 6. Release row locks, etc.
  252. // -------------------------------
  253. if (locked) {
  254. this.updatesLock.readLock().unlock();
  255. locked = false;
  256. }
  257. releaseRowLocks(acquiredRowLocks);
  258. // -------------------------
  259. // STEP 7. Sync wal.
  260. // -------------------------
  // txid is still 0 if no this.wal.append call in this method assigned it.
  261. if (txid != 0) {
  262. syncOrDefer(txid, durability);
  263. }
  // Past this point the finally block no longer rolls the memstore back.
  264. doRollBackMemstore = false;
  265. // calling the post CP hook for batch mutation
  266. if (!isInReplay && coprocessorHost != null) {
  267. MiniBatchOperationInProgress<Mutation> miniBatchOp =
  268. new MiniBatchOperationInProgress<Mutation>(batchOp.getMutationsForCoprocs(),
  269. batchOp.retCodeDetails, batchOp.walEditsFromCoprocessors, firstIndex, lastIndexExclusive);
  270. coprocessorHost.postBatchMutate(miniBatchOp);
  271. }
  272. // ------------------------------------------------------------------
  273. // STEP 8. Advance mvcc. This will make this put visible to scanners and getters.
  274. // ------------------------------------------------------------------
  275. if (writeEntry != null) {
  276. mvcc.completeAndWait(writeEntry);
  277. writeEntry = null;
  278. } else if (isInReplay) {
  279. // ensure that the sequence id of the region is at least as big as orig log seq id
  280. mvcc.advanceTo(mvccNum);
  281. }
  // NOTE(review): this check compares by reference against OperationStatus.NOT_RUN, unlike
  // the getOperationStatusCode() comparisons used elsewhere in this method — confirm that
  // every NOT_RUN entry is that same shared instance.
  282. for (int i = firstIndex; i < lastIndexExclusive; i ++) {
  283. if (batchOp.retCodeDetails[i] == OperationStatus.NOT_RUN) {
  284. batchOp.retCodeDetails[i] = OperationStatus.SUCCESS;
  285. }
  286. }
  287. // ------------------------------------
  288. // STEP 9. Run coprocessor post hooks. This should be done after the wal is
  289. // synced so that the coprocessor contract is adhered to.
  290. // ------------------------------------
  291. if (!isInReplay && coprocessorHost != null) {
  292. for (int i = firstIndex; i < lastIndexExclusive; i++) {
  293. // only for successful puts
  294. if (batchOp.retCodeDetails[i].getOperationStatusCode()
  295. != OperationStatusCode.SUCCESS) {
  296. continue;
  297. }
  298. Mutation m = batchOp.getMutation(i);
  299. if (m instanceof Put) {
  300. coprocessorHost.postPut((Put) m, walEdit, m.getDurability());
  301. } else {
  302. coprocessorHost.postDelete((Delete) m, walEdit, m.getDurability());
  303. }
  304. }
  305. }
  306. success = true;
  307. return addedSize;
  308. } finally {
  309. // if the wal sync was unsuccessful, remove keys from memstore
  // NOTE(review): the loop below walks every slot of familyMaps, but slots are only
  // assigned inside the STEP 1 loop; any slot that loop never reached would presumably
  // still be null when dereferenced here — confirm.
  310. if (doRollBackMemstore) {
  311. for (int j = 0; j < familyMaps.length; j++) {
  312. for(List<Cell> cells:familyMaps[j].values()) {
  313. rollbackMemstore(cells);
  314. }
  315. }
  316. if (writeEntry != null) mvcc.complete(writeEntry);
  317. } else {
  318. this.addAndGetGlobalMemstoreSize(addedSize);
  319. if (writeEntry != null) {
  320. mvcc.completeAndWait(writeEntry);
  321. }
  322. }
  // "locked" is still true only if STEP 6 was not reached, so the read lock is not
  // unlocked twice.
  323. if (locked) {
  324. this.updatesLock.readLock().unlock();
  325. }
  326. releaseRowLocks(acquiredRowLocks);
  327. // See if the column families were consistent through the whole thing.
  328. // if they were then keep them. If they were not then pass a null.
  329. // null will be treated as unknown.
  330. // Total time taken might be involving Puts and Deletes.
  331. // Split the time for puts and deletes based on the total number of Puts and Deletes.
  332. if (noOfPuts > 0) {
  333. // There were some Puts in the batch.
  334. if (this.metricsRegion != null) {
  335. this.metricsRegion.updatePut();
  336. }
  337. }
  338. if (noOfDeletes > 0) {
  339. // There were some Deletes in the batch.
  340. if (this.metricsRegion != null) {
  341. this.metricsRegion.updateDelete();
  342. }
  343. }
  344. if (!success) {
  345. for (int i = firstIndex; i < lastIndexExclusive; i++) {
  346. if (batchOp.retCodeDetails[i].getOperationStatusCode() == OperationStatusCode.NOT_RUN) {
  347. batchOp.retCodeDetails[i] = OperationStatus.FAILURE;
  348. }
  349. }
  350. }
  351. if (coprocessorHost != null && !batchOp.isInReplay()) {
  352. // call the coprocessor hook to do any finalization steps
  353. // after the put is done
  354. MiniBatchOperationInProgress<Mutation> miniBatchOp =
  355. new MiniBatchOperationInProgress<Mutation>(batchOp.getMutationsForCoprocs(),
  356. batchOp.retCodeDetails, batchOp.walEditsFromCoprocessors, firstIndex,
  357. lastIndexExclusive);
  358. coprocessorHost.postBatchMutateIndispensably(miniBatchOp, success);
  359. }
  // Advance the batch cursor so the next call starts after this mini-batch.
  360. batchOp.nextIndexToProcess = lastIndexExclusive;
  361. }
  362. }

很轻松完成了。

未完待续。。。














原创粉丝点击