mymeiyi commented on code in PR #47988:
URL: https://github.com/apache/doris/pull/47988#discussion_r2006744514
##########
fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java:
##########
@@ -885,132 +936,146 @@ private void getDeleteBitmapUpdateLock(long
transactionId, List<OlapTable> mowTa
}
StopWatch stopWatch = new StopWatch();
stopWatch.start();
- getPartitionInfo(mowTableList, tabletCommitInfos, lockContext);
int totalRetryTime = 0;
String retryMsg = "";
- for (Map.Entry<Long, Set<Long>> entry :
lockContext.getTableToPartitions().entrySet()) {
- GetDeleteBitmapUpdateLockRequest.Builder builder =
GetDeleteBitmapUpdateLockRequest.newBuilder();
-
builder.setTableId(entry.getKey()).setLockId(transactionId).setInitiator(-1)
-
.setExpiration(Config.delete_bitmap_lock_expiration_seconds).setRequireCompactionStats(true);
- List<Long> tabletList =
lockContext.getTableToTabletList().get(entry.getKey());
- for (Long tabletId : tabletList) {
- TabletMeta tabletMeta =
lockContext.getTabletToTabletMeta().get(tabletId);
- TabletIndexPB.Builder tabletIndexBuilder =
TabletIndexPB.newBuilder();
- tabletIndexBuilder.setDbId(tabletMeta.getDbId());
- tabletIndexBuilder.setTableId(tabletMeta.getTableId());
- tabletIndexBuilder.setIndexId(tabletMeta.getIndexId());
- tabletIndexBuilder.setPartitionId(tabletMeta.getPartitionId());
- tabletIndexBuilder.setTabletId(tabletId);
- builder.addTabletIndexes(tabletIndexBuilder);
- }
- final GetDeleteBitmapUpdateLockRequest request = builder.build();
- GetDeleteBitmapUpdateLockResponse response = null;
-
- int retryTime = 0;
- while (retryTime++ < Config.metaServiceRpcRetryTimes()) {
- try {
- response =
MetaServiceProxy.getInstance().getDeleteBitmapUpdateLock(request);
- if (LOG.isDebugEnabled()) {
- LOG.debug("get delete bitmap lock, transactionId={},
Request: {}, Response: {}", transactionId,
- request, response);
- }
- if
(DebugPointUtil.isEnable("CloudGlobalTransactionMgr.getDeleteBitmapUpdateLock.conflict"))
{
- DebugPoint debugPoint = DebugPointUtil.getDebugPoint(
-
"CloudGlobalTransactionMgr.getDeleteBitmapUpdateLock.conflict");
- double percent = debugPoint.param("percent", 0.4);
- long timestamp = System.currentTimeMillis();
- Random random = new Random(timestamp);
- if (Math.abs(random.nextInt()) % 100 < 100 * percent) {
- LOG.info("set kv txn conflict for test");
- GetDeleteBitmapUpdateLockResponse.Builder
getLockResponseBuilder
- =
GetDeleteBitmapUpdateLockResponse.newBuilder();
-
getLockResponseBuilder.setStatus(MetaServiceResponseStatus.newBuilder()
-
.setCode(MetaServiceCode.KV_TXN_CONFLICT_RETRY_EXCEEDED_MAX_TIMES)
- .setMsg("kv txn conflict"));
- response = getLockResponseBuilder.build();
+ boolean res = false;
+ try {
+ getPartitionInfo(mowTableList, tabletCommitInfos, lockContext);
+ for (Map.Entry<Long, Set<Long>> entry :
lockContext.getTableToPartitions().entrySet()) {
+ GetDeleteBitmapUpdateLockRequest.Builder builder =
GetDeleteBitmapUpdateLockRequest.newBuilder();
+
builder.setTableId(entry.getKey()).setLockId(transactionId).setInitiator(-1)
+
.setExpiration(Config.delete_bitmap_lock_expiration_seconds).setRequireCompactionStats(true);
+ List<Long> tabletList =
lockContext.getTableToTabletList().get(entry.getKey());
+ for (Long tabletId : tabletList) {
+ TabletMeta tabletMeta =
lockContext.getTabletToTabletMeta().get(tabletId);
+ TabletIndexPB.Builder tabletIndexBuilder =
TabletIndexPB.newBuilder();
+ tabletIndexBuilder.setDbId(tabletMeta.getDbId());
+ tabletIndexBuilder.setTableId(tabletMeta.getTableId());
+ tabletIndexBuilder.setIndexId(tabletMeta.getIndexId());
+
tabletIndexBuilder.setPartitionId(tabletMeta.getPartitionId());
+ tabletIndexBuilder.setTabletId(tabletId);
+ builder.addTabletIndexes(tabletIndexBuilder);
+ }
+ final GetDeleteBitmapUpdateLockRequest request =
builder.build();
+ GetDeleteBitmapUpdateLockResponse response = null;
+
+ int retryTime = 0;
+ while (retryTime++ < Config.metaServiceRpcRetryTimes()) {
+ try {
+ response =
MetaServiceProxy.getInstance().getDeleteBitmapUpdateLock(request);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("get delete bitmap lock,
transactionId={}, Request: {}, Response: {}",
+ transactionId,
+ request, response);
+ }
+ if
(DebugPointUtil.isEnable("CloudGlobalTransactionMgr.getDeleteBitmapUpdateLock.conflict"))
{
+ DebugPoint debugPoint =
DebugPointUtil.getDebugPoint(
+
"CloudGlobalTransactionMgr.getDeleteBitmapUpdateLock.conflict");
+ double percent = debugPoint.param("percent", 0.4);
+ long timestamp = System.currentTimeMillis();
+ Random random = new Random(timestamp);
+ if (Math.abs(random.nextInt()) % 100 < 100 *
percent) {
+ LOG.info("set kv txn conflict for test");
+ GetDeleteBitmapUpdateLockResponse.Builder
getLockResponseBuilder
+ =
GetDeleteBitmapUpdateLockResponse.newBuilder();
+
getLockResponseBuilder.setStatus(MetaServiceResponseStatus.newBuilder()
+
.setCode(MetaServiceCode.KV_TXN_CONFLICT_RETRY_EXCEEDED_MAX_TIMES)
+ .setMsg("kv txn conflict"));
+ response = getLockResponseBuilder.build();
+ }
+ }
+ if (response.getStatus().getCode() !=
MetaServiceCode.LOCK_CONFLICT
+ && response.getStatus().getCode() !=
MetaServiceCode.KV_TXN_CONFLICT) {
+ break;
}
+ } catch (Exception e) {
+ LOG.warn("ignore get delete bitmap lock exception,
transactionId={}, retryTime={}, tableIds={}",
+ transactionId,
+ retryTime,
mowTableList.stream().map(Table::getId).collect(Collectors.toList()), e);
}
- if (response.getStatus().getCode() !=
MetaServiceCode.LOCK_CONFLICT
- && response.getStatus().getCode() !=
MetaServiceCode.KV_TXN_CONFLICT) {
- break;
+ retryMsg = response.toString();
+ if (DebugPointUtil.isEnable("FE.mow.check.lock.release")
+ && response.getStatus().getCode() ==
MetaServiceCode.LOCK_CONFLICT) {
+ throw new UserException(InternalErrorCode.INTERNAL_ERR,
+ "check delete bitmap lock release
fail,response is " + response
+ + ", tableList=(" +
StringUtils.join(mowTableList, ",") + ")");
+ }
+ // sleep random millis [20, 300] ms, avoid txn conflict
+ int randomMillis = 20 + (int) (Math.random() * (300 - 20));
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("randomMillis:{}", randomMillis);
+ }
+ try {
+ Thread.sleep(randomMillis);
+ } catch (InterruptedException e) {
+ LOG.info("InterruptedException: ", e);
}
- } catch (Exception e) {
- LOG.warn("ignore get delete bitmap lock exception,
transactionId={}, retryTime={}, tableIds={}",
- transactionId,
- retryTime,
mowTableList.stream().map(Table::getId).collect(Collectors.toList()), e);
- }
- retryMsg = response.toString();
- if (DebugPointUtil.isEnable("FE.mow.check.lock.release")
- && response.getStatus().getCode() ==
MetaServiceCode.LOCK_CONFLICT) {
- throw new UserException(InternalErrorCode.INTERNAL_ERR,
- "check delete bitmap lock release fail,response is
" + response
- + ", tableList=(" +
StringUtils.join(mowTableList, ",") + ")");
}
- // sleep random millis [20, 300] ms, avoid txn conflict
- int randomMillis = 20 + (int) (Math.random() * (300 - 20));
- if (LOG.isDebugEnabled()) {
- LOG.debug("randomMillis:{}", randomMillis);
+ Preconditions.checkNotNull(response);
+ Preconditions.checkNotNull(response.getStatus());
+ if
(DebugPointUtil.isEnable("FE.mow.get_delete_bitmap_lock.fail")) {
+ throw new
UserException(InternalErrorCode.DELETE_BITMAP_LOCK_ERR,
+ "test get_delete_bitmap_lock fail");
}
- try {
- Thread.sleep(randomMillis);
- } catch (InterruptedException e) {
- LOG.info("InterruptedException: ", e);
+ if (response.getStatus().getCode() != MetaServiceCode.OK) {
+ LOG.warn("get delete bitmap lock failed, transactionId={},
for {} times, response:{}",
+ transactionId,
+ retryTime, response);
+ if (response.getStatus().getCode() ==
MetaServiceCode.LOCK_CONFLICT
+ || response.getStatus().getCode() ==
MetaServiceCode.KV_TXN_CONFLICT
+ || response.getStatus().getCode()
+ ==
MetaServiceCode.KV_TXN_CONFLICT_RETRY_EXCEEDED_MAX_TIMES) {
+ // DELETE_BITMAP_LOCK_ERR will be retried on be
+ throw new
UserException(InternalErrorCode.DELETE_BITMAP_LOCK_ERR,
+ "Failed to get delete bitmap lock due to
conflict");
+ }
+ throw new UserException(
+ "Failed to get delete bitmap lock, msg: " +
response.getStatus().getMsg() + ", code: "
+ + response.getStatus().getCode());
}
- }
- Preconditions.checkNotNull(response);
- Preconditions.checkNotNull(response.getStatus());
- if (DebugPointUtil.isEnable("FE.mow.get_delete_bitmap_lock.fail"))
{
- throw new
UserException(InternalErrorCode.DELETE_BITMAP_LOCK_ERR,
- "test get_delete_bitmap_lock fail");
- }
- if (response.getStatus().getCode() != MetaServiceCode.OK) {
- LOG.warn("get delete bitmap lock failed, transactionId={}, for
{} times, response:{}", transactionId,
- retryTime, response);
- if (response.getStatus().getCode() ==
MetaServiceCode.LOCK_CONFLICT
- || response.getStatus().getCode() ==
MetaServiceCode.KV_TXN_CONFLICT
- || response.getStatus().getCode() ==
MetaServiceCode.KV_TXN_CONFLICT_RETRY_EXCEEDED_MAX_TIMES) {
- // DELETE_BITMAP_LOCK_ERR will be retried on be
- throw new
UserException(InternalErrorCode.DELETE_BITMAP_LOCK_ERR,
- "Failed to get delete bitmap lock due to
conflict");
+
+ // record tablet's latest compaction stats from meta service
and send them to BEs
+ // to let BEs eliminate unnecessary sync_rowsets() calls if
possible
+ List<Long> respBaseCompactionCnts =
response.getBaseCompactionCntsList();
+ List<Long> respCumulativeCompactionCnts =
response.getCumulativeCompactionCntsList();
+ List<Long> respCumulativePoints =
response.getCumulativePointsList();
+ List<Long> respTabletStates = response.getTabletStatesList();
+ int size1 = respBaseCompactionCnts.size();
+ int size2 = respCumulativeCompactionCnts.size();
+ int size3 = respCumulativePoints.size();
+ int size4 = respTabletStates.size();
+ if (size1 != tabletList.size() || size2 != tabletList.size()
|| size3 != tabletList.size()
+ || (size4 > 0 && size4 != tabletList.size())) {
+ throw new UserException("The size of returned compaction
cnts can't match the size of tabletList, "
+ + "tabletList.size()=" + tabletList.size() + ",
respBaseCompactionCnts.size()=" + size1
+ + ", respCumulativeCompactionCnts.size()=" + size2
+ ", respCumulativePoints.size()="
+ + size3
+ + ", respTabletStates.size()=" + size4);
}
- throw new UserException(
- "Failed to get delete bitmap lock, msg: " +
response.getStatus().getMsg() + ", code: "
- + response.getStatus().getCode());
- }
-
- // record tablet's latest compaction stats from meta service and
send them to BEs
- // to let BEs eliminate unnecessary sync_rowsets() calls if
possible
- List<Long> respBaseCompactionCnts =
response.getBaseCompactionCntsList();
- List<Long> respCumulativeCompactionCnts =
response.getCumulativeCompactionCntsList();
- List<Long> respCumulativePoints =
response.getCumulativePointsList();
- List<Long> respTabletStates = response.getTabletStatesList();
- int size1 = respBaseCompactionCnts.size();
- int size2 = respCumulativeCompactionCnts.size();
- int size3 = respCumulativePoints.size();
- int size4 = respTabletStates.size();
- if (size1 != tabletList.size() || size2 != tabletList.size() ||
size3 != tabletList.size()
- || (size4 > 0 && size4 != tabletList.size())) {
- throw new UserException("The size of returned compaction cnts
can't match the size of tabletList, "
- + "tabletList.size()=" + tabletList.size() + ",
respBaseCompactionCnts.size()=" + size1
- + ", respCumulativeCompactionCnts.size()=" + size2 +
", respCumulativePoints.size()=" + size3
- + ", respTabletStates.size()=" + size4);
- }
- for (int i = 0; i < tabletList.size(); i++) {
- long tabletId = tabletList.get(i);
- lockContext.getBaseCompactionCnts().put(tabletId,
respBaseCompactionCnts.get(i));
- lockContext.getCumulativeCompactionCnts().put(tabletId,
respCumulativeCompactionCnts.get(i));
- lockContext.getCumulativePoints().put(tabletId,
respCumulativePoints.get(i));
- if (size4 > 0) {
- lockContext.getTabletStates().put(tabletId,
respTabletStates.get(i));
+ for (int i = 0; i < tabletList.size(); i++) {
+ long tabletId = tabletList.get(i);
+ lockContext.getBaseCompactionCnts().put(tabletId,
respBaseCompactionCnts.get(i));
+ lockContext.getCumulativeCompactionCnts().put(tabletId,
respCumulativeCompactionCnts.get(i));
+ lockContext.getCumulativePoints().put(tabletId,
respCumulativePoints.get(i));
+ if (size4 > 0) {
+ lockContext.getTabletStates().put(tabletId,
respTabletStates.get(i));
+ }
}
+ totalRetryTime += retryTime;
}
- totalRetryTime += retryTime;
+ res = true;
+ } finally {
+ stopWatch.stop();
+ long costTime = stopWatch.getTime();
Review Comment:
This forgets to call `setWaitDeleteBitmapLockCostTimeMs`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]