[ https://issues.apache.org/jira/browse/HIVE-24211?focusedWorklogId=492855&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-492855 ]
ASF GitHub Bot logged work on HIVE-24211:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 30/Sep/20 09:44
            Start Date: 30/Sep/20 09:44
    Worklog Time Spent: 10m
      Work Description: deniskuzZ commented on a change in pull request #1533:
URL: https://github.com/apache/hive/pull/1533#discussion_r497380399


##########
File path: ql/src/java/org/apache/hadoop/hive/ql/Driver.java
##########
@@ -497,38 +497,41 @@ private void runInternal(String command, boolean alreadyCompiled) throws Command
           HiveConf.ConfVars.HIVE_TXN_MAX_RETRYSNAPSHOT_COUNT);
       try {
-        while (!driverTxnHandler.isValidTxnListState() && ++retryShapshotCnt <= maxRetrySnapshotCnt) {
-          LOG.info("Re-compiling after acquiring locks, attempt #" + retryShapshotCnt);
-          // Snapshot was outdated when locks were acquired, hence regenerate context, txn list and retry.
-          // TODO: Lock acquisition should be moved before analyze, this is a bit hackish.
-          // Currently, we acquire a snapshot, compile the query with that snapshot, and then - acquire locks.
-          // If snapshot is still valid, we continue as usual.
-          // But if snapshot is not valid, we recompile the query.
-          if (driverContext.isOutdatedTxn()) {
-            LOG.info("Snapshot is outdated, re-initiating transaction ...");
-            driverContext.getTxnManager().rollbackTxn();
-
-            String userFromUGI = DriverUtils.getUserFromUGI(driverContext);
-            driverContext.getTxnManager().openTxn(context, userFromUGI, driverContext.getTxnType());
-            lockAndRespond();
+        do {
+          driverContext.setOutdatedTxn(false);
+
+          if (!driverTxnHandler.isValidTxnListState()) {
+            LOG.info("Re-compiling after acquiring locks, attempt #" + retryShapshotCnt);
+            // Snapshot was outdated when locks were acquired, hence regenerate context, txn list and retry.
+            // TODO: Lock acquisition should be moved before analyze, this is a bit hackish.
+            // Currently, we acquire a snapshot, compile the query with that snapshot, and then - acquire locks.
+            // If snapshot is still valid, we continue as usual.
+            // But if snapshot is not valid, we recompile the query.
+            if (driverContext.isOutdatedTxn()) {
+              LOG.info("Snapshot is outdated, re-initiating transaction ...");
+              driverContext.getTxnManager().rollbackTxn();
+
+              String userFromUGI = DriverUtils.getUserFromUGI(driverContext);
+              driverContext.getTxnManager().openTxn(context, userFromUGI, driverContext.getTxnType());
+              lockAndRespond();
+            }
+            driverContext.setRetrial(true);
+            driverContext.getBackupContext().addSubContext(context);
+            driverContext.getBackupContext().setHiveLocks(context.getHiveLocks());
+            context = driverContext.getBackupContext();
+
+            driverContext.getConf().set(ValidTxnList.VALID_TXNS_KEY,
+                driverContext.getTxnManager().getValidTxns().toString());
+
+            if (driverContext.getPlan().hasAcidResourcesInQuery()) {
+              compileInternal(context.getCmd(), true);
+              driverTxnHandler.recordValidWriteIds();
+              driverTxnHandler.setWriteIdForAcidFileSinks();
+            }
+            // Since we're reusing the compiled plan, we need to update its start time for current run
+            driverContext.getPlan().setQueryStartTime(driverContext.getQueryDisplay().getQueryStartTime());
           }
-
-          driverContext.setRetrial(true);
-          driverContext.getBackupContext().addSubContext(context);
-          driverContext.getBackupContext().setHiveLocks(context.getHiveLocks());
-          context = driverContext.getBackupContext();
-
-          driverContext.getConf().set(ValidTxnList.VALID_TXNS_KEY,
-              driverContext.getTxnManager().getValidTxns().toString());
-
-          if (driverContext.getPlan().hasAcidResourcesInQuery()) {
-            compileInternal(context.getCmd(), true);
-            driverTxnHandler.recordValidWriteIds();
-            driverTxnHandler.setWriteIdForAcidFileSinks();
-          }
-          // Since we're reusing the compiled plan, we need to update its start time for current run
-          driverContext.getPlan().setQueryStartTime(driverContext.getQueryDisplay().getQueryStartTime());
-        }
+        } while (driverContext.isOutdatedTxn() && ++retryShapshotCnt <= maxRetrySnapshotCnt);

Review comment:
       added comments
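For readers skimming the diff, the sketch below shows the shape of the retry loop it introduces: clear the outdated-transaction flag, re-validate the snapshot taken at compile time once locks are held, recompile under a fresh snapshot when it changed, and repeat only while the transaction itself had to be re-opened. This is a minimal sketch; the identifiers (TxnState, rollbackAndReopenTxn, recompileWithCurrentSnapshot) are hypothetical stand-ins for the Driver/DriverTxnHandler API, not Hive's real classes.

    // Simplified, self-contained sketch of the do/while retry pattern from the diff above.
    interface TxnState {
      boolean isValidTxnListState();          // does the compile-time snapshot still match the current one?
      boolean isOutdatedTxn();                // set during validation when the whole txn must be replayed
      void setOutdatedTxn(boolean outdated);
      void rollbackAndReopenTxn();            // rollback, open a new txn, re-acquire locks
      void recompileWithCurrentSnapshot();    // regenerate context, valid txn list and write ids
    }

    class SnapshotRetryLoop {
      static void runWithRetry(TxnState txn, int maxRetries) {   // cf. HIVE_TXN_MAX_RETRYSNAPSHOT_COUNT
        int retryCnt = 0;
        do {
          txn.setOutdatedTxn(false);                 // reset before each validation pass
          if (!txn.isValidTxnListState()) {          // snapshot changed while locks were acquired
            if (txn.isOutdatedTxn()) {               // validation decided the txn itself is stale
              txn.rollbackAndReopenTxn();
            }
            txn.recompileWithCurrentSnapshot();
          }
        } while (txn.isOutdatedTxn() && ++retryCnt <= maxRetries);
      }
    }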
Issue Time Tracking
-------------------

            Worklog Id: (was: 492855)
    Remaining Estimate: 0h
            Time Spent: 10m

> Replace Snapshot invalidate logic with WriteSet check for txn conflict detection
> --------------------------------------------------------------------------------
>
>                 Key: HIVE-24211
>                 URL: https://issues.apache.org/jira/browse/HIVE-24211
>             Project: Hive
>          Issue Type: Bug
>          Components: Transactions
>            Reporter: Denys Kuzmenko
>            Assignee: Denys Kuzmenko
>            Priority: Major
>            Time Spent: 10m
>    Remaining Estimate: 0h
>
> *Issue with concurrent writes on a partitioned table:*
> Concurrent writes to different partitions should execute in parallel without issues: they acquire a shared
> lock at the table level and an exclusive write lock at the partition level (hive.txn.xlock.write=true).
> However, there is a problem with the Snapshot validation: it compares the valid writeIds seen by the current
> transaction, recorded before locking, with the actual list of writeIds. The issue is that a writeId in the
> Snapshot carries no partition information, so concurrent writes to different partitions are seen as writes
> to the same non-partitioned table, causing the Snapshot to become obsolete.
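To make the partition problem above concrete, here is a minimal sketch of the kind of comparison the Snapshot validation performs. The class and method names (TableWriteIdSnapshot, snapshotStillValid) are hypothetical; the point it illustrates, per the description above, is that the writeId list is tracked per table with no partition dimension.

    // Hypothetical illustration: because writeIds carry no partition information, a
    // concurrent write to a *different* partition still changes the table's writeId
    // list and makes the compile-time snapshot look outdated.
    import java.util.HashSet;
    import java.util.Set;

    class TableWriteIdSnapshot {
      static boolean snapshotStillValid(Set<Long> writeIdsAtCompile, Set<Long> writeIdsAfterLock) {
        // No partition dimension here -- any new writeId on the table invalidates the snapshot.
        return writeIdsAfterLock.equals(writeIdsAtCompile);
      }

      public static void main(String[] args) {
        Set<Long> atCompile = new HashSet<>(Set.of(1L, 2L));   // snapshot recorded before locking
        Set<Long> afterLock = new HashSet<>(atCompile);
        afterLock.add(3L);                                     // concurrent insert into another partition
        System.out.println(snapshotStillValid(atCompile, afterLock));   // false -> forced recompile/retry
      }
    }

That partition-blind comparison is what the issue title proposes to replace with a WriteSet-based conflict check.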