[ https://issues.apache.org/jira/browse/HIVE-24428?focusedWorklogId=536008&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-536008 ]
ASF GitHub Bot logged work on HIVE-24428: ----------------------------------------- Author: ASF GitHub Bot Created on: 14/Jan/21 14:13 Start Date: 14/Jan/21 14:13 Worklog Time Spent: 10m Work Description: kgyrtkirk commented on a change in pull request #1724: URL: https://github.com/apache/hive/pull/1724#discussion_r557422893 ########## File path: ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java ########## @@ -774,6 +780,100 @@ private void checkFileFormats(Hive db, LoadTableDesc tbd, Table table) } } + class LocalTableLock implements Closeable{ + + private Optional<HiveLockObject> lock; + private HiveLock lockObj; + + public LocalTableLock(Optional<HiveLockObject> lock) throws LockException { + + this.lock = lock; + if(!lock.isPresent()) { + return; + } + LOG.info("LocalTableLock; locking: " + lock); + HiveLockManager lockMgr = context.getHiveTxnManager().getLockManager(); + lockObj = lockMgr.lock(lock.get(), HiveLockMode.SEMI_SHARED, true); + LOG.info("LocalTableLock; locked: " + lock); + } + + @Override + public void close() throws IOException { + if(!lock.isPresent()) { + return; + } + LOG.info("LocalTableLock; unlocking: "+lock); + HiveLockManager lockMgr; + try { + lockMgr = context.getHiveTxnManager().getLockManager(); + lockMgr.unlock(lockObj); + } catch (LockException e1) { + throw new IOException(e1); + } + LOG.info("LocalTableLock; unlocked"); + } + + } + + static enum LockFileMoveMode { + none, dp, all; + + public static LockFileMoveMode fromConf(HiveConf conf) { + if (!conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY)) { + return none; + } + String lockFileMoveMode = conf.getVar(HiveConf.ConfVars.HIVE_LOCK_FILE_MOVE_MODE); + return valueOf(lockFileMoveMode); + } + } + + private LocalTableLock acquireLockForFileMove(LoadTableDesc loadTableWork) throws HiveException { + LockFileMoveMode mode = LockFileMoveMode.fromConf(conf); + + if (mode == LockFileMoveMode.none) { + return new LocalTableLock(Optional.empty()); + } + if (mode == 
LockFileMoveMode.dp) { + if (loadTableWork.getDPCtx() == null) { + return new LocalTableLock(Optional.empty()); + } + } + + WriteEntity output = context.getLoadTableOutputMap().get(loadTableWork); + List<HiveLockObj> lockObjects = context.getOutputLockObjects().get(output); + if (lockObjects == null) { + return new LocalTableLock(Optional.empty()); + } + TableDesc table = loadTableWork.getTable(); + if(table == null) { + return new LocalTableLock(Optional.empty()); + } + + Hive db = getHive(); + Table baseTable = db.getTable(loadTableWork.getTable().getTableName()); + + HiveLockObject.HiveLockObjectData lockData = + new HiveLockObject.HiveLockObjectData(queryPlan.getQueryId(), + String.valueOf(System.currentTimeMillis()), + "IMPLICIT", + queryPlan.getQueryStr(), + conf); + + HiveLockObject lock = new HiveLockObject(baseTable,lockData); + + for (HiveLockObj hiveLockObj : lockObjects) { Review comment: I tried to use the stream API a few times - and I'm a bit against it because: * it's somewhat unreadable * stream expressions are harder to read when they are eventually changed in a patch * they are not really debug friendly * they seem to fit "easy tasks" nicely; but if someone extends it later - it might become a really hard-to-read expression... instead of 2 complex lines, this is 7 simple ones - I would rather keep it ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 536008) Time Spent: 4h 50m (was: 4h 40m) > Concurrent add_partitions requests may lead to data loss > -------------------------------------------------------- > > Key: HIVE-24428 > URL: https://issues.apache.org/jira/browse/HIVE-24428 > Project: Hive > Issue Type: Bug > Reporter: Zoltan Haindrich > Assignee: Zoltan Haindrich > Priority: Major > Labels: pull-request-available > Time Spent: 4h 50m > Remaining Estimate: 0h > > In case multiple clients are adding partitions to the same table - when the > same partition is being added, there is a chance that the data dir is removed > after the other client has already written its data. > https://github.com/apache/hive/blob/5e96b14a2357c66a0640254d5414bc706d8be852/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java#L3958 -- This message was sent by Atlassian Jira (v8.3.4#803005)