[ https://issues.apache.org/jira/browse/HIVE-21160?focusedWorklogId=776260&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-776260 ]
ASF GitHub Bot logged work on HIVE-21160: ----------------------------------------- Author: ASF GitHub Bot Created on: 31/May/22 11:50 Start Date: 31/May/22 11:50 Worklog Time Spent: 10m Work Description: kasakrisz commented on code in PR #2855: URL: https://github.com/apache/hive/pull/2855#discussion_r885539665 ########## ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java: ########## @@ -267,9 +262,104 @@ private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException { } } - private void validateTxnManager(Table mTable) throws SemanticException { - if (!AcidUtils.acidTableWithoutTransactions(mTable) && !getTxnMgr().supportsAcid()) { - throw new SemanticException(ErrorMsg.ACID_OP_ON_NONACID_TXNMGR.getMsg()); + private void analyzeSplitUpdate(ASTNode tree, Table mTable, ASTNode tabNameNode) throws SemanticException { + operation = Context.Operation.UPDATE; + + List<? extends Node> children = tree.getChildren(); + + ASTNode where = null; + int whereIndex = 2; + if (children.size() > whereIndex) { + where = (ASTNode) children.get(whereIndex); + assert where.getToken().getType() == HiveParser.TOK_WHERE : + "Expected where clause, but found " + where.getName(); + } + + Set<String> setRCols = new LinkedHashSet<>(); +// TOK_UPDATE_TABLE +// TOK_TABNAME +// ... 
+// TOK_SET_COLUMNS_CLAUSE <- The set list from update should be the second child (index 1) + assert children.size() >= 2 : "Expected update token to have at least two children"; + ASTNode setClause = (ASTNode) children.get(1); + Map<String, ASTNode> setCols = collectSetColumnsAndExpressions(setClause, setRCols, mTable); + Map<Integer, ASTNode> setColExprs = new HashMap<>(setClause.getChildCount()); + + List<FieldSchema> nonPartCols = mTable.getCols(); + Map<String, String> colNameToDefaultConstraint = getColNameToDefaultValueMap(mTable); + List<String> values = new ArrayList<>(mTable.getCols().size()); + StringBuilder rewrittenQueryStr = createRewrittenQueryStrBuilder(); + rewrittenQueryStr.append("(SELECT ROW__ID"); + for (int i = 0; i < nonPartCols.size(); i++) { + rewrittenQueryStr.append(','); + String name = nonPartCols.get(i).getName(); + ASTNode setCol = setCols.get(name); + String identifier = HiveUtils.unparseIdentifier(name, this.conf); + + if (setCol != null) { + if (setCol.getType() == HiveParser.TOK_TABLE_OR_COL && + setCol.getChildCount() == 1 && setCol.getChild(0).getType() == HiveParser.TOK_DEFAULT_VALUE) { + rewrittenQueryStr.append(colNameToDefaultConstraint.get(name)); + } else { + rewrittenQueryStr.append(identifier); + // This is one of the columns we're setting, record it's position so we can come back + // later and patch it up. 0th is ROW_ID + setColExprs.put(i + 1, setCol); + } + } else { + rewrittenQueryStr.append(identifier); + } + rewrittenQueryStr.append(" AS "); + rewrittenQueryStr.append(identifier); + + values.add("s." 
+ identifier); + } + addPartitionColsToSelect(mTable.getPartCols(), rewrittenQueryStr); + addPartitionColsAsValues(mTable.getPartCols(), "s", values); + rewrittenQueryStr.append(" FROM ").append(getFullTableNameForSQL(tabNameNode)).append(") s\n"); + + appendInsertBranch(rewrittenQueryStr, null, values); + appendDeleteBranch(rewrittenQueryStr, null, "s", Collections.singletonList("s.ROW__ID ")); + + appendSortBy(rewrittenQueryStr, Collections.singletonList("s.ROW__ID ")); + + ReparseResult rr = parseRewrittenQuery(rewrittenQueryStr, ctx.getCmd()); + Context rewrittenCtx = rr.rewrittenCtx; + ASTNode rewrittenTree = rr.rewrittenTree; + + ASTNode rewrittenInsert = new ASTSearcher().simpleBreadthFirstSearch( + rewrittenTree, HiveParser.TOK_FROM, HiveParser.TOK_SUBQUERY, HiveParser.TOK_INSERT); + + rewrittenCtx.setOperation(Context.Operation.UPDATE); + rewrittenCtx.addDestNamePrefix(1, Context.DestClausePrefix.INSERT); + rewrittenCtx.addDeleteOfUpdateDestNamePrefix(2, Context.DestClausePrefix.DELETE); + + if (where != null) { + rewrittenInsert.addChild(where); + } + + patchProjectionForUpdate(rewrittenInsert, setColExprs); + + try { Review Comment: I was not able to get the final rewritten query string from the AST which has all the transformations. To do it I need a `TokenRewriteStream` instance that belongs to the transformed AST. The one which I have from `parseRewrittenQuery` is outdated because of the transformations made on the AST. I added `rewrittenTree.dump()` before calling `super.analyze(rewrittenTree, rewrittenCtx);`. It prints the string representation of the AST. I admit that this is more difficult to read, but better than nothing. 
Issue Time Tracking ------------------- Worklog Id: (was: 776260) Time Spent: 2h 50m (was: 2h 40m) > Rewrite Update statement as Multi-insert and do Update split early > ------------------------------------------------------------------ > > Key: HIVE-21160 > URL: https://issues.apache.org/jira/browse/HIVE-21160 > Project: Hive > Issue Type: Sub-task > Components: Transactions > Affects Versions: 3.0.0 > Reporter: Eugene Koifman > Assignee: Krisztian Kasa > Priority: Major > Labels: pull-request-available > Time Spent: 2h 50m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.20.7#820007)