[ https://issues.apache.org/jira/browse/HIVE-21160?focusedWorklogId=777470&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-777470 ]
ASF GitHub Bot logged work on HIVE-21160:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 02/Jun/22 12:15
            Start Date: 02/Jun/22 12:15
    Worklog Time Spent: 10m
      Work Description: kgyrtkirk commented on code in PR #2855:
URL: https://github.com/apache/hive/pull/2855#discussion_r887876292


##########
ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java:
##########
@@ -8292,8 +8294,8 @@ private WriteEntity generateTableWriteEntity(String dest, Table dest_tab,
     if ((dpCtx == null || dpCtx.getNumDPCols() == 0)) {
       output = new WriteEntity(dest_tab, determineWriteType(ltd, dest));
       if (!outputs.add(output)) {
-        if(!((this instanceof MergeSemanticAnalyzer) &&
-            conf.getBoolVar(ConfVars.MERGE_SPLIT_UPDATE))) {
+        if(!((this instanceof MergeSemanticAnalyzer || this instanceof UpdateDeleteSemanticAnalyzer) &&

Review Comment:
   I believe this could be more readable if the condition were placed in an instance method, which could be overridden by `MergeSemanticAnalyzer` / `UpdateDeleteSemanticAnalyzer`.


##########
itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java:
##########
@@ -251,6 +252,44 @@ public void testExceucteUpdateCounts() throws Exception {
     assertEquals("1 row PreparedStatement update", 1, count);
   }

+  @Test
+  public void testExceucteMergeCounts() throws Exception {
+    testExceucteMergeCounts(true);
+  }
+
+  @Test
+  public void testExceucteMergeCountsNoSplitUpdate() throws Exception {
+    testExceucteMergeCounts(false);
+  }
+
+  private void testExceucteMergeCounts(boolean splitUpdateEarly) throws Exception {
+
+    Statement stmt = con.createStatement();
+    stmt.execute("set " + ConfVars.MERGE_SPLIT_UPDATE.varname + "=" + splitUpdateEarly);
+    stmt.execute("set " + ConfVars.HIVE_SUPPORT_CONCURRENCY.varname + "=true");

Review Comment:
   isn't `MERGE_SPLIT_UPDATE` the old option's name, while the new one is named `SPLIT_UPDATE`?


##########
ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java:
##########
@@ -91,23 +110,15 @@ private void analyzeDelete(ASTNode tree) throws SemanticException {
    * The sort by clause is put in there so that records come out in the right order to enable
    * merge on read.
    */
-  private void reparseAndSuperAnalyze(ASTNode tree) throws SemanticException {
+  private void reparseAndSuperAnalyze(ASTNode tree, ASTNode tabNameNode, Table mTable) throws SemanticException {
     List<? extends Node> children = tree.getChildren();

-    // The first child should be the table we are updating / deleting from
-    ASTNode tabName = (ASTNode)children.get(0);
-    assert tabName.getToken().getType() == HiveParser.TOK_TABNAME :
-        "Expected tablename as first child of " + operation + " but found " + tabName.getName();
-    Table mTable = getTargetTable(tabName);
-    validateTxnManager(mTable);
-    validateTargetTable(mTable);
-
     // save the operation type into the query state
     SessionStateUtil.addResource(conf, Context.Operation.class.getSimpleName(), operation.name());

     StringBuilder rewrittenQueryStr = new StringBuilder();
     rewrittenQueryStr.append("insert into table ");
-    rewrittenQueryStr.append(getFullTableNameForSQL(tabName));
+    rewrittenQueryStr.append(getFullTableNameForSQL(tabNameNode));

Review Comment:
   it seems like `tabNameNode` is only used as an argument of this method...
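To make the first review comment above concrete, here is a minimal, self-contained sketch of the suggested refactor: the duplicate-WriteEntity condition moves into an instance method that subclasses override. The class and method names below are hypothetical stand-ins for illustration, not the actual Hive classes or the code in PR #2855.

    // Hypothetical sketch of the overridable-condition idea from the review;
    // names are illustrative, not the real SemanticAnalyzer/MergeSemanticAnalyzer code.
    class BaseAnalyzer {
      // Default behaviour: registering the same table as an output twice is an error.
      protected boolean allowsDuplicateWriteEntity() {
        return false;
      }

      void registerOutput(boolean addedToOutputs) {
        if (!addedToOutputs && !allowsDuplicateWriteEntity()) {
          throw new IllegalStateException("table used as output more than once");
        }
      }
    }

    class MergeLikeAnalyzer extends BaseAnalyzer {
      private final boolean splitUpdate;

      MergeLikeAnalyzer(boolean splitUpdate) {
        this.splitUpdate = splitUpdate;
      }

      // MERGE/UPDATE rewrites may legitimately write the target table twice when
      // split-update is enabled, so the subclass relaxes the check.
      @Override
      protected boolean allowsDuplicateWriteEntity() {
        return splitUpdate;
      }
    }

With this shape, the instanceof checks in the base class disappear and each analyzer states its own policy in one place.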
##########
ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java:
##########
@@ -48,28 +52,43 @@ public class UpdateDeleteSemanticAnalyzer extends RewriteSemanticAnalyzer {
     super(queryState);
   }

-  protected void analyze(ASTNode tree) throws SemanticException {
+  @Override
+  protected ASTNode getTargetTableNode(ASTNode tree) {
+    // The first child should be the table we are updating / deleting from
+    ASTNode tabName = (ASTNode)tree.getChild(0);
+    assert tabName.getToken().getType() == HiveParser.TOK_TABNAME :
+        "Expected tablename as first child of " + operation + " but found " + tabName.getName();
+    return tabName;
+  }
+
+  protected void analyze(ASTNode tree, Table table, ASTNode tabNameNode) throws SemanticException {
     switch (tree.getToken().getType()) {
     case HiveParser.TOK_DELETE_FROM:
-      analyzeDelete(tree);
+      analyzeDelete(tree, tabNameNode, table);
       break;
     case HiveParser.TOK_UPDATE_TABLE:
-      analyzeUpdate(tree);
+      analyzeUpdate(tree, tabNameNode, table);
       break;
     default:
       throw new RuntimeException("Asked to parse token " + tree.getName() + " in " +
           "UpdateDeleteSemanticAnalyzer");
     }
   }

-  private void analyzeUpdate(ASTNode tree) throws SemanticException {
+  private void analyzeUpdate(ASTNode tree, ASTNode tabNameNode, Table mTable) throws SemanticException {
     operation = Context.Operation.UPDATE;
-    reparseAndSuperAnalyze(tree);
+    boolean nonNativeAcid = AcidUtils.isNonNativeAcidTable(mTable);
+
+    if (HiveConf.getBoolVar(queryState.getConf(), HiveConf.ConfVars.SPLIT_UPDATE) && !nonNativeAcid) {
+      analyzeSplitUpdate(tree, mTable, tabNameNode);
+    } else {
+      reparseAndSuperAnalyze(tree, tabNameNode, mTable);

Review Comment:
   can we use one consistent order; either `tree, mTable, tabNameNode` or the other


##########
ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java:
##########
@@ -48,28 +52,43 @@ public class UpdateDeleteSemanticAnalyzer extends RewriteSemanticAnalyzer {
     super(queryState);
   }

-  protected void analyze(ASTNode tree) throws SemanticException {
+  @Override
+  protected ASTNode getTargetTableNode(ASTNode tree) {
+    // The first child should be the table we are updating / deleting from
+    ASTNode tabName = (ASTNode)tree.getChild(0);
+    assert tabName.getToken().getType() == HiveParser.TOK_TABNAME :
+        "Expected tablename as first child of " + operation + " but found " + tabName.getName();
+    return tabName;
+  }
+
+  protected void analyze(ASTNode tree, Table table, ASTNode tabNameNode) throws SemanticException {

Review Comment:
   can't we use the `TableName` object instead of the `ASTNode` of the `tabNameNode` / why don't we have that info in the `table`?
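For the last comment above, a rough illustration of the direction being asked about: deriving the rewrite target's SQL name from the already-resolved `Table` metadata object instead of threading the `TOK_TABNAME` `ASTNode` through the analyze calls. The helper below is hypothetical, and whether its backtick quoting matches what `getFullTableNameForSQL()` produces is an assumption.

    import org.apache.hadoop.hive.ql.metadata.Table;

    // Hypothetical helper, not part of the PR: build the fully qualified, quoted
    // target name from the Table object the analyzer has already resolved.
    final class TargetTableNames {
      private TargetTableNames() {
      }

      static String fullTableNameForSql(Table table) {
        // getDbName()/getTableName() come from the resolved metadata, so the
        // ASTNode parameter would no longer be needed just for name rendering.
        return "`" + table.getDbName() + "`.`" + table.getTableName() + "`";
      }
    }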
##########
itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcDriver2.java:
##########
@@ -251,6 +252,44 @@ public void testExceucteUpdateCounts() throws Exception {
     assertEquals("1 row PreparedStatement update", 1, count);
   }

+  @Test
+  public void testExceucteMergeCounts() throws Exception {
+    testExceucteMergeCounts(true);
+  }
+
+  @Test
+  public void testExceucteMergeCountsNoSplitUpdate() throws Exception {
+    testExceucteMergeCounts(false);
+  }
+
+  private void testExceucteMergeCounts(boolean splitUpdateEarly) throws Exception {
+
+    Statement stmt = con.createStatement();
+    stmt.execute("set " + ConfVars.MERGE_SPLIT_UPDATE.varname + "=" + splitUpdateEarly);
+    stmt.execute("set " + ConfVars.HIVE_SUPPORT_CONCURRENCY.varname + "=true");
+    stmt.execute("set " + ConfVars.HIVE_TXN_MANAGER.varname +
+        "=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager");
+
+    stmt.execute("drop table if exists transactional_crud");
+    stmt.execute("drop table if exists source");
+
+    stmt.execute("create table transactional_crud (a int, b int) stored as orc " +
+        "tblproperties('transactional'='true', 'transactional_properties'='default')");
+    stmt.executeUpdate("insert into transactional_crud values(1,2),(3,4),(5,6),(7,8),(9,10)");
+
+    stmt.execute("create table source (a int, b int) stored as orc " +
+        "tblproperties('transactional'='true', 'transactional_properties'='default')");
+    stmt.executeUpdate("insert into source values(1,12),(3,14),(9,19),(100,100)");
+
+    int count = stmt.executeUpdate(" MERGE INTO transactional_crud as t using source as s ON t.a = s.a\n" +
+        " WHEN MATCHED AND s.a > 7 THEN DELETE\n" +
+        " WHEN MATCHED AND s.a <= 8 THEN UPDATE set b = 100\n" +
+        " WHEN NOT MATCHED THEN INSERT VALUES (s.a, s.b)");
+    stmt.close();
+
+    assertEquals("Statement merge", 4, count);

Review Comment:
   is this assert sufficient? the input table has 4 rows, and the merge has a `not matched` part, so won't the count be 4 all the time? Or is the point rather that we don't want the number of deleted records to show up in the count? (One way to tighten this check is sketched after the issue footer below.)


Issue Time Tracking
-------------------

    Worklog Id:     (was: 777470)
    Time Spent: 3.5h  (was: 3h 20m)

> Rewrite Update statement as Multi-insert and do Update split early
> -------------------------------------------------------------------
>
>                 Key: HIVE-21160
>                 URL: https://issues.apache.org/jira/browse/HIVE-21160
>             Project: Hive
>          Issue Type: Sub-task
>          Components: Transactions
>    Affects Versions: 3.0.0
>            Reporter: Eugene Koifman
>            Assignee: Krisztian Kasa
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 3.5h
>  Remaining Estimate: 0h
>

--
This message was sent by Atlassian Jira
(v8.20.7#820007)
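Regarding the review comment on the `assertEquals("Statement merge", 4, count)` check above, one way to make the test less ambiguous would be to also verify the post-merge contents of `transactional_crud`. The snippet below is only a sketch of what could be appended to `testExceucteMergeCounts`, reusing the tables the test already creates; the expected values are derived from the test data shown in the diff, not taken from the PR.

    // Sketch only: verify the post-merge state instead of relying solely on the count.
    // From the test data: a=9 deleted, a=1 and a=3 updated to b=100, (100,100) inserted,
    // so 5 rows remain and 3 of them have b = 100.
    try (Statement check = con.createStatement();
         ResultSet rs = check.executeQuery(
             "select count(*), sum(case when b = 100 then 1 else 0 end) from transactional_crud")) {
      assertTrue(rs.next());
      assertEquals("rows after merge", 5, rs.getInt(1));
      assertEquals("rows updated or inserted with b = 100", 3, rs.getInt(2));
    }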