This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 1f16919172361ab8cc7a2e1b1303b2b6dfb004ba
Author: Peter Rozsa <[email protected]>
AuthorDate: Fri Sep 20 17:31:07 2024 +0200

    IMPALA-12732: Docs for MERGE statement
    
    This change adds documentation for MERGE statement.
    
    Change-Id: Ifadbae34ba802c4d4bd2feeec74f637607f108d7
    Reviewed-on: http://gerrit.cloudera.org:8080/21834
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Zoltan Borok-Nagy <[email protected]>
---
 docs/impala.ditamap            |   2 +-
 docs/topics/impala_iceberg.xml |  30 ++++++++++-
 docs/topics/impala_merge.xml   | 113 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 143 insertions(+), 2 deletions(-)

diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index c050ff04c..682ae6491 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -157,6 +157,7 @@ under the License.
       <topicref href="topics/impala_insert.xml"/>
       <topicref href="topics/impala_invalidate_metadata.xml"/>
       <topicref href="topics/impala_load_data.xml"/>
+      <topicref href="topics/impala_merge.xml"/>
       <topicref href="topics/impala_refresh.xml"/>
       <topicref href="topics/impala_refresh_authorization.xml"/>
       <topicref href="topics/impala_refresh_functions.xml"/>
@@ -304,7 +305,6 @@ under the License.
     <topicref href="topics/impala_porting.xml"/>
     <topicref href="topics/impala_utf_8.xml"/>
   </topicref>
-  
   <topicref href="topics/impala_performance.xml">
     <topicref href="topics/impala_perf_cookbook.xml"/>
     <topicref href="topics/impala_perf_joins.xml"/>
diff --git a/docs/topics/impala_iceberg.xml b/docs/topics/impala_iceberg.xml
index e82dc8a0d..37aea6a32 100644
--- a/docs/topics/impala_iceberg.xml
+++ b/docs/topics/impala_iceberg.xml
@@ -550,7 +550,7 @@ UPDATE ice_t SET ice_t.k = o.k, ice_t.j = o.j, FROM ice_t, 
other_table o where i
           </li>
           <li>
             Updating partitioning column with non-constant expression via the 
UPDATE FROM statement is not allowed.
-            The upcoming MERGE statement will not have this limitation.
+            This limitation could be eliminated by using a 
<codeph>MERGE</codeph> statement.
           </li>
         </ul>
       </p>
@@ -560,6 +560,34 @@ UPDATE ice_t SET ice_t.k = o.k, ice_t.j = o.j, FROM ice_t, 
other_table o where i
     </conbody>
   </concept>
 
+    <concept id="iceberg_merge">
+    <title>Merging data into Iceberg tables</title>
+    <conbody>
+      <p>
+        Impala can execute MERGE statements against Iceberg tables, e.g:
+        <codeblock>
+MERGE INTO ice_t USING source ON ice_t.a = source.id WHEN NOT MATCHED THEN 
INSERT VALUES(id, source.column1);
+MERGE INTO ice_t USING source ON ice_t.a = source.id WHEN MATCHED THEN DELETE;
+MERGE INTO ice_t USING source ON ice_t.a = source.id WHEN MATCHED THEN UPDATE 
SET b = source.b;
+MERGE INTO ice_t USING source ON ice_t.a = source.id
+  WHEN MATCHED AND ice_t.a &lt; 100 THEN UPDATE SET b = source.b
+  WHEN MATCHED THEN DELETE
+  WHEN NOT MATCHED THEN INSERT VALUES(id, source.column1);
+        </codeblock>
+      </p>
+      <p>
+        The limitations of the <codeph>UPDATE</codeph> statement also apply to 
the <codeph>MERGE</codeph> statement; in addition,
+        the limitations of the <codeph>MERGE</codeph> statement:
+        <ul>
+          <li>Subqueries in source statements must be simple queries as 
internal rewrite is not supported.</li>
+        </ul>
+      </p>
+      <p>
+        More information about the <codeph>MERGE</codeph> statement can be 
found at <xref href="impala_merge.xml"/>.
+      </p>
+    </conbody>
+  </concept>
+
   <concept id="iceberg_load">
     <title>Loading data into Iceberg tables</title>
     <conbody>
diff --git a/docs/topics/impala_merge.xml b/docs/topics/impala_merge.xml
new file mode 100644
index 000000000..1afd47147
--- /dev/null
+++ b/docs/topics/impala_merge.xml
@@ -0,0 +1,113 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="merge">
+
+  <title>MERGE Statement</title>
+  <titlealts audience="PDF"><navtitle>MERGE</navtitle></titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Ingest"/>
+      <data name="Category" value="Querying"/>
+      <data name="Category" value="Reports"/>
+      <data name="Category" value="Tables"/>
+      <data name="Category" value="Data Analysts"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Iceberg"/>
+    </metadata>
+  </prolog>
+
+ <conbody>
+
+    <p>
+      <indexterm audience="hidden">MERGE statement</indexterm>
+      The <codeph>MERGE</codeph> statement enables conditional updates, 
deletes, and inserts, based on the result of a join
+      between a target and a source table. This operation is useful for 
applying data changes from transactional systems to
+      analytic data warehouses by merging data from two tables with similar 
structures.
+    </p>
+
+    <p>
+      The <codeph>MERGE</codeph> statement supports multiple 
<codeph>WHEN</codeph> clauses, where each clause can specify
+      actions like <codeph>UPDATE</codeph>, <codeph>DELETE</codeph>, or 
<codeph>INSERT</codeph>. Actions are applied based
+      on the join conditions defined between the source and target tables.
+    </p>
+    <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
+
+<codeblock>MERGE INTO <i>target_table</i> [AS <i>target_alias</i>]
+USING <i>source_expr</i> [AS <i>source_alias</i>]
+ON <i>search_condition</i>
+[WHEN MATCHED [AND <i>search_condition</i>] THEN
+  UPDATE SET <i>column1 = expression1</i>, <i>column2 = expression2</i>, ... ]
+[WHEN MATCHED [AND <i>search_condition</i>] THEN DELETE]
+[WHEN NOT MATCHED [AND <i>search_condition</i>] THEN
+  INSERT (<i>column1</i>, <i>column2</i>, ...) VALUES (<i>expression1</i>, 
<i>expression2</i>, ...)]</codeblock>
+
+    <p>
+      The <codeph>WHEN MATCHED</codeph> clause is executed if a row from the 
source table matches a row in the target table,
+      based on the <codeph>ON</codeph> condition. Within this clause, you can 
either <codeph>UPDATE</codeph> specific
+      columns or <codeph>DELETE</codeph> the matched rows. Multiple 
<codeph>WHEN MATCHED</codeph> clauses can be provided,
+      each with a different condition.
+    </p>
+
+    <p>
+      The <codeph>WHEN NOT MATCHED</codeph> clause is executed if a row from 
the source table has no matching row in the
+      target table. This clause typically inserts new rows into the target 
table.
+    </p>
+
+    <ul>
+      <li><codeph>UPDATE</codeph>: Updates specified columns of the target 
table for matching rows. Both source and target
+      fields can be used in the update expressions.</li>
+      <li><codeph>DELETE</codeph>: Deletes the matching rows from the target 
table.</li>
+      <li><codeph>INSERT</codeph>: Inserts new rows into the target table when 
no match is found, using values from the source table.</li>
+    </ul>
+
+    <p>
+      The <codeph>ON</codeph> clause defines the join condition between the 
target table and source expression, typically based
+      on primary key or unique identifier columns. The <codeph>MERGE</codeph> 
operation evaluates the conditions in the order
+      of the <codeph>WHEN</codeph> clauses, executing the first matching 
action and discarding subsequent clauses.
+    </p>
+
+
+    <p conref="../shared/impala_common.xml#common/example_blurb"/>
+
+
+<codeblock>MERGE INTO customers AS c
+USING updates AS u
+ON u.customer_id = c.customer_id
+WHEN MATCHED AND c.status != 'inactive' THEN
+  UPDATE SET c.name = u.name, c.email = u.email
+WHEN MATCHED THEN DELETE
+WHEN NOT MATCHED THEN
+  INSERT (customer_id, name, email, status) VALUES (u.customer_id, u.name, 
u.email, 'active');</codeblock>
+    <p>
+      In this example, the <codeph>MERGE</codeph> operation updates customer 
information where IDs match and the customer
+      is not inactive, deletes inactive customers, and inserts new customers 
from the source table if no match is found.
+    </p>
+    <p>
+      The <codeph>MERGE</codeph> statement is only supported for Iceberg 
tables.
+    </p>
+    <p>
+      For Iceberg tables, this operation generally uses a full outer join with 
the <codeph>STRAIGHT_JOIN</codeph> hint
+      to combine the target and source datasets.
+    </p>
+  </conbody>
+</concept>

Reply via email to