This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 1f16919172361ab8cc7a2e1b1303b2b6dfb004ba Author: Peter Rozsa <[email protected]> AuthorDate: Fri Sep 20 17:31:07 2024 +0200 IMPALA-12732: Docs for MERGE statement This change adds documentation for MERGE statement. Change-Id: Ifadbae34ba802c4d4bd2feeec74f637607f108d7 Reviewed-on: http://gerrit.cloudera.org:8080/21834 Tested-by: Impala Public Jenkins <[email protected]> Reviewed-by: Zoltan Borok-Nagy <[email protected]> --- docs/impala.ditamap | 2 +- docs/topics/impala_iceberg.xml | 30 ++++++++++- docs/topics/impala_merge.xml | 113 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 143 insertions(+), 2 deletions(-) diff --git a/docs/impala.ditamap b/docs/impala.ditamap index c050ff04c..682ae6491 100644 --- a/docs/impala.ditamap +++ b/docs/impala.ditamap @@ -157,6 +157,7 @@ under the License. <topicref href="topics/impala_insert.xml"/> <topicref href="topics/impala_invalidate_metadata.xml"/> <topicref href="topics/impala_load_data.xml"/> + <topicref href="topics/impala_merge.xml"/> <topicref href="topics/impala_refresh.xml"/> <topicref href="topics/impala_refresh_authorization.xml"/> <topicref href="topics/impala_refresh_functions.xml"/> @@ -304,7 +305,6 @@ under the License. <topicref href="topics/impala_porting.xml"/> <topicref href="topics/impala_utf_8.xml"/> </topicref> - <topicref href="topics/impala_performance.xml"> <topicref href="topics/impala_perf_cookbook.xml"/> <topicref href="topics/impala_perf_joins.xml"/> diff --git a/docs/topics/impala_iceberg.xml b/docs/topics/impala_iceberg.xml index e82dc8a0d..37aea6a32 100644 --- a/docs/topics/impala_iceberg.xml +++ b/docs/topics/impala_iceberg.xml @@ -550,7 +550,7 @@ UPDATE ice_t SET ice_t.k = o.k, ice_t.j = o.j, FROM ice_t, other_table o where i </li> <li> Updating partitioning column with non-constant expression via the UPDATE FROM statement is not allowed. - The upcoming MERGE statement will not have this limitation. + This limitation could be eliminated by using a <codeph>MERGE</codeph> statement. </li> </ul> </p> @@ -560,6 +560,34 @@ UPDATE ice_t SET ice_t.k = o.k, ice_t.j = o.j, FROM ice_t, other_table o where i </conbody> </concept> + <concept id="iceberg_merge"> + <title>Merging data into Iceberg tables</title> + <conbody> + <p> + Impala can execute MERGE statements against Iceberg tables, e.g: + <codeblock> +MERGE INTO ice_t USING source ON ice_t.a = source.id WHEN NOT MATCHED THEN INSERT VALUES(id, source.column1); +MERGE INTO ice_t USING source ON ice_t.a = source.id WHEN MATCHED THEN DELETE; +MERGE INTO ice_t USING source ON ice_t.a = source.id WHEN MATCHED THEN UPDATE SET b = source.b; +MERGE INTO ice_t USING source ON ice_t.a = source.id + WHEN MATCHED AND ice_t.a < 100 THEN UPDATE SET b = source.b + WHEN MATCHED THEN DELETE + WHEN NOT MATCHED THEN INSERT VALUES(id, source.column1); + </codeblock> + </p> + <p> + The limitations of the <codeph>UPDATE</codeph> statement also apply to the <codeph>MERGE</codeph> statement; in addition, + the limitations of the <codeph>MERGE</codeph> statement: + <ul> + <li>Subqueries in source statements must be simple queries as internal rewrite is not supported.</li> + </ul> + </p> + <p> + More information about the <codeph>MERGE</codeph> statement can be found at <xref href="impala_merge.xml"/>. + </p> + </conbody> + </concept> + <concept id="iceberg_load"> <title>Loading data into Iceberg tables</title> <conbody> diff --git a/docs/topics/impala_merge.xml b/docs/topics/impala_merge.xml new file mode 100644 index 000000000..1afd47147 --- /dev/null +++ b/docs/topics/impala_merge.xml @@ -0,0 +1,113 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="merge"> + + <title>MERGE Statement</title> + <titlealts audience="PDF"><navtitle>MERGE</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Ingest"/> + <data name="Category" value="Querying"/> + <data name="Category" value="Reports"/> + <data name="Category" value="Tables"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Iceberg"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="hidden">MERGE statement</indexterm> + The <codeph>MERGE</codeph> statement enables conditional updates, deletes, and inserts, based on the result of a join + between a target and a source table. This operation is useful for applying data changes from transactional systems to + analytic data warehouses by merging data from two tables with similar structures. + </p> + + <p> + The <codeph>MERGE</codeph> statement supports multiple <codeph>WHEN</codeph> clauses, where each clause can specify + actions like <codeph>UPDATE</codeph>, <codeph>DELETE</codeph>, or <codeph>INSERT</codeph>. Actions are applied based + on the join conditions defined between the source and target tables. + </p> + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + +<codeblock>MERGE INTO <i>target_table</i> [AS <i>target_alias</i>] +USING <i>source_expr</i> [AS <i>source_alias</i>] +ON <i>search_condition</i> +[WHEN MATCHED [AND <i>search_condition</i>] THEN + UPDATE SET <i>column1 = expression1</i>, <i>column2 = expression2</i>, ... ] +[WHEN MATCHED [AND <i>search_condition</i>] THEN DELETE] +[WHEN NOT MATCHED [AND <i>search_condition</i>] THEN + INSERT (<i>column1</i>, <i>column2</i>, ...) VALUES (<i>expression1</i>, <i>expression2</i>, ...)]</codeblock> + + <p> + The <codeph>WHEN MATCHED</codeph> clause is executed if a row from the source table matches a row in the target table, + based on the <codeph>ON</codeph> condition. Within this clause, you can either <codeph>UPDATE</codeph> specific + columns or <codeph>DELETE</codeph> the matched rows. Multiple <codeph>WHEN MATCHED</codeph> clauses can be provided, + each with a different condition. + </p> + + <p> + The <codeph>WHEN NOT MATCHED</codeph> clause is executed if a row from the source table has no matching row in the + target table. This clause typically inserts new rows into the target table. + </p> + + <ul> + <li><codeph>UPDATE</codeph>: Updates specified columns of the target table for matching rows. Both source and target + fields can be used in the update expressions.</li> + <li><codeph>DELETE</codeph>: Deletes the matching rows from the target table.</li> + <li><codeph>INSERT</codeph>: Inserts new rows into the target table when no match is found, using values from the source table.</li> + </ul> + + <p> + The <codeph>ON</codeph> clause defines the join condition between the target table and source expression, typically based + on primary key or unique identifier columns. The <codeph>MERGE</codeph> operation evaluates the conditions in the order + of the <codeph>WHEN</codeph> clauses, executing the first matching action and discarding subsequent clauses. + </p> + + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + + +<codeblock>MERGE INTO customers AS c +USING updates AS u +ON u.customer_id = c.customer_id +WHEN MATCHED AND c.status != 'inactive' THEN + UPDATE SET c.name = u.name, c.email = u.email +WHEN MATCHED THEN DELETE +WHEN NOT MATCHED THEN + INSERT (customer_id, name, email, status) VALUES (u.customer_id, u.name, u.email, 'active');</codeblock> + <p> + In this example, the <codeph>MERGE</codeph> operation updates customer information where IDs match and the customer + is not inactive, deletes inactive customers, and inserts new customers from the source table if no match is found. + </p> + <p> + The <codeph>MERGE</codeph> statement is only supported for Iceberg tables. + </p> + <p> + For Iceberg tables, this operation generally uses a full outer join with the <codeph>STRAIGHT_JOIN</codeph> hint + to combine the target and source datasets. + </p> + </conbody> +</concept>
