This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 82d8e382a IMPALA-13296: Check column compatibility earlier for table 
migration
82d8e382a is described below

commit 82d8e382a3fa21e2930c0dd5e8198db647e36f17
Author: Gabor Kaszab <[email protected]>
AuthorDate: Thu Aug 15 13:37:21 2024 +0200

    IMPALA-13296: Check column compatibility earlier for table migration
    
    When migrating a Hive table to Iceberg there can be columns in the
    source table that are not supported by Iceberg. For these tables the
    table migration process gives an error. However, this is a multi-step
    process and the check for column compatibility is performed after the
source table has been renamed to a temporary name.
    This patch brings the column type check to the analysis phase to avoid
    renaming the table when there are incompatible columns.
    
    Testing:
      - Re-run the existing checks
      - Adjusted the error message for this error case to reflect that we
        return an AnalysisException.
    
    Change-Id: I381e7359a55ebd84f9a1dcec3b665bdbea62a035
    Reviewed-on: http://gerrit.cloudera.org:8080/21675
    Reviewed-by: Peter Rozsa <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../impala/analysis/ConvertTableToIcebergStmt.java       | 13 ++++++++++++-
 .../org/apache/impala/analysis/AnalyzeStmtsTest.java     | 16 +++++++++-------
 .../iceberg-migrate-from-external-hdfs-tables.test       |  2 +-
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git 
a/fe/src/main/java/org/apache/impala/analysis/ConvertTableToIcebergStmt.java 
b/fe/src/main/java/org/apache/impala/analysis/ConvertTableToIcebergStmt.java
index b8b0ee95f..b0b5069ba 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ConvertTableToIcebergStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ConvertTableToIcebergStmt.java
@@ -34,13 +34,13 @@ import org.apache.impala.authorization.Privilege;
 import org.apache.impala.catalog.FeFsTable;
 import org.apache.impala.catalog.FeTable;
 import org.apache.impala.catalog.IcebergTable;
-import org.apache.impala.catalog.PrunablePartition;
 import org.apache.impala.catalog.Table;
 import org.apache.impala.common.AnalysisException;
 import org.apache.impala.thrift.THdfsFileFormat;
 import org.apache.impala.thrift.TIcebergCatalog;
 import org.apache.impala.thrift.TConvertTableRequest;
 import org.apache.impala.util.AcidUtils;
+import org.apache.impala.util.IcebergSchemaConverter;
 import org.apache.impala.util.IcebergUtil;
 import org.apache.impala.util.MigrateTableUtil;
 
@@ -107,6 +107,8 @@ public class ConvertTableToIcebergStmt extends 
StatementBase {
           sd.getInputFormat());
     }
 
+    checkColumnTypeCompatibility(table);
+
     if (properties_.size() > 1 ||
         properties_.keySet().stream().anyMatch(
             key -> !key.equalsIgnoreCase(IcebergTable.ICEBERG_CATALOG)) ) {
@@ -123,6 +125,15 @@ public class ConvertTableToIcebergStmt extends 
StatementBase {
     createSubQueryStrings((FeFsTable) table);
   }
 
+  private void checkColumnTypeCompatibility(FeTable table) throws 
AnalysisException {
+    try {
+      IcebergSchemaConverter.convertToIcebergSchema(table.getMetaStoreTable());
+    } catch (IllegalArgumentException e) {
+      throw new AnalysisException("Incompatible column type in source table. " 
+
+          e.getMessage());
+    }
+  }
+
   private void createSubQueryStrings(FeFsTable table)  {
     setHdfsTablePropertiesQuery_ = SetTblProps.builder()
               .table(table.getFullName())
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java 
b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
index 6037d45d8..2f9405a67 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeStmtsTest.java
@@ -5196,20 +5196,22 @@ public class AnalyzeStmtsTest extends AnalyzerTest {
 
   @Test
   public void TestConvertTable() {
-    AnalyzesOk("alter table functional_parquet.alltypes convert to iceberg");
-    AnalyzesOk("alter table functional_parquet.alltypes convert to iceberg"
+    AnalyzesOk("alter table functional_parquet.tinytable convert to iceberg");
+    AnalyzesOk("alter table functional_parquet.tinytable convert to iceberg"
             + " tblproperties('iceberg.catalog'='hadoop.tables')");
-    AnalyzesOk("alter table functional_parquet.alltypes convert to iceberg"
+    AnalyzesOk("alter table functional_parquet.tinytable convert to iceberg"
             + " tblproperties('iceberg.catalog'='hive.catalog')");
-    AnalysisError("alter table functional_parquet.alltypes convert to iceberg"
+    AnalysisError("alter table functional_parquet.alltypes convert to iceberg",
+        "Incompatible column type in source table. Unsupported Hive type: 
BYTE");
+    AnalysisError("alter table functional_parquet.tinytable convert to iceberg"
             + " tblproperties('iceberg.catalog'='hadoop.catalog')",
         "The Hadoop Catalog is not supported because the location may change");
-    AnalysisError("alter table functional_kudu.alltypes convert to iceberg",
+    AnalysisError("alter table functional_kudu.tinytable convert to iceberg",
         "CONVERT TO ICEBERG is not supported for KuduTable");
-    AnalysisError("alter table functional.alltypes convert to iceberg",
+    AnalysisError("alter table functional.tinytable convert to iceberg",
         "CONVERT TO ICEBERG is not supported for " +
         "org.apache.hadoop.mapred.TextInputFormat");
-    AnalysisError("alter table functional_parquet.alltypes convert to iceberg"
+    AnalysisError("alter table functional_parquet.tinytable convert to iceberg"
             + " tblproperties('metadata.generator.threads'='a1')",
         "CONVERT TO ICEBERG only accepts 'iceberg.catalog' as TBLPROPERTY.");
   }
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-migrate-from-external-hdfs-tables.test
 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-migrate-from-external-hdfs-tables.test
index 9d3fde193..087ebc35c 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-migrate-from-external-hdfs-tables.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-migrate-from-external-hdfs-tables.test
@@ -204,7 +204,7 @@ AnalysisException: CONVERT TO ICEBERG is not supported for 
transactional tables
 create table hdfs_table2 (col tinyint) stored as parquet;
 alter table hdfs_table2 convert to iceberg;
 ---- CATCH
-Unsupported Hive type: BYTE, use integer instead
+AnalysisException: Incompatible column type in source table. Unsupported Hive 
type: BYTE, use integer instead
 ====
 ---- QUERY
 # Test table migration for decimal partitioned table.

Reply via email to