This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 2e093bbc8 IMPALA-13085: Add warning and NULL out DECIMAL values in 
Iceberg metadata tables
2e093bbc8 is described below

commit 2e093bbc8ae06f89f17bbe57f41d5e91749572c4
Author: Daniel Becker <[email protected]>
AuthorDate: Wed May 15 15:53:22 2024 +0200

    IMPALA-13085: Add warning and NULL out DECIMAL values in Iceberg metadata 
tables
    
    DECIMAL values are not supported in Iceberg metadata tables and Impala
    runs on a DCHECK and crashes if it encounters one.
    
    Until this issue is properly fixed (see IMPALA-13080), this commit
    introduces a temporary solution: DECIMAL values coming from Iceberg
    metadata tables are NULLed out and a warning is issued.
    
    Testing:
     - added a DECIMAL column to the 'iceberg_metadata_alltypes' test table,
       so querying the `files` metadata table will include a DECIMAL in the
       'readable_metrics' struct.
    
    Change-Id: I0c8791805bc4fa2112e092e65366ca2815f3fa22
    Reviewed-on: http://gerrit.cloudera.org:8080/21429
    Reviewed-by: Daniel Becker <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/exec/iceberg-metadata/iceberg-row-reader.cc | 22 ++++++++++++++++++++--
 be/src/exec/iceberg-metadata/iceberg-row-reader.h  |  6 ++++++
 .../functional/functional_schema_template.sql      |  5 ++++-
 .../queries/QueryTest/iceberg-metadata-tables.test |  3 +--
 4 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/be/src/exec/iceberg-metadata/iceberg-row-reader.cc 
b/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
index 685a12518..bc42efa2a 100644
--- a/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
+++ b/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
@@ -33,7 +33,8 @@ namespace impala {
 IcebergRowReader::IcebergRowReader(ScanNode* scan_node,
     IcebergMetadataScanner* metadata_scanner)
   : scan_node_(scan_node),
-    metadata_scanner_(metadata_scanner) {}
+    metadata_scanner_(metadata_scanner),
+    unsupported_decimal_warning_emitted_(false) {}
 
 Status IcebergRowReader::InitJNI() {
   DCHECK(list_cl_ == nullptr) << "InitJNI() already called!";
@@ -120,7 +121,10 @@ Status IcebergRowReader::WriteSlot(JNIEnv* env, const 
jobject* struct_like_row,
     } case TYPE_DOUBLE: { // java.lang.Double
       RETURN_IF_ERROR(WriteDoubleSlot(env, accessed_value, slot));
       break;
-    } case TYPE_TIMESTAMP: { // org.apache.iceberg.types.TimestampType
+    } case TYPE_DECIMAL: {
+      RETURN_IF_ERROR(WriteDecimalSlot(slot_desc, tuple, state));
+      break;
+    }case TYPE_TIMESTAMP: { // org.apache.iceberg.types.TimestampType
       RETURN_IF_ERROR(WriteTimeStampSlot(env, accessed_value, slot));
       break;
     } case TYPE_STRING: {
@@ -220,6 +224,20 @@ Status IcebergRowReader::WriteDoubleSlot(JNIEnv* env, 
const jobject &accessed_va
   return Status::OK();
 }
 
+Status IcebergRowReader::WriteDecimalSlot(const SlotDescriptor* slot_desc, 
Tuple* tuple,
+    RuntimeState* state) {
+  // TODO IMPALA-13080: Handle DECIMALs without NULLing them out.
+  constexpr const char* warning = "DECIMAL values from Iceberg metadata tables 
"
+    "are displayed as NULL. See IMPALA-13080.";
+  if (!unsupported_decimal_warning_emitted_) {
+    unsupported_decimal_warning_emitted_ = true;
+    LOG(WARNING) << warning;
+    state->LogError(ErrorMsg(TErrorCode::NOT_IMPLEMENTED_ERROR, warning));
+  }
+  tuple->SetNull(slot_desc->null_indicator_offset());
+  return Status::OK();
+}
+
 Status IcebergRowReader::WriteTimeStampSlot(JNIEnv* env, const jobject 
&accessed_value,
     void* slot) {
   DCHECK(accessed_value != nullptr);
diff --git a/be/src/exec/iceberg-metadata/iceberg-row-reader.h 
b/be/src/exec/iceberg-metadata/iceberg-row-reader.h
index 67a51d2fe..f33638a5f 100644
--- a/be/src/exec/iceberg-metadata/iceberg-row-reader.h
+++ b/be/src/exec/iceberg-metadata/iceberg-row-reader.h
@@ -78,6 +78,10 @@ class IcebergRowReader {
   /// IcebergMetadataScanner class, used to get and access values inside java 
objects.
   IcebergMetadataScanner* metadata_scanner_;
 
+  /// We want to emit a warning about DECIMAL values being NULLed out at most 
once. This
+  /// member keeps track of whether the warning has already been emitted.
+  bool unsupported_decimal_warning_emitted_;
+
   // Writes a Java value into the target tuple. 'struct_like_row' is only used 
for struct
   // types. It is needed because struct children reside directly in the parent 
tuple of
   // the struct.
@@ -99,6 +103,8 @@ class IcebergRowReader {
       WARN_UNUSED_RESULT;
   Status WriteDoubleSlot(JNIEnv* env, const jobject &accessed_value, void* 
slot)
       WARN_UNUSED_RESULT;
+  Status WriteDecimalSlot(const SlotDescriptor* slot_desc, Tuple* tuple,
+      RuntimeState* state) WARN_UNUSED_RESULT;
   /// Iceberg TimeStamp is parsed into TimestampValue.
   Status WriteTimeStampSlot(JNIEnv* env, const jobject &accessed_value, void* 
slot)
       WARN_UNUSED_RESULT;
diff --git a/testdata/datasets/functional/functional_schema_template.sql 
b/testdata/datasets/functional/functional_schema_template.sql
index 9f4c2aa67..a380b17db 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -3905,7 +3905,7 @@ CREATE TABLE IF NOT EXISTS 
{db_name}{db_suffix}.{table_name} (
   dt date,
   s string,
   bn binary,
-  -- TODO IMPALA-13080: Add decimal.
+  dc decimal,
   strct struct<i: int>,
   arr array<double>,
   mp map<int, float>
@@ -3924,6 +3924,7 @@ INSERT INTO {db_name}{db_suffix}.{table_name} VALUES (
   to_date("2024-05-14"),
   "Some string",
   "bin1",
+  15.48,
   named_struct("i", 10),
   array(cast(10.0 as double), cast(20.0 as double)),
   map(10, cast(10.0 as float), 100, cast(100.0 as float))
@@ -3938,6 +3939,7 @@ INSERT INTO {db_name}{db_suffix}.{table_name} VALUES (
   to_date("2025-06-15"),
   "A string",
   NULL,
+  5.8,
   named_struct("i", -150),
   array(cast(-10.0 as double), cast(-2e100 as double)),
   map(10, cast(0.5 as float), 101, cast(1e3 as float))
@@ -3952,6 +3954,7 @@ INSERT INTO {db_name}{db_suffix}.{table_name} VALUES (
   NULL,
   NULL,
   "bin2",
+  NULL,
   named_struct("i", -150),
   array(cast(-12.0 as double), cast(-2e100 as double)),
   map(10, cast(0.5 as float), 101, cast(1e3 as float))
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
index cca52f8af..cad619500 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
@@ -834,12 +834,11 @@ STRING,DATE,DATE
 # Query the `files` metadata table of a table that contains all types - 
because of lower
 # and upper bounds, the 'readable_metrics' struct of the metadata table will 
also contain
 # all types.
-# TODO IMPALA-13080: Add DECIMAL.
 ####
 ---- QUERY
 select readable_metrics from 
functional_parquet.iceberg_metadata_alltypes.`files`;
 ---- RESULTS
-regex:'{"arr.element":{"column_size":\d+,"value_count":6,"null_value_count":0,"nan_value_count":0,"lower_bound":-2e\+100,"upper_bound":20},"b":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":false,"upper_bound":true},"bn":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"YmluMQ==","upper_bound":"YmluMg=="},"d":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":1,"lower_bound":-
 [...]
+regex:'{"arr.element":{"column_size":\d+,"value_count":6,"null_value_count":0,"nan_value_count":0,"lower_bound":-2e\+100,"upper_bound":20},"b":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":false,"upper_bound":true},"bn":{"column_size":\d+,"value_count":3,"null_value_count":1,"nan_value_count":null,"lower_bound":"YmluMQ==","upper_bound":"YmluMg=="},"d":{"column_size":\d+,"value_count":3,"null_value_count":0,"nan_value_count":1,"lower_bound":-
 [...]
 ---- TYPES
 STRING
 ====

Reply via email to