This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit bbfba13ed4d084681b542d7c5e1b5156576a603b
Author: Daniel Becker <[email protected]>
AuthorDate: Tue May 14 16:01:46 2024 +0200

    IMPALA-13079: Add support for FLOAT/DOUBLE in Iceberg metadata tables
    
    Until now, the float and double data types were not supported in Iceberg
    metadata tables. This commit adds support for them.
    
    Testing:
     - added a test table that contains all primitive types (except for
       decimal, which is still not supported), a struct, an array and a map
     - added a test query that queries the `files` metadata table of the
       above table - the 'readable_metrics' struct contains lower and upper
       bounds for all columns in the original table, with the original type
    
    Change-Id: I2171c9aa9b6d2b634b8c511263b1610cb1d7cb29
    Reviewed-on: http://gerrit.cloudera.org:8080/21425
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/exec/iceberg-metadata/iceberg-row-reader.cc | 32 +++++++++++
 be/src/exec/iceberg-metadata/iceberg-row-reader.h  |  8 +++
 .../functional/functional_schema_template.sql      | 67 ++++++++++++++++++++++
 .../datasets/functional/schema_constraints.csv     |  1 +
 .../queries/QueryTest/iceberg-metadata-tables.test | 14 +++++
 5 files changed, 122 insertions(+)

diff --git a/be/src/exec/iceberg-metadata/iceberg-row-reader.cc 
b/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
index 4d259b5a1..685a12518 100644
--- a/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
+++ b/be/src/exec/iceberg-metadata/iceberg-row-reader.cc
@@ -46,6 +46,8 @@ Status IcebergRowReader::InitJNI() {
   RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, "java/lang/Boolean", 
&boolean_cl_));
   RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, "java/lang/Integer", 
&integer_cl_));
   RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, "java/lang/Long", 
&long_cl_));
+  RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, "java/lang/Float", 
&float_cl_));
+  RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, "java/lang/Double", 
&double_cl_));
   RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, "java/lang/CharSequence",
       &char_sequence_cl_));
   RETURN_IF_ERROR(JniUtil::GetGlobalClassRef(env, "java/nio/ByteBuffer",
@@ -60,6 +62,10 @@ Status IcebergRowReader::InitJNI() {
       &integer_value_));
   RETURN_IF_ERROR(JniUtil::GetMethodID(env, long_cl_, "longValue", "()J",
       &long_value_));
+  RETURN_IF_ERROR(JniUtil::GetMethodID(env, float_cl_, "floatValue", "()F",
+      &float_value_));
+  RETURN_IF_ERROR(JniUtil::GetMethodID(env, double_cl_, "doubleValue", "()D",
+      &double_value_));
   RETURN_IF_ERROR(JniUtil::GetMethodID(env, char_sequence_cl_, "toString",
       "()Ljava/lang/String;", &char_sequence_to_string_));
   return Status::OK();
@@ -108,6 +114,12 @@ Status IcebergRowReader::WriteSlot(JNIEnv* env, const 
jobject* struct_like_row,
     } case TYPE_BIGINT: { // java.lang.Long
       RETURN_IF_ERROR(WriteLongSlot(env, accessed_value, slot));
       break;
+    } case TYPE_FLOAT: { // java.lang.Float
+      RETURN_IF_ERROR(WriteFloatSlot(env, accessed_value, slot));
+      break;
+    } case TYPE_DOUBLE: { // java.lang.Double
+      RETURN_IF_ERROR(WriteDoubleSlot(env, accessed_value, slot));
+      break;
     } case TYPE_TIMESTAMP: { // org.apache.iceberg.types.TimestampType
       RETURN_IF_ERROR(WriteTimeStampSlot(env, accessed_value, slot));
       break;
@@ -188,6 +200,26 @@ Status IcebergRowReader::WriteLongSlot(JNIEnv* env, const 
jobject &accessed_valu
   return Status::OK();
 }
 
+Status IcebergRowReader::WriteFloatSlot(JNIEnv* env, const jobject 
&accessed_value,
+    void* slot) {
+  DCHECK(accessed_value != nullptr);
+  DCHECK(env->IsInstanceOf(accessed_value, float_cl_) == JNI_TRUE);
+  const jfloat unboxed = env->CallFloatMethod(accessed_value, float_value_);
+  RETURN_ERROR_IF_EXC(env);
+  *static_cast<float*>(slot) = unboxed;
+  return Status::OK();
+}
+
+Status IcebergRowReader::WriteDoubleSlot(JNIEnv* env, const jobject 
&accessed_value,
+    void* slot) {
+  DCHECK(accessed_value != nullptr);
+  DCHECK(env->IsInstanceOf(accessed_value, double_cl_) == JNI_TRUE);
+  const jdouble unboxed = env->CallDoubleMethod(accessed_value, double_value_);
+  RETURN_ERROR_IF_EXC(env);
+  *static_cast<double*>(slot) = unboxed;
+  return Status::OK();
+}
+
 Status IcebergRowReader::WriteTimeStampSlot(JNIEnv* env, const jobject 
&accessed_value,
     void* slot) {
   DCHECK(accessed_value != nullptr);
diff --git a/be/src/exec/iceberg-metadata/iceberg-row-reader.h 
b/be/src/exec/iceberg-metadata/iceberg-row-reader.h
index 34f21d773..67a51d2fe 100644
--- a/be/src/exec/iceberg-metadata/iceberg-row-reader.h
+++ b/be/src/exec/iceberg-metadata/iceberg-row-reader.h
@@ -57,6 +57,8 @@ class IcebergRowReader {
   inline static jclass boolean_cl_ = nullptr;
   inline static jclass integer_cl_ = nullptr;
   inline static jclass long_cl_ = nullptr;
+  inline static jclass float_cl_ = nullptr;
+  inline static jclass double_cl_ = nullptr;
   inline static jclass char_sequence_cl_ = nullptr;
   inline static jclass byte_buffer_cl_ = nullptr;
 
@@ -66,6 +68,8 @@ class IcebergRowReader {
   inline static jmethodID boolean_value_ = nullptr;
   inline static jmethodID integer_value_ = nullptr;
   inline static jmethodID long_value_ = nullptr;
+  inline static jmethodID float_value_ = nullptr;
+  inline static jmethodID double_value_ = nullptr;
   inline static jmethodID char_sequence_to_string_ = nullptr;
 
   /// The scan node that started this row reader.
@@ -91,6 +95,10 @@ class IcebergRowReader {
       WARN_UNUSED_RESULT;
   Status WriteLongSlot(JNIEnv* env, const jobject &accessed_value, void* slot)
       WARN_UNUSED_RESULT;
+  Status WriteFloatSlot(JNIEnv* env, const jobject &accessed_value, void* slot)
+      WARN_UNUSED_RESULT;
+  Status WriteDoubleSlot(JNIEnv* env, const jobject &accessed_value, void* 
slot)
+      WARN_UNUSED_RESULT;
   /// Iceberg TimeStamp is parsed into TimestampValue.
   Status WriteTimeStampSlot(JNIEnv* env, const jobject &accessed_value, void* 
slot)
       WARN_UNUSED_RESULT;
diff --git a/testdata/datasets/functional/functional_schema_template.sql 
b/testdata/datasets/functional/functional_schema_template.sql
index 0290e6ba0..762f6e319 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -3893,6 +3893,73 @@ SELECT * FROM  
{db_name}{db_suffix}.iceberg_query_metadata;
 ---- DATASET
 functional
 ---- BASE_TABLE_NAME
+iceberg_metadata_alltypes
+---- CREATE
+CREATE TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} (
+  b boolean,
+  i int,
+  l bigint,
+  f float,
+  d double,
+  ts timestamp,
+  dt date,
+  s string,
+  bn binary,
+  -- TODO IMPALA-13080: Add decimal.
+  strct struct<i: int>,
+  arr array<double>,
+  mp map<int, float>
+)
+STORED BY ICEBERG
+LOCATION 
'/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_metadata_alltypes'
+TBLPROPERTIES('format-version'='2');
+---- DEPENDENT_LOAD_HIVE
+INSERT INTO {db_name}{db_suffix}.{table_name} VALUES (
+  false,
+  1,
+  -10,
+  2e-10,
+  -2e-100,
+  to_utc_timestamp("2024-05-14 14:51:12", "UTC"),
+  to_date("2024-05-14"),
+  "Some string",
+  "bin1",
+  named_struct("i", 10),
+  array(cast(10.0 as double), cast(20.0 as double)),
+  map(10, cast(10.0 as float), 100, cast(100.0 as float))
+),
+(
+  NULL,
+  5,
+  150,
+  2e15,
+  double('NaN'),
+  to_utc_timestamp("2025-06-15 18:51:12", "UTC"),
+  to_date("2025-06-15"),
+  "A string",
+  NULL,
+  named_struct("i", -150),
+  array(cast(-10.0 as double), cast(-2e100 as double)),
+  map(10, cast(0.5 as float), 101, cast(1e3 as float))
+),
+(
+  true,
+  5,
+  150,
+  float('NaN'),
+  2e100,
+  NULL,
+  NULL,
+  NULL,
+  "bin2",
+  named_struct("i", -150),
+  array(cast(-12.0 as double), cast(-2e100 as double)),
+  map(10, cast(0.5 as float), 101, cast(1e3 as float))
+);
+====
+---- DATASET
+functional
+---- BASE_TABLE_NAME
 iceberg_with_key_metadata
 ---- CREATE
 CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name}
diff --git a/testdata/datasets/functional/schema_constraints.csv 
b/testdata/datasets/functional/schema_constraints.csv
index 84046aa7b..58b705e78 100644
--- a/testdata/datasets/functional/schema_constraints.csv
+++ b/testdata/datasets/functional/schema_constraints.csv
@@ -110,6 +110,7 @@ table_name:iceberg_lineitem_sixblocks, 
constraint:restrict_to, table_format:parq
 table_name:iceberg_spark_compaction_with_dangling_delete, 
constraint:restrict_to, table_format:parquet/none/none
 table_name:iceberg_v2_equality_delete_schema_evolution, 
constraint:restrict_to, table_format:parquet/none/none
 table_name:iceberg_query_metadata, constraint:restrict_to, 
table_format:parquet/none/none
+table_name:iceberg_metadata_alltypes, constraint:restrict_to, 
table_format:parquet/none/none
 table_name:iceberg_view, constraint:restrict_to, table_format:parquet/none/none
 
 # TODO: Support Avro. Data loading currently fails for Avro because complex 
types
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
index 339940e19..d8f947fad 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-metadata-tables.test
@@ -845,6 +845,20 @@ select value_counts, readable_metrics.d.lower_bound, 
readable_metrics.d.upper_bo
 STRING,DATE,DATE
 ====
 
+####
+# Query the `files` metadata table of a table that contains all types - 
because of lower
+# and upper bounds, the 'readable_metrics' struct of the metadata table will 
also contain
+# all types.
+# TODO IMPALA-13080: Add DECIMAL.
+####
+---- QUERY
+select * from functional_parquet.iceberg_metadata_alltypes.`files`;
+---- RESULTS
+0,regex:'.*\.parquet','PARQUET',0,3,3648,'{1:32,2:63,3:71,4:43,5:55,6:47,7:39,8:58,9:47,13:63,14:96,15:75,16:78}','{1:3,2:3,3:3,4:3,5:3,6:3,7:3,8:3,9:3,13:3,14:6,15:6,16:6}','{1:1,2:0,3:0,4:0,5:0,6:1,7:1,8:1,9:1,13:0,14:0,15:0,16:0}','{16:0,4:1,5:1,14:0}','{1:"AA==",2:"AQAAAA==",3:"9v////////8=",4:"/+ZbLw==",5:"MAWO5C7/O6s=",6:"AFgLImsYBgA=",7:"kU0AAA==",8:"QSBzdHJpbmc=",9:"YmluMQ==",13:"av///w==",14:"fcOUJa1JwtQ=",16:"AAAAPw=="}','{1:"AQ==",2:"BQAAAA==",3:"lgAAAAAAAAA=",4:"qV/jWA==",5:"
 [...]
+---- TYPES
+INT,STRING,STRING,INT,BIGINT,BIGINT,STRING,STRING,STRING,STRING,STRING,STRING,BINARY,STRING,STRING,INT,STRING
+====
+
 ####
 # Describe all the metadata tables once
 ####

Reply via email to