This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 365ce0b12f778cae7f46edd5e9e99db736b465b1
Author: Daniel Vanko <[email protected]>
AuthorDate: Mon Jul 14 14:41:55 2025 +0200

    IMPALA-11512: Add tests for BINARY type support in Iceberg
    
    This patch adds tests for the binary type in Iceberg tables.
    
    Change-Id: I9221050a4bee57b8fbb85280478304e5b28efd21
    Reviewed-on: http://gerrit.cloudera.org:8080/23167
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../queries/QueryTest/iceberg-binary-type.test     | 292 +++++++++++++++++++++
 tests/query_test/test_iceberg.py                   |   3 +
 2 files changed, 295 insertions(+)

diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-binary-type.test
 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-binary-type.test
new file mode 100644
index 000000000..e3348a497
--- /dev/null
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-binary-type.test
@@ -0,0 +1,292 @@
+====
+---- QUERY
+CREATE TABLE iceberg_binary_tbl (
+  id INT,
+  string_col STRING,
+  binary_col BINARY
+) STORED AS ICEBERG TBLPROPERTIES('format-version'='2')
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+INSERT INTO iceberg_binary_tbl VALUES
+  (1, 'ascii', CAST('binary1' AS BINARY)),
+  (2, 'ascii', CAST('binary2' AS BINARY)),
+  (3, 'null', NULL),
+  (4, 'empty', CAST('' AS BINARY)),
+  (5, 'valid utf8', CAST('árvíztűrőtükörfúró' AS BINARY)),
+  (6, 'valid utf8', CAST('你好hello' AS BINARY)),
+  (7, 'invalid utf8', CAST(UNHEX('00FF00FF') AS BINARY)),
+  (8, 'invalid utf8', CAST(UNHEX('FF4433221100') AS BINARY))
+---- RUNTIME_PROFILE
+NumModifiedRows: 8
+====
+---- QUERY
+SELECT id, string_col, CAST(binary_col AS STRING) FROM iceberg_binary_tbl
+WHERE string_col != "invalid utf8"
+---- TYPES
+INT, STRING, STRING
+---- RESULTS: RAW_STRING
+1,'ascii','binary1'
+2,'ascii','binary2'
+3,'null','NULL'
+4,'empty',''
+5,'valid utf8','árvíztűrőtükörfúró'
+6,'valid utf8','你好hello'
+====
+---- QUERY
+SELECT id, HEX(CAST(binary_col AS STRING)) FROM iceberg_binary_tbl
+WHERE string_col = "invalid utf8"
+---- TYPES
+INT, STRING
+---- RESULTS
+7,'00FF00FF'
+8,'FF4433221100'
+====
+---- QUERY
+SELECT id, HEX(CAST(binary_col AS STRING)) FROM iceberg_binary_tbl
+WHERE binary_col = CAST(UNHEX("FF4433221100") AS BINARY)
+---- TYPES
+INT, STRING
+---- RESULTS
+8,'FF4433221100'
+====
+---- QUERY
+SET utf8_mode=0;
+SELECT string_col, LENGTH(binary_col) FROM iceberg_binary_tbl
+---- TYPES
+STRING, INT
+---- RESULTS
+'ascii',7
+'ascii',7
+'null',NULL
+'empty',0
+'valid utf8',26
+'valid utf8',11
+'invalid utf8',4
+'invalid utf8',6
+====
+---- QUERY
+SET utf8_mode=1;
+SELECT string_col, LENGTH(binary_col) FROM iceberg_binary_tbl
+---- TYPES
+STRING, INT
+---- RESULTS
+'ascii',7
+'ascii',7
+'null',NULL
+'empty',0
+'valid utf8',26
+'valid utf8',11
+'invalid utf8',4
+'invalid utf8',6
+====
+---- QUERY
+# Test UPDATE on iceberg_binary_tbl
+UPDATE iceberg_binary_tbl
+SET binary_col = CAST('updated' AS BINARY) WHERE id = 2
+---- RUNTIME_PROFILE
+NumModifiedRows: 1
+NumDeletedRows: 1
+====
+---- QUERY
+# Verify UPDATE results
+SELECT id, string_col, CAST(binary_col AS STRING) FROM iceberg_binary_tbl
+WHERE binary_col = CAST('updated' AS BINARY)
+---- TYPES
+INT, STRING, STRING
+---- RESULTS
+2,'ascii','updated'
+====
+---- QUERY
+# Test DELETE on iceberg_binary_tbl with binary column condition
+DELETE FROM iceberg_binary_tbl WHERE binary_col = CAST('updated' AS BINARY)
+---- RUNTIME_PROFILE
+NumModifiedRows: 0
+NumDeletedRows: 1
+====
+---- QUERY
+# Verify DELETE results
+SELECT id, string_col, CAST(binary_col AS STRING) FROM iceberg_binary_tbl
+WHERE string_col != "invalid utf8"
+---- TYPES
+INT, STRING, STRING
+---- RESULTS: RAW_STRING
+1,'ascii','binary1'
+3,'null','NULL'
+4,'empty',''
+5,'valid utf8','árvíztűrőtükörfúró'
+6,'valid utf8','你好hello'
+====
+---- QUERY
+CREATE TABLE iceberg_binary_tbl_big (
+  id INT,
+  int_col INT,
+  binary_col BINARY,
+  binary_col_with_nulls BINARY,
+  timestamp_col TIMESTAMP
+)
+PARTITIONED BY SPEC (MONTH(timestamp_col))
+STORED AS ICEBERG TBLPROPERTIES('format-version'='2')
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+INSERT INTO TABLE iceberg_binary_tbl_big
+SELECT id, int_col, CAST(string_col AS BINARY),
+       CAST(CASE WHEN id % 2 = 0 THEN date_string_col ELSE NULL END AS BINARY),
+       timestamp_col
+    FROM functional.alltypes
+---- RUNTIME_PROFILE
+NumModifiedRows: 310
+NumModifiedRows: 300
+NumModifiedRows: 280
+====
+---- QUERY
+SELECT binary_col_with_nulls FROM iceberg_binary_tbl_big
+WHERE binary_col_with_nulls = CAST("01/02/09" AS BINARY)
+---- TYPES
+BINARY
+---- RESULTS
+'01/02/09'
+'01/02/09'
+'01/02/09'
+'01/02/09'
+'01/02/09'
+====
+---- QUERY
+SELECT binary_col_with_nulls FROM iceberg_binary_tbl_big
+WHERE binary_col_with_nulls > CAST("12/31/09" AS BINARY)
+---- TYPES
+BINARY
+---- RESULTS
+'12/31/10'
+'12/31/10'
+'12/31/10'
+'12/31/10'
+'12/31/10'
+====
+---- QUERY
+SELECT DISTINCT binary_col_with_nulls FROM iceberg_binary_tbl_big
+WHERE binary_col_with_nulls < CAST("01/02/09" AS BINARY)
+---- TYPES
+BINARY
+---- RESULTS
+'01/01/09'
+'01/01/10'
+====
+---- QUERY
+SET DISABLE_OUTERMOST_TOPN=0;
+SELECT binary_col_with_nulls FROM iceberg_binary_tbl_big
+WHERE binary_col = CAST("4" AS BINARY)
+ORDER BY binary_col_with_nulls LIMIT 3
+---- TYPES
+BINARY
+---- RESULTS
+'01/01/09'
+'01/01/10'
+'01/02/09'
+====
+---- QUERY
+SET DISABLE_OUTERMOST_TOPN=1;
+SELECT binary_col_with_nulls FROM iceberg_binary_tbl_big
+WHERE binary_col = CAST("4" AS BINARY)
+ORDER BY binary_col_with_nulls LIMIT 3
+---- TYPES
+BINARY
+---- RESULTS
+'01/01/09'
+'01/01/10'
+'01/02/09'
+====
+---- QUERY
+SELECT COUNT(binary_col_with_nulls), max(binary_col), min(binary_col)
+FROM iceberg_binary_tbl_big
+---- TYPES
+BIGINT, BINARY, BINARY
+---- RESULTS
+3650,'9','0'
+====
+---- QUERY
+# NDV is not yet added for BINARY, casting is needed (IMPALA-11351)
+SELECT COUNT(DISTINCT binary_col_with_nulls), NDV(CAST(binary_col_with_nulls 
AS STRING))
+FROM iceberg_binary_tbl_big
+---- TYPES
+BIGINT, BIGINT
+---- RESULTS
+730,736
+====
+---- QUERY
+# Test multiple count distinct
+SELECT COUNT(DISTINCT binary_col), COUNT(DISTINCT binary_col_with_nulls)
+FROM iceberg_binary_tbl_big
+WHERE id < 20
+---- TYPES
+BIGINT, BIGINT
+---- RESULTS
+10,2
+====
+---- QUERY
+SELECT bb1.id, bb2.id, bb1.binary_col_with_nulls, bb2.binary_col_with_nulls
+FROM iceberg_binary_tbl_big bb1 LEFT JOIN iceberg_binary_tbl_big bb2
+  ON bb1.binary_col_with_nulls = bb2.binary_col_with_nulls
+WHERE bb1.id < 3 AND bb2.id < 3
+---- TYPES
+INT,  INT, BINARY, BINARY
+---- RESULTS
+0,2,'01/01/09','01/01/09'
+0,0,'01/01/09','01/01/09'
+2,2,'01/01/09','01/01/09'
+2,0,'01/01/09','01/01/09'
+====
+---- QUERY
+# Test UPDATE on iceberg_binary_tbl_big
+UPDATE iceberg_binary_tbl_big
+SET binary_col = CAST('updated' AS BINARY)
+WHERE binary_col_with_nulls < CAST("01/02/09" AS BINARY)
+---- RUNTIME_PROFILE
+NumModifiedRows: 5
+NumDeletedRows: 5
+====
+---- QUERY
+# Verify UPDATE results
+SELECT id, CAST(binary_col AS STRING), CAST(binary_col_with_nulls AS STRING)
+FROM iceberg_binary_tbl_big
+WHERE binary_col = CAST('updated' AS BINARY)
+---- TYPES
+INT, STRING, STRING
+---- RESULTS
+0,'updated','01/01/09'
+2,'updated','01/01/09'
+4,'updated','01/01/09'
+6,'updated','01/01/09'
+8,'updated','01/01/09'
+3650,'updated','01/01/10'
+3652,'updated','01/01/10'
+3654,'updated','01/01/10'
+3656,'updated','01/01/10'
+3658,'updated','01/01/10'
+====
+---- QUERY
+# Test DELETE on iceberg_binary_tbl_big with binary column conditions
+DELETE FROM iceberg_binary_tbl_big
+  WHERE binary_col = CAST('updated' AS BINARY)
+    AND binary_col_with_nulls = CAST("01/01/10" AS BINARY)
+---- RUNTIME_PROFILE
+NumModifiedRows: 0
+NumDeletedRows: 5
+====
+---- QUERY
+# Verify DELETE results
+SELECT id, CAST(binary_col AS STRING), CAST(binary_col_with_nulls AS STRING)
+  FROM iceberg_binary_tbl_big
+  WHERE binary_col = CAST('updated' AS BINARY)
+---- TYPES
+INT, STRING, STRING
+---- RESULTS
+0,'updated','01/01/09'
+2,'updated','01/01/09'
+4,'updated','01/01/09'
+6,'updated','01/01/09'
+8,'updated','01/01/09'
+====
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 8fc06738e..f3f5bcf63 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -75,6 +75,9 @@ class TestIcebergTable(IcebergTestSuite):
   def test_alter_iceberg_tables_default(self, vector, unique_database):
     self.run_test_case('QueryTest/iceberg-alter-default', vector, 
use_db=unique_database)
 
+  def test_iceberg_binary_type(self, vector, unique_database):
+    self.run_test_case('QueryTest/iceberg-binary-type', vector, 
use_db=unique_database)
+
   def test_external_iceberg_tables(self, vector, unique_database):
     self.run_test_case('QueryTest/iceberg-external', vector, unique_database)
 

Reply via email to