This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new cb496104d IMPALA-14027: Implement HS2 NULL_TYPE using TStringValue
cb496104d is described below

commit cb496104d98e8cbd87acf25277f2648cffaac42a
Author: Riza Suminto <[email protected]>
AuthorDate: Sat May 3 16:45:26 2025 -0700

    IMPALA-14027: Implement HS2 NULL_TYPE using TStringValue
    
    HS2 NULL_TYPE should be implemented using TStringValue.
    
    However, due to incompatibility with Hive JDBC driver implementation
    then, Impala choose to implement NULL type using TBoolValue (see
    IMPALA-914, IMPALA-1370).
    
    HIVE-4172 might be the root cause for such decision. Today, the Hive
    JDBC (org.apache.hive.jdbc.HiveDriver) does not have that issue anymore,
    as shown in this reproduction after applying this patch:
    
    ./bin/run-jdbc-client.sh -q "select null" -t NOSASL
    Using JDBC Driver Name: org.apache.hive.jdbc.HiveDriver
    Connecting to: jdbc:hive2://localhost:21050/;auth=noSasl
    Executing: select null
    ----[START]----
    NULL
    ----[END]----
    Returned 1 row(s) in 0.343s
    
    Thus, we can reimplement NULL_TYPE using TStringValue to match
    HiveServer2 behavior.
    
    Testing:
    - Pass core tests.
    
    Change-Id: I354110164b360013d9893f1eb4398c3418f80472
    Reviewed-on: http://gerrit.cloudera.org:8080/22852
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/service/hs2-util.cc                         | 34 +++++++++++++++++-----
 be/src/service/query-result-set.cc                 |  4 +--
 .../java/org/apache/impala/service/JdbcTest.java   |  6 ++--
 .../functional-query/queries/QueryTest/except.test |  4 +--
 .../queries/QueryTest/intersect.test               |  2 +-
 .../functional-query/queries/QueryTest/joins.test  |  2 +-
 .../functional-query/queries/QueryTest/top-n.test  |  2 +-
 .../functional-query/queries/QueryTest/union.test  |  8 ++---
 tests/hs2/test_fetch.py                            | 10 +++----
 9 files changed, 46 insertions(+), 26 deletions(-)

diff --git a/be/src/service/hs2-util.cc b/be/src/service/hs2-util.cc
index ce6654db3..8424e6996 100644
--- a/be/src/service/hs2-util.cc
+++ b/be/src/service/hs2-util.cc
@@ -85,7 +85,6 @@ void impala::TColumnValueToHS2TColumn(const TColumnValue& 
col_val,
   string* nulls;
   bool is_null;
   switch (type.types[0].scalar_type.type) {
-    case TPrimitiveType::NULL_TYPE:
     case TPrimitiveType::BOOLEAN:
       is_null = !col_val.__isset.bool_val;
       column->boolVal.values.push_back(col_val.bool_val);
@@ -117,6 +116,7 @@ void impala::TColumnValueToHS2TColumn(const TColumnValue& 
col_val,
       column->doubleVal.values.push_back(col_val.double_val);
       nulls = &column->doubleVal.nulls;
       break;
+    case TPrimitiveType::NULL_TYPE:
     case TPrimitiveType::TIMESTAMP:
     case TPrimitiveType::DATE:
     case TPrimitiveType::STRING:
@@ -152,6 +152,25 @@ void ReserveSpace(int reserve_count, T* hs2Vals) {
   hs2Vals->nulls.reserve(BitUtil::RoundUpToPowerOfTwo(num_null_bytes));
 }
 
+// Implementation for NULL.
+// Internally, Impala implement NULL expession using nullable-BooleanVal 
(IMPALA-914).
+// To match with HiveServer2 behavior, IMPALA-14027 change the result mapping 
to use
+// TColumn.stringVal rather than TColumn.boolVal.
+static void NullExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, 
RowBatch* batch,
+    int start_idx, int num_rows, uint32_t output_row_idx,
+    apache::hive::service::cli::thrift::TColumn* column) {
+  FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
+    // It is actually not necessary to evaluate expr_eval here. But we choose 
to do it
+    // and DCHECK the result to be consistent with other functions.
+    BooleanVal val = expr_eval->GetBooleanVal(it.Get());
+    DCHECK(val.is_null);
+    // emplace empty string and set null bit.
+    column->stringVal.values.emplace_back();
+    SetNullBit(output_row_idx, val.is_null, &column->stringVal.nulls);
+    ++output_row_idx;
+  }
+}
+
 // Implementation for BOOL.
 static void BoolExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval, 
RowBatch* batch,
     int start_idx, int num_rows, uint32_t output_row_idx,
@@ -454,6 +473,9 @@ void impala::ExprValuesToHS2TColumn(ScalarExprEvaluator* 
expr_eval,
 
   switch (type.types[0].scalar_type.type) {
     case TPrimitiveType::NULL_TYPE:
+      ReserveSpace(expected_result_count, &column->stringVal);
+      NullExprValuesToHS2TColumn(
+          expr_eval, batch, start_idx, num_rows, output_row_idx, column);
     case TPrimitiveType::BOOLEAN:
       ReserveSpace(expected_result_count, &column->boolVal);
       BoolExprValuesToHS2TColumn(
@@ -598,9 +620,9 @@ void impala::ExprValueToHS2TColumnValue(const void* value, 
const TColumnType& ty
   DCHECK_EQ(1, type.types[0].__isset.scalar_type);
   switch (type.types[0].scalar_type.type) {
     case TPrimitiveType::NULL_TYPE:
-      // Set NULLs in the bool_val.
-      hs2_col_val->__isset.boolVal = true;
-      hs2_col_val->boolVal.__isset.value = false;
+      // Set NULLs in the stringVal, but don't set the value itself.
+      hs2_col_val->__isset.stringVal = true;
+      hs2_col_val->stringVal.__isset.value = false;
       break;
     case TPrimitiveType::BOOLEAN:
       hs2_col_val->__isset.boolVal = true;
@@ -868,10 +890,8 @@ thrift::TTypeEntry impala::ColumnToHs2Type(
   const ColumnType& type = ColumnType::FromThrift(columnType);
   thrift::TPrimitiveTypeEntry type_entry;
   switch (type.type) {
-    // Map NULL_TYPE to BOOLEAN, otherwise Hive's JDBC driver won't
-    // work for queries like "SELECT NULL" (IMPALA-914).
     case TYPE_NULL:
-      type_entry.__set_type(thrift::TTypeId::BOOLEAN_TYPE);
+      type_entry.__set_type(thrift::TTypeId::NULL_TYPE);
       break;
     case TYPE_BOOLEAN:
       type_entry.__set_type(thrift::TTypeId::BOOLEAN_TYPE);
diff --git a/be/src/service/query-result-set.cc 
b/be/src/service/query-result-set.cc
index 97d97d97e..4fe411350 100644
--- a/be/src/service/query-result-set.cc
+++ b/be/src/service/query-result-set.cc
@@ -410,7 +410,6 @@ int HS2ColumnarResultSet::AddRows(
       primitiveType = TPrimitiveType::STRING;
     }
     switch (primitiveType) {
-      case TPrimitiveType::NULL_TYPE:
       case TPrimitiveType::BOOLEAN:
         StitchNulls(
             num_rows_, rows_added, start_idx, from->boolVal.nulls, 
&(to->boolVal.nulls));
@@ -454,6 +453,7 @@ int HS2ColumnarResultSet::AddRows(
             from->doubleVal.values.begin() + start_idx,
             from->doubleVal.values.begin() + start_idx + rows_added);
         break;
+      case TPrimitiveType::NULL_TYPE:
       case TPrimitiveType::TIMESTAMP:
       case TPrimitiveType::DATE:
       case TPrimitiveType::DECIMAL:
@@ -509,7 +509,6 @@ void HS2ColumnarResultSet::InitColumns() {
       DCHECK(type_nodes[0].__isset.scalar_type);
       TPrimitiveType::type input_type = type_nodes[0].scalar_type.type;
       switch (input_type) {
-        case TPrimitiveType::NULL_TYPE:
         case TPrimitiveType::BOOLEAN:
           col_output.__isset.boolVal = true;
           break;
@@ -529,6 +528,7 @@ void HS2ColumnarResultSet::InitColumns() {
         case TPrimitiveType::DOUBLE:
           col_output.__isset.doubleVal = true;
           break;
+        case TPrimitiveType::NULL_TYPE:
         case TPrimitiveType::TIMESTAMP:
         case TPrimitiveType::DATE:
         case TPrimitiveType::DECIMAL:
diff --git a/fe/src/test/java/org/apache/impala/service/JdbcTest.java 
b/fe/src/test/java/org/apache/impala/service/JdbcTest.java
index 43086caec..5a48a4435 100644
--- a/fe/src/test/java/org/apache/impala/service/JdbcTest.java
+++ b/fe/src/test/java/org/apache/impala/service/JdbcTest.java
@@ -599,10 +599,10 @@ public class JdbcTest extends JdbcTestBase {
 
   @Test
   public void testSelectNull() throws SQLException {
-    // Regression test for IMPALA-914.
+    // Regression test for IMPALA-914 / IMPALA-1370 / IMPALA-14027.
     ResultSet rs = con_.createStatement().executeQuery("select NULL");
-    // Expect the column to be of type BOOLEAN to be compatible with Hive.
-    assertEquals(rs.getMetaData().getColumnType(1), Types.BOOLEAN);
+    // Expect the column to be of type NULL to be compatible with HiveServer2.
+    assertEquals(rs.getMetaData().getColumnType(1), Types.NULL);
     try {
       // We expect exactly one result row with a NULL inside the first column.
       assertTrue(rs.next());
diff --git a/testdata/workloads/functional-query/queries/QueryTest/except.test 
b/testdata/workloads/functional-query/queries/QueryTest/except.test
index 51dcd0f2a..7e22b8df6 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/except.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/except.test
@@ -519,7 +519,7 @@ select 3, 'c', NULL, 30.0
 ---- TYPES
 tinyint, string, null, decimal
 ---- HS2_TYPES
-tinyint, string, boolean, decimal
+tinyint, string, null, decimal
 ---- RESULTS: VERIFY_IS_EQUAL_SORTED
 1,'a',NULL,10.0
 ====
@@ -533,7 +533,7 @@ values(3, 'c', NULL, 30.0)
 ---- TYPES
 tinyint, string, null, decimal
 ---- HS2_TYPES
-tinyint, string, boolean, decimal
+tinyint, string, null, decimal
 ---- RESULTS: VERIFY_IS_EQUAL_SORTED
 1,'a',NULL,10.0
 ====
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/intersect.test 
b/testdata/workloads/functional-query/queries/QueryTest/intersect.test
index 9751ae41a..61eef15c6 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/intersect.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/intersect.test
@@ -463,7 +463,7 @@ values(1, 'a', NULL, 10.0)
 ---- TYPES
 tinyint, string, null, decimal
 ---- HS2_TYPES
-tinyint, string, boolean, decimal
+tinyint, string, null, decimal
 ---- RESULTS: VERIFY_IS_EQUAL_SORTED
 1,'a',NULL,10.0
 ====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/joins.test 
b/testdata/workloads/functional-query/queries/QueryTest/joins.test
index 0027b9e69..19b1b483b 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/joins.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/joins.test
@@ -195,7 +195,7 @@ limit 6
 ---- TYPES
 int, tinyint, null
 ---- HS2_TYPES
-int, tinyint, boolean
+int, tinyint, null
 ====
 ---- QUERY
 # check cross joins within a subquery
diff --git a/testdata/workloads/functional-query/queries/QueryTest/top-n.test 
b/testdata/workloads/functional-query/queries/QueryTest/top-n.test
index 0992559c4..f5df9a0ea 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/top-n.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/top-n.test
@@ -903,7 +903,7 @@ limit 6
 int, tinyint, null
 ---- HS2_TYPES
 # HS2 maps NULL to BOOLEAN
-int, tinyint, boolean
+int, tinyint, null
 ====
 ---- QUERY
 # check cross joins within a subquery
diff --git a/testdata/workloads/functional-query/queries/QueryTest/union.test 
b/testdata/workloads/functional-query/queries/QueryTest/union.test
index fc5927ebb..6ac61474a 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/union.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/union.test
@@ -741,7 +741,7 @@ select 3, 'c', NULL, 30.0
 ---- TYPES
 tinyint, string, null, decimal
 ---- HS2_TYPES
-tinyint, string, boolean, decimal
+tinyint, string, null, decimal
 ---- RESULTS: VERIFY_IS_EQUAL_SORTED
 1,'a',NULL,10.0
 2,'b',NULL,20.0
@@ -757,7 +757,7 @@ select 1, 'a', NULL, 10.0
 ---- TYPES
 tinyint, string, null, decimal
 ---- HS2_TYPES
-tinyint, string, boolean, decimal
+tinyint, string, null, decimal
 ---- RESULTS: VERIFY_IS_EQUAL_SORTED
 1,'a',NULL,10.0
 2,'b',NULL,20.0
@@ -772,7 +772,7 @@ values(3, 'c', NULL, 30.0)
 ---- TYPES
 tinyint, string, null, decimal
 ---- HS2_TYPES
-tinyint, string, boolean, decimal
+tinyint, string, null, decimal
 ---- RESULTS: VERIFY_IS_EQUAL_SORTED
 1,'a',NULL,10.0
 2,'b',NULL,20.0
@@ -788,7 +788,7 @@ values(1, 'a', NULL, 10.0)
 ---- TYPES
 tinyint, string, null, decimal
 ---- HS2_TYPES
-tinyint, string, boolean, decimal
+tinyint, string, null, decimal
 ---- RESULTS: VERIFY_IS_EQUAL_SORTED
 1,'a',NULL,10.0
 2,'b',NULL,20.0
diff --git a/tests/hs2/test_fetch.py b/tests/hs2/test_fetch.py
index 7b40ef502..f52f57010 100644
--- a/tests/hs2/test_fetch.py
+++ b/tests/hs2/test_fetch.py
@@ -242,21 +242,21 @@ class TestFetch(HS2TestSuite):
     execute_statement_resp = 
self.hs2_client.ExecuteStatement(execute_statement_req)
     HS2TestSuite.check_response(execute_statement_resp)
 
-    # Check that the expected type is boolean (for compatibility with Hive, 
see also
-    # IMPALA-914)
+    # Check that the expected type is NULL_TYPE (for compatibility with 
HiveServer2,
+    # see also IMPALA-914, IMPALA-1370, and IMPALA-14027 for history).
     get_result_metadata_req = TCLIService.TGetResultSetMetadataReq()
     get_result_metadata_req.operationHandle = 
execute_statement_resp.operationHandle
     get_result_metadata_resp = \
         self.hs2_client.GetResultSetMetadata(get_result_metadata_req)
     col = get_result_metadata_resp.schema.columns[0]
-    assert col.typeDesc.types[0].primitiveEntry.type == TTypeId.BOOLEAN_TYPE
+    assert col.typeDesc.types[0].primitiveEntry.type == TTypeId.NULL_TYPE
 
-    # Check that the actual type is boolean
+    # Check that the actual type is string
     fetch_results_req = TCLIService.TFetchResultsReq()
     fetch_results_req.operationHandle = execute_statement_resp.operationHandle
     fetch_results_req.maxRows = 1
     fetch_results_resp = self.fetch(fetch_results_req)
-    assert fetch_results_resp.results.columns[0].boolVal is not None
+    assert fetch_results_resp.results.columns[0].stringVal is not None
 
     assert self.column_results_to_string(
       fetch_results_resp.results.columns) == (1, "NULL\n")

Reply via email to