This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new cb496104d IMPALA-14027: Implement HS2 NULL_TYPE using TStringValue
cb496104d is described below
commit cb496104d98e8cbd87acf25277f2648cffaac42a
Author: Riza Suminto <[email protected]>
AuthorDate: Sat May 3 16:45:26 2025 -0700
IMPALA-14027: Implement HS2 NULL_TYPE using TStringValue
HS2 NULL_TYPE should be implemented using TStringValue.
However, due to incompatibility with Hive JDBC driver implementation
then, Impala choose to implement NULL type using TBoolValue (see
IMPALA-914, IMPALA-1370).
HIVE-4172 might be the root cause for such decision. Today, the Hive
JDBC (org.apache.hive.jdbc.HiveDriver) does not have that issue anymore,
as shown in this reproduction after applying this patch:
./bin/run-jdbc-client.sh -q "select null" -t NOSASL
Using JDBC Driver Name: org.apache.hive.jdbc.HiveDriver
Connecting to: jdbc:hive2://localhost:21050/;auth=noSasl
Executing: select null
----[START]----
NULL
----[END]----
Returned 1 row(s) in 0.343s
Thus, we can reimplement NULL_TYPE using TStringValue to match
HiveServer2 behavior.
Testing:
- Pass core tests.
Change-Id: I354110164b360013d9893f1eb4398c3418f80472
Reviewed-on: http://gerrit.cloudera.org:8080/22852
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/src/service/hs2-util.cc | 34 +++++++++++++++++-----
be/src/service/query-result-set.cc | 4 +--
.../java/org/apache/impala/service/JdbcTest.java | 6 ++--
.../functional-query/queries/QueryTest/except.test | 4 +--
.../queries/QueryTest/intersect.test | 2 +-
.../functional-query/queries/QueryTest/joins.test | 2 +-
.../functional-query/queries/QueryTest/top-n.test | 2 +-
.../functional-query/queries/QueryTest/union.test | 8 ++---
tests/hs2/test_fetch.py | 10 +++----
9 files changed, 46 insertions(+), 26 deletions(-)
diff --git a/be/src/service/hs2-util.cc b/be/src/service/hs2-util.cc
index ce6654db3..8424e6996 100644
--- a/be/src/service/hs2-util.cc
+++ b/be/src/service/hs2-util.cc
@@ -85,7 +85,6 @@ void impala::TColumnValueToHS2TColumn(const TColumnValue&
col_val,
string* nulls;
bool is_null;
switch (type.types[0].scalar_type.type) {
- case TPrimitiveType::NULL_TYPE:
case TPrimitiveType::BOOLEAN:
is_null = !col_val.__isset.bool_val;
column->boolVal.values.push_back(col_val.bool_val);
@@ -117,6 +116,7 @@ void impala::TColumnValueToHS2TColumn(const TColumnValue&
col_val,
column->doubleVal.values.push_back(col_val.double_val);
nulls = &column->doubleVal.nulls;
break;
+ case TPrimitiveType::NULL_TYPE:
case TPrimitiveType::TIMESTAMP:
case TPrimitiveType::DATE:
case TPrimitiveType::STRING:
@@ -152,6 +152,25 @@ void ReserveSpace(int reserve_count, T* hs2Vals) {
hs2Vals->nulls.reserve(BitUtil::RoundUpToPowerOfTwo(num_null_bytes));
}
+// Implementation for NULL.
+// Internally, Impala implement NULL expession using nullable-BooleanVal
(IMPALA-914).
+// To match with HiveServer2 behavior, IMPALA-14027 change the result mapping
to use
+// TColumn.stringVal rather than TColumn.boolVal.
+static void NullExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
RowBatch* batch,
+ int start_idx, int num_rows, uint32_t output_row_idx,
+ apache::hive::service::cli::thrift::TColumn* column) {
+ FOREACH_ROW_LIMIT(batch, start_idx, num_rows, it) {
+ // It is actually not necessary to evaluate expr_eval here. But we choose
to do it
+ // and DCHECK the result to be consistent with other functions.
+ BooleanVal val = expr_eval->GetBooleanVal(it.Get());
+ DCHECK(val.is_null);
+ // emplace empty string and set null bit.
+ column->stringVal.values.emplace_back();
+ SetNullBit(output_row_idx, val.is_null, &column->stringVal.nulls);
+ ++output_row_idx;
+ }
+}
+
// Implementation for BOOL.
static void BoolExprValuesToHS2TColumn(ScalarExprEvaluator* expr_eval,
RowBatch* batch,
int start_idx, int num_rows, uint32_t output_row_idx,
@@ -454,6 +473,9 @@ void impala::ExprValuesToHS2TColumn(ScalarExprEvaluator*
expr_eval,
switch (type.types[0].scalar_type.type) {
case TPrimitiveType::NULL_TYPE:
+ ReserveSpace(expected_result_count, &column->stringVal);
+ NullExprValuesToHS2TColumn(
+ expr_eval, batch, start_idx, num_rows, output_row_idx, column);
case TPrimitiveType::BOOLEAN:
ReserveSpace(expected_result_count, &column->boolVal);
BoolExprValuesToHS2TColumn(
@@ -598,9 +620,9 @@ void impala::ExprValueToHS2TColumnValue(const void* value,
const TColumnType& ty
DCHECK_EQ(1, type.types[0].__isset.scalar_type);
switch (type.types[0].scalar_type.type) {
case TPrimitiveType::NULL_TYPE:
- // Set NULLs in the bool_val.
- hs2_col_val->__isset.boolVal = true;
- hs2_col_val->boolVal.__isset.value = false;
+ // Set NULLs in the stringVal, but don't set the value itself.
+ hs2_col_val->__isset.stringVal = true;
+ hs2_col_val->stringVal.__isset.value = false;
break;
case TPrimitiveType::BOOLEAN:
hs2_col_val->__isset.boolVal = true;
@@ -868,10 +890,8 @@ thrift::TTypeEntry impala::ColumnToHs2Type(
const ColumnType& type = ColumnType::FromThrift(columnType);
thrift::TPrimitiveTypeEntry type_entry;
switch (type.type) {
- // Map NULL_TYPE to BOOLEAN, otherwise Hive's JDBC driver won't
- // work for queries like "SELECT NULL" (IMPALA-914).
case TYPE_NULL:
- type_entry.__set_type(thrift::TTypeId::BOOLEAN_TYPE);
+ type_entry.__set_type(thrift::TTypeId::NULL_TYPE);
break;
case TYPE_BOOLEAN:
type_entry.__set_type(thrift::TTypeId::BOOLEAN_TYPE);
diff --git a/be/src/service/query-result-set.cc
b/be/src/service/query-result-set.cc
index 97d97d97e..4fe411350 100644
--- a/be/src/service/query-result-set.cc
+++ b/be/src/service/query-result-set.cc
@@ -410,7 +410,6 @@ int HS2ColumnarResultSet::AddRows(
primitiveType = TPrimitiveType::STRING;
}
switch (primitiveType) {
- case TPrimitiveType::NULL_TYPE:
case TPrimitiveType::BOOLEAN:
StitchNulls(
num_rows_, rows_added, start_idx, from->boolVal.nulls,
&(to->boolVal.nulls));
@@ -454,6 +453,7 @@ int HS2ColumnarResultSet::AddRows(
from->doubleVal.values.begin() + start_idx,
from->doubleVal.values.begin() + start_idx + rows_added);
break;
+ case TPrimitiveType::NULL_TYPE:
case TPrimitiveType::TIMESTAMP:
case TPrimitiveType::DATE:
case TPrimitiveType::DECIMAL:
@@ -509,7 +509,6 @@ void HS2ColumnarResultSet::InitColumns() {
DCHECK(type_nodes[0].__isset.scalar_type);
TPrimitiveType::type input_type = type_nodes[0].scalar_type.type;
switch (input_type) {
- case TPrimitiveType::NULL_TYPE:
case TPrimitiveType::BOOLEAN:
col_output.__isset.boolVal = true;
break;
@@ -529,6 +528,7 @@ void HS2ColumnarResultSet::InitColumns() {
case TPrimitiveType::DOUBLE:
col_output.__isset.doubleVal = true;
break;
+ case TPrimitiveType::NULL_TYPE:
case TPrimitiveType::TIMESTAMP:
case TPrimitiveType::DATE:
case TPrimitiveType::DECIMAL:
diff --git a/fe/src/test/java/org/apache/impala/service/JdbcTest.java
b/fe/src/test/java/org/apache/impala/service/JdbcTest.java
index 43086caec..5a48a4435 100644
--- a/fe/src/test/java/org/apache/impala/service/JdbcTest.java
+++ b/fe/src/test/java/org/apache/impala/service/JdbcTest.java
@@ -599,10 +599,10 @@ public class JdbcTest extends JdbcTestBase {
@Test
public void testSelectNull() throws SQLException {
- // Regression test for IMPALA-914.
+ // Regression test for IMPALA-914 / IMPALA-1370 / IMPALA-14027.
ResultSet rs = con_.createStatement().executeQuery("select NULL");
- // Expect the column to be of type BOOLEAN to be compatible with Hive.
- assertEquals(rs.getMetaData().getColumnType(1), Types.BOOLEAN);
+ // Expect the column to be of type NULL to be compatible with HiveServer2.
+ assertEquals(rs.getMetaData().getColumnType(1), Types.NULL);
try {
// We expect exactly one result row with a NULL inside the first column.
assertTrue(rs.next());
diff --git a/testdata/workloads/functional-query/queries/QueryTest/except.test
b/testdata/workloads/functional-query/queries/QueryTest/except.test
index 51dcd0f2a..7e22b8df6 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/except.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/except.test
@@ -519,7 +519,7 @@ select 3, 'c', NULL, 30.0
---- TYPES
tinyint, string, null, decimal
---- HS2_TYPES
-tinyint, string, boolean, decimal
+tinyint, string, null, decimal
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,'a',NULL,10.0
====
@@ -533,7 +533,7 @@ values(3, 'c', NULL, 30.0)
---- TYPES
tinyint, string, null, decimal
---- HS2_TYPES
-tinyint, string, boolean, decimal
+tinyint, string, null, decimal
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,'a',NULL,10.0
====
diff --git
a/testdata/workloads/functional-query/queries/QueryTest/intersect.test
b/testdata/workloads/functional-query/queries/QueryTest/intersect.test
index 9751ae41a..61eef15c6 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/intersect.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/intersect.test
@@ -463,7 +463,7 @@ values(1, 'a', NULL, 10.0)
---- TYPES
tinyint, string, null, decimal
---- HS2_TYPES
-tinyint, string, boolean, decimal
+tinyint, string, null, decimal
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,'a',NULL,10.0
====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/joins.test
b/testdata/workloads/functional-query/queries/QueryTest/joins.test
index 0027b9e69..19b1b483b 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/joins.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/joins.test
@@ -195,7 +195,7 @@ limit 6
---- TYPES
int, tinyint, null
---- HS2_TYPES
-int, tinyint, boolean
+int, tinyint, null
====
---- QUERY
# check cross joins within a subquery
diff --git a/testdata/workloads/functional-query/queries/QueryTest/top-n.test
b/testdata/workloads/functional-query/queries/QueryTest/top-n.test
index 0992559c4..f5df9a0ea 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/top-n.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/top-n.test
@@ -903,7 +903,7 @@ limit 6
int, tinyint, null
---- HS2_TYPES
# HS2 maps NULL to BOOLEAN
-int, tinyint, boolean
+int, tinyint, null
====
---- QUERY
# check cross joins within a subquery
diff --git a/testdata/workloads/functional-query/queries/QueryTest/union.test
b/testdata/workloads/functional-query/queries/QueryTest/union.test
index fc5927ebb..6ac61474a 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/union.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/union.test
@@ -741,7 +741,7 @@ select 3, 'c', NULL, 30.0
---- TYPES
tinyint, string, null, decimal
---- HS2_TYPES
-tinyint, string, boolean, decimal
+tinyint, string, null, decimal
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,'a',NULL,10.0
2,'b',NULL,20.0
@@ -757,7 +757,7 @@ select 1, 'a', NULL, 10.0
---- TYPES
tinyint, string, null, decimal
---- HS2_TYPES
-tinyint, string, boolean, decimal
+tinyint, string, null, decimal
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,'a',NULL,10.0
2,'b',NULL,20.0
@@ -772,7 +772,7 @@ values(3, 'c', NULL, 30.0)
---- TYPES
tinyint, string, null, decimal
---- HS2_TYPES
-tinyint, string, boolean, decimal
+tinyint, string, null, decimal
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,'a',NULL,10.0
2,'b',NULL,20.0
@@ -788,7 +788,7 @@ values(1, 'a', NULL, 10.0)
---- TYPES
tinyint, string, null, decimal
---- HS2_TYPES
-tinyint, string, boolean, decimal
+tinyint, string, null, decimal
---- RESULTS: VERIFY_IS_EQUAL_SORTED
1,'a',NULL,10.0
2,'b',NULL,20.0
diff --git a/tests/hs2/test_fetch.py b/tests/hs2/test_fetch.py
index 7b40ef502..f52f57010 100644
--- a/tests/hs2/test_fetch.py
+++ b/tests/hs2/test_fetch.py
@@ -242,21 +242,21 @@ class TestFetch(HS2TestSuite):
execute_statement_resp =
self.hs2_client.ExecuteStatement(execute_statement_req)
HS2TestSuite.check_response(execute_statement_resp)
- # Check that the expected type is boolean (for compatibility with Hive,
see also
- # IMPALA-914)
+ # Check that the expected type is NULL_TYPE (for compatibility with
HiveServer2,
+ # see also IMPALA-914, IMPALA-1370, and IMPALA-14027 for history).
get_result_metadata_req = TCLIService.TGetResultSetMetadataReq()
get_result_metadata_req.operationHandle =
execute_statement_resp.operationHandle
get_result_metadata_resp = \
self.hs2_client.GetResultSetMetadata(get_result_metadata_req)
col = get_result_metadata_resp.schema.columns[0]
- assert col.typeDesc.types[0].primitiveEntry.type == TTypeId.BOOLEAN_TYPE
+ assert col.typeDesc.types[0].primitiveEntry.type == TTypeId.NULL_TYPE
- # Check that the actual type is boolean
+ # Check that the actual type is string
fetch_results_req = TCLIService.TFetchResultsReq()
fetch_results_req.operationHandle = execute_statement_resp.operationHandle
fetch_results_req.maxRows = 1
fetch_results_resp = self.fetch(fetch_results_req)
- assert fetch_results_resp.results.columns[0].boolVal is not None
+ assert fetch_results_resp.results.columns[0].stringVal is not None
assert self.column_results_to_string(
fetch_results_resp.results.columns) == (1, "NULL\n")