This is an automated email from the ASF dual-hosted git repository.
hello-stephen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a95974c6dde [BUG](exec) fix coalesce function output null (#63092)
a95974c6dde is described below
commit a95974c6dded2dcf839181d387eb41761b24c889
Author: shee <[email protected]>
AuthorDate: Thu May 28 15:22:44 2026 +0800
[BUG](exec) fix coalesce function output null (#63092)
### What problem does this PR solve?
Issue Number: close #xxx
Example: COALESCE(same_department_income_amount, 0) ==> outputs NULL
(where same_department_income_amount is of type double).
When assigning the value to the result column in the computation, the
assignment is done unconditionally (forced), as in:
```cpp
result_raw_data[row] +=
column_raw_data[row] *
typename ColumnType::value_type(!(null_map_data[row] |
filled_flag[row]));
```
If the argument column column_raw_data's null_map[row] is 1, then the
value stored in column_raw_data[row] is garbage data. This garbage may
contain values such as NaN. If a preceding argument of COALESCE happens
to be assigned NaN, then during subsequent assignments we run into cases
like:
0 * NaN = NaN
num + NaN = NaN
so the assigned result also becomes NaN, which causes value pollution.
By rights the final output should also be NaN, but what is actually
returned is NULL. The reason is that during result serialization/output,
NaN values are emitted as NULL.
```cpp
tatus DataTypeNumberSerDe<T>::_write_column_to_mysql(const IColumn& column,
MysqlRowBuffer<is_binary_format>& result,
int row_idx, bool
col_const,
const FormatOptions&
options) const {
//...
else if constexpr (std::is_same_v<T, float>) {
if (std::isnan(data[col_index])) {
// Handle NaN for float, we should push null value
buf_ret = result.push_null();
} else {
buf_ret = result.push_float(data[col_index]);
}
}
//...
}
```
Co-authored-by: garenshi <[email protected]>
---
be/src/exprs/vcondition_expr.cpp | 6 +
be/test/exprs/vcondition_expr_test.cpp | 358 +++++++++++++++++++++++++++++++++
2 files changed, 364 insertions(+)
diff --git a/be/src/exprs/vcondition_expr.cpp b/be/src/exprs/vcondition_expr.cpp
index 2f2ffddebe5..13e196a5e0b 100644
--- a/be/src/exprs/vcondition_expr.cpp
+++ b/be/src/exprs/vcondition_expr.cpp
@@ -589,6 +589,12 @@ void insert_result_data(MutableColumnPtr& result_column,
ColumnPtr& argument_col
binary_cast<VecDateTimeValue,
int64_t>(result_raw_data[row]) +
binary_cast<VecDateTimeValue,
int64_t>(column_raw_data[row]) *
int64_t(!(null_map_data[row] | filled_flag[row])));
+ } else if constexpr (std::is_same_v<ColumnType, ColumnFloat32> ||
+ std::is_same_v<ColumnType, ColumnFloat64>) {
+ auto flag = (!(null_map_data[row] | filled_flag[row]));
+ result_raw_data[row] += std::isfinite(column_raw_data[row])
+ ? column_raw_data[row] * flag
+ : (flag ? column_raw_data[row] :
0);
} else {
result_raw_data[row] +=
column_raw_data[row] *
diff --git a/be/test/exprs/vcondition_expr_test.cpp
b/be/test/exprs/vcondition_expr_test.cpp
new file mode 100644
index 00000000000..b26f01cc266
--- /dev/null
+++ b/be/test/exprs/vcondition_expr_test.cpp
@@ -0,0 +1,358 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exprs/vcondition_expr.h"
+
+#include <gen_cpp/Exprs_types.h>
+#include <gen_cpp/Types_types.h>
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+#include <cmath>
+#include <limits>
+#include <memory>
+#include <vector>
+
+#include "core/column/column_nullable.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+
+namespace doris {
+
+// Build a minimal TExprNode as the input of VectorizedCoalesceExpr.
+// Only fields required by the VExpr base ctor (so that create_data_type
works) are set.
+static TExprNode make_coalesce_node(TPrimitiveType::type ptype, bool
is_nullable) {
+ TExprNode node;
+ node.node_type = TExprNodeType::FUNCTION_CALL;
+ node.num_children = 0;
+ node.__set_is_nullable(is_nullable);
+
+ TTypeDesc type_desc;
+ TTypeNode type_node;
+ type_node.type = TTypeNodeType::SCALAR;
+ TScalarType scalar_type;
+ scalar_type.__set_type(ptype);
+ type_node.__set_scalar_type(scalar_type);
+ type_desc.types.push_back(type_node);
+ node.__set_type(type_desc);
+
+ TFunction fn;
+ TFunctionName fn_name;
+ fn_name.function_name = "coalesce";
+ fn.name = fn_name;
+ node.__set_fn(fn);
+
+ return node;
+}
+
+// Mock child expression: returns the pre-injected ColumnPtr / DataTypePtr to
the parent expr.
+// Behavior is modeled after MockVExprForTryCast in try_cast_expr_test.cpp.
+class MockChildVExpr : public VExpr {
+public:
+ MockChildVExpr(ColumnPtr column, DataTypePtr type)
+ : _column(std::move(column)), _type(std::move(type)) {}
+
+ MOCK_CONST_METHOD0(clone, VExprSPtr());
+
+ const std::string& expr_name() const override { return _name; }
+
+ Status execute(VExprContext* context, Block* block, int* result_column_id)
const override {
+ block->insert({_column, _type, "mock_child"});
+ *result_column_id = static_cast<int>(block->columns()) - 1;
+ return Status::OK();
+ }
+
+ Status execute_column_impl(VExprContext* /*context*/, const Block*
/*block*/,
+ const Selector* /*selector*/, size_t /*count*/,
+ ColumnPtr& result_column) const override {
+ result_column = _column;
+ return Status::OK();
+ }
+
+ DataTypePtr execute_type(const Block* /*block*/) const override { return
_type; }
+
+private:
+ ColumnPtr _column;
+ DataTypePtr _type;
+ std::string _name = "mock_child";
+};
+
+// Helper: build a nullable Float64 column from a list of (value, is_null)
pairs.
+static ColumnPtr make_nullable_float64_column(const
std::vector<std::pair<double, bool>>& values) {
+ auto nested = ColumnFloat64::create();
+ auto null_map = ColumnUInt8::create();
+ for (auto& [v, is_null] : values) {
+ nested->insert_value(v);
+ null_map->insert_value(is_null ? 1 : 0);
+ }
+ return ColumnNullable::create(std::move(nested), std::move(null_map));
+}
+
+// Helper: build a non-nullable Float64 column from a list of doubles.
+static ColumnPtr make_float64_column(const std::vector<double>& values) {
+ auto col = ColumnFloat64::create();
+ for (auto v : values) {
+ col->insert_value(v);
+ }
+ return col;
+}
+
+// Helper: extract the Float64 value at `row` from the result column,
+// handling both nullable and non-nullable cases.
+static double get_float64_value(const ColumnPtr& column, size_t row, bool*
is_null = nullptr) {
+ if (const auto* nullable =
check_and_get_column<ColumnNullable>(column.get())) {
+ if (is_null) {
+ *is_null = nullable->is_null_at(row);
+ }
+ const auto& nested =
+ assert_cast<const
ColumnFloat64&>(nullable->get_nested_column()).get_data();
+ return nested[row];
+ }
+ if (is_null) {
+ *is_null = false;
+ }
+ return assert_cast<const ColumnFloat64*>(column.get())->get_data()[row];
+}
+
+class VConditionExprCoalesceTest : public ::testing::Test {};
+
+// Targets the fix: after some rows of col0 are filled, NaN/Inf values in the
+// same rows of later columns must not pollute the already-filled result via
+// "value * 0" arithmetic.
+// Input:
+// col0 (nullable, double): [1.0, NULL]
+// col1 (non-nullable, double): [NaN, 100.0]
+// Expected coalesce result: [1.0, 100.0]
+TEST_F(VConditionExprCoalesceTest, Float64_NaN_NotPolluteResult) {
+ auto coalesce_node = make_coalesce_node(TPrimitiveType::DOUBLE,
/*is_nullable=*/true);
+ auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node);
+
+ // _data_type is already set to Nullable(Float64) by the base ctor;
reassert it explicitly
+ // through the public data_type() accessor (the underlying field is
protected).
+ coalesce_expr->data_type() =
+
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>());
+
+ const double kNaN = std::numeric_limits<double>::quiet_NaN();
+ auto col0 = make_nullable_float64_column({{1.0, false}, {0.0, true}});
+ auto col1 = make_float64_column({kNaN, 100.0});
+
+ auto child0 = std::make_shared<MockChildVExpr>(
+ col0,
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>()));
+ auto child1 = std::make_shared<MockChildVExpr>(col1,
std::make_shared<DataTypeFloat64>());
+ coalesce_expr->add_child(child0);
+ coalesce_expr->add_child(child1);
+
+ VExprContext context(coalesce_expr);
+ ColumnPtr result;
+ auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr,
+ /*selector=*/nullptr,
/*count=*/2, result);
+ ASSERT_TRUE(st.ok()) << st.to_string();
+ ASSERT_TRUE(result.get() != nullptr);
+ ASSERT_EQ(result->size(), 2);
+
+ bool is_null = false;
+ auto v0 = get_float64_value(result, 0, &is_null);
+ EXPECT_FALSE(is_null);
+ EXPECT_FALSE(std::isnan(v0)) << "row0 must not be polluted by NaN, got "
<< v0;
+ EXPECT_DOUBLE_EQ(v0, 1.0);
+
+ auto v1 = get_float64_value(result, 1, &is_null);
+ EXPECT_FALSE(is_null);
+ EXPECT_DOUBLE_EQ(v1, 100.0);
+}
+
+// Same shape as above, but the polluting value in the later column is +Inf:
+// col0 (nullable, double): [2.0, NULL]
+// col1 (non-nullable, double): [+Inf, 200.0]
+// Expected coalesce result: [2.0, 200.0]
+TEST_F(VConditionExprCoalesceTest, Float64_Inf_NotPolluteResult) {
+ auto coalesce_node = make_coalesce_node(TPrimitiveType::DOUBLE,
/*is_nullable=*/true);
+ auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node);
+ coalesce_expr->data_type() =
+
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>());
+
+ const double kInf = std::numeric_limits<double>::infinity();
+ auto col0 = make_nullable_float64_column({{2.0, false}, {0.0, true}});
+ auto col1 = make_float64_column({kInf, 200.0});
+
+ coalesce_expr->add_child(std::make_shared<MockChildVExpr>(
+ col0,
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>())));
+ coalesce_expr->add_child(
+ std::make_shared<MockChildVExpr>(col1,
std::make_shared<DataTypeFloat64>()));
+
+ VExprContext context(coalesce_expr);
+ ColumnPtr result;
+ auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr,
+ /*selector=*/nullptr,
/*count=*/2, result);
+ ASSERT_TRUE(st.ok()) << st.to_string();
+ ASSERT_EQ(result->size(), 2);
+
+ bool is_null = false;
+ auto v0 = get_float64_value(result, 0, &is_null);
+ EXPECT_FALSE(is_null);
+ EXPECT_FALSE(std::isnan(v0)) << "row0 must not be polluted by Inf*0, got "
<< v0;
+ EXPECT_TRUE(std::isfinite(v0));
+ EXPECT_DOUBLE_EQ(v0, 2.0);
+
+ auto v1 = get_float64_value(result, 1, &is_null);
+ EXPECT_FALSE(is_null);
+ EXPECT_DOUBLE_EQ(v1, 200.0);
+}
+
+// Verify that when NaN itself is the valid non-null value picked by coalesce,
+// the result must preserve the NaN (the fix should not over-correct).
+// col0 (nullable, double): [NaN, NULL]
+// col1 (non-nullable, double): [10.0, 20.0]
+// Expected: row0 takes col0's NaN (non-null, value is NaN); row1 takes col1's
20.0.
+TEST_F(VConditionExprCoalesceTest, Float64_NaN_PreservedWhenSelected) {
+ auto coalesce_node = make_coalesce_node(TPrimitiveType::DOUBLE,
/*is_nullable=*/true);
+ auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node);
+ coalesce_expr->data_type() =
+
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>());
+
+ const double kNaN = std::numeric_limits<double>::quiet_NaN();
+ auto col0 = make_nullable_float64_column({{kNaN, false}, {0.0, true}});
+ auto col1 = make_float64_column({10.0, 20.0});
+
+ coalesce_expr->add_child(std::make_shared<MockChildVExpr>(
+ col0,
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>())));
+ coalesce_expr->add_child(
+ std::make_shared<MockChildVExpr>(col1,
std::make_shared<DataTypeFloat64>()));
+
+ VExprContext context(coalesce_expr);
+ ColumnPtr result;
+ auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr,
+ /*selector=*/nullptr,
/*count=*/2, result);
+ ASSERT_TRUE(st.ok()) << st.to_string();
+ ASSERT_EQ(result->size(), 2);
+
+ bool is_null = false;
+ auto v0 = get_float64_value(result, 0, &is_null);
+ EXPECT_FALSE(is_null);
+ EXPECT_TRUE(std::isnan(v0)) << "row0 should preserve NaN from col0";
+
+ auto v1 = get_float64_value(result, 1, &is_null);
+ EXPECT_FALSE(is_null);
+ EXPECT_DOUBLE_EQ(v1, 20.0);
+}
+
+// Float32 variant: same shape as the Float64 NaN case.
+TEST_F(VConditionExprCoalesceTest, Float32_NaN_NotPolluteResult) {
+ auto coalesce_node = make_coalesce_node(TPrimitiveType::FLOAT,
/*is_nullable=*/true);
+ auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node);
+ coalesce_expr->data_type() =
+
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat32>());
+
+ const float kNaN = std::numeric_limits<float>::quiet_NaN();
+
+ auto nested0 = ColumnFloat32::create();
+ nested0->insert_value(1.5f);
+ nested0->insert_value(0.0f);
+ auto null_map0 = ColumnUInt8::create();
+ null_map0->insert_value(0);
+ null_map0->insert_value(1);
+ ColumnPtr col0 = ColumnNullable::create(std::move(nested0),
std::move(null_map0));
+
+ auto nested1 = ColumnFloat32::create();
+ nested1->insert_value(kNaN);
+ nested1->insert_value(7.5f);
+ ColumnPtr col1 = std::move(nested1);
+
+ coalesce_expr->add_child(std::make_shared<MockChildVExpr>(
+ col0,
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat32>())));
+ coalesce_expr->add_child(
+ std::make_shared<MockChildVExpr>(col1,
std::make_shared<DataTypeFloat32>()));
+
+ VExprContext context(coalesce_expr);
+ ColumnPtr result;
+ auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr,
+ /*selector=*/nullptr,
/*count=*/2, result);
+ ASSERT_TRUE(st.ok()) << st.to_string();
+ ASSERT_EQ(result->size(), 2);
+
+ auto get_f32 = [&](size_t row, bool* is_null) -> float {
+ if (const auto* nullable =
check_and_get_column<ColumnNullable>(result.get())) {
+ *is_null = nullable->is_null_at(row);
+ return assert_cast<const
ColumnFloat32&>(nullable->get_nested_column()).get_data()[row];
+ }
+ *is_null = false;
+ return assert_cast<const
ColumnFloat32*>(result.get())->get_data()[row];
+ };
+
+ bool is_null = false;
+ float v0 = get_f32(0, &is_null);
+ EXPECT_FALSE(is_null);
+ EXPECT_FALSE(std::isnan(v0)) << "row0 must not be polluted by NaN, got "
<< v0;
+ EXPECT_FLOAT_EQ(v0, 1.5f);
+
+ float v1 = get_f32(1, &is_null);
+ EXPECT_FALSE(is_null);
+ EXPECT_FLOAT_EQ(v1, 7.5f);
+}
+
+// Regression: verify that the non-floating (integer) path is unaffected by
the Float branch change.
+TEST_F(VConditionExprCoalesceTest, Int32_NormalPathStillWorks) {
+ auto coalesce_node = make_coalesce_node(TPrimitiveType::INT,
/*is_nullable=*/true);
+ auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node);
+ coalesce_expr->data_type() =
+
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
+
+ auto nested0 = ColumnInt32::create();
+ nested0->insert_value(11);
+ nested0->insert_value(0);
+ auto null_map0 = ColumnUInt8::create();
+ null_map0->insert_value(0);
+ null_map0->insert_value(1);
+ ColumnPtr col0 = ColumnNullable::create(std::move(nested0),
std::move(null_map0));
+
+ auto nested1 = ColumnInt32::create();
+ nested1->insert_value(99);
+ nested1->insert_value(22);
+ ColumnPtr col1 = std::move(nested1);
+
+ coalesce_expr->add_child(std::make_shared<MockChildVExpr>(
+ col0,
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>())));
+ coalesce_expr->add_child(
+ std::make_shared<MockChildVExpr>(col1,
std::make_shared<DataTypeInt32>()));
+
+ VExprContext context(coalesce_expr);
+ ColumnPtr result;
+ auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr,
+ /*selector=*/nullptr,
/*count=*/2, result);
+ ASSERT_TRUE(st.ok()) << st.to_string();
+ ASSERT_EQ(result->size(), 2);
+
+ auto get_int = [&](size_t row, bool* is_null) -> int32_t {
+ if (const auto* nullable =
check_and_get_column<ColumnNullable>(result.get())) {
+ *is_null = nullable->is_null_at(row);
+ return assert_cast<const
ColumnInt32&>(nullable->get_nested_column()).get_data()[row];
+ }
+ *is_null = false;
+ return assert_cast<const ColumnInt32*>(result.get())->get_data()[row];
+ };
+
+ bool is_null = false;
+ EXPECT_EQ(get_int(0, &is_null), 11);
+ EXPECT_FALSE(is_null);
+ EXPECT_EQ(get_int(1, &is_null), 22);
+ EXPECT_FALSE(is_null);
+}
+
+} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]