This is an automated email from the ASF dual-hosted git repository.
BiteTheDDDDt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 0d8654bd33d [fix](be) Fix varbinary literal construction (#64089)
0d8654bd33d is described below
commit 0d8654bd33da9dd93552057f753054936b0ebb3f
Author: Pxl <[email protected]>
AuthorDate: Thu Jun 4 11:34:59 2026 +0800
[fix](be) Fix varbinary literal construction (#64089)
`create_texpr_literal_node<TYPE_VARBINARY>` treated the input pointer as
`std::string*`, but Doris `Field` stores `TYPE_VARBINARY` values as
`StringView`. When TopN predicate conversion builds a VARBINARY literal
from a `Field`, the helper reinterprets a `StringView*` as a
`std::string*`, which can make `std::string` assignment read a bogus
size and request a huge allocation under ASAN.
This PR reads VARBINARY literal input as `StringView`, copies the exact
byte range into the Thrift literal, and adds VARBINARY test coverage for
`create_texpr_node_from(Field, TYPE_VARBINARY, ...)` and for a `VLiteral`
round trip. It also adds a `TYPE_VARBINARY` case to the `const void*`
overload of `create_texpr_node_from`.
---
be/src/exprs/vexpr.cpp | 4 ++++
be/src/exprs/vexpr.h | 11 ++++++-----
be/test/exprs/vexpr_test.cpp | 26 +++++++++++++++++++++++---
3 files changed, 33 insertions(+), 8 deletions(-)
diff --git a/be/src/exprs/vexpr.cpp b/be/src/exprs/vexpr.cpp
index 61e3effbdd1..65dcd4eb9de 100644
--- a/be/src/exprs/vexpr.cpp
+++ b/be/src/exprs/vexpr.cpp
@@ -164,6 +164,10 @@ TExprNode create_texpr_node_from(const void* data, const
PrimitiveType& type, in
THROW_IF_ERROR(create_texpr_literal_node<TYPE_STRING>(data, &node));
break;
}
+ case TYPE_VARBINARY: {
+ THROW_IF_ERROR(create_texpr_literal_node<TYPE_VARBINARY>(data, &node));
+ break;
+ }
case TYPE_IPV4: {
THROW_IF_ERROR(create_texpr_literal_node<TYPE_IPV4>(data, &node));
break;
diff --git a/be/src/exprs/vexpr.h b/be/src/exprs/vexpr.h
index 79f3485b3be..458f469c85e 100644
--- a/be/src/exprs/vexpr.h
+++ b/be/src/exprs/vexpr.h
@@ -39,6 +39,7 @@
#include "core/data_type/data_type_ipv6.h"
#include "core/data_type/define_primitive_type.h"
#include "core/extended_types.h"
+#include "core/string_view.h"
#include "core/types.h"
#include "core/value/large_int_value.h"
#include "core/value/timestamptz_value.h"
@@ -491,7 +492,7 @@ Status create_texpr_literal_node(const void* data,
TExprNode* node, int precisio
(*node).__set_type(create_type_desc(PrimitiveType::TYPE_BIGINT));
} else if constexpr (T == TYPE_LARGEINT) {
// data may not be 16-byte aligned; use unaligned_load to avoid UB.
- int128_t origin_value = unaligned_load<int128_t>(data);
+ auto origin_value = unaligned_load<int128_t>(data);
(*node).__set_node_type(TExprNodeType::LARGE_INT_LITERAL);
TLargeIntLiteral large_int_literal;
large_int_literal.__set_value(LargeIntValue::to_string(origin_value));
@@ -540,7 +541,7 @@ Status create_texpr_literal_node(const void* data,
TExprNode* node, int precisio
} else if constexpr (T == TYPE_DECIMALV2) {
// data may not be 16-byte aligned (DecimalV2Value stores int128_t);
// use unaligned_load to avoid UB.
- DecimalV2Value origin_value = unaligned_load<DecimalV2Value>(data);
+ auto origin_value = unaligned_load<DecimalV2Value>(data);
(*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL);
TDecimalLiteral decimal_literal;
decimal_literal.__set_value(origin_value.to_string());
@@ -562,7 +563,7 @@ Status create_texpr_literal_node(const void* data,
TExprNode* node, int precisio
(*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL64,
precision, scale));
} else if constexpr (T == TYPE_DECIMAL128I) {
// data may not be 16-byte aligned; use unaligned_load to avoid UB.
- Decimal<int128_t> origin_value =
unaligned_load<Decimal<int128_t>>(data);
+ auto origin_value = unaligned_load<Decimal<int128_t>>(data);
(*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL);
TDecimalLiteral decimal_literal;
// e.g. For a decimal(26,6) column, the initial value of the _min of
the MinMax RF
@@ -627,10 +628,10 @@ Status create_texpr_literal_node(const void* data,
TExprNode* node, int precisio
(*node).__set_node_type(TExprNodeType::TIMEV2_LITERAL);
(*node).__set_type(create_type_desc(PrimitiveType::TYPE_TIMEV2,
precision, scale));
} else if constexpr (T == TYPE_VARBINARY) {
- const auto* origin_value = reinterpret_cast<const std::string*>(data);
+ const auto* origin_value = reinterpret_cast<const StringView*>(data);
(*node).__set_node_type(TExprNodeType::VARBINARY_LITERAL);
TVarBinaryLiteral varbinary_literal;
- varbinary_literal.__set_value(*origin_value);
+ varbinary_literal.__set_value(std::string(origin_value->data(),
origin_value->size()));
(*node).__set_varbinary_literal(varbinary_literal);
(*node).__set_type(create_type_desc(PrimitiveType::TYPE_VARBINARY));
} else {
diff --git a/be/test/exprs/vexpr_test.cpp b/be/test/exprs/vexpr_test.cpp
index c2269efb39b..dc430d55915 100644
--- a/be/test/exprs/vexpr_test.cpp
+++ b/be/test/exprs/vexpr_test.cpp
@@ -489,11 +489,11 @@ TEST(TEST_VEXPR, LITERALTEST) {
}
// float
{
- VLiteral literal(create_literal<TYPE_FLOAT, float>(1024.0f));
+ VLiteral literal(create_literal<TYPE_FLOAT, float>(1024.0F));
ColumnPtr result_column;
static_cast<void>(literal.execute_column(nullptr, nullptr, nullptr, 1,
result_column));
auto v = (*result_column)[0].get<TYPE_FLOAT>();
- EXPECT_FLOAT_EQ(v, 1024.0f);
+ EXPECT_FLOAT_EQ(v, 1024.0F);
EXPECT_EQ("1024", literal.value());
auto node = std::make_shared<VLiteral>(
@@ -709,6 +709,26 @@ TEST(TEST_VEXPR, LITERALTEST) {
create_texpr_node_from((*result_column)[0], TYPE_STRING, 0,
0), true);
EXPECT_EQ(s, node->value());
}
+ // varbinary
+ {
+ const std::vector<std::string> values = {std::string("bin\0ary", 7),
+
std::string("0123456789abc\0xyz", 17)};
+ for (const auto& value : values) {
+ auto field = Field::create_field<TYPE_VARBINARY>(
+ StringView(value.data(),
cast_set<uint32_t>(value.size())));
+ auto texpr_node = create_texpr_node_from(field, TYPE_VARBINARY, 0,
0);
+ EXPECT_EQ(TExprNodeType::VARBINARY_LITERAL, texpr_node.node_type);
+ EXPECT_EQ(value, texpr_node.varbinary_literal.value);
+
+ VLiteral literal(texpr_node);
+ EXPECT_EQ(value, literal.value());
+
+ ColumnPtr result_column;
+ ASSERT_TRUE(literal.execute_column(nullptr, nullptr, nullptr, 1,
result_column).ok());
+ auto sv = (*result_column)[0].get<TYPE_VARBINARY>();
+ EXPECT_EQ(value, std::string(sv.data(), sv.size()));
+ }
+ }
// decimalv2
{
VLiteral literal(create_literal<TYPE_DECIMALV2,
std::string>(std::string("1234.56")));
@@ -912,5 +932,5 @@ TEST(VExprExecuteColumnTest, CorrectColumnPasses) {
ColumnPtr result;
auto st = expr.execute_column(nullptr, nullptr, nullptr, 1, result);
EXPECT_TRUE(st.ok());
- EXPECT_EQ(result->size(), 1u);
+ EXPECT_EQ(result->size(), 1U);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]