This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 134e86e2ecf [fix](function) Undefined behavior in parse_url (#49149)
134e86e2ecf is described below
commit 134e86e2ecfb705c3e7b92d613b3dafa92d3a40b
Author: Jerry Hu <[email protected]>
AuthorDate: Tue Mar 18 15:25:24 2025 +0800
[fix](function) Undefined behavior in parse_url (#49149)
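### What problem does this PR solve?
UBSan reports undefined behavior in `parse_url`: when `UrlParser::parse_url` succeeds with an empty result (for example, the `FILE` part of `http://www.baidu.com`), the result's data pointer may be null, and `StringOP::push_value_string` passes it on to `chars.insert`, which reaches a `string.h` function whose pointer argument is declared nonnull. This change pushes an empty string for that case instead. The sanitizer report follows: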
```
/root/doris/be/src/vec/common/pod_array.h:510:29: runtime error: null
pointer passed as argument 2, which is declared to never be null
/root/ldb_toolchain/bin/../usr/include/string.h:43:28: note: nonnull
attribute specified here
#0 0x55cb2c2cea1e in void doris::vectorized::PODArray<unsigned char,
4096ul, Allocator<false, false, false, DefaultMemoryAllocator>, 16ul,
15ul>::insert_assume_reserved<char const*, char const*>(char const*, char
const*) /root/doris/be/src/vec/common/pod_array.h:510:9
#1 0x55cb2c2ce8a7 in void doris::vectorized::PODArray<unsigned char,
4096ul, Allocator<false, false, false, DefaultMemoryAllocator>, 16ul,
15ul>::insert<char const*, char const*>(char const*, char const*)
/root/doris/be/src/vec/common/pod_array.h:472:9
#2 0x55cb5a0b0d50 in
doris::vectorized::StringOP::push_value_string(std::basic_string_view<char,
std::char_traits<char>> const&, unsigned long,
doris::vectorized::PODArray<unsigned char, 4096ul, Allocator<false, false,
false, DefaultMemoryAllocator>, 16ul, 15ul>&,
doris::vectorized::PODArray<unsigned int, 4096ul, Allocator<false, false,
false, DefaultMemoryAllocator>, 16ul, 15ul>&)
/root/doris/be/src/vec/functions/function_string.h:128:15
#3 0x55cb5d6843a2 in doris::Status
doris::vectorized::FunctionStringParseUrl::vector_parse<false,
true>(doris::vectorized::ColumnStr<unsigned int> const*,
std::vector<doris::UrlParser::UrlPart,
std::allocator<doris::UrlParser::UrlPart>>&, int,
doris::vectorized::PODArray<unsigned char, 4096ul, Allocator<false, false,
false, DefaultMemoryAllocator>, 16ul, 15ul>&,
doris::vectorized::PODArray<unsigned char, 4096ul, Allocator<false, false,
false, DefaultMemoryAllocator>, 16ul, 15ul>&, [...]
#4 0x55cb5d683e9d in auto
doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned
int>> const&, unsigned int, unsigned long) const::'lambda'(auto,
auto)::operator()<std::integral_constant<bool, false>,
std::integral_constant<bool, true>>(auto, auto) const
/root/doris/be/src/vec/functions/function_string.h:2783:13
#5 0x55cb5d683c39 in auto std::__invoke_impl<doris::Status,
doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned
int>> const&, unsigned int, unsigned long) const::'lambda'(auto, auto),
std::integral_constant<bool, false>, std::integral_constant<bool,
true>>(std::__invoke_other, auto&&, std::integral_constant<bool, false>&&,
std::integral_constant<bool, true>&&) /root/ldb_toolch [...]
#6 0x55cb5d683af5 in std::__invoke_result<auto,
std::integral_constant<bool, false>, std::integral_constant<bool, true>>::type
std::__invoke<doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned
int>> const&, unsigned int, unsigned long) const::'lambda'(auto, auto),
std::integral_constant<bool, false>, std::integral_constant<bool,
true>>(auto&&, std::integral_constant<bool, fals [...]
#7 0x55cb5d682dd3 in
std::__detail::__variant::__gen_vtable_impl<std::__detail::__variant::_Multi_array<std::__detail::__variant::__deduce_visit_result<doris::Status>
(*)(doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned
int>> const&, unsigned int, unsigned long) const::'lambda'(auto, auto)&&,
std::variant<std::integral_constant<bool, false>, std::integral_constant<bool,
tru [...]
#8 0x55cb5d682ad5 in decltype(auto)
std::__do_visit<std::__detail::__variant::__deduce_visit_result<doris::Status>,
doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned
int>> const&, unsigned int, unsigned long) const::'lambda'(auto, auto),
std::variant<std::integral_constant<bool, false>, std::integral_constant<bool,
true>>, std::variant<std::integral_constant<bool, false>, st [...]
#9 0x55cb5d67766b in std::invoke_result<auto,
std::__conditional<is_lvalue_reference_v<std::variant<std::integral_constant<bool,
false>, std::integral_constant<bool,
true>>>>::type<std::variant_alternative<0ul,
std::remove_reference<decltype(__variant::__as(std::declval<std::variant<std::integral_constant<bool,
false>, std::integral_constant<bool, true>>>()))>::type>::type&,
std::variant_alternative<0ul,
std::remove_reference<decltype(__variant::__as(std::declval<std::variant<std:
[...]
#10 0x55cb5d676604 in
doris::vectorized::FunctionStringParseUrl::execute_impl(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned
int>> const&, unsigned int, unsigned long) const
/root/doris/be/src/vec/functions/function_string.h:2783:13
#11 0x55cb51a43fd4 in
doris::vectorized::DefaultExecutable::execute_impl(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned
int>> const&, unsigned int, unsigned long) const
/root/doris/be/src/vec/functions/function.h:434:26
#12 0x55cb583f7dff in
doris::vectorized::PreparedFunctionImpl::_execute_skipped_constant_deal(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned
int>> const&, unsigned int, unsigned long, bool) const
/root/doris/be/src/vec/functions/function.cpp:119:16
#13 0x55cb583e6de9 in
doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned
int>> const&, unsigned int, unsigned long, bool) const
/root/doris/be/src/vec/functions/function.cpp:244:12
#14 0x55cb583e5f53 in
doris::vectorized::PreparedFunctionImpl::default_implementation_for_nulls(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned
int>> const&, unsigned int, unsigned long, bool, bool*) const
/root/doris/be/src/vec/functions/function.cpp:216:9
#15 0x55cb583f7939 in
doris::vectorized::PreparedFunctionImpl::_execute_skipped_constant_deal(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned
int>> const&, unsigned int, unsigned long, bool) const
/root/doris/be/src/vec/functions/function.cpp:110:5
#16 0x55cb583e6de9 in
doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned
int>> const&, unsigned int, unsigned long, bool) const
/root/doris/be/src/vec/functions/function.cpp:244:12
#17 0x55cb583e7069 in
doris::vectorized::PreparedFunctionImpl::execute(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned
int>> const&, unsigned int, unsigned long, bool) const
/root/doris/be/src/vec/functions/function.cpp:250:12
#18 0x55cb51a3fd95 in
doris::vectorized::IFunctionBase::execute(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned
int>> const&, unsigned int, unsigned long, bool) const
/root/doris/be/src/vec/functions/function.h:193:19
#19 0x55cb51a2797f in
doris::vectorized::VectorizedFnCall::_do_execute(doris::vectorized::VExprContext*,
doris::vectorized::Block*, int*, std::vector<unsigned int,
std::allocator<unsigned int>>&)
/root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:187:5
#20 0x55cb51a28a77 in
doris::vectorized::VectorizedFnCall::execute(doris::vectorized::VExprContext*,
doris::vectorized::Block*, int*)
/root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:220:12
#21 0x55cb51b5f3b4 in
doris::vectorized::VExprContext::execute(doris::vectorized::Block*, int*)
/root/doris/be/src/vec/exprs/vexpr_context.cpp:61:5
#22 0x55cb519a6732 in
doris::vectorized::Scanner::_do_projections(doris::vectorized::Block*,
doris::vectorized::Block*) /root/doris/be/src/vec/exec/scan/scanner.cpp:200:9
#23 0x55cb519a1b13 in
doris::vectorized::Scanner::get_block_after_projects(doris::RuntimeState*,
doris::vectorized::Block*, bool*)
/root/doris/be/src/vec/exec/scan/scanner.cpp:82:16
#24 0x55cb5192a59d in
doris::vectorized::ScannerScheduler::_scanner_scan(std::shared_ptr<doris::vectorized::ScannerContext>,
std::shared_ptr<doris::vectorized::ScanTask>)
/root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:241:5
#25 0x55cb51931c38 in
doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>,
std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()()
const::'lambda'()::operator()() const::'lambda'()::operator()() const
/root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:148:21
#26 0x55cb519314ef in
doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>,
std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()()
const::'lambda'()::operator()() const
/root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:147:31
#27 0x55cb519312fe in void std::__invoke_impl<void,
doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>,
std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()()
const::'lambda'()&>(std::__invoke_other,
doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>,
std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()()
const::'lambda'()&) /root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gn [...]
#28 0x55cb5193123e in std::enable_if<is_invocable_r_v<void,
doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>,
std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()()
const::'lambda'()&>, void>::type std::__invoke_r<void,
doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>,
std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()()
const::'lambda'()&>(doris::vectorized::ScannerSc [...]
#29 0x55cb51930e45 in std::_Function_handler<void (),
doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>,
std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()()
const::'lambda'()>::_M_invoke(std::_Any_data const&)
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290:9
#30 0x55cb216f8e3f in std::function<void ()>::operator()() const
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9
#31 0x55cb51940ec6 in
doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()::operator()()
const /root/doris/be/src/vec/exec/scan/scanner_scheduler.h:149:65
#32 0x55cb51940e7e in void std::__invoke_impl<void,
doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()&>(std::__invoke_other,
doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()&)
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:61:14
#33 0x55cb51940dbe in std::enable_if<is_invocable_r_v<void,
doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()&>,
void>::type std::__invoke_r<void,
doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()&>(doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()&)
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux- [...]
#34 0x55cb51940745 in std::_Function_handler<void (),
doris::vectorized::SimplifiedScanScheduler::submit_scan_task(doris::vectorized::SimplifiedScanTask)::'lambda'()>::_M_invoke(std::_Any_data
const&)
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290:9
#35 0x55cb216f8e3f in std::function<void ()>::operator()() const
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9
#36 0x55cb28bd9844 in doris::FunctionRunnable::run()
/root/doris/be/src/util/threadpool.cpp:64:27
#37 0x55cb28bb84b9 in doris::ThreadPool::dispatch_thread()
/root/doris/be/src/util/threadpool.cpp:616:24
#38 0x55cb28bfd263 in void std::__invoke_impl<void, void
(doris::ThreadPool::*&)(), doris::ThreadPool*&>(std::__invoke_memfun_deref,
void (doris::ThreadPool::*&)(), doris::ThreadPool*&)
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:74:14
#39 0x55cb28bfd068 in std::__invoke_result<void
(doris::ThreadPool::*&)(), doris::ThreadPool*&>::type std::__invoke<void
(doris::ThreadPool::*&)(), doris::ThreadPool*&>(void (doris::ThreadPool::*&)(),
doris::ThreadPool*&)
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:96:14
#40 0x55cb28bfcfa0 in void std::_Bind<void (doris::ThreadPool::*
(doris::ThreadPool*))()>::__call<void, 0ul>(std::tuple<>&&,
std::_Index_tuple<0ul>)
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/functional:506:11
#41 0x55cb28bfcd95 in void std::_Bind<void (doris::ThreadPool::*
(doris::ThreadPool*))()>::operator()<void>()
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/functional:591:17
#42 0x55cb28bfcc8e in void std::__invoke_impl<void, std::_Bind<void
(doris::ThreadPool::* (doris::ThreadPool*))()>&>(std::__invoke_other,
std::_Bind<void (doris::ThreadPool::* (doris::ThreadPool*))()>&)
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:61:14
#43 0x55cb28bfcbce in std::enable_if<is_invocable_r_v<void,
std::_Bind<void (doris::ThreadPool::* (doris::ThreadPool*))()>&>, void>::type
std::__invoke_r<void, std::_Bind<void (doris::ThreadPool::*
(doris::ThreadPool*))()>&>(std::_Bind<void (doris::ThreadPool::*
(doris::ThreadPool*))()>&)
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:111:2
#44 0x55cb28bfc665 in std::_Function_handler<void (), std::_Bind<void
(doris::ThreadPool::* (doris::ThreadPool*))()>>::_M_invoke(std::_Any_data
const&)
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290:9
#45 0x55cb216f8e3f in std::function<void ()>::operator()() const
/root/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9
#46 0x55cb28b74241 in doris::Thread::supervise_thread(void*)
/root/doris/be/src/util/thread.cpp:498:5
#47 0x55cb2142be0a in asan_thread_start(void*) crtstuff.c
#48 0x7f17840221c9 in start_thread (/lib64/libpthread.so.0+0x81c9)
(BuildId: 7c4add5c7a885e6ff4ce17867d6a2286e4420eec)
#49 0x7f1784a118d2 in clone (/lib64/libc.so.6+0x398d2) (BuildId:
4ee3325955e3b55b6805f33959b7cb77745ad625)
---
be/src/vec/functions/function_string.h | 6 +++++-
be/test/vec/function/function_string_test.cpp | 3 ++-
.../data/function_p0/test_function_string.out | Bin 121 -> 188 bytes
.../suites/function_p0/test_function_string.groovy | 24 +++++++++++++++++++++
4 files changed, 31 insertions(+), 2 deletions(-)
diff --git a/be/src/vec/functions/function_string.h
b/be/src/vec/functions/function_string.h
index 3b909f4a8d5..5b37cc44c5d 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -122,6 +122,7 @@ struct StringOP {
static void push_value_string(const std::string_view& string_value, size_t
index,
ColumnString::Chars& chars,
ColumnString::Offsets& offsets) {
+ DCHECK(string_value.data() != nullptr);
ColumnString::check_chars_length(chars.size() + string_value.size(),
offsets.size());
chars.insert(string_value.data(), string_value.data() +
string_value.size());
@@ -2802,11 +2803,14 @@ public:
StringRef url_val =
url_col->get_data_at(index_check_const<url_const>(i));
StringRef parse_res;
if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
+ if (parse_res.empty()) [[unlikely]] {
+ StringOP::push_empty_string(i, res_chars, res_offsets);
+ continue;
+ }
StringOP::push_value_string(std::string_view(parse_res.data,
parse_res.size), i,
res_chars, res_offsets);
} else {
StringOP::push_null_string(i, res_chars, res_offsets,
null_map_data);
- continue;
}
}
return Status::OK();
diff --git a/be/test/vec/function/function_string_test.cpp
b/be/test/vec/function/function_string_test.cpp
index 6dc0e4ba42f..2a0326361fd 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -2284,7 +2284,8 @@ TEST(function_string_test, function_parse_url_test) {
{{std::string(
"https://www.facebook.com/aa/bb?returnpage=https://www.facebook.com/"),
std::string("HosT")},
- std::string("www.facebook.com")}};
+ std::string("www.facebook.com")},
+ {{std::string("http://www.baidu.com"), std::string("FILE")},
{std::string("")}}};
check_function_all_arg_comb<DataTypeString, true>(func_name,
input_types, data_set);
}
diff --git a/regression-test/data/function_p0/test_function_string.out
b/regression-test/data/function_p0/test_function_string.out
index 226d3e675f3..6524bb82fc0 100644
Binary files a/regression-test/data/function_p0/test_function_string.out and
b/regression-test/data/function_p0/test_function_string.out differ
diff --git a/regression-test/suites/function_p0/test_function_string.groovy
b/regression-test/suites/function_p0/test_function_string.groovy
index 5aa46fb6c52..28e4d832336 100644
--- a/regression-test/suites/function_p0/test_function_string.groovy
+++ b/regression-test/suites/function_p0/test_function_string.groovy
@@ -47,4 +47,28 @@ suite("test_function_string") {
drop table if exists test_tb_function_space;
"""
+
+ sql """
+ drop table if exists test_parse_url;
+ """
+
+ sql """
+ CREATE TABLE `test_parse_url` (
+ `id` int NULL,
+ `url` text NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY RANDOM BUCKETS AUTO
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """
+ insert into test_parse_url values (1, 'http://www.facebook.com'), (2,
"http://www.google.com/test?name=abc&age=20");
+ """
+
+ qt_sql """
+ select parse_url(url, 'HOST') as host, parse_url(url, 'FILE') as file
from test_parse_url order by id;
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]