This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 5d7ca0712 IMPALA-13150: Possible buffer overflow in
StringVal::CopyFrom()
5d7ca0712 is described below
commit 5d7ca0712af493eca6704a3fdfcfaf16bde46ed0
Author: Daniel Becker <[email protected]>
AuthorDate: Mon Jun 10 15:22:04 2024 +0200
IMPALA-13150: Possible buffer overflow in StringVal::CopyFrom()
In StringVal::CopyFrom(), we take the 'len' parameter as a size_t, which
is usually a 64-bit unsigned integer. We pass it to the constructor of
StringVal, which takes it as an int, which is usually a 32-bit signed
integer. The constructor then allocates memory for the length using the
int value, but afterwards in CopyFrom(), we copy the buffer with the
size_t length. If size_t is indeed 64 bits and int is 32 bits, and the
value is truncated, we may copy more bytes that what we have allocated
for the destination.
Note that in the constructor of StringVal it is checked whether the
length is greater than 1GB, but if the value is truncated because of the
type conversion, the check doesn't necessarily catch it as the truncated
value may be small.
This change fixes the problem by doing the length check with 64 bit
integers in StringVal::CopyFrom().
Testing:
- added unit tests for StringVal::CopyFrom() in udf-test.cc.
Change-Id: I6a1d03d65ec4339a0f33e69ff29abdd8cc3e3067
Reviewed-on: http://gerrit.cloudera.org:8080/21501
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/src/udf/udf-test.cc | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++
be/src/udf/udf.cc | 41 +++++++++++++++++++--------------
be/src/udf/udf.h | 4 ++++
3 files changed, 89 insertions(+), 17 deletions(-)
diff --git a/be/src/udf/udf-test.cc b/be/src/udf/udf-test.cc
index 9951db25a..fabf11055 100644
--- a/be/src/udf/udf-test.cc
+++ b/be/src/udf/udf-test.cc
@@ -337,4 +337,65 @@ TEST(UdfTest, MemTest) {
}
+// Creates and manages a FunctionContext for a dummy UDF with no arguments.
+class FunctionCtxGuard {
+ public:
+ FunctionCtxGuard(): ctx_(UdfTestHarness::CreateTestContext(return_type,
arg_types)) {}
+
+ ~FunctionCtxGuard() { UdfTestHarness::CloseContext(ctx_.get()); }
+
+ FunctionContext* getCtx() { return ctx_.get(); }
+ private:
+ static const FunctionContext::TypeDesc return_type;
+ static const std::vector<FunctionContext::TypeDesc> arg_types;
+
+ const std::unique_ptr<FunctionContext> ctx_;
+};
+const FunctionContext::TypeDesc FunctionCtxGuard::return_type {};
+const std::vector<FunctionContext::TypeDesc> FunctionCtxGuard::arg_types;
+
+bool StringValEqualsStr(const StringVal& string_val, const std::string& str) {
+ return str.length() == string_val.len &&
+ std::memcmp(str.c_str(), string_val.ptr, str.length()) == 0;
+}
+
+void CheckStringValCopyFromSucceeds(const std::string& str) {
+ FunctionCtxGuard ctx_guard;
+
+ const StringVal string_val = StringVal::CopyFrom(ctx_guard.getCtx(),
+ reinterpret_cast<const uint8_t*>(str.c_str()), str.length());
+ EXPECT_TRUE(StringValEqualsStr(string_val, str));
+ EXPECT_FALSE(string_val.is_null);
+ EXPECT_FALSE(ctx_guard.getCtx()->has_error());
+}
+
+void CheckStringValCopyFromFailsWithLength(uint64_t len) {
+ FunctionCtxGuard ctx_guard;
+ const StringVal string_val = StringVal::CopyFrom(ctx_guard.getCtx(),
nullptr, len);
+ EXPECT_TRUE(string_val.is_null);
+ EXPECT_TRUE(ctx_guard.getCtx()->has_error());
+}
+
+TEST(UdfTest, TestStringValCopyFrom) {
+ const std::string empty_string = "";
+ const std::string small_string = "small";
+ const std::string longer_string = "This is a somewhat longer string.";
+
+ // Test that copying works correctly.
+ CheckStringValCopyFromSucceeds(empty_string);
+ CheckStringValCopyFromSucceeds(small_string);
+ CheckStringValCopyFromSucceeds(longer_string);
+
+ // Test that if a length bigger than StringVal::MAX_LENGTH is passed, the
result is null
+ // and an error.
+ constexpr int len32 = StringVal::MAX_LENGTH + 1;
+ CheckStringValCopyFromFailsWithLength(len32);
+
+ // Test that if a length bigger than StringVal::MAX_LENGTH is passed as a
size_t, but
+ // when truncated to int it is no longer too big, the result is still null
and an error.
+ // Regression test for IMPALA-13150.
+ constexpr uint64_t len64 = (1UL << 40) + 1;
+ CheckStringValCopyFromFailsWithLength(len64);
+}
+
IMPALA_TEST_MAIN();
diff --git a/be/src/udf/udf.cc b/be/src/udf/udf.cc
index dee3fc446..10390824f 100644
--- a/be/src/udf/udf.cc
+++ b/be/src/udf/udf.cc
@@ -525,26 +525,13 @@ void
FunctionContextImpl::SetNonConstantArgs(NonConstantArgsVector&& non_constan
// Note: this function crashes LLVM's JIT in expr-test if it's xcompiled. Do
not move to
// expr-ir.cc. This could probably use further investigation.
StringVal::StringVal(FunctionContext* context, int str_len) noexcept :
len(str_len),
-
ptr(NULL) {
- if (UNLIKELY(str_len > StringVal::MAX_LENGTH)) {
- context->SetError("String length larger than allowed limit of "
- "1 GB character data.");
- len = 0;
- is_null = true;
- } else {
- ptr = context->impl()->AllocateForResults(str_len);
- if (UNLIKELY(ptr == NULL && str_len > 0)) {
-#ifndef IMPALA_UDF_SDK_BUILD
- assert(!context->impl()->state()->GetQueryStatus().ok());
-#endif
- len = 0;
- is_null = true;
- }
- }
+
ptr(nullptr) {
+ AllocateStringValWithLenCheck(context, str_len, this);
}
StringVal StringVal::CopyFrom(FunctionContext* ctx, const uint8_t* buf, size_t
len) noexcept {
- StringVal result(ctx, len);
+ StringVal result;
+ AllocateStringValWithLenCheck(ctx, len, &result);
if (LIKELY(!result.is_null)) {
std::copy(buf, buf + len, result.ptr);
}
@@ -573,6 +560,26 @@ bool StringVal::Resize(FunctionContext* ctx, int new_len)
noexcept {
return false;
}
+void StringVal::AllocateStringValWithLenCheck(FunctionContext* ctx, uint64_t
str_len,
+ StringVal* res) {
+ if (UNLIKELY(str_len > StringVal::MAX_LENGTH)) {
+ ctx->SetError("String length larger than allowed limit of 1 GB character
data.");
+ res->len = 0;
+ res->is_null = true;
+ } else {
+ res->ptr = ctx->impl()->AllocateForResults(str_len);
+ if (UNLIKELY(res->ptr == nullptr && str_len > 0)) {
+#ifndef IMPALA_UDF_SDK_BUILD
+ assert(!ctx->impl()->state()->GetQueryStatus().ok());
+#endif
+ res->len = 0;
+ res->is_null = true;
+ } else {
+ res->len = str_len;
+ }
+ }
+}
+
void StructVal::ReserveMemory(FunctionContext* ctx) {
assert(ctx != nullptr);
assert(num_children >= 0);
diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h
index 6a6626c74..f997f7a07 100644
--- a/be/src/udf/udf.h
+++ b/be/src/udf/udf.h
@@ -710,6 +710,10 @@ struct StringVal : public AnyVal {
return ptr == other.ptr || memcmp(ptr, other.ptr, len) == 0;
}
bool operator!=(const StringVal& other) const { return !(*this == other); }
+
+ private:
+ static void AllocateStringValWithLenCheck(FunctionContext* ctx, uint64_t
str_len,
+ StringVal* res);
};
struct DecimalVal : public impala_udf::AnyVal {