This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit ef6dad694d131f600478798674ed460d2391e7ac Author: Daniel Becker <[email protected]> AuthorDate: Tue Apr 9 14:46:44 2024 +0200 IMPALA-12986: Base64Encode fails if the 'out_len' output parameter is passed with certain values The Base64Encode function in coding-util.h with signature bool Base64Encode(const char* in, int64_t in_len, int64_t out_max, char* out, int64_t* out_len); fails if '*out_len', when passed to the function, contains a value that does not fit in a 32 bit integer. Internally we use the int sasl_encode64(const char *in, unsigned inlen, char *out, unsigned outmax, unsigned *outlen); function and explicitly cast 'out_len' to 'unsigned*'. The error is that the called sasl_encode64() function only sets the four lower bytes of '*out_len' (assuming that 'unsigned' is a 32 bit integer), and if the upper bytes are not all zero, the resulting value of '*out_len' will be incorrect. This change changes the type of 'out_len' from 'int64_t*' to 'unsigned*' to match the type that sasl_encode64() expects. Base64Decode() is also updated to use 'unsigned*'. Before this change it used an intermediate 32 bit local variable to avoid this issue. Testing: - added a regression test in coding-util-test.cc Change-Id: I35ae59fc9b3280f89ea4f7d95d27d2f21751001f Reviewed-on: http://gerrit.cloudera.org:8080/21271 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/exec/text-converter.inline.h | 2 +- be/src/exprs/string-functions-ir.cc | 4 ++-- be/src/util/coding-util-test.cc | 33 +++++++++++++++++++++++++++++++-- be/src/util/coding-util.cc | 12 +++++------- be/src/util/coding-util.h | 6 +++--- be/src/util/runtime-profile.cc | 2 +- 6 files changed, 43 insertions(+), 16 deletions(-) diff --git a/be/src/exec/text-converter.inline.h b/be/src/exec/text-converter.inline.h index 6f4c74ff9..67c6f3eae 100644 --- a/be/src/exec/text-converter.inline.h +++ b/be/src/exec/text-converter.inline.h @@ -94,7 +94,7 @@ inline bool TextConverter::WriteSlot(const SlotDescriptor* slot_desc, Tuple* tup reinterpret_cast<char*>(slot); if (UNLIKELY(str.ptr == nullptr)) return false; if (base64_decode) { - int64_t out_len; + unsigned out_len; if(!Base64Decode(data, len, buffer_len, str.ptr, &out_len)) return false; DCHECK_LE(out_len, buffer_len); str.len = out_len; diff --git a/be/src/exprs/string-functions-ir.cc b/be/src/exprs/string-functions-ir.cc index f68e483db..dc9468328 100644 --- a/be/src/exprs/string-functions-ir.cc +++ b/be/src/exprs/string-functions-ir.cc @@ -1529,7 +1529,7 @@ StringVal StringFunctions::Base64Encode(FunctionContext* ctx, const StringVal& s } StringVal result(ctx, out_max); if (UNLIKELY(result.is_null)) return result; - int64_t out_len = 0; + unsigned out_len = 0; if (UNLIKELY(!impala::Base64Encode( reinterpret_cast<const char*>(str.ptr), str.len, out_max, reinterpret_cast<char*>(result.ptr), &out_len))) { @@ -1558,7 +1558,7 @@ StringVal StringFunctions::Base64Decode(FunctionContext* ctx, const StringVal& s } StringVal result(ctx, out_max); if (UNLIKELY(result.is_null)) return result; - int64_t out_len = 0; + unsigned out_len = 0; if (UNLIKELY(!impala::Base64Decode( reinterpret_cast<const char*>(str.ptr), static_cast<int64_t>(str.len), out_max, reinterpret_cast<char*>(result.ptr), &out_len))) { diff --git a/be/src/util/coding-util-test.cc b/be/src/util/coding-util-test.cc index 4222d9c01..ad04222ae 100644 --- a/be/src/util/coding-util-test.cc +++ b/be/src/util/coding-util-test.cc @@ -58,9 +58,9 @@ void TestBase64(const string& input, const string& expected_encoded) { int64_t out_max = 0; EXPECT_TRUE(Base64DecodeBufLen(intermediate.c_str(), intermediate.size(), &out_max)); string output(out_max, '\0'); - int64_t out_len = 0; + unsigned out_len = 0; EXPECT_TRUE(Base64Decode(intermediate.c_str(), intermediate.size(), - out_max, const_cast<char*>(output.c_str()), &out_len)); + out_max, const_cast<char*>(output.c_str()), &out_len)); output.resize(out_len); EXPECT_EQ(input, output); @@ -73,6 +73,28 @@ void TestBase64(const string& input, const string& expected_encoded) { EXPECT_EQ(intermediate, intermediate2); } +// Test Base64 encoding when the variables in which the calculated maximal output size and +// the actual output size are stored have specific initial values (regression test for +// IMPALA-12986). +void TestBase64EncodeWithInitialValues(int64_t initial_max_value, + int64_t initial_out_value) { + const string bytes = "abc\1\2\3"; + + int64_t base64_max_len = initial_max_value; + bool succ = Base64EncodeBufLen(bytes.size(), &base64_max_len); + EXPECT_TRUE(succ); + + // 'base64_max_len' includes the null terminator. + string buf(base64_max_len - 1, '\0'); + unsigned base64_len = initial_out_value; + succ = Base64Encode(bytes.c_str(), bytes.size(), base64_max_len, buf.data(), + &base64_len); + EXPECT_TRUE(succ); + + const string expected = "YWJjAQID"; + EXPECT_EQ(expected, buf); +} + // Test URL encoding. Check that the values that are put in are the // same that come out. TEST(UrlCodingTest, Basic) { @@ -112,6 +134,13 @@ TEST(Base64Test, Basic) { TestBase64(string("a\0b\0", 4), "YQBiAA=="); } +TEST(Base64Test, VariousInitialVariableValues) { + TestBase64EncodeWithInitialValues(0, 0); + TestBase64EncodeWithInitialValues(5, -10); + // Test a value that doesn't fit in 32 bits. + TestBase64EncodeWithInitialValues(5, 88090617260393); +} + TEST(HtmlEscapingTest, Basic) { string before = "<html><body>&"; stringstream after; diff --git a/be/src/util/coding-util.cc b/be/src/util/coding-util.cc index b093b1e7b..a78a3791d 100644 --- a/be/src/util/coding-util.cc +++ b/be/src/util/coding-util.cc @@ -123,13 +123,13 @@ bool Base64EncodeBufLen(int64_t in_len, int64_t* out_max) { } bool Base64Encode(const char* in, int64_t in_len, int64_t out_max, char* out, - int64_t* out_len) { + unsigned* out_len) { if (UNLIKELY(in_len < 0 || in_len > std::numeric_limits<unsigned>::max() || out_max < 0 || out_max > std::numeric_limits<unsigned>::max())) { return false; } const int encode_result = sasl_encode64(in, static_cast<unsigned>(in_len), out, - static_cast<unsigned>(out_max), reinterpret_cast<unsigned*>(out_len)); + static_cast<unsigned>(out_max), out_len); if (UNLIKELY(encode_result != SASL_OK || *out_len != out_max - 1)) return false; return true; } @@ -142,7 +142,7 @@ void Base64Encode(const char* in, int64_t in_len, stringstream* out) { int64_t out_max = 0; if (UNLIKELY(!Base64EncodeBufLen(in_len, &out_max))) return; string result(out_max, '\0'); - int64_t out_len = 0; + unsigned out_len = 0; if (UNLIKELY(!Base64Encode(in, in_len, out_max, const_cast<char*>(result.c_str()), &out_len))) { return; @@ -198,12 +198,10 @@ bool Base64DecodeBufLen(const char* in, int64_t in_len, int64_t* out_max) { } bool Base64Decode(const char* in, int64_t in_len, int64_t out_max, char* out, - int64_t* out_len) { - uint32_t out_len_u32 = 0; + unsigned* out_len) { if (UNLIKELY((in_len & 3) != 0)) return false; const int decode_result = sasl_decode64(in, static_cast<unsigned>(in_len), out, - static_cast<unsigned>(out_max), &out_len_u32); - *out_len = out_len_u32; + static_cast<unsigned>(out_max), out_len); if (UNLIKELY(decode_result != SASL_OK || *out_len != out_max - 1)) return false; return true; } diff --git a/be/src/util/coding-util.h b/be/src/util/coding-util.h index 8716d0245..dd71679a3 100644 --- a/be/src/util/coding-util.h +++ b/be/src/util/coding-util.h @@ -48,9 +48,9 @@ bool Base64EncodeBufLen(int64_t in_len, int64_t* out_max); /// data, the space of size out_max should be allocated before calling this function. /// out_len saves the actual length of encoded string. bool Base64Encode(const char* in, int64_t in_len, int64_t out_max, char* out, - int64_t* out_len); + unsigned* out_len); -/// Utility method to encode input as base-64 encoded. This is not +/// Utility method to encode input as base-64 encoded. This is not /// very performant (multiple string copies) and should not be used /// in a hot path. void Base64Encode(const std::vector<uint8_t>& in, std::string* out); @@ -68,7 +68,7 @@ bool Base64DecodeBufLen(const char* in, int64_t in_len, int64_t* out_max); /// out_max should be allocated before calling this function. out_len saves the actual /// length of decoded string. bool Base64Decode(const char* in, int64_t in_len, int64_t out_max, char* out, - int64_t* out_len); + unsigned* out_len); /// Replaces &, < and > with &, < and > respectively. This is /// not the full set of required encodings, but one that should be diff --git a/be/src/util/runtime-profile.cc b/be/src/util/runtime-profile.cc index 9a531f01e..f6b848c5d 100644 --- a/be/src/util/runtime-profile.cc +++ b/be/src/util/runtime-profile.cc @@ -1687,7 +1687,7 @@ Status RuntimeProfile::DeserializeFromArchiveString( vector<uint8_t> decoded_buffer; decoded_buffer.resize(decoded_max); - int64_t decoded_len; + unsigned decoded_len; if (!Base64Decode(archive_str.c_str(), archive_str.size(), decoded_max, reinterpret_cast<char*>(decoded_buffer.data()), &decoded_len)) { return Status("Error in DeserializeFromArchiveString: Base64Decode failed.");
