This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c9519d50755 [fix] Fixed length error in compress.cpp (#48210)
c9519d50755 is described below
commit c9519d507550abf95ee96604e955f8844f3e7e7a
Author: lzy <[email protected]>
AuthorDate: Wed Feb 26 14:10:15 2025 +0800
[fix] Fixed length error in compress.cpp (#48210)
### What problem does this PR solve?
Fixed length error in compress.cpp
Issue Number: close #xxx
Related PR: #47307
Problem Summary:
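The length prefix of a compressed string occupies 4 bytes, not 10, so the
result column offset must advance by 4 plus the compressed size. This change
fixes the offset, replaces the magic value with a named constant, and adds
regression tests that run COMPRESS/UNCOMPRESS on columns read from a table.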
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [x] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [x] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [x] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [x] Confirm the release note
- [x] Confirm test cases
- [x] Confirm document
- [x] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---
be/src/common/kerberos/kerberos_ticket_mgr.cpp | 1 +
be/src/vec/functions/function_compress.cpp | 14 ++++++++------
.../string_functions/test_compress_uncompress.out | Bin 741 -> 1321 bytes
.../test_compress_uncompress.groovy | 20 ++++++++++++++++++++
4 files changed, 29 insertions(+), 6 deletions(-)
diff --git a/be/src/common/kerberos/kerberos_ticket_mgr.cpp
b/be/src/common/kerberos/kerberos_ticket_mgr.cpp
index 9020a692f3b..45dca7eca4b 100644
--- a/be/src/common/kerberos/kerberos_ticket_mgr.cpp
+++ b/be/src/common/kerberos/kerberos_ticket_mgr.cpp
@@ -17,6 +17,7 @@
#include "common/kerberos/kerberos_ticket_mgr.h"
+#include <chrono>
#include <iomanip>
#include <sstream>
diff --git a/be/src/vec/functions/function_compress.cpp
b/be/src/vec/functions/function_compress.cpp
index b645e944bfe..0a50cfce01d 100644
--- a/be/src/vec/functions/function_compress.cpp
+++ b/be/src/vec/functions/function_compress.cpp
@@ -52,6 +52,8 @@ class FunctionContext;
namespace doris::vectorized {
+static constexpr size_t COMPRESS_STR_LENGTH = 4;
+
class FunctionCompress : public IFunction {
public:
static constexpr auto name = "compress";
@@ -103,17 +105,17 @@ public:
// Z_MEM_ERROR and Z_BUF_ERROR are already handled in compress,
making sure st is always Z_OK
RETURN_IF_ERROR(compression_codec->compress(data,
&compressed_str));
- col_data.resize(col_data.size() + 4 + compressed_str.size());
+ col_data.resize(col_data.size() + COMPRESS_STR_LENGTH +
compressed_str.size());
std::memcpy(col_data.data() + idx, &length, sizeof(length));
- idx += 4;
+ idx += COMPRESS_STR_LENGTH;
// The length of compress_str is not known in advance, so it
cannot be compressed directly into col_data
unsigned char* src = compressed_str.data();
for (size_t i = 0; i < compressed_str.size(); idx++, i++, src++) {
col_data[idx] = *src;
}
- col_offset[row] = col_offset[row - 1] + 10 + compressed_str.size();
+ col_offset[row] = col_offset[row - 1] + COMPRESS_STR_LENGTH +
compressed_str.size();
}
block.replace_by_position(result, std::move(result_column));
@@ -174,16 +176,16 @@ public:
}
union {
- char bytes[4];
+ char bytes[COMPRESS_STR_LENGTH];
uint32_t value;
} length;
- std::memcpy(length.bytes, data.data, 4);
+ std::memcpy(length.bytes, data.data, COMPRESS_STR_LENGTH);
size_t idx = col_data.size();
col_data.resize(col_data.size() + length.value);
uncompressed_slice = Slice(col_data.data() + idx, length.value);
- Slice compressed_data(data.data + 4, data.size - 4);
+ Slice compressed_data(data.data + COMPRESS_STR_LENGTH, data.size -
COMPRESS_STR_LENGTH);
auto st = compression_codec->decompress(compressed_data,
&uncompressed_slice);
if (!st.ok()) { // is not a
legal compressed string
diff --git
a/regression-test/data/query_p0/sql_functions/string_functions/test_compress_uncompress.out
b/regression-test/data/query_p0/sql_functions/string_functions/test_compress_uncompress.out
index be60951c955..b54c3b71d63 100644
Binary files
a/regression-test/data/query_p0/sql_functions/string_functions/test_compress_uncompress.out
and
b/regression-test/data/query_p0/sql_functions/string_functions/test_compress_uncompress.out
differ
diff --git
a/regression-test/suites/query_p0/sql_functions/string_functions/test_compress_uncompress.groovy
b/regression-test/suites/query_p0/sql_functions/string_functions/test_compress_uncompress.groovy
index 9c4df7b1ec9..7aa753891c9 100644
---
a/regression-test/suites/query_p0/sql_functions/string_functions/test_compress_uncompress.groovy
+++
b/regression-test/suites/query_p0/sql_functions/string_functions/test_compress_uncompress.groovy
@@ -136,4 +136,24 @@ suite("test_compress_uncompress") {
UNCOMPRESS(COMPRESS('12345')) AS decompressed_data
LIMIT 1;
"""
+
+ // Test 12: Multiple COMPRESS calls that COMPRESS the text_col field
multiple times directly from the table
+ order_qt_compress_multiple_calls_from_table """
+ SELECT
+ k0,
+ COMPRESS(text_col) AS comp1,
+ binary_col AS comp2
+ FROM test_compression
+ ORDER BY k0;
+ """
+
+ // Test 13: multiple COMPRESS and UNCOMPRESS calls
+ order_qt_compress_uncompress_multiple_calls_from_table """
+ SELECT
+ k0,
+ text_col AS result1,
+ UNCOMPRESS(binary_col) AS result2
+ FROM test_compression
+ ORDER BY k0;
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]