This is an automated email from the ASF dual-hosted git repository.
mrhhsg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c1bf31ce988 [fix](json) handle space characters in JsonbPath (#54152)
c1bf31ce988 is described below
commit c1bf31ce98815c36cf6dc17d8f5071bbe9afd3fd
Author: Jerry Hu <[email protected]>
AuthorDate: Fri Aug 1 16:18:19 2025 +0800
[fix](json) handle space characters in JsonbPath (#54152)
### What problem does this PR solve?
```sql
select JSON_SEARCH('{ "onepotato": "foot", "one potato": "food" , "one
\\"potato": "fool" }','all', 'fool');
```
The result should be:
```text
+------------------------------------------------------------------------------------------------------+
| JSON_SEARCH('{ "onepotato": "foot", "one potato": "food" , "one
\\"potato": "fool" }','all', 'fool') |
+------------------------------------------------------------------------------------------------------+
| "$.\"one \\\"potato\""
|
+------------------------------------------------------------------------------------------------------+
```
But the actual result was:
```text
+------------------------------------------------------------------------------------------------------+
| JSON_SEARCH('{ "onepotato": "foot", "one potato": "food" , "one
\\"potato": "fool" }','all', 'fool') |
+------------------------------------------------------------------------------------------------------+
| "$.one potato"
|
+------------------------------------------------------------------------------------------------------+
```
The `json_extract` function has a similar issue.
This PR also handles special characters in `JsonbPath` that require
escaping, such as '\r', '\n', etc.
Related PR: #xxx
Problem Summary:
### Release note
None
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason? -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
---
be/src/util/jsonb_document.h | 26 ++++++++++++++++++---
.../sql_functions/json_functions/json_search.out | Bin 6219 -> 6323 bytes
.../json_functions/json_search.groovy | 14 +++++++++++
3 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/be/src/util/jsonb_document.h b/be/src/util/jsonb_document.h
index 94a674c0088..9b50135b183 100644
--- a/be/src/util/jsonb_document.h
+++ b/be/src/util/jsonb_document.h
@@ -363,7 +363,24 @@ struct leg_info {
bool to_string(std::string* str) const {
if (type == MEMBER_CODE) {
str->push_back(BEGIN_MEMBER);
- str->append(leg_ptr, leg_len);
+ bool contains_space = false;
+ std::string tmp;
+ for (auto* it = leg_ptr; it != (leg_ptr + leg_len); ++it) {
+ if (std::isspace(*it)) {
+ contains_space = true;
+ } else if (*it == '"' || *it == ESCAPE || *it == '\r' || *it
== '\n' ||
+ *it == '\b' || *it == '\t') {
+ tmp.push_back(ESCAPE);
+ }
+ tmp.push_back(*it);
+ }
+ if (contains_space) {
+ str->push_back(DOUBLE_QUOTE);
+ }
+ str->append(tmp);
+ if (contains_space) {
+ str->push_back(DOUBLE_QUOTE);
+ }
return true;
} else if (type == ARRAY_CODE) {
str->push_back(BEGIN_ARRAY);
@@ -1519,8 +1536,11 @@ inline bool JsonbPath::parse_member(Stream* stream,
JsonbPath* path) {
const char* left_quotation_marks = nullptr;
const char* right_quotation_marks = nullptr;
- for (; !stream->exhausted(); stream->skip(1)) {
- if (stream->peek() == ESCAPE) {
+ for (; !stream->exhausted(); stream->advance()) {
+ // Only accept space characters quoted by double quotes.
+ if (std::isspace(stream->peek()) && left_quotation_marks == nullptr) {
+ return false;
+ } else if (stream->peek() == ESCAPE) {
stream->add_leg_len();
stream->skip(1);
stream->add_leg_len();
diff --git
a/regression-test/data/query_p0/sql_functions/json_functions/json_search.out
b/regression-test/data/query_p0/sql_functions/json_functions/json_search.out
index d5ecb9cd3b0..e4f165c8bae 100644
Binary files
a/regression-test/data/query_p0/sql_functions/json_functions/json_search.out
and
b/regression-test/data/query_p0/sql_functions/json_functions/json_search.out
differ
diff --git
a/regression-test/suites/query_p0/sql_functions/json_functions/json_search.groovy
b/regression-test/suites/query_p0/sql_functions/json_functions/json_search.groovy
index 43487be6d98..db08986e8b3 100644
---
a/regression-test/suites/query_p0/sql_functions/json_functions/json_search.groovy
+++
b/regression-test/suites/query_p0/sql_functions/json_functions/json_search.groovy
@@ -118,4 +118,18 @@ suite("test_json_search") {
qt_one_case2 """ SELECT id, $jsonValue, 'All', p, JSON_SEARCH($jsonValue,
'One', p) FROM ${testTable} ORDER BY id; """
sql "drop table ${testTable}"
+
+ qt_search1 """
+ select JSON_SEARCH('{ "onepotato": "foot", "one potato": "food" , "one
\\\\"potato": "fool" }','all', 'food');
+ """
+
+ qt_search2 """
+ select JSON_SEARCH('{ "onepotato": "foot", "one potato": "food" , "one
\\\\"potato": "fool" }','all', 'fool');
+ """
+
+ qt_search3 """
+ select JSON_EXTRACT('{ "onepotato": "foot", "one potato": "food" ,
"one \\\\"potato": "fool" }',
+ JSON_UNQUOTE(JSON_SEARCH('{ "onepotato": "foot", "one potato":
"food" , "one \\\\"potato": "fool" }','all', 'fool'))
+ );
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]