This is an automated email from the ASF dual-hosted git repository.

mrhhsg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c1bf31ce988 [fix](json) handle space characters in JsonbPath (#54152)
c1bf31ce988 is described below

commit c1bf31ce98815c36cf6dc17d8f5071bbe9afd3fd
Author: Jerry Hu <[email protected]>
AuthorDate: Fri Aug 1 16:18:19 2025 +0800

    [fix](json) handle space characters in JsonbPath (#54152)
    
    ### What problem does this PR solve?
    
    ```sql
    select JSON_SEARCH('{ "onepotato": "foot", "one potato": "food" , "one 
\\"potato": "fool" }','all', 'fool');
    ```
    The result should be:
    ```text
    
+------------------------------------------------------------------------------------------------------+
    | JSON_SEARCH('{ "onepotato": "foot", "one potato": "food" , "one 
\\"potato": "fool" }','all', 'fool') |
    
+------------------------------------------------------------------------------------------------------+
    | "$.\"one \\\"potato\""                                                    
                           |
    
+------------------------------------------------------------------------------------------------------+
    ```
    But the actual result is:
    ```text
    
+------------------------------------------------------------------------------------------------------+
    | JSON_SEARCH('{ "onepotato": "foot", "one potato": "food" , "one 
\\"potato": "fool" }','all', 'fool') |
    
+------------------------------------------------------------------------------------------------------+
    | "$.one potato"                                                            
                           |
    
+------------------------------------------------------------------------------------------------------+
    ```
    
    The `json_extract` function has a similar issue.
    This PR also handles special characters in `JsonbPath` that require
    escaping, such as '\r', '\n', etc.
    
    Related PR: #xxx
    
    Problem Summary:
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
    - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
    - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/util/jsonb_document.h                       |  26 ++++++++++++++++++---
 .../sql_functions/json_functions/json_search.out   | Bin 6219 -> 6323 bytes
 .../json_functions/json_search.groovy              |  14 +++++++++++
 3 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/be/src/util/jsonb_document.h b/be/src/util/jsonb_document.h
index 94a674c0088..9b50135b183 100644
--- a/be/src/util/jsonb_document.h
+++ b/be/src/util/jsonb_document.h
@@ -363,7 +363,24 @@ struct leg_info {
     bool to_string(std::string* str) const {
         if (type == MEMBER_CODE) {
             str->push_back(BEGIN_MEMBER);
-            str->append(leg_ptr, leg_len);
+            bool contains_space = false;
+            std::string tmp;
+            for (auto* it = leg_ptr; it != (leg_ptr + leg_len); ++it) {
+                if (std::isspace(*it)) {
+                    contains_space = true;
+                } else if (*it == '"' || *it == ESCAPE || *it == '\r' || *it 
== '\n' ||
+                           *it == '\b' || *it == '\t') {
+                    tmp.push_back(ESCAPE);
+                }
+                tmp.push_back(*it);
+            }
+            if (contains_space) {
+                str->push_back(DOUBLE_QUOTE);
+            }
+            str->append(tmp);
+            if (contains_space) {
+                str->push_back(DOUBLE_QUOTE);
+            }
             return true;
         } else if (type == ARRAY_CODE) {
             str->push_back(BEGIN_ARRAY);
@@ -1519,8 +1536,11 @@ inline bool JsonbPath::parse_member(Stream* stream, 
JsonbPath* path) {
     const char* left_quotation_marks = nullptr;
     const char* right_quotation_marks = nullptr;
 
-    for (; !stream->exhausted(); stream->skip(1)) {
-        if (stream->peek() == ESCAPE) {
+    for (; !stream->exhausted(); stream->advance()) {
+        // Only accept space characters quoted by double quotes.
+        if (std::isspace(stream->peek()) && left_quotation_marks == nullptr) {
+            return false;
+        } else if (stream->peek() == ESCAPE) {
             stream->add_leg_len();
             stream->skip(1);
             stream->add_leg_len();
diff --git 
a/regression-test/data/query_p0/sql_functions/json_functions/json_search.out 
b/regression-test/data/query_p0/sql_functions/json_functions/json_search.out
index d5ecb9cd3b0..e4f165c8bae 100644
Binary files 
a/regression-test/data/query_p0/sql_functions/json_functions/json_search.out 
and 
b/regression-test/data/query_p0/sql_functions/json_functions/json_search.out 
differ
diff --git 
a/regression-test/suites/query_p0/sql_functions/json_functions/json_search.groovy
 
b/regression-test/suites/query_p0/sql_functions/json_functions/json_search.groovy
index 43487be6d98..db08986e8b3 100644
--- 
a/regression-test/suites/query_p0/sql_functions/json_functions/json_search.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/json_functions/json_search.groovy
@@ -118,4 +118,18 @@ suite("test_json_search") {
     qt_one_case2 """ SELECT id, $jsonValue, 'All', p, JSON_SEARCH($jsonValue, 
'One', p) FROM ${testTable} ORDER BY id; """
 
     sql "drop table ${testTable}"
+
+    qt_search1 """
+        select JSON_SEARCH('{ "onepotato": "foot", "one potato": "food" , "one 
\\\\"potato": "fool" }','all', 'food');
+    """
+
+    qt_search2 """
+        select JSON_SEARCH('{ "onepotato": "foot", "one potato": "food" , "one 
\\\\"potato": "fool" }','all', 'fool');
+    """
+
+    qt_search3 """
+        select JSON_EXTRACT('{ "onepotato": "foot", "one potato": "food" , 
"one \\\\"potato": "fool" }',
+            JSON_UNQUOTE(JSON_SEARCH('{ "onepotato": "foot", "one potato": 
"food" , "one \\\\"potato": "fool" }','all', 'fool'))
+        );
+    """
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to