This is an automated email from the ASF dual-hosted git repository.

zhangchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new c342cc52153 branch-3.0: [fix](json) fix parsing double in jsonb #46977 
(#47064)
c342cc52153 is described below

commit c342cc52153f2012ba1510c8c6a69a30fd156447
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Jan 17 19:44:41 2025 +0800

    branch-3.0: [fix](json) fix parsing double in jsonb #46977 (#47064)
    
    Cherry-picked from #46977
    
    Co-authored-by: Sun Chenyang <suncheny...@selectdb.com>
---
 be/src/util/jsonb_parser_simd.h                    | 22 +++++-
 regression-test/data/json_p0/test_json_double.csv  |  2 +
 .../data/json_p0/test_json_load_double.out         | 11 +++
 .../suites/json_p0/test_json_load_double.groovy    | 87 ++++++++++++++++++++++
 4 files changed, 118 insertions(+), 4 deletions(-)

diff --git a/be/src/util/jsonb_parser_simd.h b/be/src/util/jsonb_parser_simd.h
index 6621912a9d0..07e2ab370f7 100644
--- a/be/src/util/jsonb_parser_simd.h
+++ b/be/src/util/jsonb_parser_simd.h
@@ -136,7 +136,7 @@ public:
                 break;
             }
             case simdjson::ondemand::json_type::number: {
-                write_number(doc.get_number());
+                write_number(doc.get_number(), doc.raw_json_token());
                 break;
             }
             }
@@ -172,7 +172,7 @@ public:
             break;
         }
         case simdjson::ondemand::json_type::number: {
-            write_number(value.get_number());
+            write_number(value.get_number(), value.raw_json_token());
             break;
         }
         case simdjson::ondemand::json_type::object: {
@@ -290,9 +290,23 @@ public:
         }
     }
 
-    void write_number(simdjson::ondemand::number num) {
+    void write_number(simdjson::ondemand::number num, std::string_view 
raw_string) {
         if (num.is_double()) {
-            if (writer_.writeDouble(num.get_double()) == 0) {
+            double number = num.get_double();
+            // When a double exceeds the precision that can be represented by 
a double type in simdjson, it gets converted to 0.
+            // The correct approach is to truncate the double value 
instead.
+            if (number == 0) {
+                StringParser::ParseResult result;
+                number = 
StringParser::string_to_float<double>(raw_string.data(), raw_string.size(),
+                                                               &result);
+                if (result != StringParser::PARSE_SUCCESS) {
+                    err_ = JsonbErrType::E_INVALID_NUMBER;
+                    LOG(WARNING) << "invalid number, raw string is: " << 
raw_string;
+                    return;
+                }
+            }
+
+            if (writer_.writeDouble(number) == 0) {
                 err_ = JsonbErrType::E_OUTPUT_FAIL;
                 LOG(WARNING) << "writeDouble failed";
                 return;
diff --git a/regression-test/data/json_p0/test_json_double.csv 
b/regression-test/data/json_p0/test_json_double.csv
new file mode 100644
index 00000000000..e928633659b
--- /dev/null
+++ b/regression-test/data/json_p0/test_json_double.csv
@@ -0,0 +1,2 @@
+2      
{"rebookProfit":3.729672759600005773616970827788463793694972991943359375}
+3      3.729672759600005773616970827788463793694972991943359375
\ No newline at end of file
diff --git a/regression-test/data/json_p0/test_json_load_double.out 
b/regression-test/data/json_p0/test_json_load_double.out
new file mode 100644
index 00000000000..621c3a0910e
--- /dev/null
+++ b/regression-test/data/json_p0/test_json_load_double.out
@@ -0,0 +1,11 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql_select_src --
+3.72967275960001
+\N
+
+-- !sql_select_dst --
+1      3.72967275960001
+1      {"rebookProfit":3.72967275960001}
+2      {"rebookProfit":3.72967275960001}
+3      3.72967275960001
+
diff --git a/regression-test/suites/json_p0/test_json_load_double.groovy 
b/regression-test/suites/json_p0/test_json_load_double.groovy
new file mode 100644
index 00000000000..8c692e3e71d
--- /dev/null
+++ b/regression-test/suites/json_p0/test_json_load_double.groovy
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_json_load_double", "p0") {
+
+    def srcTable = "stringTable"
+    def dstTable = "jsonTable"
+    def dataFile = "test_json_double.csv"
+
+    sql """ DROP TABLE IF EXISTS ${srcTable} """ 
+    sql """ DROP TABLE IF EXISTS ${dstTable} """
+
+    sql """
+        CREATE TABLE IF NOT EXISTS ${srcTable} (
+            id INT not null,
+            v STRING not null
+        )
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES("replication_num" = "1");
+    """
+
+    sql """
+        CREATE TABLE IF NOT EXISTS ${dstTable} (
+            id INT not null,
+            j JSON not null
+        )
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES("replication_num" = "1");
+    """
+
+    sql """
+        insert into ${srcTable}  
values(1,'{"rebookProfit":3.729672759600005773616970827788463793694972991943359375}');
+    """
+
+    sql """
+        insert into ${srcTable}  
values(1,'3.729672759600005773616970827788463793694972991943359375');
+    """
+
+    sql """ insert into ${dstTable} select * from ${srcTable} """
+
+    // load the json data from csv file
+    streamLoad {
+        table dstTable
+        
+        file dataFile // import csv file
+        time 10000 // limit inflight 10s
+        set 'strict_mode', 'true'
+
+        // If a check callback is declared, the default check conditions are 
ignored,
+        // so you must check every condition yourself.
+        check { result, exception, startTime, endTime ->
+            if (exception != null) {
+                throw exception
+            }
+            log.info("Stream load result: ${result}".toString())
+            def json = parseJson(result)
+
+            assertEquals("success", json.Status.toLowerCase())
+            assertEquals(2, json.NumberTotalRows)
+            assertEquals(2, json.NumberLoadedRows)
+            assertTrue(json.LoadBytes > 0)
+            log.info("url: " + json.ErrorURL)
+        }
+    }
+
+    qt_sql_select_src """  select jsonb_extract(v, '\$.rebookProfit') from 
${srcTable} """
+    qt_sql_select_dst """  select * from ${dstTable} """
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to