This is an automated email from the ASF dual-hosted git repository. zhangchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new c342cc52153 branch-3.0: [fix](json) fix parsing double in jsonb #46977 (#47064) c342cc52153 is described below commit c342cc52153f2012ba1510c8c6a69a30fd156447 Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> AuthorDate: Fri Jan 17 19:44:41 2025 +0800 branch-3.0: [fix](json) fix parsing double in jsonb #46977 (#47064) Cherry-picked from #46977 Co-authored-by: Sun Chenyang <suncheny...@selectdb.com> --- be/src/util/jsonb_parser_simd.h | 22 +++++- regression-test/data/json_p0/test_json_double.csv | 2 + .../data/json_p0/test_json_load_double.out | 11 +++ .../suites/json_p0/test_json_load_double.groovy | 87 ++++++++++++++++++++++ 4 files changed, 118 insertions(+), 4 deletions(-) diff --git a/be/src/util/jsonb_parser_simd.h b/be/src/util/jsonb_parser_simd.h index 6621912a9d0..07e2ab370f7 100644 --- a/be/src/util/jsonb_parser_simd.h +++ b/be/src/util/jsonb_parser_simd.h @@ -136,7 +136,7 @@ public: break; } case simdjson::ondemand::json_type::number: { - write_number(doc.get_number()); + write_number(doc.get_number(), doc.raw_json_token()); break; } } @@ -172,7 +172,7 @@ public: break; } case simdjson::ondemand::json_type::number: { - write_number(value.get_number()); + write_number(value.get_number(), value.raw_json_token()); break; } case simdjson::ondemand::json_type::object: { @@ -290,9 +290,23 @@ public: } } - void write_number(simdjson::ondemand::number num) { + void write_number(simdjson::ondemand::number num, std::string_view raw_string) { if (num.is_double()) { - if (writer_.writeDouble(num.get_double()) == 0) { + double number = num.get_double(); + // When a double exceeds the precision that can be represented by a double type in simdjson, it gets converted to 0. + // The correct approach, should be to truncate the double value instead. + if (number == 0) { + StringParser::ParseResult result; + number = StringParser::string_to_float<double>(raw_string.data(), raw_string.size(), + &result); + if (result != StringParser::PARSE_SUCCESS) { + err_ = JsonbErrType::E_INVALID_NUMBER; + LOG(WARNING) << "invalid number, raw string is: " << raw_string; + return; + } + } + + if (writer_.writeDouble(number) == 0) { err_ = JsonbErrType::E_OUTPUT_FAIL; LOG(WARNING) << "writeDouble failed"; return; diff --git a/regression-test/data/json_p0/test_json_double.csv b/regression-test/data/json_p0/test_json_double.csv new file mode 100644 index 00000000000..e928633659b --- /dev/null +++ b/regression-test/data/json_p0/test_json_double.csv @@ -0,0 +1,2 @@ +2 {"rebookProfit":3.729672759600005773616970827788463793694972991943359375} +3 3.729672759600005773616970827788463793694972991943359375 \ No newline at end of file diff --git a/regression-test/data/json_p0/test_json_load_double.out b/regression-test/data/json_p0/test_json_load_double.out new file mode 100644 index 00000000000..621c3a0910e --- /dev/null +++ b/regression-test/data/json_p0/test_json_load_double.out @@ -0,0 +1,11 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql_select_src -- +3.72967275960001 +\N + +-- !sql_select_dst -- +1 3.72967275960001 +1 {"rebookProfit":3.72967275960001} +2 {"rebookProfit":3.72967275960001} +3 3.72967275960001 + diff --git a/regression-test/suites/json_p0/test_json_load_double.groovy b/regression-test/suites/json_p0/test_json_load_double.groovy new file mode 100644 index 00000000000..8c692e3e71d --- /dev/null +++ b/regression-test/suites/json_p0/test_json_load_double.groovy @@ -0,0 +1,87 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite("test_json_load_double", "p0") { + + def srcTable = "stringTable" + def dstTable = "jsonTable" + def dataFile = "test_json_double.csv" + + sql """ DROP TABLE IF EXISTS ${srcTable} """ + sql """ DROP TABLE IF EXISTS ${dstTable} """ + + sql """ + CREATE TABLE IF NOT EXISTS ${srcTable} ( + id INT not null, + v STRING not null + ) + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES("replication_num" = "1"); + """ + + sql """ + CREATE TABLE IF NOT EXISTS ${dstTable} ( + id INT not null, + j JSON not null + ) + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES("replication_num" = "1"); + """ + + sql """ + insert into ${srcTable} values(1,'{"rebookProfit":3.729672759600005773616970827788463793694972991943359375}'); + """ + + sql """ + insert into ${srcTable} values(1,'3.729672759600005773616970827788463793694972991943359375'); + """ + + sql """ insert into ${dstTable} select * from ${srcTable} """ + + // load the json data from csv file + streamLoad { + table dstTable + + file dataFile // import csv file + time 10000 // limit inflight 10s + set 'strict_mode', 'true' + + // if declared a check callback, the default check condition will ignore. + // So you must check all condition + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + + assertEquals("success", json.Status.toLowerCase()) + assertEquals(2, json.NumberTotalRows) + assertEquals(2, json.NumberLoadedRows) + assertTrue(json.LoadBytes > 0) + log.info("url: " + json.ErrorURL) + } + } + + qt_sql_select_src """ select jsonb_extract(v, '\$.rebookProfit') from ${srcTable} """ + qt_sql_select_dst """ select * from ${dstTable} """ + +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org