mrhhsg commented on code in PR #63309:
URL: https://github.com/apache/doris/pull/63309#discussion_r3361390950


##########
be/src/util/jsonb_parser_simd.h:
##########
@@ -281,54 +293,151 @@ struct JsonbParser {
         case simdjson::ondemand::number_type::signed_integer:
         case simdjson::ondemand::number_type::unsigned_integer: {
             int128_t val = num.is_int64() ? (int128_t)num.get_int64() : 
(int128_t)num.get_uint64();
-            bool success = false;
-            if (val >= std::numeric_limits<int8_t>::min() &&
-                val <= std::numeric_limits<int8_t>::max()) {
-                success = writer.writeInt8((int8_t)val);
-            } else if (val >= std::numeric_limits<int16_t>::min() &&
-                       val <= std::numeric_limits<int16_t>::max()) {
-                success = writer.writeInt16((int16_t)val);
-            } else if (val >= std::numeric_limits<int32_t>::min() &&
-                       val <= std::numeric_limits<int32_t>::max()) {
-                success = writer.writeInt32((int32_t)val);
-            } else if (val >= std::numeric_limits<int64_t>::min() &&
-                       val <= std::numeric_limits<int64_t>::max()) {
-                success = writer.writeInt64((int64_t)val);
-            } else { // INT128
-                success = writer.writeInt128(val);
+            RETURN_IF_ERROR(write_int128(val, writer));
+            break;
+        }
+        case simdjson::ondemand::number_type::big_integer: {
+            RETURN_IF_ERROR(write_number_from_raw_json(raw_string, writer));
+            break;
+        }
+        }
+        return Status::OK();
+    }
+
+    static bool is_json_number_space(char c) {
+        return c == ' ' || c == '\t' || c == '\n' || c == '\r';
+    }
+
+    static std::string_view trim_json_number(std::string_view raw_number) {
+        while (!raw_number.empty() && 
is_json_number_space(raw_number.front())) {
+            raw_number.remove_prefix(1);
+        }
+        while (!raw_number.empty() && is_json_number_space(raw_number.back())) 
{
+            raw_number.remove_suffix(1);
+        }
+        return raw_number;
+    }
+
+    static bool is_json_number_digit(char c) { return c >= '0' && c <= '9'; }
+
+    static Status validate_json_number(std::string_view raw_number, bool& 
is_integer) {
+        if (raw_number.empty()) {
+            return Status::InvalidArgument("empty number");
+        }
+
+        size_t pos = 0;
+        if (raw_number[pos] == '-') {
+            ++pos;
+            if (pos == raw_number.size()) {
+                return Status::InvalidArgument("invalid number, raw string is: 
" +
+                                               std::string(raw_number));
             }
+        }
 
-            if (!success) {
-                return Status::InvalidArgument("writeInt failed");
+        if (raw_number[pos] == '0') {
+            ++pos;
+        } else if (raw_number[pos] >= '1' && raw_number[pos] <= '9') {
+            while (pos < raw_number.size() && 
is_json_number_digit(raw_number[pos])) {
+                ++pos;
             }
-            break;
+        } else {
+            return Status::InvalidArgument("invalid number, raw string is: " +
+                                           std::string(raw_number));
         }
-        case simdjson::ondemand::number_type::big_integer: {
-            StringParser::ParseResult result;
-            auto val = 
StringParser::string_to_int<int128_t>(raw_string.data(), raw_string.size(),
+
+        bool has_fraction = false;
+        if (pos < raw_number.size() && raw_number[pos] == '.') {
+            has_fraction = true;
+            ++pos;
+            if (pos == raw_number.size() || 
!is_json_number_digit(raw_number[pos])) {
+                return Status::InvalidArgument("invalid number, raw string is: 
" +
+                                               std::string(raw_number));
+            }
+            while (pos < raw_number.size() && 
is_json_number_digit(raw_number[pos])) {
+                ++pos;
+            }
+        }
+
+        bool has_exponent = false;
+        if (pos < raw_number.size() && (raw_number[pos] == 'e' || 
raw_number[pos] == 'E')) {
+            has_exponent = true;
+            ++pos;
+            if (pos < raw_number.size() && (raw_number[pos] == '+' || 
raw_number[pos] == '-')) {
+                ++pos;
+            }
+            if (pos == raw_number.size() || 
!is_json_number_digit(raw_number[pos])) {
+                return Status::InvalidArgument("invalid number, raw string is: 
" +
+                                               std::string(raw_number));
+            }
+            while (pos < raw_number.size() && 
is_json_number_digit(raw_number[pos])) {
+                ++pos;
+            }
+        }
+
+        if (pos != raw_number.size()) {
+            return Status::InvalidArgument("simdjson parse exception: trailing 
content");
+        }
+        is_integer = !has_fraction && !has_exponent;
+        return Status::OK();
+    }
+
+    static Status write_int128(int128_t val, JsonbWriter& writer) {
+        bool success = false;
+        if (val >= std::numeric_limits<int8_t>::min() &&
+            val <= std::numeric_limits<int8_t>::max()) {
+            success = writer.writeInt8((int8_t)val);
+        } else if (val >= std::numeric_limits<int16_t>::min() &&
+                   val <= std::numeric_limits<int16_t>::max()) {
+            success = writer.writeInt16((int16_t)val);
+        } else if (val >= std::numeric_limits<int32_t>::min() &&
+                   val <= std::numeric_limits<int32_t>::max()) {
+            success = writer.writeInt32((int32_t)val);
+        } else if (val >= std::numeric_limits<int64_t>::min() &&
+                   val <= std::numeric_limits<int64_t>::max()) {
+            success = writer.writeInt64((int64_t)val);
+        } else { // INT128
+            success = writer.writeInt128(val);
+        }
+
+        if (!success) {
+            return Status::InvalidArgument("writeInt failed");
+        }
+        return Status::OK();
+    }
+
+    static Status write_number_from_raw_json(const char* pch, size_t len, 
JsonbWriter& writer) {

Review Comment:
   已按建议删除这个 char*/len 重载,调用处直接构造 std::string_view(pch, len) 后调用 string_view 版本。



##########
be/src/util/jsonb_parser_simd.h:
##########
@@ -95,13 +84,20 @@ struct JsonbParser {
 
             // simdjson process top level primitive types specially
             // so some repeated code here
+            bool need_check_at_end = true;
             switch (doc.type()) {
             case simdjson::ondemand::json_type::object:
             case simdjson::ondemand::json_type::array: {
                 RETURN_IF_ERROR(parse(doc.get_value(), writer));
                 break;
             }
             case simdjson::ondemand::json_type::null: {
+                bool is_null = false;
+                simdjson::error_code res = doc.is_null().get(is_null);
+                if (res != simdjson::SUCCESS || !is_null) {
+                    return Status::InvalidArgument(fmt::format("simdjson get 
null failed: {}",

Review Comment:
   已拆开 simdjson 调用失败和 !is_null 两种情况;当类型判断为 null 但 is_null 为 false 时,现在返回 invalid JSON null literal,避免出现 simdjson get null failed: SUCCESS 这种误导信息。



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to