amorynan commented on code in PR #26973: URL: https://github.com/apache/doris/pull/26973#discussion_r1398630717
########## be/src/vec/data_types/serde/data_type_struct_serde.cpp: ########## @@ -94,106 +93,125 @@ Status DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slic } return Status::OK(); } - - ReadBuffer rb(slice.data, slice.size); - ++rb.position(); + // remove '{' '}' + slice.remove_prefix(1); + slice.remove_suffix(1); + slice.trim_prefix(); bool is_explicit_names = false; - std::vector<std::string> field_names; - std::vector<ReadBuffer> field_rbs; - std::vector<size_t> field_pos; - - while (!rb.eof()) { - StringRef slot(rb.position(), rb.count()); - bool has_quota = false; - bool is_name = false; - if (!next_slot_from_string(rb, slot, is_name, has_quota)) { - return Status::InvalidArgument("Cannot read struct field from text '{}'", - slot.to_string()); - } - if (is_name) { - std::string name = slot.to_string(); - if (!next_slot_from_string(rb, slot, is_name, has_quota)) { - return Status::InvalidArgument("Cannot read struct field from text '{}'", - slot.to_string()); - } - ReadBuffer field_rb(const_cast<char*>(slot.data), slot.size); - field_names.push_back(name); - field_rbs.push_back(field_rb); - - if (!is_explicit_names) { - is_explicit_names = true; + int nested_level = 0; + bool has_quote = false; + int start_pos = 0; + size_t slice_size = slice.size; + bool key_added = false; + int idx = 0; + char quote_char = 0; + + auto elem_size = elemSerDeSPtrs.size(); + int field_pos = 0; + + for (; idx < slice_size; ++idx) { + char c = slice[idx]; + if (c == '"' || c == '\'') { + if (!has_quote) { + quote_char = c; + has_quote = !has_quote; + } else if (has_quote && quote_char == c) { + quote_char = 0; + has_quote = !has_quote; } - } else { - ReadBuffer field_rb(const_cast<char*>(slot.data), slot.size); - field_rbs.push_back(field_rb); - } - } - - // TODO: should we support insert default field value when actual field number is less than - // schema field number? - if (field_rbs.size() != elemSerDeSPtrs.size()) { - std::string cmp_str = field_rbs.size() > elemSerDeSPtrs.size() ? "more" : "less"; - return Status::InvalidArgument( - "Actual struct field number {} is {} than schema field number {}.", - field_rbs.size(), cmp_str, elemSerDeSPtrs.size()); - } - - if (is_explicit_names) { - if (field_names.size() != field_rbs.size()) { - return Status::InvalidArgument( - "Struct field name number {} is not equal to field number {}.", - field_names.size(), field_rbs.size()); - } - std::unordered_set<std::string> name_set; - for (size_t i = 0; i < field_names.size(); i++) { - // check duplicate fields - auto ret = name_set.insert(field_names[i]); - if (!ret.second) { - return Status::InvalidArgument("Struct field name {} is duplicate with others.", - field_names[i]); + } else if (c == '\\' && idx + 1 < slice_size) { //escaped + ++idx; + } else if (!has_quote && (c == '[' || c == '{')) { + ++nested_level; + } else if (!has_quote && (c == ']' || c == '}')) { + --nested_level; + } else if (!has_quote && nested_level == 0 && c == options.map_key_delim && !key_added) { Review Comment: "," -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org