stalary commented on code in PR #16941: URL: https://github.com/apache/doris/pull/16941#discussion_r1112442457
########## be/src/exec/es/es_scroll_parser.cpp: ########## @@ -519,7 +564,134 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, } break; } + case TYPE_ARRAY: { + vectorized::Array array; + const auto& sub_type = tuple_desc->slots()[i]->type().children[0].type; + for (auto& sub_col : col.GetArray()) { + switch (sub_type) { + case TYPE_CHAR: + case TYPE_VARCHAR: + case TYPE_STRING: { + std::string val; + if (pure_doc_value) { + if (!sub_col[0].IsString()) { + val = json_value_to_string(sub_col[0]); + } else { + val = sub_col[0].GetString(); + } + } else { + RETURN_ERROR_IF_COL_IS_ARRAY(sub_col, type); + if (!sub_col.IsString()) { + val = json_value_to_string(sub_col); + } else { + val = sub_col.GetString(); + } + } + array.push_back(val); + break; + } + case TYPE_TINYINT: { + int8_t val; + RETURN_IF_ERROR(get_int_value<int8_t>(sub_col, sub_type, &val, pure_doc_value)); + array.push_back(val); + break; + } + case TYPE_SMALLINT: { + int16_t val; + RETURN_IF_ERROR( + get_int_value<int16_t>(sub_col, sub_type, &val, pure_doc_value)); + array.push_back(val); + break; + } + case TYPE_INT: { + int32 val; + RETURN_IF_ERROR(get_int_value<int32>(sub_col, sub_type, &val, pure_doc_value)); + array.push_back(val); + break; + } + case TYPE_BIGINT: { + int64_t val; + RETURN_IF_ERROR( + get_int_value<int64_t>(sub_col, sub_type, &val, pure_doc_value)); + array.push_back(val); + break; + } + case TYPE_LARGEINT: { + __int128 val; + RETURN_IF_ERROR( + get_int_value<__int128>(sub_col, sub_type, &val, pure_doc_value)); + array.push_back(val); + break; + } + case TYPE_FLOAT: { + float val; + RETURN_IF_ERROR( + get_float_value<float>(sub_col, sub_type, &val, pure_doc_value)); + array.push_back(val); + break; + } + case TYPE_DOUBLE: { + double val; + RETURN_IF_ERROR( + get_float_value<double>(sub_col, sub_type, &val, pure_doc_value)); + array.push_back(val); + break; + } + case TYPE_BOOLEAN: { + if (sub_col.IsBool()) { + array.push_back(sub_col.GetBool()); + break; + } + 
+ if (sub_col.IsNumber()) { + array.push_back(sub_col.GetInt()); + break; + } + + bool is_nested_str = false; + if (pure_doc_value && sub_col.IsArray() && sub_col[0].IsBool()) { + array.push_back(sub_col[0].GetBool()); + break; + } else if (pure_doc_value && sub_col.IsArray() && sub_col[0].IsString()) { + is_nested_str = true; + } else if (pure_doc_value && sub_col.IsArray()) { + return Status::InternalError(ERROR_INVALID_COL_DATA, "BOOLEAN"); + } + + const rapidjson::Value& str_col = is_nested_str ? sub_col[0] : sub_col; + + const std::string& val = str_col.GetString(); + size_t val_size = str_col.GetStringLength(); + StringParser::ParseResult result; + bool b = StringParser::string_to_bool(val.c_str(), val_size, &result); + RETURN_ERROR_IF_PARSING_FAILED(result, str_col, type); + array.push_back(b); + break; + } + // date/datetime v2 is the default type for catalog table, + // see https://github.com/apache/doris/pull/16304 + // No need to support date and datetime types. + case TYPE_DATEV2: { + array.push_back(get_date_int<vectorized::DateV2ValueType, uint32_t>( + sub_col, sub_type, pure_doc_value)); + break; + } + case TYPE_DATETIMEV2: { + array.push_back(get_date_int<vectorized::DateTimeV2ValueType, uint64_t>( + sub_col, sub_type, pure_doc_value)); + break; + } + default: { + LOG(ERROR) << "Do not support Array type: " << sub_type; + break; + } + } + } + col_ptr->insert(array); + break; + } default: { + LOG(ERROR) << "Do not support data type: " << type_to_string(type); Review Comment: ```suggestion LOG(ERROR) << "Unsupported data type: " << type_to_string(type); ``` Be consistent with the following ########## docs/en/docs/lakehouse/multi-catalog/es.md: ########## @@ -88,6 +88,50 @@ After switching to the ES Catalog, you will be in the `dafault_db` so you don't | object | string | | | other | unsupported | | +### Array Type + +Elasticsearch does not have an explicit array type, but one of its fields can contain +[0 or more 
values](https://www.elastic.co/guide/en/elasticsearch/reference/current/array.html). +To indicate that a field is an array type, a specific `doris` structural annotation can be added to the +[_meta](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-meta-field.html) section of the index mapping. Review Comment: I recommend also adding a reference to the documentation for an older Elasticsearch version: https://www.elastic.co/guide/en/elasticsearch/reference/6.8/mapping-meta-field.html -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org