This is an automated email from the ASF dual-hosted git repository. jianliangqi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 390cdcc8953 [feature](http action)Add http action to show nested inverted index file (#38272) 390cdcc8953 is described below commit 390cdcc8953f0aaf6d3d7798e4f312f670821f65 Author: qiye <jianliang5...@gmail.com> AuthorDate: Sun Jul 28 01:02:53 2024 +0800 [feature](http action)Add http action to show nested inverted index file (#38272) ## Proposed changes Add `show_nested_index_file` http action to show contents of nested inverted index files. Support inverted index storage format v1 and v2. Add `http_client` to execute http command when the response body is too large. Usage: ``` # format v2 curl http://10.16.10.6:8147/api/show_nested_index_file?tablet_id=10090 { "tablet_id": 11479, "rowsets": [ { "rowset_id": "02000000000001993e4f197d4f45cf7c58a5d70f01990d84", "index_storage_format": "V2", "segments": [] }, { "rowset_id": "020000000000019b3e4f197d4f45cf7c58a5d70f01990d84", "index_storage_format": "V2", "segments": [ { "segment_id": 0, "idx_file_path": "/mnt/disk2/luen/mydoris/be/storage/data/46/11479/777485197/020000000000019b3e4f197d4f45cf7c58a5d70f01990d84_0.idx", "idx_file_size": 585, "indices": [ { "index_id": 11474, "index_suffix": "", "files": [ { "name": "_0.fnm", "size": 5 }, { "name": "_0.frq", "size": 3 }, { "name": "_0.tii", "size": 51 }, { "name": "_0.tis", "size": 42 }, { "name": "null_bitmap", "size": 5 }, { "name": "segments.gen", "size": 20 }, { "name": "segments_2", "size": 47 } ] }, { "index_id": 11475, "index_suffix": "", "files": [ { "name": "bkd", "size": 18 }, { "name": "bkd_index", "size": 29 }, { "name": "bkd_meta", "size": 12 }, { "name": "null_bitmap", "size": 5 } ] } ] } ] } ] } # format v1 curl http://10.16.10.6:8147/api/show_nested_index_file?tablet_id=10373 { "tablet_id": 11460, "rowsets": [ { "rowset_id": "02000000000001923e4f197d4f45cf7c58a5d70f01990d84", "index_storage_format": "V1", "segments": [] }, { "rowset_id": "02000000000001933e4f197d4f45cf7c58a5d70f01990d84", "index_storage_format": "V1", "segments": [ { "segment_id": 0, "indices": [ { "index_id": 11455, "index_suffix": "", "idx_file_path": "/mnt/disk2/luen/mydoris/be/storage/data/45/11460/1080325693/02000000000001933e4f197d4f45cf7c58a5d70f01990d84_0_11455.idx", "idx_file_size": 353, "files": [ { "name": "_0.fnm", "size": 8 }, { "name": "_0.frq", "size": 3 }, { "name": "_0.tii", "size": 51 }, { "name": "_0.tis", "size": 42 }, { "name": "null_bitmap", "size": 5 }, { "name": "segments.gen", "size": 20 }, { "name": "segments_2", "size": 47 } ] }, { "index_id": 11456, "index_suffix": "", "idx_file_path": "/mnt/disk2/luen/mydoris/be/storage/data/45/11460/1080325693/02000000000001933e4f197d4f45cf7c58a5d70f01990d84_0_11456.idx", "idx_file_size": 164, "files": [ { "name": "bkd", "size": 18 }, { "name": "bkd_index", "size": 29 }, { "name": "bkd_meta", "size": 12 }, { "name": "null_bitmap", "size": 5 } ] } ] } ] } ] } ``` --- .../http/action/show_nested_index_file_action.cpp | 78 +++++++++++++ be/src/http/action/show_nested_index_file_action.h | 46 ++++++++ be/src/olap/base_tablet.cpp | 36 ++++++ be/src/olap/base_tablet.h | 2 + be/src/olap/rowset/beta_rowset.cpp | 126 ++++++++++++++++++++ be/src/olap/rowset/beta_rowset.h | 3 + be/src/service/http_service.cpp | 11 ++ ..._nested_index_file_http_action_with_variant.out | 13 +++ .../plugins/plugin_curl_requester.groovy | 92 +++++++++++++++ .../test_show_nested_index_file_http_action.groovy | 98 ++++++++++++++++ ...sted_index_file_http_action_with_variant.groovy | 130 +++++++++++++++++++++ 11 files changed, 635 insertions(+) diff --git a/be/src/http/action/show_nested_index_file_action.cpp b/be/src/http/action/show_nested_index_file_action.cpp new file mode 100644 index 00000000000..ba5e0fc699b --- /dev/null +++ b/be/src/http/action/show_nested_index_file_action.cpp @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "http/action/show_nested_index_file_action.h" + +#include <rapidjson/rapidjson.h> + +#include <exception> +#include <string> + +#include "common/status.h" +#include "http/http_channel.h" +#include "http/http_headers.h" +#include "http/http_request.h" +#include "http/http_status.h" +#include "olap/storage_engine.h" +#include "olap/tablet_manager.h" +#include "util/stopwatch.hpp" + +namespace doris { +using namespace ErrorCode; + +const static std::string HEADER_JSON = "application/json"; + +ShowNestedIndexFileAction::ShowNestedIndexFileAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type ptype) + : HttpHandlerWithAuth(exec_env, hier, ptype) {} + +// show the nested inverted index file in the tablet +Status ShowNestedIndexFileAction::_handle_show_nested_index_file(HttpRequest* req, + std::string* json_meta) { + req->add_output_header(HttpHeaders::CONTENT_TYPE, HEADER_JSON.c_str()); + std::string req_tablet_id = req->param(TABLET_ID_KEY); + uint64_t tablet_id = 0; + try { + tablet_id = std::stoull(req_tablet_id); + } catch (const std::exception& e) { + LOG(WARNING) << "invalid argument.tablet_id:" << req_tablet_id; + return Status::InternalError("convert failed, {}", e.what()); + } + + auto tablet = DORIS_TRY(ExecEnv::get_tablet(tablet_id)); + RETURN_IF_ERROR(tablet->show_nested_index_file(json_meta)); + return Status::OK(); +} + +void ShowNestedIndexFileAction::handle(HttpRequest* req) { + MonotonicStopWatch timer; + timer.start(); + + std::string json_meta; + Status status = _handle_show_nested_index_file(req, &json_meta); + std::string status_result = status.to_json(); + timer.stop(); + LOG(INFO) << "handle show_nested_index_file request finished, result:" << status_result + << ", use time = " << timer.elapsed_time() / 1000000 << "ms"; + if (status.ok()) { + HttpChannel::send_reply(req, HttpStatus::OK, json_meta); + } else { + HttpChannel::send_reply(req, HttpStatus::INTERNAL_SERVER_ERROR, status_result); + } +} + +} // end namespace doris diff --git a/be/src/http/action/show_nested_index_file_action.h b/be/src/http/action/show_nested_index_file_action.h new file mode 100644 index 00000000000..913eec0aa27 --- /dev/null +++ b/be/src/http/action/show_nested_index_file_action.h @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <stdint.h> + +#include <string> + +#include "common/status.h" +#include "http/http_handler_with_auth.h" + +namespace doris { +class HttpRequest; +class BaseStorageEngine; +class ExecEnv; + +// This action is used to show nested inverted index file in tablet +class ShowNestedIndexFileAction : public HttpHandlerWithAuth { +public: + ShowNestedIndexFileAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type ptype); + + ~ShowNestedIndexFileAction() override = default; + + void handle(HttpRequest* req) override; + +private: + Status _handle_show_nested_index_file(HttpRequest* req, std::string* json_header); +}; + +} // end namespace doris diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index c4330667dfc..22940b40206 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -1587,4 +1587,40 @@ Status BaseTablet::calc_file_crc(uint32_t* crc_value, int64_t start_version, int return Status::OK(); } +Status BaseTablet::show_nested_index_file(std::string* json_meta) { + Version v(0, max_version_unlocked()); + std::vector<RowsetSharedPtr> rowsets; + traverse_rowsets([&rowsets, &v](const auto& rs) { + // get all rowsets + if (v.contains(rs->version())) { + rowsets.emplace_back(rs); + } + }); + std::sort(rowsets.begin(), rowsets.end(), Rowset::comparator); + + rapidjson::Document doc; + doc.SetObject(); + rapidjson::Document::AllocatorType& allocator = doc.GetAllocator(); + rapidjson::Value tabletIdValue(tablet_id()); + doc.AddMember("tablet_id", tabletIdValue, allocator); + + rapidjson::Value rowsets_value(rapidjson::kArrayType); + + for (const auto& rs : rowsets) { + rapidjson::Value rowset_value(rapidjson::kObjectType); + + auto rowset = std::static_pointer_cast<BetaRowset>(rs); + RETURN_IF_ERROR(rowset->show_nested_index_file(&rowset_value, allocator)); + rowsets_value.PushBack(rowset_value, allocator); + } + doc.AddMember("rowsets", rowsets_value, allocator); + + rapidjson::StringBuffer buffer; + rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer); + doc.Accept(writer); + *json_meta = std::string(buffer.GetString()); + + return Status::OK(); +} + } // namespace doris diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h index fc75b5e31fd..f958d398fd5 100644 --- a/be/src/olap/base_tablet.h +++ b/be/src/olap/base_tablet.h @@ -271,6 +271,8 @@ public: Status calc_file_crc(uint32_t* crc_value, int64_t start_version, int64_t end_version, int32_t* rowset_count, int64_t* file_count); + Status show_nested_index_file(std::string* json_meta); + protected: // Find the missed versions until the spec_version. // diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index 992d437da4e..6d917c78d95 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -40,6 +40,7 @@ #include "olap/rowset/beta_rowset_reader.h" #include "olap/rowset/segment_v2/inverted_index_cache.h" #include "olap/rowset/segment_v2/inverted_index_desc.h" +#include "olap/rowset/segment_v2/inverted_index_file_reader.h" #include "olap/tablet_schema.h" #include "olap/utils.h" #include "util/crc32c.h" @@ -701,4 +702,129 @@ Status BetaRowset::calc_file_crc(uint32_t* crc_value, int64_t* file_count) { return Status::OK(); } +Status BetaRowset::show_nested_index_file(rapidjson::Value* rowset_value, + rapidjson::Document::AllocatorType& allocator) { + const auto& fs = _rowset_meta->fs(); + auto storage_format = _schema->get_inverted_index_storage_format(); + auto format_str = storage_format == InvertedIndexStorageFormatPB::V1 ? "V1" : "V2"; + auto rs_id = rowset_id().to_string(); + rowset_value->AddMember("rowset_id", rapidjson::Value(rs_id.c_str(), allocator), allocator); + rowset_value->AddMember("index_storage_format", rapidjson::Value(format_str, allocator), + allocator); + rapidjson::Value segments(rapidjson::kArrayType); + for (int seg_id = 0; seg_id < num_segments(); ++seg_id) { + rapidjson::Value segment(rapidjson::kObjectType); + segment.AddMember("segment_id", rapidjson::Value(seg_id).Move(), allocator); + + auto seg_path = DORIS_TRY(segment_path(seg_id)); + auto index_file_path_prefix = InvertedIndexDescriptor::get_index_file_path_prefix(seg_path); + auto inverted_index_file_reader = std::make_unique<InvertedIndexFileReader>( + fs, std::string(index_file_path_prefix), storage_format); + RETURN_IF_ERROR(inverted_index_file_reader->init()); + auto dirs = inverted_index_file_reader->get_all_directories(); + + auto add_file_info_to_json = [&](const std::string& path, + rapidjson::Value& json_value) -> Status { + json_value.AddMember("idx_file_path", rapidjson::Value(path.c_str(), allocator), + allocator); + int64_t idx_file_size = 0; + auto st = fs->file_size(path, &idx_file_size); + if (st != Status::OK()) { + LOG(WARNING) << "show nested index file get file size error, file: " << path + << ", error: " << st.msg(); + return st; + } + json_value.AddMember("idx_file_size", rapidjson::Value(idx_file_size).Move(), + allocator); + return Status::OK(); + }; + + auto process_files = [&allocator, &inverted_index_file_reader]( + auto& index_meta, rapidjson::Value& indices, + rapidjson::Value& index) -> Status { + rapidjson::Value files_value(rapidjson::kArrayType); + std::vector<std::string> files; + auto ret = inverted_index_file_reader->open(&index_meta); + if (!ret.has_value()) { + LOG(INFO) << "InvertedIndexFileReader open error:" << ret.error(); + return Status::InternalError("InvertedIndexFileReader open error"); + } + using T = std::decay_t<decltype(ret)>; + auto reader = std::forward<T>(ret).value(); + reader->list(&files); + for (auto& file : files) { + rapidjson::Value file_value(rapidjson::kObjectType); + auto size = reader->fileLength(file.c_str()); + file_value.AddMember("name", rapidjson::Value(file.c_str(), allocator), allocator); + file_value.AddMember("size", rapidjson::Value(size).Move(), allocator); + files_value.PushBack(file_value, allocator); + } + index.AddMember("files", files_value, allocator); + indices.PushBack(index, allocator); + return Status::OK(); + }; + + if (storage_format != InvertedIndexStorageFormatPB::V1) { + auto path = InvertedIndexDescriptor::get_index_file_path_v2(index_file_path_prefix); + auto st = add_file_info_to_json(path, segment); + if (!st.ok()) { + return st; + } + rapidjson::Value indices(rapidjson::kArrayType); + for (auto& dir : *dirs) { + rapidjson::Value index(rapidjson::kObjectType); + auto index_id = dir.first.first; + auto index_suffix = dir.first.second; + index.AddMember("index_id", rapidjson::Value(index_id).Move(), allocator); + index.AddMember("index_suffix", rapidjson::Value(index_suffix.c_str(), allocator), + allocator); + + rapidjson::Value files_value(rapidjson::kArrayType); + std::vector<std::string> files; + doris::TabletIndexPB index_pb; + index_pb.set_index_id(index_id); + index_pb.set_index_suffix_name(index_suffix); + TabletIndex index_meta; + index_meta.init_from_pb(index_pb); + + auto status = process_files(index_meta, indices, index); + if (!status.ok()) { + return status; + } + } + segment.AddMember("indices", indices, allocator); + segments.PushBack(segment, allocator); + } else { + rapidjson::Value indices(rapidjson::kArrayType); + for (auto column : _rowset_meta->tablet_schema()->columns()) { + const auto* index_meta = _rowset_meta->tablet_schema()->get_inverted_index(*column); + if (index_meta == nullptr) { + continue; + } + rapidjson::Value index(rapidjson::kObjectType); + auto index_id = index_meta->index_id(); + auto index_suffix = index_meta->get_index_suffix(); + index.AddMember("index_id", rapidjson::Value(index_id).Move(), allocator); + index.AddMember("index_suffix", rapidjson::Value(index_suffix.c_str(), allocator), + allocator); + auto path = InvertedIndexDescriptor::get_index_file_path_v1(index_file_path_prefix, + index_id, index_suffix); + auto st = add_file_info_to_json(path, index); + if (!st.ok()) { + return st; + } + + auto status = process_files(*index_meta, indices, index); + if (!status.ok()) { + return status; + } + } + segment.AddMember("indices", indices, allocator); + segments.PushBack(segment, allocator); + } + } + rowset_value->AddMember("segments", segments, allocator); + return Status::OK(); +} + } // namespace doris diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h index 238073f066d..52d5ac5c8a8 100644 --- a/be/src/olap/rowset/beta_rowset.h +++ b/be/src/olap/rowset/beta_rowset.h @@ -86,6 +86,9 @@ public: Status calc_file_crc(uint32_t* crc_value, int64_t* file_count); + Status show_nested_index_file(rapidjson::Value* rowset_value, + rapidjson::Document::AllocatorType& allocator); + protected: BetaRowset(const TabletSchemaSPtr& schema, const RowsetMetaSharedPtr& rowset_meta, std::string tablet_path); diff --git a/be/src/service/http_service.cpp b/be/src/service/http_service.cpp index 86862e4dbc9..5cea6cb67ac 100644 --- a/be/src/service/http_service.cpp +++ b/be/src/service/http_service.cpp @@ -57,6 +57,7 @@ #include "http/action/reset_rpc_channel_action.h" #include "http/action/restore_tablet_action.h" #include "http/action/show_hotspot_action.h" +#include "http/action/show_nested_index_file_action.h" #include "http/action/shrink_mem_action.h" #include "http/action/snapshot_action.h" #include "http/action/stream_load.h" @@ -377,6 +378,11 @@ void HttpService::register_local_handler(StorageEngine& engine) { CalcFileCrcAction* calc_crc_action = _pool.add( new CalcFileCrcAction(_env, engine, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::GET, "/api/calc_crc", calc_crc_action); + + ShowNestedIndexFileAction* show_nested_index_file_action = _pool.add( + new ShowNestedIndexFileAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); + _ev_http_server->register_handler(HttpMethod::GET, "/api/show_nested_index_file", + show_nested_index_file_action); } void HttpService::register_cloud_handler(CloudStorageEngine& engine) { @@ -409,6 +415,11 @@ void HttpService::register_cloud_handler(CloudStorageEngine& engine) { CalcFileCrcAction* calc_crc_action = _pool.add( new CalcFileCrcAction(_env, engine, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::GET, "/api/calc_crc", calc_crc_action); + + ShowNestedIndexFileAction* show_nested_index_file_action = _pool.add( + new ShowNestedIndexFileAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); + _ev_http_server->register_handler(HttpMethod::GET, "/api/show_nested_index_file", + show_nested_index_file_action); } // NOLINTEND(readability-function-size) diff --git a/regression-test/data/inverted_index_p0/test_show_nested_index_file_http_action_with_variant.out b/regression-test/data/inverted_index_p0/test_show_nested_index_file_http_action_with_variant.out new file mode 100644 index 00000000000..abb1e67170c --- /dev/null +++ b/regression-test/data/inverted_index_p0/test_show_nested_index_file_http_action_with_variant.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +4748 + +-- !sql -- +1 + +-- !sql -- +4748 + +-- !sql -- +1 + diff --git a/regression-test/plugins/plugin_curl_requester.groovy b/regression-test/plugins/plugin_curl_requester.groovy index 77698a6a88f..899b7c4fdb6 100644 --- a/regression-test/plugins/plugin_curl_requester.groovy +++ b/regression-test/plugins/plugin_curl_requester.groovy @@ -18,8 +18,100 @@ import org.apache.doris.regression.suite.Suite import org.apache.doris.regression.util.Http import org.apache.doris.regression.util.NodeType +@Grab(group='org.apache.httpcomponents', module='httpclient', version='4.5.13') +import org.apache.http.client.methods.* +import org.apache.http.impl.client.* +import org.apache.http.util.EntityUtils +import org.apache.http.client.config.RequestConfig +import org.apache.http.conn.ConnectTimeoutException +import org.apache.http.conn.HttpHostConnectException import org.codehaus.groovy.runtime.IOGroovyMethods +Suite.metaClass.http_client = { String method, String url /* param */ -> + Suite suite = delegate as Suite + if (method != "GET" && method != "POST") { + throw new Exception("Invalid method: ${method}") + } + if (!url || !(url =~ /^https?:\/\/.+/)) { + throw new Exception("Invalid url: ${url}") + } + + Integer timeout = 60 // seconds + Integer maxRetries = 10 + Integer retryCount = 0 + Integer sleepTime = 1000 // milliseconds + + logger.info("HTTP request: ${method} ${url}") + + CloseableHttpClient httpClient = HttpClients.custom() + .setRetryHandler(new DefaultHttpRequestRetryHandler(3, true)) + .build() + + int code + String err + String out + + try { + while (retryCount < maxRetries) { + HttpRequestBase request + if (method == "GET") { + request = new HttpGet(url) + } else if (method == "POST") { + request = new HttpPost(url) + } + + RequestConfig requestConfig = RequestConfig.custom() + .setConnectTimeout(timeout * 1000) + .setSocketTimeout(timeout * 1000) + .build() + request.setConfig(requestConfig) + + try { + CloseableHttpResponse response = httpClient.execute(request) + try { + int statusCode = response.getStatusLine().getStatusCode() + String responseBody = EntityUtils.toString(response.getEntity()) + + if (statusCode >= 200 && statusCode < 300) { + code = 0 // to be compatible with the old curl function + out = responseBody + err = "" + return [code, out, err] + } else { + logger.warn("HTTP request failed with status code ${statusCode}, retrying (${++retryCount}/${maxRetries})") + } + } finally { + response.close() + } + } catch (ConnectTimeoutException | HttpHostConnectException e) { + logger.warn("Connection failed, retrying (${++retryCount}/${maxRetries}): ${e.message}") + } catch (SocketTimeoutException e) { + timeout = timeout + 10 + logger.warn("Read timed out, retrying (${++retryCount}/${maxRetries}): ${e.message}") + } catch (Exception e) { + logger.error("Error executing HTTP request: ${e.message}") + code = -1 + out = "" + err = e.message + return [code, out, err] + } + + sleep(sleepTime) + sleepTime = Math.min(sleepTime * 2, 60000) + } + + logger.error("HTTP request failed after ${maxRetries} attempts") + code = -1 + out = "" + err = "Failed after ${maxRetries} attempts" + return [code, out, err] + } finally { + httpClient.close() + } +} + +logger.info("Added 'http_client' function to Suite") + Suite.metaClass.curl = { String method, String url /* param */-> Suite suite = delegate as Suite if (method != "GET" && method != "POST") diff --git a/regression-test/suites/inverted_index_p0/test_show_nested_index_file_http_action.groovy b/regression-test/suites/inverted_index_p0/test_show_nested_index_file_http_action.groovy new file mode 100644 index 00000000000..beb11f3f40d --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_show_nested_index_file_http_action.groovy @@ -0,0 +1,98 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite("test_show_nested_index_file_http_action") { + def show_nested_index_file_on_tablet = { ip, port, tablet -> + return http_client("GET", String.format("http://%s:%s/api/show_nested_index_file?tablet_id=%s", ip, port, tablet)) + } + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); + + def run_test = { format -> + def tableName = "test_show_nested_index_file_http_action_" + format + + sql "DROP TABLE IF EXISTS ${tableName}" + sql """ + CREATE TABLE ${tableName} ( + `id` int(11) NULL, + `name` varchar(255) NULL, + `score` int(11) NULL, + index index_name (name) using inverted, + index index_score (score) using inverted + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true", + "inverted_index_storage_format" = "${format}" + ); + """ + sql """ INSERT INTO ${tableName} VALUES (1, "andy", 100); """ + sql """ INSERT INTO ${tableName} VALUES (1, "bason", 99); """ + sql """ INSERT INTO ${tableName} VALUES (2, "andy", 100); """ + sql """ INSERT INTO ${tableName} VALUES (2, "bason", 99); """ + sql """ INSERT INTO ${tableName} VALUES (3, "andy", 100); """ + sql """ INSERT INTO ${tableName} VALUES (3, "bason", 99); """ + + // select to sync meta in cloud mode + sql """ select * from ${tableName}; """ + + def tablets = sql_return_maparray """ show tablets from ${tableName}; """ + String tablet_id = tablets[0].TabletId + String backend_id = tablets[0].BackendId + String ip = backendId_to_backendIP.get(backend_id) + String port = backendId_to_backendHttpPort.get(backend_id) + def (code, out, err) = show_nested_index_file_on_tablet(ip, port, tablet_id) + logger.info("Run show_nested_index_file_on_tablet: code=" + code + ", out=" + out + ", err=" + err) + + assertTrue(code == 0) + assertEquals(tablet_id, parseJson(out.trim()).tablet_id.toString()) + def rowset_count = parseJson(out.trim()).rowsets.size(); + assertEquals(7, rowset_count) + def index_files_count = 0 + def segment_files_count = 0 + for (def rowset in parseJson(out.trim()).rowsets) { + assertEquals(format, rowset.index_storage_format) + for (int i = 0; i < rowset.segments.size(); i++) { + def segment = rowset.segments[i] + assertEquals(i, segment.segment_id) + def indices_count = segment.indices.size() + assertEquals(2, indices_count) + if (format == "V1") { + index_files_count += indices_count + } else { + index_files_count++ + } + } + segment_files_count += rowset.segments.size() + } + if (format == "V1") { + int indices_count = 2 + assertEquals(index_files_count, segment_files_count * indices_count) + } else { + assertEquals(index_files_count, segment_files_count) + } + } + + run_test("V1") + run_test("V2") +} diff --git a/regression-test/suites/inverted_index_p0/test_show_nested_index_file_http_action_with_variant.groovy b/regression-test/suites/inverted_index_p0/test_show_nested_index_file_http_action_with_variant.groovy new file mode 100644 index 00000000000..71831140a99 --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_show_nested_index_file_http_action_with_variant.groovy @@ -0,0 +1,130 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_show_nested_index_file_http_action_with_variant", "nonConcurrent,p0") { + def show_nested_index_file_on_tablet = { ip, port, tablet -> + return http_client("GET", String.format("http://%s:%s/api/show_nested_index_file?tablet_id=%s", ip, port, tablet)) + } + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); + + def set_be_config = { key, value -> + String backend_id; + backend_id = backendId_to_backendIP.keySet()[0] + def (code, out, err) = update_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), key, value) + logger.info("update config: code=" + code + ", out=" + out + ", err=" + err) + } + + def load_json_data = {tableName, file_name -> + // load the json data + streamLoad { + table "${tableName}" + + // set http request header params + set 'read_json_by_line', 'true' + set 'format', 'json' + set 'max_filter_ratio', '0.1' + file file_name // import json file + time 10000 // limit inflight 10s + + // if declared a check callback, the default check condition will ignore. + // So you must check all condition + + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + logger.info("Stream load ${file_name} result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + // assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + + set_be_config.call("memory_limitation_per_thread_for_schema_change_bytes", "6294967296") + set_be_config.call("variant_ratio_of_defaults_as_sparse_column", "1") + def run_test = { format -> + def tableName = "test_show_nested_index_file_http_action_with_variant_" + format + + sql "DROP TABLE IF EXISTS ${tableName}" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + k bigint, + v variant, + INDEX idx_var(v) USING INVERTED PROPERTIES("parser" = "english") COMMENT '' + ) + DUPLICATE KEY(`k`) + DISTRIBUTED BY HASH(k) BUCKETS 1 + properties("replication_num" = "1", "disable_auto_compaction" = "true", "inverted_index_storage_format" = "${format}"); + """ + load_json_data.call(tableName, """${getS3Url() + '/regression/gharchive.m/2015-01-01-0.json'}""") + load_json_data.call(tableName, """${getS3Url() + '/regression/gharchive.m/2015-01-01-1.json'}""") + + // select to sync meta in cloud mode + sql """ select * from ${tableName} limit 10; """ + + def tablets = sql_return_maparray """ show tablets from ${tableName}; """ + String tablet_id = tablets[0].TabletId + String backend_id = tablets[0].BackendId + String ip = backendId_to_backendIP.get(backend_id) + String port = backendId_to_backendHttpPort.get(backend_id) + def (code, out, err) = show_nested_index_file_on_tablet(ip, port, tablet_id) + logger.info("Run show_nested_index_file_on_tablet: code=" + code + ", err=" + err) + + assertTrue(code == 0) + assertEquals(tablet_id, parseJson(out.trim()).tablet_id.toString()) + def rowset_count = parseJson(out.trim()).rowsets.size(); + assertEquals(3, rowset_count) + def index_files_count = 0 + def segment_files_count = 0 + def indices_count = 0 + for (def rowset in parseJson(out.trim()).rowsets) { + assertEquals(format, rowset.index_storage_format) + for (int i = 0; i < rowset.segments.size(); i++) { + def segment = rowset.segments[i] + assertEquals(i, segment.segment_id) + indices_count += segment.indices.size() + if (format == "V1") { + index_files_count += segment.indices.size() + } else { + index_files_count++ + } + } + segment_files_count += rowset.segments.size() + } + if (format == "V1") { + assertEquals(1203, indices_count) + assertEquals(1203, index_files_count) + assertEquals(2, segment_files_count) + } else { + assertEquals(1203, indices_count) + assertEquals(2, index_files_count) + assertEquals(2, segment_files_count) + } + + qt_sql """select cast(v["payload"]["pull_request"]["additions"] as int) from ${tableName} where cast(v["repo"]["name"] as string) = 'xpressengine/xe-core' order by 1;""" + qt_sql """select count() from ${tableName} where cast(v["repo"]["name"] as string) = 'xpressengine/xe-core'""" + } + + run_test("V1") + run_test("V2") + + set_be_config.call("memory_limitation_per_thread_for_schema_change_bytes", "2147483648") +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org