github-actions[bot] commented on code in PR #64165: URL: https://github.com/apache/doris/pull/64165#discussion_r3402110438
########## be/src/runtime/scan_filter_profile.cpp: ########## @@ -0,0 +1,499 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "runtime/scan_filter_profile.h" + +#include <fmt/format.h> +#include <fmt/ranges.h> +#include <glog/logging.h> + +#include <algorithm> +#include <unordered_set> + +#include "runtime/runtime_profile.h" + +namespace doris { + +namespace { + +constexpr const char* SCAN_FILTER_INFO = "ScanFilterInfo"; +constexpr const char* KEY_RANGE_INFO = "KeyRangeInfo"; +constexpr const char* RUNTIME_FILTER_PARTITION_PRUNING = "RuntimeFilterPartitionPruning"; +constexpr int NOT_APPLIED_PROFILE_ORDER = static_cast<int>(ScanFilterStage::NUM_STAGES); + +bool is_index_stage(ScanFilterStage stage) { + return stage == ScanFilterStage::INDEX_INVERTED || stage == ScanFilterStage::INDEX_ANN || + stage == ScanFilterStage::INDEX_DICT || stage == ScanFilterStage::INDEX_BLOOM_FILTER || + stage == ScanFilterStage::INDEX_ZONE_MAP; +} + +bool is_exec_stage(ScanFilterStage stage) { + return stage == ScanFilterStage::EXEC_VECTOR || stage == ScanFilterStage::EXEC_SHORT_CIRCUIT || + stage == ScanFilterStage::EXEC_COMMON_EXPR || stage == ScanFilterStage::EXEC_RESIDUAL; +} + +void set_counter(RuntimeProfile* profile, 
const std::string& name, TUnit::type type, + const std::string& parent, int64_t level, int64_t value) { + auto* counter = profile->add_counter(name, type, parent, level); + counter->set(value); +} + +void set_root_counter(RuntimeProfile* profile, const std::string& name, TUnit::type type, + int64_t level, int64_t value) { + set_counter(profile, name, type, RuntimeProfile::ROOT_COUNTER, level, value); +} + +RuntimeProfile* get_or_create_child(RuntimeProfile* profile, const std::string& name) { + auto* child = profile->get_child(name); + if (child != nullptr) { + return child; + } + return profile->create_child(name, true, false); +} + +void add_info_string_if_not_empty(RuntimeProfile* profile, const std::string& key, + const std::string& value) { + if (!value.empty()) { + profile->add_info_string(key, value); + } +} + +const char* scan_filter_source_name(ScanFilterKind kind) { + switch (kind) { + case ScanFilterKind::NORMAL: + return "Conjunct"; + case ScanFilterKind::RUNTIME_FILTER: + return "RuntimeFilter"; + case ScanFilterKind::TOPN_FILTER: + return "TopNFilter"; + case ScanFilterKind::UNKNOWN: + return "Unknown"; + } + return "Unknown"; +} + +struct SummaryStats { + bool participated = false; + bool has_filtering_stage = false; + bool has_time = false; + int first_stage = NOT_APPLIED_PROFILE_ORDER; + int last_stage = -1; + int64_t input_rows = 0; + int64_t output_rows = 0; + int64_t filtered_rows = 0; + int64_t time_ns = 0; +}; + +void update_summary(SummaryStats* summary, ScanFilterStage stage, + const ScanFilterStageStatsSnapshot& stats) { + if (!stats.participated()) { + return; + } + const auto order = static_cast<int>(stage); + if (stats.filtered_rows > 0) { + if (!summary->has_filtering_stage || order < summary->first_stage) { + summary->first_stage = order; + summary->input_rows = stats.input_rows; + } + if (!summary->has_filtering_stage || order > summary->last_stage) { + summary->last_stage = order; + summary->output_rows = stats.output_rows; + } + 
summary->has_filtering_stage = true; + } else if (!summary->has_filtering_stage && + (!summary->participated || order > summary->last_stage)) { + summary->first_stage = order; + summary->last_stage = order; + summary->input_rows = stats.input_rows; + summary->output_rows = stats.output_rows; + } + summary->participated = true; + summary->filtered_rows += stats.filtered_rows; + if (stats.has_time) { + summary->has_time = true; + summary->time_ns += stats.time_ns; + } +} + +struct MaterializedFilterSnapshot { + ScanFilterDesc desc; + std::optional<ScanRuntimeFilterProfileStats> runtime_filter_stats; + std::array<ScanFilterStageStatsSnapshot, static_cast<size_t>(ScanFilterStage::NUM_STAGES)> + stage_snapshots; + SummaryStats total; + SummaryStats index; + SummaryStats exec; +}; + +void materialize_filter_stage(RuntimeProfile* filter_profile, ScanFilterStage stage, + const ScanFilterStageStatsSnapshot& stats) { + auto* stage_profile = get_or_create_child(filter_profile, scan_filter_stage_name(stage)); + set_root_counter(stage_profile, "InputRows", TUnit::UNIT, 2, stats.input_rows); + set_root_counter(stage_profile, "FilteredRows", TUnit::UNIT, 2, stats.filtered_rows); + if (stats.has_time) { + set_root_counter(stage_profile, "Time", TUnit::TIME_NS, 2, stats.time_ns); + } +} + +std::string scan_filter_stages_string(const MaterializedFilterSnapshot& snapshot, + bool is_key_range_source) { + std::vector<std::string> stages; + for (int i = 0; i < static_cast<int>(ScanFilterStage::NUM_STAGES); ++i) { + const auto stage = static_cast<ScanFilterStage>(i); + if (snapshot.stage_snapshots[static_cast<size_t>(stage)].participated()) { + stages.emplace_back(scan_filter_stage_name(stage)); + } + } + if (stages.empty()) { + return is_key_range_source ? 
"KeyRangeInfo" : "NotApplied"; + } + return fmt::format("{}", fmt::join(stages, " -> ")); +} + +std::string target_string(const ScanFilterDesc& desc) { + std::vector<std::string> parts; + if (desc.column_id >= 0) { + parts.emplace_back(fmt::format("column_id={}", desc.column_id)); + } + if (!desc.column_name.empty()) { + parts.emplace_back(fmt::format("column={}", desc.column_name)); + } + return fmt::format("{}", fmt::join(parts, ", ")); +} + +std::string source_string(const ScanFilterDesc& desc) { + if (desc.kind == ScanFilterKind::RUNTIME_FILTER) { + return fmt::format("{} rf_id={}", scan_filter_source_name(desc.kind), + desc.runtime_filter_id); + } + if (desc.kind == ScanFilterKind::TOPN_FILTER) { + return fmt::format("{} source_node_id={}", scan_filter_source_name(desc.kind), + desc.topn_filter_source_node_id); + } + return scan_filter_source_name(desc.kind); +} + +void materialize_filter_counters(RuntimeProfile* filter_profile, + const MaterializedFilterSnapshot& snapshot, int profile_level, + bool is_key_range_source) { + const auto* runtime_filter_stats = + snapshot.runtime_filter_stats.has_value() ? &*snapshot.runtime_filter_stats : nullptr; + filter_profile->add_info_string("Source", source_string(snapshot.desc)); + add_info_string_if_not_empty(filter_profile, "Target", target_string(snapshot.desc)); + filter_profile->add_info_string("Stages", + scan_filter_stages_string(snapshot, is_key_range_source)); + add_info_string_if_not_empty(filter_profile, "Expr", snapshot.desc.expr_debug_string); + if (profile_level >= 2 && runtime_filter_stats != nullptr && Review Comment: This debug string is now hidden at `profile_level < 2`, but `_scan_filter_profile` is enabled from profile level 1 and replaces the legacy `RuntimeFilterInfo` output. Before this PR, `RuntimeFilterConsumer::collect_realtime_profile()` always added the RF debug description whenever profiling was enabled, so level-1 profiles still showed state such as timeout/disabled diagnostics. 
With this gate, a timed-out or otherwise inactive runtime filter can still show counters like wait time, but the only state text that explains why it was not applied disappears. This is distinct from the earlier unmatched-runtime-filter thread: the synthetic RF entry is now created, but its debug string is still suppressed at profile level 1. Please keep the RF debug info at the legacy visibility level, or preserve the legacy `RuntimeFilterInfo` output at profile level 1. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
