This is an automated email from the ASF dual-hosted git repository. csringhofer pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit bad56af62c66c9a717d5d41d83e7e5ec74a1836a Author: Yida Wu <[email protected]> AuthorDate: Tue Mar 25 16:38:40 2025 -0700 IMPALA-13893: Add node id to the file name in tuple cache correctness verification Currently, the file name for tuple cache correctness verification was generated using only the fragment instance id. However, in some cases, multiple tuple cache nodes within the same fragment instance may share the same cache key, leading to conflicts during file comparison. This patch resolves the issue by appending the node id to the file name, make sure it is unique among nodes within the same fragment instance. Old Format: file: {fragment_instance_id} Ref file: {fragment_instance_id}_{org_fragment_instance_id}_ref New Format: file: {fragment_instance_id}_{node_id} Ref file: {fragment_instance_id}_{node_id} _{org_fragment_instance_id}_{node_id}_ref Tests: Passed query_test.test_queries.TestQueriesTextTables.test_random, which previously failed due to this issue. Change-Id: I6ce5ed05623761ace7b7a1027b762736a63e97fc Reviewed-on: http://gerrit.cloudera.org:8080/22671 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/exec/tuple-cache-node.cc | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/be/src/exec/tuple-cache-node.cc b/be/src/exec/tuple-cache-node.cc index 1f49fcb01..1911bb845 100644 --- a/be/src/exec/tuple-cache-node.cc +++ b/be/src/exec/tuple-cache-node.cc @@ -228,6 +228,14 @@ Status TupleCacheNode::VerifyAndMoveDebugCache(RuntimeState* state) { return verify_status; } +// Helper function to generate the unique id based on fragment instance id and node id. +static string GenerateFragmentNodeId( + const RuntimeState* state, const TupleCacheNode* node) { + DCHECK(state != nullptr); + DCHECK(node != nullptr); + return Substitute("$0_$1", PrintId(state->fragment_instance_id()), node->id()); +} + Status TupleCacheNode::GetNext( RuntimeState* state, RowBatch* output_row_batch, bool* eos) { SCOPED_TIMER(runtime_profile()->total_time_counter()); @@ -297,7 +305,7 @@ Status TupleCacheNode::GetNext( // option when removing metadata. Keeping this consistent ensures proper // handling. tuple_cache_mgr->StoreMetadataForTupleCache( - combined_key_, PrintId(state->fragment_instance_id())); + combined_key_, GenerateFragmentNodeId(state, this)); } tuple_cache_mgr->CompleteWrite(move(handle_), bytes_written); } else { @@ -384,13 +392,13 @@ void TupleCacheNode::Close(RuntimeState* state) { string TupleCacheNode::GetDebugDumpPath(const RuntimeState* state, const string& org_fragment_id, string* sub_dir_full_path) const { // The name of the subdirectory is hash key. - // For non-reference files, the file name is the fragment instance id. - // For reference files, the name includes the current fragment instance id, the original - // fragment id, and a "ref" suffix. - string file_name = PrintId(state->fragment_instance_id()); + // For non-reference files, the file name is the fragment instance node id. + // For reference files, the name includes the current fragment instance node id, the + // original fragment node id, and a "ref" suffix. + string file_name = GenerateFragmentNodeId(state, this); if (!org_fragment_id.empty()) { // Adds the original fragment id of the cache to the path for debugging purpose. - file_name += "_" + org_fragment_id + "_ref"; + file_name = Substitute("$0_$1_ref", file_name, org_fragment_id); } return ExecEnv::GetInstance()->tuple_cache_mgr()->GetDebugDumpPath( combined_key_, file_name, sub_dir_full_path);
