This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new f0a781806 IMPALA-14494: Tag catalogd logs of GetPartialCatalogObject
requests with correct query ids
f0a781806 is described below
commit f0a781806fa0bd2b2a4ab5af7f31f3bee4100654
Author: stiga-huang <[email protected]>
AuthorDate: Tue Oct 14 09:40:38 2025 +0800
IMPALA-14494: Tag catalogd logs of GetPartialCatalogObject requests with
correct query ids
Catalogd logs of GetPartialCatalogObject requests are not tagged with
correct query ids. Instead, the query id that is previously using that
thread is printed in the logs. This is fixed by using
ScopedThreadContext which resets the query id at the end of the RPC
code.
Add DCHECKs to make sure ThreadDebugInfo is initialized before being
used in Catalog methods. An instance is added in CatalogdMain() for
this.
This patch also adds the query id in GetPartialCatalogObject requests so
catalogd can tag the responding thread with it.
Some codes are copied from Michael Smith's patch:
https://gerrit.cloudera.org/c/22738/
Tested by enabling TRACE logging in org.apache.impala.common.JniUtil to
verify logs of GetPartialCatalogObject requests.
I20251014 09:39:39.685225 342587 JniUtil.java:165]
964e37e9303d6f8a:eab7096000000000] getPartialCatalogObject request: Getting
partial catalog object of CATALOG_SERVICE_ID
I20251014 09:39:39.690346 342587 JniUtil.java:176]
964e37e9303d6f8a:eab7096000000000] Finished getPartialCatalogObject request:
Getting partial catalog object of CATALOG_SERVICE_ID. Time spent: 5ms
I20251014 09:39:39.699471 342587 JniUtil.java:165]
964e37e9303d6f8a:eab7096000000000] getPartialCatalogObject request: Getting
partial catalog object of DATABASE:functional
I20251014 09:39:39.701821 342587 JniUtil.java:176]
964e37e9303d6f8a:eab7096000000000] Finished getPartialCatalogObject request:
Getting partial catalog object of DATABASE:functional. Time spent: 2ms
I20251014 09:39:39.711462 341074 TAcceptQueueServer.cpp:368] New connection
to server CatalogService from client <Host: 127.0.0.1 Port: 42084>
I20251014 09:39:39.719146 342588 JniUtil.java:165]
964e37e9303d6f8a:eab7096000000000] getPartialCatalogObject request: Getting
partial catalog object of TABLE:functional.alltypestiny
Change-Id: Ie63363ac60e153e3a69f2a4cf6a0f4ce10701674
Reviewed-on: http://gerrit.cloudera.org:8080/23535
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/src/catalog/catalog.cc | 26 +++++++++++++++++++-------
be/src/catalog/catalogd-main.cc | 3 +++
be/src/service/fe-support.cc | 12 ++++++++++++
common/thrift/CatalogService.thrift | 3 +++
4 files changed, 37 insertions(+), 7 deletions(-)
diff --git a/be/src/catalog/catalog.cc b/be/src/catalog/catalog.cc
index 2993215eb..08d8fad6a 100644
--- a/be/src/catalog/catalog.cc
+++ b/be/src/catalog/catalog.cc
@@ -117,6 +117,11 @@ Status Catalog::GetJsonCatalogObject(const TCatalogObject&
req, string* res) {
Status Catalog::GetPartialCatalogObject(const TGetPartialCatalogObjectRequest&
req,
TGetPartialCatalogObjectResponse* resp) {
+ TUniqueId query_id;
+ if (req.__isset.header && req.header.__isset.query_id) {
+ query_id = req.header.query_id;
+ }
+ ScopedThreadContext scoped_tdi(DCHECK_NOTNULL(GetThreadDebugInfo()),
query_id);
return JniUtil::CallJniMethod(catalog_, get_partial_catalog_object_id_, req,
resp);
}
@@ -147,25 +152,31 @@ Status Catalog::GetCatalogDelta(CatalogServer* caller,
int64_t from_version,
}
Status Catalog::ExecDdl(const TDdlExecRequest& req, TDdlExecResponse* resp) {
+ TUniqueId query_id;
if (req.__isset.header && req.header.__isset.query_id) {
- GetThreadDebugInfo()->SetQueryId(req.header.query_id);
+ query_id = req.header.query_id;
}
+ ScopedThreadContext scoped_tdi(DCHECK_NOTNULL(GetThreadDebugInfo()),
query_id);
return JniUtil::CallJniMethod(catalog_, exec_ddl_id_, req, resp);
}
Status Catalog::ResetMetadata(const TResetMetadataRequest& req,
TResetMetadataResponse* resp) {
+ TUniqueId query_id;
if (req.__isset.header && req.header.__isset.query_id) {
- GetThreadDebugInfo()->SetQueryId(req.header.query_id);
+ query_id = req.header.query_id;
}
+ ScopedThreadContext scoped_tdi(DCHECK_NOTNULL(GetThreadDebugInfo()),
query_id);
return JniUtil::CallJniMethod(catalog_, reset_metadata_id_, req, resp);
}
Status Catalog::UpdateCatalog(const TUpdateCatalogRequest& req,
TUpdateCatalogResponse* resp) {
+ TUniqueId query_id;
if (req.__isset.header && req.header.__isset.query_id) {
- GetThreadDebugInfo()->SetQueryId(req.header.query_id);
+ query_id = req.header.query_id;
}
+ ScopedThreadContext scoped_tdi(DCHECK_NOTNULL(GetThreadDebugInfo()),
query_id);
return JniUtil::CallJniMethod(catalog_, update_metastore_id_, req, resp);
}
@@ -216,9 +227,11 @@ Status Catalog::GetFunctions(const TGetFunctionsRequest&
request,
}
Status Catalog::PrioritizeLoad(const TPrioritizeLoadRequest& req) {
+ TUniqueId query_id;
if (req.__isset.header && req.header.__isset.query_id) {
- GetThreadDebugInfo()->SetQueryId(req.header.query_id);
+ query_id = req.header.query_id;
}
+ ScopedThreadContext scoped_tdi(DCHECK_NOTNULL(GetThreadDebugInfo()),
query_id);
return JniUtil::CallJniMethod(catalog_, prioritize_load_id_, req);
}
@@ -261,8 +274,7 @@ Status Catalog::SetEventProcessorStatus(
Status Catalog::WaitForHmsEvent(const TWaitForHmsEventRequest& req,
TWaitForHmsEventResponse* resp) {
- if (req.header.__isset.query_id) {
- GetThreadDebugInfo()->SetQueryId(req.header.query_id);
- }
+ ScopedThreadContext scoped_tdi(
+ DCHECK_NOTNULL(GetThreadDebugInfo()), req.header.query_id);
return JniUtil::CallJniMethod(catalog_, wait_for_hms_event_id_, req, resp);
}
diff --git a/be/src/catalog/catalogd-main.cc b/be/src/catalog/catalogd-main.cc
index 598f305bb..3869b4023 100644
--- a/be/src/catalog/catalogd-main.cc
+++ b/be/src/catalog/catalogd-main.cc
@@ -24,6 +24,7 @@
#include "common/daemon-env.h"
#include "common/init.h"
#include "common/status.h"
+#include "common/thread-debug-info.h"
#include "rpc/authentication.h"
#include "rpc/rpc-trace.h"
#include "rpc/thrift-server.h"
@@ -103,6 +104,8 @@ int CatalogdMain(int argc, char** argv) {
catalog_server.MarkServiceAsStarted();
LOG(INFO) << "CatalogService started on port: " <<
FLAGS_catalog_service_port;
+ ThreadDebugInfo thread_debug_info;
+ thread_debug_info.SetThreadName("catalogd-main");
if (FLAGS_enable_workload_mgmt) {
if (catalog_server.WaitCatalogReadinessForWorkloadManagement()) {
ABORT_IF_ERROR(catalog_server.InitWorkloadManagement());
diff --git a/be/src/service/fe-support.cc b/be/src/service/fe-support.cc
index 9c900ab0b..560716cb8 100644
--- a/be/src/service/fe-support.cc
+++ b/be/src/service/fe-support.cc
@@ -27,6 +27,7 @@
#include "common/init.h"
#include "common/logging.h"
#include "common/status.h"
+#include "common/thread-debug-info.h"
#include "exec/catalog-op-executor.h"
#include "exprs/scalar-expr.h"
#include "exprs/scalar-expr-evaluator.h"
@@ -587,6 +588,17 @@
Java_org_apache_impala_service_FeSupport_NativeGetPartialCatalogObject(
THROW_IF_ERROR_RET(DeserializeThriftMsg(env, thrift_struct, &request), env,
JniUtil::internal_exc_class(), nullptr);
+ // Populate the request header.
+ if (!request.__isset.header) {
+ ThreadDebugInfo* tdi = GetThreadDebugInfo();
+ // TODO: After IMPALA-14447, query ids might be missing in some threads.
This will
+ // be addressed in IMPALA-12870.
+ if (tdi != nullptr) {
+ request.__set_header(TCatalogServiceRequestHeader());
+ request.header.__set_query_id(tdi->GetQueryId());
+ }
+ }
+
CatalogOpExecutor catalog_op_executor(ExecEnv::GetInstance(), nullptr,
nullptr);
TGetPartialCatalogObjectResponse result;
Status status = catalog_op_executor.GetPartialCatalogObject(request,
&result);
diff --git a/common/thrift/CatalogService.thrift
b/common/thrift/CatalogService.thrift
index 9ce17da09..7c71c758d 100644
--- a/common/thrift/CatalogService.thrift
+++ b/common/thrift/CatalogService.thrift
@@ -600,6 +600,9 @@ struct TPartialDbInfo {
struct TGetPartialCatalogObjectRequest {
1: required CatalogServiceVersion protocol_version = CatalogServiceVersion.V2
+ // Common header included in all CatalogService requests.
+ 6: optional TCatalogServiceRequestHeader header
+
// A catalog object descriptor: a TCatalogObject with the object name and
type fields
// set. This may be a TABLE, DB, CATALOG, or FUNCTION. The selectors below
can
// further restrict what information should be returned.