This is an automated email from the ASF dual-hosted git repository.
gianm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 94a75554fd8 feat: MSQ: Log full stack trace when "debug" is set.
(#19361)
94a75554fd8 is described below
commit 94a75554fd8dac27e427552e007ec2de029f4d03
Author: Gian Merlino <[email protected]>
AuthorDate: Tue Apr 21 10:12:45 2026 -0700
feat: MSQ: Log full stack trace when "debug" is set. (#19361)
Typically controllers and workers only log the full stack trace for
unknown faults and for DruidExceptions that have a non-USER persona.
The rationale is to avoid log spam. However, sometimes it is useful
to see the full stack traces for all faults. This patch changes things
so that full stack traces are always logged when the "debug" flag is
set in the query context.
Test contexts are also updated such that the full stack trace is always
logged in tests.
Finally, this patch also calls "wasDeserialized()" on the DruidException
builder when re-creating DruidExceptions on the controller. This avoids
having a stack trace assigned that isn't useful.
---
.../apache/druid/msq/dart/controller/DartControllerContext.java | 6 ++++++
.../java/org/apache/druid/msq/dart/worker/DartWorkerContext.java | 6 ++++++
.../main/java/org/apache/druid/msq/exec/ControllerContext.java | 5 +++++
.../src/main/java/org/apache/druid/msq/exec/ControllerImpl.java | 4 ++--
.../src/main/java/org/apache/druid/msq/exec/MSQTasks.java | 7 ++++---
.../src/main/java/org/apache/druid/msq/exec/WorkerContext.java | 5 +++++
.../src/main/java/org/apache/druid/msq/exec/WorkerImpl.java | 2 +-
.../org/apache/druid/msq/indexing/IndexerControllerContext.java | 6 ++++++
.../java/org/apache/druid/msq/indexing/IndexerWorkerContext.java | 8 ++++++++
.../org/apache/druid/msq/indexing/error/DruidExceptionFault.java | 1 +
.../java/org/apache/druid/msq/test/MSQTestControllerContext.java | 6 ++++++
.../test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java | 6 ++++++
.../druid/msq/test/TestDartControllerContextFactoryImpl.java | 6 ++++++
.../src/main/java/org/apache/druid/error/DruidException.java | 2 +-
14 files changed, 63 insertions(+), 7 deletions(-)
diff --git
a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/DartControllerContext.java
b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/DartControllerContext.java
index a4a4ff8945a..52936b8e0f8 100644
---
a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/DartControllerContext.java
+++
b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/controller/DartControllerContext.java
@@ -254,4 +254,10 @@ public class DartControllerContext implements
ControllerContext
DEFAULT_TARGET_PARTITIONS_PER_WORKER
);
}
+
+ @Override
+ public boolean isDebug()
+ {
+ return context.isDebug();
+ }
}
diff --git
a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartWorkerContext.java
b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartWorkerContext.java
index 6aaddba48d5..bde9f8968a0 100644
---
a/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartWorkerContext.java
+++
b/multi-stage-query/src/main/java/org/apache/druid/msq/dart/worker/DartWorkerContext.java
@@ -274,6 +274,12 @@ public class DartWorkerContext implements WorkerContext
return true;
}
+ @Override
+ public boolean isDebug()
+ {
+ return queryContext.isDebug();
+ }
+
@Override
public DruidNode selfNode()
{
diff --git
a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerContext.java
b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerContext.java
index 2d14c10788b..e12c5c3b336 100644
---
a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerContext.java
+++
b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerContext.java
@@ -135,4 +135,9 @@ public interface ControllerContext
* shuffle specs that have {@link ShuffleSpec#isAdjustable()} set to true.
*/
int targetPartitionsPerWorker();
+
+ /**
+ * Whether the controller should log full stack traces on error.
+ */
+ boolean isDebug();
}
diff --git
a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java
b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java
index f616db7a032..83568629be1 100644
---
a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java
+++
b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ControllerImpl.java
@@ -487,11 +487,11 @@ public class ControllerImpl implements Controller
// Log the errors we encountered.
if (controllerError != null) {
- log.warn("Controller: %s",
MSQTasks.errorReportToLogMessage(controllerError));
+ log.warn("Controller: %s",
MSQTasks.errorReportToLogMessage(controllerError, context.isDebug()));
}
if (workerError != null) {
- log.warn("Worker: %s", MSQTasks.errorReportToLogMessage(workerError));
+ log.warn("Worker: %s", MSQTasks.errorReportToLogMessage(workerError,
context.isDebug()));
}
}
if (queryKernel != null && queryKernel.isSuccess()) {
diff --git
a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java
b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java
index b47124aef6b..ea47ded0c92 100644
--- a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java
+++ b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/MSQTasks.java
@@ -203,9 +203,10 @@ public class MSQTasks
}
/**
- * Returns a string form of a {@link MSQErrorReport} suitable for logging.
+ * Returns a string form of a {@link MSQErrorReport} suitable for logging.
When {@code forceFullStackTrace} is true,
+ * the full stack trace is always included (when available), regardless of
fault type.
*/
- static String errorReportToLogMessage(final MSQErrorReport errorReport)
+ static String errorReportToLogMessage(final MSQErrorReport errorReport,
final boolean forceFullStackTrace)
{
final StringBuilder logMessage = new StringBuilder("Work failed");
@@ -222,7 +223,7 @@ public class MSQTasks
logMessage.append(":
").append(MSQFaultUtils.generateMessageWithErrorCode(errorReport.getFault()));
if (errorReport.getExceptionStackTrace() != null) {
- if (logFullStackTrace(errorReport.getFault())) {
+ if (forceFullStackTrace || logFullStackTrace(errorReport.getFault())) {
logMessage.append('\n').append(errorReport.getExceptionStackTrace());
} else {
// Log first line only (error class, message) for known faults, to
avoid polluting logs.
diff --git
a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerContext.java
b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerContext.java
index 61c1dd9654e..9e696a16cce 100644
---
a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerContext.java
+++
b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerContext.java
@@ -116,6 +116,11 @@ public interface WorkerContext extends Closeable
*/
boolean includeAllCounters();
+ /**
+ * Whether to log full stack traces for all errors.
+ */
+ boolean isDebug();
+
@Override
void close();
}
diff --git
a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerImpl.java
b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerImpl.java
index 0b3bfdc235a..fa7d5c232ce 100644
--- a/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerImpl.java
+++ b/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerImpl.java
@@ -195,7 +195,7 @@ public class WorkerImpl implements Worker
if (maybeErrorReport.isPresent()) {
final MSQErrorReport errorReport = maybeErrorReport.get();
- final String logMessage =
MSQTasks.errorReportToLogMessage(errorReport);
+ final String logMessage =
MSQTasks.errorReportToLogMessage(errorReport, context.isDebug());
log.warn("%s", logMessage);
// Inform controller of any errors that occur, unless we were
canceled. This prevents attempting to contact
diff --git
a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerControllerContext.java
b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerControllerContext.java
index cd91376e5c9..18aebe0bcd2 100644
---
a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerControllerContext.java
+++
b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerControllerContext.java
@@ -259,6 +259,12 @@ public class IndexerControllerContext implements
ControllerContext
);
}
+ @Override
+ public boolean isDebug()
+ {
+ return taskQuerySpecContext.isDebug();
+ }
+
/**
* Helper method for {@link #queryKernelConfig(MSQSpec)}. Also used in tests.
*/
diff --git
a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerWorkerContext.java
b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerWorkerContext.java
index 1e445f5e2c9..3a50cdc71f2 100644
---
a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerWorkerContext.java
+++
b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/IndexerWorkerContext.java
@@ -93,6 +93,7 @@ public class IndexerWorkerContext implements WorkerContext
private final int maxConcurrentStages;
private final boolean liveReportCounters;
private final boolean includeAllCounters;
+ private final boolean debug;
private final int threadCount;
// Written under synchronized(this) using double-checked locking.
@@ -134,6 +135,7 @@ public class IndexerWorkerContext implements WorkerContext
);
this.liveReportCounters =
MultiStageQueryContext.getLiveReportCounters(queryContext,
DEFAULT_LIVE_REPORT_COUNTERS);
this.includeAllCounters =
MultiStageQueryContext.getIncludeAllCounters(queryContext);
+ this.debug = queryContext.isDebug();
// Compute thread count once in constructor
final int baseThreadCount = memoryIntrospector.numProcessingThreads();
@@ -327,6 +329,12 @@ public class IndexerWorkerContext implements WorkerContext
return includeAllCounters;
}
+ @Override
+ public boolean isDebug()
+ {
+ return debug;
+ }
+
public ServiceLocator controllerLocator()
{
return controllerLocator;
diff --git
a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/DruidExceptionFault.java
b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/DruidExceptionFault.java
index 32dded914ee..d58578e7f29 100644
---
a/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/DruidExceptionFault.java
+++
b/multi-stage-query/src/main/java/org/apache/druid/msq/indexing/error/DruidExceptionFault.java
@@ -106,6 +106,7 @@ public class DruidExceptionFault extends BaseMSQFault
return DruidException.forPersona(personaEnum)
.ofCategory(categoryEnum)
.withErrorCode(druidErrorCode)
+ .wasDeserialized()
.build(getErrorMessage())
.withContext(context);
}
diff --git
a/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestControllerContext.java
b/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestControllerContext.java
index 5bae51c53e3..6f09c43ff02 100644
---
a/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestControllerContext.java
+++
b/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestControllerContext.java
@@ -485,6 +485,12 @@ public class MSQTestControllerContext implements
ControllerContext, DartControll
return 1;
}
+ @Override
+ public boolean isDebug()
+ {
+ return true;
+ }
+
@Override
public ControllerContext newContext(QueryContext context)
{
diff --git
a/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java
b/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java
index a926aabee6f..a8773635484 100644
---
a/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java
+++
b/multi-stage-query/src/test/java/org/apache/druid/msq/test/MSQTestWorkerContext.java
@@ -206,6 +206,12 @@ public class MSQTestWorkerContext implements WorkerContext
return true;
}
+ @Override
+ public boolean isDebug()
+ {
+ return true;
+ }
+
@Override
public void close()
{
diff --git
a/multi-stage-query/src/test/java/org/apache/druid/msq/test/TestDartControllerContextFactoryImpl.java
b/multi-stage-query/src/test/java/org/apache/druid/msq/test/TestDartControllerContextFactoryImpl.java
index 6032afc853b..c3eaf6e9c9e 100644
---
a/multi-stage-query/src/test/java/org/apache/druid/msq/test/TestDartControllerContextFactoryImpl.java
+++
b/multi-stage-query/src/test/java/org/apache/druid/msq/test/TestDartControllerContextFactoryImpl.java
@@ -108,6 +108,12 @@ public class TestDartControllerContextFactoryImpl extends
DartControllerContextF
{
serviceEmitter.emit(metricBuilder.build("controller", queryId()));
}
+
+ @Override
+ public boolean isDebug()
+ {
+ return true;
+ }
};
}
diff --git
a/processing/src/main/java/org/apache/druid/error/DruidException.java
b/processing/src/main/java/org/apache/druid/error/DruidException.java
index 4883c811562..54382f5274b 100644
--- a/processing/src/main/java/org/apache/druid/error/DruidException.java
+++ b/processing/src/main/java/org/apache/druid/error/DruidException.java
@@ -477,7 +477,7 @@ public class DruidException extends RuntimeException
*
* @return the builder
*/
- DruidExceptionBuilder wasDeserialized()
+ public DruidExceptionBuilder wasDeserialized()
{
this.deserialized = true;
return this;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]