Copilot commented on code in PR #54565:
URL: https://github.com/apache/spark/pull/54565#discussion_r2870334162
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SparkPlanGraph.scala:
##########
@@ -45,6 +45,33 @@ case class SparkPlanGraph(
dotFile.toString()
}
+ /**
+ * Generate a JSON string containing node details (metrics, description) for
the
+ * detail side panel. This keeps the DOT node labels compact (name only)
while
Review Comment:
The Scaladoc for makeNodeDetailsJson says it includes "metrics,
description", but the JSON currently only contains `name`, `metrics`, and
optional `children`. Either include `desc` in the JSON or update the comment to
match the actual output so future UI changes don't assume description is
available.
```suggestion
* Generate a JSON string containing node details (name, metrics, and
optional children)
* for the detail side panel. This keeps the DOT node labels compact (name
only) while
```
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SparkPlanGraph.scala:
##########
@@ -45,6 +45,33 @@ case class SparkPlanGraph(
dotFile.toString()
}
+ /**
+ * Generate a JSON string containing node details (metrics, description) for
the
+ * detail side panel. This keeps the DOT node labels compact (name only)
while
+ * providing full metrics on demand via JavaScript.
+ */
+ def makeNodeDetailsJson(metrics: Map[Long, String]): String = {
+ val entries = allNodes.map { node =>
+ val metricsJson = node.metrics.flatMap { m =>
+ metrics.get(m.accumulatorId).map { v =>
+ val n = StringEscapeUtils.escapeJson(m.name)
+ val mv = StringEscapeUtils.escapeJson(v)
+ s"""{"name":"$n","value":"$mv","type":"${m.metricType}"}"""
Review Comment:
makeNodeDetailsJson escapes metric names/values, but `m.metricType` is
interpolated into JSON without JSON escaping. If metricType ever contains
quotes or backslashes, this will produce invalid JSON; escaping it also keeps
the JSON generation consistent and safer.
```suggestion
val mt = StringEscapeUtils.escapeJson(m.metricType)
s"""{"name":"$n","value":"$mv","type":"$mt"}"""
```
##########
sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js:
##########
@@ -264,18 +282,174 @@ function togglePlanViz() { // eslint-disable-line
no-unused-vars
$(this).toggleClass("arrow-open").toggleClass("arrow-closed")
});
if (arrow.classed("arrow-open")) {
- planVizContainer().style("display", "block");
+ d3.select("#plan-viz-content").style("display", "flex");
} else {
- planVizContainer().style("display", "none");
+ d3.select("#plan-viz-content").style("display", "none");
+ }
+}
+
+/*
+ * Parse the node details JSON from the hidden metadata div.
+ */
+function getNodeDetails() {
+ var detailsText = d3.select("#plan-viz-metadata
.node-details").text().trim();
+ try {
+ return JSON.parse(detailsText);
+ } catch (e) {
+ return {};
+ }
+}
+
+/*
+ * Format a metric value for display in the detail panel.
+ * Detects "total (min, med, max ...)\nVALUE (v1, v2, v3 (...))" patterns
+ * and renders them as a structured sub-table.
+ */
+/*
+ * Format a metric value for display in the detail panel.
+ * Uses metricType to determine layout instead of regex parsing.
+ * Types: "size", "timing", "nsTiming" have total + (min, med, max).
+ * "average" has (min, med, max) only.
+ * "sum" is a plain value.
+ */
+function formatMetricValue(val, metricType, showStageTask) {
+ var lines = val.split("\n");
+ if (lines.length < 2) return val;
+
+ // Multi-line: header line + data line
+ var dataLine = lines[1].trim();
+ var hasTotal = (metricType === "size" || metricType === "timing"
+ || metricType === "nsTiming");
+
+ // Parse data: "TOTAL (MIN, MED, MAX (stage X: task Y))" or "(MIN, MED, MAX
(...))"
+ var total = null, min, med, maxVal, stageId = "", taskId = "";
+ if (hasTotal) {
+ // Split "7.5 KiB (240.0 B, 240.0 B, 240.0 B (stage 3.0: task 36))"
+ var idx = dataLine.indexOf("(");
+ if (idx < 0) return val;
+ total = dataLine.substring(0, idx).trim();
+ dataLine = dataLine.substring(idx);
+ }
+ // Now dataLine is "(MIN, MED, MAX ...)" — strip outer parens
+ var inner = dataLine.replace(/^\(/, "").replace(/\)$/, "");
+ // Split by comma, but respect parenthesized stage/task
+ var parts = [];
+ var depth = 0, start = 0;
+ for (var i = 0; i < inner.length; i++) {
+ if (inner[i] === "(") depth++;
+ else if (inner[i] === ")") depth--;
+ else if (inner[i] === "," && depth === 0) {
+ parts.push(inner.substring(start, i).trim());
+ start = i + 1;
+ }
+ }
+ parts.push(inner.substring(start).trim());
+
+ min = parts[0] || "";
+ med = parts[1] || "";
+ // Max may contain "(stage X: task Y)"
+ var maxPart = parts[2] || "";
+ var stageMatch = maxPart.match(/^(.+?)\s*\(stage\s+(.+?):\s*task\s+(.+)\)$/);
+ if (stageMatch) {
+ maxVal = stageMatch[1];
+ stageId = stageMatch[2];
+ taskId = stageMatch[3];
+ } else {
+ maxVal = maxPart;
+ }
+
+ return buildStatTable(total, min, med, maxVal,
+ stageId, taskId, showStageTask);
+}
+
+function buildStatTable(total, min, med, maxVal,
+ stageId, taskId, showStageTask) {
+ var h = '<table class="table table-sm table-bordered mb-0"><thead><tr>';
+ if (total !== null) h += "<th>Total</th>";
+ h += "<th>Min</th><th>Med</th><th>Max</th>";
+ if (showStageTask) h += "<th>Stage</th><th>Task</th>";
+ h += "</tr></thead><tbody><tr>";
+ if (total !== null) h += "<td>" + total + "</td>";
+ h += "<td>" + min + "</td><td>" + med +
+ "</td><td>" + maxVal + "</td>";
+ if (showStageTask) {
+ h += "<td>" + stageId + "</td><td>" + taskId + "</td>";
}
+ h += "</tr></tbody></table>";
+ return h;
+}
+
+/*
+ * Update the detail side panel with the selected node's information.
+ */
+function updateDetailsPanel(nodeId, nodeDetails) {
+ var titleEl = document.getElementById("plan-viz-details-title");
+ var bodyEl = document.getElementById("plan-viz-details-body");
+ if (!titleEl || !bodyEl) return;
+
+ selectedNodeId = nodeId;
+ cachedNodeDetails = nodeDetails;
+
+ var details = nodeDetails[nodeId];
+ if (!details) {
+ titleEl.textContent = "Details";
+ bodyEl.innerHTML = '<p class="text-muted mb-0">No details available</p>';
+ return;
+ }
+
+ titleEl.textContent = details.name;
+
+ var showStageTask = document.getElementById("stageId-and-taskId-checkbox")
+ && document.getElementById("stageId-and-taskId-checkbox").checked;
+
+ var html = "";
+ if (details.metrics && details.metrics.length > 0) {
+ html += buildMetricsTable(details.metrics, showStageTask);
+ } else if (!details.children) {
+ html += '<p class="text-muted mb-0">No metrics</p>';
+ }
+
+ // For clusters, show child node metrics
+ if (details.children && details.children.length > 0) {
+ details.children.forEach(function (childId) {
+ var child = nodeDetails[childId];
+ if (child) {
+ html += '<h6 class="mt-2 mb-1 fw-bold">' + child.name + '</h6>';
+ if (child.metrics && child.metrics.length > 0) {
Review Comment:
updateDetailsPanel builds HTML with `child.name` directly inside an `<h6>`
string and assigns it via `innerHTML`. `child.name` ultimately comes from the
plan/SQL and can contain attacker-controlled text, so this is an XSS vector.
Render names using `textContent` (or a proper HTML-escape helper that handles
`&`, `<`, `>`, quotes) instead of string-concatenating into HTML.
##########
sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SparkPlanGraph.scala:
##########
@@ -212,16 +216,25 @@ class SparkPlanGraphCluster(
override def makeDotNode(metricsValue: Map[Long, String]): String = {
val duration =
metrics.filter(_.name.startsWith(WholeStageCodegenExec.PIPELINE_DURATION_METRIC))
+ // Extract short label: "(1)" from "WholeStageCodegen (1)"
+ val shortName = "\\(\\d+\\)".r.findFirstIn(name).getOrElse(name)
val labelStr = if (duration.nonEmpty) {
require(duration.length == 1)
val id = duration(0).accumulatorId
if (metricsValue.contains(id)) {
- name + "\n \n" + duration(0).name + ": " + metricsValue(id)
+ // For multi-line values like "total (min, med, max)\n10.0 s (...)",
+ // extract just the total number from the data line
+ val raw = metricsValue(id)
+ val lines = raw.split("\n")
+ val dataLine = if (lines.length > 1) lines(1).trim else lines(0).trim
+ // Extract just the total value (first token before any parenthesized
breakdown)
+ val total = dataLine.split("\\s*\\(")(0).trim
+ shortName + " / " + total
} else {
- name
+ shortName
}
} else {
- name
+ shortName
}
val clusterId = s"cluster$id"
s"""
Review Comment:
In SparkPlanGraphCluster.makeDotNode, `val id = duration(0).accumulatorId`
shadows the cluster node's `id`, so `val clusterId = s"cluster$id"` ends up
using the accumulator id instead of the cluster id. This will break DOM ids /
JS lookups (and can collide across clusters). Use a different local variable
name (e.g., `durationMetricId`) or explicitly reference the cluster id when
building `clusterId`.
##########
sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js:
##########
@@ -264,18 +282,174 @@ function togglePlanViz() { // eslint-disable-line
no-unused-vars
$(this).toggleClass("arrow-open").toggleClass("arrow-closed")
});
if (arrow.classed("arrow-open")) {
- planVizContainer().style("display", "block");
+ d3.select("#plan-viz-content").style("display", "flex");
} else {
- planVizContainer().style("display", "none");
+ d3.select("#plan-viz-content").style("display", "none");
+ }
+}
+
+/*
+ * Parse the node details JSON from the hidden metadata div.
+ */
+function getNodeDetails() {
+ var detailsText = d3.select("#plan-viz-metadata
.node-details").text().trim();
+ try {
+ return JSON.parse(detailsText);
+ } catch (e) {
+ return {};
+ }
+}
+
+/*
+ * Format a metric value for display in the detail panel.
+ * Detects "total (min, med, max ...)\nVALUE (v1, v2, v3 (...))" patterns
+ * and renders them as a structured sub-table.
+ */
+/*
+ * Format a metric value for display in the detail panel.
+ * Uses metricType to determine layout instead of regex parsing.
+ * Types: "size", "timing", "nsTiming" have total + (min, med, max).
+ * "average" has (min, med, max) only.
+ * "sum" is a plain value.
+ */
+function formatMetricValue(val, metricType, showStageTask) {
+ var lines = val.split("\n");
+ if (lines.length < 2) return val;
+
+ // Multi-line: header line + data line
+ var dataLine = lines[1].trim();
+ var hasTotal = (metricType === "size" || metricType === "timing"
+ || metricType === "nsTiming");
+
+ // Parse data: "TOTAL (MIN, MED, MAX (stage X: task Y))" or "(MIN, MED, MAX
(...))"
+ var total = null, min, med, maxVal, stageId = "", taskId = "";
+ if (hasTotal) {
+ // Split "7.5 KiB (240.0 B, 240.0 B, 240.0 B (stage 3.0: task 36))"
+ var idx = dataLine.indexOf("(");
+ if (idx < 0) return val;
+ total = dataLine.substring(0, idx).trim();
+ dataLine = dataLine.substring(idx);
+ }
+ // Now dataLine is "(MIN, MED, MAX ...)" — strip outer parens
+ var inner = dataLine.replace(/^\(/, "").replace(/\)$/, "");
+ // Split by comma, but respect parenthesized stage/task
+ var parts = [];
+ var depth = 0, start = 0;
+ for (var i = 0; i < inner.length; i++) {
+ if (inner[i] === "(") depth++;
+ else if (inner[i] === ")") depth--;
+ else if (inner[i] === "," && depth === 0) {
+ parts.push(inner.substring(start, i).trim());
+ start = i + 1;
+ }
+ }
+ parts.push(inner.substring(start).trim());
+
+ min = parts[0] || "";
+ med = parts[1] || "";
+ // Max may contain "(stage X: task Y)"
+ var maxPart = parts[2] || "";
+ var stageMatch = maxPart.match(/^(.+?)\s*\(stage\s+(.+?):\s*task\s+(.+)\)$/);
+ if (stageMatch) {
+ maxVal = stageMatch[1];
+ stageId = stageMatch[2];
+ taskId = stageMatch[3];
+ } else {
+ maxVal = maxPart;
+ }
+
+ return buildStatTable(total, min, med, maxVal,
+ stageId, taskId, showStageTask);
+}
+
+function buildStatTable(total, min, med, maxVal,
+ stageId, taskId, showStageTask) {
+ var h = '<table class="table table-sm table-bordered mb-0"><thead><tr>';
+ if (total !== null) h += "<th>Total</th>";
+ h += "<th>Min</th><th>Med</th><th>Max</th>";
+ if (showStageTask) h += "<th>Stage</th><th>Task</th>";
+ h += "</tr></thead><tbody><tr>";
+ if (total !== null) h += "<td>" + total + "</td>";
+ h += "<td>" + min + "</td><td>" + med +
+ "</td><td>" + maxVal + "</td>";
+ if (showStageTask) {
+ h += "<td>" + stageId + "</td><td>" + taskId + "</td>";
}
+ h += "</tr></tbody></table>";
+ return h;
+}
+
+/*
+ * Update the detail side panel with the selected node's information.
+ */
+function updateDetailsPanel(nodeId, nodeDetails) {
+ var titleEl = document.getElementById("plan-viz-details-title");
+ var bodyEl = document.getElementById("plan-viz-details-body");
+ if (!titleEl || !bodyEl) return;
+
+ selectedNodeId = nodeId;
+ cachedNodeDetails = nodeDetails;
+
+ var details = nodeDetails[nodeId];
+ if (!details) {
+ titleEl.textContent = "Details";
+ bodyEl.innerHTML = '<p class="text-muted mb-0">No details available</p>';
+ return;
+ }
+
+ titleEl.textContent = details.name;
+
+ var showStageTask = document.getElementById("stageId-and-taskId-checkbox")
+ && document.getElementById("stageId-and-taskId-checkbox").checked;
+
+ var html = "";
+ if (details.metrics && details.metrics.length > 0) {
+ html += buildMetricsTable(details.metrics, showStageTask);
+ } else if (!details.children) {
+ html += '<p class="text-muted mb-0">No metrics</p>';
+ }
+
+ // For clusters, show child node metrics
+ if (details.children && details.children.length > 0) {
+ details.children.forEach(function (childId) {
+ var child = nodeDetails[childId];
+ if (child) {
+ html += '<h6 class="mt-2 mb-1 fw-bold">' + child.name + '</h6>';
+ if (child.metrics && child.metrics.length > 0) {
+ html += buildMetricsTable(child.metrics, showStageTask);
+ } else {
+ html += '<p class="text-muted mb-0">No metrics</p>';
+ }
+ }
+ });
+ }
+ bodyEl.innerHTML = html;
+}
+
+/*
+ * Build an HTML metrics table from a metrics array.
+ */
+function buildMetricsTable(metrics, showStageTask) {
+ var html = '<table class="table table-sm table-bordered mb-0">';
+ html += "<thead><tr><th>Metric</th><th>Value</th></tr></thead><tbody>";
+ metrics.forEach(function (m) {
+ var name = m.name.replace(/</g, "<").replace(/>/g, ">");
+ var val = m.value.replace(/</g, "<").replace(/>/g, ">");
+ html += "<tr><td>" + name + "</td><td>" +
+ formatMetricValue(val, m.type, showStageTask) + "</td></tr>";
+ });
Review Comment:
buildMetricsTable only replaces `<` and `>` in metric names/values before
inserting into `innerHTML`. Not escaping `&` (and quotes) allows entity-based
HTML injection (e.g., `<img ...>`) and makes XSS possible. Use a proper
HTML-escape routine (escape `&`, `<`, `>`, `"`, `'`) or avoid `innerHTML` by
constructing DOM nodes and setting `textContent` for all untrusted strings
(including the pieces rendered by formatMetricValue/buildStatTable).
##########
sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SparkPlanGraphSuite.scala:
##########
@@ -36,11 +36,30 @@ class SparkPlanGraphSuite extends SparkFunSuite {
accumulatorId = 35,
metricType = "nsTiming")))
val dotNode = planGraphNode.makeDotNode(Map.empty[Long, String])
- val expectedDotNode = " 24 [id=\"node24\" labelType=\"html\" label=\"" +
- "<br><b>Scan JDBCRelation(\\\"test-schema\\\".tickets)
[numPartitions=1]</b><br><br>\" " +
+ val expectedDotNode = " 24 [id=\"node24\" label=\"" +
+ "Scan JDBCRelation(\\\"test-schema\\\".tickets) [numPartitions=1]\" " +
"tooltip=\"Scan JDBCRelation(\\\"test-schema\\\".tickets)
[numPartitions=1] [ticket_no#0] " +
"PushedFilters: [], ReadSchema: struct<ticket_no:string>\"];"
assertResult(expectedDotNode)(dotNode)
}
+
+ test("SPARK-55785: makeNodeDetailsJson uses cluster prefix and includes
children") {
+ import scala.collection.mutable
Review Comment:
The test name references SPARK-55785, but the PR/JIRA context here appears
to be SPARK-55786. If this is meant to validate the current change, aligning
the test description to the correct ticket will make future
archaeology/searching easier.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]