This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 535b72e674cfc00b358682f8dea4989a1d290ca8
Author: Joe McDonnell <[email protected]>
AuthorDate: Wed May 21 11:26:36 2025 -0700

    IMPALA-13945: Change hash trace to show each node's individual contribution
    
    Currently, the hash trace accumulates up the plan tree and is
    displayed only for tuple cache nodes. This means that tuple cache
    nodes high in a large plan can have hundreds of lines of hash trace
    output without an indication of which contributions came from
    which nodes.
    
    This changes the hash trace in two ways:
    1. It displays each plan node's individual contribution to the hash
       trace. A node's trace contains only a summary of the hash
       contributed by each child, so the hash trace no longer accumulates
       up the plan tree. Since each node displays its own contribution,
       the tuple cache node no longer displays the hash trace itself.
    2. It adds structure to the hash trace by attaching a comment to each
       contribution (see the sketch below). This allows a cleaner display
       of the individual pieces of a node's hash trace and gives extra
       information about what each piece contributes to the hash. It
       should be possible to trace a contribution through the plan tree.
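
    To illustrate the structure, each contribution is now recorded as a
    (comment, hash trace) pair rather than appended to a single flat
    string. A minimal standalone sketch of the idea (the committed class
    is HashTraceElement in TupleCacheInfo.java below; the name TraceEntry
    here is purely illustrative):

       // Illustrative sketch only, not the committed code.
       final class TraceEntry {
         final String comment;    // e.g. "TupleDescriptor 0", "Table", "PlanNode"
         final String hashTrace;  // human-readable form of the content that was hashed
         TraceEntry(String comment, String hashTrace) {
           this.comment = comment;
           this.hashTrace = hashTrace;
         }
       }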
    
    This also changes the output to only display the hash trace with
    explain_level=EXTENDED or higher (i.e. it won't be displayed with
    STANDARD).
    
    Example output:
       tuple cache hash trace:
         TupleDescriptor 0: TTupleDescriptor(id:0, byteSize:0, numNullBytes:0, tableId:1, tuplePath:[])
         Table: TTableName(db_name:functional, table_name:alltypes)
         PlanNode:
           [TPlanNode(node_id:0, node_type:HDFS_SCAN_NODE, num_children:0, limit:-1, row_tuples:[0], nullable_tu]
           [ples:[false], disable_codegen:false, pipelines:[], hdfs_scan_node:THdfsScanNode(tuple_id:0, random_r]
           [eplica:false, use_mt_scan_node:false, is_partition_key_scan:false, file_formats:[]), resource_profil]
           [e:TBackendResourceProfile(min_reservation:0, max_reservation:0))]
         Query options hash: TQueryOptionsHash(hi:-2415313890045961504, lo:-1462668909363814466)
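
    For reference, output like the above can be produced by iterating a
    node's hash trace elements. A rough sketch of the consumption pattern
    (getTupleCacheInfo()/getHashTraces() follow the diff below; the helper
    itself is illustrative, and the real code additionally chunks long
    entries into ~100 character pieces):

       // Rough sketch; assumes the node is cache eligible and finalized.
       static String renderHashTrace(PlanNode node, String prefix) {
         StringBuilder sb = new StringBuilder(prefix + "tuple cache hash trace:\n");
         for (HashTraceElement elem : node.getTupleCacheInfo().getHashTraces()) {
           // Each element pairs a descriptive comment with the content that was hashed.
           sb.append(prefix + "  " + elem.getComment() + ": " + elem.getHashTrace() + "\n");
         }
         return sb.toString();
       }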
    
    Testing:
     - Modified TupleCacheInfoTest and TupleCacheTest to use the new hash trace
    
    Change-Id: If53eda24e7eba264bc2d2f212b63eab9dc97a74c
    Reviewed-on: http://gerrit.cloudera.org:8080/23017
    Reviewed-by: Yida Wu <[email protected]>
    Reviewed-by: Michael Smith <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../org/apache/impala/planner/HdfsScanNode.java    | 16 ++--
 .../java/org/apache/impala/planner/PlanNode.java   | 48 +++++++++--
 .../org/apache/impala/planner/TupleCacheInfo.java  | 95 +++++++++++++++-------
 .../org/apache/impala/planner/TupleCacheNode.java  | 12 ---
 .../apache/impala/planner/TupleCacheInfoTest.java  | 83 +++++++++++++------
 .../org/apache/impala/planner/TupleCacheTest.java  | 27 ++++--
 .../queries/QueryTest/explain-level1.test          |  1 -
 7 files changed, 192 insertions(+), 90 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
index bdc3565b3..bf411cdac 100644
--- a/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HdfsScanNode.java
@@ -2883,6 +2883,12 @@ public class HdfsScanNode extends ScanNode {
       // We should ignore empty partitions, so only include the information if there is
       // at least one scan range.
       if (hasScanRange) {
+        // Hash the partition name (which includes the partition keys and values)
+        // This is necessary for cases where two partitions point to the same
+        // directory and files. Without knowing the partition keys/values, the
+        // cache can't tell them apart.
+        info.hashString("Partition name", partition.getPartitionName());
+
         // Incorporate the storage descriptor. This contains several fields that can
         // impact correctness, including the escape character, separator character,
         // json binary format, etc.
@@ -2890,16 +2896,10 @@ public class HdfsScanNode extends ScanNode {
           partition.getInputFormatDescriptor().toThrift();
         // Zero the block size, as it is not relevant to reads.
         inputFormat.setBlockSize(0);
-        info.hashThrift(inputFormat);
-
-        // Hash the partition name (which includes the partition keys and values)
-        // This is necessary for cases where two partitions point to the same
-        // directory and files. Without knowing the partition keys/values, the
-        // cache can't tell them apart.
-        info.hashString(partition.getPartitionName());
+        info.hashThrift("Partition input format", inputFormat);
 
         // Hash the scan range information
-        info.hashThrift(spec);
+        info.hashThrift("Partition scan ranges", spec);
       }
     }
   }
diff --git a/fe/src/main/java/org/apache/impala/planner/PlanNode.java b/fe/src/main/java/org/apache/impala/planner/PlanNode.java
index 08918228b..46de0316e 100644
--- a/fe/src/main/java/org/apache/impala/planner/PlanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/PlanNode.java
@@ -48,6 +48,7 @@ import org.apache.impala.common.ThriftSerializationCtx;
 import org.apache.impala.common.TreeNode;
 import org.apache.impala.planner.RuntimeFilterGenerator.RuntimeFilter;
 import org.apache.impala.planner.TupleCacheInfo.IneligibilityReason;
+import org.apache.impala.planner.TupleCacheInfo.HashTraceElement;
 import org.apache.impala.service.BackendConfig;
 import org.apache.impala.thrift.TExecNodePhase;
 import org.apache.impala.thrift.TExecStats;
@@ -448,6 +449,37 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
       }
     }
 
+    if (detailLevel.ordinal() >= TExplainLevel.EXTENDED.ordinal()) {
+      if (getTupleCacheInfo() != null && getTupleCacheInfo().isEligible()) {
+        // This PlanNode is eligible for tuple caching, so there may be TupleCacheNodes
+        // above this point. For debuggability, display this node's contribution to the
+        // tuple cache key by printing its hash trace.
+        //
+        // Print trace in chunks to avoid excessive wrapping and padding in impala-shell.
+        // There are other explain lines at VERBOSE level that are over 100 chars long so
+        // we limit the key chunk length similarly here.
+        expBuilder.append(detailPrefix + "tuple cache key: " +
+            getTupleCacheInfo().getHashString() + "\n");
+        expBuilder.append(detailPrefix + "tuple cache hash trace:\n");
+        final int keyFormatWidth = 100;
+        for (HashTraceElement elem : getTupleCacheInfo().getHashTraces()) {
+          final String hashTrace = elem.getHashTrace();
+          if (hashTrace.length() < keyFormatWidth) {
+            expBuilder.append(String.format("%s  %s: %s\n", detailPrefix,
+                elem.getComment(), hashTrace));
+          } else {
+            expBuilder.append(String.format("%s  %s:\n", detailPrefix,
+                elem.getComment()));
+            for (int idx = 0; idx < hashTrace.length(); idx += keyFormatWidth) {
+              int stopIdx = Math.min(hashTrace.length(), idx + keyFormatWidth);
+              expBuilder.append(String.format("%s  [%s]\n", detailPrefix,
+                  hashTrace.substring(idx, stopIdx)));
+            }
+          }
+        }
+      }
+    }
+
     // Print the children. Do not traverse into the children of an Exchange node to
     // avoid crossing fragment boundaries.
     if (traverseChildren) {
@@ -1319,7 +1351,8 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
     // so visit and merge the children before processing this node's contents
     for (PlanNode child : getChildren()) {
       child.computeTupleCacheInfo(descTbl, queryOptsHash);
-      if (!tupleCacheInfo_.mergeChild(child.getTupleCacheInfo())) {
+      if (!tupleCacheInfo_.mergeChild("Child node " + child.getId(),
+          child.getTupleCacheInfo())) {
         LOG.trace("{} ineligible for caching due to {}", this, child);
       }
     }
@@ -1351,7 +1384,9 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
 
       // Build may not have been visited yet.
       build.computeTupleCacheInfo(descTbl, queryOptsHash);
-      if (!tupleCacheInfo_.mergeChildWithScans(build.getTupleCacheInfo())) {
+      if (!tupleCacheInfo_.mergeChildWithScans(
+          "Child node " + build.getId() + " via runtime filter " + 
filter.getFilterId(),
+          build.getTupleCacheInfo())) {
         LOG.trace("{} on {} ineligible for caching due to {}", filter, this, 
build);
         tupleCacheInfo_.finalizeHash();
         return;
@@ -1368,13 +1403,16 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
       tupleCacheInfo_.finalizeHash();
       return;
     }
-    tupleCacheInfo_.hashThrift(msg);
+    tupleCacheInfo_.hashThrift("PlanNode", msg);
     if (getChildCount() == 0 && queryOptsHash != null) {
       // Leaf node, add query options hash.
-      tupleCacheInfo_.hashThrift(queryOptsHash);
+      tupleCacheInfo_.hashThrift("Query options hash", queryOptsHash);
     }
     tupleCacheInfo_.finalizeHash();
-    LOG.trace("Hash for {}: {}", this, tupleCacheInfo_.getHashTrace());
+    LOG.trace("Hash for {}:", this);
+    for (HashTraceElement elem : tupleCacheInfo_.getHashTraces()) {
+      LOG.trace("  {}: {}", elem.getComment(), elem.getHashTrace());
+    }
   }
 
   /**
diff --git a/fe/src/main/java/org/apache/impala/planner/TupleCacheInfo.java b/fe/src/main/java/org/apache/impala/planner/TupleCacheInfo.java
index 83a1835c2..f9a6bdbcc 100644
--- a/fe/src/main/java/org/apache/impala/planner/TupleCacheInfo.java
+++ b/fe/src/main/java/org/apache/impala/planner/TupleCacheInfo.java
@@ -18,6 +18,7 @@
 package org.apache.impala.planner;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.List;
@@ -140,13 +141,29 @@ public class TupleCacheInfo {
   // These fields accumulate partial results until finalizeHash() is called.
   private Hasher hasher_ = Hashing.murmur3_128().newHasher();
 
-  // The hash trace keeps a human-readable record of the items hashed into the cache key.
-  private StringBuilder hashTraceBuilder_ = new StringBuilder();
+  // To ease debugging hash differences, the hash trace is divided into individual
+  // elements to track each piece incorporated.
+  public static class HashTraceElement {
+    private String comment_;
+    private String hashTrace_;
+
+    public HashTraceElement(String comment, String hashTrace) {
+      Preconditions.checkNotNull(comment, "Hash trace comment must not be null");
+      comment_ = comment;
+      hashTrace_ = hashTrace;
+    }
+
+    public String getComment() { return comment_; }
+    public String getHashTrace() { return hashTrace_; }
+  }
+
+  // Hash trace for tracking what items influence the cache key. finalizeHash()
+  // converts this to an immutable list.
+  private List<HashTraceElement> hashTraces_ = new ArrayList<HashTraceElement>();
 
   // When finalizeHash() is called, these final values are filled in and the hasher and
   // hash trace builder are destroyed.
   private boolean finalized_ = false;
-  private String finalizedHashTrace_ = null;
   private String finalizedHashString_ = null;
 
   public TupleCacheInfo(DescriptorTable descTbl) {
@@ -184,11 +201,11 @@ public class TupleCacheInfo {
     return finalizedHashString_;
   }
 
-  public String getHashTrace() {
+  public List<HashTraceElement> getHashTraces() {
     Preconditions.checkState(isEligible(),
         "TupleCacheInfo only has a hash trace if it is cache eligible");
     Preconditions.checkState(finalized_, "TupleCacheInfo not finalized");
-    return finalizedHashTrace_;
+    return hashTraces_;
   }
 
   /**
@@ -199,8 +216,8 @@ public class TupleCacheInfo {
   public void finalizeHash() {
     finalizedHashString_ = hasher_.hash().toString();
     hasher_ = null;
-    finalizedHashTrace_ = hashTraceBuilder_.toString();
-    hashTraceBuilder_ = null;
+    // Make the hashTraces_ immutable
+    hashTraces_ = Collections.unmodifiableList(hashTraces_);
     finalized_ = true;
   }
 
@@ -209,9 +226,10 @@ public class TupleCacheInfo {
    * ineligible, then this is marked ineligible and there is no need to calculate
    * a hash. If the child is eligible, it incorporates the child's hash into this
    * hash. Returns true if the child was merged, false if it was ineligible.
+   * The "comment" should provide useful information for debugging the hash trace.
    */
-  public boolean mergeChild(TupleCacheInfo child) {
-    if (!mergeChildImpl(child)) {
+  public boolean mergeChild(String comment, TupleCacheInfo child) {
+    if (!mergeChildImpl(comment, child, /* mergeChildHashTrace */ false)) {
       return false;
     }
 
@@ -223,19 +241,26 @@ public class TupleCacheInfo {
   /**
    * Pull in a child's TupleCacheInfo into this TupleCacheInfo while also incorporating
    * all of its scan ranges into the key. This returns true if the child is eligible
-   * and false otherwise.
+   * and false otherwise. The "comment" should provide useful information for debugging
+   * the hash trace.
    */
-  public boolean mergeChildWithScans(TupleCacheInfo child) {
+  public boolean mergeChildWithScans(String comment, TupleCacheInfo child) {
     if (!child.isEligible()) {
-      return mergeChild(child);
+      return mergeChild(comment, child);
     }
     // Use a temporary TupleCacheInfo to incorporate the scan ranges for this child.
+    // This temporary is behaving the same way that the exchange node would behave
+    // in this case.
     TupleCacheInfo tmpInfo = new TupleCacheInfo(descriptorTable_);
-    boolean success = tmpInfo.mergeChild(child);
+    boolean success = tmpInfo.mergeChild(comment, child);
     Preconditions.checkState(success);
     tmpInfo.incorporateScans();
     tmpInfo.finalizeHash();
-    return mergeChild(tmpInfo);
+    Preconditions.checkState(tmpInfo.inputScanNodes_.size() == 0);
+    // Since this is using a temporary to merge the scan range information, the parent
+    // should merge in the temporary's hash traces, as they are not displayed anywhere
+    // else.
+    return mergeChildImpl(comment, tmpInfo, /* mergeChildHashTrace */ true);
   }
 
   /**
@@ -258,8 +283,10 @@ public class TupleCacheInfo {
   /**
    * Pull in a child's TupleCacheInfo that can be exhaustively determined during planning.
    * Public interfaces may add additional info that is more dynamic, such as scan ranges.
+   * The "comment" is used for the hash trace element unless mergeChildHashTrace is true.
    */
-  private boolean mergeChildImpl(TupleCacheInfo child) {
+  private boolean mergeChildImpl(String comment, TupleCacheInfo child,
+      boolean mergeChildHashTrace) {
     Preconditions.checkState(!finalized_,
         "TupleCacheInfo is finalized and can't be modified");
     if (!child.isEligible()) {
@@ -268,11 +295,16 @@ public class TupleCacheInfo {
     } else {
       // The child is eligible, so incorporate its hash into our hasher.
       hasher_.putBytes(child.getHashString().getBytes());
-      // Also, aggregate its hash trace into ours.
-      // TODO: It might be more useful to have the hash trace just for this
-      // node. We could display each node's hash trace in explain plan,
-      // and each contribution would be clear.
-      hashTraceBuilder_.append(child.getHashTrace());
+      if (mergeChildHashTrace) {
+        // If mergeChildHashTrace=true, then we are incorporating a temporary
+        // TupleCacheInfo that doesn't correspond to an actual node in the plan.
+        // For that case, copy in all its hash trace elements as they are not
+        // displayed elsewhere.
+        hashTraces_.addAll(child.getHashTraces());
+      } else {
+        // Add a single entry for a direct child
+        hashTraces_.add(new HashTraceElement(comment, child.getHashString()));
+      }
 
       // Incorporate the child's tuple references. This is creating a new translation
       // of TupleIds, because it will be incorporating multiple children.
@@ -293,9 +325,9 @@ public class TupleCacheInfo {
 
   /**
    * All Thrift objects inherit from TBase, so this function can incorporate any Thrift
-   * object into the hash.
+   * object into the hash. The comment is used for hash trace debugging.
    */
-  public void hashThrift(TBase<?, ?> thriftObj) {
+  public void hashThrift(String comment, TBase<?, ?> thriftObj) {
     Preconditions.checkState(!finalized_,
         "TupleCacheInfo is finalized and can't be modified");
     try {
@@ -311,18 +343,18 @@ public class TupleCacheInfo {
     // Thrift's toString() function doesn't return null.
     String thriftString = thriftObj.toString();
     Preconditions.checkState(thriftString != null);
-    hashTraceBuilder_.append(thriftString);
+    hashTraces_.add(new HashTraceElement(comment, thriftString));
   }
 
   /**
    * Hash a regular string and incorporate it into the key
    */
-  public void hashString(String s) {
+  public void hashString(String comment, String s) {
     Preconditions.checkState(!finalized_,
         "TupleCacheInfo is finalized and can't be modified");
     Preconditions.checkState(s != null);
     hasher_.putUnencodedChars(s);
-    hashTraceBuilder_.append(s);
+    hashTraces_.add(new HashTraceElement(comment, s));
   }
 
   /**
@@ -360,7 +392,7 @@ public class TupleCacheInfo {
             (tupleDesc.getTable() != null && !(tupleDesc.getTable() instanceof FeView));
         TTupleDescriptor thriftTupleDesc =
             tupleDesc.toThrift(needs_table_id ? new Integer(1) : null, serialCtx);
-        hashThrift(thriftTupleDesc);
+        hashThrift("TupleDescriptor " + id, thriftTupleDesc);
       }
 
       // Go through the tuple's slots and add them. This matches the behavior of
@@ -379,7 +411,7 @@ public class TupleCacheInfo {
         if (incorporateIntoHash) {
            // Incorporate the SlotDescriptor into the hash
           TSlotDescriptor thriftSlotDesc = slotDesc.toThrift(serialCtx);
-          hashThrift(thriftSlotDesc);
+          hashThrift("SlotDescriptor " + slotDesc.getId(), thriftSlotDesc);
         }
       }
     }
@@ -397,7 +429,7 @@ public class TupleCacheInfo {
 
     // Right now, we only hash the database / table name.
     TTableName tblName = tbl.getTableName().toThrift();
-    hashThrift(tblName);
+    hashThrift("Table", tblName);
   }
 
   /**
@@ -452,7 +484,12 @@ public class TupleCacheInfo {
       builder.append(getHashString());
       builder.append("\n");
       builder.append("cache key hash trace: ");
-      builder.append(getHashTrace());
+      for (HashTraceElement elem : getHashTraces()) {
+        builder.append(elem.getComment());
+        builder.append(": ");
+        builder.append(elem.getHashTrace());
+        builder.append("\n");
+      }
       builder.append("\n");
     } else {
       builder.append("ineligibility reasons: ");
diff --git a/fe/src/main/java/org/apache/impala/planner/TupleCacheNode.java b/fe/src/main/java/org/apache/impala/planner/TupleCacheNode.java
index 9b76f4673..98818648e 100644
--- a/fe/src/main/java/org/apache/impala/planner/TupleCacheNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/TupleCacheNode.java
@@ -42,7 +42,6 @@ import com.google.common.hash.HashCode;
 public class TupleCacheNode extends PlanNode {
 
   protected String compileTimeKey_;
-  protected String hashTrace_;
   protected boolean displayCorrectnessCheckingInfo_;
   protected boolean skipCorrectnessVerification_;
   protected final List<Integer> inputScanNodeIds_ = new ArrayList<Integer>();
@@ -57,7 +56,6 @@ public class TupleCacheNode extends PlanNode {
     TupleCacheInfo childCacheInfo = child.getTupleCacheInfo();
     Preconditions.checkState(childCacheInfo.isEligible());
     compileTimeKey_ = childCacheInfo.getHashString();
-    hashTrace_ = childCacheInfo.getHashTrace();
     // If there is variability due to a streaming agg, skip the correctness verification
     // for this location.
     skipCorrectnessVerification_ = childCacheInfo.getStreamingAggVariability();
@@ -126,16 +124,6 @@ public class TupleCacheNode extends PlanNode {
       output.append(detailPrefix + "skip correctness verification: " +
           skipCorrectnessVerification_ + "\n");
     }
-
-    // For debuggability, always print the hash trace until the cache key calculation
-    // matures. Print trace in chunks to avoid excessive wrapping and padding in
-    // impala-shell. There are other explain lines at VERBOSE level that are
-    // over 100 chars long so we limit the key chunk length similarly here.
-    final int keyFormatWidth = 100;
-    for (int idx = 0; idx < hashTrace_.length(); idx += keyFormatWidth) {
-      int stop_idx = Math.min(hashTrace_.length(), idx + keyFormatWidth);
-      output.append(detailPrefix + "[" + hashTrace_.substring(idx, stop_idx) + 
"]\n");
-    }
     List<String> input_scan_node_ids_strs =
         inputScanNodeIds_.stream().map(Object::toString).collect(Collectors.toList());
     output.append(detailPrefix + "input scan node ids: " +
diff --git a/fe/src/test/java/org/apache/impala/planner/TupleCacheInfoTest.java b/fe/src/test/java/org/apache/impala/planner/TupleCacheInfoTest.java
index 8059b3f02..8626e94a8 100644
--- a/fe/src/test/java/org/apache/impala/planner/TupleCacheInfoTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/TupleCacheInfoTest.java
@@ -21,11 +21,14 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertTrue;
 
+import java.util.List;
+
 import org.apache.impala.analysis.DescriptorTable;
 import org.apache.impala.analysis.SlotDescriptor;
 import org.apache.impala.analysis.TupleDescriptor;
 import org.apache.impala.catalog.PrimitiveType;
 import org.apache.impala.catalog.ScalarType;
+import org.apache.impala.planner.TupleCacheInfo.HashTraceElement;
 import org.apache.impala.thrift.TUniqueId;
 
 import org.junit.Test;
@@ -39,17 +42,24 @@ public class TupleCacheInfoTest {
   public void testHashThrift() {
     // This test doesn't need a DescriptorTable, so it just sets it to null.
     TupleCacheInfo info1 = new TupleCacheInfo(null);
-    info1.hashThrift(new TUniqueId(1L, 2L));
+    info1.hashThrift("info1", new TUniqueId(1L, 2L));
     info1.finalizeHash();
+    List<HashTraceElement> info1HashTraces = info1.getHashTraces();
+    assertEquals(info1HashTraces.size(), 1);
+    assertEquals("info1", info1HashTraces.get(0).getComment());
+    assertEquals("TUniqueId(hi:1, lo:2)", 
info1HashTraces.get(0).getHashTrace());
 
     TupleCacheInfo info2 = new TupleCacheInfo(null);
-    info2.hashThrift(new TUniqueId(1L, 2L));
+    info2.hashThrift("info2", new TUniqueId(1L, 2L));
     info2.finalizeHash();
+    List<HashTraceElement> info2HashTraces = info2.getHashTraces();
+    assertEquals(info1HashTraces.size(), info2HashTraces.size());
+    assertEquals("info2", info2HashTraces.get(0).getComment());
+    assertEquals(info1HashTraces.get(0).getHashTrace(),
+        info2HashTraces.get(0).getHashTrace());
 
-    assertEquals(info1.getHashTrace(), "TUniqueId(hi:1, lo:2)");
-    assertEquals(info1.getHashTrace(), info2.getHashTrace());
     // Hashes are stable over time, so check the actual hash value
-    assertEquals(info1.getHashString(), "b3f5384f81770c6adb83209b2a171dfa");
+    assertEquals("b3f5384f81770c6adb83209b2a171dfa", info1.getHashString());
     assertEquals(info1.getHashString(), info2.getHashString());
   }
 
@@ -57,23 +67,30 @@ public class TupleCacheInfoTest {
   public void testMergeHash() {
     // This test doesn't need a DescriptorTable, so it just sets it to null.
     TupleCacheInfo child1 = new TupleCacheInfo(null);
-    child1.hashThrift(new TUniqueId(1L, 2L));
+    child1.hashThrift("child1", new TUniqueId(1L, 2L));
     child1.finalizeHash();
 
     TupleCacheInfo child2 = new TupleCacheInfo(null);
-    child2.hashThrift(new TUniqueId(3L, 4L));
+    child2.hashThrift("child2", new TUniqueId(3L, 4L));
     child2.finalizeHash();
 
     TupleCacheInfo parent = new TupleCacheInfo(null);
-    parent.mergeChild(child1);
-    parent.mergeChild(child2);
-    parent.hashThrift(new TUniqueId(5L, 6L));
+    parent.mergeChild("child1", child1);
+    parent.mergeChild("child2", child2);
+    parent.hashThrift("parent", new TUniqueId(5L, 6L));
     parent.finalizeHash();
 
-    assertEquals(parent.getHashTrace(),
-        "TUniqueId(hi:1, lo:2)TUniqueId(hi:3, lo:4)TUniqueId(hi:5, lo:6)");
+    // The hash trace includes the hash of the child, but not the full hash trace
+    List<HashTraceElement> hashTraces = parent.getHashTraces();
+    assertEquals(hashTraces.size(), 3);
+    assertEquals("child1", hashTraces.get(0).getComment());
+    assertEquals(child1.getHashString(), hashTraces.get(0).getHashTrace());
+    assertEquals("child2", hashTraces.get(1).getComment());
+    assertEquals(child2.getHashString(), hashTraces.get(1).getHashTrace());
+    assertEquals("parent", hashTraces.get(2).getComment());
+    assertEquals("TUniqueId(hi:5, lo:6)", hashTraces.get(2).getHashTrace());
     // Hashes are stable over time, so check the actual hash value
-    assertEquals(parent.getHashString(), "edf5633bed2280c3c3edb703182f3122");
+    assertEquals("edf5633bed2280c3c3edb703182f3122", parent.getHashString());
   }
 
   @Test
@@ -81,7 +98,7 @@ public class TupleCacheInfoTest {
     // This test doesn't need a DescriptorTable, so it just sets it to null.
     // Child 1 is eligible
     TupleCacheInfo child1 = new TupleCacheInfo(null);
-    child1.hashThrift(new TUniqueId(1L, 2L));
+    child1.hashThrift("child1", new TUniqueId(1L, 2L));
     child1.finalizeHash();
     assertTrue(child1.isEligible());
 
@@ -92,10 +109,10 @@ public class TupleCacheInfoTest {
     assertTrue(!child2.isEligible());
 
     TupleCacheInfo parent = new TupleCacheInfo(null);
-    parent.mergeChild(child1);
+    parent.mergeChild("child1", child1);
     // Still eligible after adding child1 without child2
     assertTrue(parent.isEligible());
-    parent.mergeChild(child2);
+    parent.mergeChild("child2", child2);
     // It is allowed to check eligibility before finalizeHash()
     assertTrue(!parent.isEligible());
     parent.finalizeHash();
@@ -125,23 +142,28 @@ public class TupleCacheInfoTest {
     descTbl.computeMemLayout();
 
     TupleCacheInfo child1 = new TupleCacheInfo(descTbl);
-    child1.hashThrift(new TUniqueId(1L, 2L));
+    child1.hashThrift("child1", new TUniqueId(1L, 2L));
     child1.registerTuple(tuple1.getId());
     child1.finalizeHash();
     assertEquals(child1.getLocalTupleId(tuple1.getId()).asInt(), 0);
     assertEquals(child1.getLocalSlotId(t1slot.getId()).asInt(), 0);
-    String child1ExpectedHashTrace = "TUniqueId(hi:1, lo:2)" +
-        "TTupleDescriptor(id:0, byteSize:5, numNullBytes:1)" +
+    List<HashTraceElement> child1HashTraces = child1.getHashTraces();
+    assertEquals(3, child1HashTraces.size());
+    assertEquals("child1", child1HashTraces.get(0).getComment());
+    assertEquals("TUniqueId(hi:1, lo:2)", 
child1HashTraces.get(0).getHashTrace());
+    assertEquals("TTupleDescriptor(id:0, byteSize:5, numNullBytes:1)",
+        child1HashTraces.get(1).getHashTrace());
+    assertEquals(
         "TSlotDescriptor(id:0, parent:0, slotType:TColumnType(types:[" +
         "TTypeNode(type:SCALAR, scalar_type:TScalarType(type:INT))]), " +
         "materializedPath:[], byteOffset:0, nullIndicatorByte:4, 
nullIndicatorBit:0, " +
-        "slotIdx:0, virtual_col_type:NONE)";
-    assertEquals(child1.getHashTrace(), child1ExpectedHashTrace);
+        "slotIdx:0, virtual_col_type:NONE)",
+        child1HashTraces.get(2).getHashTrace());
 
     // To demonstrate why we're doing this, child2 uses the same TUniqueId as
     // child1, but different tuple / slot ids.
     TupleCacheInfo child2 = new TupleCacheInfo(descTbl);
-    child2.hashThrift(new TUniqueId(1L, 2L));
+    child2.hashThrift("child2", new TUniqueId(1L, 2L));
     child2.registerTuple(tuple2.getId());
     child2.finalizeHash();
     // Note: we expect the id's to be translated to local ids, so even though this is
@@ -150,14 +172,15 @@ public class TupleCacheInfoTest {
     assertEquals(child2.getLocalTupleId(tuple2.getId()).asInt(), 0);
     assertEquals(child2.getLocalSlotId(t2slot.getId()).asInt(), 0);
     // Because of the translation, child2's hash is the same as child1.
-    assertEquals(child2.getHashTrace(), child1ExpectedHashTrace);
+    List<HashTraceElement> child2HashTraces = child1.getHashTraces();
+    assertEquals(child2HashTraces, child1HashTraces);
     assertEquals(child2.getHashString(), child1.getHashString());
 
     // Merge the children in opposite order. This means that every index is different
     // from its original index in the descriptor table.
     TupleCacheInfo parent = new TupleCacheInfo(descTbl);
-    parent.mergeChild(child2);
-    parent.mergeChild(child1);
+    parent.mergeChild("child2", child2);
+    parent.mergeChild("child1", child1);
     parent.finalizeHash();
 
     // Tuple1 = second index
@@ -168,7 +191,13 @@ public class TupleCacheInfoTest {
     assertEquals(parent.getLocalTupleId(tuple2.getId()).asInt(), 0);
     assertEquals(parent.getLocalSlotId(t1slot.getId()).asInt(), 1);
     assertEquals(parent.getLocalSlotId(t2slot.getId()).asInt(), 0);
-    assertEquals(parent.getHashTrace(),
-        child1ExpectedHashTrace + child1ExpectedHashTrace);
+
+    // Parent hash trace only has entries for the two children
+    List<HashTraceElement> parentHashTraces = parent.getHashTraces();
+    assertEquals(parentHashTraces.size(), 2);
+    assertEquals("child2", parentHashTraces.get(0).getComment());
+    assertEquals(child2.getHashString(), parentHashTraces.get(0).getHashTrace());
+    assertEquals("child1", parentHashTraces.get(1).getComment());
+    assertEquals(child1.getHashString(), parentHashTraces.get(1).getHashTrace());
   }
 }
diff --git a/fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java b/fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java
index a854d7542..0c211a42a 100644
--- a/fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/TupleCacheTest.java
@@ -29,6 +29,7 @@ import java.util.Set;
 
 import org.apache.impala.catalog.Type;
 import org.apache.impala.common.ImpalaException;
+import org.apache.impala.planner.TupleCacheInfo.HashTraceElement;
 import org.apache.impala.service.Frontend.PlanCtx;
 import org.apache.impala.testutil.TestUtils;
 import org.apache.impala.thrift.TQueryCtx;
@@ -522,7 +523,13 @@ public class TupleCacheTest extends PlannerTestBase {
   private List<String> getCacheHashTraces(List<PlanNode> cacheEligibleNodes) {
     List<String> cacheHashTraces = new ArrayList<String>();
     for (PlanNode node : cacheEligibleNodes) {
-      cacheHashTraces.add(node.getTupleCacheInfo().getHashTrace());
+      StringBuilder builder = new StringBuilder();
+      for (HashTraceElement elem : node.getTupleCacheInfo().getHashTraces()) {
+        // Use only the hash trace pieces as the comments are intended for human
+        // consumption and can be different
+        builder.append(elem.getHashTrace());
+      }
+      cacheHashTraces.add(builder.toString());
     }
     return cacheHashTraces;
   }
@@ -608,9 +615,11 @@ public class TupleCacheTest extends PlannerTestBase {
     Set<String> hashTraceIntersection = new HashSet(cacheHashTraces1);
     hashTraceIntersection.retainAll(cacheHashTraces2);
 
-    // The hash trace for a cache key should be a one-to-one thing, so
-    // any difference in keys should be a difference in hash traces.
-    assertEquals(keyIntersection.size(), hashTraceIntersection.size());
+    // Since the hash trace is defined as a single node's contribution,
+    // it can have additional matches higher in the plan that don't correspond
+    // to the same cache key. The number of single-node hash trace matches
+    // should be equal or greater than the number of cache key matches.
+    assertTrue(keyIntersection.size() <= hashTraceIntersection.size());
 
     if (keyIntersection.size() == 0 || hashTraceIntersection.size() == 0) {
       StringBuilder errorLog = new StringBuilder();
@@ -652,11 +661,13 @@ public class TupleCacheTest extends PlannerTestBase {
     Set<String> hashTraceIntersection = new HashSet(cacheHashTraces1);
     hashTraceIntersection.retainAll(cacheHashTraces2);
 
-    // The hash trace for a cache key should be a one-to-one thing, so
-    // any difference in keys should be a difference in hash traces.
-    assertEquals(keyIntersection.size(), hashTraceIntersection.size());
+    // Since the hash trace is defined as a single node's contribution,
+    // it can have additional matches higher in the plan that don't correspond
+    // to the same cache key. The number of single-node hash trace matches
+    // should be equal or greater than the number of cache key matches.
+    assertTrue(keyIntersection.size() <= hashTraceIntersection.size());
 
-    if (keyIntersection.size() != 0 || hashTraceIntersection.size() != 0) {
+    if (keyIntersection.size() != 0) {
       StringBuilder errorLog = new StringBuilder();
       errorLog.append("Expected different cache keys. Instead found:\n");
       printQueryNodesCacheInfo(query1, cacheEligibleNodes1, errorLog);
diff --git a/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test b/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test
index daafd1a3d..835d639ad 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/explain-level1.test
@@ -60,5 +60,4 @@ set ENABLE_TUPLE_CACHE=TRUE;
 explain select count(*) from tpch.region
 ---- RESULTS: VERIFY_IS_SUBSET
 row_regex:.* cache key: [0-9a-f][0-9a-f]*.*
-row_regex:.*\[.*TPlanNode\(.*\]
 ====
