This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new f793524c6 IMPALA-12800: Implement hashCode everywhere
f793524c6 is described below
commit f793524c66d02ca1d6590cec1b09cada548c6d5b
Author: Michael Smith <[email protected]>
AuthorDate: Mon Jun 24 13:52:51 2024 -0700
IMPALA-12800: Implement hashCode everywhere
Fixes a regression in earlier IMPALA-12800 commits where two decimal
ScalarTypes could be equal, but have different hash codes. That led to
different lookup behavior, where iterating through lhs_ (the old method)
would find a similar expression via equals, but when looking up via the
substitutions_ map it would resolve a different hash code and return
null (no match). That caused analysis to fail on statements that used
the same cast-to-decimal expression twice, as one of them would fail to
find a result in ExprSubstitutionMap and lead to an unbound slot
ERROR: AnalysisException: select list expression not produced by
aggregation output (missing from GROUP BY clause?): <expr>...
Overrides hashCode on every class that overrides equals to avoid other
cases that might be untested, and avoid similar mistakes in the future.
https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html
requires "If two objects are equal according to the equals(Object)
method, then calling the hashCode method on each of the two objects must
produce the same integer result", which requires an explicit hashCode
implementation anywhere we override equals.
Adds a unit test covering the regression.
Change-Id: I129bff6fd0968be135e23e0b24e273b2ea384eca
Reviewed-on: http://gerrit.cloudera.org:8080/21550
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Michael Smith <[email protected]>
---
.../main/java/org/apache/impala/analysis/ColumnDef.java | 17 +++++++++++++++++
fe/src/main/java/org/apache/impala/analysis/Expr.java | 8 ++++++--
.../main/java/org/apache/impala/analysis/PlanHint.java | 6 ++++++
.../main/java/org/apache/impala/catalog/ArrayType.java | 6 ++++++
.../org/apache/impala/catalog/IcebergStructField.java | 7 +++++++
fe/src/main/java/org/apache/impala/catalog/MapType.java | 7 +++++++
.../main/java/org/apache/impala/catalog/ScalarType.java | 16 ++++++++++++++++
.../java/org/apache/impala/catalog/StructField.java | 3 +++
.../main/java/org/apache/impala/catalog/StructType.java | 5 +++++
.../java/org/apache/impala/catalog/TopicUpdateLog.java | 10 ++++++++++
fe/src/main/java/org/apache/impala/catalog/Type.java | 11 +++++++++++
.../java/org/apache/impala/planner/DataPartition.java | 6 ++++++
.../org/apache/impala/analysis/AnalyzeExprsTest.java | 9 +++++++++
13 files changed, 109 insertions(+), 2 deletions(-)
diff --git a/fe/src/main/java/org/apache/impala/analysis/ColumnDef.java
b/fe/src/main/java/org/apache/impala/analysis/ColumnDef.java
index b4e3e9b1d..4f31af4bb 100644
--- a/fe/src/main/java/org/apache/impala/analysis/ColumnDef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/ColumnDef.java
@@ -24,6 +24,7 @@ import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import java.util.Objects;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -383,6 +384,22 @@ public class ColumnDef {
.isEquals();
}
+ @Override
+ public int hashCode() {
+ return Objects.hash(
+ colName_,
+ comment_,
+ isPrimaryKey_,
+ isPrimaryKeyUnique_,
+ typeDef_,
+ type_,
+ isNullable_,
+ encoding_,
+ compression_,
+ defaultValue_,
+ blockSize_);
+ }
+
public TColumn toThrift() {
TColumn col = new TColumn(getColName(), type_.toThrift());
Integer blockSize =
diff --git a/fe/src/main/java/org/apache/impala/analysis/Expr.java
b/fe/src/main/java/org/apache/impala/analysis/Expr.java
index 960c1775d..172c305a6 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Expr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Expr.java
@@ -1022,7 +1022,8 @@ abstract public class Expr extends TreeNode<Expr>
implements ParseNode, Cloneabl
/**
* Returns true if two expressions are equal. The equality comparison works
on analyzed
- * as well as unanalyzed exprs by ignoring implicit casts.
+ * as well as unanalyzed exprs by ignoring implicit casts. If overridden by
a subclass,
+ * also provide an appropriate implementation for hashCode.
*/
@Override
public final boolean equals(Object obj) {
@@ -1037,7 +1038,10 @@ abstract public class Expr extends TreeNode<Expr>
implements ParseNode, Cloneabl
}
/**
- * Returns a hash code based on the same keys used for equals.
+ * Returns a hash code based on the same keys used for equals. Any
subclasses that
+ * override equals must ensure hashCode is defined such that a.equals(b)
implies
+ * a.hashCode() == b.hashCode(), as required by
+ * https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html#hashCode--
*/
@Override
public int hashCode() {
diff --git a/fe/src/main/java/org/apache/impala/analysis/PlanHint.java
b/fe/src/main/java/org/apache/impala/analysis/PlanHint.java
index f54931662..64b881669 100644
--- a/fe/src/main/java/org/apache/impala/analysis/PlanHint.java
+++ b/fe/src/main/java/org/apache/impala/analysis/PlanHint.java
@@ -20,6 +20,7 @@ package org.apache.impala.analysis;
import java.util.ArrayList;
import java.util.List;
+import java.util.Objects;
import com.google.common.base.Joiner;
@@ -56,6 +57,11 @@ public class PlanHint {
return name_.equals(oh.name_) && args_.equals(oh.args_);
}
+ @Override
+ public int hashCode() {
+ return Objects.hash(name_, args_);
+ }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
diff --git a/fe/src/main/java/org/apache/impala/catalog/ArrayType.java
b/fe/src/main/java/org/apache/impala/catalog/ArrayType.java
index e635aa5aa..6e81e267d 100644
--- a/fe/src/main/java/org/apache/impala/catalog/ArrayType.java
+++ b/fe/src/main/java/org/apache/impala/catalog/ArrayType.java
@@ -49,6 +49,12 @@ public class ArrayType extends Type {
return otherArrayType.itemType_.equals(itemType_);
}
+ @Override
+ public int hashCode() {
+ // Add 1 to differentiate between e.g. array<int> and array<array<int>>.
+ return 1 + itemType_.hashCode();
+ }
+
@Override
public void toThrift(TColumnType container) {
TTypeNode node = new TTypeNode();
diff --git a/fe/src/main/java/org/apache/impala/catalog/IcebergStructField.java
b/fe/src/main/java/org/apache/impala/catalog/IcebergStructField.java
index ef271a5c0..43697fb79 100644
--- a/fe/src/main/java/org/apache/impala/catalog/IcebergStructField.java
+++ b/fe/src/main/java/org/apache/impala/catalog/IcebergStructField.java
@@ -17,6 +17,8 @@
package org.apache.impala.catalog;
+import java.util.Objects;
+
import org.apache.impala.thrift.TColumnType;
import org.apache.impala.thrift.TStructField;
import org.apache.impala.thrift.TTypeNode;
@@ -54,4 +56,9 @@ public class IcebergStructField extends StructField {
return otherStructField.name_.equals(name_) &&
otherStructField.type_.equals(type_)
&& (otherStructField.fieldId_ == fieldId_);
}
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(name_, type_, fieldId_);
+ }
}
diff --git a/fe/src/main/java/org/apache/impala/catalog/MapType.java
b/fe/src/main/java/org/apache/impala/catalog/MapType.java
index 4235181d5..41e60ec1a 100644
--- a/fe/src/main/java/org/apache/impala/catalog/MapType.java
+++ b/fe/src/main/java/org/apache/impala/catalog/MapType.java
@@ -17,6 +17,8 @@
package org.apache.impala.catalog;
+import java.util.Objects;
+
import org.apache.commons.lang3.StringUtils;
import org.apache.impala.thrift.TColumnType;
@@ -49,6 +51,11 @@ public class MapType extends Type {
otherMapType.valueType_.equals(valueType_);
}
+ @Override
+ public int hashCode() {
+ return Objects.hash(keyType_, valueType_);
+ }
+
@Override
public String toSql(int depth) {
if (depth >= MAX_NESTING_DEPTH) return "MAP<...>";
diff --git a/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
b/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
index f466e2530..4872caabd 100644
--- a/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
+++ b/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
@@ -17,6 +17,8 @@
package org.apache.impala.catalog;
+import java.util.Objects;
+
import org.apache.commons.lang3.StringUtils;
import org.apache.impala.analysis.TypesUtil;
import org.apache.impala.thrift.TColumnType;
@@ -387,6 +389,20 @@ public class ScalarType extends Type {
return true;
}
+ @Override
+ public int hashCode() {
+ switch (type_) {
+ case CHAR:
+ case FIXED_UDA_INTERMEDIATE:
+ case VARCHAR:
+ return Objects.hash(type_, len_);
+ case DECIMAL:
+ return Objects.hash(type_, precision_, scale_);
+ default:
+ return Objects.hash(type_);
+ }
+ }
+
public Type getMaxResolutionType() {
// Dates got summed as BIGINT for AVG.
if (isIntegerType() || type_ == PrimitiveType.DATE) {
diff --git a/fe/src/main/java/org/apache/impala/catalog/StructField.java
b/fe/src/main/java/org/apache/impala/catalog/StructField.java
index fdfbb672f..ad611980e 100644
--- a/fe/src/main/java/org/apache/impala/catalog/StructField.java
+++ b/fe/src/main/java/org/apache/impala/catalog/StructField.java
@@ -95,6 +95,9 @@ public class StructField {
type_.toThrift(container);
}
+ /**
+ * Implements equals and hashCode so a.equals(b) implies
a.hashCode()==b.hashCode().
+ */
@Override
public boolean equals(Object other) {
if (!(other instanceof StructField)) return false;
diff --git a/fe/src/main/java/org/apache/impala/catalog/StructType.java
b/fe/src/main/java/org/apache/impala/catalog/StructType.java
index dc637d040..c6c043f40 100644
--- a/fe/src/main/java/org/apache/impala/catalog/StructType.java
+++ b/fe/src/main/java/org/apache/impala/catalog/StructType.java
@@ -116,6 +116,11 @@ public class StructType extends Type {
return otherStructType.getFields().equals(fields_);
}
+ @Override
+ public int hashCode() {
+ return fields_.hashCode();
+ }
+
@Override
public void toThrift(TColumnType container) {
TTypeNode node = new TTypeNode();
diff --git a/fe/src/main/java/org/apache/impala/catalog/TopicUpdateLog.java
b/fe/src/main/java/org/apache/impala/catalog/TopicUpdateLog.java
index a20abd5ae..99006ebd2 100644
--- a/fe/src/main/java/org/apache/impala/catalog/TopicUpdateLog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/TopicUpdateLog.java
@@ -18,6 +18,7 @@
package org.apache.impala.catalog;
import java.util.Map;
+import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import org.slf4j.Logger;
@@ -103,6 +104,15 @@ public class TopicUpdateLog {
&& numSkippedUpdatesLockContention_ == entry
.getNumSkippedUpdatesLockContention();
}
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(
+ numSkippedUpdates_,
+ lastSentVersion_,
+ lastSentTopicUpdate_,
+ numSkippedUpdatesLockContention_);
+ }
}
// Entries in the topic update log stored as a map of catalog object keys to
diff --git a/fe/src/main/java/org/apache/impala/catalog/Type.java
b/fe/src/main/java/org/apache/impala/catalog/Type.java
index 40f4f53f5..640daad15 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Type.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Type.java
@@ -318,6 +318,17 @@ public abstract class Type {
*/
public abstract void toThrift(TColumnType container);
+ /**
+ * Subclasses must provide consistent equals and hashCode implementations
such that
+ * a.equals(b) implies a.hashCode() == b.hashCode().
+ */
+ public abstract boolean equals(Object other);
+
+ /**
+ * hashCode must be defined such that a.equals(b) implies a.hashCode() ==
b.hashCode().
+ */
+ public abstract int hashCode();
+
/**
* Returns true if this type is equal to t, or if t is a wildcard variant of
this
* type. Subclasses should override this as appropriate. The default
implementation
diff --git a/fe/src/main/java/org/apache/impala/planner/DataPartition.java
b/fe/src/main/java/org/apache/impala/planner/DataPartition.java
index 53a60634e..f266a9338 100644
--- a/fe/src/main/java/org/apache/impala/planner/DataPartition.java
+++ b/fe/src/main/java/org/apache/impala/planner/DataPartition.java
@@ -19,6 +19,7 @@ package org.apache.impala.planner;
import java.util.ArrayList;
import java.util.List;
+import java.util.Objects;
import org.apache.impala.analysis.Analyzer;
import org.apache.impala.analysis.Expr;
@@ -104,6 +105,11 @@ public class DataPartition {
return Expr.equalLists(partitionExprs_, other.partitionExprs_);
}
+ @Override
+ public int hashCode() {
+ return Objects.hash(type_, partitionExprs_);
+ }
+
public String debugString() {
return MoreObjects.toStringHelper(this)
.add("type_", type_)
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
b/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
index fd9417ff2..a8a577202 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeExprsTest.java
@@ -3302,6 +3302,15 @@ public class AnalyzeExprsTest extends AnalyzerTest {
RunCastFormatTestOnType("DATE");
}
+ @Test
+ public void TestMatchingCasts() throws AnalysisException {
+ // IMPALA-12800: ensure identical cast-to-decimal expressions match
hashCodes.
+ String clause =
+ "sum(CASE WHEN id IS NOT NULL THEN cast(0.4 as decimal(20,10)) ELSE 0
END)";
+ AnalyzesOk("SELECT CASE WHEN (" + clause + ") > 0 " +
+ "THEN (" + clause + ") ELSE null END q FROM functional.alltypes");
+ }
+
private void RunCastFormatTestOnType(String type) {
String to_timestamp_cast = "cast('05-01-2017' as " + type + ")";
AnalysisError(