This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit c96b7b082dff6bb2fd0062e63b508e3924e14297 Author: Csaba Ringhofer <[email protected]> AuthorDate: Mon Apr 7 01:14:37 2025 +0200 IMPALA-14576, IMPALA-14577: add rewrite rules for geospatial relations Apply rewrites to geospatial relations like st_intersects. 3 rewrites are added: 1. NormalizeGeospatialRelationsRule moves const arguments to the first position (this can be useful as the current Java implementation optimizes const first arguments, see IMPALA-14575): st_intersects(geom_col, ST_Polygon(1,1, 1,4, 4,4, 4,1)) -> st_intersects(ST_Polygon(1,1, 1,4, 4,4, 4,1), geom_col) 2. AddEnvIntersectsRule adds st_envintersects() before relations that can only be true when the bounding rectangles intersect. This is useful as st_envintersects() has a native implementation (IMPALA-14573): st_intersects(geom1, geom2) -> st_envintersects(geom1, geom2) AND st_intersects(geom1, geom2) 3. PointEnvIntersectsRule replaces bounding rect (envelope) intersection on geometries from st_point with predicates directly on coordinates: st_envintersects(CONST_GEOM, st_point(x, y)) -> x >= st_minx(CONST_GEOM) AND y >= st_miny(CONST_GEOM) AND x <= st_maxx(CONST_GEOM) AND y <= st_maxy(CONST_GEOM) Note that AddEnvIntersectsRule is only valid in planar geometry (the relation functions in HIVE_ESRI are all planar). Rewrites 2 and 3 are not applied if the cost of the child expressions is above some threshold. AddEnvIntersectsRule needed a new type of expression rewrite ("non-idempotent rules") that runs rules only once to avoid triggering the rules multiple times on the same input predicate. Other changes: - Changed handling of malformed geometries in C++ functions from error to warning. This is consistent with handling in Java. 
Change-Id: Id65f646db6f1c89a74253e9ff755c39c400328be Reviewed-on: http://gerrit.cloudera.org:8080/23719 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/exprs/geo/shape-format.h | 10 +-- .../java/org/apache/impala/analysis/Analyzer.java | 14 ++- .../impala/rewrite/AddEnvIntersectsRule.java | 77 ++++++++++++++++ .../org/apache/impala/rewrite/ExprRewriter.java | 21 +++++ .../rewrite/NormalizeGeospatialRelationsRule.java | 62 +++++++++++++ .../impala/rewrite/PointEnvIntersectsRule.java | 100 +++++++++++++++++++++ .../queries/QueryTest/geospatial-esri-planner.test | 88 +++++++++++++++++- tests/query_test/test_geospatial_functions.py | 2 +- 8 files changed, 366 insertions(+), 8 deletions(-) diff --git a/be/src/exprs/geo/shape-format.h b/be/src/exprs/geo/shape-format.h index a7af82845..091de17a4 100644 --- a/be/src/exprs/geo/shape-format.h +++ b/be/src/exprs/geo/shape-format.h @@ -225,29 +225,29 @@ inline bool ParseHeader(FunctionContext* ctx, const StringVal& geom, OGCType* og if (UNLIKELY(geom.is_null)) return false; if (UNLIKELY(geom.len < MIN_GEOM_SIZE)) { - ctx->SetError("Geometry size too small."); + ctx->AddWarning("Geometry size too small."); return false; } const OGCType unchecked_ogc_type = getOGCType(geom); if (UNLIKELY(unchecked_ogc_type < UNKNOWN || unchecked_ogc_type > ST_MULTIPOLYGON)) { - ctx->SetError("Invalid geometry type."); + ctx->AddWarning("Invalid geometry type."); return false; } if (UNLIKELY(unchecked_ogc_type == UNKNOWN)) { - ctx->SetError("Geometry type UNKNOWN."); + ctx->AddWarning("Geometry type UNKNOWN."); return false; } if (UNLIKELY(unchecked_ogc_type == ST_POINT)) { if (geom.len < MIN_POINT_SIZE) { - ctx->SetError("Geometry size too small for ST_POINT type."); + ctx->AddWarning("Geometry size too small for ST_POINT type."); return false; } } else { if (UNLIKELY(geom.len < MIN_NON_POINT_SIZE)) { - ctx->SetError("Geometry size too small for non ST_POINT type."); + 
ctx->AddWarning("Geometry size too small for non ST_POINT type."); return false; } } diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java index fd5b2e1b6..86ab75c1a 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java +++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java @@ -92,6 +92,7 @@ import org.apache.impala.planner.JoinNode; import org.apache.impala.planner.PlanNode; import org.apache.impala.planner.ScanNode; import org.apache.impala.planner.UnionNode; +import org.apache.impala.rewrite.AddEnvIntersectsRule; import org.apache.impala.rewrite.BetweenToCompoundRule; import org.apache.impala.rewrite.ConvertToCNFRule; import org.apache.impala.rewrite.CountDistinctToNdvRule; @@ -106,15 +107,19 @@ import org.apache.impala.rewrite.FoldConstantsRule; import org.apache.impala.rewrite.NormalizeBinaryPredicatesRule; import org.apache.impala.rewrite.NormalizeCountStarRule; import org.apache.impala.rewrite.NormalizeExprsRule; +import org.apache.impala.rewrite.NormalizeGeospatialRelationsRule; +import org.apache.impala.rewrite.PointEnvIntersectsRule; import org.apache.impala.rewrite.SimplifyCastExprRule; import org.apache.impala.rewrite.SimplifyCastStringToTimestamp; import org.apache.impala.rewrite.SimplifyConditionalsRule; import org.apache.impala.rewrite.SimplifyDistinctFromRule; +import org.apache.impala.service.BackendConfig; import org.apache.impala.service.FeSupport; import org.apache.impala.thrift.QueryConstants; import org.apache.impala.thrift.TAccessEvent; import org.apache.impala.thrift.TCatalogObjectType; import org.apache.impala.thrift.TExecutorGroupSet; +import org.apache.impala.thrift.TGeospatialLibrary; import org.apache.impala.thrift.TImpalaQueryOptions; import org.apache.impala.thrift.TLineageGraph; import org.apache.impala.thrift.TNetworkAddress; @@ -670,6 +675,7 @@ public class Analyzer { this.authzFactory = authzFactory; this.lineageGraph = new 
ColumnLineageGraph(); List<ExprRewriteRule> rules = new ArrayList<>(); + List<ExprRewriteRule> nonIdempotentRules = new ArrayList<>(); // BetweenPredicates must be rewritten to be executable. Other non-essential // expr rewrites can be disabled via a query option. When rewrites are enabled // BetweenPredicates should be rewritten first to help trigger other rules. @@ -695,9 +701,15 @@ public class Analyzer { rules.add(CountDistinctToNdvRule.INSTANCE); rules.add(DefaultNdvScaleRule.INSTANCE); rules.add(SimplifyCastExprRule.INSTANCE); + if (BackendConfig.INSTANCE.getGeospatialLibrary().equals( + TGeospatialLibrary.HIVE_ESRI)) { + rules.add(NormalizeGeospatialRelationsRule.INSTANCE); + rules.add(PointEnvIntersectsRule.INSTANCE); + nonIdempotentRules.add(AddEnvIntersectsRule.INSTANCE); + } } rules.add(CountStarToConstRule.INSTANCE); - exprRewriter_ = new ExprRewriter(rules); + exprRewriter_ = new ExprRewriter(rules, nonIdempotentRules); nullSlotsCache = queryCtx.getClient_request().getQuery_options().use_null_slots_cache ? CacheBuilder.newBuilder().concurrencyLevel(1).recordStats().build() : null; diff --git a/fe/src/main/java/org/apache/impala/rewrite/AddEnvIntersectsRule.java b/fe/src/main/java/org/apache/impala/rewrite/AddEnvIntersectsRule.java new file mode 100644 index 000000000..9efbd6044 --- /dev/null +++ b/fe/src/main/java/org/apache/impala/rewrite/AddEnvIntersectsRule.java @@ -0,0 +1,77 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.impala.rewrite; + +import java.util.Arrays; +import java.util.List; + +import org.apache.impala.analysis.Analyzer; +import org.apache.impala.analysis.BinaryPredicate; +import org.apache.impala.analysis.CompoundPredicate; +import org.apache.impala.analysis.Expr; +import org.apache.impala.analysis.FunctionCallExpr; +import org.apache.impala.analysis.FunctionName; +import org.apache.impala.common.AnalysisException; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + +/** + * Add st_envintersects() to conjuncts with a geospatial relation that can be true only + * if the bounding rects (envelopes) intersect. + * + * This is mainly beneficial because st_envintersects has a native implementation + * (IMPALA-14573) but other relations are not yet implemented in c++. + * + * Examples: + * st_intersects(geom1, geom2) + * -> + * st_envintersects(geom1, geom2) AND + * st_intersects(geom1, geom2) + * + */ +public class AddEnvIntersectsRule implements ExprRewriteRule { + public static ExprRewriteRule INSTANCE = new AddEnvIntersectsRule(); + + static final List<String> relations = Arrays.asList("st_contains", "st_crosses", + "st_equals", "st_intersects", "st_overlaps", "st_touches", "st_within"); + + // Common subexpressions are computed multiple times (IMPALA-7737), so this rule is only + // beneficial if children are cheap. The limit is above JAVA_FUNCTION_CALL_COST to allow + // an St_Point() and a few other expressions (needed for PointEnvIntersectsRule). 
+ // TODO: rethink cost limit once st_point() get c++ implementation (IMPALA-14629) + static final float CHILD_COST_LIMIT = Expr.JAVA_FUNCTION_CALL_COST * 2; + + @Override + public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException { + if (!(expr instanceof FunctionCallExpr)) return expr; + FunctionCallExpr fn = (FunctionCallExpr) expr; + FunctionName fnName = fn.getFnName(); + if (!fnName.isBuiltin()) return expr; + if (!relations.contains(fnName.getFunction())) return expr; + + Expr arg1 = expr.getChild(0).clone(); + Expr arg2 = expr.getChild(1).clone(); + if (!arg1.hasCost() || !arg2.hasCost() + || arg1.getCost() + arg2.getCost() > CHILD_COST_LIMIT) { + return expr; + } + Expr newPred = new FunctionCallExpr("st_envintersects", Arrays.asList(arg1, arg2)); + return new CompoundPredicate(CompoundPredicate.Operator.AND, newPred, expr.clone()); + } +} diff --git a/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java b/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java index 2c671ada9..a15b243d3 100644 --- a/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java +++ b/fe/src/main/java/org/apache/impala/rewrite/ExprRewriter.java @@ -39,25 +39,46 @@ public class ExprRewriter { private final static Logger LOG = LoggerFactory.getLogger(ExprRewriter.class); private int numChanges_ = 0; private final List<ExprRewriteRule> rules_; + private final List<ExprRewriteRule> nonIdempotentRules_; + public ExprRewriter(List<ExprRewriteRule> rules) { rules_ = rules; + nonIdempotentRules_ = Lists.newArrayList(); + } + + public ExprRewriter(List<ExprRewriteRule> rules, + List<ExprRewriteRule> nonIdempotentRules) { + rules_ = rules; + nonIdempotentRules_ = nonIdempotentRules; } public ExprRewriter(ExprRewriteRule rule) { rules_ = Lists.newArrayList(rule); + nonIdempotentRules_ = Lists.newArrayList(); } public Expr rewrite(Expr expr, Analyzer analyzer) throws AnalysisException { // Keep applying the rule list until no rule has made any changes. 
int oldNumChanges; Expr rewrittenExpr = expr; + int nonIdempotentRuleIdx = 0; do { oldNumChanges = numChanges_; for (ExprRewriteRule rule: rules_) { rewrittenExpr = applyRuleRepeatedly(rewrittenExpr, rule, analyzer); } + // Apply all possible idempotent rules before progressing with non-idempotent ones. + while (oldNumChanges == numChanges_ + && nonIdempotentRuleIdx < nonIdempotentRules_.size()) { + // Apply nonIdempotentRules_ only once as these add expressions without removing + // the original and could be applied again. + ExprRewriteRule rule = nonIdempotentRules_.get(nonIdempotentRuleIdx); + rewrittenExpr = applyRuleBottomUp(rewrittenExpr, rule, analyzer); + nonIdempotentRuleIdx++; + } } while (oldNumChanges != numChanges_); + return rewrittenExpr; } diff --git a/fe/src/main/java/org/apache/impala/rewrite/NormalizeGeospatialRelationsRule.java b/fe/src/main/java/org/apache/impala/rewrite/NormalizeGeospatialRelationsRule.java new file mode 100644 index 000000000..949477ced --- /dev/null +++ b/fe/src/main/java/org/apache/impala/rewrite/NormalizeGeospatialRelationsRule.java @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.impala.rewrite; + +import java.util.Arrays; +import java.util.List; + +import org.apache.impala.analysis.Analyzer; +import org.apache.impala.analysis.BoolLiteral; +import org.apache.impala.analysis.CompoundPredicate; +import org.apache.impala.analysis.Expr; +import org.apache.impala.analysis.FunctionCallExpr; +import org.apache.impala.analysis.FunctionName; + +import com.google.common.base.Preconditions; + +/** + * Normalizes symmetric st_ relations by ensuring that if one child is constant then + * it is always the first child. + * This would highly benefit Hive ESRI relations if constness would be propagated to + * Java UDFs as those are optimized for const 1st argument (see IMPALA-14575 for details). + * + * Examples: + * ST_Intersects(geom_col, ST_Polygon(1,1, 1,4, 4,4, 4,1)) + * -> + * ST_Intersects(ST_Polygon(1,1, 1,4, 4,4, 4,1), geom_col) + */ +public class NormalizeGeospatialRelationsRule implements ExprRewriteRule { + public static ExprRewriteRule INSTANCE = new NormalizeGeospatialRelationsRule(); + static List<String> symmetric_rels = Arrays.asList( + "st_crosses", "st_disjoint", "st_equals", "st_envintersects", + "st_intersects", "st_overlaps", "st_touches"); + + @Override + public Expr apply(Expr expr, Analyzer analyzer) { + if (!(expr instanceof FunctionCallExpr)) return expr; + FunctionCallExpr fn = (FunctionCallExpr) expr; + FunctionName fnName = fn.getFnName(); + if (!fnName.isBuiltin()) return expr; + String name = fnName.getFunction(); + if (!symmetric_rels.contains(name)) return expr; + Expr arg1 = fn.getChild(0); + Expr arg2 = fn.getChild(1); + if (arg1.isConstant() || !arg2.isConstant()) return expr; // no need to swap + return new FunctionCallExpr(name, Arrays.asList(arg2, arg1)); + } +} diff --git a/fe/src/main/java/org/apache/impala/rewrite/PointEnvIntersectsRule.java b/fe/src/main/java/org/apache/impala/rewrite/PointEnvIntersectsRule.java new file mode 100644 index 000000000..5d83ef7a6 --- /dev/null +++ 
b/fe/src/main/java/org/apache/impala/rewrite/PointEnvIntersectsRule.java @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.impala.rewrite; + +import java.util.Arrays; +import java.util.List; + +import org.apache.impala.analysis.Analyzer; +import org.apache.impala.analysis.BinaryPredicate; +import org.apache.impala.analysis.CompoundPredicate; +import org.apache.impala.analysis.Expr; +import org.apache.impala.analysis.FunctionCallExpr; +import org.apache.impala.analysis.FunctionName; +import org.apache.impala.common.AnalysisException; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + +/** + * Turns st_envintersects() on points from x/y pairs to applying bounding rect + * intersection directly on the input coordinates. + * + * Examples: + * st_envintersects(CONST_GEOM, st_point(x, y)) + * -> + * x >= st_minx(CONST_GEOM) AND y >= st_miny(CONST_GEOM) AND + * x <= st_maxx(CONST_GEOM) AND y <= st_maxy(CONST_GEOM) + * + * st_maxx(CONST_GEOM) and others will be rewritten to constants by FoldConstantsRule, so + * the final conjuncts will be like x <= CONST_DOUBLE, which can be often pushed down + * efficiently (e.g. to Parquet or Iceberg). 
+ * + * Warning: + * this rule is only valid in planar geometry, in the spherical case + * shapes that intersect the antimeridian would need special handling + */ +public class PointEnvIntersectsRule implements ExprRewriteRule { + public static ExprRewriteRule INSTANCE = new PointEnvIntersectsRule(); + + // Common subexpressions are computed multiple times (IMPALA-7737), so this rule is only + // beneficial if children are very cheap. + static final float CHILD_COST_LIMIT = Expr.FUNCTION_CALL_COST * 2; + + @Override + public Expr apply(Expr expr, Analyzer analyzer) throws AnalysisException { + if (!(expr instanceof FunctionCallExpr)) return expr; + FunctionCallExpr fn = (FunctionCallExpr) expr; + FunctionName fnName = fn.getFnName(); + if (!fnName.isBuiltin()) return expr; + if (!fnName.getFunction().equals("st_envintersects")) return expr; + + // first args must be const due to NormalizeGeospatialRelationsRule + Expr constChild = expr.getChild(0); + if (!constChild.isConstant()) return expr; + Expr pointChild = expr.getChild(1); + + Preconditions.checkState(constChild.getType().isBinary()); + if (!(pointChild instanceof FunctionCallExpr)) return expr; + FunctionCallExpr pointFn = (FunctionCallExpr) pointChild; + if (!pointFn.getFnName().getFunction().equalsIgnoreCase("st_point")) return expr; + // TODO: maybe do this only if child expressions are simple? 
+ if (pointFn.getChildCount() != 2 + || !pointFn.getChild(0).getType().isFloatingPointType() + || !pointFn.getChild(1).getType().isFloatingPointType()) { + return expr; + } + Expr x = pointFn.getChild(0).clone(); + Expr y = pointFn.getChild(1).clone(); + if (!x.hasCost() || !y.hasCost() || x.getCost() + y.getCost() > CHILD_COST_LIMIT) { + return expr; + } + Expr minx = new FunctionCallExpr("st_minx", Arrays.asList(constChild.clone())); + Expr miny = new FunctionCallExpr("st_miny", Arrays.asList(constChild.clone())); + Expr maxx = new FunctionCallExpr("st_maxx", Arrays.asList(constChild.clone())); + Expr maxy = new FunctionCallExpr("st_maxy", Arrays.asList(constChild.clone())); + List<Expr> predicates = Arrays.asList( + new BinaryPredicate(BinaryPredicate.Operator.LE, minx, x), + new BinaryPredicate(BinaryPredicate.Operator.LE, miny, y), + new BinaryPredicate(BinaryPredicate.Operator.GE, maxx, x), + new BinaryPredicate(BinaryPredicate.Operator.GE, maxy, y) + ); + return CompoundPredicate.createConjunctivePredicate(predicates); + } + +} diff --git a/testdata/workloads/functional-query/queries/QueryTest/geospatial-esri-planner.test b/testdata/workloads/functional-query/queries/QueryTest/geospatial-esri-planner.test index 14eb187bc..213b74fc7 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/geospatial-esri-planner.test +++ b/testdata/workloads/functional-query/queries/QueryTest/geospatial-esri-planner.test @@ -7,4 +7,90 @@ where st_geomfromwkb(binary_col) is not null and sqrt(id) = 1; ---- RUNTIME_PROFILE predicates: sqrt(CAST(id AS DOUBLE)) = CAST(1 AS DOUBLE), st_geomfromwkb(binary_col) IS NOT NULL ==== - +---- QUERY +# Check that NormalizeGeospatialRelationsRule is applied and the const argument is moved +# to the first position in symmetric geospatial relations. +# AddEnvIntersectsRule is not applied for st_disjoint(). 
+select * from functional.binary_tbl +where st_disjoint(binary_col, st_point(1, 1)) +---- RUNTIME_PROFILE +predicates: st_disjoint('unhex("000000000101000000000000000000F03F000000000000F03F")', binary_col) +==== +---- QUERY +# Check that AddEnvIntersectsRule adds st_envintersects() before relations. +select * from functional.binary_tbl +where st_touches(binary_col, st_point(id, id)) +---- RUNTIME_PROFILE +predicates: st_envintersects(binary_col, st_point(CAST(id AS DOUBLE), CAST(id AS DOUBLE))), st_touches(binary_col, st_point(CAST(id AS DOUBLE), CAST(id AS DOUBLE))) +==== +---- QUERY +# Check that AddEnvIntersectsRule doesn't duplicate st_envintersects() when it is +# already present. +select * from functional.binary_tbl +where st_envintersects(binary_col, st_point(id, id)) and st_touches(binary_col, st_point(id, id)) +---- RUNTIME_PROFILE +predicates: st_envintersects(binary_col, st_point(CAST(id AS DOUBLE), CAST(id AS DOUBLE))), st_touches(binary_col, st_point(CAST(id AS DOUBLE), CAST(id AS DOUBLE))) +==== +---- QUERY +# Check that AddEnvIntersectsRule does NOT add st_envintersects() if child expressions +# are expensive (more than 2 Java function calls). +select * from functional.alltypestiny +where st_touches(st_point(double_col, double_col), st_point(id, id)) +---- RUNTIME_PROFILE +predicates: st_touches(st_point(double_col, double_col), st_point(CAST(id AS DOUBLE), CAST(id AS DOUBLE))) +==== +---- QUERY +# Check that PointEnvIntersectsRule is applied and st_envintersects() on st_point(x,y) +# is rewritten to predicates on coordinate columns. 
+select id, double_col, float_col from functional_parquet.alltypessmall +where st_envintersects(st_geomfromtext("polygon ((-1 0, 20 0, 20 20.5, -1 20.5, -1 0))"), + st_point(double_col, float_col)) + and month = 1; +---- TYPES +INT,DOUBLE,FLOAT +---- RESULTS +0,0.0,0.0 +1,10.1,1.100000023841858 +10,0.0,0.0 +11,10.1,1.100000023841858 +20,0.0,0.0 +21,10.1,1.100000023841858 +---- RUNTIME_PROFILE +predicates: double_col <= CAST(20 AS DOUBLE), double_col >= CAST(-1 AS DOUBLE), CAST(float_col AS DOUBLE) <= CAST(20.5 AS DOUBLE), CAST(float_col AS DOUBLE) >= CAST(0 AS DOUBLE) +parquet statistics predicates: double_col <= CAST(20 AS DOUBLE), double_col >= CAST(-1 AS DOUBLE), CAST(float_col AS DOUBLE) <= CAST(20.5 AS DOUBLE), CAST(float_col AS DOUBLE) >= CAST(0 AS DOUBLE) +parquet dictionary predicates: double_col <= CAST(20 AS DOUBLE), double_col >= CAST(-1 AS DOUBLE), CAST(float_col AS DOUBLE) <= CAST(20.5 AS DOUBLE), CAST(float_col AS DOUBLE) >= CAST(0 AS DOUBLE) +==== +---- QUERY +# Check that PointEnvIntersectsRule is applied on the st_envintersects() added by +# AddEnvIntersectsRule. 
+select id, double_col, float_col from functional_parquet.alltypessmall +where st_intersects(st_point(double_col, float_col), + st_geomfromtext("polygon ((-1 0, 20 0, 20 20.5, -1 20.5, -1 0))")) + and month = 1; +---- TYPES +INT,DOUBLE,FLOAT +---- RESULTS +0,0.0,0.0 +1,10.1,1.100000023841858 +10,0.0,0.0 +11,10.1,1.100000023841858 +20,0.0,0.0 +21,10.1,1.100000023841858 +---- RUNTIME_PROFILE +row_regex: .*predicates: double_col <= CAST\(20 AS DOUBLE\), double_col >= CAST\(-1 AS DOUBLE\), CAST\(float_col AS DOUBLE\) <= CAST\(20.5 AS DOUBLE\), CAST\(float_col AS DOUBLE\) >= CAST\(0 AS DOUBLE\), st_intersects.* +parquet statistics predicates: double_col <= CAST(20 AS DOUBLE), double_col >= CAST(-1 AS DOUBLE), CAST(float_col AS DOUBLE) <= CAST(20.5 AS DOUBLE), CAST(float_col AS DOUBLE) >= CAST(0 AS DOUBLE) +parquet dictionary predicates: double_col <= CAST(20 AS DOUBLE), double_col >= CAST(-1 AS DOUBLE), CAST(float_col AS DOUBLE) <= CAST(20.5 AS DOUBLE), CAST(float_col AS DOUBLE) >= CAST(0 AS DOUBLE) +===== +---- QUERY +# Check that PointEnvIntersectsRule is NOT applied if children are expensive. 
+select count(*) from functional_parquet.alltypessmall +where st_envintersects(st_point(sqrt(double_col), sqrt(float_col)), + st_geomfromtext("polygon ((-1 0, 20 0, 20 20.5, -1 20.5, -1 0))")) + and month = 1; +---- TYPES +BIGINT +---- RESULTS +25 +---- RUNTIME_PROFILE +row_regex: .*predicates: st_envintersects\(\'unhex\(\".*\"\)\', st_point\(sqrt\(double_col\), sqrt\(CAST\(float_col AS DOUBLE\)\)\)\) +===== diff --git a/tests/query_test/test_geospatial_functions.py b/tests/query_test/test_geospatial_functions.py index 810dded74..7aeb2b2ac 100644 --- a/tests/query_test/test_geospatial_functions.py +++ b/tests/query_test/test_geospatial_functions.py @@ -22,6 +22,7 @@ from tests.common.test_dimensions import create_single_exec_option_dimension class TestGeospatialFuctions(ImpalaTestSuite): + """Tests the geospatial builtin functions""" @classmethod def add_test_dimensions(cls): @@ -31,7 +32,6 @@ class TestGeospatialFuctions(ImpalaTestSuite): cls.ImpalaTestMatrix.add_constraint(lambda v: v.get_value('table_format').file_format == 'parquet') - """Tests the geospatial builtin functions""" @SkipIfApacheHive.feature_not_supported def test_esri_geospatial_functions(self, vector): # tests generated from
