This is an automated email from the ASF dual-hosted git repository.
joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 4963ee6d2 IMPALA-14629: Implement st_point(double,double) in c++
4963ee6d2 is described below
commit 4963ee6d2272669a103c7cca98abd3b4353b5016
Author: Csaba Ringhofer <[email protected]>
AuthorDate: Wed Jan 14 21:08:13 2026 +0100
IMPALA-14629: Implement st_point(double,double) in c++
Adds native implementation for the common 2d constructor while
keeps the more complex st_point(string) in Java. Tried using
boost's WKT parser but making it work with >2d geometries
seemed non-trivial.
Also removes the following overloads:
st_point(double,double,double)
st_point(double,double,double,double)
st_pointz(double,double,double,double)
These conflict with overloads that have different semantics in
PostGis, see HIVE-29395 for details.
Testing:
- no test changes are needed - the remaining constructors
are covered while the removed ones were not used at all
Change-Id: I927413f92cf4d4e9a995f7024de0ec2e3b584b6d
Reviewed-on: http://gerrit.cloudera.org:8080/23874
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/src/exprs/geo/geospatial-functions-ir.cc | 9 +++++++
be/src/exprs/geo/geospatial-functions.h | 3 +++
be/src/exprs/geo/shape-format.h | 13 +++++++++
.../impala/compat/HiveEsriGeospatialBuiltins.java | 31 ++++++++++++++++++++--
.../queries/QueryTest/geospatial-esri-planner.test | 8 +++---
5 files changed, 58 insertions(+), 6 deletions(-)
diff --git a/be/src/exprs/geo/geospatial-functions-ir.cc
b/be/src/exprs/geo/geospatial-functions-ir.cc
index aa1ae6099..d3fb1034d 100644
--- a/be/src/exprs/geo/geospatial-functions-ir.cc
+++ b/be/src/exprs/geo/geospatial-functions-ir.cc
@@ -95,6 +95,15 @@ StringVal GeospatialFunctions::st_SetSrid(FunctionContext*
ctx, const StringVal&
return res;
}
+// Constructors
+
+StringVal GeospatialFunctions::st_Point(FunctionContext* ctx, const DoubleVal&
x,
+ const DoubleVal& y) {
+ if (x.is_null) return StringVal::null();
+ if (y.is_null) return StringVal::null();
+ return createStPoint(ctx, x.val, y.val, 0);
+}
+
// Predicates
BooleanVal GeospatialFunctions::st_EnvIntersects(
diff --git a/be/src/exprs/geo/geospatial-functions.h
b/be/src/exprs/geo/geospatial-functions.h
index 66110f843..50906bc03 100644
--- a/be/src/exprs/geo/geospatial-functions.h
+++ b/be/src/exprs/geo/geospatial-functions.h
@@ -50,6 +50,9 @@ class GeospatialFunctions {
static StringVal st_SetSrid(FunctionContext* ctx, const StringVal& geom,
const IntVal& srid);
+ // Constructors
+ static StringVal st_Point(FunctionContext* ctx, const DoubleVal& x, const
DoubleVal& y);
+
// Predicates
static BooleanVal st_EnvIntersects(
FunctionContext* ctx, const StringVal& lhs,const StringVal& rhs);
diff --git a/be/src/exprs/geo/shape-format.h b/be/src/exprs/geo/shape-format.h
index 091de17a4..4e72af830 100644
--- a/be/src/exprs/geo/shape-format.h
+++ b/be/src/exprs/geo/shape-format.h
@@ -291,4 +291,17 @@ inline bool bBoxIntersects(const StringVal& lhs_geom,
const StringVal rhs_geom,
return true;
}
+inline StringVal createStPoint(FunctionContext* ctx, double x, double y,
+ uint32_t srid = 0) {
+ StringVal res(ctx, MIN_POINT_SIZE);
+
+ setSrid(res, srid);
+ setOGCType(res, ST_POINT);
+ setEsriType(res, ShapePoint);
+ setMinX(res, x);
+ setMinY(res, y);
+
+ return res;
+}
+
} // namespace impala
diff --git
a/fe/src/compat-hive-3/java/org/apache/impala/compat/HiveEsriGeospatialBuiltins.java
b/fe/src/compat-hive-3/java/org/apache/impala/compat/HiveEsriGeospatialBuiltins.java
index 048958e1e..7ed295f14 100644
---
a/fe/src/compat-hive-3/java/org/apache/impala/compat/HiveEsriGeospatialBuiltins.java
+++
b/fe/src/compat-hive-3/java/org/apache/impala/compat/HiveEsriGeospatialBuiltins.java
@@ -94,8 +94,8 @@ public class HiveEsriGeospatialBuiltins {
new ST_M(), new ST_MaxM(), new ST_MaxZ(),
new ST_MinM(), new ST_MinZ(), new ST_MLineFromWKB(),
new ST_MPointFromWKB(), new ST_MPolyFromWKB(), new ST_NumGeometries(),
- new ST_NumInteriorRing(), new ST_NumPoints(), new ST_Point(),
- new ST_PointFromWKB(), new ST_PointN(), new ST_PointZ(), new
ST_PolyFromWKB(),
+ new ST_NumInteriorRing(), new ST_NumPoints(),
+ new ST_PointFromWKB(), new ST_PointN(), new ST_PolyFromWKB(),
new ST_Relate(), new ST_StartPoint(), new ST_SymmetricDiff(),
new ST_Z());
@@ -115,6 +115,8 @@ public class HiveEsriGeospatialBuiltins {
db.addBuiltin(fn);
}
}
+ // st_point is a special case as it has both Java and native overloads
+ addJavaStPoint(db, addNatives);
}
private static void addGenericUDFs(Db db) {
@@ -180,6 +182,26 @@ public class HiveEsriGeospatialBuiltins {
}
}
+ private static void addJavaStPoint(Db db, boolean addNatives) {
+ // Create only specific overloads for st_point and st_pointz.
+ // Unlike Hive, overloads with more dimensions are not added to avoid
conflict with
+ // PostGis's optional "integer srid=unknown" argument, which is implicitly
castable
+ // from double. See HIVE-29395 for details.
+ // There is no ST_PointM and ST_PointZM at the moment so points with M
dimension can
+ // be created only with the WKT constructor.
+ if (!addNatives) {
+ Type[] args2d = {Type.DOUBLE, Type.DOUBLE};
+ db.addBuiltin(
+ createScalarFunction(ST_Point.class, "st_point", Type.BINARY,
args2d));
+ }
+ Type[] args3d = {Type.DOUBLE, Type.DOUBLE, Type.DOUBLE};
+ db.addBuiltin(
+ createScalarFunction(ST_PointZ.class, "st_pointz", Type.BINARY,
args3d));
+ Type[] argsWkt = {Type.STRING};
+ db.addBuiltin(
+ createScalarFunction(ST_Point.class, "st_point", Type.BINARY,
argsWkt));
+ }
+
private static List<ScalarFunction> extractFromLegacyHiveBuiltin(
UDF udf, String dbName) {
return extractFunctions(udf.getClass(), udf.getClass(), dbName);
@@ -248,6 +270,7 @@ public class HiveEsriGeospatialBuiltins {
private static void addNatives(Db db) {
// Legacy UDFs.
+
// Accessors.
addNative(db, "st_MinX", false, Type.DOUBLE, Type.BINARY);
addNative(db, "st_MaxX", false, Type.DOUBLE, Type.BINARY);
@@ -259,6 +282,10 @@ public class HiveEsriGeospatialBuiltins {
addNative(db, "st_SetSrid", false, Type.BINARY, Type.BINARY, Type.INT);
addNative(db, "st_GeometryType", false, Type.STRING, Type.BINARY);
+ // Constructors.
+ // Other point constructors are added as Java UDFs, see addJavaStPoint().
+ addNative(db, "st_Point", false, Type.BINARY, Type.DOUBLE, Type.DOUBLE);
+
// Predicates.
addNative(db, "st_EnvIntersects", false, Type.BOOLEAN, Type.BINARY,
Type.BINARY);
}
diff --git
a/testdata/workloads/functional-query/queries/QueryTest/geospatial-esri-planner.test
b/testdata/workloads/functional-query/queries/QueryTest/geospatial-esri-planner.test
index 213b74fc7..c36dbd8ec 100644
---
a/testdata/workloads/functional-query/queries/QueryTest/geospatial-esri-planner.test
+++
b/testdata/workloads/functional-query/queries/QueryTest/geospatial-esri-planner.test
@@ -35,9 +35,9 @@ predicates: st_envintersects(binary_col, st_point(CAST(id AS
DOUBLE), CAST(id AS
# Check that AddEnvIntersectsRule does NOT add st_envintersects() if child
expressions
# are expensive (more than 2 Java function calls).
select * from functional.alltypestiny
-where st_touches(st_point(double_col, double_col), st_point(id, id))
+where st_touches(st_point(string_col), st_point(string_col))
---- RUNTIME_PROFILE
-predicates: st_touches(st_point(double_col, double_col), st_point(CAST(id AS
DOUBLE), CAST(id AS DOUBLE)))
+predicates: st_touches(st_point(string_col), st_point(string_col))
====
---- QUERY
# Check that PointEnvIntersectsRule is applied and st_envintersects() on
st_point(x,y)
@@ -77,7 +77,7 @@ INT,DOUBLE,FLOAT
20,0.0,0.0
21,10.1,1.100000023841858
---- RUNTIME_PROFILE
-row_regex: .*predicates: double_col <= CAST\(20 AS DOUBLE\), double_col >=
CAST\(-1 AS DOUBLE\), CAST\(float_col AS DOUBLE\) <= CAST\(20.5 AS DOUBLE\),
CAST\(float_col AS DOUBLE\) >= CAST\(0 AS DOUBLE\), st_intersects.*
+row_regex: predicates: double_col <= CAST\(20 AS DOUBLE\), double_col >=
CAST\(-1 AS DOUBLE\), CAST\(float_col AS DOUBLE\) <= CAST\(20.5 AS DOUBLE\),
CAST\(float_col AS DOUBLE\) >= CAST\(0 AS DOUBLE\), st_intersects
parquet statistics predicates: double_col <= CAST(20 AS DOUBLE), double_col >=
CAST(-1 AS DOUBLE), CAST(float_col AS DOUBLE) <= CAST(20.5 AS DOUBLE),
CAST(float_col AS DOUBLE) >= CAST(0 AS DOUBLE)
parquet dictionary predicates: double_col <= CAST(20 AS DOUBLE), double_col >=
CAST(-1 AS DOUBLE), CAST(float_col AS DOUBLE) <= CAST(20.5 AS DOUBLE),
CAST(float_col AS DOUBLE) >= CAST(0 AS DOUBLE)
=====
@@ -92,5 +92,5 @@ BIGINT
---- RESULTS
25
---- RUNTIME_PROFILE
-row_regex: .*predicates: st_envintersects\(\'unhex\(\".*\"\)\',
st_point\(sqrt\(double_col\), sqrt\(CAST\(float_col AS DOUBLE\)\)\)\)
+row_regex: predicates: st_envintersects\(\'unhex\(\".*\"\)\',
st_point\(sqrt\(double_col\), sqrt\(CAST\(float_col AS DOUBLE\)\)\)\)
=====