This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 09b084b4 feat(python/sedonadb): Implement parameter binding (#575)
09b084b4 is described below
commit 09b084b4eb54a2a48d6e1c76459b4fb46499520f
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue Feb 10 10:21:33 2026 -0600
feat(python/sedonadb): Implement parameter binding (#575)
Co-authored-by: Copilot <[email protected]>
---
python/sedonadb/python/sedonadb/context.py | 49 +++++-
python/sedonadb/python/sedonadb/dataframe.py | 50 ++++++-
python/sedonadb/python/sedonadb/expr/__init__.py | 16 ++
python/sedonadb/python/sedonadb/expr/literal.py | 180 +++++++++++++++++++++++
python/sedonadb/src/dataframe.rs | 48 +++++-
python/sedonadb/src/import_from.rs | 21 ++-
python/sedonadb/tests/expr/test_literal.py | 151 +++++++++++++++++++
python/sedonadb/tests/test_dataframe.py | 47 ++++++
8 files changed, 549 insertions(+), 13 deletions(-)
diff --git a/python/sedonadb/python/sedonadb/context.py b/python/sedonadb/python/sedonadb/context.py
index f0793870..1dcb3fad 100644
--- a/python/sedonadb/python/sedonadb/context.py
+++ b/python/sedonadb/python/sedonadb/context.py
@@ -20,7 +20,7 @@ import os
import sys
from functools import cached_property
from pathlib import Path
-from typing import Any, Dict, Iterable, Literal, Optional, Union
+from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, Union
from sedonadb._lib import InternalContext, configure_proj_shared
from sedonadb._options import Options
@@ -283,7 +283,9 @@ class SedonaContext:
self.options,
)
- def sql(self, sql: str) -> DataFrame:
+ def sql(
+ self, sql: str, *, params: Union[List, Tuple, Dict, None] = None
+ ) -> DataFrame:
"""Create a [DataFrame][sedonadb.dataframe.DataFrame] by executing SQL
Parses a SQL string into a logical plan and returns a DataFrame
@@ -291,15 +293,52 @@ class SedonaContext:
Args:
sql: A single SQL statement.
+ params: An optional specification of parameters to bind if sql
+ contains placeholders (e.g., `$1` or `$my_param`). Use a
+ list or tuple to replace positional parameters or a dictionary
+ to replace named parameters. This is shorthand for
+ `.sql(...).with_params(...)` that is syntax-compatible with
+ DuckDB. See `lit()` for a list of supported Python objects.
Examples:
>>> sd = sedona.db.connect()
- >>> sd.sql("SELECT ST_Point(0, 1) as geom")
- <sedonadb.dataframe.DataFrame object at ...>
+ >>> sd.sql("SELECT ST_Point(0, 1) AS geom").show()
+ ┌────────────┐
+ │ geom │
+ │ geometry │
+ ╞════════════╡
+ │ POINT(0 1) │
+ └────────────┘
+ >>> sd.sql("SELECT ST_Point($1, $2) AS geom", params=(0, 1)).show()
+ ┌────────────┐
+ │ geom │
+ │ geometry │
+ ╞════════════╡
+ │ POINT(0 1) │
+ └────────────┘
+ >>> sd.sql("SELECT ST_Point($x, $y) AS geom", params={"x": 0, "y": 1}).show()
+ ┌────────────┐
+ │ geom │
+ │ geometry │
+ ╞════════════╡
+ │ POINT(0 1) │
+ └────────────┘
"""
- return DataFrame(self._impl, self._impl.sql(sql), self.options)
+ df = DataFrame(self._impl, self._impl.sql(sql), self.options)
+
+ if params is not None:
+ if isinstance(params, (tuple, list)):
+ return df.with_params(*params)
+ elif isinstance(params, dict):
+ return df.with_params(**params)
+ else:
+ raise ValueError(
+ "params must be a list, tuple, or dict of scalar values"
+ )
+ else:
+ return df
def register_udf(self, udf: Any):
"""Register a user-defined function
diff --git a/python/sedonadb/python/sedonadb/dataframe.py b/python/sedonadb/python/sedonadb/dataframe.py
index 38ed5caf..7f6d630e 100644
--- a/python/sedonadb/python/sedonadb/dataframe.py
+++ b/python/sedonadb/python/sedonadb/dataframe.py
@@ -16,14 +16,13 @@
# under the License.
from pathlib import Path
-from typing import TYPE_CHECKING, Union, Optional, Any, Iterable, Literal
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Literal, Optional, Union
from sedonadb.utility import sedona # noqa: F401
-
if TYPE_CHECKING:
- import pandas
import geopandas
+ import pandas
import pyarrow
@@ -156,6 +155,49 @@ class DataFrame:
"""
return self._impl.count()
+ def with_params(self, *args: List[Any], **kwargs: Dict[str, Any]):
+ """Replace unbound parameters in this query
+
+ For DataFrames that represent a logical plan that contains parameters (e.g.,
+ a SQL query of `SELECT $1 + 2`), replace parameters with concrete values.
+ See `lit()` for a list of supported Python objects.
+
+ Args:
+ args: Values to bind to positional parameters (e.g., `$1`, `$2`, `$3`)
+ kwargs: Values to bind to named parameters (e.g., `$my_param`). Note that
+ positional and named parameters cannot currently be mixed (i.e.,
+ parameters must be all positional or all named).
+
+ Examples:
+
+ >>> sd = sedona.db.connect()
+ >>> sd.sql("SELECT $1 + 2 AS c").with_params(100).show()
+ ┌───────┐
+ │ c │
+ │ int64 │
+ ╞═══════╡
+ │ 102 │
+ └───────┘
+ >>> sd.sql("SELECT $my_param + 2 AS c").with_params(my_param=100).show()
+ ┌───────┐
+ │ c │
+ │ int64 │
+ ╞═══════╡
+ │ 102 │
+ └───────┘
+
+ """
+ from sedonadb.expr.literal import lit
+
+ positional_params = [lit(arg) for arg in args]
+ named_params = {k: lit(param) for k, param in kwargs.items()}
+
+ return DataFrame(
+ self._ctx,
+ self._impl.with_params(positional_params, named_params),
+ self._options,
+ )
+
def __arrow_c_schema__(self):
"""ArrowSchema PyCapsule interface
@@ -250,8 +292,8 @@ class DataFrame:
geometry: [[01010000000000000000000000000000000000F03F]]
"""
- import pyarrow as pa
import geoarrow.pyarrow # noqa: F401
+ import pyarrow as pa
# Collects all batches into an object that exposes __arrow_c_stream__()
batches = self._impl.to_batches(schema)
diff --git a/python/sedonadb/python/sedonadb/expr/__init__.py b/python/sedonadb/python/sedonadb/expr/__init__.py
new file mode 100644
index 00000000..13a83393
--- /dev/null
+++ b/python/sedonadb/python/sedonadb/expr/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/python/sedonadb/python/sedonadb/expr/literal.py b/python/sedonadb/python/sedonadb/expr/literal.py
new file mode 100644
index 00000000..6cd3d55a
--- /dev/null
+++ b/python/sedonadb/python/sedonadb/expr/literal.py
@@ -0,0 +1,180 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any
+
+
+class Literal:
+ """A Literal (constant) expression
+
+ This class represents a literal value in query that does not change
+ based on other information in the query or the environment. This type
+ of expression is also referred to as a constant. These types of
+ expressions are normally created with the `lit()` function or are
+ automatically created when passing an arbitrary Python object to
+ a context (e.g., parameterized SQL queries) where a literal is
+ required.
+
+ Literal expressions are lazily resolved such that specific contexts
+ have access to the underlying Python object and can resolve the
+ object specially (e.g., by forcing a specific Arrow type) if
+ required.
+
+ Args:
+ value: An arbitrary Python object.
+ """
+
+ def __init__(self, value: Any):
+ self._value = value
+
+ def __arrow_c_array__(self, requested_schema=None):
+ resolved_lit = _resolve_arrow_lit(self._value)
+ return resolved_lit.__arrow_c_array__(requested_schema=requested_schema)
+
+ def __repr__(self):
+ return f"<Literal>\n{repr(self._value)}"
+
+
+def lit(value: Any) -> Literal:
+ """Create a literal (constant) expression
+
+ Creates a `Literal` object around value, or returns value if it is
+ already a `Literal`. This is the primary function that should be used
+ to wrap an arbitrary Python object a constant to prepare it as input
+ to any SedonaDB logical expression context (e.g., parameterized SQL).
+
+ Literal values can be created from a variety of Python objects whose
+ representation as a scalar constant is unambiguous. Any object that
+ is accepted by `pyarrow.array([...])` is supported in addition to:
+
+ - Shapely geometries become SedonaDB geometry objects.
+ - GeoSeries objects of length 1 become SedonaDB geometries
+ with CRS preserved.
+ - GeoDataFrame objects with a single column and single row become
+ SedonaDB geometries with CRS preserved.
+ - Pandas DataFrame objects with a single column and single row
+ are converted using `pa.array()`.
+ - SedonaDB DataFrame objects that evaluate to a single column and
+ row become a scalar value according to the single represented
+ value.
+
+ """
+ if isinstance(value, Literal):
+ return value
+ else:
+ return Literal(value)
+
+
+def _resolve_arrow_lit(obj: Any):
+ qualified_name = _qualified_type_name(obj)
+ if qualified_name in SPECIAL_CASED_LITERALS:
+ return SPECIAL_CASED_LITERALS[qualified_name](obj)
+
+ if hasattr(obj, "__arrow_c_array__"):
+ return obj
+
+ import pyarrow as pa
+
+ try:
+ return pa.array([obj])
+ except Exception as e:
+ raise ValueError(
+ f"Can't create SedonaDB literal from object of type {qualified_name}"
+ ) from e
+
+
+def _lit_from_geoarrow_scalar(obj):
+ wkb_value = None if obj.value is None else obj.wkb
+ return _lit_from_wkb_and_crs(wkb_value, obj.type.crs)
+
+
+def _lit_from_dataframe(obj):
+ if obj.shape != (1, 1):
+ raise ValueError(
+ "Can't create SedonaDB literal from DataFrame with shape != (1, 1)"
+ )
+
+ return _resolve_arrow_lit(obj.iloc[0])
+
+
+def _lit_from_series(obj):
+ if len(obj) != 1:
+ raise ValueError("Can't create SedonaDB literal from Series with length != 1")
+
+ # A column with dtype "geometry" is not always a GeoSeries; however, if the dtype
+ # is geometry, obj.array.crs should still be available to extract the CRS.
+ if obj.dtype.name == "geometry":
+ first_value = obj.array[0]
+ first_wkb = None if first_value is None else first_value.wkb
+ return _lit_from_wkb_and_crs(first_wkb, obj.array.crs)
+ else:
+ import pyarrow as pa
+
+ return pa.array(obj)
+
+
+def _lit_from_sedonadb(obj):
+ if len(obj.columns) != 1:
+ raise ValueError(
+ "Can't create SedonaDB literal from SedonaDB DataFrame with number of columns != 1"
+ )
+
+ tab = obj.limit(2).to_arrow_table()
+ if len(tab) != 1:
+ raise ValueError(
+ "Can't create SedonaDB literal from SedonaDB DataFrame with size != 1 row"
+ )
+
+ return tab[0].chunk(0)
+
+
+def _lit_from_shapely(obj):
+ return _lit_from_wkb_and_crs(obj.wkb, None)
+
+
+def _lit_from_wkb_and_crs(wkb, crs):
+ import pyarrow as pa
+ import geoarrow.pyarrow as ga
+
+ type = ga.wkb().with_crs(crs)
+ storage = pa.array([wkb], type.storage_type)
+ return type.wrap_array(storage)
+
+
+def _qualified_type_name(obj):
+ return f"{type(obj).__module__}.{type(obj).__name__}"
+
+
+SPECIAL_CASED_LITERALS = {
+ "geopandas.geodataframe.GeoDataFrame": _lit_from_dataframe,
+ "geopandas.geoseries.GeoSeries": _lit_from_series,
+ # pandas < 3.0
+ "pandas.core.frame.DataFrame": _lit_from_dataframe,
+ # pandas >= 3.0
+ "pandas.DataFrame": _lit_from_dataframe,
+ "pandas.Series": _lit_from_series,
+ "sedonadb.dataframe.DataFrame": _lit_from_sedonadb,
+ "shapely.geometry.point.Point": _lit_from_shapely,
+ "shapely.geometry.linestring.LineString": _lit_from_shapely,
+ "shapely.geometry.polygon.Polygon": _lit_from_shapely,
+ "shapely.geometry.polygon.LinearRing": _lit_from_shapely,
+ "shapely.geometry.multipoint.MultiPoint": _lit_from_shapely,
+ "shapely.geometry.multilinestring.MultiLineString": _lit_from_shapely,
+ "shapely.geometry.multipolygon.MultiPolygon": _lit_from_shapely,
+ "shapely.geometry.collection.GeometryCollection": _lit_from_shapely,
+ "geoarrow.pyarrow._scalar.WkbScalar": _lit_from_geoarrow_scalar,
+}
diff --git a/python/sedonadb/src/dataframe.rs b/python/sedonadb/src/dataframe.rs
index eb57f6db..e0bf2151 100644
--- a/python/sedonadb/src/dataframe.rs
+++ b/python/sedonadb/src/dataframe.rs
@@ -1,3 +1,4 @@
+use std::collections::HashMap;
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
@@ -24,12 +25,12 @@ use arrow_schema::{Schema, SchemaRef};
use datafusion::catalog::MemTable;
use datafusion::logical_expr::SortExpr;
use datafusion::prelude::DataFrame;
-use datafusion_common::{Column, DataFusionError};
+use datafusion_common::{Column, DataFusionError, ParamValues};
use datafusion_expr::{ExplainFormat, ExplainOption, Expr};
use datafusion_ffi::table_provider::FFI_TableProvider;
use futures::TryStreamExt;
use pyo3::prelude::*;
-use pyo3::types::PyCapsule;
+use pyo3::types::{PyCapsule, PyDict, PyList};
use sedona::context::{SedonaDataFrame, SedonaWriteOptions};
use sedona::show::{DisplayMode, DisplayTableOptions};
use sedona_geoparquet::options::{GeoParquetVersion, TableGeoParquetOptions};
@@ -38,7 +39,7 @@ use tokio::runtime::Runtime;
use crate::context::InternalContext;
use crate::error::PySedonaError;
-use crate::import_from::import_arrow_schema;
+use crate::import_from::{import_arrow_scalar, import_arrow_schema};
use crate::reader::PySedonaStreamReader;
use crate::runtime::wait_for_future;
use crate::schema::PySedonaSchema;
@@ -275,6 +276,47 @@ impl InternalDataFrame {
Ok(Self::new(explain_df, self.runtime.clone()))
}
+ fn with_params<'py>(
+ &self,
+ params_positional_py: Bound<'py, PyList>,
+ params_named_py: Bound<'py, PyDict>,
+ ) -> Result<InternalDataFrame, PySedonaError> {
+ let mut df = self.inner.clone();
+
+ match (params_positional_py.is_empty(), params_named_py.is_empty()) {
+ (true, false) => {
+ let params = params_named_py
+ .iter()
+ .map(|(key, param_py)| {
+ let key_str: String = key.extract()?;
+ let value = import_arrow_scalar(&param_py)?;
+ Ok((key_str, value))
+ })
+ .collect::<Result<HashMap<_, _>, PySedonaError>>()?;
+ df = df.with_param_values(ParamValues::Map(params))?;
+ }
+ (false, true) => {
+ let params = params_positional_py
+ .iter()
+ .map(|param_py| import_arrow_scalar(&param_py))
+ .collect::<Result<Vec<_>, PySedonaError>>()?;
+ df = df.with_param_values(ParamValues::List(params))?;
+ }
+ (true, true) => {
+ // If both are empty, still attempt to bind with empty parameter set.
+ // This ensures consistent errors for unbound parameters.
+ df = df.with_param_values(ParamValues::Map(Default::default()))?;
+ }
+ (false, false) => {
+ return Err(PySedonaError::SedonaPython(
+ "Can't specify both positional and named parameters".to_string(),
+ ))
+ }
+ }
+
+ Ok(InternalDataFrame::new(df, self.runtime.clone()))
+ }
+
fn __datafusion_table_provider__<'py>(
&self,
py: Python<'py>,
diff --git a/python/sedonadb/src/import_from.rs b/python/sedonadb/src/import_from.rs
index e31b0bef..964951d4 100644
--- a/python/sedonadb/src/import_from.rs
+++ b/python/sedonadb/src/import_from.rs
@@ -26,7 +26,8 @@ use arrow_array::{
};
use arrow_schema::{Field, Schema};
use datafusion::catalog::TableProvider;
-use datafusion_expr::ScalarUDF;
+use datafusion_common::{metadata::ScalarAndMetadata, ScalarValue};
+use datafusion_expr::{expr::FieldMetadata, ScalarUDF};
use datafusion_ffi::{
table_provider::{FFI_TableProvider, ForeignTableProvider},
udf::{FFI_ScalarUDF, ForeignScalarUDF},
@@ -120,6 +121,24 @@ pub fn import_arrow_array(obj: &Bound<PyAny>) -> Result<(Field, ArrayRef), PySed
Ok((result_field, make_array(result_array_data)))
}
+pub fn import_arrow_scalar(obj: &Bound<PyAny>) -> Result<ScalarAndMetadata, PySedonaError> {
+ let (field, array) = import_arrow_array(obj)?;
+ if array.len() != 1 {
+ return Err(PySedonaError::SedonaPython(format!(
+ "Expected Arrow scalar input to be of length 1 but got length {}",
+ array.len()
+ )));
+ }
+
+ let metadata = FieldMetadata::new_from_field(&field);
+ let scalar_value = ScalarValue::try_from_array(&array, 0)?;
+ if metadata.is_empty() {
+ Ok(ScalarAndMetadata::new(scalar_value, None))
+ } else {
+ Ok(ScalarAndMetadata::new(scalar_value, Some(metadata)))
+ }
+}
+
pub fn import_arg_matcher(
obj: &Bound<PyAny>,
) -> Result<Arc<dyn TypeMatcher + Send + Sync>, PySedonaError> {
diff --git a/python/sedonadb/tests/expr/test_literal.py b/python/sedonadb/tests/expr/test_literal.py
new file mode 100644
index 00000000..b1941131
--- /dev/null
+++ b/python/sedonadb/tests/expr/test_literal.py
@@ -0,0 +1,151 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pyarrow as pa
+import shapely
+import geopandas
+import pandas as pd
+import geoarrow.pyarrow as ga
+import geopandas.testing
+
+from sedonadb.expr.literal import lit
+import pytest
+
+
+def test_basic_python_literal():
+ assert pa.array(lit(1)) == pa.array([1])
+ assert pa.array(lit("one")) == pa.array(["one"])
+ assert pa.array(lit(None)) == pa.array([None])
+
+
+def test_already_arrow_literal():
+ assert pa.array(lit(pa.array([1]))) == pa.array([1])
+
+
+def test_arrow_scalar_literal():
+ non_geo_array = pa.array([1])
+ assert pa.array(lit(non_geo_array[0])) == pa.array([1])
+
+ # Check non-null
+ geo_array = ga.with_crs(ga.as_wkb(["POINT (0 1)"]), ga.OGC_CRS84)
+ lit_array = pa.array(lit(geo_array[0]))
+ assert lit_array.type.crs.to_json_dict()["id"] == {
+ "authority": "OGC",
+ "code": "CRS84",
+ }
+
+ # Check null (type and CRS should propagate)
+ geo_array = ga.with_crs(ga.as_wkb(pa.array([None], pa.binary())), ga.OGC_CRS84)
+ lit_array = pa.array(lit(geo_array[0]))
+ assert lit_array.type.crs.to_json_dict()["id"] == {
+ "authority": "OGC",
+ "code": "CRS84",
+ }
+
+
+# We need to test all geometry types for shapely because these have all different
+# Python class names depending on the geometry type
[email protected](
+ "wkt",
+ [
+ "POINT (0 1)",
+ "LINESTRING (0 0, 1 1, 2 0)",
+ "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))",
+ "MULTIPOINT ((0 0), (1 1))",
+ "MULTILINESTRING ((0 0, 1 1), (2 2, 3 3))",
+ "MULTIPOLYGON (((0 0, 1 0, 1 1, 0 1, 0 0)), ((2 2, 3 2, 3 3, 2 3, 2 2)))",
+ "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1))",
+ ],
+)
+def test_shapely_literal(wkt):
+ shapely_obj = shapely.from_wkt(wkt)
+ literal = lit(shapely_obj)
+
+ array = pa.array(literal)
+ assert array == ga.as_wkb([wkt])
+
+
+def test_shapely_linearring():
+ shapely_obj = shapely.from_wkt("LINEARRING (0 0, 1 0, 0 1, 0 0)")
+ literal = lit(shapely_obj)
+
+ array = pa.array(literal)
+ assert array == ga.as_wkb(["LINESTRING (0 0, 1 0, 0 1, 0 0)"])
+
+
+def test_geopandas_literal():
+ geoseries = geopandas.GeoSeries.from_wkt(["POINT (0 1)"], crs=3857)
+
+ # Check GeoSeries literal
+ literal = lit(geoseries)
+ array = pa.array(literal)
+ assert array.type.crs.to_json_dict()["id"] == {"authority": "EPSG", "code": 3857}
+
+ geopandas.testing.assert_geoseries_equal(
+ geopandas.GeoSeries.from_arrow(array), geoseries
+ )
+
+ # Check GeoDataFrame literal
+ geodf = geopandas.GeoDataFrame({"geom": geoseries})
+ literal = lit(geodf)
+ array = pa.array(literal)
+ assert array.type.crs.to_json_dict()["id"] == {"authority": "EPSG", "code": 3857}
+
+ geopandas.testing.assert_geoseries_equal(
+ geopandas.GeoSeries.from_arrow(array), geoseries
+ )
+
+ # Check GeoSeries literal where the first value was None (CRS and type should
+ # still propagate)
+ geoseries = geopandas.GeoSeries([None], crs=3857)
+ literal = lit(geoseries)
+ array = pa.array(literal)
+ assert array.type.crs.to_json_dict()["id"] == {"authority": "EPSG", "code": 3857}
+
+ geopandas.testing.assert_geoseries_equal(
+ geopandas.GeoSeries.from_arrow(array), geoseries
+ )
+
+
+def test_pandas_literal():
+ series = pd.Series([1])
+ pd.testing.assert_series_equal(pa.array(lit(series)).to_pandas(), series)
+
+ df = pd.DataFrame({"x": series})
+ pd.testing.assert_series_equal(pa.array(lit(df)).to_pandas(), series)
+
+ with pytest.raises(ValueError, match="with length != 1"):
+ pa.array(lit(pd.Series([])))
+
+ with pytest.raises(ValueError, match=r"with shape != \(1, 1\)"):
+ pa.array(lit(pd.DataFrame({"x": []})))
+
+ with pytest.raises(ValueError, match=r"with shape != \(1, 1\)"):
+ pa.array(lit(pd.DataFrame({"x": [1], "y": [2]})))
+
+
+def test_sedonadb_literal(con):
+ df = con.sql("SELECT 1 as one")
+ assert pa.array(lit(df)) == pa.array([1])
+
+ with pytest.raises(ValueError, match="number of columns != 1"):
+ df = con.sql("SELECT 1 as one, 2 as two")
+ pa.array(lit(df))
+
+ with pytest.raises(ValueError, match="size != 1 row"):
+ df = con.sql("SELECT 1 as one WHERE false")
+ pa.array(lit(df))
diff --git a/python/sedonadb/tests/test_dataframe.py b/python/sedonadb/tests/test_dataframe.py
index 681d6d82..a72a7637 100644
--- a/python/sedonadb/tests/test_dataframe.py
+++ b/python/sedonadb/tests/test_dataframe.py
@@ -239,6 +239,53 @@ def test_count(con):
assert df.count() == 3
+def test_params(con):
+ # Binding a query that does not contain parameters with no parameters
+ # should work
+ df = con.sql("SELECT 101 AS col").with_params()
+ pd.testing.assert_frame_equal(df.to_pandas(), pd.DataFrame({"col": [101]}))
+
+ # Single parameter replaced in a few ways
+ df = con.sql("SELECT $1 + 1 AS col").with_params(100)
+ pd.testing.assert_frame_equal(df.to_pandas(), pd.DataFrame({"col": [101]}))
+
+ df = con.sql("SELECT $one + 1 AS col").with_params(one=100)
+ pd.testing.assert_frame_equal(df.to_pandas(), pd.DataFrame({"col": [101]}))
+
+ df = con.sql("SELECT $1 + 1 AS col", params=[100])
+ pd.testing.assert_frame_equal(df.to_pandas(), pd.DataFrame({"col": [101]}))
+
+ df = con.sql("SELECT $one + 1 AS col", params={"one": 100})
+ pd.testing.assert_frame_equal(df.to_pandas(), pd.DataFrame({"col": [101]}))
+
+ # Multiple parameters
+ df = con.sql("SELECT $one + $two AS col", params={"one": 100, "two": 1})
+ pd.testing.assert_frame_equal(df.to_pandas(), pd.DataFrame({"col": [101]}))
+
+ with pytest.raises(
+ ValueError, match="params must be a list, tuple, or dict of scalar values"
+ ):
+ con.sql("SELECT 1", params=df)
+
+ with pytest.raises(
+ sedonadb._lib.SedonaError,
+ match="Can't specify both positional and named parameters",
+ ):
+ con.sql("SELECT 1").with_params(1, two=2)
+
+ with pytest.raises(
+ sedonadb._lib.SedonaError,
+ match=r"No value found for placeholder with name \$one",
+ ):
+ con.sql("SELECT $one + 1 AS col").with_params()
+
+ with pytest.raises(
+ sedonadb._lib.SedonaError,
+ match=r"Placeholder '\$one' was not provided a value for execution",
+ ):
+ con.sql("SELECT $one + 1 AS col").show()
+
+
def test_dataframe_to_arrow(con):
df = con.sql("SELECT 1 as one, ST_GeomFromWKT('POINT (0 1)') as geom")
expected_schema = pa.schema(