This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new fab75bf refactor(python/sedonadb): Scope options to the SedonaContext (#106)
fab75bf is described below
commit fab75bfb9adc9cd81623ca1f8e287c1fffb7ce28
Author: Dewey Dunnington <[email protected]>
AuthorDate: Thu Sep 18 09:44:43 2025 -0500
refactor(python/sedonadb): Scope options to the SedonaContext (#106)
---
python/sedonadb/python/sedonadb/__init__.py | 4 ---
python/sedonadb/python/sedonadb/_options.py | 14 --------
python/sedonadb/python/sedonadb/context.py | 21 ++++++++++--
python/sedonadb/python/sedonadb/dataframe.py | 49 ++++++++++++++--------------
python/sedonadb/tests/test_context.py | 8 +++++
python/sedonadb/tests/test_dataframe.py | 4 +--
6 files changed, 52 insertions(+), 48 deletions(-)
diff --git a/python/sedonadb/python/sedonadb/__init__.py b/python/sedonadb/python/sedonadb/__init__.py
index 9d88698..7d46b86 100644
--- a/python/sedonadb/python/sedonadb/__init__.py
+++ b/python/sedonadb/python/sedonadb/__init__.py
@@ -16,10 +16,6 @@
# under the License.
from sedonadb import _lib
from sedonadb.context import connect, configure_proj
-from sedonadb import _options
-
-options = _options.global_options()
-"""Global options for SedonaDB"""
__version__ = _lib.sedona_python_version()
diff --git a/python/sedonadb/python/sedonadb/_options.py b/python/sedonadb/python/sedonadb/_options.py
index 2f3c65f..b7ac40a 100644
--- a/python/sedonadb/python/sedonadb/_options.py
+++ b/python/sedonadb/python/sedonadb/_options.py
@@ -52,17 +52,3 @@ class Options:
@width.setter
def width(self, value: Optional[int]):
self._width = value
-
-
-def global_options() -> Options:
- """Access the global options
-
- Most users should use `sedonadb.options` to access this singleton; however,
- internal SedonaDB Python code must use this function to avoid a circular
- dependency.
- """
- global _global_options
- return _global_options
-
-
-_global_options = Options()
diff --git a/python/sedonadb/python/sedonadb/context.py b/python/sedonadb/python/sedonadb/context.py
index 32f9dee..6348695 100644
--- a/python/sedonadb/python/sedonadb/context.py
+++ b/python/sedonadb/python/sedonadb/context.py
@@ -22,6 +22,7 @@ from typing import Any, Dict, Iterable, Literal, Optional, Union
from sedonadb._lib import InternalContext, configure_proj_shared
from sedonadb.dataframe import DataFrame, _create_data_frame
from sedonadb.utility import sedona # noqa: F401
+from sedonadb._options import Options
class SedonaContext:
@@ -30,10 +31,23 @@ class SedonaContext:
This object keeps track of state such as registered functions,
registered tables, and available memory. This is similar to a
Spark SessionContext or a database connection.
+
+ Examples:
+
+ >>> sd = sedona.db.connect()
+ >>> sd.options.interactive = True
+ >>> sd.sql("SELECT 1 as one")
+ ┌───────┐
+ │ one │
+ │ int64 │
+ ╞═══════╡
+ │ 1 │
+ └───────┘
"""
def __init__(self):
self._impl = InternalContext()
+ self.options = Options()
def create_data_frame(self, obj: Any, schema: Any = None) -> DataFrame:
"""Create a DataFrame from an in-memory or protocol-enabled object.
@@ -64,7 +78,7 @@ class SedonaContext:
│ 1 │
└───────┘
"""
- return _create_data_frame(self._impl, obj, schema)
+ return _create_data_frame(self._impl, obj, schema, self.options)
def view(self, name: str) -> DataFrame:
"""Create a [DataFrame][sedonadb.dataframe.DataFrame] from a named view
@@ -88,7 +102,7 @@ class SedonaContext:
>>> sd.drop_view("foofy")
"""
- return DataFrame(self._impl, self._impl.view(name))
+ return DataFrame(self._impl, self._impl.view(name), self.options)
def drop_view(self, name: str) -> None:
"""Remove a named view
@@ -135,6 +149,7 @@ class SedonaContext:
return DataFrame(
self._impl,
self._impl.read_parquet([str(path) for path in table_paths], options),
+ self.options,
)
def sql(self, sql: str) -> DataFrame:
@@ -153,7 +168,7 @@ class SedonaContext:
<sedonadb.dataframe.DataFrame object at ...>
"""
- return DataFrame(self._impl, self._impl.sql(sql))
+ return DataFrame(self._impl, self._impl.sql(sql), self.options)
def connect() -> SedonaContext:
diff --git a/python/sedonadb/python/sedonadb/dataframe.py b/python/sedonadb/python/sedonadb/dataframe.py
index bd45432..673fbfb 100644
--- a/python/sedonadb/python/sedonadb/dataframe.py
+++ b/python/sedonadb/python/sedonadb/dataframe.py
@@ -18,7 +18,6 @@
from pathlib import Path
from typing import TYPE_CHECKING, Union, Optional, Any, Iterable
-from sedonadb._options import global_options
from sedonadb.utility import sedona # noqa: F401
@@ -36,9 +35,10 @@ class DataFrame:
reading a file, or executing SQL.
"""
- def __init__(self, ctx, impl):
+ def __init__(self, ctx, impl, options):
self._ctx = ctx
self._impl = impl
+ self._options = options
@property
def schema(self):
@@ -110,7 +110,7 @@ class DataFrame:
└───────┘
"""
- return DataFrame(self._ctx, self._impl.limit(n, offset))
+ return DataFrame(self._ctx, self._impl.limit(n, offset), self._options)
def execute(self) -> None:
"""Execute the plan represented by this DataFrame
@@ -220,7 +220,7 @@ class DataFrame:
└────────────┘
"""
- return DataFrame(self._ctx, self._impl.to_memtable(self._ctx))
+ return DataFrame(self._ctx, self._impl.to_memtable(self._ctx), self._options)
def __datafusion_table_provider__(self):
return self._impl.__datafusion_table_provider__()
@@ -376,7 +376,7 @@ class DataFrame:
└────────────┘
"""
- width = _out_width(width)
+ width = self._out_width(width)
print(self._impl.show(self._ctx, limit, width, ascii), end="")
def explain(
@@ -420,29 +420,28 @@ class DataFrame:
│ ┆ │
└───────────────┴─────────────────────────────────┘
"""
- return DataFrame(self._ctx, self._impl.explain(type, format))
+ return DataFrame(self._ctx, self._impl.explain(type, format), self._options)
def __repr__(self) -> str:
- if global_options().interactive:
- width = _out_width()
+ if self._options.interactive:
+ width = self._out_width()
return self._impl.show(self._ctx, 10, width, ascii=False).strip()
else:
return super().__repr__()
+ def _out_width(self, width=None) -> int:
+ if width is None:
+ width = self._options.width
-def _out_width(width=None) -> int:
- if width is None:
- width = global_options().width
+ if width is None:
+ import shutil
- if width is None:
- import shutil
+ width, _ = shutil.get_terminal_size(fallback=(100, 24))
- width, _ = shutil.get_terminal_size(fallback=(100, 24))
+ return width
- return width
-
-def _create_data_frame(ctx_impl, obj, schema) -> DataFrame:
+def _create_data_frame(ctx_impl, obj, schema, options) -> DataFrame:
"""Create a DataFrame (internal)
This is defined here because we need it in future dataframe methods like
@@ -461,27 +460,27 @@ def _create_data_frame(ctx_impl, obj, schema) -> DataFrame:
# This includes geopandas/pandas DataFrames, pyarrow tables, and Polars tables.
type_name = _qualified_type_name(obj)
if type_name in SPECIAL_CASED_SCANS:
- return SPECIAL_CASED_SCANS[type_name](ctx_impl, obj, schema)
+ return SPECIAL_CASED_SCANS[type_name](ctx_impl, obj, schema, options)
# The default implementation handles objects that implement
# __datafusion_table_provider__ or __arrow_c_stream__. For objects implementing
# __arrow_c_stream__, this currently will only work for a single scan (i.e.,
# the returned data frame can't be previewed before the query is computed).
- return _scan_default(ctx_impl, obj, schema)
+ return _scan_default(ctx_impl, obj, schema, options)
-def _scan_default(ctx_impl, obj, schema):
+def _scan_default(ctx_impl, obj, schema, options):
impl = ctx_impl.create_data_frame(obj, schema)
- return DataFrame(ctx_impl, impl)
+ return DataFrame(ctx_impl, impl, options)
-def _scan_collected_default(ctx_impl, obj, schema):
- return _scan_default(ctx_impl, obj, schema).to_memtable()
+def _scan_collected_default(ctx_impl, obj, schema, options):
+ return _scan_default(ctx_impl, obj, schema, options).to_memtable()
-def _scan_geopandas(ctx_impl, obj, schema):
+def _scan_geopandas(ctx_impl, obj, schema, options):
return _scan_collected_default(
- ctx_impl, obj.to_arrow(geometry_encoding="WKB"), schema
+ ctx_impl, obj.to_arrow(geometry_encoding="WKB"), schema, options
)
diff --git a/python/sedonadb/tests/test_context.py b/python/sedonadb/tests/test_context.py
index 39ed012..758b201 100644
--- a/python/sedonadb/tests/test_context.py
+++ b/python/sedonadb/tests/test_context.py
@@ -20,6 +20,14 @@ import pytest
import sedonadb
+def test_options():
+ sd = sedonadb.connect()
+ assert "DataFrame object at" in repr(sd.sql("SELECT 1 as one"))
+
+ sd.options.interactive = True
+ assert "DataFrame object at" not in repr(sd.sql("SELECT 1 as one"))
+
+
def test_read_parquet(con, geoarrow_data):
# Check one file
tab = con.read_parquet(
diff --git a/python/sedonadb/tests/test_dataframe.py b/python/sedonadb/tests/test_dataframe.py
index ef8cabc..b609635 100644
--- a/python/sedonadb/tests/test_dataframe.py
+++ b/python/sedonadb/tests/test_dataframe.py
@@ -446,7 +446,7 @@ def test_repr(con):
)
try:
- sedonadb.options.interactive = True
+ con.options.interactive = True
repr_interactive = repr(con.sql("SELECT 1 as one"))
expected = """
┌───────┐
@@ -458,4 +458,4 @@ def test_repr(con):
""".strip()
assert repr_interactive == expected
finally:
- sedonadb.options.interactive = False
+ con.options.interactive = False