This is an automated email from the ASF dual-hosted git repository.

kontinuation pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new bced6b3  feat: Support explaining DataFrame when using Python API (#92)
bced6b3 is described below

commit bced6b306d10acdef35066481238bfca0337b8cc
Author: Kristin Cowalcijk <[email protected]>
AuthorDate: Wed Sep 17 13:37:42 2025 +0800

    feat: Support explaining DataFrame when using Python API (#92)
    
    This patch fixes the following error when `show`ing a DataFrame backed by an `EXPLAIN ...` query:
    
    ```python
    >>> import sedonadb
    >>> con = sedonadb.connect()
    >>> con.sql("EXPLAIN SELECT 1 as one").show()
    Traceback (most recent call last):
      File "<python-input-4>", line 1, in <module>
        con.sql("EXPLAIN SELECT 1 as one").show()
        ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^
      File "/Users/bopeng/workspace/wherobots/sedona-db/python/sedonadb/python/sedonadb/dataframe.py", line 362, in show
        print(self._impl.show(self._ctx, limit, width, ascii), end="")
              ~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    sedonadb._lib.SedonaError: Unsupported logical plan: Explain must be root of the plan.
    This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues
    ```
    
    The above code will run successfully after applying this patch:
    
    ```
    >>> import sedonadb
    >>> con = sedonadb.connect()
    >>> con.sql("EXPLAIN SELECT 1 as one").show()
    ┌───────────────┬─────────────────────────────────┐
    │   plan_type   ┆               plan              │
    │      utf8     ┆               utf8              │
    ╞═══════════════╪═════════════════════════════════╡
    │ logical_plan  ┆ Projection: Int64(1) AS one     │
    │               ┆   EmptyRelation                 │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ physical_plan ┆ ProjectionExec: expr=[1 as one] │
    │               ┆   PlaceholderRowExec            │
    │               ┆                                 │
    └───────────────┴─────────────────────────────────┘
    ```
    
    This patch also added an `explain` method to `DataFrame`.
---
 python/sedonadb/python/sedonadb/dataframe.py | 43 +++++++++++++++++++
 python/sedonadb/src/dataframe.rs             | 23 ++++++++++-
 python/sedonadb/tests/test_dataframe.py      | 62 ++++++++++++++++++++++++++++
 rust/sedona/src/context.rs                   | 50 ++++++++++++++++++++--
 4 files changed, 174 insertions(+), 4 deletions(-)

diff --git a/python/sedonadb/python/sedonadb/dataframe.py b/python/sedonadb/python/sedonadb/dataframe.py
index b24fed5..78bf47b 100644
--- a/python/sedonadb/python/sedonadb/dataframe.py
+++ b/python/sedonadb/python/sedonadb/dataframe.py
@@ -353,6 +353,49 @@ class DataFrame:
         width = _out_width(width)
         print(self._impl.show(self._ctx, limit, width, ascii), end="")
 
+    def explain(
+        self,
+        type: str = "standard",
+        format: str = "indent",
+    ) -> "DataFrame":
+        """Return the execution plan for this DataFrame as a DataFrame
+
+        Retrieves the logical and physical execution plans that will be used to
+        compute this DataFrame. This is useful for understanding query
+        performance and optimization.
+
+        Args:
+            type: The type of explain plan to generate. Supported values are:
+                "standard" (default) - shows logical and physical plans,
+                "extended" - includes additional query optimization details,
+                "analyze" - executes the plan and reports actual metrics.
+            format: The format to use for displaying the plan. Supported formats are
+                "indent" (default), "tree", "pgjson" and "graphviz".
+
+        Returns:
+            A DataFrame containing the execution plan information with columns
+            'plan_type' and 'plan'.
+
+        Examples:
+
+            >>> import sedonadb
+            >>> con = sedonadb.connect()
+            >>> df = con.sql("SELECT 1 as one")
+            >>> df.explain().show()
+            ┌───────────────┬─────────────────────────────────┐
+            │   plan_type   ┆               plan              │
+            │      utf8     ┆               utf8              │
+            ╞═══════════════╪═════════════════════════════════╡
+            │ logical_plan  ┆ Projection: Int64(1) AS one     │
+            │               ┆   EmptyRelation                 │
+            ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+            │ physical_plan ┆ ProjectionExec: expr=[1 as one] │
+            │               ┆   PlaceholderRowExec            │
+            │               ┆                                 │
+            └───────────────┴─────────────────────────────────┘
+        """
+        return DataFrame(self._ctx, self._impl.explain(type, format))
+
     def __repr__(self) -> str:
         if global_options().interactive:
             width = _out_width()
diff --git a/python/sedonadb/src/dataframe.rs b/python/sedonadb/src/dataframe.rs
index f6ddf06..6d92205 100644
--- a/python/sedonadb/src/dataframe.rs
+++ b/python/sedonadb/src/dataframe.rs
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 use std::ffi::CString;
+use std::str::FromStr;
 use std::sync::Arc;
 
 use arrow_array::ffi::FFI_ArrowSchema;
@@ -25,7 +26,7 @@ use datafusion::catalog::MemTable;
 use datafusion::logical_expr::SortExpr;
 use datafusion::prelude::DataFrame;
 use datafusion_common::Column;
-use datafusion_expr::Expr;
+use datafusion_expr::{ExplainFormat, ExplainOption, Expr};
 use datafusion_ffi::table_provider::FFI_TableProvider;
 use pyo3::prelude::*;
 use pyo3::types::PyCapsule;
@@ -186,6 +187,26 @@ impl InternalDataFrame {
         Ok(content)
     }
 
+    fn explain(&self, explain_type: &str, format: &str) -> Result<Self, PySedonaError> {
+        let format = ExplainFormat::from_str(format)?;
+        let (analyze, verbose) = match explain_type {
+            "standard" => (false, false),
+            "extended" => (false, true),
+            "analyze" => (true, false),
+            _ => {
+                return Err(PySedonaError::SedonaPython(
+                    "explain type must be one of 'standard', 'extended', or 'analyze'".to_string(),
+                ))
+            }
+        };
+        let explain_option = ExplainOption::default()
+            .with_analyze(analyze)
+            .with_verbose(verbose)
+            .with_format(format);
+        let explain_df = self.inner.clone().explain_with_options(explain_option)?;
+        Ok(Self::new(explain_df, self.runtime.clone()))
+    }
+
     fn __datafusion_table_provider__<'py>(
         &self,
         py: Python<'py>,
diff --git a/python/sedonadb/tests/test_dataframe.py b/python/sedonadb/tests/test_dataframe.py
index 289e9c4..b74bfc6 100644
--- a/python/sedonadb/tests/test_dataframe.py
+++ b/python/sedonadb/tests/test_dataframe.py
@@ -368,6 +368,68 @@ def test_show(con, capsys):
     assert capsys.readouterr().out.strip() == expected
 
 
+def test_show_explained(con, capsys):
+    con.sql("EXPLAIN SELECT 1 as one").show()
+    expected = """
+┌───────────────┬─────────────────────────────────┐
+│   plan_type   ┆               plan              │
+│      utf8     ┆               utf8              │
+╞═══════════════╪═════════════════════════════════╡
+│ logical_plan  ┆ Projection: Int64(1) AS one     │
+│               ┆   EmptyRelation                 │
+├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ physical_plan ┆ ProjectionExec: expr=[1 as one] │
+│               ┆   PlaceholderRowExec            │
+│               ┆                                 │
+└───────────────┴─────────────────────────────────┘
+    """.strip()
+    assert capsys.readouterr().out.strip() == expected
+
+
+def test_explain(con, capsys):
+    con.sql("SELECT 1 as one").explain().show()
+    expected = """
+┌───────────────┬─────────────────────────────────┐
+│   plan_type   ┆               plan              │
+│      utf8     ┆               utf8              │
+╞═══════════════╪═════════════════════════════════╡
+│ logical_plan  ┆ Projection: Int64(1) AS one     │
+│               ┆   EmptyRelation                 │
+├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+│ physical_plan ┆ ProjectionExec: expr=[1 as one] │
+│               ┆   PlaceholderRowExec            │
+│               ┆                                 │
+└───────────────┴─────────────────────────────────┘
+    """.strip()
+    assert capsys.readouterr().out.strip() == expected
+
+    con.sql("SELECT 1 as one").explain(format="tree").show()
+    expected = """
+┌───────────────┬───────────────────────────────┐
+│   plan_type   ┆              plan             │
+│      utf8     ┆              utf8             │
+╞═══════════════╪═══════════════════════════════╡
+│ physical_plan ┆ ┌───────────────────────────┐ │
+│               ┆ │       ProjectionExec      │ │
+│               ┆ │    --------------------   │ │
+│               ┆ │           one: 1          │ │
+│               ┆ └─────────────┬─────────────┘ │
+│               ┆ ┌─────────────┴─────────────┐ │
+│               ┆ │     PlaceholderRowExec    │ │
+│               ┆ └───────────────────────────┘ │
+│               ┆                               │
+└───────────────┴───────────────────────────────┘
+    """.strip()
+    assert capsys.readouterr().out.strip() == expected
+
+    query_plan = con.sql("SELECT 1 as one").explain(type="analyze").to_pandas()
+    assert query_plan.iloc[0, 0] == "Plan with Metrics"
+
+    query_plan = con.sql("SELECT 1 as one").explain(type="extended").to_pandas()
+    assert query_plan.iloc[0, 0] == "initial_logical_plan"
+    assert len(query_plan) > 10
+
+
 def test_repr(con):
     assert repr(con.sql("SELECT 1 as one")).startswith(
         "<sedonadb.dataframe.DataFrame object"
diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs
index 07fabf8..e75d5d8 100644
--- a/rust/sedona/src/context.rs
+++ b/rust/sedona/src/context.rs
@@ -36,7 +36,7 @@ use datafusion::{
 use datafusion_common::not_impl_err;
 use datafusion_expr::dml::InsertOp;
 use datafusion_expr::sqlparser::dialect::{dialect_from_str, Dialect};
-use datafusion_expr::{LogicalPlanBuilder, SortExpr};
+use datafusion_expr::{LogicalPlan, LogicalPlanBuilder, SortExpr};
 use parking_lot::Mutex;
 use sedona_common::option::add_sedona_option_extension;
 use sedona_expr::aggregate_udf::SedonaAccumulatorRef;
@@ -292,9 +292,23 @@ impl SedonaDataFrame for DataFrame {
         self,
         ctx: &SedonaContext,
         limit: Option<usize>,
-        options: DisplayTableOptions<'a>,
+        mut options: DisplayTableOptions<'a>,
     ) -> Result<String> {
-        let df = self.limit(0, limit)?;
+        let df = if matches!(
+            self.logical_plan(),
+            LogicalPlan::Explain(_) | LogicalPlan::DescribeTable(_) | LogicalPlan::Analyze(_)
+        ) {
+            // Show multi-line output without truncation for plans like `EXPLAIN`
+            options.max_row_height = usize::MAX;
+
+            // We don't want to apply an additional .limit() to plans like `Explain`
+            // as that will trigger an internal error: Unsupported logical plan: Explain must be root of the plan
+            self
+        } else {
+            // Apply limit if specified
+            self.limit(0, limit)?
+        };
+
         let schema_without_qualifiers = df.schema().clone().strip_qualifiers();
         let schema = schema_without_qualifiers.as_arrow();
         let batches = df.collect().await?;
@@ -505,6 +519,36 @@ mod tests {
         );
     }
 
+    #[tokio::test]
+    async fn show_explain() {
+        let ctx = SedonaContext::new();
+        for limit in [None, Some(10)] {
+            let tbl = ctx
+                .sql("EXPLAIN SELECT 1 as one")
+                .await
+                .unwrap()
+                .show_sedona(&ctx, limit, DisplayTableOptions::default())
+                .await
+                .unwrap();
+
+            #[rustfmt::skip]
+            assert_eq!(
+                tbl.lines().collect::<Vec<_>>(),
+                vec![
+                    "+---------------+---------------------------------+",
+                    "|   plan_type   |               plan              |",
+                    "+---------------+---------------------------------+",
+                    "| logical_plan  | Projection: Int64(1) AS one     |",
+                    "|               |   EmptyRelation                 |",
+                    "| physical_plan | ProjectionExec: expr=[1 as one] |",
+                    "|               |   PlaceholderRowExec            |",
+                    "|               |                                 |",
+                    "+---------------+---------------------------------+",
+                ]
+            );
+        }
+    }
+
     #[tokio::test]
     async fn write_geoparquet() {
         let tmpdir = tempdir().unwrap();

Reply via email to