This is an automated email from the ASF dual-hosted git repository.

jli pushed a commit to branch fix-examples-not-loading
in repository https://gitbox.apache.org/repos/asf/superset.git

commit 908359a1b52b3a84c22a5ac5832fad80db294fb1
Author: Joe Li <[email protected]>
AuthorDate: Thu Jan 29 14:50:22 2026 -0800

    fix(examples): set and backfill schema on SqlaTable creation
    
    Sets tbl.schema when creating new SqlaTable objects and backfills schema
    on existing tables that have schema=None. This ensures the schema-aware
    lookup in _find_dataset() can find datasets created before this fix.
    
    Adds tests for schema setting and backfilling behavior.
    
    Co-Authored-By: Claude Opus 4.5 <[email protected]>
---
 superset/examples/generic_loader.py              | 12 ++++
 tests/unit_tests/examples/generic_loader_test.py | 73 ++++++++++++++++++++++++
 2 files changed, 85 insertions(+)

diff --git a/superset/examples/generic_loader.py b/superset/examples/generic_loader.py
index 9b6df23f742..4d3cbac2167 100644
--- a/superset/examples/generic_loader.py
+++ b/superset/examples/generic_loader.py
@@ -133,9 +133,16 @@ def load_parquet_table(  # noqa: C901
         logger.info("Table %s already exists, skipping data load", table_name)
         tbl, found_by_uuid = _find_dataset(table_name, database.id, uuid, schema)
         if tbl:
+            needs_update = False
             # Backfill UUID if found by table_name (not UUID) and UUID not set
             if uuid and not tbl.uuid and not found_by_uuid:
                 tbl.uuid = uuid
+                needs_update = True
+            # Backfill schema if existing table has no schema set
+            if schema and not tbl.schema:
+                tbl.schema = schema
+                needs_update = True
+            if needs_update:
                 db.session.merge(tbl)
                 db.session.commit()  # pylint: disable=consider-using-transaction
             return tbl
@@ -211,11 +218,16 @@ def load_parquet_table(  # noqa: C901
     if not tbl:
         tbl = SqlaTable(table_name=table_name, database_id=database.id)
         tbl.database = database
+        tbl.schema = schema
 
     # Backfill UUID if found by table_name (not UUID) and UUID not set
     if uuid and not tbl.uuid and not found_by_uuid:
         tbl.uuid = uuid
 
+    # Backfill schema if existing table has no schema set
+    if schema and not tbl.schema:
+        tbl.schema = schema
+
     if not only_metadata:
         # Ensure database reference is set before fetching metadata
         if not tbl.database:
diff --git a/tests/unit_tests/examples/generic_loader_test.py b/tests/unit_tests/examples/generic_loader_test.py
index c50b5781493..08e5aaac045 100644
--- a/tests/unit_tests/examples/generic_loader_test.py
+++ b/tests/unit_tests/examples/generic_loader_test.py
@@ -271,6 +271,79 @@ def test_load_parquet_table_works_without_uuid(
         assert result.table_name == "test_table"
 
 
+@patch("superset.examples.generic_loader.db")
+@patch("superset.examples.generic_loader.get_example_database")
+@patch("superset.examples.generic_loader.read_example_data")
+def test_load_parquet_table_sets_schema_on_new_table(
+    mock_read_data: MagicMock,
+    mock_get_db: MagicMock,
+    mock_db: MagicMock,
+) -> None:
+    """Test that load_parquet_table sets schema when creating a new SqlaTable."""
+    from superset.examples.generic_loader import load_parquet_table
+
+    mock_database = MagicMock()
+    mock_inspector = _setup_database_mocks(mock_get_db, mock_database, has_table=False)
+
+    with patch("superset.examples.generic_loader.inspect") as mock_inspect:
+        mock_inspect.return_value = mock_inspector
+
+        # No existing table
+        mock_db.session.query.return_value.filter_by.return_value.first.return_value = (
+            None
+        )
+
+        mock_read_data.return_value = pd.DataFrame({"col1": [1, 2, 3]})
+
+        result = load_parquet_table(
+            parquet_file="test_data",
+            table_name="test_table",
+            database=mock_database,
+            only_metadata=True,
+            schema="custom_schema",
+        )
+
+        assert result is not None
+        assert result.schema == "custom_schema"
+
+
+@patch("superset.examples.generic_loader.db")
+@patch("superset.examples.generic_loader.get_example_database")
+def test_load_parquet_table_backfills_schema_on_existing_table(
+    mock_get_db: MagicMock,
+    mock_db: MagicMock,
+) -> None:
+    """Test that existing dataset with schema=None gets schema backfilled."""
+    from superset.examples.generic_loader import load_parquet_table
+
+    mock_database = MagicMock()
+    mock_inspector = _setup_database_mocks(mock_get_db, mock_database, has_table=True)
+
+    with patch("superset.examples.generic_loader.inspect") as mock_inspect:
+        mock_inspect.return_value = mock_inspector
+
+        # Existing table with NO schema (needs backfill)
+        mock_existing_table = MagicMock()
+        mock_existing_table.uuid = "some-uuid"
+        mock_existing_table.schema = None
+        mock_existing_table.table_name = "test_table"
+
+        mock_db.session.query.return_value.filter_by.return_value.first.return_value = (
+            mock_existing_table
+        )
+
+        result = load_parquet_table(
+            parquet_file="test_data",
+            table_name="test_table",
+            database=mock_database,
+            only_metadata=True,
+            schema="public",
+        )
+
+        # Schema should be backfilled
+        assert result.schema == "public"
+
+
 def test_create_generic_loader_passes_uuid() -> None:
     """Test that create_generic_loader passes UUID to load_parquet_table."""
     from superset.examples.generic_loader import create_generic_loader

Reply via email to