This is an automated email from the ASF dual-hosted git repository. jli pushed a commit to branch fix-examples-not-loading in repository https://gitbox.apache.org/repos/asf/superset.git
commit 908359a1b52b3a84c22a5ac5832fad80db294fb1 Author: Joe Li <[email protected]> AuthorDate: Thu Jan 29 14:50:22 2026 -0800 fix(examples): set and backfill schema on SqlaTable creation Sets tbl.schema when creating new SqlaTable objects and backfills schema on existing tables that have schema=None. This ensures the schema-aware lookup in _find_dataset() can find datasets created before this fix. Adds tests for schema setting and backfilling behavior. Co-Authored-By: Claude Opus 4.5 <[email protected]> --- superset/examples/generic_loader.py | 12 ++++ tests/unit_tests/examples/generic_loader_test.py | 73 ++++++++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/superset/examples/generic_loader.py b/superset/examples/generic_loader.py index 9b6df23f742..4d3cbac2167 100644 --- a/superset/examples/generic_loader.py +++ b/superset/examples/generic_loader.py @@ -133,9 +133,16 @@ def load_parquet_table( # noqa: C901 logger.info("Table %s already exists, skipping data load", table_name) tbl, found_by_uuid = _find_dataset(table_name, database.id, uuid, schema) if tbl: + needs_update = False # Backfill UUID if found by table_name (not UUID) and UUID not set if uuid and not tbl.uuid and not found_by_uuid: tbl.uuid = uuid + needs_update = True + # Backfill schema if existing table has no schema set + if schema and not tbl.schema: + tbl.schema = schema + needs_update = True + if needs_update: db.session.merge(tbl) db.session.commit() # pylint: disable=consider-using-transaction return tbl @@ -211,11 +218,16 @@ def load_parquet_table( # noqa: C901 if not tbl: tbl = SqlaTable(table_name=table_name, database_id=database.id) tbl.database = database + tbl.schema = schema # Backfill UUID if found by table_name (not UUID) and UUID not set if uuid and not tbl.uuid and not found_by_uuid: tbl.uuid = uuid + # Backfill schema if existing table has no schema set + if schema and not tbl.schema: + tbl.schema = schema + if not only_metadata: # Ensure database reference is set before fetching metadata if not tbl.database: diff --git a/tests/unit_tests/examples/generic_loader_test.py b/tests/unit_tests/examples/generic_loader_test.py index c50b5781493..08e5aaac045 100644 --- a/tests/unit_tests/examples/generic_loader_test.py +++ b/tests/unit_tests/examples/generic_loader_test.py @@ -271,6 +271,79 @@ def test_load_parquet_table_works_without_uuid( assert result.table_name == "test_table" +@patch("superset.examples.generic_loader.db") +@patch("superset.examples.generic_loader.get_example_database") +@patch("superset.examples.generic_loader.read_example_data") +def test_load_parquet_table_sets_schema_on_new_table( + mock_read_data: MagicMock, + mock_get_db: MagicMock, + mock_db: MagicMock, +) -> None: + """Test that load_parquet_table sets schema when creating a new SqlaTable.""" + from superset.examples.generic_loader import load_parquet_table + + mock_database = MagicMock() + mock_inspector = _setup_database_mocks(mock_get_db, mock_database, has_table=False) + + with patch("superset.examples.generic_loader.inspect") as mock_inspect: + mock_inspect.return_value = mock_inspector + + # No existing table + mock_db.session.query.return_value.filter_by.return_value.first.return_value = ( + None + ) + + mock_read_data.return_value = pd.DataFrame({"col1": [1, 2, 3]}) + + result = load_parquet_table( + parquet_file="test_data", + table_name="test_table", + database=mock_database, + only_metadata=True, + schema="custom_schema", + ) + + assert result is not None + assert result.schema == "custom_schema" + + +@patch("superset.examples.generic_loader.db") +@patch("superset.examples.generic_loader.get_example_database") +def test_load_parquet_table_backfills_schema_on_existing_table( + mock_get_db: MagicMock, + mock_db: MagicMock, +) -> None: + """Test that existing dataset with schema=None gets schema backfilled.""" + from superset.examples.generic_loader import load_parquet_table + + mock_database = MagicMock() + mock_inspector = _setup_database_mocks(mock_get_db, mock_database, has_table=True) + + with patch("superset.examples.generic_loader.inspect") as mock_inspect: + mock_inspect.return_value = mock_inspector + + # Existing table with NO schema (needs backfill) + mock_existing_table = MagicMock() + mock_existing_table.uuid = "some-uuid" + mock_existing_table.schema = None + mock_existing_table.table_name = "test_table" + + mock_db.session.query.return_value.filter_by.return_value.first.return_value = ( + mock_existing_table + ) + + result = load_parquet_table( + parquet_file="test_data", + table_name="test_table", + database=mock_database, + only_metadata=True, + schema="public", + ) + + # Schema should be backfilled + assert result.schema == "public" + + def test_create_generic_loader_passes_uuid() -> None: """Test that create_generic_loader passes UUID to load_parquet_table.""" from superset.examples.generic_loader import create_generic_loader
