This is an automated email from the ASF dual-hosted git repository.

rusackas pushed a commit to branch feat/transpile-virtual-datasets-on-import
in repository https://gitbox.apache.org/repos/asf/superset.git

commit 119b45f5858ca486fd69be1dad9ef71d42e14847
Author: Evan Rusackas <[email protected]>
AuthorDate: Wed Jan 21 13:00:54 2026 -0800

    feat(examples): Transpile virtual dataset SQL on import
    
    When importing example datasets, automatically transpile virtual dataset
    SQL to the target database dialect using SQLGlot. This ensures that
    virtual datasets exported from one database type (e.g., PostgreSQL) can
    be loaded into a different database type (e.g., MySQL, DuckDB, SQLite).
    
    - Add transpile_virtual_dataset_sql() helper function
    - Integrate transpilation into ImportExamplesCommand._import()
    - Fall back gracefully to original SQL if transpilation fails
    - Add comprehensive unit tests
    
    Co-Authored-By: Claude Opus 4.5 <[email protected]>
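    
    For reference, a minimal sketch of the kind of dialect translation SQLGlot
    performs, shown with sqlglot directly rather than Superset's
    transpile_to_dialect wrapper (the sample query and dialect names are
    illustrative, not taken from this patch):
    
        import sqlglot
        
        # Rewrite a PostgreSQL-flavored query into MySQL syntax; transpile()
        # returns one string per input statement.
        sql = 'SELECT "month", COUNT(*) AS cnt FROM "orders" GROUP BY "month"'
        print(sqlglot.transpile(sql, read="postgres", write="mysql")[0])
        # e.g. SELECT `month`, COUNT(*) AS cnt FROM `orders` GROUP BY `month`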
---
 superset/commands/importers/v1/examples.py         |  51 ++++++++
 .../commands/importers/v1/examples_test.py         | 140 +++++++++++++++++++++
 2 files changed, 191 insertions(+)

diff --git a/superset/commands/importers/v1/examples.py b/superset/commands/importers/v1/examples.py
index 99ecab7955..48557fa7c0 100644
--- a/superset/commands/importers/v1/examples.py
+++ b/superset/commands/importers/v1/examples.py
@@ -14,11 +14,13 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import logging
 from typing import Any, Optional
 
 from marshmallow import Schema
 from sqlalchemy.exc import MultipleResultsFound
 
+from superset import db
 from superset.charts.schemas import ImportV1ChartSchema
 from superset.commands.chart.importers.v1 import ImportChartsCommand
 from superset.commands.chart.importers.v1.utils import import_chart
@@ -41,9 +43,55 @@ from superset.daos.base import BaseDAO
 from superset.dashboards.schemas import ImportV1DashboardSchema
 from superset.databases.schemas import ImportV1DatabaseSchema
 from superset.datasets.schemas import ImportV1DatasetSchema
+from superset.exceptions import QueryClauseValidationException
+from superset.models.core import Database
+from superset.sql.parse import transpile_to_dialect
 from superset.utils.core import get_example_default_schema
 from superset.utils.decorators import transaction
 
+logger = logging.getLogger(__name__)
+
+
+def transpile_virtual_dataset_sql(config: dict[str, Any], database_id: int) -> None:
+    """
+    Transpile virtual dataset SQL to the target database dialect.
+
+    This ensures that virtual datasets exported from one database type
+    (e.g., PostgreSQL) can be loaded into a different database type
+    (e.g., MySQL, DuckDB, SQLite).
+
+    Args:
+        config: Dataset configuration dict (modified in place)
+        database_id: ID of the target database
+    """
+    sql = config.get("sql")
+    if not sql:
+        return
+
+    database = db.session.query(Database).get(database_id)
+    if not database:
+        logger.warning("Database %s not found, skipping SQL transpilation", database_id)
+        return
+
+    target_engine = database.db_engine_spec.engine
+    try:
+        transpiled_sql = transpile_to_dialect(sql, target_engine)
+        if transpiled_sql != sql:
+            logger.info(
+                "Transpiled virtual dataset SQL for '%s' to %s dialect",
+                config.get("table_name", "unknown"),
+                target_engine,
+            )
+            config["sql"] = transpiled_sql
+    except QueryClauseValidationException as ex:
+        logger.warning(
+            "Could not transpile SQL for dataset '%s' to %s: %s. "
+            "Using original SQL which may not be compatible.",
+            config.get("table_name", "unknown"),
+            target_engine,
+            ex,
+        )
+
 
 class ImportExamplesCommand(ImportModelsCommand):
     """Import examples"""
@@ -119,6 +167,9 @@ class ImportExamplesCommand(ImportModelsCommand):
                 if config["schema"] is None:
                     config["schema"] = get_example_default_schema()
 
+                # transpile virtual dataset SQL to target database dialect
+                transpile_virtual_dataset_sql(config, config["database_id"])
+
                 try:
                     dataset = import_dataset(
                         config,
diff --git a/tests/unit_tests/commands/importers/v1/examples_test.py b/tests/unit_tests/commands/importers/v1/examples_test.py
new file mode 100644
index 0000000000..4305e2fd55
--- /dev/null
+++ b/tests/unit_tests/commands/importers/v1/examples_test.py
@@ -0,0 +1,140 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Tests for the examples importer, specifically SQL transpilation."""
+
+from unittest.mock import MagicMock, patch
+
+from superset.commands.importers.v1.examples import transpile_virtual_dataset_sql
+
+
+def test_transpile_virtual_dataset_sql_no_sql():
+    """Test that configs without SQL are unchanged."""
+    config = {"table_name": "my_table", "sql": None}
+    transpile_virtual_dataset_sql(config, 1)
+    assert config["sql"] is None
+
+
+def test_transpile_virtual_dataset_sql_empty_sql():
+    """Test that configs with empty SQL are unchanged."""
+    config = {"table_name": "my_table", "sql": ""}
+    transpile_virtual_dataset_sql(config, 1)
+    assert config["sql"] == ""
+
+
+@patch("superset.commands.importers.v1.examples.db")
+def test_transpile_virtual_dataset_sql_database_not_found(mock_db):
+    """Test graceful handling when database is not found."""
+    mock_db.session.query.return_value.get.return_value = None
+
+    config = {"table_name": "my_table", "sql": "SELECT * FROM foo"}
+    original_sql = config["sql"]
+
+    transpile_virtual_dataset_sql(config, 999)
+
+    # SQL should remain unchanged
+    assert config["sql"] == original_sql
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_success(mock_transpile, mock_db):
+    """Test successful SQL transpilation."""
+    # Setup mock database
+    mock_database = MagicMock()
+    mock_database.db_engine_spec.engine = "mysql"
+    mock_db.session.query.return_value.get.return_value = mock_database
+
+    # Setup mock transpilation
+    mock_transpile.return_value = "SELECT * FROM `foo`"
+
+    config = {"table_name": "my_table", "sql": "SELECT * FROM foo"}
+    transpile_virtual_dataset_sql(config, 1)
+
+    # SQL should be transpiled
+    assert config["sql"] == "SELECT * FROM `foo`"
+    mock_transpile.assert_called_once_with("SELECT * FROM foo", "mysql")
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_no_change(mock_transpile, mock_db):
+    """Test when transpilation returns same SQL (no dialect differences)."""
+    mock_database = MagicMock()
+    mock_database.db_engine_spec.engine = "postgresql"
+    mock_db.session.query.return_value.get.return_value = mock_database
+
+    original_sql = "SELECT * FROM foo"
+    mock_transpile.return_value = original_sql
+
+    config = {"table_name": "my_table", "sql": original_sql}
+    transpile_virtual_dataset_sql(config, 1)
+
+    assert config["sql"] == original_sql
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_error_fallback(mock_transpile, mock_db):
+    """Test graceful fallback when transpilation fails."""
+    from superset.exceptions import QueryClauseValidationException
+
+    mock_database = MagicMock()
+    mock_database.db_engine_spec.engine = "mysql"
+    mock_db.session.query.return_value.get.return_value = mock_database
+
+    # Simulate transpilation failure
+    mock_transpile.side_effect = QueryClauseValidationException("Parse error")
+
+    original_sql = "SELECT SOME_POSTGRES_SPECIFIC_FUNCTION() FROM foo"
+    config = {"table_name": "my_table", "sql": original_sql}
+
+    # Should not raise, should keep original SQL
+    transpile_virtual_dataset_sql(config, 1)
+    assert config["sql"] == original_sql
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_complex_query(mock_transpile, mock_db):
+    """Test transpilation of a more complex SQL query."""
+    mock_database = MagicMock()
+    mock_database.db_engine_spec.engine = "duckdb"
+    mock_db.session.query.return_value.get.return_value = mock_database
+
+    original_sql = """
+        SELECT
+            DATE_TRUNC('month', created_at) as month,
+            COUNT(*) as count
+        FROM orders
+        WHERE status = 'completed'
+        GROUP BY 1
+    """
+    transpiled_sql = """
+        SELECT
+            DATE_TRUNC('month', created_at) AS month,
+            COUNT(*) AS count
+        FROM orders
+        WHERE status = 'completed'
+        GROUP BY 1
+    """
+    mock_transpile.return_value = transpiled_sql
+
+    config = {"table_name": "monthly_orders", "sql": original_sql}
+    transpile_virtual_dataset_sql(config, 1)
+
+    assert config["sql"] == transpiled_sql
+    mock_transpile.assert_called_once_with(original_sql, "duckdb")
