This is an automated email from the ASF dual-hosted git repository.
rusackas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new 87bbd54d0a feat(examples): Transpile virtual dataset SQL on import (#37311)
87bbd54d0a is described below
commit 87bbd54d0aca3b98671e0516e0b2d7a9e66bc3c3
Author: Evan Rusackas <[email protected]>
AuthorDate: Thu Jan 22 09:50:05 2026 -0800
feat(examples): Transpile virtual dataset SQL on import (#37311)
Co-authored-by: Claude Opus 4.5 <[email protected]>
Co-authored-by: Beto Dealmeida <[email protected]>
Co-authored-by: bito-code-review[bot] <188872107+bito-code-review[bot]@users.noreply.github.com>
---
superset/commands/dashboard/export_example.py | 4 +
superset/commands/importers/v1/examples.py | 58 +++++
superset/datasets/schemas.py | 2 +
superset/sql/parse.py | 21 +-
.../commands/importers/v1/examples_test.py | 244 +++++++++++++++++++++
tests/unit_tests/sql/transpile_to_dialect_test.py | 51 +++++
6 files changed, 377 insertions(+), 3 deletions(-)
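For context, the transpilation added in this commit rides on SQLGlot's cross-dialect parse/generate cycle. A minimal standalone sketch of the same idea, using sqlglot directly with its own dialect names rather than Superset's SQLGLOT_DIALECTS mapping (query, table, and column names here are made up for illustration):

    import sqlglot

    # Parse PostgreSQL-flavored SQL and re-emit it in MySQL's dialect.
    sql = "SELECT created_at::DATE AS date_only FROM orders"
    transpiled = sqlglot.transpile(sql, read="postgres", write="mysql")[0]
    # The PostgreSQL '::' shorthand becomes a portable CAST(... AS DATE) expression.
    print(transpiled)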
diff --git a/superset/commands/dashboard/export_example.py b/superset/commands/dashboard/export_example.py
index 0446e213b3..7924fe0ad4 100644
--- a/superset/commands/dashboard/export_example.py
+++ b/superset/commands/dashboard/export_example.py
@@ -175,6 +175,10 @@ def export_dataset_yaml(
"schema": None, # Don't export - use target database's default schema
# Preserve SQL for virtual datasets, None for physical (data is in parquet)
"sql": dataset.sql if is_preserved_virtual else None,
+ # Track source database engine for SQL transpilation during import
+ "source_db_engine": (
+            dataset.database.db_engine_spec.engine if is_preserved_virtual else None
+ ),
"params": None, # Don't export - contains stale import metadata
"template_params": dataset.template_params,
"filter_select_enabled": dataset.filter_select_enabled,
diff --git a/superset/commands/importers/v1/examples.py b/superset/commands/importers/v1/examples.py
index 99ecab7955..19fe811044 100644
--- a/superset/commands/importers/v1/examples.py
+++ b/superset/commands/importers/v1/examples.py
@@ -14,11 +14,13 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+import logging
from typing import Any, Optional
from marshmallow import Schema
from sqlalchemy.exc import MultipleResultsFound
+from superset import db
from superset.charts.schemas import ImportV1ChartSchema
from superset.commands.chart.importers.v1 import ImportChartsCommand
from superset.commands.chart.importers.v1.utils import import_chart
@@ -41,9 +43,62 @@ from superset.daos.base import BaseDAO
from superset.dashboards.schemas import ImportV1DashboardSchema
from superset.databases.schemas import ImportV1DatabaseSchema
from superset.datasets.schemas import ImportV1DatasetSchema
+from superset.exceptions import QueryClauseValidationException
+from superset.models.core import Database
+from superset.sql.parse import transpile_to_dialect
from superset.utils.core import get_example_default_schema
from superset.utils.decorators import transaction
+logger = logging.getLogger(__name__)
+
+
+def transpile_virtual_dataset_sql(config: dict[str, Any], database_id: int) -> None:
+ """
+ Transpile virtual dataset SQL to the target database dialect.
+
+ This ensures that virtual datasets exported from one database type
+ (e.g., PostgreSQL) can be loaded into a different database type
+ (e.g., MySQL, DuckDB, SQLite).
+
+ Args:
+ config: Dataset configuration dict (modified in place)
+ database_id: ID of the target database
+ """
+ sql = config.get("sql")
+ if not sql:
+ return
+
+ database = db.session.query(Database).get(database_id)
+ if not database:
+        logger.warning("Database %s not found, skipping SQL transpilation", database_id)
+ return
+
+ target_engine = database.db_engine_spec.engine
+ source_engine = config.get("source_db_engine")
+ if target_engine == source_engine:
+        logger.info("Source and target dialects are identical, skipping transpilation")
+ return
+
+ try:
+        transpiled_sql = transpile_to_dialect(sql, target_engine, source_engine)
+ if transpiled_sql != sql:
+ logger.info(
+                "Transpiled virtual dataset SQL for '%s' from %s to %s dialect",
+ config.get("table_name", "unknown"),
+ source_engine or "generic",
+ target_engine,
+ )
+ config["sql"] = transpiled_sql
+ except QueryClauseValidationException as ex:
+ logger.warning(
+ "Could not transpile SQL for dataset '%s' from %s to %s: %s. "
+ "Using original SQL which may not be compatible.",
+ config.get("table_name", "unknown"),
+ source_engine or "generic",
+ target_engine,
+ ex,
+ )
+
class ImportExamplesCommand(ImportModelsCommand):
"""Import examples"""
@@ -119,6 +174,9 @@ class ImportExamplesCommand(ImportModelsCommand):
if config["schema"] is None:
config["schema"] = get_example_default_schema()
+ # transpile virtual dataset SQL to target database dialect
+ transpile_virtual_dataset_sql(config, config["database_id"])
+
try:
dataset = import_dataset(
config,
diff --git a/superset/datasets/schemas.py b/superset/datasets/schemas.py
index 96bc44a9d3..1506ef45d1 100644
--- a/superset/datasets/schemas.py
+++ b/superset/datasets/schemas.py
@@ -322,6 +322,8 @@ class ImportV1DatasetSchema(Schema):
schema = fields.String(allow_none=True)
catalog = fields.String(allow_none=True)
sql = fields.String(allow_none=True)
+ # Source database engine for SQL transpilation (virtual datasets only)
+ source_db_engine = fields.String(allow_none=True, load_default=None)
params = fields.Dict(allow_none=True)
template_params = fields.Dict(allow_none=True)
filter_select_enabled = fields.Boolean()
diff --git a/superset/sql/parse.py b/superset/sql/parse.py
index af72f72e95..af9a740ec7 100644
--- a/superset/sql/parse.py
+++ b/superset/sql/parse.py
@@ -1522,9 +1522,21 @@ def sanitize_clause(clause: str, engine: str) -> str:
raise QueryClauseValidationException(f"Invalid SQL clause: {clause}") from ex
-def transpile_to_dialect(sql: str, target_engine: str) -> str:
+def transpile_to_dialect(
+ sql: str,
+ target_engine: str,
+ source_engine: str | None = None,
+) -> str:
"""
-    Transpile SQL from "generic SQL" to the target database dialect using SQLGlot.
+ Transpile SQL from one database dialect to another using SQLGlot.
+
+ Args:
+ sql: The SQL query to transpile
+ target_engine: The target database engine (e.g., "mysql", "postgresql")
+        source_engine: The source database engine. If None, uses generic SQL dialect.
+
+ Returns:
+ The transpiled SQL string
If the target engine is not in SQLGLOT_DIALECTS, returns the SQL as-is.
"""
@@ -1534,8 +1546,11 @@ def transpile_to_dialect(sql: str, target_engine: str) -> str:
if target_dialect is None:
return sql
+ # Get source dialect (default to generic if not specified)
+    source_dialect = SQLGLOT_DIALECTS.get(source_engine) if source_engine else Dialect
+
try:
- parsed = sqlglot.parse_one(sql, dialect=Dialect)
+ parsed = sqlglot.parse_one(sql, dialect=source_dialect)
return Dialect.get_or_raise(target_dialect).generate(
parsed,
copy=True,
diff --git a/tests/unit_tests/commands/importers/v1/examples_test.py b/tests/unit_tests/commands/importers/v1/examples_test.py
new file mode 100644
index 0000000000..1ad6176dc1
--- /dev/null
+++ b/tests/unit_tests/commands/importers/v1/examples_test.py
@@ -0,0 +1,244 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Tests for the examples importer, specifically SQL transpilation."""
+
+from unittest.mock import MagicMock, patch
+
+from superset.commands.importers.v1.examples import transpile_virtual_dataset_sql
+
+
+def test_transpile_virtual_dataset_sql_no_sql():
+ """Test that configs without SQL are unchanged."""
+ config = {"table_name": "my_table", "sql": None}
+ transpile_virtual_dataset_sql(config, 1)
+ assert config["sql"] is None
+
+
+def test_transpile_virtual_dataset_sql_empty_sql():
+ """Test that configs with empty SQL are unchanged."""
+ config = {"table_name": "my_table", "sql": ""}
+ transpile_virtual_dataset_sql(config, 1)
+ assert config["sql"] == ""
+
+
+@patch("superset.commands.importers.v1.examples.db")
+def test_transpile_virtual_dataset_sql_database_not_found(mock_db):
+ """Test graceful handling when database is not found."""
+ mock_db.session.query.return_value.get.return_value = None
+
+ config = {"table_name": "my_table", "sql": "SELECT * FROM foo"}
+ original_sql = config["sql"]
+
+ transpile_virtual_dataset_sql(config, 999)
+
+ # SQL should remain unchanged
+ assert config["sql"] == original_sql
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_success(mock_transpile, mock_db):
+ """Test successful SQL transpilation with source engine."""
+ mock_database = MagicMock()
+ mock_database.db_engine_spec.engine = "mysql"
+ mock_db.session.query.return_value.get.return_value = mock_database
+
+ mock_transpile.return_value = "SELECT * FROM `foo`"
+
+ config = {
+ "table_name": "my_table",
+ "sql": "SELECT * FROM foo",
+ "source_db_engine": "postgresql",
+ }
+ transpile_virtual_dataset_sql(config, 1)
+
+ assert config["sql"] == "SELECT * FROM `foo`"
+    mock_transpile.assert_called_once_with("SELECT * FROM foo", "mysql", "postgresql")
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_no_source_engine(mock_transpile, mock_db):
+ """Test transpilation when source_db_engine is not specified (legacy)."""
+ mock_database = MagicMock()
+ mock_database.db_engine_spec.engine = "mysql"
+ mock_db.session.query.return_value.get.return_value = mock_database
+
+ mock_transpile.return_value = "SELECT * FROM `foo`"
+
+ # No source_db_engine - should default to None (generic dialect)
+ config = {"table_name": "my_table", "sql": "SELECT * FROM foo"}
+ transpile_virtual_dataset_sql(config, 1)
+
+ assert config["sql"] == "SELECT * FROM `foo`"
+ mock_transpile.assert_called_once_with("SELECT * FROM foo", "mysql", None)
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_no_change(mock_transpile, mock_db):
+ """Test when transpilation returns same SQL (no dialect differences)."""
+ mock_database = MagicMock()
+ mock_database.db_engine_spec.engine = "postgresql"
+ mock_db.session.query.return_value.get.return_value = mock_database
+
+ original_sql = "SELECT * FROM foo"
+ mock_transpile.return_value = original_sql
+
+ config = {
+ "table_name": "my_table",
+ "sql": original_sql,
+ "source_db_engine": "postgresql",
+ }
+ transpile_virtual_dataset_sql(config, 1)
+
+ assert config["sql"] == original_sql
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_error_fallback(mock_transpile, mock_db):
+ """Test graceful fallback when transpilation fails."""
+ from superset.exceptions import QueryClauseValidationException
+
+ mock_database = MagicMock()
+ mock_database.db_engine_spec.engine = "mysql"
+ mock_db.session.query.return_value.get.return_value = mock_database
+
+ mock_transpile.side_effect = QueryClauseValidationException("Parse error")
+
+ original_sql = "SELECT SOME_POSTGRES_SPECIFIC_FUNCTION() FROM foo"
+ config = {
+ "table_name": "my_table",
+ "sql": original_sql,
+ "source_db_engine": "postgresql",
+ }
+
+ # Should not raise, should keep original SQL
+ transpile_virtual_dataset_sql(config, 1)
+ assert config["sql"] == original_sql
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_postgres_to_duckdb(mock_transpile, mock_db):
+ """Test transpilation from PostgreSQL to DuckDB."""
+ mock_database = MagicMock()
+ mock_database.db_engine_spec.engine = "duckdb"
+ mock_db.session.query.return_value.get.return_value = mock_database
+
+ original_sql = """
+ SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS cnt
+ FROM orders WHERE status = 'completed' GROUP BY 1
+ """
+ transpiled_sql = """
+ SELECT DATE_TRUNC('month', created_at) AS month, COUNT(*) AS cnt
+ FROM orders WHERE status = 'completed' GROUP BY 1
+ """
+ mock_transpile.return_value = transpiled_sql
+
+ config = {
+ "table_name": "monthly_orders",
+ "sql": original_sql,
+ "source_db_engine": "postgresql",
+ }
+ transpile_virtual_dataset_sql(config, 1)
+
+ assert config["sql"] == transpiled_sql
+    mock_transpile.assert_called_once_with(original_sql, "duckdb", "postgresql")
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_postgres_to_clickhouse(mock_transpile, mock_db):
+ """Test transpilation from PostgreSQL to ClickHouse.
+
+ ClickHouse has different syntax for date functions, so this tests
+ real dialect differences.
+ """
+ mock_database = MagicMock()
+ mock_database.db_engine_spec.engine = "clickhouse"
+ mock_db.session.query.return_value.get.return_value = mock_database
+
+ # PostgreSQL syntax
+    original_sql = "SELECT DATE_TRUNC('month', created_at) AS month FROM orders"
+ # ClickHouse uses toStartOfMonth instead
+ transpiled_sql = "SELECT toStartOfMonth(created_at) AS month FROM orders"
+ mock_transpile.return_value = transpiled_sql
+
+ config = {
+ "table_name": "monthly_orders",
+ "sql": original_sql,
+ "source_db_engine": "postgresql",
+ }
+ transpile_virtual_dataset_sql(config, 1)
+
+ assert config["sql"] == transpiled_sql
+    mock_transpile.assert_called_once_with(original_sql, "clickhouse", "postgresql")
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_postgres_to_mysql(mock_transpile, mock_db):
+ """Test transpilation from PostgreSQL to MySQL.
+
+ MySQL uses backticks for identifiers and has different casting syntax.
+ """
+ mock_database = MagicMock()
+ mock_database.db_engine_spec.engine = "mysql"
+ mock_db.session.query.return_value.get.return_value = mock_database
+
+ # PostgreSQL syntax with :: casting
+ original_sql = "SELECT created_at::DATE AS date_only FROM orders"
+ # MySQL syntax with CAST
+    transpiled_sql = "SELECT CAST(created_at AS DATE) AS date_only FROM `orders`"
+ mock_transpile.return_value = transpiled_sql
+
+ config = {
+ "table_name": "orders_dates",
+ "sql": original_sql,
+ "source_db_engine": "postgresql",
+ }
+ transpile_virtual_dataset_sql(config, 1)
+
+ assert config["sql"] == transpiled_sql
+ mock_transpile.assert_called_once_with(original_sql, "mysql", "postgresql")
+
+
+@patch("superset.commands.importers.v1.examples.db")
+@patch("superset.commands.importers.v1.examples.transpile_to_dialect")
+def test_transpile_virtual_dataset_sql_postgres_to_sqlite(mock_transpile, mock_db):
+ """Test transpilation from PostgreSQL to SQLite."""
+ mock_database = MagicMock()
+ mock_database.db_engine_spec.engine = "sqlite"
+ mock_db.session.query.return_value.get.return_value = mock_database
+
+    original_sql = "SELECT * FROM orders WHERE created_at > NOW() - INTERVAL '7 days'"
+ transpiled_sql = (
+ "SELECT * FROM orders WHERE created_at > DATETIME('now', '-7 days')"
+ )
+ mock_transpile.return_value = transpiled_sql
+
+ config = {
+ "table_name": "recent_orders",
+ "sql": original_sql,
+ "source_db_engine": "postgresql",
+ }
+ transpile_virtual_dataset_sql(config, 1)
+
+ assert config["sql"] == transpiled_sql
+    mock_transpile.assert_called_once_with(original_sql, "sqlite", "postgresql")
diff --git a/tests/unit_tests/sql/transpile_to_dialect_test.py b/tests/unit_tests/sql/transpile_to_dialect_test.py
index 1327b09009..5a11e501fa 100644
--- a/tests/unit_tests/sql/transpile_to_dialect_test.py
+++ b/tests/unit_tests/sql/transpile_to_dialect_test.py
@@ -345,3 +345,54 @@ def test_sqlglot_generation_error_raises_exception() -> None:
match="Cannot transpile SQL to postgresql",
):
transpile_to_dialect("name = 'test'", "postgresql")
+
+
+# Tests for source_engine parameter
[email protected](
+ ("sql", "source_engine", "target_engine", "expected"),
+ [
+ # PostgreSQL to MySQL - should convert :: casting to CAST()
+ (
+ "SELECT created_at::DATE FROM orders",
+ "postgresql",
+ "mysql",
+ "SELECT CAST(created_at AS DATE) FROM orders",
+ ),
+ # Same dialect - should preserve SQL
+ (
+ "SELECT * FROM orders",
+ "postgresql",
+ "postgresql",
+ "SELECT * FROM orders",
+ ),
+        # PostgreSQL to DuckDB - DuckDB supports similar syntax (uppercases date part)
+ (
+ "SELECT DATE_TRUNC('month', ts) FROM orders",
+ "postgresql",
+ "duckdb",
+ "SELECT DATE_TRUNC('MONTH', ts) FROM orders",
+ ),
+ ],
+)
+def test_transpile_with_source_engine(
+ sql: str, source_engine: str, target_engine: str, expected: str
+) -> None:
+ """Test transpilation with explicit source engine."""
+ result = transpile_to_dialect(sql, target_engine, source_engine)
+ assert result == expected
+
+
+def test_transpile_source_engine_none_uses_generic() -> None:
+    """Test that source_engine=None uses generic dialect (backward compatible)."""
+ # Simple SQL that doesn't require dialect-specific parsing
+ result = transpile_to_dialect("SELECT * FROM orders", "postgresql", None)
+ assert result == "SELECT * FROM orders"
+
+
+def test_transpile_unknown_source_engine_uses_generic() -> None:
+ """Test that unknown source_engine falls back to generic dialect."""
+ # Unknown engine should be treated as None (generic)
+ result = transpile_to_dialect(
+ "SELECT * FROM orders", "postgresql", "unknown_engine"
+ )
+ assert result == "SELECT * FROM orders"
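
For reference, the new optional source_engine argument can also be exercised directly, mirroring the parametrized cases above (a quick sketch, assuming a Superset development environment where superset.sql.parse imports cleanly; expected outputs follow the new tests):

    from superset.sql.parse import transpile_to_dialect

    # PostgreSQL-style '::' cast rewritten for MySQL
    print(transpile_to_dialect("SELECT created_at::DATE FROM orders", "mysql", "postgresql"))
    # -> SELECT CAST(created_at AS DATE) FROM orders

    # Missing or unknown source engines fall back to the generic dialect
    print(transpile_to_dialect("SELECT * FROM orders", "postgresql", None))
    # -> SELECT * FROM orders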