This is an automated email from the ASF dual-hosted git repository.
beto pushed a commit to branch semantic-layer-feature
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/semantic-layer-feature by this push:
new e13619748ca feat: models and DAOs
e13619748ca is described below
commit e13619748ca3026aebc3e0f15f656155b6cd8637
Author: Beto Dealmeida <[email protected]>
AuthorDate: Fri Feb 6 15:52:37 2026 -0500
feat: models and DAOs
---
.../src/superset_core/semantic_layers/types.py | 2 +-
superset/connectors/sqla/models.py | 6 +-
superset/explorables/base.py | 128 ++++++++++++++++++++-
superset/models/sql_lab.py | 6 +-
superset/superset_typing.py | 50 +++++++-
superset/utils/core.py | 33 +++---
tests/integration_tests/charts/api_tests.py | 6 +-
7 files changed, 200 insertions(+), 31 deletions(-)
diff --git a/superset-core/src/superset_core/semantic_layers/types.py b/superset-core/src/superset_core/semantic_layers/types.py
index 42c0cd49050..46bcf707174 100644
--- a/superset-core/src/superset_core/semantic_layers/types.py
+++ b/superset-core/src/superset_core/semantic_layers/types.py
@@ -216,7 +216,7 @@ class Metric:
name: str
type: TypeOf[Type]
- definition: str | None
+ definition: str
description: str | None = None
diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py
index be74a199672..6aabe485de5 100644
--- a/superset/connectors/sqla/models.py
+++ b/superset/connectors/sqla/models.py
@@ -107,6 +107,8 @@ from superset.sql.parse import Table
from superset.superset_typing import (
AdhocColumn,
AdhocMetric,
+ DatasetColumnData,
+ DatasetMetricData,
ExplorableData,
Metric,
QueryObjectDict,
@@ -463,8 +465,8 @@ class BaseDatasource(
# sqla-specific
"sql": self.sql,
# one to many
- "columns": [o.data for o in self.columns],
- "metrics": [o.data for o in self.metrics],
+ "columns": [cast(DatasetColumnData, o.data) for o in self.columns],
+ "metrics": [cast(DatasetMetricData, o.data) for o in self.metrics],
"folders": self.folders,
# TODO deprecate, move logic to JS
"order_by_choices": self.order_by_choices,
diff --git a/superset/explorables/base.py b/superset/explorables/base.py
index 2d534b72099..de69257a317 100644
--- a/superset/explorables/base.py
+++ b/superset/explorables/base.py
@@ -53,6 +53,130 @@ class TimeGrainDict(TypedDict):
duration: str | None
+@runtime_checkable
+class MetricMetadata(Protocol):
+ """
+ Protocol for metric metadata objects.
+
+ Represents a metric that's available on an explorable data source.
+ Metrics contain SQL expressions or references to semantic layer measures.
+
+ Attributes:
+ metric_name: Unique identifier for the metric
+ expression: SQL expression or reference for calculating the metric
+ verbose_name: Human-readable name for display in the UI
+ description: Description of what the metric represents
+ d3format: D3 format string for formatting numeric values
+ currency: Currency configuration for the metric (JSON object)
+ warning_text: Warning message to display when using this metric
+ certified_by: Person or entity that certified this metric
+ certification_details: Details about the certification
+ """
+
+ @property
+ def metric_name(self) -> str:
+ """Unique identifier for the metric."""
+
+ @property
+ def expression(self) -> str:
+ """SQL expression or reference for calculating the metric."""
+
+ @property
+ def verbose_name(self) -> str | None:
+ """Human-readable name for display in the UI."""
+
+ @property
+ def description(self) -> str | None:
+ """Description of what the metric represents."""
+
+ @property
+ def d3format(self) -> str | None:
+ """D3 format string for formatting numeric values."""
+
+ @property
+ def currency(self) -> dict[str, Any] | None:
+ """Currency configuration for the metric (JSON object)."""
+
+ @property
+ def warning_text(self) -> str | None:
+ """Warning message to display when using this metric."""
+
+ @property
+ def certified_by(self) -> str | None:
+ """Person or entity that certified this metric."""
+
+ @property
+ def certification_details(self) -> str | None:
+ """Details about the certification."""
+
+
+@runtime_checkable
+class ColumnMetadata(Protocol):
+ """
+ Protocol for column metadata objects.
+
+ Represents a column/dimension that's available on an explorable data source.
+ Used for grouping, filtering, and dimension-based analysis.
+
+ Attributes:
+ column_name: Unique identifier for the column
+ type: SQL data type of the column (e.g., 'VARCHAR', 'INTEGER', 'DATETIME')
+ is_dttm: Whether this column represents a date or time value
+ verbose_name: Human-readable name for display in the UI
+ description: Description of what the column represents
+ groupby: Whether this column is allowed for grouping/aggregation
+ filterable: Whether this column can be used in filters
+ expression: SQL expression if this is a calculated column
+ python_date_format: Python datetime format string for temporal columns
+ advanced_data_type: Advanced data type classification
+ extra: Additional metadata stored as JSON
+ """
+
+ @property
+ def column_name(self) -> str:
+ """Unique identifier for the column."""
+
+ @property
+ def type(self) -> str:
+ """SQL data type of the column."""
+
+ @property
+ def is_dttm(self) -> bool:
+ """Whether this column represents a date or time value."""
+
+ @property
+ def verbose_name(self) -> str | None:
+ """Human-readable name for display in the UI."""
+
+ @property
+ def description(self) -> str | None:
+ """Description of what the column represents."""
+
+ @property
+ def groupby(self) -> bool:
+ """Whether this column is allowed for grouping/aggregation."""
+
+ @property
+ def filterable(self) -> bool:
+ """Whether this column can be used in filters."""
+
+ @property
+ def expression(self) -> str | None:
+ """SQL expression if this is a calculated column."""
+
+ @property
+ def python_date_format(self) -> str | None:
+ """Python datetime format string for temporal columns."""
+
+ @property
+ def advanced_data_type(self) -> str | None:
+ """Advanced data type classification."""
+
+ @property
+ def extra(self) -> str | None:
+ """Additional metadata stored as JSON."""
+
+
@runtime_checkable
class Explorable(Protocol):
"""
@@ -132,7 +256,7 @@ class Explorable(Protocol):
"""
@property
- def metrics(self) -> list[Any]:
+ def metrics(self) -> list[MetricMetadata]:
"""
List of metric metadata objects.
@@ -147,7 +271,7 @@ class Explorable(Protocol):
# TODO: rename to dimensions
@property
- def columns(self) -> list[Any]:
+ def columns(self) -> list[ColumnMetadata]:
"""
List of column metadata objects.
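Since both new protocols are marked @runtime_checkable, isinstance() only verifies that the named attributes exist; their types are not checked at runtime. A minimal sketch (hypothetical class and values, not part of this diff) of an object satisfying ColumnMetadata:

    from dataclasses import dataclass

    from superset.explorables.base import ColumnMetadata

    @dataclass
    class PlainColumn:
        column_name: str
        type: str = "VARCHAR"
        is_dttm: bool = False
        verbose_name: str | None = None
        description: str | None = None
        groupby: bool = True
        filterable: bool = True
        expression: str | None = None
        python_date_format: str | None = None
        advanced_data_type: str | None = None
        extra: str | None = None

    col = PlainColumn(column_name="created_at", type="TIMESTAMP", is_dttm=True)
    assert isinstance(col, ColumnMetadata)  # attribute-presence check only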
diff --git a/superset/models/sql_lab.py b/superset/models/sql_lab.py
index 956d33053bc..e7d8cecff20 100644
--- a/superset/models/sql_lab.py
+++ b/superset/models/sql_lab.py
@@ -22,7 +22,7 @@ import logging
import re
from collections.abc import Hashable
from datetime import datetime
-from typing import Any, Optional, TYPE_CHECKING
+from typing import Any, cast, Optional, TYPE_CHECKING
import sqlalchemy as sqla
from flask import current_app as app
@@ -64,7 +64,7 @@ from superset.sql.parse import (
Table,
)
from superset.sqllab.limiting_factor import LimitingFactor
-from superset.superset_typing import ExplorableData, QueryObjectDict
+from superset.superset_typing import DatasetColumnData, ExplorableData, QueryObjectDict
from superset.utils import json
from superset.utils.core import (
get_column_name,
@@ -258,7 +258,7 @@ class Query(
],
"filter_select": True,
"name": self.tab_name,
- "columns": [o.data for o in self.columns],
+ "columns": [cast(DatasetColumnData, o.data) for o in self.columns],
"metrics": [],
"id": self.id,
"type": self.type,
diff --git a/superset/superset_typing.py b/superset/superset_typing.py
index 02e294a08cf..ef002ac86ba 100644
--- a/superset/superset_typing.py
+++ b/superset/superset_typing.py
@@ -30,6 +30,46 @@ if TYPE_CHECKING:
SQLType: TypeAlias = TypeEngine | type[TypeEngine]
+class DatasetColumnData(TypedDict, total=False):
+ """Type for column metadata in ExplorableData datasets."""
+
+ advanced_data_type: str | None
+ certification_details: str | None
+ certified_by: str | None
+ column_name: str
+ description: str | None
+ expression: str | None
+ filterable: bool
+ groupby: bool
+ id: int | None
+ uuid: str | None
+ is_certified: bool
+ is_dttm: bool
+ python_date_format: str | None
+ type: str
+ type_generic: NotRequired["GenericDataType | None"]
+ verbose_name: str | None
+ warning_markdown: str | None
+
+
+class DatasetMetricData(TypedDict, total=False):
+ """Type for metric metadata in ExplorableData datasets."""
+
+ certification_details: str | None
+ certified_by: str | None
+ currency: NotRequired[dict[str, Any]]
+ d3format: str | None
+ description: str | None
+ expression: str | None
+ id: int | None
+ uuid: str | None
+ is_certified: bool
+ metric_name: str
+ warning_markdown: str | None
+ warning_text: str | None
+ verbose_name: str | None
+
+
class LegacyMetric(TypedDict):
label: str | None
@@ -254,7 +294,7 @@ class ExplorableData(TypedDict, total=False):
"""
# Core fields from BaseDatasource.data
- id: int
+ id: int | str # String for UUID-based explorables like SemanticView
uid: str
column_formats: dict[str, str | None]
description: str | None
@@ -274,8 +314,8 @@ class ExplorableData(TypedDict, total=False):
perm: str | None
edit_url: str
sql: str | None
- columns: list[dict[str, Any]]
- metrics: list[dict[str, Any]]
+ columns: list["DatasetColumnData"]
+ metrics: list["DatasetMetricData"]
folders: Any # JSON field, can be list or dict
order_by_choices: list[tuple[str, str]]
owners: list[int] | list[dict[str, Any]] # Can be either format
@@ -283,8 +323,8 @@ class ExplorableData(TypedDict, total=False):
select_star: str | None
# Additional fields from SqlaTable and data_for_slices
- column_types: list[Any]
- column_names: set[str] | set[Any]
+ column_types: list["GenericDataType"]
+ column_names: set[str] | list[str]
granularity_sqla: list[tuple[Any, Any]]
time_grain_sqla: list[tuple[Any, Any]]
main_dttm_col: str | None
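Because both new TypedDicts are declared with total=False, every key is optional and a partial payload type-checks. A quick sketch (illustrative values, not part of this diff):

    from superset.superset_typing import DatasetColumnData, DatasetMetricData

    column: DatasetColumnData = {
        "column_name": "created_at",
        "type": "TIMESTAMP",
        "is_dttm": True,
        "groupby": True,
        "filterable": True,
    }
    metric: DatasetMetricData = {
        "metric_name": "count",
        "expression": "COUNT(*)",
        "d3format": ",d",
    }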
diff --git a/superset/utils/core.py b/superset/utils/core.py
index 23a3017bf2c..795be29be69 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -96,7 +96,6 @@ from superset.exceptions import (
SupersetException,
SupersetTimeoutException,
)
-from superset.explorables.base import Explorable
from superset.sql.parse import sanitize_clause
from superset.superset_typing import (
AdhocColumn,
@@ -115,7 +114,7 @@ from superset.utils.hashing import hash_from_dict, hash_from_str
from superset.utils.pandas import detect_datetime_format
if TYPE_CHECKING:
- from superset.connectors.sqla.models import TableColumn
+ from superset.explorables.base import ColumnMetadata, Explorable
from superset.models.core import Database
logging.getLogger("MARKDOWN").setLevel(logging.INFO)
@@ -200,6 +199,7 @@ class DatasourceType(StrEnum):
QUERY = "query"
SAVEDQUERY = "saved_query"
VIEW = "view"
+ SEMANTIC_VIEW = "semantic_view"
class LoggerLevel(StrEnum):
@@ -1730,15 +1730,12 @@ def get_metric_type_from_column(column: Any, datasource: Explorable) -> str:
:return: The inferred metric type as a string, or an empty string if the
column is not a metric or no valid operation is found.
"""
-
- from superset.connectors.sqla.models import SqlMetric
-
- metric: SqlMetric = next(
- (metric for metric in datasource.metrics if metric.metric_name == column),
- SqlMetric(metric_name=""),
+ metric = next(
+ (m for m in datasource.metrics if m.metric_name == column),
+ None,
)
- if metric.metric_name == "":
+ if metric is None:
return ""
expression: str = metric.expression
@@ -1784,7 +1781,7 @@ def extract_dataframe_dtypes(
generic_types: list[GenericDataType] = []
for column in df.columns:
- column_object = columns_by_name.get(column)
+ column_object = columns_by_name.get(str(column))
series = df[column]
inferred_type: str = ""
if series.isna().all():
@@ -1814,11 +1811,17 @@ def extract_dataframe_dtypes(
return generic_types
-def extract_column_dtype(col: TableColumn) -> GenericDataType:
- if col.is_temporal:
+def extract_column_dtype(col: ColumnMetadata) -> GenericDataType:
+ # Check for temporal type
+ if hasattr(col, "is_temporal") and col.is_temporal:
+ return GenericDataType.TEMPORAL
+ if col.is_dttm:
return GenericDataType.TEMPORAL
- if col.is_numeric:
+
+ # Check for numeric type
+ if hasattr(col, "is_numeric") and col.is_numeric:
return GenericDataType.NUMERIC
+
# TODO: add check for boolean data type when proper support is added
return GenericDataType.STRING
@@ -1832,9 +1835,7 @@ def get_time_filter_status(
applied_time_extras: dict[str, str],
) -> tuple[list[dict[str, str]], list[dict[str, str]]]:
temporal_columns: set[Any] = {
- (col.column_name if hasattr(col, "column_name") else col.get("column_name"))
- for col in datasource.columns
- if (col.is_dttm if hasattr(col, "is_dttm") else col.get("is_dttm"))
+ col.column_name for col in datasource.columns if col.is_dttm
}
applied: list[dict[str, str]] = []
rejected: list[dict[str, str]] = []
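The reworked extract_column_dtype falls back gracefully between SQLA columns (which expose is_temporal/is_numeric) and plain ColumnMetadata objects (which only guarantee is_dttm). A sketch using stand-in objects (SimpleNamespace here is illustrative, not part of this diff):

    from types import SimpleNamespace

    from superset.utils.core import GenericDataType, extract_column_dtype

    # Only the ColumnMetadata surface: no is_temporal, so is_dttm decides.
    protocol_col = SimpleNamespace(is_dttm=True)
    assert extract_column_dtype(protocol_col) == GenericDataType.TEMPORAL

    # SQLA-style column with the richer flags.
    sqla_col = SimpleNamespace(is_temporal=False, is_numeric=True, is_dttm=False)
    assert extract_column_dtype(sqla_col) == GenericDataType.NUMERIC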
diff --git a/tests/integration_tests/charts/api_tests.py b/tests/integration_tests/charts/api_tests.py
index b8b60355419..ea35b176449 100644
--- a/tests/integration_tests/charts/api_tests.py
+++ b/tests/integration_tests/charts/api_tests.py
@@ -626,7 +626,8 @@ class TestChartApi(ApiOwnersTestCaseMixin, InsertChartMixin, SupersetTestCase):
assert response == {
"message": {
"datasource_type": [
- "Must be one of: table, dataset, query, saved_query, view."
+ "Must be one of: table, dataset, query, saved_query, view,
"
+ "semantic_view."
]
}
}
@@ -981,7 +982,8 @@ class TestChartApi(ApiOwnersTestCaseMixin, InsertChartMixin, SupersetTestCase):
assert response == {
"message": {
"datasource_type": [
- "Must be one of: table, dataset, query, saved_query, view."
+ "Must be one of: table, dataset, query, saved_query, view,
"
+ "semantic_view."
]
}
}