This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch semantic-layer-feature
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/semantic-layer-feature by this push:
     new e13619748ca feat: models and DAOs
e13619748ca is described below

commit e13619748ca3026aebc3e0f15f656155b6cd8637
Author: Beto Dealmeida <[email protected]>
AuthorDate: Fri Feb 6 15:52:37 2026 -0500

    feat: models and DAOs
---
 .../src/superset_core/semantic_layers/types.py     |   2 +-
 superset/connectors/sqla/models.py                 |   6 +-
 superset/explorables/base.py                       | 128 ++++++++++++++++++++-
 superset/models/sql_lab.py                         |   6 +-
 superset/superset_typing.py                        |  50 +++++++-
 superset/utils/core.py                             |  33 +++---
 tests/integration_tests/charts/api_tests.py        |   6 +-
 7 files changed, 200 insertions(+), 31 deletions(-)

diff --git a/superset-core/src/superset_core/semantic_layers/types.py b/superset-core/src/superset_core/semantic_layers/types.py
index 42c0cd49050..46bcf707174 100644
--- a/superset-core/src/superset_core/semantic_layers/types.py
+++ b/superset-core/src/superset_core/semantic_layers/types.py
@@ -216,7 +216,7 @@ class Metric:
     name: str
     type: TypeOf[Type]
 
-    definition: str | None
+    definition: str
     description: str | None = None
 
 
diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py
index be74a199672..6aabe485de5 100644
--- a/superset/connectors/sqla/models.py
+++ b/superset/connectors/sqla/models.py
@@ -107,6 +107,8 @@ from superset.sql.parse import Table
 from superset.superset_typing import (
     AdhocColumn,
     AdhocMetric,
+    DatasetColumnData,
+    DatasetMetricData,
     ExplorableData,
     Metric,
     QueryObjectDict,
@@ -463,8 +465,8 @@ class BaseDatasource(
             # sqla-specific
             "sql": self.sql,
             # one to many
-            "columns": [o.data for o in self.columns],
-            "metrics": [o.data for o in self.metrics],
+            "columns": [cast(DatasetColumnData, o.data) for o in self.columns],
+            "metrics": [cast(DatasetMetricData, o.data) for o in self.metrics],
             "folders": self.folders,
             # TODO deprecate, move logic to JS
             "order_by_choices": self.order_by_choices,
diff --git a/superset/explorables/base.py b/superset/explorables/base.py
index 2d534b72099..de69257a317 100644
--- a/superset/explorables/base.py
+++ b/superset/explorables/base.py
@@ -53,6 +53,130 @@ class TimeGrainDict(TypedDict):
     duration: str | None
 
 
+@runtime_checkable
+class MetricMetadata(Protocol):
+    """
+    Protocol for metric metadata objects.
+
+    Represents a metric that's available on an explorable data source.
+    Metrics contain SQL expressions or references to semantic layer measures.
+
+    Attributes:
+        metric_name: Unique identifier for the metric
+        expression: SQL expression or reference for calculating the metric
+        verbose_name: Human-readable name for display in the UI
+        description: Description of what the metric represents
+        d3format: D3 format string for formatting numeric values
+        currency: Currency configuration for the metric (JSON object)
+        warning_text: Warning message to display when using this metric
+        certified_by: Person or entity that certified this metric
+        certification_details: Details about the certification
+    """
+
+    @property
+    def metric_name(self) -> str:
+        """Unique identifier for the metric."""
+
+    @property
+    def expression(self) -> str:
+        """SQL expression or reference for calculating the metric."""
+
+    @property
+    def verbose_name(self) -> str | None:
+        """Human-readable name for display in the UI."""
+
+    @property
+    def description(self) -> str | None:
+        """Description of what the metric represents."""
+
+    @property
+    def d3format(self) -> str | None:
+        """D3 format string for formatting numeric values."""
+
+    @property
+    def currency(self) -> dict[str, Any] | None:
+        """Currency configuration for the metric (JSON object)."""
+
+    @property
+    def warning_text(self) -> str | None:
+        """Warning message to display when using this metric."""
+
+    @property
+    def certified_by(self) -> str | None:
+        """Person or entity that certified this metric."""
+
+    @property
+    def certification_details(self) -> str | None:
+        """Details about the certification."""
+
+
+@runtime_checkable
+class ColumnMetadata(Protocol):
+    """
+    Protocol for column metadata objects.
+
+    Represents a column/dimension that's available on an explorable data source.
+    Used for grouping, filtering, and dimension-based analysis.
+
+    Attributes:
+        column_name: Unique identifier for the column
+        type: SQL data type of the column (e.g., 'VARCHAR', 'INTEGER', 'DATETIME')
+        is_dttm: Whether this column represents a date or time value
+        verbose_name: Human-readable name for display in the UI
+        description: Description of what the column represents
+        groupby: Whether this column is allowed for grouping/aggregation
+        filterable: Whether this column can be used in filters
+        expression: SQL expression if this is a calculated column
+        python_date_format: Python datetime format string for temporal columns
+        advanced_data_type: Advanced data type classification
+        extra: Additional metadata stored as JSON
+    """
+
+    @property
+    def column_name(self) -> str:
+        """Unique identifier for the column."""
+
+    @property
+    def type(self) -> str:
+        """SQL data type of the column."""
+
+    @property
+    def is_dttm(self) -> bool:
+        """Whether this column represents a date or time value."""
+
+    @property
+    def verbose_name(self) -> str | None:
+        """Human-readable name for display in the UI."""
+
+    @property
+    def description(self) -> str | None:
+        """Description of what the column represents."""
+
+    @property
+    def groupby(self) -> bool:
+        """Whether this column is allowed for grouping/aggregation."""
+
+    @property
+    def filterable(self) -> bool:
+        """Whether this column can be used in filters."""
+
+    @property
+    def expression(self) -> str | None:
+        """SQL expression if this is a calculated column."""
+
+    @property
+    def python_date_format(self) -> str | None:
+        """Python datetime format string for temporal columns."""
+
+    @property
+    def advanced_data_type(self) -> str | None:
+        """Advanced data type classification."""
+
+    @property
+    def extra(self) -> str | None:
+        """Additional metadata stored as JSON."""
+
+
 @runtime_checkable
 class Explorable(Protocol):
     """
@@ -132,7 +256,7 @@ class Explorable(Protocol):
         """
 
     @property
-    def metrics(self) -> list[Any]:
+    def metrics(self) -> list[MetricMetadata]:
         """
         List of metric metadata objects.
 
@@ -147,7 +271,7 @@ class Explorable(Protocol):
 
     # TODO: rename to dimensions
     @property
-    def columns(self) -> list[Any]:
+    def columns(self) -> list[ColumnMetadata]:
         """
         List of column metadata objects.
 
diff --git a/superset/models/sql_lab.py b/superset/models/sql_lab.py
index 956d33053bc..e7d8cecff20 100644
--- a/superset/models/sql_lab.py
+++ b/superset/models/sql_lab.py
@@ -22,7 +22,7 @@ import logging
 import re
 from collections.abc import Hashable
 from datetime import datetime
-from typing import Any, Optional, TYPE_CHECKING
+from typing import Any, cast, Optional, TYPE_CHECKING
 
 import sqlalchemy as sqla
 from flask import current_app as app
@@ -64,7 +64,7 @@ from superset.sql.parse import (
     Table,
 )
 from superset.sqllab.limiting_factor import LimitingFactor
-from superset.superset_typing import ExplorableData, QueryObjectDict
+from superset.superset_typing import DatasetColumnData, ExplorableData, QueryObjectDict
 from superset.utils import json
 from superset.utils.core import (
     get_column_name,
@@ -258,7 +258,7 @@ class Query(
             ],
             "filter_select": True,
             "name": self.tab_name,
-            "columns": [o.data for o in self.columns],
+            "columns": [cast(DatasetColumnData, o.data) for o in self.columns],
             "metrics": [],
             "id": self.id,
             "type": self.type,
diff --git a/superset/superset_typing.py b/superset/superset_typing.py
index 02e294a08cf..ef002ac86ba 100644
--- a/superset/superset_typing.py
+++ b/superset/superset_typing.py
@@ -30,6 +30,46 @@ if TYPE_CHECKING:
 SQLType: TypeAlias = TypeEngine | type[TypeEngine]
 
 
+class DatasetColumnData(TypedDict, total=False):
+    """Type for column metadata in ExplorableData datasets."""
+
+    advanced_data_type: str | None
+    certification_details: str | None
+    certified_by: str | None
+    column_name: str
+    description: str | None
+    expression: str | None
+    filterable: bool
+    groupby: bool
+    id: int | None
+    uuid: str | None
+    is_certified: bool
+    is_dttm: bool
+    python_date_format: str | None
+    type: str
+    type_generic: NotRequired["GenericDataType" | None]
+    verbose_name: str | None
+    warning_markdown: str | None
+
+
+class DatasetMetricData(TypedDict, total=False):
+    """Type for metric metadata in ExplorableData datasets."""
+
+    certification_details: str | None
+    certified_by: str | None
+    currency: NotRequired[dict[str, Any]]
+    d3format: str | None
+    description: str | None
+    expression: str | None
+    id: int | None
+    uuid: str | None
+    is_certified: bool
+    metric_name: str
+    warning_markdown: str | None
+    warning_text: str | None
+    verbose_name: str | None
+
+
 class LegacyMetric(TypedDict):
     label: str | None
 
@@ -254,7 +294,7 @@ class ExplorableData(TypedDict, total=False):
     """
 
     # Core fields from BaseDatasource.data
-    id: int
+    id: int | str  # String for UUID-based explorables like SemanticView
     uid: str
     column_formats: dict[str, str | None]
     description: str | None
@@ -274,8 +314,8 @@ class ExplorableData(TypedDict, total=False):
     perm: str | None
     edit_url: str
     sql: str | None
-    columns: list[dict[str, Any]]
-    metrics: list[dict[str, Any]]
+    columns: list["DatasetColumnData"]
+    metrics: list["DatasetMetricData"]
     folders: Any  # JSON field, can be list or dict
     order_by_choices: list[tuple[str, str]]
     owners: list[int] | list[dict[str, Any]]  # Can be either format
@@ -283,8 +323,8 @@ class ExplorableData(TypedDict, total=False):
     select_star: str | None
 
     # Additional fields from SqlaTable and data_for_slices
-    column_types: list[Any]
-    column_names: set[str] | set[Any]
+    column_types: list["GenericDataType"]
+    column_names: set[str] | list[str]
     granularity_sqla: list[tuple[Any, Any]]
     time_grain_sqla: list[tuple[Any, Any]]
     main_dttm_col: str | None
diff --git a/superset/utils/core.py b/superset/utils/core.py
index 23a3017bf2c..795be29be69 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -96,7 +96,6 @@ from superset.exceptions import (
     SupersetException,
     SupersetTimeoutException,
 )
-from superset.explorables.base import Explorable
 from superset.sql.parse import sanitize_clause
 from superset.superset_typing import (
     AdhocColumn,
@@ -115,7 +114,7 @@ from superset.utils.hashing import hash_from_dict, hash_from_str
 from superset.utils.pandas import detect_datetime_format
 
 if TYPE_CHECKING:
-    from superset.connectors.sqla.models import TableColumn
+    from superset.explorables.base import ColumnMetadata, Explorable
     from superset.models.core import Database
 
 logging.getLogger("MARKDOWN").setLevel(logging.INFO)
@@ -200,6 +199,7 @@ class DatasourceType(StrEnum):
     QUERY = "query"
     SAVEDQUERY = "saved_query"
     VIEW = "view"
+    SEMANTIC_VIEW = "semantic_view"
 
 
 class LoggerLevel(StrEnum):
@@ -1730,15 +1730,12 @@ def get_metric_type_from_column(column: Any, datasource: Explorable) -> str:
     :return: The inferred metric type as a string, or an empty string if the
              column is not a metric or no valid operation is found.
     """
-
-    from superset.connectors.sqla.models import SqlMetric
-
-    metric: SqlMetric = next(
-        (metric for metric in datasource.metrics if metric.metric_name == column),
-        SqlMetric(metric_name=""),
+    metric = next(
+        (m for m in datasource.metrics if m.metric_name == column),
+        None,
     )
 
-    if metric.metric_name == "":
+    if metric is None:
         return ""
 
     expression: str = metric.expression
@@ -1784,7 +1781,7 @@ def extract_dataframe_dtypes(
 
     generic_types: list[GenericDataType] = []
     for column in df.columns:
-        column_object = columns_by_name.get(column)
+        column_object = columns_by_name.get(str(column))
         series = df[column]
         inferred_type: str = ""
         if series.isna().all():
@@ -1814,11 +1811,17 @@ def extract_dataframe_dtypes(
     return generic_types
 
 
-def extract_column_dtype(col: TableColumn) -> GenericDataType:
-    if col.is_temporal:
+def extract_column_dtype(col: ColumnMetadata) -> GenericDataType:
+    # Check for temporal type
+    if hasattr(col, "is_temporal") and col.is_temporal:
+        return GenericDataType.TEMPORAL
+    if col.is_dttm:
         return GenericDataType.TEMPORAL
-    if col.is_numeric:
+
+    # Check for numeric type
+    if hasattr(col, "is_numeric") and col.is_numeric:
         return GenericDataType.NUMERIC
+
     # TODO: add check for boolean data type when proper support is added
     return GenericDataType.STRING
 
@@ -1832,9 +1835,7 @@ def get_time_filter_status(
     applied_time_extras: dict[str, str],
 ) -> tuple[list[dict[str, str]], list[dict[str, str]]]:
     temporal_columns: set[Any] = {
-        (col.column_name if hasattr(col, "column_name") else col.get("column_name"))
-        for col in datasource.columns
-        if (col.is_dttm if hasattr(col, "is_dttm") else col.get("is_dttm"))
+        col.column_name for col in datasource.columns if col.is_dttm
     }
     applied: list[dict[str, str]] = []
     rejected: list[dict[str, str]] = []
diff --git a/tests/integration_tests/charts/api_tests.py 
b/tests/integration_tests/charts/api_tests.py
index b8b60355419..ea35b176449 100644
--- a/tests/integration_tests/charts/api_tests.py
+++ b/tests/integration_tests/charts/api_tests.py
@@ -626,7 +626,8 @@ class TestChartApi(ApiOwnersTestCaseMixin, InsertChartMixin, SupersetTestCase):
         assert response == {
             "message": {
                 "datasource_type": [
-                    "Must be one of: table, dataset, query, saved_query, view."
+                    "Must be one of: table, dataset, query, saved_query, view, "
+                    "semantic_view."
                 ]
             }
         }
@@ -981,7 +982,8 @@ class TestChartApi(ApiOwnersTestCaseMixin, InsertChartMixin, SupersetTestCase):
         assert response == {
             "message": {
                 "datasource_type": [
-                    "Must be one of: table, dataset, query, saved_query, view."
+                    "Must be one of: table, dataset, query, saved_query, view, "
+                    "semantic_view."
                 ]
             }
         }

Reply via email to