This is an automated email from the ASF dual-hosted git repository. beto pushed a commit to branch semantic-layer-feature in repository https://gitbox.apache.org/repos/asf/superset.git
commit 85fe51c816ec44282570ec0cf6151f95b9c6cd37 Author: Beto Dealmeida <[email protected]> AuthorDate: Fri Feb 6 10:39:38 2026 -0500 Initial files --- .../semantic_layers/semantic_layer.py | 114 +++++++ .../superset_core/semantic_layers/semantic_view.py | 105 +++++++ .../src/superset_core/semantic_layers/types.py | 334 +++++++++++++++++++++ 3 files changed, 553 insertions(+) diff --git a/superset-core/src/superset_core/semantic_layers/semantic_layer.py b/superset-core/src/superset_core/semantic_layers/semantic_layer.py new file mode 100644 index 00000000000..615014f8c1b --- /dev/null +++ b/superset-core/src/superset_core/semantic_layers/semantic_layer.py @@ -0,0 +1,114 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +from typing import Any, Protocol, runtime_checkable, TypeVar + +from pydantic import BaseModel +from superset_core.semantic_layers.semantic_view import SemanticView + +ConfigT = TypeVar("ConfigT", bound=BaseModel, contravariant=True) +SemanticViewT = TypeVar("SemanticViewT", bound="SemanticView") + + +# TODO (betodealmeida): convert to ABC +@runtime_checkable +class SemanticLayer(Protocol[ConfigT, SemanticViewT]): + """ + A protocol for semantic layers. 
+ """ + + @classmethod + def from_configuration( + cls, + configuration: dict[str, Any], + ) -> SemanticLayer[ConfigT, SemanticViewT]: + """ + Create a semantic layer from its configuration. + """ + + @classmethod + def get_configuration_schema( + cls, + configuration: ConfigT | None = None, + ) -> dict[str, Any]: + """ + Get the JSON schema for the configuration needed to add the semantic layer. + + A partial configuration `configuration` can be sent to improve the schema, + allowing for progressive validation and better UX. For example, a semantic + layer might require: + + - auth information + - a database + + If the user provides the auth information, a client can send the partial + configuration to this method, and the resulting JSON schema would include + the list of databases the user has access to, allowing a dropdown to be + populated. + + The Snowflake semantic layer has an example implementation of this method, where + database and schema names are populated based on the provided connection info. + """ + + @classmethod + def get_runtime_schema( + cls, + configuration: ConfigT, + runtime_data: dict[str, Any] | None = None, + ) -> dict[str, Any]: + """ + Get the JSON schema for the runtime parameters needed to load semantic views. + + This returns the schema needed to connect to a semantic view given the + configuration for the semantic layer. For example, a semantic layer might + be configured by: + + - auth information + - an optional database + + If the user does not provide a database when creating the semantic layer, the + runtime schema would require the database name to be provided before loading any + semantic views. This allows users to create semantic layers that connect to a + specific database (or project, account, etc.), or that allow users to select it + at query time. 
+ + The Snowflake semantic layer has an example implementation of this method, where + database and schema names are required if they were not provided in the initial + configuration. + """ + + def get_semantic_views( + self, + runtime_configuration: dict[str, Any], + ) -> set[SemanticViewT]: + """ + Get the semantic views available in the semantic layer. + + The runtime configuration can provide information like a given project or + schema, used to restrict the semantic views returned. + """ + + def get_semantic_view( + self, + name: str, + additional_configuration: dict[str, Any], + ) -> SemanticViewT: + """ + Get a specific semantic view by its name and additional configuration. + """ diff --git a/superset-core/src/superset_core/semantic_layers/semantic_view.py b/superset-core/src/superset_core/semantic_layers/semantic_view.py new file mode 100644 index 00000000000..11f041132e3 --- /dev/null +++ b/superset-core/src/superset_core/semantic_layers/semantic_view.py @@ -0,0 +1,105 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from __future__ import annotations + +import enum +from typing import Protocol, runtime_checkable + +from superset_core.semantic_layers.types import ( + AdhocFilter, + Dimension, + Filter, + GroupLimit, + Metric, + OrderTuple, + SemanticResult, +) + + +class SemanticViewFeature(enum.Enum): + """ + Custom features supported by semantic layers. + """ + + ADHOC_EXPRESSIONS_IN_ORDERBY = "ADHOC_EXPRESSIONS_IN_ORDERBY" + GROUP_LIMIT = "GROUP_LIMIT" + GROUP_OTHERS = "GROUP_OTHERS" + + +# TODO (betodealmeida): convert to ABC +@runtime_checkable +class SemanticView(Protocol): + """ + A protocol for semantic views. + """ + + features: frozenset[SemanticViewFeature] + + def uid(self) -> str: + """ + Returns a unique identifier for the semantic view. + """ + + def get_dimensions(self) -> set[Dimension]: + """ + Get the dimensions defined in the semantic view. + """ + + def get_metrics(self) -> set[Metric]: + """ + Get the metrics defined in the semantic view. + """ + + def get_values( + self, + dimension: Dimension, + filters: set[Filter | AdhocFilter] | None = None, + ) -> SemanticResult: + """ + Return distinct values for a dimension. + """ + + def get_dataframe( + self, + metrics: list[Metric], + dimensions: list[Dimension], + filters: set[Filter | AdhocFilter] | None = None, + order: list[OrderTuple] | None = None, + limit: int | None = None, + offset: int | None = None, + *, + group_limit: GroupLimit | None = None, + ) -> SemanticResult: + """ + Execute a semantic query and return the results as a DataFrame. + """ + + def get_row_count( + self, + metrics: list[Metric], + dimensions: list[Dimension], + filters: set[Filter | AdhocFilter] | None = None, + order: list[OrderTuple] | None = None, + limit: int | None = None, + offset: int | None = None, + *, + group_limit: GroupLimit | None = None, + ) -> SemanticResult: + """ + Execute a query and return the number of rows the result would have. 
+ """ diff --git a/superset-core/src/superset_core/semantic_layers/types.py b/superset-core/src/superset_core/semantic_layers/types.py new file mode 100644 index 00000000000..7796003461b --- /dev/null +++ b/superset-core/src/superset_core/semantic_layers/types.py @@ -0,0 +1,334 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import annotations + +import enum +from dataclasses import dataclass +from datetime import date, datetime, time, timedelta +from functools import total_ordering + +from pandas import DataFrame + +__all__ = [ + "BINARY", + "BOOLEAN", + "DATE", + "DATETIME", + "DECIMAL", + "Day", + "Dimension", + "Hour", + "INTEGER", + "INTERVAL", + "Minute", + "Month", + "NUMBER", + "OBJECT", + "Quarter", + "Second", + "STRING", + "TIME", + "Week", + "Year", +] + + +class Type: + """ + Base class for types. + """ + + +class INTEGER(Type): + """ + Represents an integer type. + """ + + +class NUMBER(Type): + """ + Represents a number type. + """ + + +class DECIMAL(Type): + """ + Represents a decimal type. + """ + + +class STRING(Type): + """ + Represents a string type. + """ + + +class BOOLEAN(Type): + """ + Represents a boolean type. + """ + + +class DATE(Type): + """ + Represents a date type. 
+ """ + + +class TIME(Type): + """ + Represents a time type. + """ + + +class DATETIME(DATE, TIME): + """ + Represents a datetime type. + """ + + +class INTERVAL(Type): + """ + Represents an interval type. + """ + + +class OBJECT(Type): + """ + Represents an object type. + """ + + +class BINARY(Type): + """ + Represents a binary type. + """ + + +@dataclass(frozen=True) +@total_ordering +class Grain: + """ + Base class for time and date grains with comparison support. + + Attributes: + name: Human-readable name of the grain (e.g., "Second") + representation: ISO 8601 representation (e.g., "PT1S") + value: Time period as a timedelta + """ + + name: str + representation: str + value: timedelta + + def __eq__(self, other: object) -> bool: + if isinstance(other, Grain): + return self.value == other.value + return NotImplemented + + def __lt__(self, other: object) -> bool: + if isinstance(other, Grain): + return self.value < other.value + return NotImplemented + + def __hash__(self) -> int: + return hash((self.name, self.representation, self.value)) + + +class Second(Grain): + name = "Second" + representation = "PT1S" + value = timedelta(seconds=1) + + +class Minute(Grain): + name = "Minute" + representation = "PT1M" + value = timedelta(minutes=1) + + +class Hour(Grain): + name = "Hour" + representation = "PT1H" + value = timedelta(hours=1) + + +class Day(Grain): + name = "Day" + representation = "P1D" + value = timedelta(days=1) + + +class Week(Grain): + name = "Week" + representation = "P1W" + value = timedelta(weeks=1) + + +class Month(Grain): + name = "Month" + representation = "P1M" + value = timedelta(days=30) + + +class Quarter(Grain): + name = "Quarter" + representation = "P3M" + value = timedelta(days=90) + + +class Year(Grain): + name = "Year" + representation = "P1Y" + value = timedelta(days=365) + + +@dataclass(frozen=True) +class Dimension: + id: str + name: str + type: type[Type] + + definition: str | None = None + description: str | None = None + grain: 
Grain | None = None + + +@dataclass(frozen=True) +class Metric: + id: str + name: str + type: type[Type] + + definition: str | None + description: str | None = None + + +@dataclass(frozen=True) +class AdhocExpression: + id: str + definition: str + + +class Operator(str, enum.Enum): + EQUALS = "=" + NOT_EQUALS = "!=" + GREATER_THAN = ">" + LESS_THAN = "<" + GREATER_THAN_OR_EQUAL = ">=" + LESS_THAN_OR_EQUAL = "<=" + IN = "IN" + NOT_IN = "NOT IN" + LIKE = "LIKE" + NOT_LIKE = "NOT LIKE" + IS_NULL = "IS NULL" + IS_NOT_NULL = "IS NOT NULL" + + +FilterValues = str | int | float | bool | datetime | date | time | timedelta | None + + +class PredicateType(enum.Enum): + WHERE = "WHERE" + HAVING = "HAVING" + + +@dataclass(frozen=True, order=True) +class Filter: + type: PredicateType + column: Dimension | Metric + operator: Operator + value: FilterValues | set[FilterValues] + + +# TODO (betodealmeida): convert into Operator: +# Filter(type=..., column=None, operator=Operator.AdHoc, value="some definition") +@dataclass(frozen=True, order=True) +class AdhocFilter: + type: PredicateType + definition: str + + +class OrderDirection(enum.Enum): + ASC = "ASC" + DESC = "DESC" + + +OrderTuple = tuple[Metric | Dimension | AdhocExpression, OrderDirection] + + +@dataclass(frozen=True) +class GroupLimit: + """ + Limit query to top/bottom N combinations of specified dimensions. + + The `filters` parameter allows specifying separate filter constraints for the + group limit subquery. This is useful when you want to determine the top N groups + using different criteria (e.g., a different time range) than the main query. + + For example, you might want to find the top 10 products by sales over the last + 30 days, but then show daily sales for those products over the last 7 days. 
+ """ + + dimensions: list[Dimension] + top: int + metric: Metric | None + direction: OrderDirection = OrderDirection.DESC + group_others: bool = False + filters: set[Filter | AdhocFilter] | None = None + + +@dataclass(frozen=True) +class SemanticRequest: + """ + Represents a request made to obtain semantic results. + + This could be a SQL query, an HTTP request, etc. + """ + + type: str + definition: str + + +@dataclass(frozen=True) +class SemanticResult: + """ + Represents the results of a semantic query. + + This includes any requests (SQL queries, HTTP requests) that were performed in order + to obtain the results, in order to help troubleshooting. + """ + + requests: list[SemanticRequest] + # TODO (betodealmeida): convert to PyArrow Table + results: DataFrame + + +@dataclass(frozen=True) +class SemanticQuery: + """ + Represents a semantic query. + """ + + metrics: list[Metric] + dimensions: list[Dimension] + filters: set[Filter | AdhocFilter] | None = None + order: list[OrderTuple] | None = None + limit: int | None = None + offset: int | None = None + group_limit: GroupLimit | None = None
