This is an automated email from the ASF dual-hosted git repository.

elizabeth pushed a commit to branch elizabeth/fix-resize-bug
in repository https://gitbox.apache.org/repos/asf/superset.git
commit b404dceb5c2e6ff7bcb50189541514fd6724ca2c
Author: Maxime Beauchemin <[email protected]>
AuthorDate: Fri Jul 25 16:26:32 2025 -0700

    feat(timeseries): enhance 'Series Limit' to support grouping the long tail (#34308)
---
 LLMS.md                                            |   8 +-
 .../src/sections/echartsTimeSeriesQuery.tsx        |   2 +-
 .../src/shared-controls/sharedControls.tsx         |  14 ++
 .../superset-ui-core/src/query/buildQueryObject.ts |   2 +
 .../test/MixedTimeseries/buildQuery.test.ts        |   2 +
 .../src/explore/controlPanels/sections.tsx         |   3 +-
 .../UploadDataModel/UploadDataModal.test.tsx       |   2 +-
 superset/charts/schemas.py                         |   8 ++
 superset/common/query_object.py                    |   3 +
 superset/models/helpers.py                         | 123 ++++++++++++++++-
 tests/unit_tests/models/helpers_test.py            | 150 +++++++++++++++++++++
 tests/unit_tests/queries/query_object_test.py      |   1 +
 12 files changed, 309 insertions(+), 9 deletions(-)

diff --git a/LLMS.md b/LLMS.md
index f59caa840b..22d58e691b 100644
--- a/LLMS.md
+++ b/LLMS.md
@@ -141,9 +141,11 @@ curl -f http://localhost:8088/health || echo "❌ Setup required - see https://s
 # Install hooks
 pre-commit install
 
-# Quick validation (only changed files - faster)
-pre-commit run  # Staged files only - PREFERRED for iterations
-pre-commit run --all-files  # Full codebase - slower, use sparingly
+# IMPORTANT: Stage your changes first!
+git add .  # Pre-commit only checks staged files
+
+# Quick validation (faster than --all-files)
+pre-commit run  # Staged files only
 pre-commit run mypy  # Python type checking
 pre-commit run prettier  # Code formatting
 pre-commit run eslint  # Frontend linting
diff --git a/superset-frontend/packages/superset-ui-chart-controls/src/sections/echartsTimeSeriesQuery.tsx b/superset-frontend/packages/superset-ui-chart-controls/src/sections/echartsTimeSeriesQuery.tsx
index b8ec643c0b..7a15bbfc9a 100644
--- a/superset-frontend/packages/superset-ui-chart-controls/src/sections/echartsTimeSeriesQuery.tsx
+++ b/superset-frontend/packages/superset-ui-chart-controls/src/sections/echartsTimeSeriesQuery.tsx
@@ -30,7 +30,7 @@ const controlsWithoutXAxis: ControlSetRow[] = [
   ['groupby'],
   [contributionModeControl],
   ['adhoc_filters'],
-  ['limit'],
+  ['limit', 'group_others_when_limit_reached'],
   ['timeseries_limit_metric'],
   ['order_desc'],
   ['row_limit'],
diff --git a/superset-frontend/packages/superset-ui-chart-controls/src/shared-controls/sharedControls.tsx b/superset-frontend/packages/superset-ui-chart-controls/src/shared-controls/sharedControls.tsx
index 6ceff6a444..f456d98e22 100644
--- a/superset-frontend/packages/superset-ui-chart-controls/src/shared-controls/sharedControls.tsx
+++ b/superset-frontend/packages/superset-ui-chart-controls/src/shared-controls/sharedControls.tsx
@@ -283,6 +283,19 @@ const series_limit: SharedControlConfig<'SelectControl'> = {
   ),
 };
 
+const group_others_when_limit_reached: SharedControlConfig<'CheckboxControl'> =
+  {
+    type: 'CheckboxControl',
+    label: t('Group remaining as "Others"'),
+    default: false,
+    description: t(
+      'Groups remaining series into an "Others" category when series limit is reached. ' +
+        'This prevents incomplete time series data from being displayed.',
+    ),
+    visibility: ({ form_data }: { form_data: any }) =>
+      Boolean(form_data?.limit || form_data?.series_limit),
+  };
+
 const y_axis_format: SharedControlConfig<'SelectControl', SelectDefaultOption> =
   {
     type: 'SelectControl',
diff --git a/superset-frontend/packages/superset-ui-core/src/query/buildQueryObject.ts b/superset-frontend/packages/superset-ui-core/src/query/buildQueryObject.ts
index 725c2c87aa..3f3ec3615e 100644
--- a/superset-frontend/packages/superset-ui-core/src/query/buildQueryObject.ts
+++ b/superset-frontend/packages/superset-ui-core/src/query/buildQueryObject.ts
@@ -63,6 +63,7 @@ export default function buildQueryObject<T extends QueryFormData>(
     series_columns,
     series_limit,
     series_limit_metric,
+    group_others_when_limit_reached,
    ...residualFormData
  } = formData;
  const {
@@ -128,6 +129,7 @@ export default function buildQueryObject<T extends QueryFormData>(
      normalizeSeriesLimitMetric(series_limit_metric) ??
      timeseries_limit_metric ??
      undefined,
+    group_others_when_limit_reached: group_others_when_limit_reached ?? false,
    order_desc: typeof order_desc === 'undefined' ? true : order_desc,
    url_params: url_params || undefined,
    custom_params,
diff --git a/superset-frontend/plugins/plugin-chart-echarts/test/MixedTimeseries/buildQuery.test.ts b/superset-frontend/plugins/plugin-chart-echarts/test/MixedTimeseries/buildQuery.test.ts
index eefdc298d8..7c265bb31e 100644
--- a/superset-frontend/plugins/plugin-chart-echarts/test/MixedTimeseries/buildQuery.test.ts
+++ b/superset-frontend/plugins/plugin-chart-echarts/test/MixedTimeseries/buildQuery.test.ts
@@ -107,6 +107,7 @@ test('should compile query object A', () => {
     series_columns: ['foo'],
     series_limit: 5,
     series_limit_metric: undefined,
+    group_others_when_limit_reached: false,
     url_params: {},
     custom_params: {},
     custom_form_data: {},
@@ -166,6 +167,7 @@ test('should compile query object B', () => {
     series_columns: [],
     series_limit: 0,
     series_limit_metric: undefined,
+    group_others_when_limit_reached: false,
     url_params: {},
     custom_params: {},
     custom_form_data: {},
diff --git a/superset-frontend/src/explore/controlPanels/sections.tsx b/superset-frontend/src/explore/controlPanels/sections.tsx
index 6ff7dd9fac..724e77ac8e 100644
--- a/superset-frontend/src/explore/controlPanels/sections.tsx
+++ b/superset-frontend/src/explore/controlPanels/sections.tsx
@@ -99,7 +99,8 @@ export const NVD3TimeSeries: ControlPanelSectionConfig[] = [
       ['metrics'],
       ['adhoc_filters'],
       ['groupby'],
-      ['limit', 'timeseries_limit_metric'],
+      ['limit', 'group_others_when_limit_reached'],
+      ['timeseries_limit_metric'],
       ['order_desc'],
       [
         {
diff --git a/superset-frontend/src/features/databases/UploadDataModel/UploadDataModal.test.tsx b/superset-frontend/src/features/databases/UploadDataModel/UploadDataModal.test.tsx
index 3b268408e7..b7292087b9 100644
--- a/superset-frontend/src/features/databases/UploadDataModel/UploadDataModal.test.tsx
+++ b/superset-frontend/src/features/databases/UploadDataModel/UploadDataModal.test.tsx
@@ -409,7 +409,7 @@ describe('UploadDataModal - Database and Schema Population', () => {
     await userEvent.click(selectSchema);
     await waitFor(() => screen.getAllByText('schema1'));
     await waitFor(() => screen.getAllByText('schema2'));
-  });
+  }, 60000);
 });
 
 describe('UploadDataModal - Form Validation', () => {
diff --git a/superset/charts/schemas.py b/superset/charts/schemas.py
index 7d004431b0..a8911ce13d 100644
--- a/superset/charts/schemas.py
+++ b/superset/charts/schemas.py
@@ -1257,6 +1257,14 @@ class ChartDataQueryObjectSchema(Schema):
         },
         allow_none=True,
     )
+    group_others_when_limit_reached = fields.Boolean(
+        metadata={
+            "description": "When true, groups remaining series into an 'Others' "
+            "category when series limit is reached. Prevents incomplete data."
+        },
+        load_default=False,
+        allow_none=True,
+    )
     timeseries_limit = fields.Integer(
         metadata={
             "description": "Maximum row count for timeseries queries. "
diff --git a/superset/common/query_object.py b/superset/common/query_object.py
index 8d828d0576..eb696f00b1 100644
--- a/superset/common/query_object.py
+++ b/superset/common/query_object.py
@@ -131,6 +131,7 @@ class QueryObject:  # pylint: disable=too-many-instance-attributes
         series_columns: list[Column] | None = None,
         series_limit: int = 0,
         series_limit_metric: Metric | None = None,
+        group_others_when_limit_reached: bool = False,
         time_range: str | None = None,
         time_shift: str | None = None,
         **kwargs: Any,
@@ -154,6 +155,7 @@ class QueryObject:  # pylint: disable=too-many-instance-attributes
         self._init_series_columns(series_columns, metrics, is_timeseries)
         self.series_limit = series_limit
         self.series_limit_metric = series_limit_metric
+        self.group_others_when_limit_reached = group_others_when_limit_reached
         self.time_range = time_range
         self.time_shift = time_shift
         self.from_dttm = kwargs.get("from_dttm")
@@ -356,6 +358,7 @@ class QueryObject:  # pylint: disable=too-many-instance-attributes
             "series_columns": self.series_columns,
             "series_limit": self.series_limit,
             "series_limit_metric": self.series_limit_metric,
+            "group_others_when_limit_reached": self.group_others_when_limit_reached,
             "to_dttm": self.to_dttm,
             "time_shift": self.time_shift,
         }
diff --git a/superset/models/helpers.py b/superset/models/helpers.py
index a555509d54..6b7c07b784 100644
--- a/superset/models/helpers.py
+++ b/superset/models/helpers.py
@@ -26,7 +26,7 @@ import re
 import uuid
 from collections.abc import Hashable
 from datetime import datetime, timedelta
-from typing import Any, cast, NamedTuple, Optional, TYPE_CHECKING, Union
+from typing import Any, Callable, cast, NamedTuple, Optional, TYPE_CHECKING, Union
 
 import dateutil.parser
 import humanize
@@ -1258,6 +1258,65 @@ class ExploreMixin:  # pylint: disable=too-many-public-methods
 
         return or_(*groups)
 
+    def _apply_series_others_grouping(
+        self,
+        select_exprs: list[Any],
+        groupby_all_columns: dict[str, Any],
+        groupby_series_columns: dict[str, Any],
+        condition_factory: Callable[[str, Any], Any],
+    ) -> tuple[list[Any], dict[str, Any]]:
+        """
+        Apply "Others" grouping to series columns in both SELECT and GROUP BY clauses.
+
+        This method encapsulates the common logic for replacing series columns with
+        CASE expressions that group remaining series into an "Others" category when
+        the series limit is reached.
+
+        Args:
+            select_exprs: List of SELECT expressions to modify
+            groupby_all_columns: Dict of GROUP BY columns to modify
+            groupby_series_columns: Dict of series columns to apply Others grouping to
+            condition_factory: Function that takes (col_name, original_expr) and returns
+                the condition for when to keep original value vs use "Others"
+
+        Returns:
+            Tuple of (modified_select_exprs, modified_groupby_all_columns)
+        """
+        # Modify SELECT expressions
+        modified_select_exprs = []
+        for expr in select_exprs:
+            if hasattr(expr, "name") and expr.name in groupby_series_columns:
+                # Create condition for this column using the factory function
+                condition = condition_factory(expr.name, expr)
+
+                # Create CASE expression: condition true -> original, else "Others"
+                case_expr = sa.case(
+                    [(condition, expr)], else_=sa.literal_column("Others")
+                )
+                case_expr = self.make_sqla_column_compatible(case_expr, expr.name)
+                modified_select_exprs.append(case_expr)
+            else:
+                modified_select_exprs.append(expr)
+
+        # Modify GROUP BY expressions
+        modified_groupby_all_columns = {}
+        for col_name, gby_expr in groupby_all_columns.items():
+            if col_name in groupby_series_columns:
+                # Create condition for this column using the factory function
+                condition = condition_factory(col_name, gby_expr)
+
+                # Create CASE expression for groupby
+                case_expr = sa.case(
+                    [(condition, gby_expr)],
+                    else_=sa.literal_column("Others"),
+                )
+                case_expr = self.make_sqla_column_compatible(case_expr, col_name)
+                modified_groupby_all_columns[col_name] = case_expr
+            else:
+                modified_groupby_all_columns[col_name] = gby_expr
+
+        return modified_select_exprs, modified_groupby_all_columns
+
     def dttm_sql_literal(self, dttm: datetime, col: "TableColumn") -> str:
         """Convert datetime object to a SQL expression string"""
 
@@ -1449,6 +1508,7 @@ class ExploreMixin:  # pylint: disable=too-many-public-methods
         series_columns: Optional[list[Column]] = None,
         series_limit: Optional[int] = None,
         series_limit_metric: Optional[Metric] = None,
+        group_others_when_limit_reached: bool = False,
         row_limit: Optional[int] = None,
         row_offset: Optional[int] = None,
         timeseries_limit: Optional[int] = None,
@@ -2040,7 +2100,43 @@ class ExploreMixin:  # pylint: disable=too-many-public-methods
                     col_name = db_engine_spec.make_label_compatible(gby_name + "__")
                     on_clause.append(gby_obj == sa.column(col_name))
 
-                tbl = tbl.join(subq.alias(SERIES_LIMIT_SUBQ_ALIAS), and_(*on_clause))
+                # Use LEFT JOIN when grouping others, INNER JOIN otherwise
+                if group_others_when_limit_reached:
+                    # Create the alias once and reuse it
+                    subq_alias = subq.alias(SERIES_LIMIT_SUBQ_ALIAS)
+                    tbl = tbl.join(
+                        subq_alias,
+                        and_(*on_clause),
+                        isouter=True,
+                    )
+
+                    # Apply Others grouping using the refactored method
+                    def _create_join_condition(col_name: str, expr: Any) -> Any:
+                        # Get the corresponding column from the subquery
+                        subq_col_name = db_engine_spec.make_label_compatible(
+                            col_name + "__"
+                        )
+                        # Reference the column from the already-created aliased subquery
+                        subq_col = subq_alias.c[subq_col_name]
+                        return subq_col.is_not(None)
+
+                    select_exprs, groupby_all_columns = (
+                        self._apply_series_others_grouping(
+                            select_exprs,
+                            groupby_all_columns,
+                            groupby_series_columns,
+                            _create_join_condition,
+                        )
+                    )
+
+                    # Reconstruct query with modified expressions
+                    qry = sa.select(select_exprs)
+                    if groupby_all_columns:
+                        qry = qry.group_by(*groupby_all_columns.values())
+                else:
+                    tbl = tbl.join(
+                        subq.alias(SERIES_LIMIT_SUBQ_ALIAS), and_(*on_clause)
+                    )
             else:
                 if series_limit_metric:
                     orderby = [
@@ -2081,7 +2177,28 @@ class ExploreMixin:  # pylint: disable=too-many-public-methods
                 top_groups = self._get_top_groups(
                     result.df, dimensions, groupby_series_columns, columns_by_name
                 )
-                qry = qry.where(top_groups)
+
+                if group_others_when_limit_reached:
+                    # Apply Others grouping using the refactored method
+                    def _create_top_groups_condition(col_name: str, expr: Any) -> Any:
+                        return top_groups
+
+                    select_exprs, groupby_all_columns = (
+                        self._apply_series_others_grouping(
+                            select_exprs,
+                            groupby_all_columns,
+                            groupby_series_columns,
+                            _create_top_groups_condition,
+                        )
+                    )
+
+                    # Reconstruct query with modified expressions
+                    qry = sa.select(select_exprs)
+                    if groupby_all_columns:
+                        qry = qry.group_by(*groupby_all_columns.values())
+                else:
+                    # Original behavior: filter to only top groups
+                    qry = qry.where(top_groups)
 
         qry = qry.select_from(tbl)
 
diff --git a/tests/unit_tests/models/helpers_test.py b/tests/unit_tests/models/helpers_test.py
index d0dfef43b8..05d837437b 100644
--- a/tests/unit_tests/models/helpers_test.py
+++ b/tests/unit_tests/models/helpers_test.py
@@ -210,3 +210,153 @@ def test_values_for_column_double_percents(
 
     assert called_sql.compare(expected_sql) is True
     assert called_conn.engine == engine
+
+
+def test_apply_series_others_grouping(database: Database) -> None:
+    """
+    Test the `_apply_series_others_grouping` method.
+
+    This method should replace series columns with CASE expressions that
+    group remaining series into an "Others" category based on a condition.
+    """
+    from unittest.mock import Mock
+
+    from superset.connectors.sqla.models import SqlaTable, TableColumn
+
+    # Create a mock table for testing
+    table = SqlaTable(
+        database=database,
+        schema=None,
+        table_name="test_table",
+        columns=[
+            TableColumn(column_name="category", type="TEXT"),
+            TableColumn(column_name="metric_col", type="INTEGER"),
+            TableColumn(column_name="other_col", type="TEXT"),
+        ],
+    )
+
+    # Mock SELECT expressions
+    category_expr = Mock()
+    category_expr.name = "category"
+    metric_expr = Mock()
+    metric_expr.name = "metric_col"
+    other_expr = Mock()
+    other_expr.name = "other_col"
+
+    select_exprs = [category_expr, metric_expr, other_expr]
+
+    # Mock GROUP BY columns
+    groupby_all_columns = {
+        "category": category_expr,
+        "other_col": other_expr,
+    }
+
+    # Define series columns (only category should be modified)
+    groupby_series_columns = {"category": category_expr}
+
+    # Create a condition factory that always returns True
+    def always_true_condition(col_name: str, expr) -> bool:
+        return True
+
+    # Mock the make_sqla_column_compatible method
+    def mock_make_compatible(expr, name=None):
+        mock_result = Mock()
+        mock_result.name = name
+        return mock_result
+
+    with patch.object(
+        table, "make_sqla_column_compatible", side_effect=mock_make_compatible
+    ):
+        # Call the method
+        result_select_exprs, result_groupby_columns = (
+            table._apply_series_others_grouping(
+                select_exprs,
+                groupby_all_columns,
+                groupby_series_columns,
+                always_true_condition,
+            )
+        )
+
+        # Verify SELECT expressions
+        assert len(result_select_exprs) == 3
+
+        # Category (series column) should be replaced with CASE expression
+        category_result = result_select_exprs[0]
+        assert category_result.name == "category"  # Should be made compatible
+
+        # Metric (non-series column) should remain unchanged
+        assert result_select_exprs[1] == metric_expr
+
+        # Other (non-series column) should remain unchanged
+        assert result_select_exprs[2] == other_expr
+
+        # Verify GROUP BY columns
+        assert len(result_groupby_columns) == 2
+
+        # Category (series column) should be replaced with CASE expression
+        assert "category" in result_groupby_columns
+        category_groupby_result = result_groupby_columns["category"]
+        assert category_groupby_result.name == "category"  # Should be made compatible
+
+        # Other (non-series column) should remain unchanged
+        assert result_groupby_columns["other_col"] == other_expr
+
+
+def test_apply_series_others_grouping_with_false_condition(database: Database) -> None:
+    """
+    Test the `_apply_series_others_grouping` method with a condition that returns False.
+
+    This should result in CASE expressions that always use "Others".
+    """
+    from unittest.mock import Mock
+
+    from superset.connectors.sqla.models import SqlaTable, TableColumn
+
+    # Create a mock table for testing
+    table = SqlaTable(
+        database=database,
+        schema=None,
+        table_name="test_table",
+        columns=[TableColumn(column_name="category", type="TEXT")],
+    )
+
+    # Mock SELECT expressions
+    category_expr = Mock()
+    category_expr.name = "category"
+    select_exprs = [category_expr]
+
+    # Mock GROUP BY columns
+    groupby_all_columns = {"category": category_expr}
+    groupby_series_columns = {"category": category_expr}
+
+    # Create a condition factory that always returns False
+    def always_false_condition(col_name: str, expr) -> bool:
+        return False
+
+    # Mock the make_sqla_column_compatible method
+    def mock_make_compatible(expr, name=None):
+        mock_result = Mock()
+        mock_result.name = name
+        return mock_result
+
+    with patch.object(
+        table, "make_sqla_column_compatible", side_effect=mock_make_compatible
+    ):
+        # Call the method
+        result_select_exprs, result_groupby_columns = (
+            table._apply_series_others_grouping(
+                select_exprs,
+                groupby_all_columns,
+                groupby_series_columns,
+                always_false_condition,
+            )
+        )
+
+        # Verify that the expressions were replaced (we can't test SQL generation
+        # in a unit test, but we can verify the structure changed)
+        assert len(result_select_exprs) == 1
+        assert result_select_exprs[0].name == "category"
+
+        assert len(result_groupby_columns) == 1
+        assert "category" in result_groupby_columns
+        assert result_groupby_columns["category"].name == "category"
diff --git a/tests/unit_tests/queries/query_object_test.py b/tests/unit_tests/queries/query_object_test.py
index 8f42a460e0..059711f16b 100644
--- a/tests/unit_tests/queries/query_object_test.py
+++ b/tests/unit_tests/queries/query_object_test.py
@@ -44,6 +44,7 @@ def test_default_query_object_to_dict():
         "filter": [],
         "from_dttm": None,
         "granularity": None,
+        "group_others_when_limit_reached": False,
         "inner_from_dttm": None,
         "inner_to_dttm": None,
         "is_rowcount": False,
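
For readers skimming the diff, the following is a rough, standalone sketch (not Superset code) of the SQL shape the LEFT JOIN branch in helpers.py is aiming for: the series-limit subquery is outer-joined back to the fact table, and a CASE expression folds any series that did not make the top N into an "Others" bucket in both SELECT and GROUP BY. All table and column names here are invented, and the sketch uses plain SQLAlchemy 1.4+ calls (tuple-form sa.case, sa.literal) rather than the exact helpers used in the commit.

    # Illustrative only: hypothetical "sales" table standing in for the datasource.
    import sqlalchemy as sa

    metadata = sa.MetaData()
    sales = sa.Table(
        "sales",
        metadata,
        sa.Column("category", sa.String),
        sa.Column("ts", sa.DateTime),
        sa.Column("metric", sa.Integer),
    )

    # Top-N series, analogous to the SERIES_LIMIT_SUBQ_ALIAS subquery.
    top_series = (
        sa.select(sales.c.category.label("category__"))
        .group_by(sales.c.category)
        .order_by(sa.func.sum(sales.c.metric).desc())
        .limit(5)
        .subquery("series_limit")
    )

    # LEFT JOIN keeps rows outside the top N; the CASE folds them into "Others".
    category_or_others = sa.case(
        (top_series.c.category__.is_not(None), sales.c.category),
        else_=sa.literal("Others"),
    ).label("category")

    qry = (
        sa.select(category_or_others, sa.func.sum(sales.c.metric).label("metric"))
        .select_from(
            sales.join(
                top_series,
                sales.c.category == top_series.c.category__,
                isouter=True,
            )
        )
        .group_by(category_or_others)
    )
    # Renders roughly: SELECT CASE WHEN series_limit.category__ IS NOT NULL
    # THEN sales.category ELSE 'Others' END AS category, sum(sales.metric) ...
    print(qry)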
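On the API side, the new flag simply rides along in each query object sent to the chart-data endpoint. A minimal, hypothetical payload sketch follows; only group_others_when_limit_reached comes from this commit, while the datasource id, columns, metrics, and time range are placeholders.

    # Hypothetical body for POST /api/v1/chart/data; field values are illustrative.
    query_context = {
        "datasource": {"id": 1, "type": "table"},
        "queries": [
            {
                "columns": ["category"],
                "metrics": ["count"],
                "series_columns": ["category"],
                "series_limit": 5,
                "series_limit_metric": "count",
                # New in this commit: fold the long tail into an "Others" series
                "group_others_when_limit_reached": True,
                "time_range": "Last month",
            }
        ],
        "result_format": "json",
        "result_type": "full",
    }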
