This is an automated email from the ASF dual-hosted git repository.

jli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 290bcc1dbbb feat(cache): use configurable hash algorithm for flask-caching (#37361)
290bcc1dbbb is described below

commit 290bcc1dbbbd2824639d8d2a405028c339104a6b
Author: Daniel Vaz Gaspar <[email protected]>
AuthorDate: Mon Jan 26 18:19:51 2026 +0000

    feat(cache): use configurable hash algorithm for flask-caching (#37361)
---
 superset/utils/cache.py                      |   3 +-
 superset/utils/cache_manager.py              | 141 ++++++++++++++++++++--
 tests/unit_tests/utils/test_cache_manager.py | 171 +++++++++++++++++++++++++++
 3 files changed, 307 insertions(+), 8 deletions(-)

diff --git a/superset/utils/cache.py b/superset/utils/cache.py
index 76294696e43..706a74dbde4 100644
--- a/superset/utils/cache.py
+++ b/superset/utils/cache.py
@@ -31,6 +31,7 @@ from superset import db
 from superset.constants import CACHE_DISABLED_TIMEOUT
 from superset.extensions import cache_manager
 from superset.models.cache import CacheKey
+from superset.utils.cache_manager import configurable_hash_method
 from superset.utils.hashing import hash_from_dict
 from superset.utils.json import json_int_dttm_ser
 
@@ -273,7 +274,7 @@ def etag_cache(  # noqa: C901
         wrapper.uncached = f  # type: ignore
         wrapper.cache_timeout = timeout  # type: ignore
         wrapper.make_cache_key = cache._memoize_make_cache_key(  # type: ignore # pylint: disable=protected-access
-            make_name=None, timeout=timeout
+            make_name=None, timeout=timeout, hash_method=configurable_hash_method
         )
 
         return wrapper
diff --git a/superset/utils/cache_manager.py b/superset/utils/cache_manager.py
index d3b2dbdb00d..0804e0d4b5d 100644
--- a/superset/utils/cache_manager.py
+++ b/superset/utils/cache_manager.py
@@ -14,10 +14,11 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import hashlib
 import logging
-from typing import Any, Optional, Union
+from typing import Any, Callable, Optional, Union
 
-from flask import Flask
+from flask import current_app, Flask
 from flask_caching import Cache
 from markupsafe import Markup
 
@@ -27,8 +28,134 @@ logger = logging.getLogger(__name__)
 
 CACHE_IMPORT_PATH = "superset.extensions.metastore_cache.SupersetMetastoreCache"
 
+# Hash function lookup table matching superset.utils.hashing
+_HASH_METHODS: dict[str, Callable[..., Any]] = {
+    "sha256": hashlib.sha256,
+    "md5": hashlib.md5,
+}
+
+
+class ConfigurableHashMethod:
+    """
+    A callable that defers hash algorithm selection to runtime.
+
+    Flask-caching's memoize decorator evaluates hash_method at decoration time
+    (module import), but we need to read HASH_ALGORITHM config at function call
+    time when the app context is available.
+
+    This class acts like a hashlib function but looks up the configured
+    algorithm when called.
+    """
+
+    def __call__(self, data: bytes = b"") -> Any:
+        """
+        Create a hash object using the configured algorithm.
+
+        Args:
+            data: Optional initial data to hash
+
+        Returns:
+            A hashlib hash object (e.g., sha256 or md5)
+
+        Raises:
+            ValueError: If HASH_ALGORITHM is set to an unsupported value
+        """
+        algorithm = current_app.config["HASH_ALGORITHM"]
+        hash_func = _HASH_METHODS.get(algorithm)
+        if hash_func is None:
+            raise ValueError(f"Unsupported hash algorithm: {algorithm}")
+        return hash_func(data)
+
+
+# Singleton instance to use as default hash_method
+configurable_hash_method = ConfigurableHashMethod()
+
+
+class SupersetCache(Cache):
+    """
+    Cache subclass that uses the configured HASH_ALGORITHM instead of MD5.
+
+    Flask-caching uses MD5 by default for cache key generation, which fails
+    in FIPS mode where MD5 is disabled. This class overrides the default
+    hash method to use the algorithm specified by HASH_ALGORITHM config.
+
+    Note: Switching hash algorithms will invalidate existing cache keys,
+    causing a one-time cache miss on upgrade.
+    """
+
+    def memoize(
+        self,
+        timeout: int | None = None,
+        make_name: Callable[..., Any] | None = None,
+        unless: Callable[..., bool] | None = None,
+        forced_update: Callable[..., bool] | None = None,
+        response_filter: Callable[..., Any] | None = None,
+        hash_method: Callable[..., Any] = configurable_hash_method,
+        cache_none: bool = False,
+        source_check: bool | None = None,
+        args_to_ignore: Any | None = None,
+    ) -> Callable[..., Any]:
+        return super().memoize(
+            timeout=timeout,
+            make_name=make_name,
+            unless=unless,
+            forced_update=forced_update,
+            response_filter=response_filter,
+            hash_method=hash_method,
+            cache_none=cache_none,
+            source_check=source_check,
+            args_to_ignore=args_to_ignore,
+        )
+
+    def cached(
+        self,
+        timeout: int | None = None,
+        key_prefix: str = "view/%s",
+        unless: Callable[..., bool] | None = None,
+        forced_update: Callable[..., bool] | None = None,
+        response_filter: Callable[..., Any] | None = None,
+        query_string: bool = False,
+        hash_method: Callable[..., Any] = configurable_hash_method,
+        cache_none: bool = False,
+        make_cache_key: Callable[..., Any] | None = None,
+        source_check: bool | None = None,
+        response_hit_indication: bool | None = False,
+    ) -> Callable[..., Any]:
+        return super().cached(
+            timeout=timeout,
+            key_prefix=key_prefix,
+            unless=unless,
+            forced_update=forced_update,
+            response_filter=response_filter,
+            query_string=query_string,
+            hash_method=hash_method,
+            cache_none=cache_none,
+            make_cache_key=make_cache_key,
+            source_check=source_check,
+            response_hit_indication=response_hit_indication,
+        )
+
+    # pylint: disable=protected-access
+    def _memoize_make_cache_key(
+        self,
+        make_name: Callable[..., Any] | None = None,
+        timeout: Callable[..., Any] | None = None,
+        forced_update: bool = False,
+        hash_method: Callable[..., Any] = configurable_hash_method,
+        source_check: bool | None = False,
+        args_to_ignore: Any | None = None,
+    ) -> Callable[..., Any]:
+        return super()._memoize_make_cache_key(
+            make_name=make_name,
+            timeout=timeout,
+            forced_update=forced_update,
+            hash_method=hash_method,
+            source_check=source_check,
+            args_to_ignore=args_to_ignore,
+        )
+
 
-class ExploreFormDataCache(Cache):
+class ExploreFormDataCache(SupersetCache):
     def get(self, *args: Any, **kwargs: Any) -> Optional[Union[str, Markup]]:
         cache = self.cache.get(*args, **kwargs)
 
@@ -53,10 +180,10 @@ class CacheManager:
     def __init__(self) -> None:
         super().__init__()
 
-        self._cache = Cache()
-        self._data_cache = Cache()
-        self._thumbnail_cache = Cache()
-        self._filter_state_cache = Cache()
+        self._cache = SupersetCache()
+        self._data_cache = SupersetCache()
+        self._thumbnail_cache = SupersetCache()
+        self._filter_state_cache = SupersetCache()
         self._explore_form_data_cache = ExploreFormDataCache()
 
     @staticmethod
diff --git a/tests/unit_tests/utils/test_cache_manager.py b/tests/unit_tests/utils/test_cache_manager.py
new file mode 100644
index 00000000000..b7b10e4506e
--- /dev/null
+++ b/tests/unit_tests/utils/test_cache_manager.py
@@ -0,0 +1,171 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import hashlib
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from superset.utils.cache_manager import (
+    configurable_hash_method,
+    ConfigurableHashMethod,
+    SupersetCache,
+)
+
+
+def test_configurable_hash_method_uses_sha256():
+    """Test ConfigurableHashMethod uses sha256 when configured."""
+    mock_app = MagicMock()
+    mock_app.config = {"HASH_ALGORITHM": "sha256"}
+
+    with patch("superset.utils.cache_manager.current_app", mock_app):
+        hash_obj = configurable_hash_method(b"test")
+        # Verify it returns a sha256 hash object
+        assert hash_obj.hexdigest() == hashlib.sha256(b"test").hexdigest()
+
+
+def test_configurable_hash_method_uses_md5():
+    """Test ConfigurableHashMethod uses md5 when configured."""
+    mock_app = MagicMock()
+    mock_app.config = {"HASH_ALGORITHM": "md5"}
+
+    with patch("superset.utils.cache_manager.current_app", mock_app):
+        hash_obj = configurable_hash_method(b"test")
+        # Verify it returns a md5 hash object
+        assert hash_obj.hexdigest() == hashlib.md5(b"test").hexdigest()  # noqa: S324
+
+
+def test_configurable_hash_method_empty_data():
+    """Test ConfigurableHashMethod with empty data."""
+    mock_app = MagicMock()
+    mock_app.config = {"HASH_ALGORITHM": "sha256"}
+
+    with patch("superset.utils.cache_manager.current_app", mock_app):
+        hash_obj = configurable_hash_method()
+        assert hash_obj.hexdigest() == hashlib.sha256(b"").hexdigest()
+
+
+def test_configurable_hash_method_is_callable():
+    """Test that ConfigurableHashMethod instance is callable."""
+    method = ConfigurableHashMethod()
+    assert callable(method)
+
+
+def test_superset_cache_memoize_uses_configurable_hash():
+    """Test that SupersetCache.memoize uses configurable_hash_method by default."""
+    cache = SupersetCache()
+
+    with patch.object(
+        cache.__class__.__bases__[0], "memoize", return_value=lambda f: f
+    ) as mock_memoize:
+        cache.memoize(timeout=300)
+
+        mock_memoize.assert_called_once()
+        call_kwargs = mock_memoize.call_args[1]
+        assert call_kwargs["hash_method"] is configurable_hash_method
+
+
+def test_superset_cache_memoize_allows_explicit_hash_method():
+    """Test that SupersetCache.memoize allows explicit hash_method override."""
+    cache = SupersetCache()
+
+    with patch.object(
+        cache.__class__.__bases__[0], "memoize", return_value=lambda f: f
+    ) as mock_memoize:
+        cache.memoize(timeout=300, hash_method=hashlib.md5)
+
+        mock_memoize.assert_called_once()
+        call_kwargs = mock_memoize.call_args[1]
+        assert call_kwargs["hash_method"] == hashlib.md5
+
+
+def test_superset_cache_cached_uses_configurable_hash():
+    """Test that SupersetCache.cached uses configurable_hash_method by default."""
+    cache = SupersetCache()
+
+    with patch.object(
+        cache.__class__.__bases__[0], "cached", return_value=lambda f: f
+    ) as mock_cached:
+        cache.cached(timeout=300)
+
+        mock_cached.assert_called_once()
+        call_kwargs = mock_cached.call_args[1]
+        assert call_kwargs["hash_method"] is configurable_hash_method
+
+
+def test_superset_cache_cached_allows_explicit_hash_method():
+    """Test that SupersetCache.cached allows explicit hash_method override."""
+    cache = SupersetCache()
+
+    with patch.object(
+        cache.__class__.__bases__[0], "cached", return_value=lambda f: f
+    ) as mock_cached:
+        cache.cached(timeout=300, hash_method=hashlib.md5)
+
+        mock_cached.assert_called_once()
+        call_kwargs = mock_cached.call_args[1]
+        assert call_kwargs["hash_method"] == hashlib.md5
+
+
+def test_superset_cache_memoize_make_cache_key_uses_configurable_hash():
+    """Test _memoize_make_cache_key uses configurable_hash_method by default."""
+    cache = SupersetCache()
+
+    with patch.object(
+        cache.__class__.__bases__[0],
+        "_memoize_make_cache_key",
+        return_value=lambda *args, **kwargs: "cache_key",
+    ) as mock_make_key:
+        cache._memoize_make_cache_key(make_name=None, timeout=300)
+
+        mock_make_key.assert_called_once()
+        call_kwargs = mock_make_key.call_args[1]
+        assert call_kwargs["hash_method"] is configurable_hash_method
+
+
+def test_superset_cache_memoize_make_cache_key_allows_explicit_hash():
+    """Test _memoize_make_cache_key allows explicit hash_method override."""
+    cache = SupersetCache()
+
+    with patch.object(
+        cache.__class__.__bases__[0],
+        "_memoize_make_cache_key",
+        return_value=lambda *args, **kwargs: "cache_key",
+    ) as mock_make_key:
+        cache._memoize_make_cache_key(
+            make_name=None, timeout=300, hash_method=hashlib.md5
+        )
+
+        mock_make_key.assert_called_once()
+        call_kwargs = mock_make_key.call_args[1]
+        assert call_kwargs["hash_method"] == hashlib.md5
+
+
+@pytest.mark.parametrize(
+    "algorithm,expected_digest",
+    [
+        ("sha256", hashlib.sha256(b"test_data").hexdigest()),
+        ("md5", hashlib.md5(b"test_data").hexdigest()),  # noqa: S324
+    ],
+)
+def test_configurable_hash_method_parametrized(algorithm, expected_digest):
+    """Parametrized test for ConfigurableHashMethod with different algorithms."""
+    mock_app = MagicMock()
+    mock_app.config = {"HASH_ALGORITHM": algorithm}
+
+    with patch("superset.utils.cache_manager.current_app", mock_app):
+        hash_obj = configurable_hash_method(b"test_data")
+        assert hash_obj.hexdigest() == expected_digest

Reply via email to