kaxil commented on code in PR #62696:
URL: https://github.com/apache/airflow/pull/62696#discussion_r2889848542
##########
airflow-core/src/airflow/configuration.py:
##########
@@ -39,13 +37,12 @@
from typing_extensions import overload
from airflow._shared.configuration.parser import (
- VALUE_NOT_FOUND_SENTINEL,
AirflowConfigParser as _SharedAirflowConfigParser,
- ValueNotFound,
configure_parser_from_configuration_description,
)
from airflow._shared.module_loading import import_string
from airflow.exceptions import AirflowConfigException, RemovedInAirflow4Warning
+from airflow.providers_manager import ProvidersManager
from airflow.secrets import DEFAULT_SECRETS_SEARCH_PATH
Review Comment:
This moves the `ProvidersManager` import from lazy (inside methods) to
top-level. The existing code in this file uses lazy imports for
`ProvidersManager` in methods like `_ensure_providers_config_loaded` — was this
intentional to change that pattern? It works in breeze but seems inconsistent
with the rest of the file.
##########
task-sdk/src/airflow/sdk/providers_manager_runtime.py:
##########
@@ -218,6 +220,18 @@ def initialize_providers_taskflow_decorator(self):
self.initialize_providers_list()
self._discover_taskflow_decorators()
+ @provider_info_cache("provider_configs")
+ def initialize_provider_configs(self):
+ """Lazy initialization of provider configs."""
+ self.initialize_providers_list()
+ self._discover_config()
+
+ def _discover_config(self) -> None:
+ """Retrieve all configs defined in the providers."""
+ for provider_package, provider in self._provider_dict.items():
+ if provider.data.get("config"):
+ self._provider_configs[provider_package] =
provider.data.get("config")
+
def _discover_hooks_from_connection_types(
self,
Review Comment:
Nit: `provider.data.get("config")` is called twice — once in the `if` check
and once in the assignment.
```suggestion
if config := provider.data.get("config"):
self._provider_configs[provider_package] = config
```
##########
shared/configuration/src/airflow_shared/configuration/parser.py:
##########
@@ -1400,7 +1552,7 @@ def _get_config_sources_for_as_dict(self) ->
list[tuple[str, ConfigParser]]:
"""
Get list of config sources to use in as_dict().
- Subclasses can override to add additional sources (e.g., provider
configs).
+ Both core and SDK need provider configs.
"""
return [
("default", self._default_values),
Review Comment:
There are two definitions of `_get_config_sources_for_as_dict` in this class
— the provider-aware one added around line 255 and this one. Since Python uses
the last definition in a class body, this one wins and the provider-aware
version is dead code.
Verified in breeze:
```python
>>> conf._get_config_sources_for_as_dict()
[("default", ...), ("airflow.cfg", ...)]
```
Provider fallback sources are missing from `as_dict()`, which means `airflow
config list` and the config API endpoint won't show provider defaults.
Should delete this definition and keep the one at line 255.
##########
shared/configuration/src/airflow_shared/configuration/parser.py:
##########
@@ -213,8 +248,69 @@ def _lookup_sequence(self) -> list[Callable]:
self._get_option_from_commands,
self._get_option_from_secrets,
self._get_option_from_defaults,
+ self._get_option_from_provider_cfg_config_fallbacks,
+ self._get_option_from_provider_metadata_config_fallbacks,
]
+ def _get_config_sources_for_as_dict(self) -> list[tuple[str,
ConfigParser]]:
+ """Override the base method to add provider fallbacks."""
+ return [
+ ("provider-cfg-fallback-defaults",
self._provider_cfg_config_fallback_default_values),
+ ("provider-metadata-fallback-defaults",
self._provider_metadata_config_fallback_default_values),
+ ("default", self._default_values),
+ ("airflow.cfg", self),
+ ]
+
+ def _get_option_from_provider_cfg_config_fallbacks(
+ self,
+ deprecated_key: str | None,
+ deprecated_section: str | None,
+ key: str,
+ section: str,
+ issue_warning: bool = True,
+ extra_stacklevel: int = 0,
+ **kwargs,
+ ) -> str | ValueNotFound:
+ """Get config option from provider fallback defaults."""
+ if self.get_from_provider_cfg_config_fallback_defaults(section, key)
is not None:
+ # no expansion needed
+ return
self.get_from_provider_cfg_config_fallback_defaults(section, key, **kwargs)
+ return VALUE_NOT_FOUND_SENTINEL
+
+ def _get_option_from_provider_metadata_config_fallbacks(
+ self,
+ deprecated_key: str | None,
+ deprecated_section: str | None,
+ key: str,
+ section: str,
+ issue_warning: bool = True,
+ extra_stacklevel: int = 0,
+ **kwargs,
+ ) -> str | ValueNotFound:
+ """Get config option from provider metadata fallback defaults."""
+ if self.get_from_provider_metadata_config_fallback_defaults(section,
key) is not None:
+ # no expansion needed
+ return
self.get_from_provider_metadata_config_fallback_defaults(section, key, **kwargs)
+ return VALUE_NOT_FOUND_SENTINEL
+
+ def get_from_provider_cfg_config_fallback_defaults(self, section: str,
key: str, **kwargs) -> Any:
+ """Get provider config fallback default values."""
+ return self._provider_cfg_config_fallback_default_values.get(section,
key, fallback=None, **kwargs)
+
+ @cached_property
+ def _provider_metadata_config_fallback_default_values(self) ->
ConfigParser:
+ """Return Provider metadata config fallback default values."""
+ base_configuration_description: dict[str, dict[str, Any]] = {}
+ for _, config in self.provider_manager_type().provider_configs:
+ base_configuration_description.update(config)
Review Comment:
This `cached_property` calls `self.provider_manager_type().provider_configs`
which triggers full provider discovery. Since
`_get_option_from_provider_metadata_config_fallbacks` is in the base
`_lookup_sequence`, any `conf.get()` call (without `fallback=`) for a key not
in env/config/commands/secrets/defaults will trigger it.
Verified in breeze — SDK `conf.get("unknown", "key")` initializes
`ProvidersManagerTaskRuntime` eagerly:
```
ProvidersManagerTaskRuntime initialized before: False
ProvidersManagerTaskRuntime initialized after: True
```
For Core this is mostly fine since `ProvidersManager` is already initialized
early. For SDK, this changes the performance profile of conf lookups — worth
considering whether this is the intended behavior or should be guarded.
##########
shared/configuration/src/airflow_shared/configuration/parser.py:
##########
@@ -213,8 +248,69 @@ def _lookup_sequence(self) -> list[Callable]:
self._get_option_from_commands,
self._get_option_from_secrets,
self._get_option_from_defaults,
+ self._get_option_from_provider_cfg_config_fallbacks,
+ self._get_option_from_provider_metadata_config_fallbacks,
]
+ def _get_config_sources_for_as_dict(self) -> list[tuple[str,
ConfigParser]]:
+ """Override the base method to add provider fallbacks."""
+ return [
+ ("provider-cfg-fallback-defaults",
self._provider_cfg_config_fallback_default_values),
+ ("provider-metadata-fallback-defaults",
self._provider_metadata_config_fallback_default_values),
+ ("default", self._default_values),
+ ("airflow.cfg", self),
+ ]
+
+ def _get_option_from_provider_cfg_config_fallbacks(
+ self,
+ deprecated_key: str | None,
+ deprecated_section: str | None,
+ key: str,
+ section: str,
+ issue_warning: bool = True,
+ extra_stacklevel: int = 0,
+ **kwargs,
+ ) -> str | ValueNotFound:
+ """Get config option from provider fallback defaults."""
+ if self.get_from_provider_cfg_config_fallback_defaults(section, key)
is not None:
+ # no expansion needed
+ return
self.get_from_provider_cfg_config_fallback_defaults(section, key, **kwargs)
Review Comment:
Nit: this calls `get_from_provider_cfg_config_fallback_defaults(section,
key)` twice — once to check if it's not None, then again to return the value.
Same pattern in `_get_option_from_provider_metadata_config_fallbacks`. Could
simplify:
```python
value = self.get_from_provider_cfg_config_fallback_defaults(section, key,
**kwargs)
if value is not None:
return value
return VALUE_NOT_FOUND_SENTINEL
```
##########
scripts/ci/prek/check_airflow_imports_in_shared.py:
##########
@@ -33,16 +33,42 @@
from common_prek_utils import console
+def _is_type_checking_guard(node: ast.If) -> bool:
+ """Check if an If node is a ``TYPE_CHECKING`` guard."""
+ test = node.test
+ if isinstance(test, ast.Name) and test.id == "TYPE_CHECKING":
+ return True
+ if isinstance(test, ast.Attribute) and test.attr == "TYPE_CHECKING":
+ return True
+ return False
+
+
+def _collect_type_checking_node_ids(tree: ast.AST) -> set[int]:
+ """Return the ``id()`` of every AST node nested inside an ``if
TYPE_CHECKING`` block."""
+ guarded: set[int] = set()
+ for node in ast.walk(tree):
+ if isinstance(node, ast.If) and _is_type_checking_guard(node):
+ for child in ast.walk(node):
+ guarded.add(id(child))
+ return guarded
+
+
def check_file_for_prohibited_imports(file_path: Path) -> list[tuple[int,
str]]:
try:
source = file_path.read_text(encoding="utf-8")
tree = ast.parse(source, filename=str(file_path))
except (OSError, UnicodeDecodeError, SyntaxError):
return []
+ type_checking_ids = _collect_type_checking_node_ids(tree)
violations = []
Review Comment:
This prints for every AST node inside a TYPE_CHECKING block — could be quite
noisy in CI. Should this be removed or gated behind a verbose flag?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]