gopidesupavan commented on code in PR #62785: URL: https://github.com/apache/airflow/pull/62785#discussion_r2880145478
########## providers/common/ai/src/airflow/providers/common/ai/toolsets/hook.py: ########## @@ -0,0 +1,267 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Generic adapter that exposes Airflow Hook methods as pydantic-ai tools.""" + +from __future__ import annotations + +import inspect +import json +import re +import types +from typing import TYPE_CHECKING, Any, Union, get_args, get_origin, get_type_hints + +from pydantic_ai.tools import ToolDefinition +from pydantic_ai.toolsets.abstract import AbstractToolset, ToolsetTool +from pydantic_core import SchemaValidator, core_schema + +if TYPE_CHECKING: + from collections.abc import Callable + + from pydantic_ai._run_context import RunContext + + from airflow.providers.common.compat.sdk import BaseHook + +# Single shared validator — accepts any JSON-decoded dict from the LLM. +_PASSTHROUGH_VALIDATOR = SchemaValidator(core_schema.any_schema()) + +# Maps Python types to JSON Schema fragments. 
+_TYPE_MAP: dict[type, dict[str, Any]] = { + str: {"type": "string"}, + int: {"type": "integer"}, + float: {"type": "number"}, + bool: {"type": "boolean"}, + list: {"type": "array"}, + dict: {"type": "object"}, + bytes: {"type": "string"}, +} + + +class HookToolset(AbstractToolset[Any]): + """ + Expose selected methods of an Airflow Hook as pydantic-ai tools. + + This adapter introspects the method signatures and docstrings of the given + hook to build :class:`~pydantic_ai.tools.ToolDefinition` objects that an LLM + agent can call. + + :param hook: An instantiated Airflow Hook. + :param allowed_methods: Method names to expose as tools. Required — + auto-discovery is intentionally not supported for safety. + :param tool_name_prefix: Optional prefix prepended to each tool name + (e.g. ``"s3_"`` → ``"s3_list_keys"``). + """ + + def __init__( + self, + hook: BaseHook, + *, + allowed_methods: list[str], + tool_name_prefix: str = "", + ) -> None: + if not allowed_methods: + raise ValueError("allowed_methods must be a non-empty list.") + + hook_cls_name = type(hook).__name__ + for method_name in allowed_methods: + if not hasattr(hook, method_name): + raise ValueError( + f"Hook {hook_cls_name!r} has no method {method_name!r}. Check your allowed_methods list." 
+ ) + if not callable(getattr(hook, method_name)): + raise ValueError(f"{hook_cls_name}.{method_name} is not callable.") + + self._hook = hook + self._allowed_methods = allowed_methods + self._tool_name_prefix = tool_name_prefix + self._id = f"hook-{type(hook).__name__}" + + @property + def id(self) -> str: + return self._id + + async def get_tools(self, ctx: RunContext[Any]) -> dict[str, ToolsetTool[Any]]: + tools: dict[str, ToolsetTool[Any]] = {} + for method_name in self._allowed_methods: + method = getattr(self._hook, method_name) + tool_name = f"{self._tool_name_prefix}{method_name}" if self._tool_name_prefix else method_name + + json_schema = _build_json_schema_from_signature(method) + description = _extract_description(method) + param_docs = _parse_param_docs(method.__doc__ or "") Review Comment: Now that key important thing for all the methods define proper doc strings :), nice ########## providers/common/ai/src/airflow/providers/common/ai/toolsets/hook.py: ########## @@ -0,0 +1,267 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""Generic adapter that exposes Airflow Hook methods as pydantic-ai tools.""" + +from __future__ import annotations + +import inspect +import json +import re +import types +from typing import TYPE_CHECKING, Any, Union, get_args, get_origin, get_type_hints + +from pydantic_ai.tools import ToolDefinition +from pydantic_ai.toolsets.abstract import AbstractToolset, ToolsetTool +from pydantic_core import SchemaValidator, core_schema + +if TYPE_CHECKING: + from collections.abc import Callable + + from pydantic_ai._run_context import RunContext + + from airflow.providers.common.compat.sdk import BaseHook + +# Single shared validator — accepts any JSON-decoded dict from the LLM. +_PASSTHROUGH_VALIDATOR = SchemaValidator(core_schema.any_schema()) + +# Maps Python types to JSON Schema fragments. +_TYPE_MAP: dict[type, dict[str, Any]] = { + str: {"type": "string"}, + int: {"type": "integer"}, + float: {"type": "number"}, + bool: {"type": "boolean"}, + list: {"type": "array"}, + dict: {"type": "object"}, + bytes: {"type": "string"}, +} + + +class HookToolset(AbstractToolset[Any]): + """ + Expose selected methods of an Airflow Hook as pydantic-ai tools. + + This adapter introspects the method signatures and docstrings of the given + hook to build :class:`~pydantic_ai.tools.ToolDefinition` objects that an LLM + agent can call. + + :param hook: An instantiated Airflow Hook. + :param allowed_methods: Method names to expose as tools. Required — + auto-discovery is intentionally not supported for safety. + :param tool_name_prefix: Optional prefix prepended to each tool name + (e.g. ``"s3_"`` → ``"s3_list_keys"``). 
+ """ + + def __init__( + self, + hook: BaseHook, + *, + allowed_methods: list[str], + tool_name_prefix: str = "", + ) -> None: + if not allowed_methods: + raise ValueError("allowed_methods must be a non-empty list.") + + hook_cls_name = type(hook).__name__ + for method_name in allowed_methods: + if not hasattr(hook, method_name): + raise ValueError( + f"Hook {hook_cls_name!r} has no method {method_name!r}. Check your allowed_methods list." + ) + if not callable(getattr(hook, method_name)): + raise ValueError(f"{hook_cls_name}.{method_name} is not callable.") + + self._hook = hook + self._allowed_methods = allowed_methods + self._tool_name_prefix = tool_name_prefix + self._id = f"hook-{type(hook).__name__}" + + @property + def id(self) -> str: + return self._id + + async def get_tools(self, ctx: RunContext[Any]) -> dict[str, ToolsetTool[Any]]: + tools: dict[str, ToolsetTool[Any]] = {} + for method_name in self._allowed_methods: + method = getattr(self._hook, method_name) + tool_name = f"{self._tool_name_prefix}{method_name}" if self._tool_name_prefix else method_name + + json_schema = _build_json_schema_from_signature(method) + description = _extract_description(method) + param_docs = _parse_param_docs(method.__doc__ or "") Review Comment: Now that key important thing for all the methods define proper doc strings :), nice -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
