This is an automated email from the ASF dual-hosted git repository.

kaxilnaik pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 41c62b95cc6 Refactor registry workflow and metadata configuration 
constants. (#63024)
41c62b95cc6 is described below

commit 41c62b95cc6a4973a026b076bfe26d5eeb03b244
Author: Kaxil Naik <[email protected]>
AuthorDate: Sat Mar 7 12:24:57 2026 +0000

    Refactor registry workflow and metadata configuration constants. (#63024)
---
 .github/workflows/registry-build.yml | 54 +++++++++++++++++++++++-------------
 dev/registry/extract_metadata.py     | 31 +++++++++++++++------
 2 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/registry-build.yml b/.github/workflows/registry-build.yml
index 27a1713629c..f3e0e8bc7f2 100644
--- a/.github/workflows/registry-build.yml
+++ b/.github/workflows/registry-build.yml
@@ -59,6 +59,15 @@ jobs:
     timeout-minutes: 30
     name: "Build & Publish Registry"
     runs-on: ubuntu-latest
+    env:
+      EXISTING_REGISTRY_DIR: /tmp/existing-registry
+      REGISTRY_DATA_DIR: dev/registry
+      REGISTRY_PROVIDERS_JSON: providers.json
+      REGISTRY_MODULES_JSON: modules.json
+      REGISTRY_SITE_DATA_DIR: registry/src/_data
+      REGISTRY_SITE_VERSIONS_DIR: registry/src/_data/versions
+      REGISTRY_SITE_LOGOS_DIR: registry/public/logos
+      REGISTRY_CACHE_CONTROL: public, max-age=300
     permissions:
       contents: read
       id-token: write
@@ -142,12 +151,15 @@ jobs:
         env:
           S3_BUCKET: ${{ steps.destination.outputs.bucket }}
         run: |
-          mkdir -p /tmp/existing-registry
-          PROVIDERS_URL="${S3_BUCKET}api/providers.json"
-          MODULES_URL="${S3_BUCKET}api/modules.json"
-          if aws s3 cp "${PROVIDERS_URL}" /tmp/existing-registry/providers.json 2>/dev/null; then
+          mkdir -p "${EXISTING_REGISTRY_DIR}"
+          PROVIDERS_URL="${S3_BUCKET}api/${REGISTRY_PROVIDERS_JSON}"
+          MODULES_URL="${S3_BUCKET}api/${REGISTRY_MODULES_JSON}"
+          if aws s3 cp \
+            "${PROVIDERS_URL}" \
+            "${EXISTING_REGISTRY_DIR}/${REGISTRY_PROVIDERS_JSON}" \
+            2>/dev/null; then
             echo "found=true" >> "${GITHUB_OUTPUT}"
-            aws s3 cp "${MODULES_URL}" /tmp/existing-registry/modules.json || true
+            aws s3 cp "${MODULES_URL}" "${EXISTING_REGISTRY_DIR}/${REGISTRY_MODULES_JSON}" || true
           else
             echo "found=false" >> "${GITHUB_OUTPUT}"
             echo "No existing registry data on S3"
@@ -169,24 +181,28 @@ jobs:
         if: inputs.provider != '' && steps.download-existing.outputs.found == 'true'
         run: |
           uv run dev/registry/merge_registry_data.py \
-            --existing-providers /tmp/existing-registry/providers.json \
-            --existing-modules /tmp/existing-registry/modules.json \
-            --new-providers dev/registry/providers.json \
-            --new-modules dev/registry/modules.json \
-            --output dev/registry/
+            --existing-providers "${EXISTING_REGISTRY_DIR}/${REGISTRY_PROVIDERS_JSON}" \
+            --existing-modules "${EXISTING_REGISTRY_DIR}/${REGISTRY_MODULES_JSON}" \
+            --new-providers "${REGISTRY_DATA_DIR}/${REGISTRY_PROVIDERS_JSON}" \
+            --new-modules "${REGISTRY_DATA_DIR}/${REGISTRY_MODULES_JSON}" \
+            --output "${REGISTRY_DATA_DIR}/"
 
       - name: "Copy breeze output to registry data"
         run: |
-          mkdir -p registry/src/_data/versions
-          cp dev/registry/providers.json registry/src/_data/providers.json
-          cp dev/registry/modules.json registry/src/_data/modules.json
-          if [ -d dev/registry/output/versions ]; then
-            cp -r dev/registry/output/versions/* registry/src/_data/versions/
+          mkdir -p "${REGISTRY_SITE_VERSIONS_DIR}"
+          cp \
+            "${REGISTRY_DATA_DIR}/${REGISTRY_PROVIDERS_JSON}" \
+            "${REGISTRY_SITE_DATA_DIR}/${REGISTRY_PROVIDERS_JSON}"
+          cp \
+            "${REGISTRY_DATA_DIR}/${REGISTRY_MODULES_JSON}" \
+            "${REGISTRY_SITE_DATA_DIR}/${REGISTRY_MODULES_JSON}"
+          if [ -d "${REGISTRY_DATA_DIR}/output/versions" ]; then
+            cp -r "${REGISTRY_DATA_DIR}/output/versions/"* "${REGISTRY_SITE_VERSIONS_DIR}/"
           fi
           # Copy provider logos extracted from providers/*/docs/integration-logos/
-          if [ -d dev/registry/logos ]; then
-            mkdir -p registry/public/logos
-            cp -r dev/registry/logos/* registry/public/logos/
+          if [ -d "${REGISTRY_DATA_DIR}/logos" ]; then
+            mkdir -p "${REGISTRY_SITE_LOGOS_DIR}"
+            cp -r "${REGISTRY_DATA_DIR}/logos/"* "${REGISTRY_SITE_LOGOS_DIR}/"
           fi
 
       - name: "Setup pnpm"
@@ -224,7 +240,7 @@ jobs:
           S3_BUCKET: ${{ steps.destination.outputs.bucket }}
         run: |
           aws s3 sync registry/_site/ "${S3_BUCKET}" \
-            --cache-control "public, max-age=300"
+            --cache-control "${REGISTRY_CACHE_CONTROL}"
 
       - name: "Publish version metadata"
         env:
diff --git a/dev/registry/extract_metadata.py b/dev/registry/extract_metadata.py
index 664e67f54e4..c1d2043d24d 100644
--- a/dev/registry/extract_metadata.py
+++ b/dev/registry/extract_metadata.py
@@ -44,11 +44,24 @@ from typing import Any
 import tomllib
 import yaml
 
+# External endpoints used by metadata extraction.
+PYPISTATS_RECENT_URL = "https://pypistats.org/api/packages/{package_name}/recent"
+PYPI_PACKAGE_JSON_URL = "https://pypi.org/pypi/{package_name}/json"
+S3_DOC_URL = "http://apache-airflow-docs.s3-website.eu-central-1.amazonaws.com"
+AIRFLOW_PROVIDER_DOCS_URL = "https://airflow.apache.org/docs/{package_name}/stable/"
+AIRFLOW_PROVIDER_CONNECTIONS_URL = (
+    "https://airflow.apache.org/docs/{package_name}/stable/connections/index.html"
+)
+AIRFLOW_PROVIDER_SOURCE_URL = (
+    "https://github.com/apache/airflow/tree/providers-{provider_id}/{version}/providers/{provider_path}"
+)
+PYPI_PACKAGE_URL = "https://pypi.org/project/{package_name}/"
+
 
 def fetch_pypi_downloads(package_name: str) -> dict[str, int]:
     """Fetch download statistics from pypistats.org API."""
     try:
-        url = f"https://pypistats.org/api/packages/{package_name}/recent"
+        url = PYPISTATS_RECENT_URL.format(package_name=package_name)
         with urllib.request.urlopen(url, timeout=5) as response:
             data = json.loads(response.read().decode())
             return {
@@ -64,7 +77,7 @@ def fetch_pypi_downloads(package_name: str) -> dict[str, int]:
 def fetch_pypi_dates(package_name: str) -> dict[str, str]:
     """Fetch first release and latest release dates from PyPI JSON API."""
     try:
-        url = f"https://pypi.org/pypi/{package_name}/json"
+        url = PYPI_PACKAGE_JSON_URL.format(package_name=package_name)
         with urllib.request.urlopen(url, timeout=10) as response:
             data = json.loads(response.read().decode())
 
@@ -109,7 +122,6 @@ def read_inventory(inv_path: Path) -> dict[str, str]:
     return result
 
 
-S3_DOC_URL = "http://apache-airflow-docs.s3-website.eu-central-1.amazonaws.com"
 INVENTORY_CACHE_DIR = Path(__file__).parent / ".inventory_cache"
 INVENTORY_TTL = datetime.timedelta(hours=12)
 
@@ -520,9 +532,7 @@ def main():
         # Extract connection types from provider.yaml
         # Link to the connections index page since individual connection pages 
might not exist
         connection_types = []
-        connections_index_url = (
-            f"https://airflow.apache.org/docs/{package_name}/stable/connections/index.html"
-        )
+        connections_index_url = AIRFLOW_PROVIDER_CONNECTIONS_URL.format(package_name=package_name)
         for conn in provider_yaml.get("connection-types", []):
             conn_type = conn.get("connection-type", "")
             hook_class = conn.get("hook-class-name", "")
@@ -546,6 +556,7 @@ def main():
         # Airflow version compatibility (from pyproject.toml dependencies)
         airflow_versions = determine_airflow_versions(pyproject_data["dependencies"])
 
+        provider_source_path = provider_path.relative_to(PROVIDERS_DIR).as_posix()
         provider = Provider(
             id=provider_id,
             name=name,
@@ -563,9 +574,11 @@ def main():
             requires_python=pyproject_data["requires_python"],
             dependencies=pyproject_data["dependencies"],
             optional_extras=pyproject_data.get("optional_extras", {}),
-            docs_url=f"https://airflow.apache.org/docs/{package_name}/stable/",
-            source_url=f"https://github.com/apache/airflow/tree/providers-{provider_id}/{version}/providers/{provider_path.relative_to(PROVIDERS_DIR)}",
-            pypi_url=f"https://pypi.org/project/{package_name}/",
+            docs_url=AIRFLOW_PROVIDER_DOCS_URL.format(package_name=package_name),
+            source_url=AIRFLOW_PROVIDER_SOURCE_URL.format(
+                provider_id=provider_id, version=version, provider_path=provider_source_path
+            ),
+            pypi_url=PYPI_PACKAGE_URL.format(package_name=package_name),
+            pypi_url=PYPI_PACKAGE_URL.format(package_name=package_name),
             first_released=pypi_dates["first_released"],
             last_updated=pypi_dates["last_updated"],
         )

Reply via email to