This is an automated email from the ASF dual-hosted git repository.

jeffreyh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris-website.git


The following commit(s) were added to refs/heads/master by this push:
     new 2d796a1519d Fixed the issue that the broken link detection did not 
detect community documents (#2571)
2d796a1519d is described below

commit 2d796a1519dd569ca6ad349cb377e2696c469565
Author: yangon <[email protected]>
AuthorDate: Mon Jun 30 19:43:36 2025 +0800

    Fixed the issue that the broken link detection did not detect community 
documents (#2571)
---
 .github/workflows/build-check.yml | 14 +++---
 check_move_global.py              | 37 +++++++++++-----
 scripts/check_move.js             | 90 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 126 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/build-check.yml b/.github/workflows/build-check.yml
index ec37fe1a1b8..399dbd5d073 100644
--- a/.github/workflows/build-check.yml
+++ b/.github/workflows/build-check.yml
@@ -41,6 +41,11 @@ jobs:
               uses: actions/setup-python@v4
               with:
                 python-version: '3.9'
+
+            - name: Use Node.js
+              uses: actions/setup-node@v3
+              with:
+                  node-version: 20
             
             - name: Refactor PR commits and check move
               run: |
@@ -49,6 +54,10 @@ jobs:
                 git reset --soft origin/master
                 git commit -m "All this PR changed(for test)"
                 python check_move.py `git rev-parse HEAD`
+            
+            - name: Run Node.js dead link checker
+              run: |
+                node ./scripts/check_move.js `git rev-parse HEAD`
         
             - name: Check exit code
               run: |
@@ -56,11 +65,6 @@ jobs:
                     echo "Have detected not processed link changes, please fix them."
                     exit 1
                 fi
-            
-            - name: Use Node.js
-              uses: actions/setup-node@v3
-              with:
-                  node-version: 20
 
             - name: Build
               run: |
diff --git a/check_move_global.py b/check_move_global.py
index 57be298346c..13ea19ea6d5 100644
--- a/check_move_global.py
+++ b/check_move_global.py
@@ -1,3 +1,17 @@
+# Detect global dead links
+#
+# Core logic:
+# Traverse all documents, match the links in the documents, and determine whether it is a dead link by the link address; 
+# if it is a dead link, try to fix it. If the fix fails, it will print: ❌ xxxx/xxxx.md: Could not fix broken link ${target_link}; 
+# if the fix is successful, it will print: 🛠️ xxxx/xxxx.md: Fixed broken link ${dead_link} -> ${link}
+#
+# Repair the logic of broken links:
+# Traverse all the documents in the directory with the current broken link layer by layer to see if the document name is consistent with the document name in the broken link. 
+# If they are consistent, the current directory is considered to be the correct directory of the broken link. 
+# The above situation is the case where the original link document directory has been migrated. If the document is deleted, the correction will fail.
+# 
+# Absolute paths or broken links starting with http/https cannot be judged
+
 import argparse
 import subprocess
 import re
@@ -9,7 +23,7 @@ from urllib.parse import urlparse
 move_pairs = []
 deletes = []
 change_detected = False
-search_dirs = ["docs", "i18n", "versioned_docs"]
+search_dirs = ["docs", "i18n", "versioned_docs", "community"]
 
 def is_same_file(path1, path2):
     return os.path.normpath(path1) == os.path.normpath(path2)
@@ -21,10 +35,10 @@ def remove_suffix(text: str, suffix: str):
 
 def find_nearest_file(file_base, start_dir):
     """
-    在 start_dir 向上查找最近的 file_base(.md/.mdx),否则全局搜索
+    Look for the nearest file_base (.md/.mdx) in start_dir upwards, otherwise search globally
     """
     cur_dir = start_dir
-    # 向上搜索最多 10 层,避免卡死
+    # Search up to 10 levels upwards to avoid stuck
     for _ in range(10):
         for ext in [".md", ".mdx"]:
             candidate = os.path.join(cur_dir, file_base + ext)
@@ -35,7 +49,7 @@ def find_nearest_file(file_base, start_dir):
             break
         cur_dir = parent
 
-    # 全局搜索
+    # Global Search
     for base_dir in search_dirs:
         for root, dirs, files in os.walk(base_dir):
             for file in files:
@@ -59,7 +73,7 @@ def process_md_file(file_path):
             if not full_path.endswith(".md") and not full_path.endswith(".mdx"):
                 full_path += ".md"
 
-            # 处理 rename 情况
+            # Handling rename situations
             for [from_path, to_path] in move_pairs:
                 from_base, from_ext = os.path.splitext(from_path)
                 to_base, to_ext = os.path.splitext(to_path)
@@ -74,15 +88,15 @@ def process_md_file(file_path):
                     new_content = new_content.replace(f"({link})", f"({relative_to_path})")
                     change_detected = True
 
-            # 处理 delete 情况
+            # Handling delete cases
             for deleted_path in deletes:
                 if is_same_file(full_path, deleted_path):
                     print(f"⚠️ {file_path}: Link to deleted file {link}")
                     change_detected = True
 
-            # 处理死链修复
+            # Dealing with broken link repair
             if not os.path.exists(full_path):
-                # 说明当前 link 是坏的
+                # Indicates that the current link is broken
                 file_base = os.path.basename(link)
                 file_base = remove_suffix(file_base, ".md")
                 file_base = remove_suffix(file_base, ".mdx")
@@ -92,11 +106,14 @@ def process_md_file(file_path):
                     relative_to_path = os.path.relpath(found_path, os.path.dirname(file_path))
                     relative_to_path = remove_suffix(relative_to_path, ".md")
                     relative_to_path = remove_suffix(relative_to_path, ".mdx")
-                    print(f"🛠️ {file_path}: Fixed broken link {link} -> {relative_to_path}")
+                    if "version-1.2" not in file_path and "version-2.0" not in file_path:
+                        print(f"🛠️ {file_path}: Fixed broken link {link} -> {relative_to_path}")
+        
                     new_content = new_content.replace(f"({link})", f"({relative_to_path})")
                     change_detected = True
                 else:
-                    print(f"❌ {file_path}: Could not fix broken link {link}")
+                    if "version-1.2" not in file_path and "version-2.0" not in file_path:
+                        print(f"❌ {file_path}: Could not fix broken link {link}")
                     change_detected = True
 
     if new_content != content:
diff --git a/scripts/check_move.js b/scripts/check_move.js
new file mode 100644
index 00000000000..0ad15df06b2
--- /dev/null
+++ b/scripts/check_move.js
@@ -0,0 +1,90 @@
+#!/usr/bin/env node
+
+const { execSync } = require("child_process");
+const fs = require("fs");
+const path = require("path");
+
+const commitHash = process.argv[2];
+
+if (!commitHash) {
+  console.error("❌ Please provide the commit hash, such as: node check-dead-links.js <commit-hash>");
+  process.exit(1);
+}
+
+const linkRegex = /\[.*?\]\((.*?)\)/g;
+let hasBrokenLinks = false;
+
+// Get the modified or newly added .md/.mdx files in the commit
+function getModifiedMarkdownFiles(commit) {
+  const output = execSync(`git show --name-status ${commit}`, { encoding: "utf-8" });
+  const lines = output.split("\n");
+  const files = [];
+
+  for (const line of lines) {
+    const parts = line.trim().split(/\s+/);
+    if (parts.length === 2) {
+      const [status, filePath] = parts;
+      if ((status === "A" || status === "M") && (filePath.endsWith(".md") || filePath.endsWith(".mdx"))) {
+        files.push(filePath);
+      }
+    }
+  }
+
+  return files;
+}
+
+// Checks if the link points to an existing local file
+function isLocalLink(link) {
+  return !link.startsWith("http://") &&
+         !link.startsWith("https://") &&
+         !link.startsWith("mailto:") &&
+         !link.startsWith("#") &&
+         !path.isAbsolute(link);
+}
+
+// Check links in files
+function checkFileLinks(filePath) {
+  const content = fs.readFileSync(filePath, "utf-8");
+  const dir = path.dirname(filePath);
+  const matches = [...content.matchAll(linkRegex)];
+
+  for (const match of matches) {
+    const rawLink = match[1].split("#")[0]; // Remove anchor point
+    if (!isLocalLink(rawLink)) continue;
+
+    let fullPath = path.resolve(dir, rawLink);
+    if (!fs.existsSync(fullPath)) {
+      // Try adding a .md/.mdx suffix and try again
+      if (fs.existsSync(fullPath + ".md")) continue;
+      if (fs.existsSync(fullPath + ".mdx")) continue;
+
+      console.error(`❌ ${filePath}: Broken link -> ${rawLink}`);
+      hasBrokenLinks = true;
+    }
+  }
+}
+
+// Main function
+function main() {
+  const files = getModifiedMarkdownFiles(commitHash);
+  if (files.length === 0) {
+    console.log("✅ Unmodified Markdown files");
+    return;
+  }
+
+  for (const file of files) {
+    if (fs.existsSync(file)) {
+      checkFileLinks(file);
+    }
+  }
+
+
+  if (hasBrokenLinks) {
+    console.error("❗ A broken link was detected. Please fix it and submit.");
+    process.exit(1);
+  } else {
+    console.log("✅ All links are OK");
+  }
+}
+
+main();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to