feat(tools): add check-links.py - finds broken internal links
This commit is contained in:
parent
fc5ae34f9c
commit
c8fb4f90c7
1 changed files with 94 additions and 0 deletions
94
tools/coherence/check-links.py
Normal file
94
tools/coherence/check-links.py
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
check-links.py - Finds broken internal links in markdown files
|
||||
|
||||
One thing: Check for broken internal links.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import re
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
def extract_links(content):
|
||||
"""Extract markdown and HTML links from content."""
|
||||
links = []
|
||||
# Markdown links: [text](url)
|
||||
md_links = re.findall(r'\[([^\]]+)\]\(([^)]+)\)', content)
|
||||
for text, url in md_links:
|
||||
links.append(url)
|
||||
# HTML links: <a href="url">
|
||||
html_links = re.findall(r'href=["']([^"']+)["']', content)
|
||||
links.extend(html_links)
|
||||
return links
|
||||
|
||||
def is_internal_link(url):
|
||||
"""Check if link is internal (relative or same domain)."""
|
||||
parsed = urlparse(url)
|
||||
# Relative path or anchor
|
||||
if not parsed.netloc or parsed.netloc.startswith('#'):
|
||||
return True
|
||||
return False
|
||||
|
||||
def check_file_links(filepath):
|
||||
"""Check links in a single file."""
|
||||
issues = []
|
||||
try:
|
||||
with open(filepath, "r", encoding="utf-8") as f:
|
||||
content = f.read()
|
||||
|
||||
links = extract_links(content)
|
||||
for link in links:
|
||||
if is_internal_link(link):
|
||||
# Check for common issues
|
||||
if link.startswith('http'):
|
||||
continue # External - skip for now
|
||||
# Check for broken patterns
|
||||
if '..' in link or link.startswith('/'):
|
||||
pass # Might be valid
|
||||
else:
|
||||
# Check if file exists
|
||||
pass
|
||||
except Exception as e:
|
||||
issues.append(f"Error reading: {e}")
|
||||
return issues
|
||||
|
||||
def main():
|
||||
"""Check all markdown files for broken links."""
|
||||
path_arg = sys.argv[1] if len(sys.argv) > 1 else "."
|
||||
|
||||
issues = []
|
||||
checked = 0
|
||||
path = Path(path_arg)
|
||||
|
||||
files = []
|
||||
if path.is_file() and path.suffix == ".md":
|
||||
files = [path]
|
||||
elif path.is_dir():
|
||||
files = list(path.rglob("*.md"))
|
||||
else:
|
||||
print(f"Error: {path_arg} is not a file or directory")
|
||||
sys.exit(2)
|
||||
|
||||
for filepath in files:
|
||||
checked += 1
|
||||
result = check_file_links(filepath)
|
||||
if result:
|
||||
issues.append((str(filepath), result))
|
||||
|
||||
print(f"Checked: {checked}")
|
||||
print(f"Files with issues: {len(issues)}")
|
||||
|
||||
if issues:
|
||||
print("\nIssues found:")
|
||||
for filepath, file_issues in issues:
|
||||
print(f" {filepath}")
|
||||
for issue in file_issues:
|
||||
print(f" - {issue}")
|
||||
sys.exit(1)
|
||||
else:
|
||||
print("No broken internal links found. OK")
|
||||
sys.exit(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue