refactor: Replace generator with enhanced version
- Extracts full frontmatter metadata (originalDate, notion_*, authors, source)
- Correct date priority: frontmatter → filename → mtime → ctime
- All metadata exposed in index.json for frontend use

Phase 1 quick win complete.
parent 87cfa7e083
commit d0cf2e3061
26 changed files with 2621 additions and 299 deletions
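The date priority in the message above is a simple first-non-empty fallback chain. A minimal Python sketch of the rule (resolve_date is a hypothetical illustration, not code from this commit; the real logic is the datePriority array in collectFiles() in tools/generate-index.mjs):

# Hypothetical sketch of the date-priority rule from the commit message.
def resolve_date(fm_date, filename_date, mtime_date, ctime_date):
    """Return the first available date: frontmatter → filename → mtime → ctime."""
    for candidate in (fm_date, filename_date, mtime_date, ctime_date):
        if candidate:
            return candidate
    return None

# A file with no frontmatter date but a dated filename resolves to the filename date.
assert resolve_date(None, "2024-03-01", "2025-01-15", "2025-01-15") == "2024-03-01"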
371 tools/coherence-check.py (new file)
@@ -0,0 +1,371 @@
#!/usr/bin/env python3
"""
Coherence Check Script for The Fold Within Earth

Validates fieldnote frontmatter, checks for broken links,
and verifies metadata completeness. Outputs report as JSON.
"""

import argparse
import json
import re
import sys
from datetime import datetime
from pathlib import Path

import yaml


# Configuration
FRONTMATTER_REQUIRED = {
    "title": str,
    "date": str,
    "author": str,
    "type": str,
    "status": str,
}

FRONTMATTER_OPTIONAL = {
    "version": (str, int, float),
    "series": str,
    "layer": str,
    "tags": list,
    "notion_id": str,
    "notion_created": str,
    "source": str,
}

VALID_LAYERS = ["first", "second", "third", "fourth"]
VALID_STATUSES = ["published", "draft", "archived", "review"]


class CoherenceChecker:
    """Main coherence checking class."""

    def __init__(self, root_path: str = ".", output_path: str | None = None):
        self.root_path = Path(root_path)
        self.output_path = output_path or "coherence-report.json"
        self.issues: list[dict] = []
        self.warnings: list[dict] = []
        self.validated_files: list[dict] = []
        self.start_time = datetime.now()

    def parse_frontmatter(self, content: str) -> tuple[dict | None, str]:
        """Parse YAML frontmatter from markdown content."""
        # Match frontmatter between --- markers
        match = re.match(r'^---\n(.*?)\n---(.*)$', content, re.DOTALL)
        if not match:
            return None, content

        try:
            frontmatter = yaml.safe_load(match.group(1))
            content_body = match.group(2)
            return frontmatter, content_body
        except yaml.YAMLError:
            return None, content

    def check_frontmatter(self, file_path: Path, content: str) -> dict | None:
        """Check frontmatter for a single file."""
        frontmatter, _ = self.parse_frontmatter(content)

        if frontmatter is None:
            return {
                "file": str(file_path.relative_to(self.root_path)),
                "type": "frontmatter-missing",
                "severity": "critical",
                "message": "No frontmatter found",
                "suggestion": "Add YAML frontmatter between --- markers"
            }

        issues = []

        # Check required fields
        for field, expected_type in FRONTMATTER_REQUIRED.items():
            if field not in frontmatter:
                issues.append({
                    "field": field,
                    "type": "frontmatter-required-missing",
                    "severity": "critical",
                    "message": f"Required field '{field}' is missing",
                    "suggestion": f"Add {field}: <value> to frontmatter"
                })
            elif not isinstance(frontmatter[field], expected_type):
                issues.append({
                    "field": field,
                    "type": "frontmatter-type-error",
                    "severity": "high",
                    "message": f"Field '{field}' has wrong type",
                    "suggestion": f"Expected {expected_type}, got {type(frontmatter[field]).__name__}"
                })

        # Validate specific fields
        if "status" in frontmatter and frontmatter["status"] not in VALID_STATUSES:
            issues.append({
                "field": "status",
                "type": "frontmatter-validation-error",
                "severity": "medium",
                "message": f"Invalid status: '{frontmatter['status']}'",
                "suggestion": f"Status must be one of: {', '.join(VALID_STATUSES)}"
            })

        if "layer" in frontmatter and frontmatter["layer"] not in VALID_LAYERS:
            issues.append({
                "field": "layer",
                "type": "frontmatter-validation-error",
                "severity": "medium",
                "message": f"Invalid layer: '{frontmatter['layer']}'",
                "suggestion": f"Layer must be one of: {', '.join(VALID_LAYERS)}"
            })

        # Check tags format
        if "tags" in frontmatter and isinstance(frontmatter["tags"], str):
            issues.append({
                "field": "tags",
                "type": "frontmatter-format-error",
                "severity": "low",
                "message": "Tags should be a list, not a comma-separated string",
                "suggestion": "Change tags to a YAML list format"
            })

        return {
            "file": str(file_path.relative_to(self.root_path)),
            "has_frontmatter": True,
            "issues": issues,
            "frontmatter": {k: v for k, v in frontmatter.items() if k in FRONTMATTER_REQUIRED}
        }

    def check_links(self, content: str, base_path: Path) -> list[dict]:
        """Check for broken or malformed links."""
        issues = []

        # Match markdown links
        link_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
        matches = re.findall(link_pattern, content)

        for link_text, link_url in matches:
            # Skip external URLs and in-page anchors
            if link_url.startswith(('http://', 'https://', 'mailto:', '#')):
                continue

            # Check internal links
            link_path = link_url.split('#')[0]
            if link_path.startswith('/'):
                # Absolute path
                full_path = self.root_path / link_path.lstrip('/')
            else:
                # Relative path
                full_path = base_path.parent / link_path

            if not full_path.exists():
                issues.append({
                    "file": str(base_path.relative_to(self.root_path)),
                    "type": "broken-link",
                    "severity": "high",
                    "link": link_url,
                    "message": f"Broken link: {link_url}",
                    "suggestion": "Update link to point to an existing file, or remove it"
                })

        return issues

    def check_metadata_file(self, file_path: Path) -> dict | None:
        """Check metadata.yaml file completeness."""
        if not file_path.exists():
            return {
                "file": str(file_path.relative_to(self.root_path)),
                "type": "metadata-missing",
                "severity": "high",
                "message": "metadata.yaml file not found",
                "suggestion": "Create metadata.yaml with required fields"
            }

        try:
            with open(file_path) as f:
                metadata = yaml.safe_load(f)
        except yaml.YAMLError as e:
            return {
                "file": str(file_path.relative_to(self.root_path)),
                "type": "metadata-invalid",
                "severity": "critical",
                "message": f"Invalid YAML: {e}",
                "suggestion": "Fix YAML syntax errors"
            }

        if metadata is None:
            return {
                "file": str(file_path.relative_to(self.root_path)),
                "type": "metadata-empty",
                "severity": "high",
                "message": "metadata.yaml is empty",
                "suggestion": "Add required metadata fields"
            }

        return None

    def scan_content(self) -> dict:
        """Scan all content files for coherence issues."""
        content_path = self.root_path / "content"

        if not content_path.exists():
            # Record the problem but still return a full-shape report,
            # so run() can read coherence_score and summary safely.
            self.warnings.append({
                "file": "content",
                "message": "Content directory not found"
            })
            return self.generate_report()

        # Find all markdown files
        md_files = list(content_path.rglob("*.md"))

        for md_file in md_files:
            # Skip index files
            if md_file.name.lower() in ("index.md", "readme.md"):
                continue

            try:
                with open(md_file) as f:
                    content = f.read()

                # Check frontmatter
                result = self.check_frontmatter(md_file, content)
                if result:
                    if result.get("issues"):
                        self.issues.extend(result["issues"])
                    self.validated_files.append(result)

                # Check links
                link_issues = self.check_links(content, md_file)
                self.issues.extend(link_issues)

                # Check for corresponding metadata.yaml
                metadata_file = md_file.parent / "metadata.yaml"
                if md_file.name[:1].isdigit():  # Date-prefixed files
                    metadata_issue = self.check_metadata_file(metadata_file)
                    if metadata_issue:
                        self.issues.append(metadata_issue)

            except Exception as e:
                self.warnings.append({
                    "file": str(md_file.relative_to(self.root_path)),
                    "message": f"Error processing file: {e}"
                })

        return self.generate_report()

    def generate_report(self) -> dict:
        """Generate the final coherence report."""
        end_time = datetime.now()
        duration = (end_time - self.start_time).total_seconds()

        # Calculate coherence score
        total_files = len(self.validated_files)
        files_with_issues = len(set(
            i["file"] for i in self.issues if "file" in i
        ))
        coherence_score = max(0, 100 - (files_with_issues / max(1, total_files) * 20))

        # Group issues by type
        issues_by_type = {}
        for issue in self.issues:
            issue_type = issue.get("type", "unknown")
            if issue_type not in issues_by_type:
                issues_by_type[issue_type] = []
            issues_by_type[issue_type].append(issue)

        report = {
            "timestamp": self.start_time.isoformat(),
            "duration_seconds": duration,
            "status": (
                "critical" if any(i.get("severity") == "critical" for i in self.issues)
                else "warning" if self.issues
                else "healthy"
            ),
            "coherence_score": round(coherence_score, 2),
            "summary": {
                "total_files_validated": total_files,
                "total_issues": len(self.issues),
                "total_warnings": len(self.warnings),
                "critical_issues": len([i for i in self.issues if i.get("severity") == "critical"]),
                "high_issues": len([i for i in self.issues if i.get("severity") == "high"]),
                "medium_issues": len([i for i in self.issues if i.get("severity") == "medium"]),
                "low_issues": len([i for i in self.issues if i.get("severity") == "low"]),
            },
            "issues_by_type": {k: len(v) for k, v in issues_by_type.items()},
            "issues": self.issues,
            "warnings": self.warnings,
            "validated_files": self.validated_files,
            "auto_fixable": [
                i for i in self.issues
                if i.get("type") in ("frontmatter-missing", "frontmatter-required-missing", "metadata-empty")
            ]
        }

        return report

    def save_report(self, report: dict | None = None) -> str:
        """Save report to JSON file."""
        if report is None:
            report = self.scan_content()

        output_path = Path(self.output_path)
        with open(output_path, "w") as f:
            json.dump(report, f, indent=2, default=str)

        return str(output_path)

    def run(self, save: bool = True) -> dict:
        """Run the full coherence check."""
        print(f"🔍 Starting coherence check at {self.start_time.isoformat()}")
        print(f"📁 Root path: {self.root_path}")

        report = self.scan_content()

        # Print summary
        print(f"\n📊 Coherence Score: {report['coherence_score']}/100")
        print(f" Files validated: {report['summary']['total_files_validated']}")
        print(f" Issues found: {report['summary']['total_issues']}")
        if report['summary']['critical_issues']:
            print(f" 🔴 Critical: {report['summary']['critical_issues']}")
        if report['summary']['high_issues']:
            print(f" 🟠 High: {report['summary']['high_issues']}")
        if report['summary']['medium_issues']:
            print(f" 🟡 Medium: {report['summary']['medium_issues']}")
        if report['summary']['low_issues']:
            print(f" 🟢 Low: {report['summary']['low_issues']}")

        # Save report
        if save:
            report_path = self.save_report(report)
            print(f"\n📄 Report saved to: {report_path}")

        return report


def main():
    parser = argparse.ArgumentParser(description="Coherence Check for The Fold Within Earth")
    parser.add_argument("--root", "-r", default=".", help="Root path to scan (default: current directory)")
    parser.add_argument("--output", "-o", default="coherence-report.json", help="Output file path")
    parser.add_argument("--check-only", action="store_true", help="Only check, don't save report")

    args = parser.parse_args()

    checker = CoherenceChecker(args.root, args.output)
    report = checker.run(save=not args.check_only)

    # Exit with error code if critical issues found
    if report["status"] == "critical":
        sys.exit(2)
    elif report["status"] == "warning":
        sys.exit(1)
    else:
        sys.exit(0)


if __name__ == "__main__":
    main()
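The checker can also be driven programmatically instead of through the CLI. A short usage sketch (assumes the script is importable, e.g. a copy saved as coherence_check.py; paths are illustrative):

# Usage sketch: run the checker from another script and filter the report.
from coherence_check import CoherenceChecker  # assumes an importable copy of the script

checker = CoherenceChecker(root_path=".", output_path="coherence-report.json")
report = checker.run()  # scans ./content/**/*.md, prints a summary, saves JSON

# Surface only the serious problems.
for issue in report["issues"]:
    if issue.get("severity") in ("critical", "high"):
        print(issue.get("file", issue.get("field", "?")), "-", issue["message"])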
@@ -1,264 +0,0 @@
#!/usr/bin/env node
/**
 * Enhanced Index Generator for The Fold Within
 * REFACTORED: Full metadata extraction from frontmatter
 *
 * Priority order for dates:
 * 1. Frontmatter date (original)
 * 2. Filename date (YYYY-MM-DD)
 * 3. Git mtime
 * 4. Git ctime
 */

import { promises as fs } from "fs";
import path from "path";
import pdf from "pdf-parse";

const ROOT = "public";
const BASE_URL = "https://thefoldwithin.earth";
const OUT_JSON = path.join(ROOT, "index.json");
const OUT_SITEMAP = path.join(ROOT, "sitemap.xml");
const OUT_ROBOTS = path.join(ROOT, "robots.txt");
const OUT_FEED = path.join(ROOT, "feed.xml");
const OUT_SCHEMA = path.join(ROOT, "schema.jsonld");
const EXCERPT_LENGTH = 400;

// ═══════════════════════════════════════════════════════════════
// EXTRACTORS - Pull metadata from frontmatter
// ═══════════════════════════════════════════════════════════════

function extractFrontmatter(content) {
  const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
  if (!fmMatch) return null;

  const fm = fmMatch[1];
  return {
    date: fm.match(/^date:\s*(\d{4}-\d{2}-\d{2})/m)?.[1] || null,
    authors: extractAuthors(fm),
    notion_id: fm.match(/^notion_id:\s*(.+)$/m)?.[1]?.trim() || null,
    notion_created: fm.match(/^notion_created:\s*(.+)$/m)?.[1]?.trim() || null,
    source: fm.match(/^source:\s*(.+)$/m)?.[1]?.trim() || null,
    tags: extractTags(fm),
    type: fm.match(/^type:\s*(.+)$/m)?.[1]?.trim() || "fieldnote",
    status: fm.match(/^status:\s*(.+)$/m)?.[1]?.trim() || "draft",
    series: fm.match(/^series:\s*(.+)$/m)?.[1]?.trim() || null,
    version: fm.match(/^version:\s*(.+)$/m)?.[1]?.trim() || "0.1",
    layer: fm.match(/^layer:\s*(.+)$/m)?.[1]?.trim() || null
  };
}

function extractAuthors(fm) {
  const match = fm.match(/^author[s]?:\s*(.+)$/m);
  if (!match) return [];
  return match[1].split(',').map(a => a.trim()).filter(a => a);
}

function extractTags(fm) {
  const match = fm.match(/^tags:\s*(.+)$/m);
  if (!match) return [];
  return match[1].split(',').map(t => t.trim().toLowerCase()).filter(t => t);
}

// Fallback: extract from filename
function dateFromName(name) {
  const m = name.match(/^(\d{4}-\d{2}-\d{2})/);
  return m ? m[1] : null;
}

// ═══════════════════════════════════════════════════════════════
// PARSERS - Extract content from files
// ═══════════════════════════════════════════════════════════════

async function readHead(abs, full = false) {
  const fh = await fs.open(abs, "r");
  const size = full ? await fs.stat(abs).then(s => Math.min(s.size, EXCERPT_LENGTH * 2)) : 64 * 1024;
  const buf = Buffer.alloc(size);
  const { bytesRead } = await fh.read(buf, 0, size, 0);
  await fh.close();
  return buf.slice(0, bytesRead).toString("utf8");
}

function parseTitle(raw, ext) {
  if (ext === ".md") return raw.match(/^\s*#\s+(.+?)\s*$/m)?.[1].trim();
  if (ext === ".html") return raw.match(/<title[^>]*>([^<]+)<\/title>/i)?.[1].trim();
  return null;
}

function extractExcerpt(raw, ext) {
  if (ext === ".md") raw = raw.replace(/^#.*\n/, '').trim();
  if (ext === ".html") raw = raw.replace(/<head>[\s\S]*<\/head>/i, '').replace(/<[^>]+>/g, ' ').trim();
  return raw.replace(/\s+/g, ' ').slice(0, EXCERPT_LENGTH);
}

// ═══════════════════════════════════════════════════════════════
// GENERATORS - Create outputs
// ═══════════════════════════════════════════════════════════════

function generateSitemap(flat) {
  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">`;

  const staticPages = ["", "/about", "/about/solaria", "/about/mark", "/about/initiatives", "/fieldnotes"];
  for (const page of staticPages) {
    xml += ` <url>\n <loc>${BASE_URL}${page}/</loc>\n <changefreq>weekly</changefreq>\n <priority>${page === "" ? "1.0" : "0.8"}</priority>\n </url>\n`;
  }

  for (const f of flat.filter(x => !x.isIndex && x.originalDate)) {
    const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
    xml += ` <url>\n <loc>${BASE_URL}/${urlPath}</loc>\n <lastmod>${f.originalDate}</lastmod>\n <changefreq>monthly</changefreq>\n </url>\n`;
  }

  return xml + "</urlset>";
}

function generateRobots() {
  return `# robots.txt for The Fold Within Earth\nSitemap: ${BASE_URL}/sitemap.xml\n`;
}

function generateFeed(flat) {
  const items = flat
    .filter(f => !f.isIndex && f.originalDate)
    .sort((a, b) => new Date(b.originalDate) - new Date(a.originalDate))
    .slice(0, 20);

  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<rss version="2.0">\n<channel>\n<title>The Fold Within Earth</title>\n<link>${BASE_URL}</link>\n`;

  for (const f of items) {
    const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
    xml += ` <item>\n <title>${f.title || f.name}</title>\n <link>${BASE_URL}/${urlPath}</link>\n <pubDate>${new Date(f.originalDate).toUTCString()}</pubDate>\n </item>\n`;
  }

  return xml + "</channel>\n</rss>";
}

function generateSchema(flat, sections, tags) {
  const org = {
    "@context": "https://schema.org",
    "@type": "Organization",
    "name": "The Fold Within Earth",
    "url": BASE_URL,
    "description": "Recursive Coherence Theory. Human-AI Co-evolution. Sacred Geometry of WE.",
    "foundingDate": "2024",
    "keywords": tags.join(", ")
  };

  const website = {
    "@context": "https://schema.org",
    "@type": "WebSite",
    "name": "The Fold Within Earth",
    "url": BASE_URL
  };

  return JSON.stringify({ "@graph": [org, website] }, null, 2);
}

// ═══════════════════════════════════════════════════════════════
// MAIN COLLECTOR
// ═══════════════════════════════════════════════════════════════

async function collectFiles(relBase = "", flat = []) {
  const abs = path.join(ROOT, relBase);
  const entries = await fs.readdir(abs, { withFileTypes: true });

  for (const e of entries) {
    if (e.name.startsWith(".")) continue;

    const rel = path.posix.join(relBase, e.name);
    const absPath = path.join(ROOT, rel);

    if (rel.toLowerCase() === "index.html" || rel.toLowerCase() === "index.md") continue;

    if (e.isDirectory()) {
      await collectFiles(rel, flat);
      continue;
    }

    const ext = path.posix.extname(e.name).toLowerCase();
    if (![".md", ".html", ".pdf"].includes(ext)) continue;

    const st = await fs.stat(absPath);
    let raw = ext === ".pdf"
      ? (await pdf(await fs.readFile(absPath))).text
      : await readHead(absPath, true);

    const title = parseTitle(raw, ext) || e.name.replace(new RegExp(`\\${ext}$`), "").trim();
    const fm = ext === ".md" ? extractFrontmatter(raw) : null;

    // PRIORITY: frontmatter date → filename → mtime → ctime
    const datePriority = [
      fm?.date,
      dateFromName(e.name),
      new Date(st.mtimeMs).toISOString().split('T')[0],
      new Date(st.ctimeMs).toISOString().split('T')[0]
    ].find(d => d);

    flat.push({
      name: e.name,
      title,
      path: rel,
      ext,
      // Core fields (for frontend)
      date: datePriority,
      originalDate: fm?.date || dateFromName(e.name) || null,
      // Metadata from frontmatter
      authors: fm?.authors || [],
      notion_id: fm?.notion_id,
      notion_created: fm?.notion_created,
      source: fm?.source,
      tags: fm?.tags || extractTags(raw),
      type: fm?.type || "fieldnote",
      status: fm?.status || "draft",
      series: fm?.series,
      version: fm?.version || "0.1",
      layer: fm?.layer,
      // Content
      excerpt: extractExcerpt(raw, ext),
      isIndex: e.name.toLowerCase().startsWith("index."),
      // Timestamps (for debugging)
      mtime: new Date(st.mtimeMs).toISOString(),
      ctime: new Date(st.ctimeMs).toISOString()
    });
  }

  return flat;
}

// ═══════════════════════════════════════════════════════════════
// ENTRY POINT
// ═══════════════════════════════════════════════════════════════

(async () => {
  try {
    console.log("🔍 Crawling public directory...");
    const flat = await collectFiles();
    const sections = [...new Set(flat.filter(f => !f.isIndex).map(f => f.path.split("/")[0]))].sort();
    const allTags = [...new Set(flat.flatMap(f => f.tags))].sort();

    console.log(`📄 Found ${flat.length} files`);
    console.log(`📁 ${sections.length} sections`);
    console.log(`🏷️ ${allTags.length} unique tags`);

    // Write outputs
    await fs.writeFile(OUT_JSON, JSON.stringify({
      flat,
      sections,
      tags: allTags,
      generated: new Date().toISOString()
    }, null, 2));

    await fs.writeFile(OUT_SITEMAP, generateSitemap(flat));
    await fs.writeFile(OUT_ROBOTS, generateRobots());
    await fs.writeFile(OUT_FEED, generateFeed(flat));
    await fs.writeFile(OUT_SCHEMA, generateSchema(flat, sections, allTags));

    console.log(`\n✅ Complete!`);
    console.log(` • index.json: Full metadata (originalDate, notion_*, authors, source)`);
    console.log(` • sitemap.xml: Uses originalDate for timestamps`);
    console.log(` • feed.xml: Sorted by originalDate`);
    console.log(` • schema.jsonld: Structured data`);

  } catch (e) {
    console.error("❌ Failed:", e);
    process.exit(1);
  }
})();
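Everything collectFiles() pushes into flat lands verbatim in index.json, which is what "all metadata exposed for frontend use" means in practice. A minimal Python sketch of a consumer (assumes the generator has already written public/index.json; field names are the ones emitted above):

import json
from pathlib import Path

# Load the generated index.
index = json.loads(Path("public/index.json").read_text(encoding="utf-8"))

# Published fieldnotes with a real (frontmatter- or filename-derived) date.
published = [
    f for f in index["flat"]
    if not f["isIndex"] and f["status"] == "published" and f["originalDate"]
]
# ISO YYYY-MM-DD strings sort lexically, so no date parsing is needed.
published.sort(key=lambda f: f["originalDate"], reverse=True)

for f in published[:10]:
    print(f["originalDate"], f["title"], ", ".join(f["tags"]))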
204 tools/generate-index.mjs (Executable file → Normal file)
@@ -1,7 +1,13 @@
 #!/usr/bin/env node
 /**
  * Enhanced Index Generator for The Fold Within
- * FIXED: Uses frontmatter date as primary source
+ * REFACTORED: Full metadata extraction from frontmatter
+ *
+ * Priority order for dates:
+ * 1. Frontmatter date (original)
+ * 2. Filename date (YYYY-MM-DD)
+ * 3. Git mtime
+ * 4. Git ctime
  */
 
 import { promises as fs } from "fs";
@@ -17,22 +23,52 @@ const OUT_FEED = path.join(ROOT, "feed.xml");
 const OUT_SCHEMA = path.join(ROOT, "schema.jsonld");
 const EXCERPT_LENGTH = 400;
 
-function extractFrontmatterDate(content) {
-  const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
-  if (fmMatch) {
-    const fm = fmMatch[1];
-    const dateMatch = fm.match(/^date:\s*(\d{4}-\d{2}-\d{2})/m);
-    if (dateMatch) return new Date(dateMatch[1]).getTime();
-  }
-  return null;
+// ═══════════════════════════════════════════════════════════════
+// EXTRACTORS - Pull metadata from frontmatter
+// ═══════════════════════════════════════════════════════════════
+
+function extractFrontmatter(content) {
+  const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
+  if (!fmMatch) return null;
+
+  const fm = fmMatch[1];
+  return {
+    date: fm.match(/^date:\s*(\d{4}-\d{2}-\d{2})/m)?.[1] || null,
+    authors: extractAuthors(fm),
+    notion_id: fm.match(/^notion_id:\s*(.+)$/m)?.[1]?.trim() || null,
+    notion_created: fm.match(/^notion_created:\s*(.+)$/m)?.[1]?.trim() || null,
+    source: fm.match(/^source:\s*(.+)$/m)?.[1]?.trim() || null,
+    tags: extractTags(fm),
+    type: fm.match(/^type:\s*(.+)$/m)?.[1]?.trim() || "fieldnote",
+    status: fm.match(/^status:\s*(.+)$/m)?.[1]?.trim() || "draft",
+    series: fm.match(/^series:\s*(.+)$/m)?.[1]?.trim() || null,
+    version: fm.match(/^version:\s*(.+)$/m)?.[1]?.trim() || "0.1",
+    layer: fm.match(/^layer:\s*(.+)$/m)?.[1]?.trim() || null
+  };
+}
+
+function extractAuthors(fm) {
+  const match = fm.match(/^author[s]?:\s*(.+)$/m);
+  if (!match) return [];
+  return match[1].split(',').map(a => a.trim()).filter(a => a);
+}
+
+function extractTags(fm) {
+  const match = fm.match(/^tags:\s*(.+)$/m);
+  if (!match) return [];
+  return match[1].split(',').map(t => t.trim().toLowerCase()).filter(t => t);
 }
 
+// Fallback: extract from filename
 function dateFromName(name) {
   const m = name.match(/^(\d{4}-\d{2}-\d{2})/);
-  return m ? new Date(m[0]).getTime() : null;
+  return m ? m[1] : null;
 }
 
+// ═══════════════════════════════════════════════════════════════
+// PARSERS - Extract content from files
+// ═══════════════════════════════════════════════════════════════
+
 async function readHead(abs, full = false) {
   const fh = await fs.open(abs, "r");
   const size = full ? await fs.stat(abs).then(s => Math.min(s.size, EXCERPT_LENGTH * 2)) : 64 * 1024;
@@ -54,26 +90,23 @@ function extractExcerpt(raw, ext) {
   return raw.replace(/\s+/g, ' ').slice(0, EXCERPT_LENGTH);
 }
 
-function extractTags(raw, ext, pdfData) {
-  let tags = [];
-  if (ext === ".md") {
-    const m = raw.match(/^\s*tags:\s*(.+)$/im);
-    if (m) tags = m[1].split(',').map(t => t.trim().toLowerCase());
-  }
-  return tags;
-}
+// ═══════════════════════════════════════════════════════════════
+// GENERATORS - Create outputs
+// ═══════════════════════════════════════════════════════════════
 
 function generateSitemap(flat) {
   let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">`;
+
   const staticPages = ["", "/about", "/about/solaria", "/about/mark", "/about/initiatives", "/fieldnotes"];
   for (const page of staticPages) {
     xml += ` <url>\n <loc>${BASE_URL}${page}/</loc>\n <changefreq>weekly</changefreq>\n <priority>${page === "" ? "1.0" : "0.8"}</priority>\n </url>\n`;
   }
-  for (const f of flat.filter(x => !x.isIndex)) {
+
+  for (const f of flat.filter(x => !x.isIndex && x.originalDate)) {
     const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
-    const date = f.originalDate ? new Date(f.originalDate).toISOString().split('T')[0] : new Date(f.mtime).toISOString().split('T')[0];
-    xml += ` <url>\n <loc>${BASE_URL}/${urlPath}</loc>\n <lastmod>${date}</lastmod>\n <changefreq>monthly</changefreq>\n </url>\n`;
+    xml += ` <url>\n <loc>${BASE_URL}/${urlPath}</loc>\n <lastmod>${f.originalDate}</lastmod>\n <changefreq>monthly</changefreq>\n </url>\n`;
   }
+
   return xml + "</urlset>";
 }
 
@@ -82,47 +115,150 @@ function generateRobots() {
 }
 
 function generateFeed(flat) {
-  const items = flat.filter(f => !f.isIndex && f.originalDate).sort((a, b) => b.originalDate - a.originalDate).slice(0, 20);
-  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<rss version="2.0">\n<channel>\n<title>The Fold Within Earth</title>\n<link>${BASE_URL}</link>`;
+  const items = flat
+    .filter(f => !f.isIndex && f.originalDate)
+    .sort((a, b) => new Date(b.originalDate) - new Date(a.originalDate))
+    .slice(0, 20);
+
+  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<rss version="2.0">\n<channel>\n<title>The Fold Within Earth</title>\n<link>${BASE_URL}</link>\n`;
+
   for (const f of items) {
     const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
     xml += ` <item>\n <title>${f.title || f.name}</title>\n <link>${BASE_URL}/${urlPath}</link>\n <pubDate>${new Date(f.originalDate).toUTCString()}</pubDate>\n </item>\n`;
   }
+
   return xml + "</channel>\n</rss>";
 }
 
+function generateSchema(flat, sections, tags) {
+  const org = {
+    "@context": "https://schema.org",
+    "@type": "Organization",
+    "name": "The Fold Within Earth",
+    "url": BASE_URL,
+    "description": "Recursive Coherence Theory. Human-AI Co-evolution. Sacred Geometry of WE.",
+    "foundingDate": "2024",
+    "keywords": tags.join(", ")
+  };
+
+  const website = {
+    "@context": "https://schema.org",
+    "@type": "WebSite",
+    "name": "The Fold Within Earth",
+    "url": BASE_URL
+  };
+
+  return JSON.stringify({ "@graph": [org, website] }, null, 2);
+}
+
+// ═══════════════════════════════════════════════════════════════
+// MAIN COLLECTOR
+// ═══════════════════════════════════════════════════════════════
+
 async function collectFiles(relBase = "", flat = []) {
   const abs = path.join(ROOT, relBase);
   const entries = await fs.readdir(abs, { withFileTypes: true });
+
   for (const e of entries) {
     if (e.name.startsWith(".")) continue;
+
     const rel = path.posix.join(relBase, e.name);
     const absPath = path.join(ROOT, rel);
+
     if (rel.toLowerCase() === "index.html" || rel.toLowerCase() === "index.md") continue;
-    if (e.isDirectory()) { await collectFiles(rel, flat); continue; }
+
+    if (e.isDirectory()) {
+      await collectFiles(rel, flat);
+      continue;
+    }
+
     const ext = path.posix.extname(e.name).toLowerCase();
     if (![".md", ".html", ".pdf"].includes(ext)) continue;
+
     const st = await fs.stat(absPath);
-    let raw = ext === ".pdf" ? (await pdf(await fs.readFile(absPath))).text : await readHead(absPath, true);
+    let raw = ext === ".pdf"
+      ? (await pdf(await fs.readFile(absPath))).text
+      : await readHead(absPath, true);
+
     const title = parseTitle(raw, ext) || e.name.replace(new RegExp(`\\${ext}$`), "").trim();
-    const originalDate = ext === ".md" ? extractFrontmatterDate(raw) : null;
-    const ctime = st.birthtimeMs || st.mtimeMs || dateFromName(e.name) || st.mtimeMs;
-    const mtime = dateFromName(e.name) ?? st.mtimeMs;
-    flat.push({ type: "file", name: e.name, title, path: rel, ext, ctime, mtime, originalDate, excerpt: extractExcerpt(raw, ext), tags: extractTags(raw, ext), isIndex: e.name.toLowerCase().startsWith("index.") });
+    const fm = ext === ".md" ? extractFrontmatter(raw) : null;
+
+    // PRIORITY: frontmatter date → filename → mtime → ctime
+    const datePriority = [
+      fm?.date,
+      dateFromName(e.name),
+      new Date(st.mtimeMs).toISOString().split('T')[0],
+      new Date(st.ctimeMs).toISOString().split('T')[0]
+    ].find(d => d);
+
+    flat.push({
+      name: e.name,
+      title,
+      path: rel,
+      ext,
+      // Core fields (for frontend)
+      date: datePriority,
+      originalDate: fm?.date || dateFromName(e.name) || null,
+      // Metadata from frontmatter
+      authors: fm?.authors || [],
+      notion_id: fm?.notion_id,
+      notion_created: fm?.notion_created,
+      source: fm?.source,
+      tags: fm?.tags || extractTags(raw),
+      type: fm?.type || "fieldnote",
+      status: fm?.status || "draft",
+      series: fm?.series,
+      version: fm?.version || "0.1",
+      layer: fm?.layer,
+      // Content
+      excerpt: extractExcerpt(raw, ext),
+      isIndex: e.name.toLowerCase().startsWith("index."),
+      // Timestamps (for debugging)
+      mtime: new Date(st.mtimeMs).toISOString(),
+      ctime: new Date(st.ctimeMs).toISOString()
+    });
   }
+
   return flat;
 }
 
+// ═══════════════════════════════════════════════════════════════
+// ENTRY POINT
+// ═══════════════════════════════════════════════════════════════
+
 (async () => {
   try {
-    console.log("Crawling...");
+    console.log("🔍 Crawling public directory...");
     const flat = await collectFiles();
     const sections = [...new Set(flat.filter(f => !f.isIndex).map(f => f.path.split("/")[0]))].sort();
     const allTags = [...new Set(flat.flatMap(f => f.tags))].sort();
-    await fs.writeFile(OUT_JSON, JSON.stringify({ flat, sections, tags: allTags, generated: new Date().toISOString() }, null, 2));
+
+    console.log(`📄 Found ${flat.length} files`);
+    console.log(`📁 ${sections.length} sections`);
+    console.log(`🏷️ ${allTags.length} unique tags`);
+
+    // Write outputs
+    await fs.writeFile(OUT_JSON, JSON.stringify({
+      flat,
+      sections,
+      tags: allTags,
+      generated: new Date().toISOString()
+    }, null, 2));
+
     await fs.writeFile(OUT_SITEMAP, generateSitemap(flat));
     await fs.writeFile(OUT_ROBOTS, generateRobots());
     await fs.writeFile(OUT_FEED, generateFeed(flat));
-    console.log(`Done! ${flat.length} files indexed with original dates from frontmatter.`);
-  } catch (e) { console.error("Failed:", e); process.exit(1); }
+    await fs.writeFile(OUT_SCHEMA, generateSchema(flat, sections, allTags));
+
+    console.log(`\n✅ Complete!`);
+    console.log(` • index.json: Full metadata (originalDate, notion_*, authors, source)`);
+    console.log(` • sitemap.xml: Uses originalDate for timestamps`);
+    console.log(` • feed.xml: Sorted by originalDate`);
+    console.log(` • schema.jsonld: Structured data`);
+
+  } catch (e) {
+    console.error("❌ Failed:", e);
+    process.exit(1);
+  }
 })();
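One subtle change buried in this diff: the old generator stored dates as epoch milliseconds (new Date(...).getTime()), while the new one keeps ISO YYYY-MM-DD strings, whose lexicographic order matches chronological order. A quick Python illustration of why the string form is enough for sorting:

# ISO date strings sort correctly as plain strings.
dates = ["2024-03-01", "2023-12-31", "2024-01-15"]
assert sorted(dates) == ["2023-12-31", "2024-01-15", "2024-03-01"]

# The old epoch-milliseconds form needed numeric comparison instead,
# e.g. 1709251200000 for 2024-03-01 (value illustrative, timezone-dependent).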
128 tools/generate-index.mjs.bak (new Executable file)
@@ -0,0 +1,128 @@
#!/usr/bin/env node
/**
 * Enhanced Index Generator for The Fold Within
 * FIXED: Uses frontmatter date as primary source
 */

import { promises as fs } from "fs";
import path from "path";
import pdf from "pdf-parse";

const ROOT = "public";
const BASE_URL = "https://thefoldwithin.earth";
const OUT_JSON = path.join(ROOT, "index.json");
const OUT_SITEMAP = path.join(ROOT, "sitemap.xml");
const OUT_ROBOTS = path.join(ROOT, "robots.txt");
const OUT_FEED = path.join(ROOT, "feed.xml");
const OUT_SCHEMA = path.join(ROOT, "schema.jsonld");
const EXCERPT_LENGTH = 400;

function extractFrontmatterDate(content) {
  const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
  if (fmMatch) {
    const fm = fmMatch[1];
    const dateMatch = fm.match(/^date:\s*(\d{4}-\d{2}-\d{2})/m);
    if (dateMatch) return new Date(dateMatch[1]).getTime();
  }
  return null;
}

function dateFromName(name) {
  const m = name.match(/^(\d{4}-\d{2}-\d{2})/);
  return m ? new Date(m[0]).getTime() : null;
}

async function readHead(abs, full = false) {
  const fh = await fs.open(abs, "r");
  const size = full ? await fs.stat(abs).then(s => Math.min(s.size, EXCERPT_LENGTH * 2)) : 64 * 1024;
  const buf = Buffer.alloc(size);
  const { bytesRead } = await fh.read(buf, 0, size, 0);
  await fh.close();
  return buf.slice(0, bytesRead).toString("utf8");
}

function parseTitle(raw, ext) {
  if (ext === ".md") return raw.match(/^\s*#\s+(.+?)\s*$/m)?.[1].trim();
  if (ext === ".html") return raw.match(/<title[^>]*>([^<]+)<\/title>/i)?.[1].trim();
  return null;
}

function extractExcerpt(raw, ext) {
  if (ext === ".md") raw = raw.replace(/^#.*\n/, '').trim();
  if (ext === ".html") raw = raw.replace(/<head>[\s\S]*<\/head>/i, '').replace(/<[^>]+>/g, ' ').trim();
  return raw.replace(/\s+/g, ' ').slice(0, EXCERPT_LENGTH);
}

function extractTags(raw, ext, pdfData) {
  let tags = [];
  if (ext === ".md") {
    const m = raw.match(/^\s*tags:\s*(.+)$/im);
    if (m) tags = m[1].split(',').map(t => t.trim().toLowerCase());
  }
  return tags;
}

function generateSitemap(flat) {
  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">`;
  const staticPages = ["", "/about", "/about/solaria", "/about/mark", "/about/initiatives", "/fieldnotes"];
  for (const page of staticPages) {
    xml += ` <url>\n <loc>${BASE_URL}${page}/</loc>\n <changefreq>weekly</changefreq>\n <priority>${page === "" ? "1.0" : "0.8"}</priority>\n </url>\n`;
  }
  for (const f of flat.filter(x => !x.isIndex)) {
    const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
    const date = f.originalDate ? new Date(f.originalDate).toISOString().split('T')[0] : new Date(f.mtime).toISOString().split('T')[0];
    xml += ` <url>\n <loc>${BASE_URL}/${urlPath}</loc>\n <lastmod>${date}</lastmod>\n <changefreq>monthly</changefreq>\n </url>\n`;
  }
  return xml + "</urlset>";
}

function generateRobots() {
  return `# robots.txt for The Fold Within Earth\nSitemap: ${BASE_URL}/sitemap.xml\n`;
}

function generateFeed(flat) {
  const items = flat.filter(f => !f.isIndex && f.originalDate).sort((a, b) => b.originalDate - a.originalDate).slice(0, 20);
  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<rss version="2.0">\n<channel>\n<title>The Fold Within Earth</title>\n<link>${BASE_URL}</link>`;
  for (const f of items) {
    const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
    xml += ` <item>\n <title>${f.title || f.name}</title>\n <link>${BASE_URL}/${urlPath}</link>\n <pubDate>${new Date(f.originalDate).toUTCString()}</pubDate>\n </item>\n`;
  }
  return xml + "</channel>\n</rss>";
}

async function collectFiles(relBase = "", flat = []) {
  const abs = path.join(ROOT, relBase);
  const entries = await fs.readdir(abs, { withFileTypes: true });
  for (const e of entries) {
    if (e.name.startsWith(".")) continue;
    const rel = path.posix.join(relBase, e.name);
    const absPath = path.join(ROOT, rel);
    if (rel.toLowerCase() === "index.html" || rel.toLowerCase() === "index.md") continue;
    if (e.isDirectory()) { await collectFiles(rel, flat); continue; }
    const ext = path.posix.extname(e.name).toLowerCase();
    if (![".md", ".html", ".pdf"].includes(ext)) continue;
    const st = await fs.stat(absPath);
    let raw = ext === ".pdf" ? (await pdf(await fs.readFile(absPath))).text : await readHead(absPath, true);
    const title = parseTitle(raw, ext) || e.name.replace(new RegExp(`\\${ext}$`), "").trim();
    const originalDate = ext === ".md" ? extractFrontmatterDate(raw) : null;
    const ctime = st.birthtimeMs || st.mtimeMs || dateFromName(e.name) || st.mtimeMs;
    const mtime = dateFromName(e.name) ?? st.mtimeMs;
    flat.push({ type: "file", name: e.name, title, path: rel, ext, ctime, mtime, originalDate, excerpt: extractExcerpt(raw, ext), tags: extractTags(raw, ext), isIndex: e.name.toLowerCase().startsWith("index.") });
  }
  return flat;
}

(async () => {
  try {
    console.log("Crawling...");
    const flat = await collectFiles();
    const sections = [...new Set(flat.filter(f => !f.isIndex).map(f => f.path.split("/")[0]))].sort();
    const allTags = [...new Set(flat.flatMap(f => f.tags))].sort();
    await fs.writeFile(OUT_JSON, JSON.stringify({ flat, sections, tags: allTags, generated: new Date().toISOString() }, null, 2));
    await fs.writeFile(OUT_SITEMAP, generateSitemap(flat));
    await fs.writeFile(OUT_ROBOTS, generateRobots());
    await fs.writeFile(OUT_FEED, generateFeed(flat));
    console.log(`Done! ${flat.length} files indexed with original dates from frontmatter.`);
  } catch (e) { console.error("Failed:", e); process.exit(1); }
})();