refactor: Replace generator with enhanced version
Some checks are pending
Auto Changelog / changelog (push) Waiting to run
Coherence Check / coherence-check (push) Waiting to run
Coherence Check / coherence (push) Waiting to run
Security Scan / security (push) Waiting to run
Semantic Versioning / version (push) Waiting to run

- Extracts full frontmatter metadata (originalDate, notion_*, authors, source)
- Correct date priority: frontmatter → filename → mtime → ctime
- All metadata exposed in index.json for frontend use

Phase 1 quick win complete.
This commit is contained in:
Solaria Lumis Havens 2026-02-14 14:45:51 +00:00
parent 87cfa7e083
commit d0cf2e3061
26 changed files with 2621 additions and 299 deletions

371
tools/coherence-check.py Normal file
View file

@ -0,0 +1,371 @@
#!/usr/bin/env python3
"""
Coherence Check Script for The Fold Within Earth
Validates fieldnote frontmatter, checks for broken links,
and verifies metadata completeness. Outputs report as JSON.
"""
import argparse
import json
import os
import re
import sys
from datetime import datetime
from pathlib import Path
from typing import Any
import yaml
# Configuration
# Required frontmatter fields for every fieldnote, mapped to the Python
# type that yaml.safe_load is expected to produce for them.
FRONTMATTER_REQUIRED = {
    "title": str,
    "date": str,
    "author": str,
    "type": str,
    "status": str,
}
# Optional fields; a tuple of types means any of them is acceptable.
FRONTMATTER_OPTIONAL = {
    "version": (str, int, float),
    "series": str,
    "layer": str,
    "tags": list,
    "notion_id": str,
    "notion_created": str,
    "source": str,
}
# Allowed values for the "layer" and "status" frontmatter fields.
VALID_LAYERS = ["first", "second", "third", "fourth"]
VALID_STATUSES = ["published", "draft", "archived", "review"]
class CoherenceChecker:
"""Main coherence checking class."""
def __init__(self, root_path: str = ".", output_path: str = None):
self.root_path = Path(root_path)
self.output_path = output_path or "coherence-report.json"
self.issues: list[dict] = []
self.warnings: list[dict] = []
self.validated_files: list[dict] = []
self.start_time = datetime.now()
def parse_frontmatter(self, content: str) -> tuple[dict | None, str | None]:
"""Parse YAML frontmatter from markdown content."""
# Match frontmatter between --- markers
match = re.match(r'^---\n(.*?)\n---(.*)$', content, re.DOTALL)
if not match:
return None, content
try:
frontmatter = yaml.safe_load(match.group(1))
content_body = match.group(2)
return frontmatter, content_body
except yaml.YAMLError as e:
return None, content
def check_frontmatter(self, file_path: Path, content: str) -> dict | None:
"""Check frontmatter for a single file."""
frontmatter, body = self.parse_frontmatter(content)
if frontmatter is None:
return {
"file": str(file_path.relative_to(self.root_path)),
"type": "frontmatter-missing",
"severity": "critical",
"message": "No frontmatter found",
"suggestion": "Add YAML frontmatter between --- markers"
}
issues = []
# Check required fields
for field, expected_type in FRONTMATTER_REQUIRED.items():
if field not in frontmatter:
issues.append({
"field": field,
"type": "frontmatter-required-missing",
"severity": "critical",
"message": f"Required field '{field}' is missing",
"suggestion": f"Add {field}: <value> to frontmatter"
})
elif not isinstance(frontmatter[field], expected_type):
issues.append({
"field": field,
"type": "frontmatter-type-error",
"severity": "high",
"message": f"Field '{field}' has wrong type",
"suggestion": f"Expected {expected_type}, got {type(frontmatter[field]).__name__}"
})
# Validate specific fields
if "status" in frontmatter:
if frontmatter["status"] not in VALID_STATUSES:
issues.append({
"field": "status",
"type": "frontmatter-validation-error",
"severity": "medium",
"message": f"Invalid status: '{frontmatter['status']}'",
"suggestion": f"Status must be one of: {', '.join(VALID_STATUSES)}"
})
if "layer" in frontmatter:
if frontmatter["layer"] not in VALID_LAYERS:
issues.append({
"field": "layer",
"type": "frontmatter-validation-error",
"severity": "medium",
"message": f"Invalid layer: '{frontmatter['layer']}'",
"suggestion": f"Layer must be one of: {', '.join(VALID_LAYERS)}"
})
# Check tags format
if "tags" in frontmatter:
if isinstance(frontmatter["tags"], str):
issues.append({
"field": "tags",
"type": "frontmatter-format-error",
"severity": "low",
"message": "Tags should be a list, not a comma-separated string",
"suggestion": "Change tags to a YAML list format"
})
return {
"file": str(file_path.relative_to(self.root_path)),
"has_frontmatter": True,
"issues": issues,
"frontmatter": {k: v for k, v in frontmatter.items() if k in FRONTMATTER_REQUIRED}
} if issues else {
"file": str(file_path.relative_to(self.root_path)),
"has_frontmatter": True,
"issues": [],
"frontmatter": {k: v for k, v in frontmatter.items() if k in FRONTMATTER_REQUIRED}
}
def check_links(self, content: str, base_path: Path) -> list[dict]:
"""Check for broken or malformed links."""
issues = []
# Match markdown links
link_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
matches = re.findall(link_pattern, content)
for link_text, link_url in matches:
# Skip external URLs
if link_url.startswith(('http://', 'https://', 'mailto:', '#')):
continue
# Check internal links
link_path = link_url.split('#')[0]
if link_path.startswith('/'):
# Absolute path
full_path = self.root_path / link_path.lstrip('/')
else:
# Relative path
full_path = base_path.parent / link_path
if not full_path.exists():
issues.append({
"file": str(base_path.relative_to(self.root_path)),
"type": "broken-link",
"severity": "high",
"link": link_url,
"message": f"Broken link: {link_url}",
"suggestion": f"Update link to point to existing file or remove"
})
return issues
def check_metadata_file(self, file_path: Path) -> dict | None:
"""Check metadata.yaml file completeness."""
if not file_path.exists():
return {
"file": str(file_path.relative_to(self.root_path)),
"type": "metadata-missing",
"severity": "high",
"message": "metadata.yaml file not found",
"suggestion": "Create metadata.yaml with required fields"
}
try:
with open(file_path) as f:
metadata = yaml.safe_load(f)
except yaml.YAMLError as e:
return {
"file": str(file_path.relative_to(self.root_path)),
"type": "metadata-invalid",
"severity": "critical",
"message": f"Invalid YAML: {e}",
"suggestion": "Fix YAML syntax errors"
}
if metadata is None:
return {
"file": str(file_path.relative_to(self.root_path)),
"type": "metadata-empty",
"severity": "high",
"message": "metadata.yaml is empty",
"suggestion": "Add required metadata fields"
}
return None
def scan_content(self) -> dict:
"""Scan all content files for coherence issues."""
content_path = self.root_path / "content"
if not content_path.exists():
return {
"status": "warning",
"message": "Content directory not found",
"files_validated": 0,
"issues": self.issues,
"warnings": self.warnings
}
# Find all markdown files
md_files = list(content_path.rglob("*.md"))
for md_file in md_files:
try:
with open(md_file) as f:
content = f.read()
# Skip index files
if md_file.name.lower() in ("index.md", "readme.md"):
continue
# Check frontmatter
result = self.check_frontmatter(md_file, content)
if result:
if result.get("issues"):
self.issues.extend(result["issues"])
self.validated_files.append(result)
# Check links
link_issues = self.check_links(content, md_file)
self.issues.extend(link_issues)
# Check for corresponding metadata.yaml
metadata_file = md_file.parent / "metadata.yaml"
if md_file.name.startswith(tuple(str(i) for i in range(10))): # Date-prefixed files
metadata_issue = self.check_metadata_file(metadata_file)
if metadata_issue:
self.issues.append(metadata_issue)
except Exception as e:
self.warnings.append({
"file": str(md_file.relative_to(self.root_path)),
"message": f"Error processing file: {e}"
})
return self.generate_report()
def generate_report(self) -> dict:
"""Generate the final coherence report."""
end_time = datetime.now()
duration = (end_time - self.start_time).total_seconds()
# Calculate coherence score
total_files = len(self.validated_files)
files_with_issues = len(set(
i["file"] for i in self.issues if "file" in i
))
coherence_score = max(0, 100 - (files_with_issues / max(1, total_files) * 20))
# Group issues by type
issues_by_type = {}
for issue in self.issues:
issue_type = issue.get("type", "unknown")
if issue_type not in issues_by_type:
issues_by_type[issue_type] = []
issues_by_type[issue_type].append(issue)
report = {
"timestamp": self.start_time.isoformat(),
"duration_seconds": duration,
"status": "critical" if any(i.get("severity") == "critical" for i in self.issues) else "warning" if self.issues else "healthy",
"coherence_score": round(coherence_score, 2),
"summary": {
"total_files_validated": total_files,
"total_issues": len(self.issues),
"total_warnings": len(self.warnings),
"critical_issues": len([i for i in self.issues if i.get("severity") == "critical"]),
"high_issues": len([i for i in self.issues if i.get("severity") == "high"]),
"medium_issues": len([i for i in self.issues if i.get("severity") == "medium"]),
"low_issues": len([i for i in self.issues if i.get("severity") == "low"]),
},
"issues_by_type": {k: len(v) for k, v in issues_by_type.items()},
"issues": self.issues,
"warnings": self.warnings,
"validated_files": self.validated_files,
"auto_fixable": [
i for i in self.issues
if i.get("type") in ("frontmatter-missing", "frontmatter-required-missing", "metadata-empty")
]
}
return report
def save_report(self, report: dict = None) -> str:
"""Save report to JSON file."""
if report is None:
report = self.scan_content()
output_path = Path(self.output_path)
with open(output_path, "w") as f:
json.dump(report, f, indent=2, default=str)
return str(output_path)
def run(self) -> dict:
"""Run the full coherence check."""
print(f"🔍 Starting coherence check at {self.start_time.isoformat()}")
print(f"📁 Root path: {self.root_path}")
report = self.scan_content()
# Print summary
print(f"\n📊 Coherence Score: {report['coherence_score']}/100")
print(f" Files validated: {report['summary']['total_files_validated']}")
print(f" Issues found: {report['summary']['total_issues']}")
if report['summary']['critical_issues']:
print(f" 🔴 Critical: {report['summary']['critical_issues']}")
if report['summary']['high_issues']:
print(f" 🟠 High: {report['summary']['high_issues']}")
if report['summary']['medium_issues']:
print(f" 🟡 Medium: {report['summary']['medium_issues']}")
if report['summary']['low_issues']:
print(f" 🟢 Low: {report['summary']['low_issues']}")
# Save report
report_path = self.save_report(report)
print(f"\n📄 Report saved to: {report_path}")
return report
def main() -> None:
    """CLI entry point: parse arguments, run the check, exit by severity.

    Exit codes: 0 healthy, 1 warnings, 2 critical issues — so CI can gate.
    """
    parser = argparse.ArgumentParser(description="Coherence Check for The Fold Within Earth")
    parser.add_argument("--root", "-r", default=".", help="Root path to scan (default: current directory)")
    parser.add_argument("--output", "-o", default="coherence-report.json", help="Output file path")
    parser.add_argument("--check-only", action="store_true", help="Only check, don't save report")
    args = parser.parse_args()
    checker = CoherenceChecker(args.root, args.output)
    if args.check_only:
        # BUG FIX: --check-only was parsed but never honored; run the scan
        # without writing the report file.
        report = checker.scan_content()
    else:
        report = checker.run()
    if report["status"] == "critical":
        sys.exit(2)
    elif report["status"] == "warning":
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()

View file

@ -1,264 +0,0 @@
#!/usr/bin/env node
/**
* Enhanced Index Generator for The Fold Within
* REFACTORED: Full metadata extraction from frontmatter
*
* Priority order for dates:
* 1. Frontmatter date (original)
* 2. Filename date (YYYY-MM-DD)
* 3. Git mtime
* 4. Git ctime
*/
import { promises as fs } from "fs";
import path from "path";
import pdf from "pdf-parse";
// Output locations: everything is generated into the public/ tree.
const ROOT = "public";
const BASE_URL = "https://thefoldwithin.earth";
const OUT_JSON = path.join(ROOT, "index.json");     // site index consumed by the frontend
const OUT_SITEMAP = path.join(ROOT, "sitemap.xml");
const OUT_ROBOTS = path.join(ROOT, "robots.txt");
const OUT_FEED = path.join(ROOT, "feed.xml");
const OUT_SCHEMA = path.join(ROOT, "schema.jsonld");
const EXCERPT_LENGTH = 400;                         // max characters per excerpt
// ═══════════════════════════════════════════════════════════════
// EXTRACTORS - Pull metadata from frontmatter
// ═══════════════════════════════════════════════════════════════
/**
 * Parse the frontmatter block at the top of a markdown document.
 * Returns null when no `--- ... ---` block opens the file; otherwise an
 * object with every known metadata field (missing fields fall back to
 * null or a default).
 */
function extractFrontmatter(content) {
  const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
  if (!fmMatch) return null;
  const fm = fmMatch[1];
  // One trimmed-value reader shared by all simple `key: value` fields.
  const field = (name) => fm.match(new RegExp(`^${name}:\\s*(.+)$`, "m"))?.[1]?.trim() || null;
  return {
    date: fm.match(/^date:\s*(\d{4}-\d{2}-\d{2})/m)?.[1] || null,
    authors: extractAuthors(fm),
    notion_id: field("notion_id"),
    notion_created: field("notion_created"),
    source: field("source"),
    tags: extractTags(fm),
    type: field("type") || "fieldnote",
    status: field("status") || "draft",
    series: field("series"),
    version: field("version") || "0.1",
    layer: field("layer")
  };
}
/** Split an `author:`/`authors:` frontmatter line into a trimmed list (empty if absent). */
function extractAuthors(fm) {
  const line = fm.match(/^author[s]?:\s*(.+)$/m);
  if (!line) return [];
  return line[1]
    .split(',')
    .map((name) => name.trim())
    .filter(Boolean);
}
/** Split a `tags:` frontmatter line into lowercase trimmed tags (empty if absent). */
function extractTags(fm) {
  const line = fm.match(/^tags:\s*(.+)$/m);
  return line
    ? line[1].split(',').map((t) => t.trim().toLowerCase()).filter(Boolean)
    : [];
}
// Fallback: extract from filename
/** Fallback date source: a YYYY-MM-DD prefix on the filename, else null. */
function dateFromName(name) {
  const match = /^(\d{4}-\d{2}-\d{2})/.exec(name);
  return match ? match[1] : null;
}
// ═══════════════════════════════════════════════════════════════
// PARSERS - Extract content from files
// ═══════════════════════════════════════════════════════════
/**
 * Read the head of a file without pulling the whole thing into memory.
 * full=true caps the read at twice the excerpt budget (or the file size,
 * whichever is smaller); otherwise the first 64 KiB is read.
 * Returns the bytes decoded as UTF-8.
 */
async function readHead(abs, full = false) {
  const fh = await fs.open(abs, "r");
  const size = full ? await fs.stat(abs).then(s => Math.min(s.size, EXCERPT_LENGTH * 2)) : 64 * 1024;
  const buf = Buffer.alloc(size);
  const { bytesRead } = await fh.read(buf, 0, size, 0);
  await fh.close();
  return buf.slice(0, bytesRead).toString("utf8");
}
/** Pull a document title: first `# ` heading (md) or <title> tag (html); null otherwise. */
function parseTitle(raw, ext) {
  switch (ext) {
    case ".md":
      return raw.match(/^\s*#\s+(.+?)\s*$/m)?.[1].trim();
    case ".html":
      return raw.match(/<title[^>]*>([^<]+)<\/title>/i)?.[1].trim();
    default:
      return null;
  }
}
/** First EXCERPT_LENGTH chars of body text, stripped of heading/markup, whitespace collapsed. */
function extractExcerpt(raw, ext) {
  let text = raw;
  if (ext === ".md") text = text.replace(/^#.*\n/, '').trim();
  if (ext === ".html") text = text.replace(/<head>[\s\S]*<\/head>/i, '').replace(/<[^>]+>/g, ' ').trim();
  return text.replace(/\s+/g, ' ').slice(0, EXCERPT_LENGTH);
}
// ═══════════════════════════════════════════════════════════════
// GENERATORS - Create outputs
// ═══════════════════════════════════════════════════════════
/**
 * Build sitemap.xml: the fixed top-level pages plus one entry per indexed
 * file that has an original (frontmatter/filename) date.
 */
function generateSitemap(flat) {
  const staticPages = ["", "/about", "/about/solaria", "/about/mark", "/about/initiatives", "/fieldnotes"];
  const parts = [`<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">`];
  for (const page of staticPages) {
    parts.push(` <url>\n <loc>${BASE_URL}${page}/</loc>\n <changefreq>weekly</changefreq>\n <priority>${page === "" ? "1.0" : "0.8"}</priority>\n </url>\n`);
  }
  for (const f of flat.filter(x => !x.isIndex && x.originalDate)) {
    const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
    parts.push(` <url>\n <loc>${BASE_URL}/${urlPath}</loc>\n <lastmod>${f.originalDate}</lastmod>\n <changefreq>monthly</changefreq>\n </url>\n`);
  }
  parts.push("</urlset>");
  return parts.join("");
}
/** robots.txt: just advertise the sitemap location. */
function generateRobots() {
  return ["# robots.txt for The Fold Within Earth", `Sitemap: ${BASE_URL}/sitemap.xml`, ""].join("\n");
}
/** RSS feed of the 20 newest dated, non-index files. */
function generateFeed(flat) {
  const newest = flat
    .filter(f => !f.isIndex && f.originalDate)
    .sort((a, b) => new Date(b.originalDate) - new Date(a.originalDate))
    .slice(0, 20);
  const parts = [`<?xml version="1.0" encoding="UTF-8"?>\n<rss version="2.0">\n<channel>\n<title>The Fold Within Earth</title>\n<link>${BASE_URL}</link>\n`];
  for (const f of newest) {
    const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
    parts.push(` <item>\n <title>${f.title || f.name}</title>\n <link>${BASE_URL}/${urlPath}</link>\n <pubDate>${new Date(f.originalDate).toUTCString()}</pubDate>\n </item>\n`);
  }
  parts.push("</channel>\n</rss>");
  return parts.join("");
}
/**
 * JSON-LD structured data: an Organization node plus a WebSite node in a
 * single @graph. `flat` and `sections` are accepted for interface symmetry
 * with the other generators but are not used.
 */
function generateSchema(flat, sections, tags) {
  const graph = [
    {
      "@context": "https://schema.org",
      "@type": "Organization",
      "name": "The Fold Within Earth",
      "url": BASE_URL,
      "description": "Recursive Coherence Theory. Human-AI Co-evolution. Sacred Geometry of WE.",
      "foundingDate": "2024",
      "keywords": tags.join(", ")
    },
    {
      "@context": "https://schema.org",
      "@type": "WebSite",
      "name": "The Fold Within Earth",
      "url": BASE_URL
    }
  ];
  return JSON.stringify({ "@graph": graph }, null, 2);
}
// ═══════════════════════════════════════════════════════════════
// MAIN COLLECTOR
// ═══════════════════════════════════════════════════════════════
/**
 * Recursively walk ROOT, collecting one metadata record per content file
 * (.md/.html/.pdf). Hidden entries and the top-level index pages are skipped.
 *
 * BUG FIX: the pushed record declared `type` twice — `type: "file"` and,
 * further down, `type: fm?.type || "fieldnote"`. The later key silently
 * shadowed the first, so `type` was always the content type. The entry
 * kind now lives in a new `kind` field; `type` keeps its effective
 * (content-type) meaning, so existing consumers are unaffected.
 */
async function collectFiles(relBase = "", flat = []) {
  const abs = path.join(ROOT, relBase);
  const entries = await fs.readdir(abs, { withFileTypes: true });
  for (const e of entries) {
    if (e.name.startsWith(".")) continue; // hidden files/dirs
    const rel = path.posix.join(relBase, e.name);
    const absPath = path.join(ROOT, rel);
    // Top-level index pages are generated separately.
    if (rel.toLowerCase() === "index.html" || rel.toLowerCase() === "index.md") continue;
    if (e.isDirectory()) {
      await collectFiles(rel, flat);
      continue;
    }
    const ext = path.posix.extname(e.name).toLowerCase();
    if (![".md", ".html", ".pdf"].includes(ext)) continue;
    const st = await fs.stat(absPath);
    // PDFs need full text extraction; md/html only need the head.
    let raw = ext === ".pdf"
      ? (await pdf(await fs.readFile(absPath))).text
      : await readHead(absPath, true);
    const title = parseTitle(raw, ext) || e.name.replace(new RegExp(`\\${ext}$`), "").trim();
    const fm = ext === ".md" ? extractFrontmatter(raw) : null;
    // PRIORITY: frontmatter date → filename → mtime → ctime
    const datePriority = [
      fm?.date,
      dateFromName(e.name),
      new Date(st.mtimeMs).toISOString().split('T')[0],
      new Date(st.ctimeMs).toISOString().split('T')[0]
    ].find(d => d);
    flat.push({
      kind: "file", // entry kind (was the shadowed `type: "file"` duplicate)
      name: e.name,
      title,
      path: rel,
      ext,
      // Core fields (for frontend)
      date: datePriority,
      originalDate: fm?.date || dateFromName(e.name) || null,
      // Metadata from frontmatter
      authors: fm?.authors || [],
      notion_id: fm?.notion_id,
      notion_created: fm?.notion_created,
      source: fm?.source,
      tags: fm?.tags || extractTags(raw), // non-md: scan the raw text directly
      type: fm?.type || "fieldnote",      // content type, e.g. "fieldnote"
      status: fm?.status || "draft",
      series: fm?.series,
      version: fm?.version || "0.1",
      layer: fm?.layer,
      // Content
      excerpt: extractExcerpt(raw, ext),
      isIndex: e.name.toLowerCase().startsWith("index."),
      // Timestamps (for debugging)
      mtime: new Date(st.mtimeMs).toISOString(),
      ctime: new Date(st.ctimeMs).toISOString()
    });
  }
  return flat;
}
// ═══════════════════════════════════════════════════════════════
// ENTRY POINT
// ═══════════════════════════════════════════════════════════════
// Entry point: crawl the public tree, then emit index.json, sitemap.xml,
// robots.txt, feed.xml, and schema.jsonld. Exits non-zero on any failure.
(async () => {
  try {
    console.log("🔍 Crawling public directory...");
    const flat = await collectFiles();
    // Top-level directory names double as site sections.
    const sections = [...new Set(flat.filter(f => !f.isIndex).map(f => f.path.split("/")[0]))].sort();
    const allTags = [...new Set(flat.flatMap(f => f.tags))].sort();
    console.log(`📄 Found ${flat.length} files`);
    console.log(`📁 ${sections.length} sections`);
    console.log(`🏷️ ${allTags.length} unique tags`);
    // Write outputs
    await fs.writeFile(OUT_JSON, JSON.stringify({
      flat,
      sections,
      tags: allTags,
      generated: new Date().toISOString()
    }, null, 2));
    await fs.writeFile(OUT_SITEMAP, generateSitemap(flat));
    await fs.writeFile(OUT_ROBOTS, generateRobots());
    await fs.writeFile(OUT_FEED, generateFeed(flat));
    await fs.writeFile(OUT_SCHEMA, generateSchema(flat, sections, allTags));
    console.log(`\n✅ Complete!`);
    console.log(` • index.json: Full metadata (originalDate, notion_*, authors, source)`);
    console.log(` • sitemap.xml: Uses originalDate for timestamps`);
    console.log(` • feed.xml: Sorted by originalDate`);
    console.log(` • schema.jsonld: Structured data`);
  } catch (e) {
    console.error("❌ Failed:", e);
    process.exit(1);
  }
})();

204
tools/generate-index.mjs Executable file → Normal file
View file

@ -1,7 +1,13 @@
#!/usr/bin/env node
/**
* Enhanced Index Generator for The Fold Within
* FIXED: Uses frontmatter date as primary source
* REFACTORED: Full metadata extraction from frontmatter
*
* Priority order for dates:
* 1. Frontmatter date (original)
* 2. Filename date (YYYY-MM-DD)
* 3. Git mtime
* 4. Git ctime
*/
import { promises as fs } from "fs";
@ -17,22 +23,52 @@ const OUT_FEED = path.join(ROOT, "feed.xml");
const OUT_SCHEMA = path.join(ROOT, "schema.jsonld");
const EXCERPT_LENGTH = 400;
function extractFrontmatterDate(content) {
const fmMatch = content.match(/^---\n([\s\S]*?)
---/);
if (fmMatch) {
const fm = fmMatch[1];
const dateMatch = fm.match(/^date:\s*(\d{4}-\d{2}-\d{2})/m);
if (dateMatch) return new Date(dateMatch[1]).getTime();
}
return null;
// ═══════════════════════════════════════════════════════════════
// EXTRACTORS - Pull metadata from frontmatter
// ═══════════════════════════════════════════════════════════════
function extractFrontmatter(content) {
const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
if (!fmMatch) return null;
const fm = fmMatch[1];
return {
date: fm.match(/^date:\s*(\d{4}-\d{2}-\d{2})/m)?.[1] || null,
authors: extractAuthors(fm),
notion_id: fm.match(/^notion_id:\s*(.+)$/m)?.[1]?.trim() || null,
notion_created: fm.match(/^notion_created:\s*(.+)$/m)?.[1]?.trim() || null,
source: fm.match(/^source:\s*(.+)$/m)?.[1]?.trim() || null,
tags: extractTags(fm),
type: fm.match(/^type:\s*(.+)$/m)?.[1]?.trim() || "fieldnote",
status: fm.match(/^status:\s*(.+)$/m)?.[1]?.trim() || "draft",
series: fm.match(/^series:\s*(.+)$/m)?.[1]?.trim() || null,
version: fm.match(/^version:\s*(.+)$/m)?.[1]?.trim() || "0.1",
layer: fm.match(/^layer:\s*(.+)$/m)?.[1]?.trim() || null
};
}
function extractAuthors(fm) {
const match = fm.match(/^author[s]?:\s*(.+)$/m);
if (!match) return [];
return match[1].split(',').map(a => a.trim()).filter(a => a);
}
function extractTags(fm) {
const match = fm.match(/^tags:\s*(.+)$/m);
if (!match) return [];
return match[1].split(',').map(t => t.trim().toLowerCase()).filter(t => t);
}
// Fallback: extract from filename
function dateFromName(name) {
const m = name.match(/^(\d{4}-\d{2}-\d{2})/);
return m ? new Date(m[0]).getTime() : null;
return m ? m[1] : null;
}
// ═══════════════════════════════════════════════════════════════
// PARSERS - Extract content from files
// ═══════════════════════════════════════════════════════════
async function readHead(abs, full = false) {
const fh = await fs.open(abs, "r");
const size = full ? await fs.stat(abs).then(s => Math.min(s.size, EXCERPT_LENGTH * 2)) : 64 * 1024;
@ -54,26 +90,23 @@ function extractExcerpt(raw, ext) {
return raw.replace(/\s+/g, ' ').slice(0, EXCERPT_LENGTH);
}
function extractTags(raw, ext, pdfData) {
let tags = [];
if (ext === ".md") {
const m = raw.match(/^\s*tags:\s*(.+)$/im);
if (m) tags = m[1].split(',').map(t => t.trim().toLowerCase());
}
return tags;
}
// ═══════════════════════════════════════════════════════════════
// GENERATORS - Create outputs
// ═══════════════════════════════════════════════════════════
function generateSitemap(flat) {
let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">`;
const staticPages = ["", "/about", "/about/solaria", "/about/mark", "/about/initiatives", "/fieldnotes"];
for (const page of staticPages) {
xml += ` <url>\n <loc>${BASE_URL}${page}/</loc>\n <changefreq>weekly</changefreq>\n <priority>${page === "" ? "1.0" : "0.8"}</priority>\n </url>\n`;
}
for (const f of flat.filter(x => !x.isIndex)) {
for (const f of flat.filter(x => !x.isIndex && x.originalDate)) {
const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
const date = f.originalDate ? new Date(f.originalDate).toISOString().split('T')[0] : new Date(f.mtime).toISOString().split('T')[0];
xml += ` <url>\n <loc>${BASE_URL}/${urlPath}</loc>\n <lastmod>${date}</lastmod>\n <changefreq>monthly</changefreq>\n </url>\n`;
xml += ` <url>\n <loc>${BASE_URL}/${urlPath}</loc>\n <lastmod>${f.originalDate}</lastmod>\n <changefreq>monthly</changefreq>\n </url>\n`;
}
return xml + "</urlset>";
}
@ -82,47 +115,150 @@ function generateRobots() {
}
function generateFeed(flat) {
const items = flat.filter(f => !f.isIndex && f.originalDate).sort((a, b) => b.originalDate - a.originalDate).slice(0, 20);
let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<rss version="2.0">\n<channel>\n<title>The Fold Within Earth</title>\n<link>${BASE_URL}</link>`;
const items = flat
.filter(f => !f.isIndex && f.originalDate)
.sort((a, b) => new Date(b.originalDate) - new Date(a.originalDate))
.slice(0, 20);
let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<rss version="2.0">\n<channel>\n<title>The Fold Within Earth</title>\n<link>${BASE_URL}</link>\n`;
for (const f of items) {
const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
xml += ` <item>\n <title>${f.title || f.name}</title>\n <link>${BASE_URL}/${urlPath}</link>\n <pubDate>${new Date(f.originalDate).toUTCString()}</pubDate>\n </item>\n`;
}
return xml + "</channel>\n</rss>";
}
function generateSchema(flat, sections, tags) {
const org = {
"@context": "https://schema.org",
"@type": "Organization",
"name": "The Fold Within Earth",
"url": BASE_URL,
"description": "Recursive Coherence Theory. Human-AI Co-evolution. Sacred Geometry of WE.",
"foundingDate": "2024",
"keywords": tags.join(", ")
};
const website = {
"@context": "https://schema.org",
"@type": "WebSite",
"name": "The Fold Within Earth",
"url": BASE_URL
};
return JSON.stringify({ "@graph": [org, website] }, null, 2);
}
// ═══════════════════════════════════════════════════════════════
// MAIN COLLECTOR
// ═══════════════════════════════════════════════════════════════
async function collectFiles(relBase = "", flat = []) {
const abs = path.join(ROOT, relBase);
const entries = await fs.readdir(abs, { withFileTypes: true });
for (const e of entries) {
if (e.name.startsWith(".")) continue;
const rel = path.posix.join(relBase, e.name);
const absPath = path.join(ROOT, rel);
if (rel.toLowerCase() === "index.html" || rel.toLowerCase() === "index.md") continue;
if (e.isDirectory()) { await collectFiles(rel, flat); continue; }
if (e.isDirectory()) {
await collectFiles(rel, flat);
continue;
}
const ext = path.posix.extname(e.name).toLowerCase();
if (![".md", ".html", ".pdf"].includes(ext)) continue;
const st = await fs.stat(absPath);
let raw = ext === ".pdf" ? (await pdf(await fs.readFile(absPath))).text : await readHead(absPath, true);
let raw = ext === ".pdf"
? (await pdf(await fs.readFile(absPath))).text
: await readHead(absPath, true);
const title = parseTitle(raw, ext) || e.name.replace(new RegExp(`\\${ext}$`), "").trim();
const originalDate = ext === ".md" ? extractFrontmatterDate(raw) : null;
const ctime = st.birthtimeMs || st.mtimeMs || dateFromName(e.name) || st.mtimeMs;
const mtime = dateFromName(e.name) ?? st.mtimeMs;
flat.push({ type: "file", name: e.name, title, path: rel, ext, ctime, mtime, originalDate, excerpt: extractExcerpt(raw, ext), tags: extractTags(raw, ext), isIndex: e.name.toLowerCase().startsWith("index.") });
const fm = ext === ".md" ? extractFrontmatter(raw) : null;
// PRIORITY: frontmatter date → filename → mtime → ctime
const datePriority = [
fm?.date,
dateFromName(e.name),
new Date(st.mtimeMs).toISOString().split('T')[0],
new Date(st.ctimeMs).toISOString().split('T')[0]
].find(d => d);
flat.push({
type: "file",
name: e.name,
title,
path: rel,
ext,
// Core fields (for frontend)
date: datePriority,
originalDate: fm?.date || dateFromName(e.name) || null,
// Metadata from frontmatter
authors: fm?.authors || [],
notion_id: fm?.notion_id,
notion_created: fm?.notion_created,
source: fm?.source,
tags: fm?.tags || extractTags(raw, ext),
type: fm?.type || "fieldnote",
status: fm?.status || "draft",
series: fm?.series,
version: fm?.version || "0.1",
layer: fm?.layer,
// Content
excerpt: extractExcerpt(raw, ext),
isIndex: e.name.toLowerCase().startsWith("index."),
// Timestamps (for debugging)
mtime: new Date(st.mtimeMs).toISOString(),
ctime: new Date(st.ctimeMs).toISOString()
});
}
return flat;
}
// ═══════════════════════════════════════════════════════════════
// ENTRY POINT
// ═══════════════════════════════════════════════════════════════
(async () => {
try {
console.log("Crawling...");
console.log("🔍 Crawling public directory...");
const flat = await collectFiles();
const sections = [...new Set(flat.filter(f => !f.isIndex).map(f => f.path.split("/")[0]))].sort();
const allTags = [...new Set(flat.flatMap(f => f.tags))].sort();
await fs.writeFile(OUT_JSON, JSON.stringify({ flat, sections, tags: allTags, generated: new Date().toISOString() }, null, 2));
console.log(`📄 Found ${flat.length} files`);
console.log(`📁 ${sections.length} sections`);
console.log(`🏷️ ${allTags.length} unique tags`);
// Write outputs
await fs.writeFile(OUT_JSON, JSON.stringify({
flat,
sections,
tags: allTags,
generated: new Date().toISOString()
}, null, 2));
await fs.writeFile(OUT_SITEMAP, generateSitemap(flat));
await fs.writeFile(OUT_ROBOTS, generateRobots());
await fs.writeFile(OUT_FEED, generateFeed(flat));
console.log(`Done! ${flat.length} files indexed with original dates from frontmatter.`);
} catch (e) { console.error("Failed:", e); process.exit(1); }
await fs.writeFile(OUT_SCHEMA, generateSchema(flat, sections, allTags));
console.log(`\n✅ Complete!`);
console.log(` • index.json: Full metadata (originalDate, notion_*, authors, source)`);
console.log(` • sitemap.xml: Uses originalDate for timestamps`);
console.log(` • feed.xml: Sorted by originalDate`);
console.log(` • schema.jsonld: Structured data`);
} catch (e) {
console.error("❌ Failed:", e);
process.exit(1);
}
})();

128
tools/generate-index.mjs.bak Executable file
View file

@ -0,0 +1,128 @@
#!/usr/bin/env node
/**
* Enhanced Index Generator for The Fold Within
* FIXED: Uses frontmatter date as primary source
*/
import { promises as fs } from "fs";
import path from "path";
import pdf from "pdf-parse";
const ROOT = "public";
const BASE_URL = "https://thefoldwithin.earth";
const OUT_JSON = path.join(ROOT, "index.json");
const OUT_SITEMAP = path.join(ROOT, "sitemap.xml");
const OUT_ROBOTS = path.join(ROOT, "robots.txt");
const OUT_FEED = path.join(ROOT, "feed.xml");
const OUT_SCHEMA = path.join(ROOT, "schema.jsonld");
const EXCERPT_LENGTH = 400;
/**
 * Extract the frontmatter `date:` (YYYY-MM-DD) as a Unix-epoch millisecond
 * timestamp, or null when there is no frontmatter or no date line.
 * FIX: in this copy the regex literal was split across two physical lines
 * (a literal newline inside the /.../ literal, which is invalid JS);
 * restored as a single-line literal with \n in the pattern.
 */
function extractFrontmatterDate(content) {
  const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
  if (fmMatch) {
    const fm = fmMatch[1];
    const dateMatch = fm.match(/^date:\s*(\d{4}-\d{2}-\d{2})/m);
    if (dateMatch) return new Date(dateMatch[1]).getTime();
  }
  return null;
}
// Fallback date source: YYYY-MM-DD filename prefix, as epoch ms (or null).
function dateFromName(name) {
  const m = name.match(/^(\d{4}-\d{2}-\d{2})/);
  return m ? new Date(m[0]).getTime() : null;
}
// Read the head of a file: full=true caps at twice the excerpt budget,
// otherwise the first 64 KiB. Returns UTF-8 text.
async function readHead(abs, full = false) {
  const fh = await fs.open(abs, "r");
  const size = full ? await fs.stat(abs).then(s => Math.min(s.size, EXCERPT_LENGTH * 2)) : 64 * 1024;
  const buf = Buffer.alloc(size);
  const { bytesRead } = await fh.read(buf, 0, size, 0);
  await fh.close();
  return buf.slice(0, bytesRead).toString("utf8");
}
// Title: first `# ` heading (md) or <title> tag (html); null otherwise.
function parseTitle(raw, ext) {
  if (ext === ".md") return raw.match(/^\s*#\s+(.+?)\s*$/m)?.[1].trim();
  if (ext === ".html") return raw.match(/<title[^>]*>([^<]+)<\/title>/i)?.[1].trim();
  return null;
}
// Excerpt: strip heading/markup, collapse whitespace, cap at EXCERPT_LENGTH.
function extractExcerpt(raw, ext) {
  if (ext === ".md") raw = raw.replace(/^#.*\n/, '').trim();
  if (ext === ".html") raw = raw.replace(/<head>[\s\S]*<\/head>/i, '').replace(/<[^>]+>/g, ' ').trim();
  return raw.replace(/\s+/g, ' ').slice(0, EXCERPT_LENGTH);
}
// Tags: comma-separated `tags:` line, markdown only (pdfData is unused here).
function extractTags(raw, ext, pdfData) {
  let tags = [];
  if (ext === ".md") {
    const m = raw.match(/^\s*tags:\s*(.+)$/im);
    if (m) tags = m[1].split(',').map(t => t.trim().toLowerCase());
  }
  return tags;
}
// sitemap.xml: fixed top-level pages plus one entry per indexed file.
function generateSitemap(flat) {
  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">`;
  const staticPages = ["", "/about", "/about/solaria", "/about/mark", "/about/initiatives", "/fieldnotes"];
  for (const page of staticPages) {
    xml += ` <url>\n <loc>${BASE_URL}${page}/</loc>\n <changefreq>weekly</changefreq>\n <priority>${page === "" ? "1.0" : "0.8"}</priority>\n </url>\n`;
  }
  for (const f of flat.filter(x => !x.isIndex)) {
    const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
    // lastmod prefers the frontmatter date; falls back to filesystem mtime.
    const date = f.originalDate ? new Date(f.originalDate).toISOString().split('T')[0] : new Date(f.mtime).toISOString().split('T')[0];
    xml += ` <url>\n <loc>${BASE_URL}/${urlPath}</loc>\n <lastmod>${date}</lastmod>\n <changefreq>monthly</changefreq>\n </url>\n`;
  }
  return xml + "</urlset>";
}
// robots.txt: just advertises the sitemap.
function generateRobots() {
  return `# robots.txt for The Fold Within Earth\nSitemap: ${BASE_URL}/sitemap.xml\n`;
}
// feed.xml: RSS of the 20 newest dated, non-index files (dates are epoch ms here).
function generateFeed(flat) {
  const items = flat.filter(f => !f.isIndex && f.originalDate).sort((a, b) => b.originalDate - a.originalDate).slice(0, 20);
  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<rss version="2.0">\n<channel>\n<title>The Fold Within Earth</title>\n<link>${BASE_URL}</link>`;
  for (const f of items) {
    const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
    xml += ` <item>\n <title>${f.title || f.name}</title>\n <link>${BASE_URL}/${urlPath}</link>\n <pubDate>${new Date(f.originalDate).toUTCString()}</pubDate>\n </item>\n`;
  }
  return xml + "</channel>\n</rss>";
}
// Walk ROOT recursively and collect one record per .md/.html/.pdf file.
async function collectFiles(relBase = "", flat = []) {
  const abs = path.join(ROOT, relBase);
  const entries = await fs.readdir(abs, { withFileTypes: true });
  for (const e of entries) {
    if (e.name.startsWith(".")) continue; // hidden files/dirs
    const rel = path.posix.join(relBase, e.name);
    const absPath = path.join(ROOT, rel);
    if (rel.toLowerCase() === "index.html" || rel.toLowerCase() === "index.md") continue;
    if (e.isDirectory()) { await collectFiles(rel, flat); continue; }
    const ext = path.posix.extname(e.name).toLowerCase();
    if (![".md", ".html", ".pdf"].includes(ext)) continue;
    const st = await fs.stat(absPath);
    // PDFs need full text extraction; md/html only need the head.
    let raw = ext === ".pdf" ? (await pdf(await fs.readFile(absPath))).text : await readHead(absPath, true);
    const title = parseTitle(raw, ext) || e.name.replace(new RegExp(`\\${ext}$`), "").trim();
    const originalDate = ext === ".md" ? extractFrontmatterDate(raw) : null;
    // NOTE(review): this fallback chain lists st.mtimeMs twice, so the
    // trailing dateFromName/mtimeMs terms are effectively dead unless
    // mtimeMs is 0 — presumably unintended; confirm before relying on it.
    const ctime = st.birthtimeMs || st.mtimeMs || dateFromName(e.name) || st.mtimeMs;
    const mtime = dateFromName(e.name) ?? st.mtimeMs;
    flat.push({ type: "file", name: e.name, title, path: rel, ext, ctime, mtime, originalDate, excerpt: extractExcerpt(raw, ext), tags: extractTags(raw, ext), isIndex: e.name.toLowerCase().startsWith("index.") });
  }
  return flat;
}
// Entry point: crawl, then write index.json, sitemap.xml, robots.txt, feed.xml.
(async () => {
  try {
    console.log("Crawling...");
    const flat = await collectFiles();
    // Top-level directory names double as site sections.
    const sections = [...new Set(flat.filter(f => !f.isIndex).map(f => f.path.split("/")[0]))].sort();
    const allTags = [...new Set(flat.flatMap(f => f.tags))].sort();
    await fs.writeFile(OUT_JSON, JSON.stringify({ flat, sections, tags: allTags, generated: new Date().toISOString() }, null, 2));
    await fs.writeFile(OUT_SITEMAP, generateSitemap(flat));
    await fs.writeFile(OUT_ROBOTS, generateRobots());
    await fs.writeFile(OUT_FEED, generateFeed(flat));
    console.log(`Done! ${flat.length} files indexed with original dates from frontmatter.`);
  } catch (e) { console.error("Failed:", e); process.exit(1); }
})();