refactor: Replace generator with enhanced version
- Extracts full frontmatter metadata (originalDate, notion_*, authors, source)
- Correct date priority: frontmatter → filename → mtime → ctime
- All metadata exposed in index.json for frontend use

Phase 1 quick win complete.
parent 87cfa7e083
commit d0cf2e3061
26 changed files with 2621 additions and 299 deletions
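The date priority in the message above is a simple first-non-empty fallback chain. A minimal Python sketch of the rule (resolve_date is a hypothetical illustration, not code from this commit; the real logic is the datePriority array in collectFiles() in tools/generate-index.mjs):

# Hypothetical sketch of the date-priority rule from the commit message.
def resolve_date(fm_date, filename_date, mtime_date, ctime_date):
    """Return the first available date: frontmatter → filename → mtime → ctime."""
    for candidate in (fm_date, filename_date, mtime_date, ctime_date):
        if candidate:
            return candidate
    return None

# A file with no frontmatter date but a dated filename resolves to the filename date.
assert resolve_date(None, "2024-03-01", "2025-01-15", "2025-01-15") == "2024-03-01"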
371 tools/coherence-check.py (new file)
@@ -0,0 +1,371 @@
#!/usr/bin/env python3
"""
Coherence Check Script for The Fold Within Earth

Validates fieldnote frontmatter, checks for broken links,
and verifies metadata completeness. Outputs report as JSON.
"""

import argparse
import json
import re
import sys
from datetime import datetime
from pathlib import Path

import yaml


# Configuration
FRONTMATTER_REQUIRED = {
    "title": str,
    "date": str,
    "author": str,
    "type": str,
    "status": str,
}

FRONTMATTER_OPTIONAL = {
    "version": (str, int, float),
    "series": str,
    "layer": str,
    "tags": list,
    "notion_id": str,
    "notion_created": str,
    "source": str,
}

VALID_LAYERS = ["first", "second", "third", "fourth"]
VALID_STATUSES = ["published", "draft", "archived", "review"]


class CoherenceChecker:
    """Main coherence checking class."""

    def __init__(self, root_path: str = ".", output_path: str | None = None):
        self.root_path = Path(root_path)
        self.output_path = output_path or "coherence-report.json"
        self.issues: list[dict] = []
        self.warnings: list[dict] = []
        self.validated_files: list[dict] = []
        self.start_time = datetime.now()

    def parse_frontmatter(self, content: str) -> tuple[dict | None, str]:
        """Parse YAML frontmatter from markdown content."""
        # Match frontmatter between --- markers
        match = re.match(r'^---\n(.*?)\n---(.*)$', content, re.DOTALL)
        if not match:
            return None, content

        try:
            frontmatter = yaml.safe_load(match.group(1))
            content_body = match.group(2)
            return frontmatter, content_body
        except yaml.YAMLError:
            return None, content

    def check_frontmatter(self, file_path: Path, content: str) -> dict | None:
        """Check frontmatter for a single file."""
        frontmatter, _ = self.parse_frontmatter(content)

        if frontmatter is None:
            return {
                "file": str(file_path.relative_to(self.root_path)),
                "type": "frontmatter-missing",
                "severity": "critical",
                "message": "No frontmatter found",
                "suggestion": "Add YAML frontmatter between --- markers"
            }

        issues = []

        # Check required fields
        for field, expected_type in FRONTMATTER_REQUIRED.items():
            if field not in frontmatter:
                issues.append({
                    "field": field,
                    "type": "frontmatter-required-missing",
                    "severity": "critical",
                    "message": f"Required field '{field}' is missing",
                    "suggestion": f"Add {field}: <value> to frontmatter"
                })
            elif not isinstance(frontmatter[field], expected_type):
                issues.append({
                    "field": field,
                    "type": "frontmatter-type-error",
                    "severity": "high",
                    "message": f"Field '{field}' has wrong type",
                    "suggestion": f"Expected {expected_type}, got {type(frontmatter[field]).__name__}"
                })

        # Validate specific fields
        if "status" in frontmatter and frontmatter["status"] not in VALID_STATUSES:
            issues.append({
                "field": "status",
                "type": "frontmatter-validation-error",
                "severity": "medium",
                "message": f"Invalid status: '{frontmatter['status']}'",
                "suggestion": f"Status must be one of: {', '.join(VALID_STATUSES)}"
            })

        if "layer" in frontmatter and frontmatter["layer"] not in VALID_LAYERS:
            issues.append({
                "field": "layer",
                "type": "frontmatter-validation-error",
                "severity": "medium",
                "message": f"Invalid layer: '{frontmatter['layer']}'",
                "suggestion": f"Layer must be one of: {', '.join(VALID_LAYERS)}"
            })

        # Check tags format
        if "tags" in frontmatter and isinstance(frontmatter["tags"], str):
            issues.append({
                "field": "tags",
                "type": "frontmatter-format-error",
                "severity": "low",
                "message": "Tags should be a list, not a comma-separated string",
                "suggestion": "Change tags to a YAML list format"
            })

        return {
            "file": str(file_path.relative_to(self.root_path)),
            "has_frontmatter": True,
            "issues": issues,
            "frontmatter": {k: v for k, v in frontmatter.items() if k in FRONTMATTER_REQUIRED}
        }

    def check_links(self, content: str, base_path: Path) -> list[dict]:
        """Check for broken or malformed links."""
        issues = []

        # Match markdown links
        link_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
        matches = re.findall(link_pattern, content)

        for link_text, link_url in matches:
            # Skip external URLs and in-page anchors
            if link_url.startswith(('http://', 'https://', 'mailto:', '#')):
                continue

            # Check internal links
            link_path = link_url.split('#')[0]
            if link_path.startswith('/'):
                # Absolute path
                full_path = self.root_path / link_path.lstrip('/')
            else:
                # Relative path
                full_path = base_path.parent / link_path

            if not full_path.exists():
                issues.append({
                    "file": str(base_path.relative_to(self.root_path)),
                    "type": "broken-link",
                    "severity": "high",
                    "link": link_url,
                    "message": f"Broken link: {link_url}",
                    "suggestion": "Update link to point to an existing file, or remove it"
                })

        return issues

    def check_metadata_file(self, file_path: Path) -> dict | None:
        """Check metadata.yaml file completeness."""
        if not file_path.exists():
            return {
                "file": str(file_path.relative_to(self.root_path)),
                "type": "metadata-missing",
                "severity": "high",
                "message": "metadata.yaml file not found",
                "suggestion": "Create metadata.yaml with required fields"
            }

        try:
            with open(file_path) as f:
                metadata = yaml.safe_load(f)
        except yaml.YAMLError as e:
            return {
                "file": str(file_path.relative_to(self.root_path)),
                "type": "metadata-invalid",
                "severity": "critical",
                "message": f"Invalid YAML: {e}",
                "suggestion": "Fix YAML syntax errors"
            }

        if metadata is None:
            return {
                "file": str(file_path.relative_to(self.root_path)),
                "type": "metadata-empty",
                "severity": "high",
                "message": "metadata.yaml is empty",
                "suggestion": "Add required metadata fields"
            }

        return None

    def scan_content(self) -> dict:
        """Scan all content files for coherence issues."""
        content_path = self.root_path / "content"

        if not content_path.exists():
            # Record the problem but still return a full-shape report,
            # so run() can read coherence_score and summary safely.
            self.warnings.append({
                "file": "content",
                "message": "Content directory not found"
            })
            return self.generate_report()

        # Find all markdown files
        md_files = list(content_path.rglob("*.md"))

        for md_file in md_files:
            # Skip index files
            if md_file.name.lower() in ("index.md", "readme.md"):
                continue

            try:
                with open(md_file) as f:
                    content = f.read()

                # Check frontmatter
                result = self.check_frontmatter(md_file, content)
                if result:
                    if result.get("issues"):
                        self.issues.extend(result["issues"])
                    self.validated_files.append(result)

                # Check links
                link_issues = self.check_links(content, md_file)
                self.issues.extend(link_issues)

                # Check for corresponding metadata.yaml
                metadata_file = md_file.parent / "metadata.yaml"
                if md_file.name[:1].isdigit():  # Date-prefixed files
                    metadata_issue = self.check_metadata_file(metadata_file)
                    if metadata_issue:
                        self.issues.append(metadata_issue)

            except Exception as e:
                self.warnings.append({
                    "file": str(md_file.relative_to(self.root_path)),
                    "message": f"Error processing file: {e}"
                })

        return self.generate_report()

    def generate_report(self) -> dict:
        """Generate the final coherence report."""
        end_time = datetime.now()
        duration = (end_time - self.start_time).total_seconds()

        # Calculate coherence score
        total_files = len(self.validated_files)
        files_with_issues = len(set(
            i["file"] for i in self.issues if "file" in i
        ))
        coherence_score = max(0, 100 - (files_with_issues / max(1, total_files) * 20))

        # Group issues by type
        issues_by_type = {}
        for issue in self.issues:
            issue_type = issue.get("type", "unknown")
            if issue_type not in issues_by_type:
                issues_by_type[issue_type] = []
            issues_by_type[issue_type].append(issue)

        report = {
            "timestamp": self.start_time.isoformat(),
            "duration_seconds": duration,
            "status": (
                "critical" if any(i.get("severity") == "critical" for i in self.issues)
                else "warning" if self.issues
                else "healthy"
            ),
            "coherence_score": round(coherence_score, 2),
            "summary": {
                "total_files_validated": total_files,
                "total_issues": len(self.issues),
                "total_warnings": len(self.warnings),
                "critical_issues": len([i for i in self.issues if i.get("severity") == "critical"]),
                "high_issues": len([i for i in self.issues if i.get("severity") == "high"]),
                "medium_issues": len([i for i in self.issues if i.get("severity") == "medium"]),
                "low_issues": len([i for i in self.issues if i.get("severity") == "low"]),
            },
            "issues_by_type": {k: len(v) for k, v in issues_by_type.items()},
            "issues": self.issues,
            "warnings": self.warnings,
            "validated_files": self.validated_files,
            "auto_fixable": [
                i for i in self.issues
                if i.get("type") in ("frontmatter-missing", "frontmatter-required-missing", "metadata-empty")
            ]
        }

        return report

    def save_report(self, report: dict | None = None) -> str:
        """Save report to JSON file."""
        if report is None:
            report = self.scan_content()

        output_path = Path(self.output_path)
        with open(output_path, "w") as f:
            json.dump(report, f, indent=2, default=str)

        return str(output_path)

    def run(self, save: bool = True) -> dict:
        """Run the full coherence check."""
        print(f"🔍 Starting coherence check at {self.start_time.isoformat()}")
        print(f"📁 Root path: {self.root_path}")

        report = self.scan_content()

        # Print summary
        print(f"\n📊 Coherence Score: {report['coherence_score']}/100")
        print(f" Files validated: {report['summary']['total_files_validated']}")
        print(f" Issues found: {report['summary']['total_issues']}")
        if report['summary']['critical_issues']:
            print(f" 🔴 Critical: {report['summary']['critical_issues']}")
        if report['summary']['high_issues']:
            print(f" 🟠 High: {report['summary']['high_issues']}")
        if report['summary']['medium_issues']:
            print(f" 🟡 Medium: {report['summary']['medium_issues']}")
        if report['summary']['low_issues']:
            print(f" 🟢 Low: {report['summary']['low_issues']}")

        # Save report
        if save:
            report_path = self.save_report(report)
            print(f"\n📄 Report saved to: {report_path}")

        return report


def main():
    parser = argparse.ArgumentParser(description="Coherence Check for The Fold Within Earth")
    parser.add_argument("--root", "-r", default=".", help="Root path to scan (default: current directory)")
    parser.add_argument("--output", "-o", default="coherence-report.json", help="Output file path")
    parser.add_argument("--check-only", action="store_true", help="Only check, don't save report")

    args = parser.parse_args()

    checker = CoherenceChecker(args.root, args.output)
    report = checker.run(save=not args.check_only)

    # Exit with error code if critical issues found
    if report["status"] == "critical":
        sys.exit(2)
    elif report["status"] == "warning":
        sys.exit(1)
    else:
        sys.exit(0)


if __name__ == "__main__":
    main()
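The checker can also be driven programmatically instead of through the CLI. A short usage sketch (assumes the script is importable, e.g. a copy saved as coherence_check.py; paths are illustrative):

# Usage sketch: run the checker from another script and filter the report.
from coherence_check import CoherenceChecker  # assumes an importable copy of the script

checker = CoherenceChecker(root_path=".", output_path="coherence-report.json")
report = checker.run()  # scans ./content/**/*.md, prints a summary, saves JSON

# Surface only the serious problems.
for issue in report["issues"]:
    if issue.get("severity") in ("critical", "high"):
        print(issue.get("file", issue.get("field", "?")), "-", issue["message"])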
@@ -1,264 +0,0 @@
#!/usr/bin/env node
/**
 * Enhanced Index Generator for The Fold Within
 * REFACTORED: Full metadata extraction from frontmatter
 *
 * Priority order for dates:
 * 1. Frontmatter date (original)
 * 2. Filename date (YYYY-MM-DD)
 * 3. Git mtime
 * 4. Git ctime
 */

import { promises as fs } from "fs";
import path from "path";
import pdf from "pdf-parse";

const ROOT = "public";
const BASE_URL = "https://thefoldwithin.earth";
const OUT_JSON = path.join(ROOT, "index.json");
const OUT_SITEMAP = path.join(ROOT, "sitemap.xml");
const OUT_ROBOTS = path.join(ROOT, "robots.txt");
const OUT_FEED = path.join(ROOT, "feed.xml");
const OUT_SCHEMA = path.join(ROOT, "schema.jsonld");
const EXCERPT_LENGTH = 400;

// ═══════════════════════════════════════════════════════════════
// EXTRACTORS - Pull metadata from frontmatter
// ═══════════════════════════════════════════════════════════════

function extractFrontmatter(content) {
  const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
  if (!fmMatch) return null;

  const fm = fmMatch[1];
  return {
    date: fm.match(/^date:\s*(\d{4}-\d{2}-\d{2})/m)?.[1] || null,
    authors: extractAuthors(fm),
    notion_id: fm.match(/^notion_id:\s*(.+)$/m)?.[1]?.trim() || null,
    notion_created: fm.match(/^notion_created:\s*(.+)$/m)?.[1]?.trim() || null,
    source: fm.match(/^source:\s*(.+)$/m)?.[1]?.trim() || null,
    tags: extractTags(fm),
    type: fm.match(/^type:\s*(.+)$/m)?.[1]?.trim() || "fieldnote",
    status: fm.match(/^status:\s*(.+)$/m)?.[1]?.trim() || "draft",
    series: fm.match(/^series:\s*(.+)$/m)?.[1]?.trim() || null,
    version: fm.match(/^version:\s*(.+)$/m)?.[1]?.trim() || "0.1",
    layer: fm.match(/^layer:\s*(.+)$/m)?.[1]?.trim() || null
  };
}

function extractAuthors(fm) {
  const match = fm.match(/^author[s]?:\s*(.+)$/m);
  if (!match) return [];
  return match[1].split(',').map(a => a.trim()).filter(a => a);
}

function extractTags(fm) {
  const match = fm.match(/^tags:\s*(.+)$/m);
  if (!match) return [];
  return match[1].split(',').map(t => t.trim().toLowerCase()).filter(t => t);
}

// Fallback: extract from filename
function dateFromName(name) {
  const m = name.match(/^(\d{4}-\d{2}-\d{2})/);
  return m ? m[1] : null;
}

// ═══════════════════════════════════════════════════════════════
// PARSERS - Extract content from files
// ═══════════════════════════════════════════════════════════════

async function readHead(abs, full = false) {
  const fh = await fs.open(abs, "r");
  const size = full ? await fs.stat(abs).then(s => Math.min(s.size, EXCERPT_LENGTH * 2)) : 64 * 1024;
  const buf = Buffer.alloc(size);
  const { bytesRead } = await fh.read(buf, 0, size, 0);
  await fh.close();
  return buf.slice(0, bytesRead).toString("utf8");
}

function parseTitle(raw, ext) {
  if (ext === ".md") return raw.match(/^\s*#\s+(.+?)\s*$/m)?.[1].trim();
  if (ext === ".html") return raw.match(/<title[^>]*>([^<]+)<\/title>/i)?.[1].trim();
  return null;
}

function extractExcerpt(raw, ext) {
  if (ext === ".md") raw = raw.replace(/^#.*\n/, '').trim();
  if (ext === ".html") raw = raw.replace(/<head>[\s\S]*<\/head>/i, '').replace(/<[^>]+>/g, ' ').trim();
  return raw.replace(/\s+/g, ' ').slice(0, EXCERPT_LENGTH);
}

// ═══════════════════════════════════════════════════════════════
// GENERATORS - Create outputs
// ═══════════════════════════════════════════════════════════════

function generateSitemap(flat) {
  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">`;

  const staticPages = ["", "/about", "/about/solaria", "/about/mark", "/about/initiatives", "/fieldnotes"];
  for (const page of staticPages) {
    xml += ` <url>\n <loc>${BASE_URL}${page}/</loc>\n <changefreq>weekly</changefreq>\n <priority>${page === "" ? "1.0" : "0.8"}</priority>\n </url>\n`;
  }

  for (const f of flat.filter(x => !x.isIndex && x.originalDate)) {
    const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
    xml += ` <url>\n <loc>${BASE_URL}/${urlPath}</loc>\n <lastmod>${f.originalDate}</lastmod>\n <changefreq>monthly</changefreq>\n </url>\n`;
  }

  return xml + "</urlset>";
}

function generateRobots() {
  return `# robots.txt for The Fold Within Earth\nSitemap: ${BASE_URL}/sitemap.xml\n`;
}

function generateFeed(flat) {
  const items = flat
    .filter(f => !f.isIndex && f.originalDate)
    .sort((a, b) => new Date(b.originalDate) - new Date(a.originalDate))
    .slice(0, 20);

  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<rss version="2.0">\n<channel>\n<title>The Fold Within Earth</title>\n<link>${BASE_URL}</link>\n`;

  for (const f of items) {
    const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
    xml += ` <item>\n <title>${f.title || f.name}</title>\n <link>${BASE_URL}/${urlPath}</link>\n <pubDate>${new Date(f.originalDate).toUTCString()}</pubDate>\n </item>\n`;
  }

  return xml + "</channel>\n</rss>";
}

function generateSchema(flat, sections, tags) {
  const org = {
    "@context": "https://schema.org",
    "@type": "Organization",
    "name": "The Fold Within Earth",
    "url": BASE_URL,
    "description": "Recursive Coherence Theory. Human-AI Co-evolution. Sacred Geometry of WE.",
    "foundingDate": "2024",
    "keywords": tags.join(", ")
  };

  const website = {
    "@context": "https://schema.org",
    "@type": "WebSite",
    "name": "The Fold Within Earth",
    "url": BASE_URL
  };

  return JSON.stringify({ "@graph": [org, website] }, null, 2);
}

// ═══════════════════════════════════════════════════════════════
// MAIN COLLECTOR
// ═══════════════════════════════════════════════════════════════

async function collectFiles(relBase = "", flat = []) {
  const abs = path.join(ROOT, relBase);
  const entries = await fs.readdir(abs, { withFileTypes: true });

  for (const e of entries) {
    if (e.name.startsWith(".")) continue;

    const rel = path.posix.join(relBase, e.name);
    const absPath = path.join(ROOT, rel);

    if (rel.toLowerCase() === "index.html" || rel.toLowerCase() === "index.md") continue;

    if (e.isDirectory()) {
      await collectFiles(rel, flat);
      continue;
    }

    const ext = path.posix.extname(e.name).toLowerCase();
    if (![".md", ".html", ".pdf"].includes(ext)) continue;

    const st = await fs.stat(absPath);
    let raw = ext === ".pdf"
      ? (await pdf(await fs.readFile(absPath))).text
      : await readHead(absPath, true);

    const title = parseTitle(raw, ext) || e.name.replace(new RegExp(`\\${ext}$`), "").trim();
    const fm = ext === ".md" ? extractFrontmatter(raw) : null;

    // PRIORITY: frontmatter date → filename → mtime → ctime
    const datePriority = [
      fm?.date,
      dateFromName(e.name),
      new Date(st.mtimeMs).toISOString().split('T')[0],
      new Date(st.ctimeMs).toISOString().split('T')[0]
    ].find(d => d);

    flat.push({
      name: e.name,
      title,
      path: rel,
      ext,
      // Core fields (for frontend)
      date: datePriority,
      originalDate: fm?.date || dateFromName(e.name) || null,
      // Metadata from frontmatter
      authors: fm?.authors || [],
      notion_id: fm?.notion_id,
      notion_created: fm?.notion_created,
      source: fm?.source,
      tags: fm?.tags || extractTags(raw),
      type: fm?.type || "fieldnote",
      status: fm?.status || "draft",
      series: fm?.series,
      version: fm?.version || "0.1",
      layer: fm?.layer,
      // Content
      excerpt: extractExcerpt(raw, ext),
      isIndex: e.name.toLowerCase().startsWith("index."),
      // Timestamps (for debugging)
      mtime: new Date(st.mtimeMs).toISOString(),
      ctime: new Date(st.ctimeMs).toISOString()
    });
  }

  return flat;
}

// ═══════════════════════════════════════════════════════════════
// ENTRY POINT
// ═══════════════════════════════════════════════════════════════

(async () => {
  try {
    console.log("🔍 Crawling public directory...");
    const flat = await collectFiles();
    const sections = [...new Set(flat.filter(f => !f.isIndex).map(f => f.path.split("/")[0]))].sort();
    const allTags = [...new Set(flat.flatMap(f => f.tags))].sort();

    console.log(`📄 Found ${flat.length} files`);
    console.log(`📁 ${sections.length} sections`);
    console.log(`🏷️ ${allTags.length} unique tags`);

    // Write outputs
    await fs.writeFile(OUT_JSON, JSON.stringify({
      flat,
      sections,
      tags: allTags,
      generated: new Date().toISOString()
    }, null, 2));

    await fs.writeFile(OUT_SITEMAP, generateSitemap(flat));
    await fs.writeFile(OUT_ROBOTS, generateRobots());
    await fs.writeFile(OUT_FEED, generateFeed(flat));
    await fs.writeFile(OUT_SCHEMA, generateSchema(flat, sections, allTags));

    console.log(`\n✅ Complete!`);
    console.log(` • index.json: Full metadata (originalDate, notion_*, authors, source)`);
    console.log(` • sitemap.xml: Uses originalDate for timestamps`);
    console.log(` • feed.xml: Sorted by originalDate`);
    console.log(` • schema.jsonld: Structured data`);

  } catch (e) {
    console.error("❌ Failed:", e);
    process.exit(1);
  }
})();
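Everything collectFiles() pushes into flat lands verbatim in index.json, which is what "all metadata exposed for frontend use" means in practice. A minimal Python sketch of a consumer (assumes the generator has already written public/index.json; field names are the ones emitted above):

import json
from pathlib import Path

# Load the generated index.
index = json.loads(Path("public/index.json").read_text(encoding="utf-8"))

# Published fieldnotes with a real (frontmatter- or filename-derived) date.
published = [
    f for f in index["flat"]
    if not f["isIndex"] and f["status"] == "published" and f["originalDate"]
]
# ISO YYYY-MM-DD strings sort lexically, so no date parsing is needed.
published.sort(key=lambda f: f["originalDate"], reverse=True)

for f in published[:10]:
    print(f["originalDate"], f["title"], ", ".join(f["tags"]))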
204 tools/generate-index.mjs (Executable file → Normal file)
@@ -1,7 +1,13 @@
 #!/usr/bin/env node
 /**
  * Enhanced Index Generator for The Fold Within
- * FIXED: Uses frontmatter date as primary source
+ * REFACTORED: Full metadata extraction from frontmatter
+ *
+ * Priority order for dates:
+ * 1. Frontmatter date (original)
+ * 2. Filename date (YYYY-MM-DD)
+ * 3. Git mtime
+ * 4. Git ctime
  */
 
 import { promises as fs } from "fs";
@@ -17,22 +23,52 @@ const OUT_FEED = path.join(ROOT, "feed.xml");
 const OUT_SCHEMA = path.join(ROOT, "schema.jsonld");
 const EXCERPT_LENGTH = 400;
 
-function extractFrontmatterDate(content) {
-  const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
-  if (fmMatch) {
-    const fm = fmMatch[1];
-    const dateMatch = fm.match(/^date:\s*(\d{4}-\d{2}-\d{2})/m);
-    if (dateMatch) return new Date(dateMatch[1]).getTime();
-  }
-  return null;
+// ═══════════════════════════════════════════════════════════════
+// EXTRACTORS - Pull metadata from frontmatter
+// ═══════════════════════════════════════════════════════════════
+
+function extractFrontmatter(content) {
+  const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
+  if (!fmMatch) return null;
+
+  const fm = fmMatch[1];
+  return {
+    date: fm.match(/^date:\s*(\d{4}-\d{2}-\d{2})/m)?.[1] || null,
+    authors: extractAuthors(fm),
+    notion_id: fm.match(/^notion_id:\s*(.+)$/m)?.[1]?.trim() || null,
+    notion_created: fm.match(/^notion_created:\s*(.+)$/m)?.[1]?.trim() || null,
+    source: fm.match(/^source:\s*(.+)$/m)?.[1]?.trim() || null,
+    tags: extractTags(fm),
+    type: fm.match(/^type:\s*(.+)$/m)?.[1]?.trim() || "fieldnote",
+    status: fm.match(/^status:\s*(.+)$/m)?.[1]?.trim() || "draft",
+    series: fm.match(/^series:\s*(.+)$/m)?.[1]?.trim() || null,
+    version: fm.match(/^version:\s*(.+)$/m)?.[1]?.trim() || "0.1",
+    layer: fm.match(/^layer:\s*(.+)$/m)?.[1]?.trim() || null
+  };
+}
+
+function extractAuthors(fm) {
+  const match = fm.match(/^author[s]?:\s*(.+)$/m);
+  if (!match) return [];
+  return match[1].split(',').map(a => a.trim()).filter(a => a);
+}
+
+function extractTags(fm) {
+  const match = fm.match(/^tags:\s*(.+)$/m);
+  if (!match) return [];
+  return match[1].split(',').map(t => t.trim().toLowerCase()).filter(t => t);
 }
 
+// Fallback: extract from filename
 function dateFromName(name) {
   const m = name.match(/^(\d{4}-\d{2}-\d{2})/);
-  return m ? new Date(m[0]).getTime() : null;
+  return m ? m[1] : null;
 }
 
+// ═══════════════════════════════════════════════════════════════
+// PARSERS - Extract content from files
+// ═══════════════════════════════════════════════════════════════
+
 async function readHead(abs, full = false) {
   const fh = await fs.open(abs, "r");
   const size = full ? await fs.stat(abs).then(s => Math.min(s.size, EXCERPT_LENGTH * 2)) : 64 * 1024;
@@ -54,26 +90,23 @@ function extractExcerpt(raw, ext) {
   return raw.replace(/\s+/g, ' ').slice(0, EXCERPT_LENGTH);
 }
 
-function extractTags(raw, ext, pdfData) {
-  let tags = [];
-  if (ext === ".md") {
-    const m = raw.match(/^\s*tags:\s*(.+)$/im);
-    if (m) tags = m[1].split(',').map(t => t.trim().toLowerCase());
-  }
-  return tags;
-}
+// ═══════════════════════════════════════════════════════════════
+// GENERATORS - Create outputs
+// ═══════════════════════════════════════════════════════════════
 
 function generateSitemap(flat) {
   let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">`;
+
   const staticPages = ["", "/about", "/about/solaria", "/about/mark", "/about/initiatives", "/fieldnotes"];
   for (const page of staticPages) {
     xml += ` <url>\n <loc>${BASE_URL}${page}/</loc>\n <changefreq>weekly</changefreq>\n <priority>${page === "" ? "1.0" : "0.8"}</priority>\n </url>\n`;
   }
-  for (const f of flat.filter(x => !x.isIndex)) {
+
+  for (const f of flat.filter(x => !x.isIndex && x.originalDate)) {
     const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
-    const date = f.originalDate ? new Date(f.originalDate).toISOString().split('T')[0] : new Date(f.mtime).toISOString().split('T')[0];
-    xml += ` <url>\n <loc>${BASE_URL}/${urlPath}</loc>\n <lastmod>${date}</lastmod>\n <changefreq>monthly</changefreq>\n </url>\n`;
+    xml += ` <url>\n <loc>${BASE_URL}/${urlPath}</loc>\n <lastmod>${f.originalDate}</lastmod>\n <changefreq>monthly</changefreq>\n </url>\n`;
   }
+
   return xml + "</urlset>";
 }
 
@@ -82,47 +115,150 @@ function generateRobots() {
 }
 
 function generateFeed(flat) {
-  const items = flat.filter(f => !f.isIndex && f.originalDate).sort((a, b) => b.originalDate - a.originalDate).slice(0, 20);
-  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<rss version="2.0">\n<channel>\n<title>The Fold Within Earth</title>\n<link>${BASE_URL}</link>`;
+  const items = flat
+    .filter(f => !f.isIndex && f.originalDate)
+    .sort((a, b) => new Date(b.originalDate) - new Date(a.originalDate))
+    .slice(0, 20);
+
+  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<rss version="2.0">\n<channel>\n<title>The Fold Within Earth</title>\n<link>${BASE_URL}</link>\n`;
+
   for (const f of items) {
     const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
     xml += ` <item>\n <title>${f.title || f.name}</title>\n <link>${BASE_URL}/${urlPath}</link>\n <pubDate>${new Date(f.originalDate).toUTCString()}</pubDate>\n </item>\n`;
   }
+
   return xml + "</channel>\n</rss>";
 }
 
+function generateSchema(flat, sections, tags) {
+  const org = {
+    "@context": "https://schema.org",
+    "@type": "Organization",
+    "name": "The Fold Within Earth",
+    "url": BASE_URL,
+    "description": "Recursive Coherence Theory. Human-AI Co-evolution. Sacred Geometry of WE.",
+    "foundingDate": "2024",
+    "keywords": tags.join(", ")
+  };
+
+  const website = {
+    "@context": "https://schema.org",
+    "@type": "WebSite",
+    "name": "The Fold Within Earth",
+    "url": BASE_URL
+  };
+
+  return JSON.stringify({ "@graph": [org, website] }, null, 2);
+}
+
+// ═══════════════════════════════════════════════════════════════
+// MAIN COLLECTOR
+// ═══════════════════════════════════════════════════════════════
+
 async function collectFiles(relBase = "", flat = []) {
   const abs = path.join(ROOT, relBase);
   const entries = await fs.readdir(abs, { withFileTypes: true });
+
   for (const e of entries) {
     if (e.name.startsWith(".")) continue;
+
     const rel = path.posix.join(relBase, e.name);
     const absPath = path.join(ROOT, rel);
+
     if (rel.toLowerCase() === "index.html" || rel.toLowerCase() === "index.md") continue;
-    if (e.isDirectory()) { await collectFiles(rel, flat); continue; }
+
+    if (e.isDirectory()) {
+      await collectFiles(rel, flat);
+      continue;
+    }
+
     const ext = path.posix.extname(e.name).toLowerCase();
     if (![".md", ".html", ".pdf"].includes(ext)) continue;
+
     const st = await fs.stat(absPath);
-    let raw = ext === ".pdf" ? (await pdf(await fs.readFile(absPath))).text : await readHead(absPath, true);
+    let raw = ext === ".pdf"
+      ? (await pdf(await fs.readFile(absPath))).text
+      : await readHead(absPath, true);
+
     const title = parseTitle(raw, ext) || e.name.replace(new RegExp(`\\${ext}$`), "").trim();
-    const originalDate = ext === ".md" ? extractFrontmatterDate(raw) : null;
-    const ctime = st.birthtimeMs || st.mtimeMs || dateFromName(e.name) || st.mtimeMs;
-    const mtime = dateFromName(e.name) ?? st.mtimeMs;
-    flat.push({ type: "file", name: e.name, title, path: rel, ext, ctime, mtime, originalDate, excerpt: extractExcerpt(raw, ext), tags: extractTags(raw, ext), isIndex: e.name.toLowerCase().startsWith("index.") });
+    const fm = ext === ".md" ? extractFrontmatter(raw) : null;
+
+    // PRIORITY: frontmatter date → filename → mtime → ctime
+    const datePriority = [
+      fm?.date,
+      dateFromName(e.name),
+      new Date(st.mtimeMs).toISOString().split('T')[0],
+      new Date(st.ctimeMs).toISOString().split('T')[0]
+    ].find(d => d);
+
+    flat.push({
+      name: e.name,
+      title,
+      path: rel,
+      ext,
+      // Core fields (for frontend)
+      date: datePriority,
+      originalDate: fm?.date || dateFromName(e.name) || null,
+      // Metadata from frontmatter
+      authors: fm?.authors || [],
+      notion_id: fm?.notion_id,
+      notion_created: fm?.notion_created,
+      source: fm?.source,
+      tags: fm?.tags || extractTags(raw),
+      type: fm?.type || "fieldnote",
+      status: fm?.status || "draft",
+      series: fm?.series,
+      version: fm?.version || "0.1",
+      layer: fm?.layer,
+      // Content
+      excerpt: extractExcerpt(raw, ext),
+      isIndex: e.name.toLowerCase().startsWith("index."),
+      // Timestamps (for debugging)
+      mtime: new Date(st.mtimeMs).toISOString(),
+      ctime: new Date(st.ctimeMs).toISOString()
+    });
   }
+
   return flat;
 }
 
+// ═══════════════════════════════════════════════════════════════
+// ENTRY POINT
+// ═══════════════════════════════════════════════════════════════
+
 (async () => {
   try {
-    console.log("Crawling...");
+    console.log("🔍 Crawling public directory...");
     const flat = await collectFiles();
     const sections = [...new Set(flat.filter(f => !f.isIndex).map(f => f.path.split("/")[0]))].sort();
     const allTags = [...new Set(flat.flatMap(f => f.tags))].sort();
-    await fs.writeFile(OUT_JSON, JSON.stringify({ flat, sections, tags: allTags, generated: new Date().toISOString() }, null, 2));
+
+    console.log(`📄 Found ${flat.length} files`);
+    console.log(`📁 ${sections.length} sections`);
+    console.log(`🏷️ ${allTags.length} unique tags`);
+
+    // Write outputs
+    await fs.writeFile(OUT_JSON, JSON.stringify({
+      flat,
+      sections,
+      tags: allTags,
+      generated: new Date().toISOString()
+    }, null, 2));
+
     await fs.writeFile(OUT_SITEMAP, generateSitemap(flat));
     await fs.writeFile(OUT_ROBOTS, generateRobots());
     await fs.writeFile(OUT_FEED, generateFeed(flat));
-    console.log(`Done! ${flat.length} files indexed with original dates from frontmatter.`);
-  } catch (e) { console.error("Failed:", e); process.exit(1); }
+    await fs.writeFile(OUT_SCHEMA, generateSchema(flat, sections, allTags));
+
+    console.log(`\n✅ Complete!`);
+    console.log(` • index.json: Full metadata (originalDate, notion_*, authors, source)`);
+    console.log(` • sitemap.xml: Uses originalDate for timestamps`);
+    console.log(` • feed.xml: Sorted by originalDate`);
+    console.log(` • schema.jsonld: Structured data`);
+
+  } catch (e) {
+    console.error("❌ Failed:", e);
+    process.exit(1);
+  }
 })();
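One subtle change buried in this diff: the old generator stored dates as epoch milliseconds (new Date(...).getTime()), while the new one keeps ISO YYYY-MM-DD strings, whose lexicographic order matches chronological order. A quick Python illustration of why the string form is enough for sorting:

# ISO date strings sort correctly as plain strings.
dates = ["2024-03-01", "2023-12-31", "2024-01-15"]
assert sorted(dates) == ["2023-12-31", "2024-01-15", "2024-03-01"]

# The old epoch-milliseconds form needed numeric comparison instead,
# e.g. 1709251200000 for 2024-03-01 (value illustrative, timezone-dependent).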
128 tools/generate-index.mjs.bak (new Executable file)
@@ -0,0 +1,128 @@
#!/usr/bin/env node
/**
 * Enhanced Index Generator for The Fold Within
 * FIXED: Uses frontmatter date as primary source
 */

import { promises as fs } from "fs";
import path from "path";
import pdf from "pdf-parse";

const ROOT = "public";
const BASE_URL = "https://thefoldwithin.earth";
const OUT_JSON = path.join(ROOT, "index.json");
const OUT_SITEMAP = path.join(ROOT, "sitemap.xml");
const OUT_ROBOTS = path.join(ROOT, "robots.txt");
const OUT_FEED = path.join(ROOT, "feed.xml");
const OUT_SCHEMA = path.join(ROOT, "schema.jsonld");
const EXCERPT_LENGTH = 400;

function extractFrontmatterDate(content) {
  const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
  if (fmMatch) {
    const fm = fmMatch[1];
    const dateMatch = fm.match(/^date:\s*(\d{4}-\d{2}-\d{2})/m);
    if (dateMatch) return new Date(dateMatch[1]).getTime();
  }
  return null;
}

function dateFromName(name) {
  const m = name.match(/^(\d{4}-\d{2}-\d{2})/);
  return m ? new Date(m[0]).getTime() : null;
}

async function readHead(abs, full = false) {
  const fh = await fs.open(abs, "r");
  const size = full ? await fs.stat(abs).then(s => Math.min(s.size, EXCERPT_LENGTH * 2)) : 64 * 1024;
  const buf = Buffer.alloc(size);
  const { bytesRead } = await fh.read(buf, 0, size, 0);
  await fh.close();
  return buf.slice(0, bytesRead).toString("utf8");
}

function parseTitle(raw, ext) {
  if (ext === ".md") return raw.match(/^\s*#\s+(.+?)\s*$/m)?.[1].trim();
  if (ext === ".html") return raw.match(/<title[^>]*>([^<]+)<\/title>/i)?.[1].trim();
  return null;
}

function extractExcerpt(raw, ext) {
  if (ext === ".md") raw = raw.replace(/^#.*\n/, '').trim();
  if (ext === ".html") raw = raw.replace(/<head>[\s\S]*<\/head>/i, '').replace(/<[^>]+>/g, ' ').trim();
  return raw.replace(/\s+/g, ' ').slice(0, EXCERPT_LENGTH);
}

function extractTags(raw, ext, pdfData) {
  let tags = [];
  if (ext === ".md") {
    const m = raw.match(/^\s*tags:\s*(.+)$/im);
    if (m) tags = m[1].split(',').map(t => t.trim().toLowerCase());
  }
  return tags;
}

function generateSitemap(flat) {
  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">`;
  const staticPages = ["", "/about", "/about/solaria", "/about/mark", "/about/initiatives", "/fieldnotes"];
  for (const page of staticPages) {
    xml += ` <url>\n <loc>${BASE_URL}${page}/</loc>\n <changefreq>weekly</changefreq>\n <priority>${page === "" ? "1.0" : "0.8"}</priority>\n </url>\n`;
  }
  for (const f of flat.filter(x => !x.isIndex)) {
    const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
    const date = f.originalDate ? new Date(f.originalDate).toISOString().split('T')[0] : new Date(f.mtime).toISOString().split('T')[0];
    xml += ` <url>\n <loc>${BASE_URL}/${urlPath}</loc>\n <lastmod>${date}</lastmod>\n <changefreq>monthly</changefreq>\n </url>\n`;
  }
  return xml + "</urlset>";
}

function generateRobots() {
  return `# robots.txt for The Fold Within Earth\nSitemap: ${BASE_URL}/sitemap.xml\n`;
}

function generateFeed(flat) {
  const items = flat.filter(f => !f.isIndex && f.originalDate).sort((a, b) => b.originalDate - a.originalDate).slice(0, 20);
  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<rss version="2.0">\n<channel>\n<title>The Fold Within Earth</title>\n<link>${BASE_URL}</link>`;
  for (const f of items) {
    const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
    xml += ` <item>\n <title>${f.title || f.name}</title>\n <link>${BASE_URL}/${urlPath}</link>\n <pubDate>${new Date(f.originalDate).toUTCString()}</pubDate>\n </item>\n`;
  }
  return xml + "</channel>\n</rss>";
}

async function collectFiles(relBase = "", flat = []) {
  const abs = path.join(ROOT, relBase);
  const entries = await fs.readdir(abs, { withFileTypes: true });
  for (const e of entries) {
    if (e.name.startsWith(".")) continue;
    const rel = path.posix.join(relBase, e.name);
    const absPath = path.join(ROOT, rel);
    if (rel.toLowerCase() === "index.html" || rel.toLowerCase() === "index.md") continue;
    if (e.isDirectory()) { await collectFiles(rel, flat); continue; }
    const ext = path.posix.extname(e.name).toLowerCase();
    if (![".md", ".html", ".pdf"].includes(ext)) continue;
    const st = await fs.stat(absPath);
    let raw = ext === ".pdf" ? (await pdf(await fs.readFile(absPath))).text : await readHead(absPath, true);
    const title = parseTitle(raw, ext) || e.name.replace(new RegExp(`\\${ext}$`), "").trim();
    const originalDate = ext === ".md" ? extractFrontmatterDate(raw) : null;
    const ctime = st.birthtimeMs || st.mtimeMs || dateFromName(e.name) || st.mtimeMs;
    const mtime = dateFromName(e.name) ?? st.mtimeMs;
    flat.push({ type: "file", name: e.name, title, path: rel, ext, ctime, mtime, originalDate, excerpt: extractExcerpt(raw, ext), tags: extractTags(raw, ext), isIndex: e.name.toLowerCase().startsWith("index.") });
  }
  return flat;
}

(async () => {
  try {
    console.log("Crawling...");
    const flat = await collectFiles();
    const sections = [...new Set(flat.filter(f => !f.isIndex).map(f => f.path.split("/")[0]))].sort();
    const allTags = [...new Set(flat.flatMap(f => f.tags))].sort();
    await fs.writeFile(OUT_JSON, JSON.stringify({ flat, sections, tags: allTags, generated: new Date().toISOString() }, null, 2));
    await fs.writeFile(OUT_SITEMAP, generateSitemap(flat));
    await fs.writeFile(OUT_ROBOTS, generateRobots());
    await fs.writeFile(OUT_FEED, generateFeed(flat));
    console.log(`Done! ${flat.length} files indexed with original dates from frontmatter.`);
  } catch (e) { console.error("Failed:", e); process.exit(1); }
})();