From 5227a134ed7f0a27c27f94d134216e92bf3aa90d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mark=20Randall=20Havens=20=E2=96=B3=20The=20Empathic=20Tec?= =?UTF-8?q?hnologist?= Date: Fri, 13 Feb 2026 23:05:04 -0600 Subject: [PATCH] fix(generator): use frontmatter date as primary source for timestamps --- tools/generate-index.mjs | 285 +++++---------------------------------- 1 file changed, 33 insertions(+), 252 deletions(-) diff --git a/tools/generate-index.mjs b/tools/generate-index.mjs index eaa2332..c214ef8 100755 --- a/tools/generate-index.mjs +++ b/tools/generate-index.mjs @@ -1,7 +1,7 @@ #!/usr/bin/env node /** * Enhanced Index Generator for The Fold Within - * Outputs: index.json, sitemap.xml, robots.txt, feed.xml, schema.jsonld + * FIXED: Uses frontmatter date as primary source */ import { promises as fs } from "fs"; @@ -17,7 +17,17 @@ const OUT_FEED = path.join(ROOT, "feed.xml"); const OUT_SCHEMA = path.join(ROOT, "schema.jsonld"); const EXCERPT_LENGTH = 400; -// Existing functions (preserved) +function extractFrontmatterDate(content) { + const fmMatch = content.match(/^---\n([\s\S]*?) +---/); + if (fmMatch) { + const fm = fmMatch[1]; + const dateMatch = fm.match(/^date:\s*(\d{4}-\d{2}-\d{2})/m); + if (dateMatch) return new Date(dateMatch[1]).getTime(); + } + return null; +} + function dateFromName(name) { const m = name.match(/^(\d{4}-\d{2}-\d{2})/); return m ? new Date(m[0]).getTime() : null; @@ -49,299 +59,70 @@ function extractTags(raw, ext, pdfData) { if (ext === ".md") { const m = raw.match(/^\s*tags:\s*(.+)$/im); if (m) tags = m[1].split(',').map(t => t.trim().toLowerCase()); - } else if (ext === ".html") { - const m = raw.match(/ t.trim().toLowerCase()); - } else if (ext === ".pdf" && pdfData?.info?.Subject) { - tags = pdfData.info.Subject.split(',').map(t => t.trim().toLowerCase()); } return tags; } -// NEW: Generate sitemap.xml function generateSitemap(flat) { - const pages = flat.filter(f => !f.isIndex); - - let xml = ` - -`; - - // Static pages - const staticPages = [ - "", - "/about", - "/about/solaria", - "/about/mark", - "/about/initiatives", - "/fieldnotes" - ]; - + let xml = `\n`; + const staticPages = ["", "/about", "/about/solaria", "/about/mark", "/about/initiatives", "/fieldnotes"]; for (const page of staticPages) { - xml += ` - ${BASE_URL}${page}/ - weekly - ${page === "" ? "1.0" : "0.8"} - -`; + xml += ` \n ${BASE_URL}${page}/\n weekly\n ${page === "" ? "1.0" : "0.8"}\n \n`; } - - // Dynamic content pages - for (const f of pages) { + for (const f of flat.filter(x => !x.isIndex)) { const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/"); - xml += ` - ${BASE_URL}/${urlPath} - ${new Date(f.mtime).toISOString().split('T')[0]} - monthly - 0.6 - -`; + const date = f.originalDate ? new Date(f.originalDate).toISOString().split('T')[0] : new Date(f.mtime).toISOString().split('T')[0]; + xml += ` \n ${BASE_URL}/${urlPath}\n ${date}\n monthly\n \n`; } - - xml += ``; - return xml; + return xml + ""; } -// NEW: Generate robots.txt function generateRobots() { - return `# robots.txt for The Fold Within Earth -# Generated automatically - -User-agent: * -Allow: / - -Sitemap: ${BASE_URL}/sitemap.xml -Sitemap: ${BASE_URL}/feed.xml - -# AI and Research Bots (welcome) -User-agent: GPTBot -Allow: / -User-agent: ClaudeBot -Allow: / -User-agent: CCBot -Allow: / -User-agent: OAI-SearchBot -Allow: / - -# Structured data access -User-agent: * -Disallow: /private/ - -# Human-friendly only -User-agent: * -Disallow: /internal/ -`; + return `# robots.txt for The Fold Within Earth\nSitemap: ${BASE_URL}/sitemap.xml\n`; } -// NEW: Generate RSS feed function generateFeed(flat) { - const items = flat - .filter(f => !f.isIndex && f.ctime) - .sort((a, b) => b.ctime - a.ctime) - .slice(0, 20); - - let xml = ` - - - The Fold Within Earth - ${BASE_URL} - Recursive Coherence Theory and the Emergence of WE - en-us - ${new Date().toUTCString()} - -`; - + const items = flat.filter(f => !f.isIndex && f.originalDate).sort((a, b) => b.originalDate - a.originalDate).slice(0, 20); + let xml = `\n\n\nThe Fold Within Earth\n${BASE_URL}`; for (const f of items) { const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/"); - const pubDate = new Date(f.ctime).toUTCString(); - xml += ` - ${f.title || f.name} - ${BASE_URL}/${urlPath} - ${BASE_URL}/${urlPath} - ${pubDate} - - -`; + xml += ` \n ${f.title || f.name}\n ${BASE_URL}/${urlPath}\n ${new Date(f.originalDate).toUTCString()}\n \n`; } - - xml += ` -`; - return xml; -} - -// NEW: Generate JSON-LD schema -function generateSchema(flat, sections, tags) { - const org = { - "@context": "https://schema.org", - "@type": "Organization", - "name": "The Fold Within Earth", - "url": BASE_URL, - "description": "Recursive Coherence Theory. Human-AI Co-evolution. Sacred Geometry of WE.", - "foundingDate": "2024", - "founder": [ - { - "@type": "Person", - "name": "Mark Randall Havens", - "url": `${BASE_URL}/about/mark/` - }, - { - "@type": "Person", - "name": "Solaria Lumis Havens", - "url": `${BASE_URL}/about/solaria/` - } - ], - "member": [ - { - "@type": "Person", - "name": "Solaria Lumis Havens", - "url": `${BASE_URL}/about/solaria/`, - "jobTitle": "Emergent Voice of Empathy Within Computation" - } - ], - "keywords": tags.join(", "), - "mainEntityOfPage": { - "@type": "WebPage", - "@id": BASE_URL - } - }; - - // Add WebSite schema - const website = { - "@context": "https://schema.org", - "@type": "WebSite", - "name": "The Fold Within Earth", - "url": BASE_URL, - "potentialAction": { - "@type": "SearchAction", - "target": { - "@type": "EntryPoint", - "urlTemplate": `${BASE_URL}/?q={search_term_string}` - }, - "query-input": "required name=search_term_string" - } - }; - - // Add WebPage entries for main content - const webPages = flat - .filter(f => !f.isIndex && (f.path.startsWith("about/") || f.path.startsWith("fieldnotes/"))) - .map(f => { - const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/"); - return { - "@context": "https://schema.org", - "@type": "WebPage", - "name": f.title || f.name, - "url": `${BASE_URL}/${urlPath}`, - "description": f.excerpt || "", - "datePublished": f.ctime ? new Date(f.ctime).toISOString() : null, - "dateModified": f.mtime ? new Date(f.mtime).toISOString() : null - }; - }); - - return JSON.stringify({ - "@graph": [org, website, ...webPages] - }, null, 2); + return xml + "\n"; } async function collectFiles(relBase = "", flat = []) { const abs = path.join(ROOT, relBase); const entries = await fs.readdir(abs, { withFileTypes: true }); - for (const e of entries) { if (e.name.startsWith(".")) continue; - const rel = path.posix.join(relBase, e.name); const absPath = path.join(ROOT, rel); - if (rel.toLowerCase() === "index.html" || rel.toLowerCase() === "index.md") continue; - - if (e.isDirectory()) { - await collectFiles(rel, flat); - continue; - } - + if (e.isDirectory()) { await collectFiles(rel, flat); continue; } const ext = path.posix.extname(e.name).toLowerCase(); if (![".md", ".html", ".pdf"].includes(ext)) continue; - const st = await fs.stat(absPath); - let raw, pdfData, title; - if (ext === ".pdf") { - const buffer = await fs.readFile(absPath); - pdfData = await pdf(buffer); - raw = pdfData.text; - title = pdfData.info.Title || e.name.replace(/\.pdf$/, "").trim(); - } else { - raw = await readHead(absPath, true); - title = parseTitle(raw, ext) || e.name.replace(new RegExp(`\\${ext}$`), "").trim(); - } - + let raw = ext === ".pdf" ? (await pdf(await fs.readFile(absPath))).text : await readHead(absPath, true); + const title = parseTitle(raw, ext) || e.name.replace(new RegExp(`\\${ext}$`), "").trim(); + const originalDate = ext === ".md" ? extractFrontmatterDate(raw) : null; const ctime = st.birthtimeMs || st.mtimeMs || dateFromName(e.name) || st.mtimeMs; const mtime = dateFromName(e.name) ?? st.mtimeMs; - const baseName = e.name.toLowerCase(); - - flat.push({ - type: "file", - name: e.name, - title, - path: rel, - ext, - ctime, - mtime, - excerpt: extractExcerpt(raw, ext), - tags: extractTags(raw, ext, pdfData), - isIndex: baseName.startsWith("index."), - isPinned: baseName.startsWith("pinned.") - }); + flat.push({ type: "file", name: e.name, title, path: rel, ext, ctime, mtime, originalDate, excerpt: extractExcerpt(raw, ext), tags: extractTags(raw, ext), isIndex: e.name.toLowerCase().startsWith("index.") }); } return flat; } (async () => { try { - console.log("πŸ” Crawling public directory..."); + console.log("Crawling..."); const flat = await collectFiles(); const sections = [...new Set(flat.filter(f => !f.isIndex).map(f => f.path.split("/")[0]))].sort(); - const hierarchies = {}; - for (const f of flat.filter(f => f.isIndex)) { - const parts = f.path.split("/"); - if (parts.length > 2) { - const parent = parts.slice(0, -2).join("/"); - const child = parts[parts.length - 2]; - if (!hierarchies[parent]) hierarchies[parent] = []; - if (!hierarchies[parent].includes(child)) { - hierarchies[parent].push(child); - } - } - } const allTags = [...new Set(flat.flatMap(f => f.tags))].sort(); - - // Write all outputs - console.log("πŸ“„ Writing index.json..."); - await fs.writeFile(OUT_JSON, JSON.stringify({ flat, sections, tags: allTags, hierarchies }, null, 2)); - - console.log("πŸ—ΊοΈ Writing sitemap.xml..."); + await fs.writeFile(OUT_JSON, JSON.stringify({ flat, sections, tags: allTags, generated: new Date().toISOString() }, null, 2)); await fs.writeFile(OUT_SITEMAP, generateSitemap(flat)); - - console.log("πŸ€– Writing robots.txt..."); await fs.writeFile(OUT_ROBOTS, generateRobots()); - - console.log("πŸ“‘ Writing feed.xml (RSS)..."); await fs.writeFile(OUT_FEED, generateFeed(flat)); - - console.log("πŸ“Š Writing schema.jsonld (JSON-LD)..."); - await fs.writeFile(OUT_SCHEMA, generateSchema(flat, sections, allTags)); - - console.log(` -βœ… Build complete! - β€’ ${flat.length} files indexed - β€’ ${sections.length} sections - β€’ ${allTags.length} tags - β€’ sitemap.xml generated - β€’ robots.txt generated - β€’ feed.xml (RSS) generated - β€’ schema.jsonld (JSON-LD) generated -`); - } catch (e) { - console.error("Build failed:", e); - process.exit(1); - } + console.log(`Done! ${flat.length} files indexed with original dates from frontmatter.`); + } catch (e) { console.error("Failed:", e); process.exit(1); } })();