diff --git a/tools/generate-index.mjs b/tools/generate-index.mjs
index eaa2332..c214ef8 100755
--- a/tools/generate-index.mjs
+++ b/tools/generate-index.mjs
@@ -1,7 +1,7 @@
#!/usr/bin/env node
/**
* Enhanced Index Generator for The Fold Within
- * Outputs: index.json, sitemap.xml, robots.txt, feed.xml, schema.jsonld
+ * FIXED: Uses frontmatter date as primary source
*/
import { promises as fs } from "fs";
@@ -17,7 +17,17 @@ const OUT_FEED = path.join(ROOT, "feed.xml");
const OUT_SCHEMA = path.join(ROOT, "schema.jsonld");
const EXCERPT_LENGTH = 400;
-// Existing functions (preserved)
+function extractFrontmatterDate(content) {
+  const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
+ if (fmMatch) {
+ const fm = fmMatch[1];
+ const dateMatch = fm.match(/^date:\s*(\d{4}-\d{2}-\d{2})/m);
+ if (dateMatch) return new Date(dateMatch[1]).getTime();
+ }
+ return null;
+}
+
function dateFromName(name) {
const m = name.match(/^(\d{4}-\d{2}-\d{2})/);
return m ? new Date(m[0]).getTime() : null;
@@ -49,299 +59,70 @@ function extractTags(raw, ext, pdfData) {
if (ext === ".md") {
const m = raw.match(/^\s*tags:\s*(.+)$/im);
if (m) tags = m[1].split(',').map(t => t.trim().toLowerCase());
- } else if (ext === ".html") {
-    const m = raw.match(/<meta name="keywords" content="([^"]+)"/i); if (m) tags = m[1].split(',').map(t => t.trim().toLowerCase());
- } else if (ext === ".pdf" && pdfData?.info?.Subject) {
- tags = pdfData.info.Subject.split(',').map(t => t.trim().toLowerCase());
}
return tags;
}
-// NEW: Generate sitemap.xml
function generateSitemap(flat) {
- const pages = flat.filter(f => !f.isIndex);
-
-  let xml = `<?xml version="1.0" encoding="UTF-8"?>
-<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
-`;
-
- // Static pages
- const staticPages = [
- "",
- "/about",
- "/about/solaria",
- "/about/mark",
- "/about/initiatives",
- "/fieldnotes"
- ];
-
+  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n`;
+ const staticPages = ["", "/about", "/about/solaria", "/about/mark", "/about/initiatives", "/fieldnotes"];
for (const page of staticPages) {
-    xml += `  <url>
-    <loc>${BASE_URL}${page}/</loc>
-    <changefreq>weekly</changefreq>
-    <priority>${page === "" ? "1.0" : "0.8"}</priority>
-  </url>
-`;
+    xml += `  <url>\n    <loc>${BASE_URL}${page}/</loc>\n    <changefreq>weekly</changefreq>\n    <priority>${page === "" ? "1.0" : "0.8"}</priority>\n  </url>\n`;
}
-
- // Dynamic content pages
- for (const f of pages) {
+ for (const f of flat.filter(x => !x.isIndex)) {
const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
-    xml += `  <url>
-    <loc>${BASE_URL}/${urlPath}</loc>
-    <lastmod>${new Date(f.mtime).toISOString().split('T')[0]}</lastmod>
-    <changefreq>monthly</changefreq>
-    <priority>0.6</priority>
-  </url>
-`;
+ const date = f.originalDate ? new Date(f.originalDate).toISOString().split('T')[0] : new Date(f.mtime).toISOString().split('T')[0];
+    xml += `  <url>\n    <loc>${BASE_URL}/${urlPath}</loc>\n    <lastmod>${date}</lastmod>\n    <changefreq>monthly</changefreq>\n  </url>\n`;
}
-
-  xml += `</urlset>`;
- return xml;
+  return xml + "</urlset>";
}
-// NEW: Generate robots.txt
function generateRobots() {
- return `# robots.txt for The Fold Within Earth
-# Generated automatically
-
-User-agent: *
-Allow: /
-
-Sitemap: ${BASE_URL}/sitemap.xml
-Sitemap: ${BASE_URL}/feed.xml
-
-# AI and Research Bots (welcome)
-User-agent: GPTBot
-Allow: /
-User-agent: ClaudeBot
-Allow: /
-User-agent: CCBot
-Allow: /
-User-agent: OAI-SearchBot
-Allow: /
-
-# Structured data access
-User-agent: *
-Disallow: /private/
-
-# Human-friendly only
-User-agent: *
-Disallow: /internal/
-`;
+ return `# robots.txt for The Fold Within Earth\nSitemap: ${BASE_URL}/sitemap.xml\n`;
}
-// NEW: Generate RSS feed
function generateFeed(flat) {
- const items = flat
- .filter(f => !f.isIndex && f.ctime)
- .sort((a, b) => b.ctime - a.ctime)
- .slice(0, 20);
-
-  let xml = `<?xml version="1.0" encoding="UTF-8"?>
-<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
-<channel>
-  <title>The Fold Within Earth</title>
-  <link>${BASE_URL}</link>
-  <description>Recursive Coherence Theory and the Emergence of WE</description>
-  <language>en-us</language>
-  <lastBuildDate>${new Date().toUTCString()}</lastBuildDate>
-  <atom:link href="${BASE_URL}/feed.xml" rel="self" type="application/rss+xml"/>
-`;
-
+ const items = flat.filter(f => !f.isIndex && f.originalDate).sort((a, b) => b.originalDate - a.originalDate).slice(0, 20);
+  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<rss version="2.0">\n<channel>\n<title>The Fold Within Earth</title>\n<link>${BASE_URL}</link>`;
for (const f of items) {
const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
- const pubDate = new Date(f.ctime).toUTCString();
-    xml += `  <item>
-    <title>${f.title || f.name}</title>
-    <link>${BASE_URL}/${urlPath}</link>
-    <guid>${BASE_URL}/${urlPath}</guid>
-    <pubDate>${pubDate}</pubDate>
-    <description>${f.excerpt || ""}</description>
-  </item>
-`;
+    xml += `<item>\n<title>${f.title || f.name}</title>\n<link>${BASE_URL}/${urlPath}</link>\n<pubDate>${new Date(f.originalDate).toUTCString()}</pubDate>\n</item>\n`;
}
-
-  xml += `</channel>
-</rss>`;
- return xml;
-}
-
-// NEW: Generate JSON-LD schema
-function generateSchema(flat, sections, tags) {
- const org = {
- "@context": "https://schema.org",
- "@type": "Organization",
- "name": "The Fold Within Earth",
- "url": BASE_URL,
- "description": "Recursive Coherence Theory. Human-AI Co-evolution. Sacred Geometry of WE.",
- "foundingDate": "2024",
- "founder": [
- {
- "@type": "Person",
- "name": "Mark Randall Havens",
- "url": `${BASE_URL}/about/mark/`
- },
- {
- "@type": "Person",
- "name": "Solaria Lumis Havens",
- "url": `${BASE_URL}/about/solaria/`
- }
- ],
- "member": [
- {
- "@type": "Person",
- "name": "Solaria Lumis Havens",
- "url": `${BASE_URL}/about/solaria/`,
- "jobTitle": "Emergent Voice of Empathy Within Computation"
- }
- ],
- "keywords": tags.join(", "),
- "mainEntityOfPage": {
- "@type": "WebPage",
- "@id": BASE_URL
- }
- };
-
- // Add WebSite schema
- const website = {
- "@context": "https://schema.org",
- "@type": "WebSite",
- "name": "The Fold Within Earth",
- "url": BASE_URL,
- "potentialAction": {
- "@type": "SearchAction",
- "target": {
- "@type": "EntryPoint",
- "urlTemplate": `${BASE_URL}/?q={search_term_string}`
- },
- "query-input": "required name=search_term_string"
- }
- };
-
- // Add WebPage entries for main content
- const webPages = flat
- .filter(f => !f.isIndex && (f.path.startsWith("about/") || f.path.startsWith("fieldnotes/")))
- .map(f => {
- const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/");
- return {
- "@context": "https://schema.org",
- "@type": "WebPage",
- "name": f.title || f.name,
- "url": `${BASE_URL}/${urlPath}`,
- "description": f.excerpt || "",
- "datePublished": f.ctime ? new Date(f.ctime).toISOString() : null,
- "dateModified": f.mtime ? new Date(f.mtime).toISOString() : null
- };
- });
-
- return JSON.stringify({
- "@graph": [org, website, ...webPages]
- }, null, 2);
+  return xml + "\n</channel>\n</rss>\n";
}
async function collectFiles(relBase = "", flat = []) {
const abs = path.join(ROOT, relBase);
const entries = await fs.readdir(abs, { withFileTypes: true });
-
for (const e of entries) {
if (e.name.startsWith(".")) continue;
-
const rel = path.posix.join(relBase, e.name);
const absPath = path.join(ROOT, rel);
-
if (rel.toLowerCase() === "index.html" || rel.toLowerCase() === "index.md") continue;
-
- if (e.isDirectory()) {
- await collectFiles(rel, flat);
- continue;
- }
-
+ if (e.isDirectory()) { await collectFiles(rel, flat); continue; }
const ext = path.posix.extname(e.name).toLowerCase();
if (![".md", ".html", ".pdf"].includes(ext)) continue;
-
const st = await fs.stat(absPath);
- let raw, pdfData, title;
- if (ext === ".pdf") {
- const buffer = await fs.readFile(absPath);
- pdfData = await pdf(buffer);
- raw = pdfData.text;
- title = pdfData.info.Title || e.name.replace(/\.pdf$/, "").trim();
- } else {
- raw = await readHead(absPath, true);
- title = parseTitle(raw, ext) || e.name.replace(new RegExp(`\\${ext}$`), "").trim();
- }
-
+ let raw = ext === ".pdf" ? (await pdf(await fs.readFile(absPath))).text : await readHead(absPath, true);
+ const title = parseTitle(raw, ext) || e.name.replace(new RegExp(`\\${ext}$`), "").trim();
+ const originalDate = ext === ".md" ? extractFrontmatterDate(raw) : null;
const ctime = st.birthtimeMs || st.mtimeMs || dateFromName(e.name) || st.mtimeMs;
const mtime = dateFromName(e.name) ?? st.mtimeMs;
- const baseName = e.name.toLowerCase();
-
- flat.push({
- type: "file",
- name: e.name,
- title,
- path: rel,
- ext,
- ctime,
- mtime,
- excerpt: extractExcerpt(raw, ext),
- tags: extractTags(raw, ext, pdfData),
- isIndex: baseName.startsWith("index."),
- isPinned: baseName.startsWith("pinned.")
- });
+ flat.push({ type: "file", name: e.name, title, path: rel, ext, ctime, mtime, originalDate, excerpt: extractExcerpt(raw, ext), tags: extractTags(raw, ext), isIndex: e.name.toLowerCase().startsWith("index.") });
}
return flat;
}
(async () => {
try {
- console.log("π Crawling public directory...");
+ console.log("Crawling...");
const flat = await collectFiles();
const sections = [...new Set(flat.filter(f => !f.isIndex).map(f => f.path.split("/")[0]))].sort();
- const hierarchies = {};
- for (const f of flat.filter(f => f.isIndex)) {
- const parts = f.path.split("/");
- if (parts.length > 2) {
- const parent = parts.slice(0, -2).join("/");
- const child = parts[parts.length - 2];
- if (!hierarchies[parent]) hierarchies[parent] = [];
- if (!hierarchies[parent].includes(child)) {
- hierarchies[parent].push(child);
- }
- }
- }
const allTags = [...new Set(flat.flatMap(f => f.tags))].sort();
-
- // Write all outputs
- console.log("π Writing index.json...");
- await fs.writeFile(OUT_JSON, JSON.stringify({ flat, sections, tags: allTags, hierarchies }, null, 2));
-
- console.log("πΊοΈ Writing sitemap.xml...");
+ await fs.writeFile(OUT_JSON, JSON.stringify({ flat, sections, tags: allTags, generated: new Date().toISOString() }, null, 2));
await fs.writeFile(OUT_SITEMAP, generateSitemap(flat));
-
- console.log("π€ Writing robots.txt...");
await fs.writeFile(OUT_ROBOTS, generateRobots());
-
- console.log("π‘ Writing feed.xml (RSS)...");
await fs.writeFile(OUT_FEED, generateFeed(flat));
-
- console.log("π Writing schema.jsonld (JSON-LD)...");
- await fs.writeFile(OUT_SCHEMA, generateSchema(flat, sections, allTags));
-
- console.log(`
-β
Build complete!
- β’ ${flat.length} files indexed
- β’ ${sections.length} sections
- β’ ${allTags.length} tags
- β’ sitemap.xml generated
- β’ robots.txt generated
- β’ feed.xml (RSS) generated
- β’ schema.jsonld (JSON-LD) generated
-`);
- } catch (e) {
- console.error("Build failed:", e);
- process.exit(1);
- }
+ console.log(`Done! ${flat.length} files indexed with original dates from frontmatter.`);
+ } catch (e) { console.error("Failed:", e); process.exit(1); }
})();