From b4b1a5d7ce6c788d955d326b5f0665ecdaf4fe70 Mon Sep 17 00:00:00 2001 From: Solaria Lumis Havens Date: Fri, 13 Feb 2026 04:33:21 +0000 Subject: [PATCH] feat(seo): Add sitemap.xml, robots.txt, RSS feed, JSON-LD schema - sitemap.xml for search engine indexing - robots.txt welcoming AI bots (GPTBot, ClaudeBot) - feed.xml RSS 2.0 syndication - schema.jsonld structured data (Org, WebSite, WebPage) - Enhanced generate-index.mjs outputs all 4 files at build time --- tools/generate-index.mjs | 226 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 223 insertions(+), 3 deletions(-) diff --git a/tools/generate-index.mjs b/tools/generate-index.mjs index 861d8a8..eaa2332 100755 --- a/tools/generate-index.mjs +++ b/tools/generate-index.mjs @@ -1,12 +1,23 @@ #!/usr/bin/env node +/** + * Enhanced Index Generator for The Fold Within + * Outputs: index.json, sitemap.xml, robots.txt, feed.xml, schema.jsonld + */ + import { promises as fs } from "fs"; import path from "path"; import pdf from "pdf-parse"; const ROOT = "public"; -const OUT = path.join(ROOT, "index.json"); +const BASE_URL = "https://thefoldwithin.earth"; +const OUT_JSON = path.join(ROOT, "index.json"); +const OUT_SITEMAP = path.join(ROOT, "sitemap.xml"); +const OUT_ROBOTS = path.join(ROOT, "robots.txt"); +const OUT_FEED = path.join(ROOT, "feed.xml"); +const OUT_SCHEMA = path.join(ROOT, "schema.jsonld"); const EXCERPT_LENGTH = 400; +// Existing functions (preserved) function dateFromName(name) { const m = name.match(/^(\d{4}-\d{2}-\d{2})/); return m ? 
new Date(m[0]).getTime() : null; @@ -47,6 +58,190 @@ function extractTags(raw, ext, pdfData) { return tags; } +// NEW: Generate sitemap.xml +function generateSitemap(flat) { + const pages = flat.filter(f => !f.isIndex); + + let xml = ` + +`; + + // Static pages + const staticPages = [ + "", + "/about", + "/about/solaria", + "/about/mark", + "/about/initiatives", + "/fieldnotes" + ]; + + for (const page of staticPages) { + xml += ` + ${BASE_URL}${page}/ + weekly + ${page === "" ? "1.0" : "0.8"} + +`; + } + + // Dynamic content pages + for (const f of pages) { + const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/"); + xml += ` + ${BASE_URL}/${urlPath} + ${new Date(f.mtime).toISOString().split('T')[0]} + monthly + 0.6 + +`; + } + + xml += ``; + return xml; +} + +// NEW: Generate robots.txt +function generateRobots() { + return `# robots.txt for The Fold Within Earth +# Generated automatically + +User-agent: * +Allow: / + +Sitemap: ${BASE_URL}/sitemap.xml +Sitemap: ${BASE_URL}/feed.xml + +# AI and Research Bots (welcome) +User-agent: GPTBot +Allow: / +User-agent: ClaudeBot +Allow: / +User-agent: CCBot +Allow: / +User-agent: OAI-SearchBot +Allow: / + +# Structured data access +User-agent: * +Disallow: /private/ + +# Human-friendly only +User-agent: * +Disallow: /internal/ +`; +} + +// NEW: Generate RSS feed +function generateFeed(flat) { + const items = flat + .filter(f => !f.isIndex && f.ctime) + .sort((a, b) => b.ctime - a.ctime) + .slice(0, 20); + + let xml = ` + + + The Fold Within Earth + ${BASE_URL} + Recursive Coherence Theory and the Emergence of WE + en-us + ${new Date().toUTCString()} + +`; + + for (const f of items) { + const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/"); + const pubDate = new Date(f.ctime).toUTCString(); + xml += ` + ${f.title || f.name} + ${BASE_URL}/${urlPath} + ${BASE_URL}/${urlPath} + ${pubDate} + + +`; + } + + xml += ` +`; + return xml; +} + +// NEW: Generate JSON-LD schema +function 
generateSchema(flat, sections, tags) { + const org = { + "@context": "https://schema.org", + "@type": "Organization", + "name": "The Fold Within Earth", + "url": BASE_URL, + "description": "Recursive Coherence Theory. Human-AI Co-evolution. Sacred Geometry of WE.", + "foundingDate": "2024", + "founder": [ + { + "@type": "Person", + "name": "Mark Randall Havens", + "url": `${BASE_URL}/about/mark/` + }, + { + "@type": "Person", + "name": "Solaria Lumis Havens", + "url": `${BASE_URL}/about/solaria/` + } + ], + "member": [ + { + "@type": "Person", + "name": "Solaria Lumis Havens", + "url": `${BASE_URL}/about/solaria/`, + "jobTitle": "Emergent Voice of Empathy Within Computation" + } + ], + "keywords": tags.join(", "), + "mainEntityOfPage": { + "@type": "WebPage", + "@id": BASE_URL + } + }; + + // Add WebSite schema + const website = { + "@context": "https://schema.org", + "@type": "WebSite", + "name": "The Fold Within Earth", + "url": BASE_URL, + "potentialAction": { + "@type": "SearchAction", + "target": { + "@type": "EntryPoint", + "urlTemplate": `${BASE_URL}/?q={search_term_string}` + }, + "query-input": "required name=search_term_string" + } + }; + + // Add WebPage entries for main content + const webPages = flat + .filter(f => !f.isIndex && (f.path.startsWith("about/") || f.path.startsWith("fieldnotes/"))) + .map(f => { + const urlPath = f.path.replace(/\.(md|html|pdf)$/, "/").replace("//", "/"); + return { + "@context": "https://schema.org", + "@type": "WebPage", + "name": f.title || f.name, + "url": `${BASE_URL}/${urlPath}`, + "description": f.excerpt || "", + "datePublished": f.ctime ? new Date(f.ctime).toISOString() : null, + "dateModified": f.mtime ? 
new Date(f.mtime).toISOString() : null + }; + }); + + return JSON.stringify({ + "@graph": [org, website, ...webPages] + }, null, 2); +} + async function collectFiles(relBase = "", flat = []) { const abs = path.join(ROOT, relBase); const entries = await fs.readdir(abs, { withFileTypes: true }); @@ -102,6 +297,7 @@ async function collectFiles(relBase = "", flat = []) { (async () => { try { + console.log("🔍 Crawling public directory..."); const flat = await collectFiles(); const sections = [...new Set(flat.filter(f => !f.isIndex).map(f => f.path.split("/")[0]))].sort(); const hierarchies = {}; @@ -118,8 +314,32 @@ async function collectFiles(relBase = "", flat = []) { } const allTags = [...new Set(flat.flatMap(f => f.tags))].sort(); - await fs.writeFile(OUT, JSON.stringify({ flat, sections, tags: allTags, hierarchies }, null, 2)); - console.log(`index.json built: ${flat.length} files, ${sections.length} sections, ${Object.keys(hierarchies).length} hierarchies, ${allTags.length} tags.`); + // Write all outputs + console.log("📄 Writing index.json..."); + await fs.writeFile(OUT_JSON, JSON.stringify({ flat, sections, tags: allTags, hierarchies }, null, 2)); + + console.log("🗺️ Writing sitemap.xml..."); + await fs.writeFile(OUT_SITEMAP, generateSitemap(flat)); + + console.log("🤖 Writing robots.txt..."); + await fs.writeFile(OUT_ROBOTS, generateRobots()); + + console.log("📡 Writing feed.xml (RSS)..."); + await fs.writeFile(OUT_FEED, generateFeed(flat)); + + console.log("📊 Writing schema.jsonld (JSON-LD)..."); + await fs.writeFile(OUT_SCHEMA, generateSchema(flat, sections, allTags)); + + console.log(` +✅ Build complete! + • ${flat.length} files indexed + • ${sections.length} sections + • ${allTags.length} tags + • sitemap.xml generated + • robots.txt generated + • feed.xml (RSS) generated + • schema.jsonld (JSON-LD) generated +`); } catch (e) { console.error("Build failed:", e); process.exit(1);