From a0b41ae265888d33415ec321849fbf3aed743776 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mark=20Randall=20Havens=20=E2=96=B3=20The=20Empathic=20Tec?= =?UTF-8?q?hnologist=20=E2=9F=81=20Doctor=20Who=2042?= Date: Sat, 8 Nov 2025 16:07:52 -0600 Subject: [PATCH] Update generate-index.mjs --- tools/generate-index.mjs | 75 ++++++++++++++++++++++++++++++++-------- 1 file changed, 60 insertions(+), 15 deletions(-) diff --git a/tools/generate-index.mjs b/tools/generate-index.mjs index 75f4d5a..631b599 100755 --- a/tools/generate-index.mjs +++ b/tools/generate-index.mjs @@ -5,18 +5,19 @@ import pdf from "pdf-parse"; const ROOT = "public"; const OUT = path.join(ROOT, "index.json"); -const STATIC_TOPLEVEL = new Set(["about", "contact", "legal"]); -const MAX_BYTES = 64 * 1024; +const EXCERPT_LENGTH = 500; +const MAX_HEAD_BYTES = 64 * 1024; function dateFromName(name) { const m = name.match(/^(\d{4}-\d{2}-\d{2})/); return m ? new Date(m[0]).getTime() : null; } -async function readHead(abs) { +async function readHead(abs, fullForExcerpt = false) { const fh = await fs.open(abs, "r"); - const buf = Buffer.alloc(MAX_BYTES); - const { bytesRead } = await fh.read(buf, 0, MAX_BYTES, 0); + const bufSize = fullForExcerpt ? 
await fs.stat(abs).then(st => Math.min(st.size, EXCERPT_LENGTH * 2)) : MAX_HEAD_BYTES; + const buf = Buffer.alloc(bufSize); + const { bytesRead } = await fh.read(buf, 0, bufSize, 0); await fh.close(); return buf.slice(0, bytesRead).toString("utf8"); } @@ -27,6 +28,27 @@ function parseTitle(raw, ext) { return null; } +function extractExcerpt(raw, ext) { + // Trim headers/metadata for cleaner excerpts + if (ext === ".md") raw = raw.replace(/^#.*\n?/, '').trim(); + if (ext === ".html") raw = raw.replace(/.*<\/head>/is, '').replace(/<[^>]+>/g, ' ').trim(); + return raw.replace(/\s+/g, ' ').slice(0, EXCERPT_LENGTH); +} + +function extractTags(raw, ext, pdfData = null) { + let tags = []; + if (ext === ".md") { + const match = raw.match(/^\s*tags:\s*(.+)$/im); + if (match) tags = match[1].split(',').map(t => t.trim().toLowerCase()); + } else if (ext === ".html") { + const match = raw.match(/<meta name="keywords" content="([^"]+)"/i); + if (match) tags = match[1].split(',').map(t => t.trim().toLowerCase()); + } else if (ext === ".pdf" && pdfData?.info?.Subject) { + tags = pdfData.info.Subject.split(',').map(t => t.trim().toLowerCase()); + } + return tags; +} + async function collectFiles(relBase = "", flat = []) { const abs = path.join(ROOT, relBase); const entries = await fs.readdir(abs, { withFileTypes: true }); @@ -36,25 +58,26 @@ async function collectFiles(relBase = "", flat = []) { const rel = path.posix.join(relBase, e.name); const absPath = path.join(ROOT, rel); if (e.isDirectory()) { - const top = rel.split("/")[0]; - if (STATIC_TOPLEVEL.has(top)) continue; await collectFiles(rel, flat); continue; } const ext = path.posix.extname(e.name).toLowerCase(); - if (![".md", ".html", ".pdf"].includes(ext)) continue; + if (![".md", ".html", ".pdf"].includes(ext) || e.name === "index.html") continue; const st = await fs.stat(absPath); - let title; + let raw, pdfData, title; if (ext === ".pdf") { const buffer = await fs.readFile(absPath); - const pdfData = await pdf(buffer); + pdfData = await pdf(buffer); + raw = pdfData.text; title = pdfData.info.Title || 
e.name.replace(/\.pdf$/, "").trim(); } else { - const raw = await readHead(absPath); + raw = await readHead(absPath, true); title = parseTitle(raw, ext) || e.name.replace(new RegExp(`\\${ext}$`), "").trim(); } const mtime = dateFromName(e.name) ?? st.mtimeMs; + const excerpt = extractExcerpt(raw, ext); + const tags = extractTags(raw, ext, pdfData); flat.push({ type: "file", @@ -63,18 +86,40 @@ async function collectFiles(relBase = "", flat = []) { path: rel, ext, pinned: rel.startsWith("pinned/"), - mtime + mtime, + excerpt, + tags }); } return flat; } +async function detectSections() { + const topEntries = await fs.readdir(ROOT, { withFileTypes: true }); + const sections = []; + for (const e of topEntries) { + if (!e.isDirectory() || e.name.startsWith(".")) continue; + const indexPath = path.join(ROOT, e.name, "index.html"); + let isStatic = false; + try { + await fs.access(indexPath); + isStatic = true; + } catch {} + // Check if dynamic (has content files) - but since flat collects them, infer from flat later + sections.push({ name: e.name, isStatic }); + } + return sections.sort((a, b) => a.name.localeCompare(b.name)); // Alpha sort +} + (async () => { try { const flat = await collectFiles(); - const sections = [...new Set(flat.map(f => f.path.split("/")[0]))]; - await fs.writeFile(OUT, JSON.stringify({ flat, sections }, null, 2)); - console.log(`index.json built with ${flat.length} files across ${sections.length} sections.`); + const sections = await detectSections(); + // Filter sections to those with content or static + const activeSections = sections.filter(s => s.isStatic || flat.some(f => f.path.split("/")[0] === s.name)); + const allTags = [...new Set(flat.flatMap(f => f.tags))].sort(); + await fs.writeFile(OUT, JSON.stringify({ flat, sections: activeSections, tags: allTags }, null, 2)); + console.log(`index.json built: ${flat.length} files, ${activeSections.length} sections (${activeSections.filter(s => s.isStatic).length} static), ${allTags.length} 
tags.`); } catch (e) { console.error("Build failed:", e); process.exit(1);