2025-11-08 11:23:21 -06:00
|
|
|
#!/usr/bin/env node
|
2025-11-08 11:29:10 -06:00
|
|
|
import { promises as fs } from "fs";
|
2025-11-08 09:05:04 -06:00
|
|
|
import path from "path";
|
2025-11-08 15:47:51 -06:00
|
|
|
import pdf from "pdf-parse";
|
2025-11-08 14:34:15 -06:00
|
|
|
|
2025-11-08 15:24:49 -06:00
|
|
|
// Directory that is scanned for content files; indexed paths are relative to it.
const ROOT = "public";
|
2025-11-08 11:23:21 -06:00
|
|
|
// Location of the generated JSON index, which is written inside ROOT.
const OUT = path.join(ROOT, "index.json");
|
2025-11-08 18:21:53 -06:00
|
|
|
// Maximum length (in string characters) of each file's excerpt. readHead also
// uses twice this value as its byte cap when called with full = true.
const EXCERPT_LENGTH = 400;
|
2025-11-08 10:37:54 -06:00
|
|
|
|
2025-11-08 11:29:10 -06:00
|
|
|
/**
 * Extracts a leading `YYYY-MM-DD` date from a file name.
 *
 * @param {string} name - File name, e.g. `2025-11-08-post.md`.
 * @returns {number|null} Epoch milliseconds for the date prefix (a date-only
 *   ISO string is parsed as UTC midnight), or `null` when the name has no such
 *   prefix or the prefix cannot be parsed as a date.
 */
function dateFromName(name) {
  const match = /^\d{4}-\d{2}-\d{2}/.exec(name);
  if (!match) return null;

  // Matching digits is not enough: "2025-13-45" parses to NaN, which would
  // slip past `??` fallbacks in callers and serialize as `null` in JSON.
  const ms = Date.parse(match[0]);
  return Number.isNaN(ms) ? null : ms;
}
|
2025-11-08 15:24:49 -06:00
|
|
|
|
2025-11-08 18:21:53 -06:00
|
|
|
/**
 * Reads the beginning of a file and decodes it as UTF-8.
 *
 * @param {string} abs - Path of the file to read.
 * @param {boolean} [full=false] - When true, the read is capped at
 *   `EXCERPT_LENGTH * 2` bytes (or the file size, if smaller); otherwise up to
 *   64 KiB are read. Note that, despite its name, `true` selects the smaller
 *   window.
 * @returns {Promise<string>} The decoded text. A multi-byte character that is
 *   cut in half by the byte window is dropped rather than decoded to U+FFFD.
 */
async function readHead(abs, full = false) {
  const fh = await fs.open(abs, "r");
  try {
    let size = 64 * 1024;
    if (full) {
      // Stat through the open handle so size and content refer to the same file.
      const { size: fileSize } = await fh.stat();
      size = Math.min(fileSize, EXCERPT_LENGTH * 2);
    }

    const buf = Buffer.alloc(size);
    const { bytesRead } = await fh.read(buf, 0, size, 0);

    // `stream: true` holds back an incomplete trailing UTF-8 sequence instead
    // of emitting a replacement character; `ignoreBOM: true` keeps a leading
    // BOM in the output, exactly as Buffer#toString("utf8") did.
    const decoder = new TextDecoder("utf-8", { ignoreBOM: true });
    return decoder.decode(buf.subarray(0, bytesRead), { stream: true });
  } finally {
    // Always release the descriptor, even when stat/read throws.
    await fh.close();
  }
}
|
2025-11-08 15:24:49 -06:00
|
|
|
|
2025-11-08 11:29:10 -06:00
|
|
|
/**
 * Pulls a title out of a document's raw text.
 *
 * - `.md`: the text of the first line that looks like a level-one heading
 *   (`# Title`).
 * - `.html`: the content of the first `<title>` element.
 *
 * @param {string} raw - Document text (or the head of it).
 * @param {string} ext - Lower-cased extension including the dot.
 * @returns {string|null|undefined} The trimmed title; `undefined` when a
 *   Markdown/HTML document has no recognisable title; `null` for any other
 *   extension.
 */
function parseTitle(raw, ext) {
  let pattern;
  switch (ext) {
    case ".md":
      pattern = /^\s*#\s+(.+?)\s*$/m;
      break;
    case ".html":
      pattern = /<title[^>]*>([^<]+)<\/title>/i;
      break;
    default:
      return null;
  }

  const found = raw.match(pattern);
  return found ? found[1].trim() : undefined;
}
|
|
|
|
|
|
2025-11-08 16:07:52 -06:00
|
|
|
/**
 * Builds a short plain-text excerpt from a document's raw text.
 *
 * - `.md`: a heading line at the very start of the text is removed.
 * - `.html`: the `<head>` element, `<script>`/`<style>` blocks, comments and
 *   all remaining tags are removed.
 * - Any other extension: the text is used as-is.
 *
 * Whitespace runs are then collapsed to single spaces, the result is trimmed,
 * and it is cut to `EXCERPT_LENGTH` characters.
 *
 * @param {string} raw - Document text (or the head of it).
 * @param {string} ext - Lower-cased extension including the dot.
 * @returns {string} The excerpt.
 */
function extractExcerpt(raw, ext) {
  let text = raw;

  if (ext === ".md") {
    // `[^\r\n]*` plus an optional `\r` keeps this working on CRLF files,
    // where `.*\n` never matches because `.` does not consume `\r`.
    text = text.replace(/^\s*#[^\r\n]*(?:\r?\n|$)/, "");
  } else if (ext === ".html") {
    text = text
      // Non-greedy, and tolerant of attributes on the opening tag.
      .replace(/<head\b[^>]*>[\s\S]*?<\/head\s*>/i, " ")
      // Script and style bodies are code, not readable content.
      .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, " ")
      .replace(/<!--[\s\S]*?-->/g, " ")
      .replace(/<[^>]+>/g, " ");
  }

  return text.replace(/\s+/g, " ").trim().slice(0, EXCERPT_LENGTH);
}
|
|
|
|
|
|
2025-11-08 18:21:53 -06:00
|
|
|
/**
 * Extracts a list of lower-cased tags from a document.
 *
 * - `.md`: the first line of the form `tags: a, b, c` (a YAML flow list such
 *   as `tags: [a, b]` is accepted too).
 * - `.html`: the `content` of `<meta name="keywords" content="...">`, with
 *   either double or single quotes around the attribute values.
 * - `.pdf`: the comma-separated `Subject` entry of `pdfData.info`.
 *
 * Empty entries are dropped and duplicates are removed (first occurrence
 * wins), so the result never contains `""`.
 *
 * @param {string} raw - Document text (or the head of it).
 * @param {string} ext - Lower-cased extension including the dot.
 * @param {{ info?: { Subject?: unknown } }} [pdfData] - Parsed PDF data; only
 *   consulted for `.pdf`.
 * @returns {string[]} The tags, possibly empty.
 */
function extractTags(raw, ext, pdfData) {
  // Shared normaliser: split on commas, strip whitespace and wrapping quotes,
  // lower-case, discard blanks, de-duplicate.
  const toTags = (list) => [
    ...new Set(
      list
        .split(",")
        .map((tag) => tag.trim().replace(/^["']+|["']+$/g, "").trim().toLowerCase())
        .filter(Boolean),
    ),
  ];

  if (ext === ".md") {
    const m = raw.match(/^\s*tags:\s*(.+)$/im);
    if (!m) return [];
    // Drop the brackets of a YAML flow sequence so "[a, b]" yields a and b.
    return toTags(m[1].trim().replace(/^\[|\]$/g, ""));
  }

  if (ext === ".html") {
    const m = raw.match(
      /<meta\s+name=(?:"keywords"|'keywords')\s+content=(?:"([^"]+)"|'([^']+)')/i,
    );
    return m ? toTags(m[1] ?? m[2]) : [];
  }

  // Guard the type: a non-string Subject has no split() and would throw.
  if (ext === ".pdf" && typeof pdfData?.info?.Subject === "string") {
    return toTags(pdfData.info.Subject);
  }

  return [];
}
|
|
|
|
|
|
2025-11-08 15:34:32 -06:00
|
|
|
/**
 * Recursively walks a directory under ROOT and appends one index entry per
 * `.md`, `.html` or `.pdf` file to `flat`.
 *
 * Entries whose name starts with "." are skipped, as is the root-level
 * `index.html` / `index.md`.
 *
 * @param {string} [relBase=""] - Directory to scan, relative to ROOT, using
 *   "/" separators.
 * @param {object[]} [flat=[]] - Accumulator the entries are pushed onto; the
 *   same array is passed down through the recursion.
 * @returns {Promise<object[]>} `flat`, with the entries found below `relBase`
 *   appended.
 */
async function collectFiles(relBase = "", flat = []) {
  const entries = await fs.readdir(path.join(ROOT, relBase), { withFileTypes: true });

  for (const e of entries) {
    if (e.name.startsWith(".")) continue;

    // `rel` is the "/"-separated path stored in the index; `absPath` is the
    // platform path used for filesystem access.
    const rel = path.posix.join(relBase, e.name);
    const absPath = path.join(ROOT, rel);

    // Skip the SPA root index file entirely — it's the shell, not content
    const relLower = rel.toLowerCase();
    if (relLower === "index.html" || relLower === "index.md") continue;

    if (e.isDirectory()) {
      await collectFiles(rel, flat);
      continue;
    }

    const rawExt = path.posix.extname(e.name);
    const ext = rawExt.toLowerCase();
    if (![".md", ".html", ".pdf"].includes(ext)) continue;

    // Strip the extension as it is actually spelled, so "Doc.PDF" falls back
    // to "Doc" rather than keeping its upper-case suffix.
    const fallbackTitle = path.posix.basename(e.name, rawExt).trim();

    const st = await fs.stat(absPath);

    let raw;
    let pdfData;
    let title;
    if (ext === ".pdf") {
      pdfData = await pdf(await fs.readFile(absPath));
      raw = pdfData.text;
      // `info` is treated as optional here, matching extractTags.
      title = pdfData.info?.Title || fallbackTitle;
    } else {
      raw = await readHead(absPath, true);
      title = parseTitle(raw, ext) || fallbackTitle;
    }

    // A date prefix in the file name overrides the filesystem mtime. Guard
    // against NaN so an unparseable prefix cannot end up in the index.
    const named = dateFromName(e.name);
    const namedDate = Number.isFinite(named) ? named : null;
    const ctime = st.birthtimeMs || st.mtimeMs || namedDate || st.mtimeMs;
    const mtime = namedDate ?? st.mtimeMs;

    const baseName = e.name.toLowerCase();

    flat.push({
      type: "file",
      name: e.name,
      title,
      path: rel,
      ext,
      ctime,
      mtime,
      excerpt: extractExcerpt(raw, ext),
      tags: extractTags(raw, ext, pdfData),
      isIndex: baseName.startsWith("index."),
      isPinned: baseName.startsWith("pinned."),
    });
  }

  return flat;
}
|
|
|
|
|
|
|
|
|
|
// Entry point: collect every content file, derive the section, hierarchy and
// tag summaries from the flat list, and write everything to OUT as JSON.
// Any failure is logged and the process exits with status 1.
(async () => {
  try {
    const flat = await collectFiles();

    // Build sections: folders with non-index files
    const sectionNames = new Set();
    for (const f of flat) {
      if (f.isIndex) continue;
      const parts = f.path.split("/");
      // A path without "/" is a file sitting directly in the root; its first
      // segment is a file name, not a folder, so it is not a section.
      if (parts.length > 1) sectionNames.add(parts[0]);
    }
    const sections = [...sectionNames].sort();

    // Build hierarchies: parent → [child] where child has index.*
    // A prototype-less object keeps folder names such as "constructor" or
    // "__proto__" from colliding with inherited Object members.
    const hierarchies = Object.create(null);
    for (const f of flat) {
      if (!f.isIndex) continue;
      const parts = f.path.split("/");
      if (parts.length <= 2) continue; // e.g., essays/ai/index.md → parts[0]=essays, parts[1]=ai

      const [parent, child] = parts;
      const children = (hierarchies[parent] ??= []);
      if (!children.includes(child)) children.push(child);
    }

    // filter(Boolean) keeps an empty tag string out of the global tag list.
    const allTags = [...new Set(flat.flatMap((f) => f.tags).filter(Boolean))].sort();

    await fs.writeFile(OUT, JSON.stringify({ flat, sections, tags: allTags, hierarchies }, null, 2));
    console.log(`index.json built: ${flat.length} files, ${sections.length} sections, ${Object.keys(hierarchies).length} hierarchies, ${allTags.length} tags.`);
  } catch (e) {
    console.error("Build failed:", e);
    process.exit(1);
  }
})();
|