Update generate-index.mjs

parent 8faea2945f
commit a0b41ae265

1 changed file with 60 additions and 15 deletions

generate-index.mjs
@@ -5,18 +5,19 @@ import pdf from "pdf-parse";
 const ROOT = "public";
 const OUT = path.join(ROOT, "index.json");
-const STATIC_TOPLEVEL = new Set(["about", "contact", "legal"]);
-const MAX_BYTES = 64 * 1024;
+const EXCERPT_LENGTH = 500;
+const MAX_HEAD_BYTES = 64 * 1024;
 
 function dateFromName(name) {
   const m = name.match(/^(\d{4}-\d{2}-\d{2})/);
   return m ? new Date(m[0]).getTime() : null;
 }
 
-async function readHead(abs) {
+async function readHead(abs, fullForExcerpt = false) {
   const fh = await fs.open(abs, "r");
-  const buf = Buffer.alloc(MAX_BYTES);
-  const { bytesRead } = await fh.read(buf, 0, MAX_BYTES, 0);
+  const bufSize = fullForExcerpt ? await fs.stat(abs).then(st => Math.min(st.size, EXCERPT_LENGTH * 2)) : MAX_HEAD_BYTES;
+  const buf = Buffer.alloc(bufSize);
+  const { bytesRead } = await fh.read(buf, 0, bufSize, 0);
   await fh.close();
   return buf.slice(0, bytesRead).toString("utf8");
 }
 
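Review note: in excerpt mode the read is capped at EXCERPT_LENGTH * 2 (1,000) bytes, far smaller than the 64 KiB head read it replaces on the non-PDF path below, so parseTitle now sees at most 1,000 bytes of input. A minimal usage sketch of the two modes, assuming a hypothetical file under public/ (the path is illustrative):

    // Hypothetical usage; the path is illustrative.
    const head = await readHead("public/notes/2024-05-01-example.md");
    // reads up to MAX_HEAD_BYTES (64 KiB) from the start of the file

    const body = await readHead("public/notes/2024-05-01-example.md", true);
    // reads up to EXCERPT_LENGTH * 2 (1,000) bytes, sized for excerpt extraction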
@@ -27,6 +28,27 @@ function parseTitle(raw, ext) {
   return null;
 }
 
+function extractExcerpt(raw, ext) {
+  // Trim headers/metadata for cleaner excerpts
+  if (ext === ".md") raw = raw.replace(/^#.*\n?/, '').trim();
+  if (ext === ".html") raw = raw.replace(/<head>.*<\/head>/is, '').replace(/<[^>]+>/g, ' ').trim();
+  return raw.replace(/\s+/g, ' ').slice(0, EXCERPT_LENGTH);
+}
+
+function extractTags(raw, ext, pdfData = null) {
+  let tags = [];
+  if (ext === ".md") {
+    const match = raw.match(/^\s*tags:\s*(.+)$/im);
+    if (match) tags = match[1].split(',').map(t => t.trim().toLowerCase());
+  } else if (ext === ".html") {
+    const match = raw.match(/<meta\s+name="keywords"\s+content="([^"]+)"/i);
+    if (match) tags = match[1].split(',').map(t => t.trim().toLowerCase());
+  } else if (ext === ".pdf" && pdfData?.info?.Subject) {
+    tags = pdfData.info.Subject.split(',').map(t => t.trim().toLowerCase());
+  }
+  return tags;
+}
+
 async function collectFiles(relBase = "", flat = []) {
   const abs = path.join(ROOT, relBase);
   const entries = await fs.readdir(abs, { withFileTypes: true });
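A quick worked example of the two new helpers on hypothetical markdown input (values illustrative):

    const md = "# My Post\ntags: Node, indexing, Static-Sites\nSome   body text here.";

    extractTags(md, ".md");
    // → ["node", "indexing", "static-sites"]  (split on commas, trimmed, lower-cased)

    extractExcerpt(md, ".md");
    // → "tags: Node, indexing, Static-Sites Some body text here."
    // Only the leading "#" heading is stripped, so a tags: line still
    // leaks into the excerpt; may be worth a follow-up.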
@@ -36,25 +58,26 @@ async function collectFiles(relBase = "", flat = []) {
     const rel = path.posix.join(relBase, e.name);
     const absPath = path.join(ROOT, rel);
     if (e.isDirectory()) {
-      const top = rel.split("/")[0];
-      if (STATIC_TOPLEVEL.has(top)) continue;
       await collectFiles(rel, flat);
       continue;
     }
 
     const ext = path.posix.extname(e.name).toLowerCase();
-    if (![".md", ".html", ".pdf"].includes(ext)) continue;
+    if (![".md", ".html", ".pdf"].includes(ext) || e.name === "index.html") continue;
     const st = await fs.stat(absPath);
-    let title;
+    let raw, pdfData, title;
     if (ext === ".pdf") {
       const buffer = await fs.readFile(absPath);
-      const pdfData = await pdf(buffer);
+      pdfData = await pdf(buffer);
+      raw = pdfData.text;
       title = pdfData.info.Title || e.name.replace(/\.pdf$/, "").trim();
     } else {
-      const raw = await readHead(absPath);
+      raw = await readHead(absPath, true);
       title = parseTitle(raw, ext) || e.name.replace(new RegExp(`\\${ext}$`), "").trim();
     }
     const mtime = dateFromName(e.name) ?? st.mtimeMs;
+    const excerpt = extractExcerpt(raw, ext);
+    const tags = extractTags(raw, ext, pdfData);
 
     flat.push({
       type: "file",
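With these changes, each entry pushed into flat looks roughly like the sketch below (values hypothetical; fields that sit between the two hunks, such as the title, are elided):

    {
      type: "file",
      // ...fields outside these hunks (e.g. title) elided...
      path: "notes/2024-05-01-example.md",
      ext: ".md",
      pinned: false,
      mtime: 1714521600000,   // from the YYYY-MM-DD filename prefix, else fs mtime
      excerpt: "tags: Node, indexing, Static-Sites Some body text here.",
      tags: ["node", "indexing", "static-sites"]
    }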
@@ -63,18 +86,40 @@
       path: rel,
       ext,
       pinned: rel.startsWith("pinned/"),
-      mtime
+      mtime,
+      excerpt,
+      tags
     });
   }
   return flat;
 }
 
+async function detectSections() {
+  const topEntries = await fs.readdir(ROOT, { withFileTypes: true });
+  const sections = [];
+  for (const e of topEntries) {
+    if (!e.isDirectory() || e.name.startsWith(".")) continue;
+    const indexPath = path.join(ROOT, e.name, "index.html");
+    let isStatic = false;
+    try {
+      await fs.access(indexPath);
+      isStatic = true;
+    } catch {}
+    // Check if dynamic (has content files) - but since flat collects them, infer from flat later
+    sections.push({ name: e.name, isStatic });
+  }
+  return sections.sort((a, b) => a.name.localeCompare(b.name)); // Alpha sort
+}
+
 (async () => {
   try {
     const flat = await collectFiles();
-    const sections = [...new Set(flat.map(f => f.path.split("/")[0]))];
-    await fs.writeFile(OUT, JSON.stringify({ flat, sections }, null, 2));
-    console.log(`index.json built with ${flat.length} files across ${sections.length} sections.`);
+    const sections = await detectSections();
+    // Filter sections to those with content or static
+    const activeSections = sections.filter(s => s.isStatic || flat.some(f => f.path.split("/")[0] === s.name));
+    const allTags = [...new Set(flat.flatMap(f => f.tags))].sort();
+    await fs.writeFile(OUT, JSON.stringify({ flat, sections: activeSections, tags: allTags }, null, 2));
+    console.log(`index.json built: ${flat.length} files, ${activeSections.length} sections (${activeSections.filter(s => s.isStatic).length} static), ${allTags.length} tags.`);
   } catch (e) {
     console.error("Build failed:", e);
     process.exit(1);
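The output shape changes with this commit: sections entries go from plain strings to { name, isStatic } objects, and a top-level tags array is added, so any front end reading index.json needs a matching update. A hypothetical output, assuming an about/ section that has an index.html and a notes/ section that only has content files:

    {
      "flat": [ ... ],
      "sections": [
        { "name": "about", "isStatic": true },
        { "name": "notes", "isStatic": false }
      ],
      "tags": ["indexing", "node", "static-sites"]
    }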