import path from "node:path";
import fs from "node:fs/promises";

/** Maximum length (in UTF-16 code units) of the sanitized base name. */
const MAX_BASE_LEN = 120;

/** Upper bound on the `-N` suffixes `uniqueFilePath` will try before giving up. */
const MAX_UNIQUE_ATTEMPTS = 10_000;

/** Characters disallowed on Windows, path separators, and ASCII control chars. */
const ILLEGAL_FILENAME_CHARS = /[<>:"/\\|?*\x00-\x1f]/g;

/** Trailing dots/whitespace, which Windows refuses at the end of a name. */
const TRAILING_DOTS_OR_SPACES = /[.\s]+$/;

/**
 * Sanitize a filename for cross-platform safety while preserving most user intent
 * (unicode, spaces, dashes).
 *
 * - Strips characters illegal on Windows, path separators and control chars
 *   from both the base name and the extension.
 * - Collapses whitespace runs in the base and trims trailing dots/spaces.
 * - Truncates the base to `MAX_BASE_LEN` without splitting a surrogate pair
 *   and without leaving a trailing dot/space behind.
 * - Falls back to `"image"` when nothing is left of the base, and to `".png"`
 *   when there is no usable extension (missing, or a bare `"."`).
 *
 * @param name - User-supplied file name (may include an extension).
 * @returns The sanitized base name and the lower-cased extension (with its leading dot).
 */
export function sanitizeFilename(name: string): { base: string; ext: string } {
  // Keep the extension's original casing for `basename`: its suffix match is
  // case-sensitive, so passing a lower-cased ".png" would not strip ".PNG".
  const rawExt = path.extname(name);

  let base = path
    .basename(name, rawExt)
    .replace(ILLEGAL_FILENAME_CHARS, "")
    .replace(/\s+/g, " ")
    .replace(TRAILING_DOTS_OR_SPACES, "")
    .trim();

  if (base.length > MAX_BASE_LEN) {
    let cut = MAX_BASE_LEN;
    const lastUnit = base.charCodeAt(cut - 1);
    // A high surrogate at the cut point would be separated from its low
    // surrogate; drop it rather than emit a lone surrogate.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
    // Truncation can expose a new trailing dot/space, so strip again.
    base = base.slice(0, cut).replace(TRAILING_DOTS_OR_SPACES, "").trim();
  }
  if (!base) base = "image";

  const ext = rawExt
    .toLowerCase()
    .replace(ILLEGAL_FILENAME_CHARS, "")
    .replace(/\s+/g, "");

  // A bare "." (e.g. from "notes.") would produce a name ending in a dot.
  return { base, ext: ext.length > 1 ? ext : ".png" };
}

/** True when `err` is an object whose `code` property equals `code`. */
function hasErrorCode(err: unknown, code: string): boolean {
  return (
    typeof err === "object" &&
    err !== null &&
    (err as { code?: unknown }).code === code
  );
}

/**
 * Find a path under `dirAbs` that matches `${base}${ext}` (or `${base}-N${ext}` on
 * collision) and atomically reserve it by creating a 0-byte file with the
 * exclusive-create flag. The caller is expected to overwrite this placeholder
 * with the real bytes; this avoids a check-then-write race where two
 * concurrent uploads pick the same "unique" path.
 *
 * @param dirAbs - Directory in which to reserve the file.
 * @param base - Base name without extension.
 * @param ext - Extension including the leading dot.
 * @returns The reserved path.
 * @throws Any `fs.open`/`close` error other than `EEXIST` (e.g. permissions,
 *   missing directory), or an `Error` when every candidate name is taken.
 */
export async function uniqueFilePath(
  dirAbs: string,
  base: string,
  ext: string,
): Promise<string> {
  for (let attempt = 1; attempt <= MAX_UNIQUE_ATTEMPTS; attempt++) {
    const fileName = attempt === 1 ? `${base}${ext}` : `${base}-${attempt}${ext}`;
    const candidate = path.join(dirAbs, fileName);
    try {
      // "wx" fails with EEXIST if the path already exists, so a successful
      // open means this call created (and therefore owns) the file.
      const handle = await fs.open(candidate, "wx");
      await handle.close();
      return candidate;
    } catch (err: unknown) {
      // EEXIST → another caller (or prior run) holds this name; try the
      // next suffix. Anything else (perms, ENOENT on dir) is fatal.
      if (!hasErrorCode(err, "EEXIST")) throw err;
    }
  }
  throw new Error(
    `Could not find unique filename for ${base}${ext} after ${MAX_UNIQUE_ATTEMPTS} tries`,
  );
}

/**
 * Build a `YYYY/MM/DD` relative path (POSIX separators) from the local-time
 * calendar date of `d`. Month and day are zero-padded to two digits.
 */
export function dayPartition(d: Date): string {
  const y = d.getFullYear();
  const m = String(d.getMonth() + 1).padStart(2, "0");
  const day = String(d.getDate()).padStart(2, "0");
  return path.posix.join(String(y), m, day);
}

/** Letter ranges used by `letterBucket`; together they cover A-Z. */
const LETTER_RANGES: ReadonlyArray<{ range: string; letters: string }> = [
  { range: "A-E", letters: "ABCDE" },
  { range: "F-J", letters: "FGHIJ" },
  { range: "K-P", letters: "KLMNOP" },
  { range: "Q-U", letters: "QRSTU" },
  { range: "V-Z", letters: "VWXYZ" },
];

/**
 * Canonical filename for a cover's grid-preview WebP. Embeds the JAV code
 * (when known) so the data/thumbs/ folder is browsable by hand:
 * "DDT-203-2aa9...945f.webp" instead of just "2aa9...945f.webp".
 * The code is trimmed, upper-cased, and reduced to uppercase letters, digits
 * and dashes; anything else is dropped to keep filenames safe across
 * NTFS/ext4. `sha` is used as given.
 *
 * @param code - Optional code to prefix; null/undefined/empty yields no prefix.
 * @param sha - Hash string that forms the rest of the file name.
 */
export function canonicalThumbName(code: string | null | undefined, sha: string): string {
  const safeCode = (code ?? "").trim().toUpperCase().replace(/[^A-Z0-9-]/g, "");
  return safeCode ? `${safeCode}-${sha}.webp` : `${sha}.webp`;
}

/**
 * Letter-bucketed storage path keyed off the first letter of a JAV code.
 * Codes whose first letter isn't A-Z (null code, digit-prefix, non-Latin)
 * fall into the `#` fallback bucket so the on-disk layout stays clean.
 *
 * @param code - Code whose first (trimmed, upper-cased) character picks the bucket.
 * @returns The range label, the letter, and the POSIX-style relative
 *   directory (`<range>/<letter>`, or just `#` for the fallback).
 */
export function letterBucket(
  code: string | null | undefined,
): { range: string; letter: string; dirRel: string } {
  const ch = (code ?? "").trim().charAt(0).toUpperCase();
  if (!/^[A-Z]$/.test(ch)) {
    // Fallback bucket is a single level — there's only one possible
    // "letter" inside `#` so an extra `#/#/` layer would be redundant.
    return { range: "#", letter: "#", dirRel: "#" };
  }
  const r = LETTER_RANGES.find((x) => x.letters.includes(ch));
  // Defensive: LETTER_RANGES covers A-Z, so this is not expected to trigger.
  if (!r) return { range: "#", letter: "#", dirRel: "#" };
  return { range: r.range, letter: ch, dirRel: path.posix.join(r.range, ch) };
}