import "server-only";
import path from "node:path";
import fs from "node:fs/promises";
import type { Dirent, Stats } from "node:fs";
import { extractCode, normalizeCode } from "@/lib/jav/codeParser";
import { getAppSetting } from "@/lib/db/appSettings";
import { rawDb } from "@/lib/db/client";
import { syncVideoMetadataIndex } from "./metadata";

/** Lower-cased file extensions (with leading dot) treated as video files. */
export const VIDEO_EXTENSIONS = new Set([
  ".mp4",
  ".mkv",
  ".m4v",
  ".mov",
  ".webm",
  ".avi",
  ".wmv",
  ".ts",
  ".mpg",
  ".mpeg",
  ".flv",
]);

/** Lower-cased file extensions (with leading dot) treated as subtitle sidecars. */
const SUBTITLE_EXTENSIONS = new Set([".srt", ".vtt", ".ass", ".ssa"]);

/** How many directory levels below each `subtitleExtraPaths` root are walked. */
const SUBTITLE_ROOT_MAX_DEPTH = 3;

/** Codes per `IN (...)` statement; keeps well below SQLite's bind-parameter cap. */
const FLAG_SYNC_CHUNK = 500;

/** Characters that count as part of a code token (input is lower-cased first). */
const TOKEN_CHAR = /[a-z0-9]/;

/** One video file the index found on disk. */
export interface VideoFile {
  /** Absolute path on disk. */
  abs: string;
  /** Path relative to the configured video library root. */
  rel: string;
  /** Filename (with extension). */
  filename: string;
  /** Normalized JAV code parsed from the filename. */
  code: string;
  /** File size in bytes. */
  size: number;
  /** Last-modified timestamp (ms). */
  mtime: number;
}

/**
 * Lightweight scan-state record. Authoritative file data lives in the
 * `video_metadata` SQLite table — accessors below query it directly,
 * so this struct holds only what describes the most recent rescan.
 */
interface VideoIndex {
  /** When the index was last built. */
  lastScannedAt: number;
  /** All folder roots that were scanned, in order: main first, extras after.
   *  Used both to display in the UI and to detect setting changes. */
  rootsScanned: string[];
  /** Total files matched by the most recent scan. */
  count: number;
}

const EMPTY_INDEX: VideoIndex = {
  lastScannedAt: 0,
  rootsScanned: [],
  count: 0,
};

/** Scan state of the most recent completed rescan in this process. */
let cachedScanState: VideoIndex = EMPTY_INDEX;

/** The rescan currently running, if any; used to coalesce concurrent calls. */
let scanInFlight: Promise<VideoIndex> | null = null;

/** Columns of a `video_metadata` row needed to rebuild a VideoFile. */
interface CachedFileRow {
  abs_path: string;
  rel_path: string;
  code: string;
  size_bytes: number;
  mtime_ms: number;
}

interface WalkOpts {
  /** When true, ignore the dir-mtime cache and re-readdir every dir.
   *  Use after file edits that don't change the directory's mtime
   *  (e.g. a content rewrite without a rename). */
  force?: boolean;
}

/** Result of one walk over all configured roots. */
interface WalkResult {
  /** Every matched video file, sorted by code then filename. */
  files: VideoFile[];
  /** Same as `files.length`. */
  count: number;
  /** Every directory that was popped off the walk stack. */
  visitedDirs: Set<string>;
  /** Number of file rows reused from `video_metadata`. */
  reused: number;
  /** Number of directories whose files were re-stat'ed. */
  rescanned: number;
}

/**
 * Walk the configured roots and produce a flat VideoFile[]. The caller
 * writes the result to the `video_metadata` table — nothing is held in
 * memory beyond the duration of one rescan.
 *
 * Incremental: each directory's mtime is compared to a stored value
 * in `video_dir_mtimes`. If unchanged, the immediate-children file
 * rows for that dir are reused from `video_metadata` instead of
 * stat-ing every file. Subdirs are still walked (their mtimes may
 * have changed independently).
 *
 * @param roots Absolute directories to walk, in priority order.
 * @param opts  See {@link WalkOpts}.
 * @returns The matched files plus reuse/rescan counters for logging.
 */
async function walkAllRoots(roots: string[], opts: WalkOpts = {}): Promise<WalkResult> {
  // A forced walk starts from empty caches, so every dir looks "changed".
  const cachedMtimes = opts.force ? new Map<string, number>() : loadDirMtimeCache();
  const cachedFilesByDir = opts.force
    ? new Map<string, CachedFileRow[]>()
    : loadCachedFileIndex();

  const visitedDirs = new Set<string>();
  const files: VideoFile[] = [];
  let reused = 0;
  let rescanned = 0;

  for (const root of roots) {
    const stack: string[] = [root];
    while (stack.length > 0) {
      const dir = stack.pop();
      if (dir === undefined) break;
      visitedDirs.add(dir);

      let dirStat: Stats;
      try {
        dirStat = await fs.stat(dir);
      } catch {
        continue; // dir vanished mid-walk
      }
      const cachedMtime = cachedMtimes.get(dir);
      const dirUnchanged = cachedMtime != null && cachedMtime === dirStat.mtimeMs;

      // Always readdir: subdir mtimes are tracked independently, so the
      // subdir list is needed even when this dir's own files come from
      // the cache. The saving for an unchanged dir is the per-file stat.
      let entries: Dirent[];
      try {
        entries = await fs.readdir(dir, { withFileTypes: true });
      } catch {
        continue;
      }

      // Push subdirs onto the stack regardless of cache state.
      for (const entry of entries) {
        if (entry.isDirectory()) stack.push(path.join(dir, entry.name));
      }

      if (dirUnchanged) {
        // Reuse cached rows for files immediately in this directory.
        const cached = cachedFilesByDir.get(dir) ?? [];
        for (const row of cached) {
          files.push({
            abs: row.abs_path,
            rel: path.relative(root, row.abs_path),
            filename: path.basename(row.abs_path),
            code: row.code,
            size: row.size_bytes,
            mtime: row.mtime_ms,
          });
        }
        reused += cached.length;
        continue;
      }

      // Dir changed (or no cache entry yet): stat each matching file.
      rescanned++;
      for (const entry of entries) {
        if (!entry.isFile()) continue;
        const ext = path.extname(entry.name).toLowerCase();
        if (!VIDEO_EXTENSIONS.has(ext)) continue;

        // Files whose name yields no code are not indexed at all.
        const stem = entry.name.slice(0, entry.name.length - ext.length);
        const code = extractCode(stem);
        if (!code) continue;
        const norm = normalizeCode(code);
        if (!norm) continue;

        const abs = path.join(dir, entry.name);
        let st: Stats;
        try {
          st = await fs.stat(abs);
        } catch {
          continue; // file vanished between readdir and stat
        }
        files.push({
          abs,
          rel: path.relative(root, abs),
          filename: entry.name,
          code: norm,
          size: st.size,
          mtime: st.mtimeMs,
        });
      }

      // Record the mtime so the NEXT scan sees this dir as unchanged.
      cachedMtimes.set(dir, dirStat.mtimeMs);
    }
  }

  // Persist updated mtime cache for next scan.
  saveDirMtimeCache(cachedMtimes, visitedDirs);

  // Stable order across rescans.
  files.sort((a, b) => a.code.localeCompare(b.code) || a.filename.localeCompare(b.filename));

  return { files, count: files.length, visitedDirs, reused, rescanned };
}

/** Load all `video_dir_mtimes` rows into a Map keyed by abs_dir. */
function loadDirMtimeCache(): Map<string, number> {
  const rows = rawDb
    .prepare(`SELECT abs_dir, mtime_ms FROM video_dir_mtimes`)
    .all() as Array<{ abs_dir: string; mtime_ms: number }>;
  const out = new Map<string, number>();
  for (const r of rows) out.set(r.abs_dir, r.mtime_ms);
  return out;
}

/** Group the entire video_metadata table by dir_path so dir-cache
 *  reuse is a single in-memory lookup per dir. One linear scan of the
 *  table. */
function loadCachedFileIndex(): Map<string, CachedFileRow[]> {
  const rows = rawDb
    .prepare(`
      SELECT abs_path, rel_path, code, size_bytes, mtime_ms, dir_path
      FROM video_metadata
    `)
    .all() as Array<CachedFileRow & { dir_path: string }>;
  const out = new Map<string, CachedFileRow[]>();
  for (const r of rows) {
    const bucket = out.get(r.dir_path);
    if (bucket) bucket.push(r);
    else out.set(r.dir_path, [r]);
  }
  return out;
}

/**
 * Upsert dir mtimes for visited dirs and prune rows for dirs we didn't
 * see this scan (deleted folders). Runs as one transaction; a failure
 * is logged and swallowed so a cache write error never fails the scan.
 *
 * @param mtimes  Directory → mtime (ms) as observed or carried over.
 * @param visited Directories actually walked during this scan.
 */
function saveDirMtimeCache(mtimes: Map<string, number>, visited: Set<string>): void {
  const upsert = rawDb.prepare(`
    INSERT INTO video_dir_mtimes (abs_dir, mtime_ms, last_seen_at)
    VALUES (?, ?, ?)
    ON CONFLICT(abs_dir) DO UPDATE SET
      mtime_ms = excluded.mtime_ms,
      last_seen_at = excluded.last_seen_at
  `);
  const now = Date.now();
  const tx = rawDb.transaction(() => {
    for (const [dir, mtime] of mtimes) {
      // Only persist dirs we actually visited this scan — others may
      // have been moved/renamed and their cache entry is stale.
      if (!visited.has(dir)) continue;
      upsert.run(dir, mtime, now);
    }
    // Prune rows whose dir we didn't see this scan (deleted dirs).
    // One pass over the table, one DELETE per stale row.
    const allRows = rawDb
      .prepare(`SELECT abs_dir FROM video_dir_mtimes`)
      .all() as Array<{ abs_dir: string }>;
    const del = rawDb.prepare(`DELETE FROM video_dir_mtimes WHERE abs_dir = ?`);
    for (const r of allRows) {
      if (!visited.has(r.abs_dir)) del.run(r.abs_dir);
    }
  });
  try {
    tx();
  } catch (e) {
    console.error("[video] failed to save dir mtime cache:", e);
  }
}

/** True for "no character" (string edge) or any character outside a-z / 0-9. */
function isTokenBoundary(ch: string): boolean {
  return ch === "" || !TOKEN_CHAR.test(ch);
}

/**
 * Whether `token` occurs in `haystack` as a delimited token: each side
 * is either the string edge or a non-alphanumeric character. Every
 * occurrence is tried, not just the first. Both arguments are expected
 * to be lower-cased already.
 */
function containsCodeToken(haystack: string, token: string): boolean {
  if (token.length === 0) return false;
  let idx = haystack.indexOf(token);
  while (idx >= 0) {
    const end = idx + token.length;
    const before = idx === 0 ? "" : haystack.charAt(idx - 1);
    const after = end >= haystack.length ? "" : haystack.charAt(end);
    if (isTokenBoundary(before) && isTokenBoundary(after)) return true;
    idx = haystack.indexOf(token, idx + 1);
  }
  return false;
}

/**
 * Walk every place a sidecar subtitle could live and return the set of
 * canonical codes that have at least one. Cheap signal — no ffprobe.
 *
 *   - Each video's own directory: a subtitle file counts when its name
 *     starts with the video stem, or contains the video's code as a
 *     delimited token (so a stray `OTHER-001.srt` next to
 *     `YUJ-001.mp4` doesn't taint YUJ-001).
 *   - Each entry in `subtitleExtraPaths` (recursive walk, depth 3) —
 *     extracts the code from the filename directly.
 *   - data/generated-subtitles/<code>/ — directory name IS the code.
 *
 * Result is consumed once by syncHasSubtitleColumn and discarded — no
 * persistent in-memory copy.
 */
async function collectSubtitleCodes(files: VideoFile[]): Promise<Set<string>> {
  const codes = new Set<string>();

  // Same-folder scan: per video, look at sibling files. Cache directory
  // listings so a folder with N videos is only listed once.
  const dirCache = new Map<string, Dirent[]>();
  for (const file of files) {
    const dir = path.dirname(file.abs);
    let entries = dirCache.get(dir);
    if (!entries) {
      try {
        entries = await fs.readdir(dir, { withFileTypes: true });
      } catch {
        entries = []; // unreadable dir: treat as having no sidecars
      }
      dirCache.set(dir, entries);
    }

    const videoExt = path.extname(file.filename);
    const stemLower = file.filename.slice(0, file.filename.length - videoExt.length).toLowerCase();
    const stemDot = stemLower + ".";
    const codeLower = file.code.toLowerCase();

    for (const entry of entries) {
      if (!entry.isFile()) continue;
      const ext = path.extname(entry.name).toLowerCase();
      if (!SUBTITLE_EXTENSIONS.has(ext)) continue;
      const lower = entry.name.toLowerCase();
      // The code must appear as a delimited token: a bare `.includes`
      // would attribute `xyuj-001.srt` or `yuj-0012.srt` to YUJ-001.
      if (
        lower.startsWith(stemDot) ||
        lower === stemLower + ext ||
        containsCodeToken(lower, codeLower)
      ) {
        codes.add(file.code);
        break; // one sidecar is enough for this video
      }
    }
  }

  // Persistent subtitle library roots — extract codes from filenames.
  const extraRoots = (getAppSetting("subtitleExtraPaths") ?? []).filter(Boolean);
  for (const root of extraRoots) {
    await walkSubtitleRoot(root, codes, SUBTITLE_ROOT_MAX_DEPTH);
  }

  // data/generated-subtitles/<code>/ — directory name is the code.
  const generatedRoot = path.join(process.cwd(), "data", "generated-subtitles");
  try {
    const subdirs = await fs.readdir(generatedRoot, { withFileTypes: true });
    for (const d of subdirs) {
      if (!d.isDirectory()) continue;
      let entries: Dirent[];
      try {
        entries = await fs.readdir(path.join(generatedRoot, d.name), { withFileTypes: true });
      } catch {
        continue;
      }
      const hasSub = entries.some(
        (e) => e.isFile() && SUBTITLE_EXTENSIONS.has(path.extname(e.name).toLowerCase()),
      );
      if (!hasSub) continue;
      const norm = normalizeCode(d.name);
      if (norm) codes.add(norm);
    }
  } catch {
    /* generated-subtitles not present yet — fine */
  }

  return codes;
}

/**
 * Depth-limited walk of one subtitle library root. For every subtitle
 * file found, the code parsed from its filename is added to `out`.
 *
 * @param root     Directory to start from (depth 0).
 * @param out      Set that receives normalized codes; mutated in place.
 * @param maxDepth Deepest directory level (relative to `root`) that is read.
 */
async function walkSubtitleRoot(root: string, out: Set<string>, maxDepth: number): Promise<void> {
  type Frame = { dir: string; depth: number };
  const stack: Frame[] = [{ dir: root, depth: 0 }];
  while (stack.length > 0) {
    const frame = stack.pop();
    if (frame === undefined) break;
    const { dir, depth } = frame;

    let entries: Dirent[];
    try {
      entries = await fs.readdir(dir, { withFileTypes: true });
    } catch {
      continue; // unreadable or missing dir: skip it
    }
    for (const entry of entries) {
      if (entry.isDirectory()) {
        if (depth < maxDepth) stack.push({ dir: path.join(dir, entry.name), depth: depth + 1 });
        continue;
      }
      if (!entry.isFile()) continue;
      const ext = path.extname(entry.name).toLowerCase();
      if (!SUBTITLE_EXTENSIONS.has(ext)) continue;
      const stem = entry.name.slice(0, entry.name.length - ext.length);
      const code = extractCode(stem);
      if (!code) continue;
      const norm = normalizeCode(code);
      if (norm) out.add(norm);
    }
  }
}

/** Exposed for path-allowlist checks (e.g. subtitle file resolution). */
export function getConfiguredVideoRoots(): string[] {
  return configuredRoots();
}

/**
 * Read the `videoLibraryPath` and `videoExtraPaths` settings and return
 * the trimmed, non-empty roots: main first, extras after, in order.
 */
function configuredRoots(): string[] {
  const main = (getAppSetting("videoLibraryPath") || "").trim();
  const extras = getAppSetting("videoExtraPaths") ?? [];
  const out: string[] = [];
  if (main) out.push(main);
  for (const e of extras) {
    const t = (e ?? "").trim();
    if (t) out.push(t);
  }
  return out;
}

/** Order-sensitive, element-wise equality of two root lists. */
function rootsEqual(a: string[], b: string[]): boolean {
  return a.length === b.length && a.every((root, i) => root === b[i]);
}

/**
 * Scan-state probe — used by API routes to decide whether the cached
 * data still matches current settings. Returns the empty state if no
 * roots are configured or the configured roots differ from the ones
 * last scanned (caller can trigger a rescan).
 */
export function getVideoIndex(): VideoIndex {
  const roots = configuredRoots();
  if (roots.length === 0) return EMPTY_INDEX;
  if (!rootsEqual(cachedScanState.rootsScanned, roots)) return EMPTY_INDEX;
  return cachedScanState;
}

/**
 * Rebuild the index from disk. Coalesces concurrent calls: a call that
 * arrives while a scan is running receives that scan's promise, even
 * if its own `force` option differs. Authoritative data lands in the
 * `video_metadata` table; this function returns only scan-state
 * metadata.
 *
 * Default mode is incremental — directories whose mtime hasn't
 * changed since the last scan reuse cached file rows without a
 * stat per file. Pass `{force:true}` to bypass the dir-mtime
 * cache (e.g. after content edits that don't bump dir mtime).
 *
 * @returns Scan state of the completed scan (empty state when no roots
 *          are configured).
 */
export async function rescanVideoIndex(opts: { force?: boolean } = {}): Promise<VideoIndex> {
  if (scanInFlight) return scanInFlight;

  // configuredRoots() already trims and drops empty entries.
  const roots = configuredRoots();
  if (roots.length === 0) {
    // Handled before any in-flight promise exists. The previous
    // implementation returned from inside the async body before its
    // first await, so its `finally` cleared `scanInFlight` *before* the
    // outer assignment stored the promise — leaving a settled promise
    // in the slot forever and turning every later rescan into a no-op.
    cachedScanState = { ...EMPTY_INDEX, rootsScanned: [] };
    return cachedScanState;
  }

  // `.finally` callbacks run asynchronously, i.e. always after the
  // assignment below, so the slot is reliably cleared on both success
  // and failure.
  const scan = runRescan(roots, opts.force ?? false).finally(() => {
    scanInFlight = null;
  });
  scanInFlight = scan;
  return scan;
}

/**
 * One full rescan: walk the roots, persist the file table, refresh the
 * has_video / has_subtitle flags, then publish the new scan state.
 */
async function runRescan(roots: string[], force: boolean): Promise<VideoIndex> {
  const t0 = Date.now();
  const { files, count, reused, rescanned } = await walkAllRoots(roots, { force });
  const walkMs = Date.now() - t0;
  console.log(
    `[video] rescan walk in ${walkMs}ms — ${count} files (${reused} reused, ${rescanned} dir(s) rewalked${force ? ", forced" : ""})`,
  );

  // Persist the file table first — has_video / has_subtitle bulk
  // updates and metadata sync all run off it.
  await syncVideoMetadataIndex(files);
  syncHasVideoColumn(files);
  const subtitleCodes = await collectSubtitleCodes(files);
  syncHasSubtitleColumn(subtitleCodes);

  cachedScanState = {
    lastScannedAt: Date.now(),
    rootsScanned: [...roots],
    count,
  };
  return cachedScanState;
}

/** Boolean flag columns on `images` that mirror scan results. */
type ImageFlagColumn = "has_video" | "has_subtitle";

/**
 * Reset `column` to 0 on every flagged `images` row, then set it to 1
 * for rows whose upper-cased code is in `codes`. Runs as a single
 * transaction; a failure is logged and swallowed.
 *
 * `column` is interpolated into SQL, which is safe only because it is
 * restricted to the literal union above — never pass external input.
 */
function syncImageFlagColumn(column: ImageFlagColumn, codes: string[]): void {
  const tx = rawDb.transaction(() => {
    rawDb.prepare(`UPDATE images SET ${column} = 0 WHERE ${column} = 1`).run();
    // Chunk to stay well below SQLite's bind-parameter cap.
    for (let i = 0; i < codes.length; i += FLAG_SYNC_CHUNK) {
      const slice = codes.slice(i, i + FLAG_SYNC_CHUNK);
      const placeholders = slice.map(() => "?").join(",");
      rawDb
        .prepare(`UPDATE images SET ${column} = 1 WHERE upper(code) IN (${placeholders})`)
        .run(...slice);
    }
  });
  try {
    tx();
  } catch (e) {
    console.error(`[video] failed to sync ${column} column:`, e);
  }
}

/**
 * Mirror the freshly-walked code list into images.has_video so SQL
 * filters / counts can use the column directly.
 */
function syncHasVideoColumn(files: VideoFile[]): void {
  const codes = Array.from(new Set(files.map((f) => f.code)));
  syncImageFlagColumn("has_video", codes);
}

/** Mirror the freshly-walked subtitle code set into images.has_subtitle. */
function syncHasSubtitleColumn(subtitleCodes: Set<string>): void {
  syncImageFlagColumn("has_subtitle", Array.from(subtitleCodes));
}

/** Columns selected from `video_metadata` by findVideosForCode. */
interface VideoMetaRow {
  abs_path: string;
  rel_path: string;
  code: string;
  size_bytes: number;
  mtime_ms: number;
}

/**
 * Look up files for a single code. Reads directly from the
 * video_metadata table so the result is always current with the most
 * recent rescan.
 *
 * @param code Code to look up; normalized first, falling back to its
 *             upper-cased form when normalization yields nothing.
 * @returns Matching files ordered by relative path (case-insensitive);
 *          empty for a null/empty code.
 */
export function findVideosForCode(code: string | null | undefined): VideoFile[] {
  if (!code) return [];
  const norm = normalizeCode(code) ?? code.toUpperCase();
  const rows = rawDb
    .prepare(`
      SELECT abs_path, rel_path, code, size_bytes, mtime_ms
      FROM video_metadata
      WHERE upper(code) = ?
      ORDER BY rel_path COLLATE NOCASE
    `)
    .all(norm) as VideoMetaRow[];
  return rows.map((r) => ({
    abs: r.abs_path,
    rel: r.rel_path,
    filename: path.basename(r.abs_path),
    code: r.code,
    size: r.size_bytes,
    mtime: r.mtime_ms,
  }));
}

/** Set of every (upper-cased) code present in video_metadata — fast existence check. */
export function getCodesWithVideos(): Set<string> {
  const rows = rawDb
    .prepare(`
      SELECT DISTINCT upper(code) AS code FROM video_metadata
    `)
    .all() as Array<{ code: string }>;
  return new Set(rows.map((r) => r.code));
}

/** Set of every code with a discoverable subtitle sidecar. Reads from
 *  the images.has_subtitle column populated at rescan time. */
export function getCodesWithSubtitles(): Set<string> {
  const rows = rawDb
    .prepare(`
      SELECT DISTINCT upper(code) AS code
      FROM images
      WHERE has_subtitle = 1 AND code IS NOT NULL
    `)
    .all() as Array<{ code: string }>;
  return new Set(rows.map((r) => r.code));
}