Initial commit

2026-05-26 22:46:00 +02:00
commit 7e2c2ff89c
256 changed files with 51523 additions and 0 deletions
@@ -0,0 +1,59 @@
+import "server-only";
+import { spawn } from "node:child_process";
+
+// Process-lifetime memo of successfully probed durations, keyed by the
+// path string passed to probeDuration.
+const cache = new Map<string, number>();
+
+// Upper bound (ms) on a single ffprobe run before it is killed.
+const PROBE_TIMEOUT_MS = 10_000;
+
+/**
+ * Probe a video file's duration in seconds via ffprobe.
+ *
+ * Successful results are memoised per path for the lifetime of the
+ * process, on the assumption that a file's duration does not change.
+ * Failures are never cached, so a later call retries the probe.
+ *
+ * The probe is capped at PROBE_TIMEOUT_MS and tied to an optional
+ * AbortSignal so a hung ffprobe (network mount, weird codec, dead disk)
+ * can't leave the request awaiting forever or zombie the subprocess.
+ *
+ * @param abs    Path handed to ffprobe; also the cache key.
+ * @param signal Optional signal; aborting kills the probe and resolves null.
+ * @returns Duration in seconds, or null when ffprobe fails, times out, is
+ *          aborted, or prints something that is not a finite positive
+ *          number. The returned promise never rejects.
+ */
+export async function probeDuration(abs: string, signal?: AbortSignal): Promise<number | null> {
+  const cached = cache.get(abs);
+  if (cached !== undefined) return cached;
+  // Already cancelled: don't spawn a subprocess just to kill it again.
+  if (signal?.aborted) return null;
+
+  return new Promise((resolve) => {
+    const proc = spawn("ffprobe", [
+      "-v", "error",
+      "-show_entries", "format=duration",
+      "-of", "default=noprint_wrappers=1:nokey=1",
+      abs,
+    ]);
+    let out = "";
+    let settled = false;
+
+    // Function declarations are hoisted, so settle/kill can reference each
+    // other regardless of the order in which they are wired up below.
+    function settle(n: number | null): void {
+      if (settled) return;
+      settled = true;
+      clearTimeout(timeoutId);
+      signal?.removeEventListener("abort", kill);
+      if (n != null && Number.isFinite(n) && n > 0) {
+        cache.set(abs, n);
+        resolve(n);
+      } else {
+        resolve(null);
+      }
+    }
+
+    function kill(): void {
+      try { proc.kill("SIGKILL"); } catch { /* ignore */ }
+      settle(null);
+    }
+
+    const timeoutId = setTimeout(kill, PROBE_TIMEOUT_MS);
+    signal?.addEventListener("abort", kill, { once: true });
+
+    proc.stdout?.on("data", (d) => { out += d.toString(); });
+    // An empty or non-numeric stdout becomes 0/NaN and is rejected by settle.
+    proc.on("close", () => settle(Number(out.trim())));
+    proc.on("error", () => settle(null));
+  });
+}
@@ -0,0 +1,539 @@
+import "server-only";
+import path from "node:path";
+import fs from "node:fs/promises";
+import { extractCode, normalizeCode } from "@/lib/jav/codeParser";
+import { getAppSetting } from "@/lib/db/appSettings";
+import { rawDb } from "@/lib/db/client";
+import { syncVideoMetadataIndex } from "./metadata";
+
+/** Lower-case file extensions (leading dot included) that the library
+ *  walk treats as video files. */
+export const VIDEO_EXTENSIONS = new Set([
+  ".mp4", ".mkv", ".m4v", ".mov", ".webm", ".avi", ".wmv", ".ts", ".mpg", ".mpeg", ".flv",
+]);
+
+/** Lower-case file extensions (leading dot included) recognised as
+ *  subtitle sidecars. */
+const SUBTITLE_EXTENSIONS = new Set([".srt", ".vtt", ".ass", ".ssa"]);
+
+/** One video file the index found on disk. */
+export interface VideoFile {
+  /** Absolute path on disk. */
+  abs: string;
+  /** Path relative to the scanned root under which the file was found
+   *  (the main library root or one of the extra roots). */
+  rel: string;
+  /** Filename (with extension). */
+  filename: string;
+  /** Normalized JAV code parsed from the filename. */
+  code: string;
+  /** File size in bytes. */
+  size: number;
+  /** Last-modified timestamp (ms), as reported by fs.stat. */
+  mtime: number;
+}
+
+/**
+ * Lightweight scan-state record. Authoritative file data lives in the
+ * `video_metadata` SQLite table — accessors below query it directly,
+ * so this struct holds only what describes the most recent rescan.
+ */
+interface VideoIndex {
+  /** When the index was last built (Date.now() ms); 0 if never. */
+  lastScannedAt: number;
+  /** All folder roots that were scanned, in order: main first, extras after.
+   *  Used both to display in the UI and to detect setting changes. */
+  rootsScanned: string[];
+  /** Total files matched by the most recent scan. */
+  count: number;
+}
+
+/** Placeholder scan state: nothing scanned, no roots, zero files. */
+const EMPTY_INDEX: VideoIndex = {
+  lastScannedAt: 0,
+  rootsScanned: [],
+  count: 0,
+};
+
+/** Scan state written by the most recent rescan in this process. */
+let cachedScanState: VideoIndex = EMPTY_INDEX;
+/** Promise of the rescan currently running, or null when idle; lets
+ *  concurrent rescan requests share one walk. */
+let scanInFlight: Promise<VideoIndex> | null = null;
+
+/** Columns of a `video_metadata` row that the walker reuses when a
+ *  directory's mtime is unchanged. */
+interface CachedFileRow {
+  abs_path: string;
+  rel_path: string;
+  code: string;
+  size_bytes: number;
+  mtime_ms: number;
+}
+
+/** Options accepted by walkAllRoots. */
+interface WalkOpts {
+  /** When true, start from empty dir-mtime and file-row caches so every
+   *  directory is re-examined. Use after in-place file edits that don't
+   *  change the directory's mtime (e.g. content rewrite without rename). */
+  force?: boolean;
+}
+
+/**
+ * Walk the configured roots and produce a flat VideoFile[]. The caller
+ * writes the result to the `video_metadata` table — nothing is held in
+ * memory beyond the duration of one rescan.
+ *
+ * Incremental: each directory's mtime is compared to the value stored in
+ * `video_dir_mtimes`. Every directory is still listed (readdir) so its
+ * subdirectories can be discovered, but when the mtime is unchanged the
+ * file rows for that directory are reused from `video_metadata` instead
+ * of re-parsing names and stat-ing each file.
+ *
+ * Roots are canonicalised with path.resolve so the directory keys used
+ * here agree with the `dir_path` stored per file (no trailing separator).
+ * A directory is processed at most once per scan, so overlapping or
+ * duplicated roots do not yield duplicate entries; the first root in
+ * `roots` that reaches a directory determines `rel`.
+ *
+ * @param roots Folder roots to scan, in priority order.
+ * @param opts  `force: true` bypasses both caches.
+ * @returns The files found (sorted by code, then filename), their count,
+ *          the set of directories successfully listed, the number of
+ *          reused file rows and the number of directories re-examined.
+ */
+async function walkAllRoots(
+  roots: string[],
+  opts: WalkOpts = {},
+): Promise<{ files: VideoFile[]; count: number; visitedDirs: Set<string>; reused: number; rescanned: number }> {
+  const cachedMtimes = opts.force
+    ? new Map<string, number>()
+    : loadDirMtimeCache();
+  const visitedDirs = new Set<string>();
+  const files: VideoFile[] = [];
+  const cachedFilesByDir = opts.force
+    ? new Map<string, CachedFileRow[]>()
+    : loadCachedFileIndex();
+
+  let reused = 0;
+  let rescanned = 0;
+
+  for (const configuredRoot of roots) {
+    // A root typed as "/videos/" must key the caches as "/videos",
+    // otherwise the cached rows of files directly inside the root are
+    // never found and those files vanish on the next incremental scan.
+    const root = path.resolve(configuredRoot);
+    const stack: string[] = [root];
+    while (stack.length) {
+      const dir = stack.pop()!;
+      // Already handled via an earlier (overlapping or duplicate) root.
+      if (visitedDirs.has(dir)) continue;
+
+      let dirStat: import("node:fs").Stats;
+      let entries: import("node:fs").Dirent[];
+      try {
+        dirStat = await fs.stat(dir);
+        // The listing is needed in every case to find subdirectories,
+        // whose mtimes are tracked independently of their parent.
+        entries = await fs.readdir(dir, { withFileTypes: true });
+      } catch {
+        // Vanished or unreadable. Deliberately NOT marked as visited so
+        // its stored mtime is pruned and the next scan re-examines it
+        // instead of trusting a cache whose file rows are gone.
+        continue;
+      }
+      visitedDirs.add(dir);
+
+      for (const e of entries) {
+        if (e.isDirectory()) stack.push(path.join(dir, e.name));
+      }
+
+      const cachedMtime = cachedMtimes.get(dir);
+      const dirUnchanged = cachedMtime != null && cachedMtime === dirStat.mtimeMs;
+
+      if (dirUnchanged) {
+        // Reuse cached rows for files immediately in this directory.
+        const cached = cachedFilesByDir.get(dir);
+        if (cached) {
+          for (const row of cached) {
+            files.push({
+              abs: row.abs_path,
+              rel: path.relative(root, row.abs_path),
+              filename: path.basename(row.abs_path),
+              code: row.code,
+              size: row.size_bytes,
+              mtime: row.mtime_ms,
+            });
+          }
+          reused += cached.length;
+        }
+        continue;
+      }
+
+      // Dir changed (or no cache entry yet): parse + stat each video file.
+      rescanned++;
+      for (const e of entries) {
+        if (!e.isFile()) continue;
+        const ext = path.extname(e.name).toLowerCase();
+        if (!VIDEO_EXTENSIONS.has(ext)) continue;
+        const abs = path.join(dir, e.name);
+        const stem = e.name.slice(0, e.name.length - ext.length);
+        const code = extractCode(stem);
+        if (!code) continue;
+        const norm = normalizeCode(code);
+        if (!norm) continue;
+        let st: import("node:fs").Stats;
+        try {
+          st = await fs.stat(abs);
+        } catch {
+          continue; // file vanished between readdir and stat
+        }
+        files.push({
+          abs,
+          rel: path.relative(root, abs),
+          filename: e.name,
+          code: norm,
+          size: st.size,
+          mtime: st.mtimeMs,
+        });
+      }
+      // Record the fresh mtime so the NEXT scan can reuse this dir.
+      cachedMtimes.set(dir, dirStat.mtimeMs);
+    }
+  }
+
+  // Persist updated mtime cache for next scan.
+  saveDirMtimeCache(cachedMtimes, visitedDirs);
+
+  // Stable order across rescans.
+  files.sort((a, b) => a.code.localeCompare(b.code) || a.filename.localeCompare(b.filename));
+  return { files, count: files.length, visitedDirs, reused, rescanned };
+}
+
+/** Read every `video_dir_mtimes` row and return a Map from abs_dir to
+ *  its stored mtime_ms. */
+function loadDirMtimeCache(): Map<string, number> {
+  const stored = rawDb.prepare(`SELECT abs_dir, mtime_ms FROM video_dir_mtimes`).all() as Array<{ abs_dir: string; mtime_ms: number }>;
+  return new Map(stored.map((row): [string, number] => [row.abs_dir, row.mtime_ms]));
+}
+
+/** Bucket every video_metadata row under its dir_path so that reusing a
+ *  directory's cached files is one Map lookup. A single pass over the
+ *  table builds the whole index. */
+function loadCachedFileIndex(): Map<string, CachedFileRow[]> {
+  type RowWithDir = CachedFileRow & { dir_path: string };
+  const allRows = rawDb.prepare(`
+    SELECT abs_path, rel_path, code, size_bytes, mtime_ms, dir_path
+    FROM video_metadata
+  `).all() as Array<RowWithDir>;
+  const byDir = new Map<string, CachedFileRow[]>();
+  for (const row of allRows) {
+    let bucket = byDir.get(row.dir_path);
+    if (!bucket) {
+      bucket = [];
+      byDir.set(row.dir_path, bucket);
+    }
+    bucket.push(row);
+  }
+  return byDir;
+}
+
+/**
+ * Write the dir-mtime cache back to `video_dir_mtimes` in one
+ * transaction: upsert an entry for each visited directory that has a
+ * known mtime, then delete every stored row whose directory was not
+ * visited this scan (deleted or moved folders).
+ *
+ * Failures are logged and swallowed — the cache is only an optimisation.
+ */
+function saveDirMtimeCache(mtimes: Map<string, number>, visited: Set<string>): void {
+  const upsert = rawDb.prepare(`
+    INSERT INTO video_dir_mtimes (abs_dir, mtime_ms, last_seen_at)
+    VALUES (?, ?, ?)
+    ON CONFLICT(abs_dir) DO UPDATE SET
+      mtime_ms = excluded.mtime_ms,
+      last_seen_at = excluded.last_seen_at
+  `);
+  const seenAt = Date.now();
+
+  const persist = rawDb.transaction(() => {
+    // Entries for directories we did not reach this scan may be stale
+    // (moved/renamed), so only visited ones are written.
+    mtimes.forEach((mtime, dir) => {
+      if (visited.has(dir)) upsert.run(dir, mtime, seenAt);
+    });
+
+    // One pass over the stored rows removes everything not seen.
+    const stored = rawDb.prepare(`SELECT abs_dir FROM video_dir_mtimes`).all() as Array<{ abs_dir: string }>;
+    const remove = rawDb.prepare(`DELETE FROM video_dir_mtimes WHERE abs_dir = ?`);
+    stored
+      .filter((row) => !visited.has(row.abs_dir))
+      .forEach((row) => { remove.run(row.abs_dir); });
+  });
+
+  try {
+    persist();
+  } catch (err) {
+    console.error("[video] failed to save dir mtime cache:", err);
+  }
+}
+
+/**
+ * True when `token` occurs somewhere in `text` as a delimited token: the
+ * character on each side is either the edge of the string or not in
+ * [a-z0-9]. Every occurrence is examined, not just the first. Both
+ * arguments are expected to be lower-cased already.
+ */
+function hasDelimitedCodeToken(text: string, token: string): boolean {
+  if (!token) return false;
+  const isBoundary = (c: string | undefined) => c === undefined || !/[a-z0-9]/.test(c);
+  for (let idx = text.indexOf(token); idx >= 0; idx = text.indexOf(token, idx + 1)) {
+    if (isBoundary(text[idx - 1]) && isBoundary(text[idx + token.length])) return true;
+  }
+  return false;
+}
+
+/**
+ * Walk every place a sidecar subtitle could live and return the set of
+ * canonical codes that have at least one. Cheap signal — no ffprobe.
+ *
+ *  - Each video's own directory: a subtitle file counts when its name
+ *    starts with the video stem followed by ".", or contains the video's
+ *    code as a delimited token (so `YUJ-0012.srt` or `XYUJ-001.srt` next
+ *    to `YUJ-001.mp4` does not taint YUJ-001).
+ *  - Each entry in `subtitleExtraPaths` (recursive walk, depth 3) —
+ *    extracts the code from the filename directly.
+ *  - data/generated-subtitles/<code>/ — directory name IS the code.
+ *
+ * @param files Video files from the current walk.
+ * @returns Set of normalized codes with at least one subtitle found.
+ */
+async function collectSubtitleCodes(files: VideoFile[]): Promise<Set<string>> {
+  const codes = new Set<string>();
+
+  // Same-folder scan: per video, look at sibling files. Cache directory
+  // listings so a folder with N videos is only listed once.
+  const dirCache = new Map<string, import("node:fs").Dirent[]>();
+  for (const file of files) {
+    const dir = path.dirname(file.abs);
+    let entries = dirCache.get(dir);
+    if (!entries) {
+      try {
+        entries = await fs.readdir(dir, { withFileTypes: true });
+      } catch {
+        entries = []; // unreadable folder: treat as having no sidecars
+      }
+      dirCache.set(dir, entries);
+    }
+    const stem = file.filename.slice(0, file.filename.length - path.extname(file.filename).length);
+    const stemPrefix = stem.toLowerCase() + ".";
+    const codeLower = file.code.toLowerCase();
+    for (const e of entries) {
+      if (!e.isFile()) continue;
+      const ext = path.extname(e.name).toLowerCase();
+      if (!SUBTITLE_EXTENSIONS.has(ext)) continue;
+      const lower = e.name.toLowerCase();
+      // `<stem>.<anything>` covers both `<stem>.srt` and `<stem>.en.srt`.
+      if (lower.startsWith(stemPrefix) || hasDelimitedCodeToken(lower, codeLower)) {
+        codes.add(file.code);
+        break;
+      }
+    }
+  }
+
+  // Persistent subtitle library roots — extract codes from filenames.
+  const extraRoots = (getAppSetting("subtitleExtraPaths") ?? []).filter(Boolean);
+  for (const root of extraRoots) {
+    await walkSubtitleRoot(root, codes, 3);
+  }
+
+  // data/generated-subtitles/<code>/ — directory name is the code.
+  const generatedRoot = path.join(process.cwd(), "data", "generated-subtitles");
+  try {
+    const subdirs = await fs.readdir(generatedRoot, { withFileTypes: true });
+    for (const d of subdirs) {
+      if (!d.isDirectory()) continue;
+      const dirAbs = path.join(generatedRoot, d.name);
+      let entries: import("node:fs").Dirent[];
+      try {
+        entries = await fs.readdir(dirAbs, { withFileTypes: true });
+      } catch {
+        continue;
+      }
+      const hasSub = entries.some(
+        (e) => e.isFile() && SUBTITLE_EXTENSIONS.has(path.extname(e.name).toLowerCase()),
+      );
+      if (hasSub) {
+        const norm = normalizeCode(d.name);
+        if (norm) codes.add(norm);
+      }
+    }
+  } catch { /* generated-subtitles not present yet — fine */ }
+
+  return codes;
+}
+
+/**
+ * Depth-limited walk of a subtitle library folder. For every subtitle
+ * file found, the code parsed from its filename is normalized and added
+ * to `out`. Directories deeper than `maxDepth` levels below `root` are
+ * not entered; unreadable directories are skipped silently.
+ */
+async function walkSubtitleRoot(root: string, out: Set<string>, maxDepth: number): Promise<void> {
+  const pending: Array<{ dir: string; depth: number }> = [{ dir: root, depth: 0 }];
+  while (pending.length > 0) {
+    const current = pending.pop()!;
+    let listing: import("node:fs").Dirent[];
+    try {
+      listing = await fs.readdir(current.dir, { withFileTypes: true });
+    } catch {
+      continue;
+    }
+    for (const entry of listing) {
+      const full = path.join(current.dir, entry.name);
+      if (entry.isDirectory()) {
+        if (current.depth < maxDepth) {
+          pending.push({ dir: full, depth: current.depth + 1 });
+        }
+        continue;
+      }
+      if (!entry.isFile()) continue;
+      const ext = path.extname(entry.name).toLowerCase();
+      if (!SUBTITLE_EXTENSIONS.has(ext)) continue;
+      const rawCode = extractCode(entry.name.slice(0, entry.name.length - ext.length));
+      if (!rawCode) continue;
+      const normalized = normalizeCode(rawCode);
+      if (normalized) out.add(normalized);
+    }
+  }
+}
+
+/**
+ * Public accessor for the currently configured video roots (main root
+ * first, then extras). Intended for path-allowlist checks such as
+ * subtitle file resolution.
+ */
+export function getConfiguredVideoRoots(): string[] {
+  const roots = configuredRoots();
+  return roots;
+}
+
+/**
+ * Read the video roots from app settings: the trimmed
+ * `videoLibraryPath` (if non-empty) followed by each non-empty, trimmed
+ * entry of `videoExtraPaths`, in their configured order.
+ */
+function configuredRoots(): string[] {
+  const primary = (getAppSetting("videoLibraryPath") || "").trim();
+  const extras = getAppSetting("videoExtraPaths") ?? [];
+  const roots: string[] = primary ? [primary] : [];
+  for (const extra of extras) {
+    const trimmed = (extra ?? "").trim();
+    if (!trimmed) continue;
+    roots.push(trimmed);
+  }
+  return roots;
+}
+
+/** True when both root lists have the same length and the same string
+ *  at every position (order-sensitive, exact comparison). */
+function rootsEqual(a: string[], b: string[]): boolean {
+  return a.length === b.length && a.every((root, i) => root === b[i]);
+}
+
+/**
+ * Scan-state probe for API routes. Returns the cached state of the last
+ * rescan only while it still describes the currently configured roots;
+ * when no roots are configured, or the roots differ from the ones that
+ * were scanned, the empty state is returned so the caller can trigger a
+ * rescan.
+ */
+export function getVideoIndex(): VideoIndex {
+  const current = configuredRoots();
+  const stillValid = current.length > 0 && rootsEqual(cachedScanState.rootsScanned, current);
+  return stillValid ? cachedScanState : EMPTY_INDEX;
+}
+
+/**
+ * Rebuild the index from disk. Authoritative data lands in the
+ * `video_metadata` table; this function returns only scan-state
+ * metadata.
+ *
+ * Concurrent calls are coalesced: a call made while a scan is running
+ * receives that scan's promise, whatever its own `force` flag says.
+ *
+ * Default mode is incremental — directories whose mtime hasn't changed
+ * since the last scan reuse cached file rows. Pass `{force:true}` to
+ * bypass the dir-mtime cache (e.g. after content edits that don't bump
+ * dir mtime).
+ *
+ * @param opts `force` bypasses the incremental caches.
+ * @returns The scan state after the rescan (the empty state when no
+ *          roots are configured).
+ */
+export async function rescanVideoIndex(opts: { force?: boolean } = {}): Promise<VideoIndex> {
+  const roots = configuredRoots();
+  if (scanInFlight) return scanInFlight;
+
+  const cleanRoots = roots.map((r) => (r ?? "").trim()).filter(Boolean);
+  if (cleanRoots.length === 0) {
+    // Nothing to scan. This path completes synchronously, so it must not
+    // register itself as an in-flight scan — otherwise the marker would
+    // never be cleared and every later rescan would be short-circuited.
+    cachedScanState = { ...EMPTY_INDEX };
+    return cachedScanState;
+  }
+
+  const runScan = async (): Promise<VideoIndex> => {
+    const t0 = Date.now();
+    const { files, count, reused, rescanned } = await walkAllRoots(cleanRoots, { force: opts.force });
+    const walkMs = Date.now() - t0;
+    console.log(
+      `[video] rescan walk in ${walkMs}ms — ${count} files (${reused} reused, ${rescanned} dir(s) rewalked${opts.force ? ", forced" : ""})`,
+    );
+    // Persist the file table first — has_video / has_subtitle bulk
+    // updates and metadata sync all run off it.
+    await syncVideoMetadataIndex(files);
+    syncHasVideoColumn(files);
+    const subtitleCodes = await collectSubtitleCodes(files);
+    syncHasSubtitleColumn(subtitleCodes);
+
+    cachedScanState = {
+      lastScannedAt: Date.now(),
+      rootsScanned: cleanRoots,
+      count,
+    };
+    return cachedScanState;
+  };
+
+  // `.finally` runs in a later microtask, i.e. always after the
+  // assignment below, so the marker is reliably reset on success and on
+  // failure alike.
+  scanInFlight = runScan().finally(() => {
+    scanInFlight = null;
+  });
+  return scanInFlight;
+}
+
+/**
+ * Bring images.has_video in line with the freshly walked files: inside
+ * one transaction every flag is cleared, then set again for each code
+ * that has at least one file. Errors are logged, not thrown.
+ */
+function syncHasVideoColumn(files: VideoFile[]): void {
+  const uniqueCodes = [...new Set(files.map((f) => f.code))];
+  // Batches keep each statement well below SQLite's bind-parameter cap.
+  const BATCH = 500;
+  const apply = rawDb.transaction(() => {
+    rawDb.prepare(`UPDATE images SET has_video = 0 WHERE has_video = 1`).run();
+    for (let start = 0; start < uniqueCodes.length; start += BATCH) {
+      const batch = uniqueCodes.slice(start, start + BATCH);
+      const marks = new Array(batch.length).fill("?").join(",");
+      rawDb.prepare(
+        `UPDATE images SET has_video = 1 WHERE upper(code) IN (${marks})`,
+      ).run(...batch);
+    }
+  });
+  try {
+    apply();
+  } catch (err) {
+    console.error("[video] failed to sync has_video column:", err);
+  }
+}
+
+/**
+ * Bring images.has_subtitle in line with the given code set: inside one
+ * transaction every flag is cleared, then set again for each listed
+ * code. Errors are logged, not thrown.
+ */
+function syncHasSubtitleColumn(subtitleCodes: Set<string>): void {
+  const allCodes = [...subtitleCodes];
+  // Batches keep each statement well below SQLite's bind-parameter cap.
+  const BATCH = 500;
+  const apply = rawDb.transaction(() => {
+    rawDb.prepare(`UPDATE images SET has_subtitle = 0 WHERE has_subtitle = 1`).run();
+    for (let start = 0; start < allCodes.length; start += BATCH) {
+      const batch = allCodes.slice(start, start + BATCH);
+      const marks = new Array(batch.length).fill("?").join(",");
+      rawDb.prepare(
+        `UPDATE images SET has_subtitle = 1 WHERE upper(code) IN (${marks})`,
+      ).run(...batch);
+    }
+  });
+  try {
+    apply();
+  } catch (err) {
+    console.error("[video] failed to sync has_subtitle column:", err);
+  }
+}
+
+/** Shape of the `video_metadata` columns selected by findVideosForCode. */
+interface VideoMetaRow {
+  abs_path: string;
+  rel_path: string;
+  code: string;
+  size_bytes: number;
+  mtime_ms: number;
+}
+
+/**
+ * Return the indexed files for one code, ordered by relative path
+ * (case-insensitive). The code is normalized first, falling back to its
+ * upper-cased form when normalization yields nothing. Reads straight
+ * from video_metadata, so results reflect the most recent rescan. An
+ * empty/absent code gives an empty list.
+ */
+export function findVideosForCode(code: string | null | undefined): VideoFile[] {
+  if (!code) return [];
+  const lookupKey = normalizeCode(code) ?? code.toUpperCase();
+  const matches = rawDb.prepare(`
+    SELECT abs_path, rel_path, code, size_bytes, mtime_ms
+    FROM video_metadata
+    WHERE upper(code) = ?
+    ORDER BY rel_path COLLATE NOCASE
+  `).all(lookupKey) as VideoMetaRow[];
+  const result: VideoFile[] = [];
+  for (const row of matches) {
+    result.push({
+      abs: row.abs_path,
+      rel: row.rel_path,
+      filename: path.basename(row.abs_path),
+      code: row.code,
+      size: row.size_bytes,
+      mtime: row.mtime_ms,
+    });
+  }
+  return result;
+}
+
+/** Upper-cased set of all distinct codes that have a row in
+ *  video_metadata — a quick membership test for "has a video". */
+export function getCodesWithVideos(): Set<string> {
+  const found = rawDb.prepare(`
+    SELECT DISTINCT upper(code) AS code FROM video_metadata
+  `).all() as Array<{ code: string }>;
+  const codes = new Set<string>();
+  for (const row of found) codes.add(row.code);
+  return codes;
+}
+
+/** Upper-cased set of all distinct codes flagged has_subtitle = 1 in the
+ *  images table; that column is filled in at rescan time. */
+export function getCodesWithSubtitles(): Set<string> {
+  const found = rawDb.prepare(`
+    SELECT DISTINCT upper(code) AS code FROM images WHERE has_subtitle = 1 AND code IS NOT NULL
+  `).all() as Array<{ code: string }>;
+  const codes = new Set<string>();
+  for (const row of found) codes.add(row.code);
+  return codes;
+}
@@ -0,0 +1,58 @@
+import "server-only";
+import path from "node:path";
+import { rawDb } from "@/lib/db/client";
+
+/** A subtitle file manually attached to one (code, part) pair; the
+ *  camelCase view of a `manual_subtitles` row. */
+export interface ManualSubtitle {
+  code: string;
+  partIdx: number;
+  /** Resolved (path.resolve) path of the subtitle file. */
+  absPath: string;
+  /** Date.now() timestamp (ms) recorded when the file was attached. */
+  attachedAt: number;
+}
+
+/** Raw column shape of a `manual_subtitles` row as returned by SQLite. */
+interface ManualSubtitleRow {
+  code: string;
+  part_idx: number;
+  abs_path: string;
+  attached_at: number;
+}
+
+/** Convert a snake_case `manual_subtitles` row to its camelCase entry. */
+function rowToEntry(r: ManualSubtitleRow): ManualSubtitle {
+  const { code, part_idx: partIdx, abs_path: absPath, attached_at: attachedAt } = r;
+  return { code, partIdx, absPath, attachedAt };
+}
+
+/** List the manual subtitles attached to the given (code, part) pair,
+ *  most recently attached first. */
+export function listManualSubtitlesForVariant(code: string, partIdx: number): ManualSubtitle[] {
+  const matches = rawDb.prepare(`
+    SELECT code, part_idx, abs_path, attached_at FROM manual_subtitles
+    WHERE code = ? AND part_idx = ?
+    ORDER BY attached_at DESC
+  `).all(code, partIdx) as ManualSubtitleRow[];
+  const entries: ManualSubtitle[] = [];
+  for (const row of matches) entries.push(rowToEntry(row));
+  return entries;
+}
+
+/**
+ * True iff the resolved form of `abs` is recorded against any
+ * (code, part) in `manual_subtitles`.
+ *
+ * Stored paths are kept as typed. On Windows, where paths are
+ * case-insensitive on disk, both sides are folded with SQL LOWER()
+ * before comparing; on other platforms the comparison is exact.
+ */
+export function isManualSubtitlePath(abs: string): boolean {
+  const target = path.resolve(abs);
+  const stmt = process.platform === "win32"
+    ? rawDb.prepare(`
+      SELECT 1 FROM manual_subtitles WHERE LOWER(abs_path) = LOWER(?) LIMIT 1
+    `)
+    : rawDb.prepare(`SELECT 1 FROM manual_subtitles WHERE abs_path = ? LIMIT 1`);
+  return Boolean(stmt.get(target));
+}
+
+/** Record `absPath` (resolved) as a manual subtitle for (code, partIdx),
+ *  stamping the current time; an existing matching row is replaced. */
+export function attachManualSubtitle(code: string, partIdx: number, absPath: string): void {
+  const insert = rawDb.prepare(`
+    INSERT OR REPLACE INTO manual_subtitles (code, part_idx, abs_path, attached_at)
+    VALUES (?, ?, ?, ?)
+  `);
+  const resolved = path.resolve(absPath);
+  insert.run(code, partIdx, resolved, Date.now());
+}
+
+/**
+ * Remove the manual-subtitle record for (code, partIdx, absPath).
+ *
+ * The path is resolved first. On Windows the stored path is matched
+ * case-insensitively (SQL LOWER on both sides), mirroring
+ * isManualSubtitlePath, so a file attached as `C:\Subs\a.srt` can be
+ * detached as `c:\subs\A.srt`; elsewhere the match is exact. Removing a
+ * record that does not exist is a no-op.
+ */
+export function detachManualSubtitle(code: string, partIdx: number, absPath: string): void {
+  const resolved = path.resolve(absPath);
+  const pathMatch = process.platform === "win32"
+    ? "LOWER(abs_path) = LOWER(?)"
+    : "abs_path = ?";
+  rawDb.prepare(`
+    DELETE FROM manual_subtitles WHERE code = ? AND part_idx = ? AND ${pathMatch}
+  `).run(code, partIdx, resolved);
+}
@@ -0,0 +1,580 @@
+import "server-only";
+import path from "node:path";
+import { spawn } from "node:child_process";
+import fs from "node:fs/promises";
+import { revalidatePath } from "next/cache";
+import { rawDb } from "@/lib/db/client";
+import { getAppSetting } from "@/lib/db/appSettings";
+import { classifyGroup, compilePatterns } from "./partClassify";
+import type { VideoFile } from "./index";
+
+// Probe timeout in milliseconds. NOTE(review): not referenced in the part
+// of this file visible here — presumably used by the ffprobe routine
+// further down; confirm.
+const PROBE_TIMEOUT_MS = 10_000;
+
+/** How a file is served for playback, as stored in
+ *  video_metadata.playback_mode. */
+export type PlaybackMode = "direct" | "transcode";
+
+/**
+ * camelCase view of one `video_metadata` row (see mapRow). The first
+ * five fields come from the filesystem walk; the probe fields are reset
+ * to null by the index sync whenever a file's size or mtime changes.
+ */
+export interface StoredVideoMetadata {
+  absPath: string;
+  relPath: string;
+  code: string;
+  sizeBytes: number;
+  mtimeMs: number;
+  /** Null while the file has not been probed since it last changed. */
+  probedAt: number | null;
+  probeError: string | null;
+  durationSec: number | null;
+  videoCodec: string | null;
+  videoBFrames: number | null;
+  width: number | null;
+  height: number | null;
+  videoBitrate: number | null;
+  /** Null when the stored value is absent or not a known PlaybackMode. */
+  playbackMode: PlaybackMode | null;
+  /** Null when the stored value is absent or not one of the known kinds. */
+  partKind: "part" | "variant" | "single" | null;
+  partIndex: number | null;
+  variantGroup: string | null;
+}
+
+/** Raw snake_case shape of a `video_metadata` row as read with
+ *  `SELECT *`; enum-like columns arrive as plain strings and are
+ *  validated in mapRow. */
+interface VideoMetadataRow {
+  abs_path: string;
+  rel_path: string;
+  code: string;
+  size_bytes: number;
+  mtime_ms: number;
+  probed_at: number | null;
+  probe_error: string | null;
+  duration_sec: number | null;
+  video_codec: string | null;
+  video_b_frames: number | null;
+  width: number | null;
+  height: number | null;
+  video_bitrate: number | null;
+  playback_mode: string | null;
+  part_kind: string | null;
+  part_index: number | null;
+  variant_group: string | null;
+}
+
+/**
+ * Partial shape of ffprobe JSON output. Every field is optional, and
+ * `bit_rate` / `duration` are typed as strings here.
+ * NOTE(review): the code that parses this is outside the visible part of
+ * the file — presumably the numeric strings go through
+ * parseFiniteNumber / parseNonNegativeNumber; confirm.
+ */
+interface FfprobeJson {
+  streams?: Array<{
+    codec_name?: string;
+    width?: number;
+    height?: number;
+    bit_rate?: string;
+    has_b_frames?: number;
+  }>;
+  format?: {
+    duration?: string;
+    bit_rate?: string;
+  };
+}
+
+/**
+ * Convert a raw `video_metadata` row into StoredVideoMetadata, or return
+ * null when there is no row. `playback_mode` and `part_kind` are passed
+ * through only when they hold one of the known literals; any other value
+ * becomes null.
+ */
+function mapRow(row: VideoMetadataRow | undefined): StoredVideoMetadata | null {
+  if (!row) return null;
+
+  let playbackMode: StoredVideoMetadata["playbackMode"] = null;
+  switch (row.playback_mode) {
+    case "direct":
+    case "transcode":
+      playbackMode = row.playback_mode;
+      break;
+  }
+
+  let partKind: StoredVideoMetadata["partKind"] = null;
+  switch (row.part_kind) {
+    case "part":
+    case "variant":
+    case "single":
+      partKind = row.part_kind;
+      break;
+  }
+
+  return {
+    absPath: row.abs_path,
+    relPath: row.rel_path,
+    code: row.code,
+    sizeBytes: row.size_bytes,
+    mtimeMs: row.mtime_ms,
+    probedAt: row.probed_at,
+    probeError: row.probe_error,
+    durationSec: row.duration_sec,
+    videoCodec: row.video_codec,
+    videoBFrames: row.video_b_frames,
+    width: row.width,
+    height: row.height,
+    videoBitrate: row.video_bitrate,
+    playbackMode,
+    partKind,
+    partIndex: row.part_index,
+    variantGroup: row.variant_group,
+  };
+}
+
+/** Coerce a value to a number and return it only when it is finite and
+ *  strictly positive; null/undefined, "N/A" and everything else give
+ *  null. */
+function parseFiniteNumber(value: unknown): number | null {
+  if (value === null || value === undefined || value === "N/A") return null;
+  const parsed = typeof value === "number" ? value : Number(value);
+  if (!Number.isFinite(parsed)) return null;
+  return parsed > 0 ? parsed : null;
+}
+
+/** Coerce a value to a number and return it only when it is finite and
+ *  not negative (zero is accepted); null/undefined, "N/A" and everything
+ *  else give null. */
+function parseNonNegativeNumber(value: unknown): number | null {
+  if (value === null || value === undefined || value === "N/A") return null;
+  const parsed = typeof value === "number" ? value : Number(value);
+  if (!Number.isFinite(parsed)) return null;
+  return parsed >= 0 ? parsed : null;
+}
+
+/** True when the stored row still describes the file on disk: identical
+ *  size and an mtime that differs by less than one millisecond. */
+function isStatMatch(row: StoredVideoMetadata, sizeBytes: number, mtimeMs: number): boolean {
+  if (row.sizeBytes !== sizeBytes) return false;
+  const drift = Math.abs(row.mtimeMs - mtimeMs);
+  return drift < 1;
+}
+
+/** Fetch the stored metadata for one file by its exact abs_path, or
+ *  null when the file has no `video_metadata` row. */
+export function getStoredVideoMetadata(absPath: string): StoredVideoMetadata | null {
+  const stmt = rawDb.prepare(`SELECT * FROM video_metadata WHERE abs_path = ?`);
+  const row = stmt.get(absPath) as VideoMetadataRow | undefined;
+  return mapRow(row);
+}
+
+/** List the stored metadata of every file whose code matches `code`
+ *  case-insensitively, ordered by rel_path. An empty/absent code gives
+ *  an empty list. */
+export function listStoredVideoMetadataForCode(code: string | null | undefined): StoredVideoMetadata[] {
+  if (!code) return [];
+  const matches = rawDb.prepare(`
+    SELECT * FROM video_metadata
+    WHERE upper(code) = upper(?)
+    ORDER BY rel_path ASC
+  `).all(code) as VideoMetadataRow[];
+  const result: StoredVideoMetadata[] = [];
+  for (const raw of matches) {
+    const mapped = mapRow(raw);
+    if (mapped !== null) result.push(mapped);
+  }
+  return result;
+}
+
+/**
+ * Produce a plain object holding exactly the StoredVideoMetadata fields
+ * (in declaration order), or null for a null input. Listing the fields
+ * explicitly keeps any extra properties on `meta` out of the result.
+ */
+export function serializeVideoMetadata(meta: StoredVideoMetadata | null) {
+  if (!meta) return null;
+  const {
+    absPath, relPath, code, sizeBytes, mtimeMs,
+    probedAt, probeError, durationSec,
+    videoCodec, videoBFrames, width, height, videoBitrate,
+    playbackMode, partKind, partIndex, variantGroup,
+  } = meta;
+  return {
+    absPath,
+    relPath,
+    code,
+    sizeBytes,
+    mtimeMs,
+    probedAt,
+    probeError,
+    durationSec,
+    videoCodec,
+    videoBFrames,
+    width,
+    height,
+    videoBitrate,
+    playbackMode,
+    partKind,
+    partIndex,
+    variantGroup,
+  };
+}
+
+/**
+ * Make the `video_metadata` table mirror the given walk result.
+ *
+ * In one transaction every file is upserted by abs_path and every row
+ * whose path is not in `files` is deleted. When an existing row's size
+ * or mtime differs from the walked values, all probe-derived columns are
+ * reset to NULL so the file is probed again. Part/variant classification
+ * is then recomputed, and re-probing of unprobed files is started in the
+ * background without being awaited.
+ *
+ * `dir_path` is derived with path.dirname so it is the same string the
+ * directory walker uses as its key, including for files that sit
+ * directly in a filesystem or drive root.
+ *
+ * @param files Every video file found by the current walk.
+ */
+export async function syncVideoMetadataIndex(files: VideoFile[]): Promise<void> {
+  const found = new Set(files.map((file) => file.abs));
+  const upsert = rawDb.prepare(`
+    INSERT INTO video_metadata (abs_path, rel_path, code, size_bytes, mtime_ms, dir_path)
+    VALUES (?, ?, ?, ?, ?, ?)
+    ON CONFLICT(abs_path) DO UPDATE SET
+      rel_path = excluded.rel_path,
+      code = excluded.code,
+      dir_path = excluded.dir_path,
+      probed_at = CASE
+        WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
+        ELSE video_metadata.probed_at
+      END,
+      probe_error = CASE
+        WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
+        ELSE video_metadata.probe_error
+      END,
+      duration_sec = CASE
+        WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
+        ELSE video_metadata.duration_sec
+      END,
+      video_codec = CASE
+        WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
+        ELSE video_metadata.video_codec
+      END,
+      video_b_frames = CASE
+        WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
+        ELSE video_metadata.video_b_frames
+      END,
+      width = CASE
+        WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
+        ELSE video_metadata.width
+      END,
+      height = CASE
+        WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
+        ELSE video_metadata.height
+      END,
+      video_bitrate = CASE
+        WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
+        ELSE video_metadata.video_bitrate
+      END,
+      playback_mode = CASE
+        WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
+        ELSE video_metadata.playback_mode
+      END,
+      size_bytes = excluded.size_bytes,
+      mtime_ms = excluded.mtime_ms
+  `);
+  const deleteStale = rawDb.prepare(`DELETE FROM video_metadata WHERE abs_path = ?`);
+  const tx = rawDb.transaction(() => {
+    for (const file of files) {
+      // path.dirname keeps the separator for root-level files ("/" or
+      // "C:\"), which manual slicing at the last separator would lose.
+      const dir = path.dirname(file.abs);
+      upsert.run(file.abs, file.rel, file.code, file.size, file.mtime, dir);
+    }
+    // Anything in the table that the walk did not find is gone from disk.
+    const rows = rawDb.prepare(`SELECT abs_path FROM video_metadata`).all() as Array<{ abs_path: string }>;
+    for (const row of rows) {
+      if (!found.has(row.abs_path)) deleteStale.run(row.abs_path);
+    }
+  });
+  tx();
+  classifyAndPersist(files);
+
+  // Probe-data refresh runs in the background. Awaiting here used to
+  // block rescan responses for minutes on libraries with many drifted
+  // files (e.g. after a bulk rename). Pages are revalidated as probes
+  // complete, so there is no batch-level waiting.
+  void reprobeDirtyFiles(files);
+}
+
+// Number of background workers that re-probe files concurrently.
+const REPROBE_CONCURRENCY = 2;
+
+/**
+ * Background re-probe of files that have neither probe data nor a
+ * recorded probe error.
+ *
+ * Only files that are both flagged in the table and present in `files`
+ * are probed. REPROBE_CONCURRENCY workers pull from a shared cursor; the
+ * first successful probe for each distinct code triggers a
+ * revalidatePath on the detail route. Per-file failures are logged and
+ * do not stop the remaining work; this function does not reject.
+ */
+async function reprobeDirtyFiles(files: VideoFile[]): Promise<void> {
+  let unprobed: Array<{ abs_path: string }>;
+  try {
+    unprobed = rawDb
+      .prepare(`SELECT abs_path FROM video_metadata WHERE probed_at IS NULL AND probe_error IS NULL`)
+      .all() as Array<{ abs_path: string }>;
+  } catch (e) {
+    console.error("[video] reprobe-dirty query failed:", e);
+    return;
+  }
+  if (unprobed.length === 0) return;
+
+  const unprobedPaths = new Set(unprobed.map((row) => row.abs_path));
+  const queue = files.filter((candidate) => unprobedPaths.has(candidate.abs));
+  if (queue.length === 0) return;
+
+  // Shared position in the queue; each worker claims the next index.
+  let next = 0;
+  // Codes already revalidated, so each code is revalidated at most once.
+  const revalidated = new Set<string>();
+
+  const worker = async (): Promise<void> => {
+    while (next < queue.length) {
+      const file = queue[next++];
+      if (!file) break;
+      try {
+        await probeVideoMetadata(file);
+        if (revalidated.has(file.code)) continue;
+        revalidated.add(file.code);
+        try { revalidatePath("/id/[code]", "page"); } catch { /* ignore */ }
+      } catch (e) {
+        console.error(`[video] reprobe failed for ${file.abs}:`, e);
+      }
+    }
+  };
+
+  const workers = Array.from({ length: REPROBE_CONCURRENCY }, () => worker());
+  try {
+    await Promise.all(workers);
+  } catch { /* swallowed */ }
+}
+
+/**
+ * Re-derive part/variant classification for all scanned files from the
+ * current `partSuffixPatterns` setting and write it to `video_metadata`.
+ *
+ * Files are grouped by code, each group is classified on its filename
+ * stems, and every result is persisted inside a single transaction.
+ * Probe data is not consulted.
+ */
+function classifyAndPersist(files: VideoFile[]): void {
+  const patterns = compilePatterns(getAppSetting("partSuffixPatterns") ?? []);
+
+  // Bucket files by their code; classification is per code group.
+  const groups = new Map<string, VideoFile[]>();
+  for (const file of files) {
+    let bucket = groups.get(file.code);
+    if (!bucket) {
+      bucket = [];
+      groups.set(file.code, bucket);
+    }
+    bucket.push(file);
+  }
+
+  const persist = rawDb.prepare(`
+    UPDATE video_metadata SET part_kind = ?, part_index = ?, variant_group = ?
+    WHERE abs_path = ?
+  `);
+  const applyAll = rawDb.transaction(() => {
+    groups.forEach((members) => {
+      // The absolute path doubles as the classifier key so results map
+      // straight back onto rows.
+      const classified = classifyGroup(
+        members.map((member) => ({ key: member.abs, stem: stemOf(member.filename) })),
+        patterns,
+      );
+      classified.forEach((entry) => {
+        persist.run(entry.partKind, entry.partIndex, entry.variantGroup, entry.key);
+      });
+    });
+  });
+  applyAll();
+}
+
+/** Return `filename` without its final extension (as reported by `path.extname`). */
+function stemOf(filename: string): string {
+  const extLength = path.extname(filename).length;
+  if (extLength === 0) return filename;
+  return filename.slice(0, filename.length - extLength);
+}
+
+/** One subtitle stream found inside a container by `runFfprobeSubtitles`. */
+export interface SubtitleStreamInfo {
+  /** Ordinal among the subtitle streams (0-based), not the absolute stream index. */
+  index: number;
+  /** Lower-cased ffprobe codec name, or "unknown" when ffprobe reported none. */
+  codec: string;
+  /** `language` stream tag, or null when absent. */
+  language: string | null;
+  /** `title` stream tag, or null when absent. */
+  title: string | null;
+  /** True when `codec` is in IMAGE_SUBTITLE_CODECS. */
+  isImageBased: boolean;
+  /** True when `codec` is in TEXT_SUBTITLE_CODECS. Both flags are false for unlisted codecs. */
+  isTextBased: boolean;
+}
+
+// Lower-case ffprobe codec names used to set SubtitleStreamInfo.isTextBased.
+const TEXT_SUBTITLE_CODECS = new Set(["subrip", "ass", "ssa", "mov_text", "webvtt", "text"]);
+// Lower-case ffprobe codec names used to set SubtitleStreamInfo.isImageBased.
+const IMAGE_SUBTITLE_CODECS = new Set(["hdmv_pgs_subtitle", "dvd_subtitle", "dvb_subtitle", "dvbsub", "pgssub"]);
+
+/**
+ * The fields of one `streams[]` entry in ffprobe's JSON output that the
+ * subtitle probe reads. Everything is optional because the JSON is
+ * cast, not validated.
+ */
+interface FfprobeStream {
+  index?: number;
+  codec_type?: string;
+  codec_name?: string;
+  tags?: { language?: string; title?: string };
+}
+
+/**
+ * Enumerate subtitle streams in a container. Computed on demand — not
+ * persisted, since users frequently remux subs in/out and a stale list
+ * is worse than re-probing.
+ *
+ * Never rejects: resolves with [] when ffprobe cannot be spawned, exits
+ * non-zero, prints unparseable JSON, or runs longer than
+ * PROBE_TIMEOUT_MS (in which case the subprocess is SIGKILLed).
+ *
+ * @param absPath Absolute path of the video container to inspect.
+ * @returns One entry per subtitle stream, in ffprobe's output order.
+ */
+export async function runFfprobeSubtitles(absPath: string): Promise<SubtitleStreamInfo[]> {
+  return new Promise((resolve) => {
+    const proc = spawn("ffprobe", [
+      "-v", "error",
+      "-select_streams", "s",
+      "-show_entries", "stream=index,codec_name,codec_type:stream_tags=language,title",
+      "-of", "json",
+      absPath,
+    ]);
+    let out = "";
+    let settled = false;
+    // First caller wins; later events (e.g. "close" after a timeout kill) are ignored.
+    const settle = (val: SubtitleStreamInfo[]) => { if (!settled) { settled = true; clearTimeout(t); resolve(val); } };
+    const t = setTimeout(() => { try { proc.kill("SIGKILL"); } catch {} settle([]); }, PROBE_TIMEOUT_MS);
+    // Decode on the stream, not per chunk: stream titles are often
+    // multi-byte (CJK) and a character split across two chunks would
+    // otherwise be turned into replacement characters.
+    proc.stdout?.setEncoding("utf8");
+    proc.stdout?.on("data", (d: string) => { out += d; });
+    proc.on("error", () => settle([]));
+    proc.on("close", (code) => {
+      if (code !== 0) { settle([]); return; }
+      try {
+        const json = JSON.parse(out) as { streams?: FfprobeStream[] };
+        const streams = (json.streams ?? []).filter((s) => s.codec_type === "subtitle");
+        const result: SubtitleStreamInfo[] = streams.map((s, i) => {
+          const codec = (s.codec_name ?? "unknown").toLowerCase();
+          return {
+            // Use the per-codec_type ordinal — that's what ffmpeg's
+            // 0:s:N mapping wants, NOT the absolute stream index.
+            index: i,
+            codec,
+            language: typeof s.tags?.language === "string" ? s.tags.language : null,
+            title: typeof s.tags?.title === "string" ? s.tags.title : null,
+            isImageBased: IMAGE_SUBTITLE_CODECS.has(codec),
+            isTextBased: TEXT_SUBTITLE_CODECS.has(codec),
+          };
+        });
+        settle(result);
+      } catch {
+        settle([]);
+      }
+    });
+  });
+}
+
+/**
+ * Probe the first video stream and the container of `absPath` with
+ * ffprobe and return the parsed fields (any of which may be null).
+ *
+ * Rejects when `signal` is or becomes aborted ("ffprobe aborted"), the
+ * probe exceeds PROBE_TIMEOUT_MS ("ffprobe timed out"), ffprobe cannot
+ * be spawned, it exits non-zero (message is its stderr when present), or
+ * its output is not valid JSON. On timeout/abort the subprocess is
+ * SIGKILLed.
+ *
+ * @param absPath Absolute path of the video file.
+ * @param signal  Optional cancellation signal.
+ */
+async function runFfprobe(absPath: string, signal?: AbortSignal): Promise<{
+  durationSec: number | null;
+  videoCodec: string | null;
+  videoBFrames: number | null;
+  width: number | null;
+  height: number | null;
+  videoBitrate: number | null;
+}> {
+  return new Promise((resolve, reject) => {
+    // Refuse before spawning when the caller has already cancelled.
+    // Spawning first and returning early would leave the child without
+    // an "error" listener, so a failed spawn (e.g. ffprobe not
+    // installed) would surface as an unhandled "error" event.
+    if (signal?.aborted) {
+      reject(new Error("ffprobe aborted"));
+      return;
+    }
+
+    const proc = spawn("ffprobe", [
+      "-v", "error",
+      "-select_streams", "v:0",
+      "-show_entries", "stream=codec_name,width,height,bit_rate,has_b_frames:format=duration,bit_rate",
+      "-of", "json",
+      absPath,
+    ]);
+    let out = "";
+    let err = "";
+    let settled = false;
+
+    // Run `fn` at most once and release the timer / abort listener first.
+    const settle = (fn: () => void) => {
+      if (settled) return;
+      settled = true;
+      clearTimeout(timeoutId);
+      if (signal && abortHandler) signal.removeEventListener("abort", abortHandler);
+      fn();
+    };
+    const kill = (message: string) => {
+      try { proc.kill("SIGKILL"); } catch {}
+      settle(() => reject(new Error(message)));
+    };
+
+    const timeoutId = setTimeout(() => kill("ffprobe timed out"), PROBE_TIMEOUT_MS);
+    const abortHandler = signal ? () => kill("ffprobe aborted") : null;
+    if (signal && abortHandler) {
+      signal.addEventListener("abort", abortHandler, { once: true });
+    }
+
+    // Decode on the stream so a multi-byte character split across two
+    // chunks (e.g. a non-ASCII path echoed on stderr) is not mangled.
+    proc.stdout?.setEncoding("utf8");
+    proc.stderr?.setEncoding("utf8");
+    proc.stdout?.on("data", (d: string) => { out += d; });
+    proc.stderr?.on("data", (d: string) => { err += d; });
+    proc.on("error", (e) => settle(() => reject(e)));
+    proc.on("close", (code) => {
+      settle(() => {
+        if (code !== 0) {
+          reject(new Error(err.trim() || `ffprobe exited ${code}`));
+          return;
+        }
+        try {
+          const json = JSON.parse(out) as FfprobeJson;
+          const stream = json.streams?.[0] ?? {};
+          const streamBitrate = parseFiniteNumber(stream.bit_rate);
+          const formatBitrate = parseFiniteNumber(json.format?.bit_rate);
+          resolve({
+            durationSec: parseFiniteNumber(json.format?.duration),
+            videoCodec: typeof stream.codec_name === "string" ? stream.codec_name : null,
+            videoBFrames: parseNonNegativeNumber(stream.has_b_frames),
+            width: parseFiniteNumber(stream.width),
+            height: parseFiniteNumber(stream.height),
+            // Prefer the stream's own bitrate; fall back to the container's.
+            videoBitrate: streamBitrate ?? formatBitrate,
+          });
+        } catch (e) {
+          reject(e);
+        }
+      });
+    });
+  });
+}
+
+/**
+ * Return stored metadata for `file`, probing it with ffprobe first when
+ * the stored row is missing, was never probed, or no longer matches the
+ * file's current size/mtime.
+ *
+ * A successful probe overwrites the row's probe columns and clears
+ * `probe_error`. A failed probe stores the error message (truncated to
+ * 500 chars) so the file is not probed again until it changes. A probe
+ * that failed because `signal` was aborted is NOT stored: cancellation
+ * says nothing about the file, and recording it would make the
+ * early-return below treat the file as permanently failed.
+ *
+ * @param file   File to probe.
+ * @param signal Optional cancellation signal forwarded to ffprobe.
+ * @returns The stored row after the attempt, or a placeholder with
+ *          `probeError: "metadata unavailable"` when no row exists.
+ * @throws Whatever `fs.stat` throws when the file is not accessible.
+ */
+export async function probeVideoMetadata(file: VideoFile, signal?: AbortSignal): Promise<StoredVideoMetadata> {
+  const stat = await fs.stat(file.abs);
+  const existing = getStoredVideoMetadata(file.abs);
+  // Already has a verdict (success or error) for this exact size/mtime.
+  if (existing && isStatMatch(existing, stat.size, stat.mtimeMs)) {
+    if (existing.probeError || existing.probedAt != null) return existing;
+  }
+
+  const base = {
+    absPath: file.abs,
+    relPath: file.rel,
+    code: file.code,
+    sizeBytes: stat.size,
+    mtimeMs: stat.mtimeMs,
+    // Playback mode is carried over unchanged across re-probes.
+    playbackMode: existing?.playbackMode ?? null,
+  };
+
+  try {
+    const probed = await runFfprobe(file.abs, signal);
+    rawDb.prepare(`
+      INSERT INTO video_metadata (
+        abs_path, rel_path, code, size_bytes, mtime_ms, probed_at, probe_error,
+        duration_sec, video_codec, video_b_frames, width, height, video_bitrate, playback_mode
+      ) VALUES (?, ?, ?, ?, ?, ?, NULL, ?, ?, ?, ?, ?, ?, ?)
+      ON CONFLICT(abs_path) DO UPDATE SET
+        rel_path = excluded.rel_path,
+        code = excluded.code,
+        size_bytes = excluded.size_bytes,
+        mtime_ms = excluded.mtime_ms,
+        probed_at = excluded.probed_at,
+        probe_error = NULL,
+        duration_sec = excluded.duration_sec,
+        video_codec = excluded.video_codec,
+        video_b_frames = excluded.video_b_frames,
+        width = excluded.width,
+        height = excluded.height,
+        video_bitrate = excluded.video_bitrate,
+        playback_mode = excluded.playback_mode
+    `).run(
+      base.absPath, base.relPath, base.code, base.sizeBytes, base.mtimeMs, Date.now(),
+      probed.durationSec, probed.videoCodec, probed.videoBFrames, probed.width, probed.height, probed.videoBitrate, base.playbackMode,
+    );
+  } catch (e) {
+    // Skip persistence for caller-initiated cancellation so the next
+    // call retries; every other failure is recorded against the file.
+    if (!signal?.aborted) {
+      rawDb.prepare(`
+      INSERT INTO video_metadata (
+        abs_path, rel_path, code, size_bytes, mtime_ms, probed_at, probe_error, playback_mode
+      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+      ON CONFLICT(abs_path) DO UPDATE SET
+        rel_path = excluded.rel_path,
+        code = excluded.code,
+        size_bytes = excluded.size_bytes,
+        mtime_ms = excluded.mtime_ms,
+        probed_at = excluded.probed_at,
+        probe_error = excluded.probe_error,
+        playback_mode = excluded.playback_mode
+    `).run(
+        base.absPath, base.relPath, base.code, base.sizeBytes, base.mtimeMs, Date.now(),
+        e instanceof Error ? e.message.slice(0, 500) : "ffprobe failed",
+        base.playbackMode,
+      );
+    }
+  }
+
+  return getStoredVideoMetadata(file.abs) ?? {
+    ...base,
+    probedAt: null,
+    probeError: "metadata unavailable",
+    durationSec: null,
+    videoCodec: null,
+    videoBFrames: null,
+    width: null,
+    height: null,
+    videoBitrate: null,
+    partKind: null,
+    partIndex: null,
+    variantGroup: null,
+  };
+}
+
+/**
+ * Store `mode` as the playback mode for `file`, creating the
+ * `video_metadata` row when it does not exist yet. The row's path, code,
+ * size and mtime columns are refreshed from `file` at the same time.
+ */
+export function setVideoPlaybackMode(file: VideoFile, mode: PlaybackMode | null): void {
+  const upsertMode = rawDb.prepare(`
+    INSERT INTO video_metadata (abs_path, rel_path, code, size_bytes, mtime_ms, playback_mode)
+    VALUES (?, ?, ?, ?, ?, ?)
+    ON CONFLICT(abs_path) DO UPDATE SET
+      rel_path = excluded.rel_path,
+      code = excluded.code,
+      size_bytes = excluded.size_bytes,
+      mtime_ms = excluded.mtime_ms,
+      playback_mode = excluded.playback_mode
+  `);
+  upsertMode.run(file.abs, file.rel, file.code, file.size, file.mtime, mode);
+}
+
+/**
+ * Format a duration in seconds as `m:ss`, or `h:mm:ss` from one hour up.
+ * The value is rounded to whole seconds first. Returns null for
+ * missing, non-finite, zero or negative input.
+ */
+export function formatDuration(sec: number | null | undefined): string | null {
+  if (sec == null || !Number.isFinite(sec) || sec <= 0) return null;
+  const rounded = Math.round(sec);
+  const hours = Math.floor(rounded / 3600);
+  const minutes = Math.floor((rounded % 3600) / 60);
+  const seconds = rounded % 60;
+  const twoDigits = (value: number): string => String(value).padStart(2, "0");
+  return hours > 0
+    ? `${hours}:${twoDigits(minutes)}:${twoDigits(seconds)}`
+    : `${minutes}:${twoDigits(seconds)}`;
+}
+
+/**
+ * Format a bitrate in bits per second using decimal units: one decimal
+ * place for Mbps, whole numbers for Kbps and bps. Returns null for
+ * missing, non-finite, zero or negative input.
+ */
+export function formatBitrate(bps: number | null | undefined): string | null {
+  if (bps == null || !Number.isFinite(bps) || bps <= 0) return null;
+  if (bps < 1_000) return `${Math.round(bps)} bps`;
+  if (bps < 1_000_000) return `${Math.round(bps / 1_000)} Kbps`;
+  return `${(bps / 1_000_000).toFixed(1)} Mbps`;
+}
+
+/**
+ * Format a byte count using 1024-based steps labelled B/KB/MB/GB/TB.
+ * Plain bytes are rounded to an integer; larger units show two decimals
+ * below 10 and one decimal from 10 up. Values beyond the TB range stay
+ * in TB. Returns null for missing, non-finite or negative input.
+ */
+export function formatBytes(bytes: number | null | undefined): string | null {
+  if (bytes == null || !Number.isFinite(bytes) || bytes < 0) return null;
+  const units = ["B", "KB", "MB", "GB", "TB"];
+  let value = bytes;
+  let unit = 0;
+  for (; value >= 1024 && unit < units.length - 1; unit++) {
+    value /= 1024;
+  }
+  if (unit === 0) return `${Math.round(value)} ${units[unit]}`;
+  const decimals = value >= 10 ? 1 : 2;
+  return `${value.toFixed(decimals)} ${units[unit]}`;
+}
+
+/** Format as `WIDTHxHEIGHT`; null when either dimension is missing or zero. */
+export function formatResolution(width: number | null | undefined, height: number | null | undefined): string | null {
+  if (width && height) {
+    return `${width}x${height}`;
+  }
+  return null;
+}
+
+/** Display labels keyed by lower-cased codec name. */
+const CODEC_LABELS: ReadonlyMap<string, string> = new Map([
+  ["h264", "H.264"],
+  ["hevc", "HEVC"],
+  ["h265", "HEVC"],
+  ["av1", "AV1"],
+  ["vp9", "VP9"],
+  ["mpeg4", "MPEG-4"],
+]);
+
+/**
+ * Turn a codec name into a display label. Known codecs (matched
+ * case-insensitively) get a curated label; anything else is
+ * upper-cased. Returns null for missing or empty input.
+ *
+ * A Map is used rather than an object literal so that names which
+ * happen to match `Object.prototype` members (e.g. "constructor") fall
+ * through to the upper-case path instead of returning a non-string.
+ */
+export function formatCodec(codec: string | null | undefined): string | null {
+  if (!codec) return null;
+  return CODEC_LABELS.get(codec.toLowerCase()) ?? codec.toUpperCase();
+}
+
+/**
+ * Build a one-line summary — resolution, codec, bitrate, size, duration —
+ * joined with " · ". Pieces that cannot be formatted are left out.
+ * Returns null when there is no metadata, the probe failed, or no piece
+ * could be formatted.
+ */
+export function formatVideoSummary(meta: StoredVideoMetadata | null | undefined): string | null {
+  if (!meta || meta.probeError) return null;
+  const candidates = [
+    formatResolution(meta.width, meta.height),
+    formatCodec(meta.videoCodec),
+    formatBitrate(meta.videoBitrate),
+    formatBytes(meta.sizeBytes),
+    formatDuration(meta.durationSec),
+  ];
+  const pieces: string[] = [];
+  for (const piece of candidates) {
+    if (piece) pieces.push(piece);
+  }
+  if (pieces.length === 0) return null;
+  return pieces.join(" · ");
+}
@@ -0,0 +1,254 @@
+/**
+ * Token-grammar classifier for video filenames in a JAVID group.
+ *
+ * Patterns use a simplified token grammar (option A1 from the mockups):
+ *   - `{N}` — one or more digits, captured as the part index
+ *   - `{L}` — single letter A–Z, captured (A=1, B=2, ...)
+ *   - everything else is a literal character
+ * A pattern must contain exactly one token.
+ *
+ * Patterns match at the END of the filename stem (no extension),
+ * case-insensitive.
+ *
+ * Classification rules for files sharing one normalized JAV code:
+ *   - "part"    — stem ends with a configured pattern; index is the
+ *                 captured numeric/letter value. The stem with the
+ *                 matched suffix removed is the part's variant group.
+ *   - "variant" — stem does NOT match any pattern but either equals a
+ *                 part's variant group or extends it with a dot
+ *                 (`<group>.<anything>`). When no stem in a multi-file
+ *                 group matches any pattern, every file is a variant
+ *                 of their shared common prefix.
+ *   - "single"  — lone file in its code group, or an unmatched stem
+ *                 that cannot be attached to any part's variant group.
+ *
+ * Tiebreak for "default variant" (the one to play first): the file
+ * whose stem equals the variant_group exactly. Otherwise the
+ * alphabetically first stem in the group.
+ */
+/** A part-suffix pattern after compilation by `compileToken`. */
+export interface CompiledPattern {
+  /** Original token-grammar source. */
+  source: string;
+  /** Compiled regex anchored to end-of-stem (case-insensitive); capture group 1 holds the token's text. */
+  re: RegExp;
+  /** What the captured token represents: `{N}` → "digits", `{L}` → "letter". */
+  kind: "digits" | "letter";
+}
+
+/** Minimal description of one file presented to the classifier. */
+export interface ClassifyInput {
+  /** Stable identifier, opaque to the classifier; echoed back as `ClassifyResult.key`. */
+  key: string;
+  /** Filename stem (no extension), as on disk. */
+  stem: string;
+}
+
+/** Classification of one input file, as produced by `classifyGroup`. */
+export interface ClassifyResult {
+  /** The `ClassifyInput.key` this result belongs to. */
+  key: string;
+  partKind: "part" | "variant" | "single";
+  /** 1-based sort index for parts; null otherwise. */
+  partIndex: number | null;
+  /**
+   * Stem-with-suffix-stripped — variants share this with their part.
+   * Null for singles. When no stem in the group matched a pattern this
+   * is the group's common prefix (or the file's own stem if that prefix
+   * is empty).
+   */
+  variantGroup: string | null;
+}
+
+/** Characters that carry special meaning in a RegExp source and must be escaped in literals. */
+const REGEX_META_RE = /[.*+?^${}()|[\]\\]/g;
+
+/**
+ * Compile one token-grammar pattern into an end-anchored,
+ * case-insensitive regex whose capture group 1 is the token's text.
+ *
+ * Never throws. Returns null when the pattern is unusable:
+ *   - empty source,
+ *   - a `{` that is unclosed or opens anything other than `{N}` / `{L}`,
+ *   - more than one token,
+ *   - no token at all (a pure literal cannot yield a part index).
+ *
+ * @param source Pattern in token grammar, e.g. `-cd{N}` or `.part{L}`.
+ */
+export function compileToken(source: string): CompiledPattern | null {
+  if (!source) return null;
+  let kind: "digits" | "letter" | null = null;
+  let body = "";
+  let i = 0;
+  while (i < source.length) {
+    const c = source[i]!;
+    if (c === "{") {
+      // A brace must open exactly `{N}` or `{L}`; anything else is invalid.
+      const close = source.indexOf("}", i);
+      if (close < 0) return null;
+      const tok = source.slice(i, close + 1);
+      if (tok === "{N}") {
+        if (kind != null) return null; // only one capture per pattern
+        body += "(\\d+)";
+        kind = "digits";
+      } else if (tok === "{L}") {
+        if (kind != null) return null;
+        body += "([A-Za-z])";
+        kind = "letter";
+      } else {
+        return null;
+      }
+      i = close + 1;
+    } else {
+      // Literal character: escape it so it matches itself.
+      body += c.replace(REGEX_META_RE, "\\$&");
+      i++;
+    }
+  }
+  if (kind == null) return null;
+  return {
+    source,
+    re: new RegExp(body + "$", "i"),
+    kind,
+  };
+}
+
+/**
+ * Compile every source with `compileToken`, keeping input order and
+ * silently leaving out the ones that fail to compile.
+ */
+export function compilePatterns(sources: string[]): CompiledPattern[] {
+  return sources
+    .map((source) => compileToken(source))
+    .filter((compiled): compiled is CompiledPattern => compiled !== null);
+}
+
+/**
+ * Convert a captured token into a 1-based part index.
+ *
+ *   - "digits": the decimal value, which must be a finite number > 0.
+ *   - "letter": A/a → 1 … Z/z → 26, read from the first character.
+ *
+ * Returns null when the capture does not yield a valid index. That
+ * includes an empty capture for "letter", whose character code is NaN
+ * and would otherwise slip past both range comparisons.
+ */
+function indexFromCapture(capture: string, kind: "digits" | "letter"): number | null {
+  if (kind === "digits") {
+    const n = Number(capture);
+    return Number.isFinite(n) && n > 0 ? Math.trunc(n) : null;
+  }
+  // Letter: A=1, B=2, ...
+  const code = capture.toUpperCase().charCodeAt(0);
+  // Written as a positive range test so NaN (empty capture) is rejected.
+  if (!(code >= 65 && code <= 90)) return null;
+  return code - 64;
+}
+
+/** Outcome of a successful pattern match against one stem. */
+interface PatternHit {
+  /** Part index derived from the captured token. */
+  partIndex: number;
+  /** Stem with the matched suffix removed. */
+  variantGroup: string;
+}
+
+/**
+ * Return the hit for the first pattern, in list order, that matches the
+ * end of `stem` AND yields a valid part index; null when none does.
+ */
+function tryMatch(stem: string, patterns: CompiledPattern[]): PatternHit | null {
+  for (const { re, kind } of patterns) {
+    const matched = stem.match(re);
+    if (matched === null) continue;
+    const partIndex = indexFromCapture(matched[1] ?? "", kind);
+    if (partIndex == null) continue;
+    // Everything before the matched suffix names the group.
+    const variantGroup = stem.slice(0, matched.index!);
+    return { partIndex, variantGroup };
+  }
+  return null;
+}
+
+/**
+ * Classify a group of files that share one normalized JAV code.
+ *
+ * Algorithm:
+ *   0. An empty group yields []; a one-file group is always "single".
+ *   1. Match every stem against the patterns. A stem that matches is a
+ *      "part"; its variant group is the stem minus the matched suffix.
+ *   2. If nothing matched, every file is a "variant" of the stems'
+ *      longest common prefix (or of its own stem when that prefix is
+ *      empty).
+ *   3. Otherwise each unmatched stem is attached to a part's variant
+ *      group `G` when it equals `G` or starts with `G + "."`; longer
+ *      groups are tried first. Attached stems become "variant",
+ *      everything else "single".
+ *
+ * Example with pattern `-cd{N}`: `XXX-001-cd1` and `XXX-001-cd2` are
+ * parts 1 and 2 of group `XXX-001`; `XXX-001.fixed` is a variant of
+ * `XXX-001`; `XXX-001-cd1.fixed` matches nothing and does not start
+ * with `XXX-001.`, so it is a single.
+ *
+ * Results are returned in the same order as `files`.
+ */
+export function classifyGroup(
+  files: ClassifyInput[],
+  patterns: CompiledPattern[],
+): ClassifyResult[] {
+  if (files.length === 0) return [];
+  if (files.length === 1) {
+    return [{ key: files[0]!.key, partKind: "single", partIndex: null, variantGroup: null }];
+  }
+
+  // Step 1: which files carry a part suffix?
+  const hitByKey = new Map<string, PatternHit>();
+  for (const file of files) {
+    const hit = tryMatch(file.stem, patterns);
+    if (hit) hitByKey.set(file.key, hit);
+  }
+
+  // Step 2: no part suffixes anywhere → one shared variant group.
+  if (hitByKey.size === 0) {
+    const shared = longestCommonPrefix(files.map((file) => file.stem));
+    return files.map((file) => ({
+      key: file.key,
+      partKind: "variant" as const,
+      partIndex: null,
+      variantGroup: shared || file.stem,
+    }));
+  }
+
+  // Step 3: distinct part groups, most specific (longest) first.
+  const groupsBySpecificity = [...new Set([...hitByKey.values()].map((hit) => hit.variantGroup))];
+  groupsBySpecificity.sort((a, b) => b.length - a.length);
+
+  return files.map((file): ClassifyResult => {
+    const hit = hitByKey.get(file.key);
+    if (hit) {
+      return {
+        key: file.key,
+        partKind: "part",
+        partIndex: hit.partIndex,
+        variantGroup: hit.variantGroup,
+      };
+    }
+    // Empty groups (a stem that was nothing but the suffix) never adopt variants.
+    const owner = groupsBySpecificity.find(
+      (group) => group.length > 0 && (file.stem === group || file.stem.startsWith(group + ".")),
+    );
+    return owner
+      ? { key: file.key, partKind: "variant", partIndex: null, variantGroup: owner }
+      : { key: file.key, partKind: "single", partIndex: null, variantGroup: null };
+  });
+}
+
+/**
+ * Longest prefix shared by every string, with any trailing run of
+ * whitespace, dots, underscores or hyphens removed. Returns "" for an
+ * empty list or when the strings share nothing.
+ */
+function longestCommonPrefix(strs: string[]): string {
+  if (strs.length === 0) return "";
+  const first = strs[0]!;
+  let end = first.length;
+  for (const other of strs.slice(1)) {
+    let shared = 0;
+    while (shared < end && shared < other.length && first[shared] === other[shared]) shared++;
+    end = shared;
+    if (end === 0) return "";
+  }
+  // Drop trailing separators so the result doesn't end mid-name like "XXX-001.".
+  return first.slice(0, end).replace(/[\s._\-]+$/, "");
+}
+
+/**
+ * Choose which of several variants sharing one variantGroup plays by
+ * default: the first whose stem is exactly `group`, otherwise the one
+ * that sorts first by `localeCompare` on the stem. Returns null for an
+ * empty list. The input array is not modified.
+ */
+export function pickDefaultVariant<T extends { stem: string }>(
+  variants: T[],
+  group: string,
+): T | null {
+  if (variants.length === 0) return null;
+  for (const candidate of variants) {
+    if (candidate.stem === group) return candidate;
+  }
+  const ordered = variants.slice();
+  ordered.sort((left, right) => left.stem.localeCompare(right.stem));
+  return ordered[0] ?? null;
+}
+
+/**
+ * Short label for a variant relative to its group stem.
+ *   - stem equals the group                → `original`
+ *   - `XXX-001.fixed` with group `XXX-001` → `fixed`
+ *   - stem merely starts with the group    → the remainder with leading
+ *     dots/underscores/hyphens/whitespace removed
+ *   - stem unrelated to the group          → the stem itself
+ * An empty remainder also yields `original`.
+ */
+export function variantLabel(stem: string, group: string): string {
+  if (stem === group) return "original";
+
+  const dotted = group + ".";
+  if (stem.startsWith(dotted)) {
+    const suffix = stem.slice(dotted.length);
+    return suffix || "original";
+  }
+
+  if (!stem.startsWith(group)) return stem;
+
+  const remainder = stem.slice(group.length).replace(/^[._\-\s]+/, "");
+  return remainder === "" ? "original" : remainder;
+}
@@ -0,0 +1,81 @@
+import "server-only";
+import path from "node:path";
+import { getConfiguredVideoRoots } from "./index";
+import { getAppSetting } from "@/lib/db/appSettings";
+import { isManualSubtitlePath } from "./manualSubtitles";
+
+/**
+ * In-process set of subtitle paths the user picked via /api/pick-file
+ * during this session. Covers the case where someone browses a .srt
+ * sitting outside any indexed video root — the OS picker IS the
+ * authorization. Entries time out after TTL_MS to bound how long an
+ * old picked path remains servable.
+ */
+const TTL_MS = 60 * 60 * 1000; // 1 hour
+// Normalized absolute path → expiry time in epoch milliseconds.
+const trusted = new Map<string, number>();
+
+/** Drop every trusted entry whose expiry time is at or before `now` (epoch ms). */
+function pruneExpired(now: number): void {
+  trusted.forEach((expiresAt, key) => {
+    if (expiresAt <= now) trusted.delete(key);
+  });
+}
+
+/**
+ * Canonical map key for a path: the resolved absolute path, lower-cased
+ * on Windows so case differences can't bypass the guard. POSIX paths
+ * keep their case because they are case-sensitive.
+ */
+function normalize(p: string): string {
+  const absolute = path.resolve(p);
+  if (process.platform !== "win32") return absolute;
+  return absolute.toLowerCase();
+}
+
+/**
+ * Mark `abs` as trusted for the next TTL_MS milliseconds. Trusting an
+ * already-trusted path restarts its timer. Expired entries are pruned
+ * first.
+ */
+export function trustSubtitlePath(abs: string): void {
+  pruneExpired(Date.now());
+  const key = normalize(abs);
+  const expiresAt = Date.now() + TTL_MS;
+  trusted.set(key, expiresAt);
+}
+
+/**
+ * True when exactly this path (after normalization) was trusted via
+ * `trustSubtitlePath` and its entry has not expired. Expired entries
+ * are removed as a side effect.
+ */
+export function isSessionTrustedSubtitlePath(abs: string): boolean {
+  const now = Date.now();
+  pruneExpired(now);
+  const key = normalize(abs);
+  const expiresAt = trusted.get(key);
+  if (expiresAt == null) return false;
+  if (expiresAt > now) return true;
+  trusted.delete(key);
+  return false;
+}
+
+/**
+ * True when `child` is `parent` itself or lies beneath it. Both paths
+ * are resolved first (and lower-cased on Windows). The comparison is
+ * made on whole path segments, so `/a/bc` is not inside `/a/b`. This is
+ * purely lexical; symlinks are not followed.
+ */
+function isInside(child: string, parent: string): boolean {
+  const canonical = (p: string): string =>
+    process.platform === "win32" ? path.resolve(p).toLowerCase() : path.resolve(p);
+  const childPath = canonical(child);
+  const parentPath = canonical(parent);
+  if (!parentPath) return false;
+  if (childPath === parentPath) return true;
+  // Ensure the prefix ends on a separator so only full segments match.
+  const prefix = parentPath.endsWith(path.sep) ? parentPath : parentPath + path.sep;
+  return childPath.startsWith(prefix);
+}
+
+/**
+ * Decide whether a subtitle path may be served. `abs` is resolved and
+ * accepted when it is, in this order of checking:
+ *  - inside a configured video root,
+ *  - inside a configured subtitleExtraPaths entry,
+ *  - inside the implicit data/generated-subtitles/ root (WhisperJAV output),
+ *  - a session-trusted pick-file path (exact match, not prefix),
+ *  - a path persisted in the manual_subtitles table (user explicitly
+ *    Browse'd it during a previous session).
+ * Empty root entries are ignored.
+ */
+export function isAllowedSubtitlePath(abs: string): boolean {
+  const target = path.resolve(abs);
+  const isUnderAny = (roots: Iterable<string | null | undefined>): boolean => {
+    for (const root of roots) {
+      if (root && isInside(target, root)) return true;
+    }
+    return false;
+  };
+
+  if (isUnderAny(getConfiguredVideoRoots())) return true;
+  if (isUnderAny(getAppSetting("subtitleExtraPaths") ?? [])) return true;
+
+  const generatedRoot = path.join(process.cwd(), "data", "generated-subtitles");
+  return (
+    isInside(target, generatedRoot) ||
+    isSessionTrustedSubtitlePath(target) ||
+    isManualSubtitlePath(target)
+  );
+}
@@ -0,0 +1,145 @@
+import "server-only";
+import path from "node:path";
+import fs from "node:fs";
+import fsp from "node:fs/promises";
+import crypto from "node:crypto";
+
+/**
+ * Bump on any change to srtToVtt, the ffmpeg arg recipe, or the
+ * cache-key composition. Old entries become unreachable automatically.
+ *  v1 → initial.
+ *  v2 → added decodeSubtitleBuffer for non-UTF-8 SRTs/VTTs (cp936,
+ *       shift-jis, big5, UTF-16). Existing UTF-8-only entries would
+ *       still be correct but the version bump ensures any cached
+ *       output produced with a buggy decode path is regenerated.
+ */
+export const CONVERTER_VERSION = 2;
+
+// Cache directory, relative to the process working directory. Created
+// (recursively) as a side effect of loading this module.
+const CACHE_DIR = path.join(process.cwd(), "data", "subtitle-cache");
+fs.mkdirSync(CACHE_DIR, { recursive: true });
+
+/** Kind of subtitle source a cache entry was produced from; part of the cache key. */
+export type CacheKind = "embedded" | "srt" | "ass" | "ssa";
+
+/** Everything that identifies one cache entry (see `cachePath`). */
+export interface CacheKeyInput {
+  /** Path of the source file. */
+  abs: string;
+  /** Source file size; a change produces a different key. */
+  size: number;
+  /** Source file mtime in milliseconds; rounded to an integer before hashing. */
+  mtimeMs: number;
+  kind: CacheKind;
+  /** ffmpeg stream index for embedded; ext for sidecar files. */
+  streamOrExt: string | number;
+}
+
+/**
+ * Map a cache key to its file inside CACHE_DIR. The name is the SHA-1
+ * hex digest of the key fields plus CONVERTER_VERSION, joined with "|",
+ * with a `.vtt` extension.
+ */
+export function cachePath(input: CacheKeyInput): string {
+  const keyFields = [
+    input.abs,
+    input.size,
+    Math.round(input.mtimeMs),
+    input.kind,
+    String(input.streamOrExt),
+    CONVERTER_VERSION,
+  ];
+  const hasher = crypto.createHash("sha1");
+  hasher.update(keyFields.join("|"));
+  const digest = hasher.digest("hex");
+  return path.join(CACHE_DIR, `${digest}.vtt`);
+}
+
+/**
+ * Read a cache file. Resolves with its contents, or null when it cannot
+ * be read (typically a cache miss). A successful read also touches the
+ * file's atime/mtime in the background so LRU pruning sees it as
+ * recently used; failure of that touch is ignored.
+ */
+export async function readCache(file: string): Promise<Buffer | null> {
+  let contents: Buffer;
+  try {
+    contents = await fsp.readFile(file);
+  } catch {
+    return null;
+  }
+  // Best effort: a failed touch (read-only fs, locked file) is harmless.
+  const nowSeconds = Date.now() / 1000;
+  fsp.utimes(file, nowSeconds, nowSeconds).catch(() => { /* ignore */ });
+  return contents;
+}
+
+// Writes since the last size sweep; a sweep is started every
+// PRUNE_WRITE_INTERVAL successful writes.
+let writesSinceLastPrune = 0;
+const PRUNE_WRITE_INTERVAL = 25;
+
+/**
+ * Write a cache file atomically: the data goes to a uniquely named
+ * temporary file next to the target, which is then renamed into place.
+ * Readers therefore never see a partial file, and concurrent writers of
+ * the same key cannot trip over each other's temporary file.
+ *
+ * Every PRUNE_WRITE_INTERVAL-th successful write starts a background
+ * size sweep; a failing sweep is logged and never affects the write.
+ *
+ * @param file Target path (normally from `cachePath`).
+ * @param data Contents to store.
+ * @throws The underlying fs error when the write or rename fails; the
+ *         temporary file is removed first (best effort).
+ */
+export async function writeCache(file: string, data: Buffer | string): Promise<void> {
+  // pid + timestamp alone collide when one process writes the same key
+  // twice within a millisecond; the random suffix makes the name unique.
+  const tmp = `${file}.${process.pid}.${Date.now()}.${crypto.randomBytes(6).toString("hex")}.tmp`;
+  try {
+    await fsp.writeFile(tmp, data);
+    await fsp.rename(tmp, file);
+  } catch (e) {
+    try { await fsp.unlink(tmp); } catch { /* ignore */ }
+    throw e;
+  }
+  writesSinceLastPrune++;
+  if (writesSinceLastPrune >= PRUNE_WRITE_INTERVAL) {
+    writesSinceLastPrune = 0;
+    // Fire-and-forget, but never let a failed sweep become an unhandled rejection.
+    void pruneSubtitleCacheIfNeeded().catch((e: unknown) => {
+      console.error("[subtitle-cache] prune failed:", e);
+    });
+  }
+}
+
+/** Outcome of one `pruneSubtitleCacheIfNeeded` sweep. */
+interface PruneResult {
+  /** Number of `.vtt` cache files that were found and stat'ed. */
+  scanned: number;
+  /** Number of files deleted by this sweep. */
+  removed: number;
+  /** Total size of the scanned files before any deletion. */
+  beforeBytes: number;
+  /** Total size after deletions (equals `beforeBytes` when nothing was removed). */
+  afterBytes: number;
+}
+
+/**
+ * LRU sweep keyed on file mtime. Totals the size of the `.vtt` files in
+ * `data/subtitle-cache/`; when the total exceeds the
+ * `subtitleCacheLimitMb` setting, deletes files oldest-mtime first until
+ * the total is at or below 80% of the cap.
+ *
+ * Returns all-zero counters without touching the disk when the setting
+ * is not a positive finite number (0 means unlimited) or the directory
+ * cannot be listed. Files that vanish or cannot be deleted mid-sweep
+ * are skipped.
+ */
+export async function pruneSubtitleCacheIfNeeded(): Promise<PruneResult> {
+  const { getAppSetting } = await import("@/lib/db/appSettings");
+  const limitMb = Number(getAppSetting("subtitleCacheLimitMb"));
+  const summary: PruneResult = { scanned: 0, removed: 0, beforeBytes: 0, afterBytes: 0 };
+  if (!(Number.isFinite(limitMb) && limitMb > 0)) return summary;
+  const capBytes = limitMb * 1024 * 1024;
+  const targetBytes = Math.floor(capBytes * 0.8);
+
+  let listing: import("node:fs").Dirent[];
+  try {
+    listing = await fsp.readdir(CACHE_DIR, { withFileTypes: true });
+  } catch {
+    return summary;
+  }
+
+  const cached: Array<{ abs: string; size: number; mtimeMs: number }> = [];
+  for (const entry of listing) {
+    const isCacheFile = entry.isFile() && entry.name.endsWith(".vtt");
+    if (!isCacheFile) continue;
+    const abs = path.join(CACHE_DIR, entry.name);
+    try {
+      const { size, mtimeMs } = await fsp.stat(abs);
+      cached.push({ abs, size, mtimeMs });
+      summary.scanned += 1;
+      summary.beforeBytes += size;
+    } catch { /* file vanished mid-walk; skip */ }
+  }
+
+  if (summary.beforeBytes <= capBytes) {
+    summary.afterBytes = summary.beforeBytes;
+    return summary;
+  }
+
+  // Least recently used (oldest mtime) goes first.
+  cached.sort((a, b) => a.mtimeMs - b.mtimeMs);
+  let remaining = summary.beforeBytes;
+  for (const victim of cached) {
+    if (remaining <= targetBytes) break;
+    try {
+      await fsp.unlink(victim.abs);
+      remaining -= victim.size;
+      summary.removed += 1;
+    } catch { /* concurrent delete; skip */ }
+  }
+  summary.afterBytes = remaining;
+
+  if (summary.removed > 0) {
+    const toMb = (bytes: number): string => (bytes / 1_048_576).toFixed(1);
+    console.log(
+      `[subtitle-cache] pruned ${summary.removed}/${summary.scanned} files; ${toMb(summary.beforeBytes)}MB → ${toMb(remaining)}MB`,
+    );
+  }
+  return summary;
+}
+
+/**
+ * Bootstrap entrypoint — fire one sweep on module load (delayed so
+ * other startup work isn't blocked). The sweep runs unattended, so a
+ * failure is logged here rather than left as an unhandled rejection.
+ */
+function scheduleBootstrapPrune(): void {
+  setTimeout(() => {
+    void pruneSubtitleCacheIfNeeded().catch((e: unknown) => {
+      console.error("[subtitle-cache] bootstrap prune failed:", e);
+    });
+  }, 5_000);
+}
+scheduleBootstrapPrune();
@@ -0,0 +1,195 @@
+import "server-only";
+import path from "node:path";
+import fs from "node:fs/promises";
+import iconv from "iconv-lite";
+
+/** Subtitle file extensions this module recognizes: lowercase, with the
+ *  leading dot. */
+export const SUBTITLE_EXTS = [".srt", ".vtt", ".ass", ".ssa"] as const;
+/** Union of the literal extension strings listed in SUBTITLE_EXTS. */
+export type SubtitleExt = (typeof SUBTITLE_EXTS)[number];
+
+// Set view of SUBTITLE_EXTS, typed as Set<string> so arbitrary extension
+// strings can be tested for membership without a cast.
+const SUBTITLE_EXT_SET = new Set<string>(SUBTITLE_EXTS);
+
+/** Three-letter language codes this module resolves subtitles to. */
+export type LangIso = "eng" | "zho" | "jpn";
+/** Two-letter preference codes; "off" is the value that maps to no
+ *  language (isoFromPref returns null for it). */
+export type LangPref = "EN" | "CN" | "JP" | "off";
+
+/** One subtitle file as returned by walkSubtitles. */
+export interface SubtitleFileEntry {
+  /** Path of the file: the containing directory joined with the entry name. */
+  abs: string;
+  /** Entry name only, without any directory component. */
+  filename: string;
+}
+
+/**
+ * Collect subtitle files beneath `root` using an explicit stack rather
+ * than recursion. `root` itself is depth 0, and a directory's
+ * subdirectories are only queued while its depth is below `maxDepth`.
+ * Directories that cannot be listed are skipped silently. A regular file
+ * matches when its lowercased extension is in SUBTITLE_EXT_SET.
+ *
+ * @param root Directory to start from.
+ * @param maxDepth How many directory levels below `root` to descend into.
+ * @returns Every matching file as `{ abs, filename }`.
+ */
+export async function walkSubtitles(root: string, maxDepth = 2): Promise<SubtitleFileEntry[]> {
+  const matches: SubtitleFileEntry[] = [];
+  const pending: Array<{ dir: string; depth: number }> = [{ dir: root, depth: 0 }];
+  for (let frame = pending.pop(); frame !== undefined; frame = pending.pop()) {
+    const { dir, depth } = frame;
+    let listing: import("node:fs").Dirent[];
+    try {
+      listing = await fs.readdir(dir, { withFileTypes: true });
+    } catch {
+      // Unreadable or vanished directory: move on to the next frame.
+      continue;
+    }
+    for (const entry of listing) {
+      const full = path.join(dir, entry.name);
+      if (entry.isDirectory()) {
+        if (depth < maxDepth) pending.push({ dir: full, depth: depth + 1 });
+        continue;
+      }
+      if (!entry.isFile()) continue;
+      const extension = path.extname(entry.name).toLowerCase();
+      if (SUBTITLE_EXT_SET.has(extension)) {
+        matches.push({ abs: full, filename: entry.name });
+      }
+    }
+  }
+  return matches;
+}
+
+// Preference code → language code. "off" is excluded from the key type
+// because it has no language; isoFromPref handles it before the lookup.
+const PREF_TO_ISO: Record<Exclude<LangPref, "off">, LangIso> = { EN: "eng", CN: "zho", JP: "jpn" };
+
+// Inverse of PREF_TO_ISO: language code → preference code.
+const ISO_TO_PREF: Record<LangIso, Exclude<LangPref, "off">> = { eng: "EN", zho: "CN", jpn: "JP" };
+
+/** Translate a preference code to its language code; "off" yields null. */
+export function isoFromPref(pref: LangPref): LangIso | null {
+  if (pref === "off") return null;
+  return PREF_TO_ISO[pref];
+}
+
+/** Translate a language code to its preference code; null or undefined
+ *  yields "off". */
+export function prefFromIso(iso: LangIso | null): LangPref {
+  if (iso == null) return "off";
+  return ISO_TO_PREF[iso];
+}
+
+// Lowercase spellings accepted for each supported language.
+const ENGLISH_TOKENS = new Set(["en", "eng", "english"]);
+const CHINESE_TOKENS = new Set([
+  "zh", "zho", "chi", "chs", "cht", "chn", "cn", "chinese",
+  "schinese", "tchinese", "simplified", "traditional",
+  "zh-cn", "zh-tw", "zh-hans", "zh-hant",
+]);
+const JAPANESE_TOKENS = new Set(["ja", "jp", "jpn", "japanese", "jap"]);
+
+// Lookup order for normalizeLanguageTag: English, then Chinese, then Japanese.
+const LANGUAGE_TOKEN_TABLE: ReadonlyArray<readonly [ReadonlySet<string>, LangIso]> = [
+  [ENGLISH_TOKENS, "eng"],
+  [CHINESE_TOKENS, "zho"],
+  [JAPANESE_TOKENS, "jpn"],
+];
+
+/** Map a free-form language tag to one of the supported language codes.
+ *  The tag is trimmed and lowercased, then matched exactly against the
+ *  token sets. Returns null for missing, blank, or unrecognized tags. */
+export function normalizeLanguageTag(tag: string | null | undefined): LangIso | null {
+  const key = tag ? tag.trim().toLowerCase() : "";
+  if (key === "") return null;
+  for (const [tokens, iso] of LANGUAGE_TOKEN_TABLE) {
+    if (tokens.has(key)) return iso;
+  }
+  return null;
+}
+
+/** Human-readable English name for a language code; anything that is not
+ *  one of the three supported codes (including null) is "Unknown". */
+export function languageDisplay(iso: LangIso | null): string {
+  switch (iso) {
+    case "eng":
+      return "English";
+    case "zho":
+      return "Chinese";
+    case "jpn":
+      return "Japanese";
+    default:
+      return "Unknown";
+  }
+}
+
+// Separators that break a filename stem into candidate language tokens:
+// whitespace, dot, underscore, hyphen, brackets, parentheses, plus,
+// comma and semicolon.
+const TOKEN_SPLIT = /[\s._\-\[\]()+,;]+/g;
+
+/** Result of scanning a filename for language hints. */
+export interface DetectedLanguage {
+  /** The language code when exactly one language was found, else null. */
+  lang: LangIso | null;
+  /** Display label — "English", "Chinese", "English/Chinese", "Unknown". */
+  label: string;
+}
+
+/** Look for language tokens in a filename's stem (the name minus its
+ *  extension). One distinct language gives that language's code and name.
+ *  Several distinct languages give a slash-joined label in the fixed
+ *  order English, Chinese, Japanese, with `lang` left null so that
+ *  preference matching never resolves to more than one language. No hit
+ *  at all gives `{ lang: null, label: "Unknown" }`. */
+export function detectLanguageFromName(filename: string): DetectedLanguage {
+  const extLength = path.extname(filename).toLowerCase().length;
+  const base = extLength > 0 ? filename.slice(0, -extLength) : filename;
+
+  const hits = new Set<LangIso>();
+  for (const token of base.toLowerCase().split(TOKEN_SPLIT)) {
+    if (!token) continue;
+    const iso = normalizeLanguageTag(token);
+    if (iso !== null) hits.add(iso);
+  }
+
+  // Fixed display order, independent of where tokens sit in the name.
+  const ranked = (["eng", "zho", "jpn"] as const).filter((code) => hits.has(code));
+  if (ranked.length === 0) return { lang: null, label: "Unknown" };
+
+  const label = ranked.map((code) => languageDisplay(code)).join("/");
+  const single = ranked.length === 1 ? ranked[0] ?? null : null;
+  return { lang: single, label };
+}
+
+// SRT timestamp: H(H):MM:SS,mmm. Group 1 is the clock part, group 2 the
+// milliseconds, so the comma between them can be swapped for a dot.
+const SRT_TIMESTAMP = /(\d{1,2}:\d{2}:\d{2}),(\d{3})/g;
+
+/** Pure JS SRT → WebVTT converter. Strips a leading BOM, normalizes
+ *  CRLF/CR to LF, swaps the comma in HH:MM:SS,mmm timestamps for a dot,
+ *  and prepends the WEBVTT header. The timestamp rewrite is applied only
+ *  to cue timing lines (those containing "-->"), so cue text that merely
+ *  looks like a timestamp (e.g. "10:30:00,500") is left as written.
+ *  No styling translation. Cheap; runs on every sidecar miss without
+ *  spawning ffmpeg.
+ *
+ *  @param srt SRT content, already decoded to a string.
+ *  @returns WebVTT text beginning with "WEBVTT\n\n". */
+export function srtToVtt(srt: string): string {
+  let body = srt;
+  if (body.charCodeAt(0) === 0xfeff) body = body.slice(1);
+  body = body.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
+  body = body
+    .split("\n")
+    .map((line) => (line.includes("-->") ? line.replace(SRT_TIMESTAMP, "$1.$2") : line))
+    .join("\n");
+  return `WEBVTT\n\n${body.trimStart()}`;
+}
+
+/** Return `filename` with its final extension (as reported by
+ *  `path.extname`) removed; names without an extension come back
+ *  unchanged. */
+export function stemOf(filename: string): string {
+  const extLength = path.extname(filename).length;
+  // A zero-length extension leaves the whole name in place.
+  return filename.slice(0, filename.length - extLength);
+}
+
+// U+FFFD, emitted by decoders in place of byte sequences they cannot map.
+const REPLACEMENT_CHAR = "\uFFFD";
+
+// Hiragana and katakana blocks (U+3040–U+30FF).
+const KANA_RANGE = /[\u3040-\u30FF]/;
+
+/**
+ * Decode a subtitle file buffer to a JS string with best-effort
+ * encoding detection. Many older Asian SRTs ship as cp936/GBK or
+ * Shift-JIS — feeding them through `Buffer.toString("utf8")` produces
+ * mojibake. Strategy:
+ *   1. If a BOM is present (UTF-8 / UTF-16 LE / UTF-16 BE), strip it and
+ *      decode with that encoding.
+ *   2. Try UTF-8 strict. If it decodes without invalid sequences, use it.
+ *   3. Otherwise decode as UTF-8 / gb18030 / shift_jis / big5 and pick
+ *      whichever yields the fewest replacement characters. UTF-8 gets a
+ *      one-character allowance so a nearly-valid UTF-8 file is not
+ *      reinterpreted because of a single stray byte.
+ *   4. Tie-break: shift_jis gets a one-character allowance when its
+ *      decoded text contains hiragana/katakana; otherwise gb18030 wins a
+ *      tie with shift_jis. Remaining ties go to the earlier candidate.
+ *
+ * @param buf Raw file contents.
+ * @returns The decoded text; undecodable bytes appear as U+FFFD.
+ */
+export function decodeSubtitleBuffer(buf: Buffer): string {
+  // BOM detection — if present, the encoding is unambiguous.
+  if (buf.length >= 3 && buf[0] === 0xef && buf[1] === 0xbb && buf[2] === 0xbf) {
+    return buf.subarray(3).toString("utf8");
+  }
+  if (buf.length >= 2 && buf[0] === 0xff && buf[1] === 0xfe) {
+    return iconv.decode(buf.subarray(2), "utf-16le");
+  }
+  if (buf.length >= 2 && buf[0] === 0xfe && buf[1] === 0xff) {
+    return iconv.decode(buf.subarray(2), "utf-16be");
+  }
+
+  // UTF-8 strict — fast path for the common case.
+  try {
+    const decoder = new TextDecoder("utf-8", { fatal: true });
+    return decoder.decode(buf);
+  } catch { /* fall through to heuristic */ }
+
+  // Candidates in tie-break order: the comparison below is strict, so an
+  // earlier entry wins equal scores. gb18030 precedes shift_jis because
+  // shift_jis should only win a tie when the kana allowance applies.
+  const candidates = ["utf8", "gb18030", "shift_jis", "big5"] as const;
+  let best: { text: string; score: number } | null = null;
+  for (const encoding of candidates) {
+    const text = iconv.decode(buf, encoding);
+    let bad = 0;
+    for (let i = 0; i < text.length; i++) {
+      if (text[i] === REPLACEMENT_CHAR) bad++;
+    }
+    // gb18030 maps many Japanese byte sequences without errors but
+    // produces gibberish, so replacement-count alone cannot separate the
+    // two; kana in the shift_jis decoding is the deciding signal.
+    const allowance =
+      encoding === "utf8" || (encoding === "shift_jis" && KANA_RANGE.test(text)) ? 1 : 0;
+    const score = bad - allowance;
+    if (best == null || score < best.score) {
+      best = { text, score };
+    }
+  }
+  return best?.text ?? buf.toString("utf8");
+}