import "server-only";
import path from "node:path";
import fs from "node:fs/promises";
import { extractCode, normalizeCode } from "@/lib/jav/codeParser";
import { getAppSetting } from "@/lib/db/appSettings";
import { rawDb } from "@/lib/db/client";
import { syncVideoMetadataIndex } from "./metadata";

/** Lowercased file extensions (with leading dot) treated as video files. */
export const VIDEO_EXTENSIONS = new Set<string>([
  ".mp4",
  ".mkv",
  ".m4v",
  ".mov",
  ".webm",
  ".avi",
  ".wmv",
  ".ts",
  ".mpg",
  ".mpeg",
  ".flv",
]);

/** Lowercased file extensions (with leading dot) treated as subtitle sidecars. */
const SUBTITLE_EXTENSIONS = new Set<string>([".srt", ".vtt", ".ass", ".ssa"]);

/** One video file the index found on disk. */
export interface VideoFile {
  /** Absolute path on disk. */
  abs: string;
  /** Path relative to the configured video library root. */
  rel: string;
  /** Filename (with extension). */
  filename: string;
  /** Normalized JAV code parsed from the filename. */
  code: string;
  /** File size in bytes. */
  size: number;
  /** Last-modified timestamp (ms). */
  mtime: number;
}

/**
 * Lightweight scan-state record. Authoritative file data lives in the
 * `video_metadata` SQLite table — accessors below query it directly,
 * so this struct holds only what describes the most recent rescan.
 */
interface VideoIndex {
  /** When the index was last built. */
  lastScannedAt: number;
  /** All folder roots that were scanned, in order: main first, extras after.
   *  Used both to display in the UI and to detect setting changes. */
  rootsScanned: string[];
  /** Total files matched by the most recent scan. */
  count: number;
}

const EMPTY_INDEX: VideoIndex = {
  lastScannedAt: 0,
  rootsScanned: [],
  count: 0,
};

/** Scan state produced by the most recent completed rescan. */
let cachedScanState: VideoIndex = EMPTY_INDEX;
/** The rescan currently running, if any; concurrent callers share it. */
let scanInFlight: Promise<VideoIndex> | null = null;

/** Columns of a `video_metadata` row that the walker reuses for unchanged dirs. */
interface CachedFileRow {
  abs_path: string;
  rel_path: string;
  code: string;
  size_bytes: number;
  mtime_ms: number;
}

interface WalkOpts {
  /** When true, ignore the dir-mtime cache and re-readdir every dir.
   *  Use after structural file edits that don't change dir mtime
   *  (e.g. content rewrite without rename). */
  force?: boolean;
}

/**
 * Walk the configured roots and produce a flat VideoFile[]. The caller
 * writes the result to the `video_metadata` table — nothing is held in
 * memory beyond the duration of one rescan.
 *
 * Incremental: each directory's mtime is compared to a stored value
 * in `video_dir_mtimes`. If unchanged, the immediate-children file
 * rows for that dir are reused from `video_metadata` instead of
 * stat-ing every file. Subdirs are still walked (their mtimes may have
 * changed independently).
 *
 * A directory's mtime is only (re)stored when its listing was read
 * completely. If `stat`/`readdir` on the directory fails, or any video
 * file in it cannot be stat-ed, the directory's cache row is dropped so
 * the next scan re-reads it instead of trusting an incomplete row set.
 *
 * @param roots Absolute directories to walk, in order.
 * @param opts  `force: true` starts from empty caches so every directory is re-read.
 * @returns The files found (sorted by code, then filename), their count,
 *          every directory popped from the walk stack, the number of
 *          file rows reused from cache, and the number of directories
 *          re-read from disk.
 */
async function walkAllRoots(
  roots: string[],
  opts: WalkOpts = {},
): Promise<{ files: VideoFile[]; count: number; visitedDirs: Set<string>; reused: number; rescanned: number }> {
  const cachedMtimes = opts.force ? new Map<string, number>() : loadDirMtimeCache();
  const cachedFilesByDir = opts.force ? new Map<string, CachedFileRow[]>() : loadCachedFileIndex();
  const visitedDirs = new Set<string>();
  // Dirs whose listing was fully read this scan; only these keep a row in
  // `video_dir_mtimes`. Everything else is pruned and re-read next time.
  const cacheableDirs = new Set<string>();
  const files: VideoFile[] = [];
  let reused = 0;
  let rescanned = 0;

  for (const root of roots) {
    const stack: string[] = [root];
    let dir: string | undefined;
    while ((dir = stack.pop()) !== undefined) {
      visitedDirs.add(dir);

      let dirStat: import("node:fs").Stats;
      try {
        dirStat = await fs.stat(dir);
      } catch {
        continue; // dir vanished mid-walk
      }
      const cachedMtime = cachedMtimes.get(dir);
      const dirUnchanged = cachedMtime != null && cachedMtime === dirStat.mtimeMs;

      // The directory is always listed, even when unchanged: subdir mtimes
      // are tracked independently, so the subdir list is needed either
      // way. What an unchanged dir saves is the per-file stat below.
      let entries: import("node:fs").Dirent[];
      try {
        entries = await fs.readdir(dir, { withFileTypes: true });
      } catch {
        continue; // unreadable right now — not cacheable, retried next scan
      }

      // Push subdirs onto the stack regardless of cache state.
      for (const entry of entries) {
        if (entry.isDirectory()) stack.push(path.join(dir, entry.name));
      }

      if (dirUnchanged) {
        // Reuse cached rows for files immediately in this directory.
        const cached = cachedFilesByDir.get(dir);
        if (cached) {
          for (const row of cached) {
            files.push({
              abs: row.abs_path,
              rel: path.relative(root, row.abs_path),
              filename: path.basename(row.abs_path),
              code: row.code,
              size: row.size_bytes,
              mtime: row.mtime_ms,
            });
          }
          reused += cached.length;
        }
        cacheableDirs.add(dir);
        continue;
      }

      // Dir changed (or no cache entry yet): stat each matching file.
      rescanned++;
      let listingComplete = true;
      for (const entry of entries) {
        if (!entry.isFile()) continue;
        const ext = path.extname(entry.name).toLowerCase();
        if (!VIDEO_EXTENSIONS.has(ext)) continue;
        const stem = entry.name.slice(0, entry.name.length - ext.length);
        const code = extractCode(stem);
        if (!code) continue;
        const norm = normalizeCode(code);
        if (!norm) continue;
        const abs = path.join(dir, entry.name);
        let st: import("node:fs").Stats;
        try {
          st = await fs.stat(abs);
        } catch {
          // The file is skipped for this scan; remember that the row set
          // for this dir is incomplete so it is not cached as fresh.
          listingComplete = false;
          continue;
        }
        files.push({
          abs,
          rel: path.relative(root, abs),
          filename: entry.name,
          code: norm,
          size: st.size,
          mtime: st.mtimeMs,
        });
      }

      if (listingComplete) {
        // Update cached mtime so the NEXT scan sees this dir as fresh.
        cachedMtimes.set(dir, dirStat.mtimeMs);
        cacheableDirs.add(dir);
      }
    }
  }

  // Persist updated mtime cache for next scan.
  saveDirMtimeCache(cachedMtimes, cacheableDirs);

  // Stable order across rescans.
  files.sort((a, b) => a.code.localeCompare(b.code) || a.filename.localeCompare(b.filename));
  return { files, count: files.length, visitedDirs, reused, rescanned };
}

/** Load all `video_dir_mtimes` rows into a Map keyed by abs_dir. */
function loadDirMtimeCache(): Map<string, number> {
  const rows = rawDb
    .prepare(`SELECT abs_dir, mtime_ms FROM video_dir_mtimes`)
    .all() as Array<{ abs_dir: string; mtime_ms: number }>;
  const out = new Map<string, number>();
  for (const r of rows) out.set(r.abs_dir, r.mtime_ms);
  return out;
}

/** Group the entire video_metadata table by dir_path so dir-cache
 *  reuse is a single in-memory lookup per dir. One linear scan of the
 *  table — cheap even at 80k rows. */
function loadCachedFileIndex(): Map<string, CachedFileRow[]> {
  const rows = rawDb.prepare(`
    SELECT abs_path, rel_path, code, size_bytes, mtime_ms, dir_path
    FROM video_metadata
  `).all() as Array<CachedFileRow & { dir_path: string }>;
  const out = new Map<string, CachedFileRow[]>();
  for (const r of rows) {
    const arr = out.get(r.dir_path);
    if (arr) arr.push(r);
    else out.set(r.dir_path, [r]);
  }
  return out;
}

/**
 * Upsert dir mtimes for the dirs in `visited` and prune every other row
 * (deleted folders, or dirs the caller chose not to keep cached).
 * Failures are logged and swallowed — the mtime cache is an optimisation.
 *
 * @param mtimes  Directory → mtime (ms) candidates to persist.
 * @param visited Directories whose row should exist after this call.
 */
function saveDirMtimeCache(mtimes: Map<string, number>, visited: Set<string>): void {
  const upsert = rawDb.prepare(`
    INSERT INTO video_dir_mtimes (abs_dir, mtime_ms, last_seen_at)
    VALUES (?, ?, ?)
    ON CONFLICT(abs_dir) DO UPDATE SET
      mtime_ms = excluded.mtime_ms,
      last_seen_at = excluded.last_seen_at
  `);
  const now = Date.now();
  const tx = rawDb.transaction(() => {
    for (const [dir, mtime] of mtimes) {
      // Only persist dirs in `visited` — others may have been
      // moved/renamed and their cache entry is stale.
      if (!visited.has(dir)) continue;
      upsert.run(dir, mtime, now);
    }
    // Prune rows whose dir is not in `visited`. O(rows) — fine at any
    // reasonable scale.
    const allRows = rawDb.prepare(`SELECT abs_dir FROM video_dir_mtimes`).all() as Array<{ abs_dir: string }>;
    const del = rawDb.prepare(`DELETE FROM video_dir_mtimes WHERE abs_dir = ?`);
    for (const r of allRows) {
      if (!visited.has(r.abs_dir)) del.run(r.abs_dir);
    }
  });
  try {
    tx();
  } catch (e) {
    console.error("[video] failed to save dir mtime cache:", e);
  }
}

/**
 * Walk every place a sidecar subtitle could live and return the set of
 * canonical codes that have at least one. Cheap signal — no ffprobe.
 *
 *   - Each video's own directory, filtered to subtitle filenames that
 *     start with the video stem or contain the video's code as a
 *     delimited token (so a stray `OTHER-001.srt` next to `YUJ-001.mp4`
 *     doesn't taint YUJ-001).
 *   - Each entry in `subtitleExtraPaths` (recursive walk, depth 3) —
 *     extracts the code from the filename directly.
 *   - data/generated-subtitles/<code>/ — directory name IS the code.
 *
 * Result is consumed once by syncHasSubtitleColumn and discarded — no
 * persistent in-memory copy.
 */
async function collectSubtitleCodes(files: VideoFile[]): Promise<Set<string>> {
  const codes = new Set<string>();

  // Same-folder scan: per video, look at sibling subtitle files. Each
  // directory is listed once and reduced to its lowercased subtitle
  // filenames, so a folder with N videos costs a single readdir.
  const subtitleNamesByDir = new Map<string, string[]>();
  for (const file of files) {
    // A code already proven by an earlier file needs no further probing.
    if (codes.has(file.code)) continue;

    const dir = path.dirname(file.abs);
    let subtitleNames = subtitleNamesByDir.get(dir);
    if (subtitleNames === undefined) {
      let entries: import("node:fs").Dirent[];
      try {
        entries = await fs.readdir(dir, { withFileTypes: true });
      } catch {
        entries = []; // unreadable or vanished directory: no sidecars
      }
      subtitleNames = entries
        .filter((e) => e.isFile() && SUBTITLE_EXTENSIONS.has(path.extname(e.name).toLowerCase()))
        .map((e) => e.name.toLowerCase());
      subtitleNamesByDir.set(dir, subtitleNames);
    }
    if (subtitleNames.length === 0) continue;

    const videoExt = path.extname(file.filename);
    const stemLower = file.filename.slice(0, file.filename.length - videoExt.length).toLowerCase();
    // "<stem>." covers both `<stem>.srt` and `<stem>.en.srt`.
    const stemPrefix = stemLower + ".";
    const codeLower = file.code.toLowerCase();
    const hasSidecar = subtitleNames.some(
      (name) => name.startsWith(stemPrefix) || containsDelimitedCode(name, codeLower),
    );
    if (hasSidecar) codes.add(file.code);
  }

  // Persistent subtitle library roots — extract codes from filenames.
  const extraRoots = (getAppSetting("subtitleExtraPaths") ?? []).filter(Boolean);
  for (const root of extraRoots) {
    await walkSubtitleRoot(root, codes, 3);
  }

  // data/generated-subtitles/<code>/ — directory name is the code.
  const generatedRoot = path.join(process.cwd(), "data", "generated-subtitles");
  try {
    const subdirs = await fs.readdir(generatedRoot, { withFileTypes: true });
    for (const d of subdirs) {
      if (!d.isDirectory()) continue;
      const dirAbs = path.join(generatedRoot, d.name);
      let entries: import("node:fs").Dirent[];
      try {
        entries = await fs.readdir(dirAbs, { withFileTypes: true });
      } catch {
        continue;
      }
      const hasSub = entries.some(
        (e) => e.isFile() && SUBTITLE_EXTENSIONS.has(path.extname(e.name).toLowerCase()),
      );
      if (hasSub) {
        const norm = normalizeCode(d.name);
        if (norm) codes.add(norm);
      }
    }
  } catch { /* generated-subtitles not present yet — fine */ }

  return codes;
}

/**
 * True when `code` occurs in `name` as a delimited token: the character
 * on each side is either absent (start/end of the name) or not an ASCII
 * lowercase letter or digit. A bare substring test would attribute
 * `yuj-0012.srt` to YUJ-001. Every occurrence is examined, so an early
 * embedded hit (`xyuj-001`) does not hide a later standalone one.
 *
 * Both arguments are expected to be lowercased already. An empty `code`
 * never matches.
 */
function containsDelimitedCode(name: string, code: string): boolean {
  if (code.length === 0) return false;
  const isAlnum = (ch: string | undefined): boolean =>
    ch !== undefined && ((ch >= "a" && ch <= "z") || (ch >= "0" && ch <= "9"));
  for (let at = name.indexOf(code); at !== -1; at = name.indexOf(code, at + 1)) {
    const before = at === 0 ? undefined : name[at - 1];
    const after = name[at + code.length]; // undefined past the end
    if (!isAlnum(before) && !isAlnum(after)) return true;
  }
  return false;
}

/**
 * Walk `root` iteratively and add to `out` the normalized code of every
 * subtitle file whose filename stem yields a code.
 *
 * `root` itself is depth 0; a subdirectory is descended into only while
 * its parent's depth is below `maxDepth`, so files up to `maxDepth`
 * levels beneath `root` are inspected. Unreadable directories are
 * skipped silently.
 *
 * @param root     Directory to start from.
 * @param out      Set that receives the discovered codes (mutated in place).
 * @param maxDepth Maximum number of directory levels to descend below `root`.
 */
async function walkSubtitleRoot(root: string, out: Set<string>, maxDepth: number): Promise<void> {
  const pending: Array<{ dir: string; depth: number }> = [{ dir: root, depth: 0 }];
  for (let frame = pending.pop(); frame !== undefined; frame = pending.pop()) {
    const { dir, depth } = frame;

    let listing: import("node:fs").Dirent[];
    try {
      listing = await fs.readdir(dir, { withFileTypes: true });
    } catch {
      continue;
    }

    for (const item of listing) {
      if (item.isDirectory()) {
        if (depth < maxDepth) {
          pending.push({ dir: path.join(dir, item.name), depth: depth + 1 });
        }
        continue;
      }
      if (!item.isFile()) continue;

      const ext = path.extname(item.name).toLowerCase();
      if (!SUBTITLE_EXTENSIONS.has(ext)) continue;

      const rawCode = extractCode(item.name.slice(0, item.name.length - ext.length));
      if (!rawCode) continue;
      const normalized = normalizeCode(rawCode);
      if (normalized) out.add(normalized);
    }
  }
}

/**
 * Public accessor for the configured video roots (main library path
 * first, extra paths after). Intended for path-allowlist checks such as
 * subtitle file resolution.
 */
export function getConfiguredVideoRoots(): string[] {
  const roots = configuredRoots();
  return roots;
}

/**
 * Read the video roots from app settings: `videoLibraryPath` first,
 * then each entry of `videoExtraPaths`. Every value is trimmed and
 * blank/missing entries are dropped.
 */
function configuredRoots(): string[] {
  const primary = (getAppSetting("videoLibraryPath") || "").trim();
  const additional = Array.from(
    getAppSetting("videoExtraPaths") ?? [],
    (entry) => (entry ?? "").trim(),
  );
  return [primary, ...additional].filter((root) => root.length > 0);
}

/** Order-sensitive equality of two root lists (same length, same string at every index). */
function rootsEqual(a: string[], b: string[]): boolean {
  if (a.length !== b.length) return false;
  // Position of the first differing element, or -1 when none differ.
  const firstMismatch = a.findIndex((root, i) => root !== b[i]);
  return firstMismatch === -1;
}

/**
 * Scan-state probe for API routes. Returns the cached scan state only
 * when at least one root is configured and the configured roots are
 * exactly the ones the last scan covered; otherwise returns the empty
 * state, which the caller can treat as "needs a rescan".
 */
export function getVideoIndex(): VideoIndex {
  const currentRoots = configuredRoots();
  const cacheStillApplies =
    currentRoots.length > 0 && rootsEqual(cachedScanState.rootsScanned, currentRoots);
  return cacheStillApplies ? cachedScanState : EMPTY_INDEX;
}

/** True while the scan held in `scanInFlight` was started with `force`. */
let scanInFlightForced = false;

/**
 * Rebuild the index from disk. Authoritative data lands in the
 * `video_metadata` table; this function returns only scan-state
 * metadata.
 *
 * Default mode is incremental — directories whose mtime hasn't changed
 * since the last scan reuse cached file rows. Pass `{force:true}` to
 * bypass the dir-mtime cache (e.g. after content edits that don't bump
 * dir mtime).
 *
 * Concurrent calls are coalesced onto the scan already running, with
 * one exception: a `force` caller that arrives while an unforced scan
 * is running waits for it and then runs a forced scan, so the request
 * to bypass the cache is not silently dropped.
 *
 * @param opts `force` — bypass the dir-mtime cache for this scan.
 * @returns Scan state of the scan this call ran or joined. Rejects if
 *          the walk, the metadata sync or the subtitle collection throws.
 */
export async function rescanVideoIndex(opts: { force?: boolean } = {}): Promise<VideoIndex> {
  const force = Boolean(opts.force);

  const pending = scanInFlight;
  if (pending) {
    if (!force || scanInFlightForced) return pending;
    try {
      await pending;
    } catch {
      // The unforced scan's failure is reported to its own callers; the
      // forced scan below runs regardless.
    }
    return rescanVideoIndex(opts);
  }

  const scan = (async (): Promise<VideoIndex> => {
    // configuredRoots() already returns trimmed, non-empty paths.
    const roots = configuredRoots();
    if (roots.length === 0) {
      cachedScanState = { ...EMPTY_INDEX };
      return cachedScanState;
    }
    const t0 = Date.now();
    const { files, count, reused, rescanned } = await walkAllRoots(roots, { force });
    const walkMs = Date.now() - t0;
    console.log(
      `[video] rescan walk in ${walkMs}ms — ${count} files (${reused} reused, ${rescanned} dir(s) rewalked${force ? ", forced" : ""})`,
    );
    // Persist the file table first — has_video / has_subtitle bulk
    // updates and metadata sync all run off it.
    await syncVideoMetadataIndex(files);
    syncHasVideoColumn(files);
    const subtitleCodes = await collectSubtitleCodes(files);
    syncHasSubtitleColumn(subtitleCodes);

    cachedScanState = {
      lastScannedAt: Date.now(),
      rootsScanned: roots,
      count,
    };
    return cachedScanState;
  })();

  scanInFlight = scan;
  scanInFlightForced = force;
  // Release the slot from a promise reaction rather than a `finally`
  // inside the async body: when no roots are configured the body finishes
  // synchronously, i.e. before the assignment above, and an in-body reset
  // would then be overwritten by a settled promise that is never cleared.
  const release = (): void => {
    if (scanInFlight === scan) {
      scanInFlight = null;
      scanInFlightForced = false;
    }
  };
  void scan.then(release, release);
  return scan;
}

/**
 * Mirror the freshly-walked code list into `images.has_video` so SQL
 * filters / counts can use the column directly.
 *
 * Inside one transaction: clear every row currently flagged, then flag
 * the rows whose `upper(code)` is among the distinct codes in `files`.
 * A failure is logged and swallowed.
 *
 * @param files Files produced by the most recent walk.
 */
function syncHasVideoColumn(files: VideoFile[]): void {
  // Batch size keeps each statement well below SQLite's bind-parameter cap.
  const BATCH_SIZE = 500;
  const distinctCodes = [...new Set(files.map((file) => file.code))];

  const applyFlags = rawDb.transaction(() => {
    rawDb.prepare(`UPDATE images SET has_video = 0 WHERE has_video = 1`).run();
    let offset = 0;
    while (offset < distinctCodes.length) {
      const batch = distinctCodes.slice(offset, offset + BATCH_SIZE);
      const placeholders = Array.from(batch, () => "?").join(",");
      rawDb.prepare(
        `UPDATE images SET has_video = 1 WHERE upper(code) IN (${placeholders})`,
      ).run(...batch);
      offset += BATCH_SIZE;
    }
  });

  try {
    applyFlags();
  } catch (err) {
    console.error("[video] failed to sync has_video column:", err);
  }
}

/**
 * Mirror the freshly-walked subtitle code set into `images.has_subtitle`.
 *
 * Inside one transaction: clear every row currently flagged, then flag
 * the rows whose `upper(code)` is in `subtitleCodes`. A failure is
 * logged and swallowed.
 *
 * @param subtitleCodes Codes that have at least one subtitle.
 */
function syncHasSubtitleColumn(subtitleCodes: Set<string>): void {
  // Batch size keeps each statement well below SQLite's bind-parameter cap.
  const BATCH_SIZE = 500;
  const allCodes = [...subtitleCodes];

  const applyFlags = rawDb.transaction(() => {
    rawDb.prepare(`UPDATE images SET has_subtitle = 0 WHERE has_subtitle = 1`).run();
    let offset = 0;
    while (offset < allCodes.length) {
      const batch = allCodes.slice(offset, offset + BATCH_SIZE);
      const placeholders = Array.from(batch, () => "?").join(",");
      rawDb.prepare(
        `UPDATE images SET has_subtitle = 1 WHERE upper(code) IN (${placeholders})`,
      ).run(...batch);
      offset += BATCH_SIZE;
    }
  });

  try {
    applyFlags();
  } catch (err) {
    console.error("[video] failed to sync has_subtitle column:", err);
  }
}

/** Columns selected from `video_metadata` by `findVideosForCode`. */
interface VideoMetaRow {
  /** Becomes `VideoFile.abs`; its basename becomes `VideoFile.filename`. */
  abs_path: string;
  /** Becomes `VideoFile.rel`; also the sort key of the lookup query. */
  rel_path: string;
  /** Becomes `VideoFile.code`. */
  code: string;
  /** Becomes `VideoFile.size`. */
  size_bytes: number;
  /** Becomes `VideoFile.mtime`. */
  mtime_ms: number;
}

/**
 * Look up the indexed files for one code. Queries the `video_metadata`
 * table directly, so the answer reflects whatever the table holds after
 * the most recent rescan.
 *
 * @param code Code to look up. It is normalized first; when
 *             normalization yields null/undefined the upper-cased input
 *             is used instead.
 * @returns Matching files ordered by relative path (case-insensitive);
 *          an empty array when `code` is empty, null or undefined.
 */
export function findVideosForCode(code: string | null | undefined): VideoFile[] {
  if (!code) return [];

  const lookupCode = normalizeCode(code) ?? code.toUpperCase();
  const statement = rawDb.prepare(`
    SELECT abs_path, rel_path, code, size_bytes, mtime_ms
    FROM video_metadata
    WHERE upper(code) = ?
    ORDER BY rel_path COLLATE NOCASE
  `);
  const matches = statement.all(lookupCode) as VideoMetaRow[];

  const found: VideoFile[] = [];
  for (const row of matches) {
    found.push({
      abs: row.abs_path,
      rel: row.rel_path,
      filename: path.basename(row.abs_path),
      code: row.code,
      size: row.size_bytes,
      mtime: row.mtime_ms,
    });
  }
  return found;
}

/**
 * Set of every distinct upper-cased code present in `video_metadata` —
 * a fast existence check.
 */
export function getCodesWithVideos(): Set<string> {
  const rows = rawDb.prepare(`
    SELECT DISTINCT upper(code) AS code FROM video_metadata
  `).all() as Array<{ code: string }>;
  const codes = new Set<string>();
  for (const { code } of rows) codes.add(code);
  return codes;
}

/**
 * Set of every distinct upper-cased code flagged as having a subtitle.
 * Reads the `images.has_subtitle` column (rows with a NULL code are
 * excluded by the query).
 */
export function getCodesWithSubtitles(): Set<string> {
  const rows = rawDb.prepare(`
    SELECT DISTINCT upper(code) AS code FROM images WHERE has_subtitle = 1 AND code IS NOT NULL
  `).all() as Array<{ code: string }>;
  const codes = new Set<string>();
  for (const { code } of rows) codes.add(code);
  return codes;
}