// Source listing metadata: 540 lines · 19 KiB · TypeScript
import "server-only";
|
|
import path from "node:path";
|
|
import fs from "node:fs/promises";
|
|
import { extractCode, normalizeCode } from "@/lib/jav/codeParser";
|
|
import { getAppSetting } from "@/lib/db/appSettings";
|
|
import { rawDb } from "@/lib/db/client";
|
|
import { syncVideoMetadataIndex } from "./metadata";
|
|
|
|
/**
 * File extensions (lower-case, leading dot included) that the library walk
 * accepts as video files. Callers are expected to lower-case the extension
 * before testing membership.
 */
export const VIDEO_EXTENSIONS = new Set([
  ".mp4", ".mkv", ".m4v", ".mov", ".webm", ".avi", ".wmv", ".ts", ".mpg", ".mpeg", ".flv",
]);
|
|
|
|
/** Sidecar subtitle extensions (lower-case, leading dot included). */
const SUBTITLE_EXTENSIONS = new Set<string>([
  ".srt",
  ".vtt",
  ".ass",
  ".ssa",
]);
|
|
|
|
/** A single video file discovered by the library index. */
export interface VideoFile {
  /** Absolute location of the file on disk. */
  abs: string;
  /** Location relative to the configured root the file was found under. */
  rel: string;
  /** Base filename, extension included. */
  filename: string;
  /** Normalized JAV code derived from the filename. */
  code: string;
  /** Size of the file, in bytes. */
  size: number;
  /** Last-modified time, in milliseconds. */
  mtime: number;
}
|
|
|
|
/**
 * Small record describing the most recent rescan. The per-file data is
 * authoritative in the `video_metadata` SQLite table (the accessors in
 * this module query it directly), so nothing file-level is kept here.
 */
interface VideoIndex {
  /** Timestamp (ms) of the last completed index build; 0 when never built. */
  lastScannedAt: number;
  /**
   * Every folder root that was scanned, in order: main root first, extra
   * roots after. Shown in the UI and compared against current settings to
   * detect configuration changes.
   */
  rootsScanned: string[];
  /** Number of files matched by the most recent scan. */
  count: number;
}
|
|
|
|
/** Scan state reported when nothing has been scanned or the roots no longer match. */
const EMPTY_INDEX: VideoIndex = { lastScannedAt: 0, rootsScanned: [], count: 0 };
|
|
|
|
/** Scan state produced by the most recent completed rescan (module-level, per process). */
let cachedScanState: VideoIndex = EMPTY_INDEX;

/** Promise of the rescan currently running, or `null` when idle; used to coalesce callers. */
let scanInFlight: Promise<VideoIndex> | null = null;
|
|
|
|
/** Columns read back from `video_metadata` when reusing rows for an unchanged directory. */
interface CachedFileRow {
  /** Absolute path of the file. */
  abs_path: string;
  /** Path relative to its library root, as stored by the previous scan. */
  rel_path: string;
  /** Normalized code stored for the file. */
  code: string;
  /** File size in bytes. */
  size_bytes: number;
  /** File last-modified time in milliseconds. */
  mtime_ms: number;
}
|
|
|
|
/** Options accepted by the directory walk. */
interface WalkOpts {
  /**
   * When true, the dir-mtime cache is ignored and every directory is
   * re-read and its files re-stat'ed. Intended for edits that leave the
   * directory mtime untouched (e.g. a content rewrite without a rename).
   */
  force?: boolean;
}
|
|
|
|
/**
 * Walk the configured roots and produce a flat VideoFile[]. The caller
 * writes the result to the `video_metadata` table — nothing is held in
 * memory beyond the duration of one rescan.
 *
 * Incremental: each directory's mtime is compared to the value stored in
 * `video_dir_mtimes`. If unchanged, the rows for files immediately inside
 * that directory are reused from `video_metadata` instead of stat'ing each
 * file. Every directory is still `readdir`'d so subdirectories are always
 * discovered (their mtimes change independently of the parent's).
 *
 * A directory only counts as visited once both `stat` and `readdir`
 * succeeded. Unreadable directories are therefore pruned from the mtime
 * cache by `saveDirMtimeCache`, so they get a full rescan when they become
 * readable again instead of being mistaken for "unchanged, no files".
 *
 * A directory reachable from more than one root (overlapping roots) is
 * walked once, under the first root that reaches it.
 *
 * @param roots Absolute root folders to walk, in priority order.
 * @param opts  `force: true` bypasses both caches and rescans every directory.
 * @returns `files` sorted by code then filename, `count` (= `files.length`),
 *          `visitedDirs` (directories successfully listed), `reused` (number
 *          of file rows taken from the cache) and `rescanned` (number of
 *          directories whose files were re-stat'ed).
 */
async function walkAllRoots(
  roots: string[],
  opts: WalkOpts = {},
): Promise<{ files: VideoFile[]; count: number; visitedDirs: Set<string>; reused: number; rescanned: number }> {
  // A forced walk starts from empty caches so every directory looks changed.
  const cachedMtimes = opts.force
    ? new Map<string, number>()
    : loadDirMtimeCache();
  const visitedDirs = new Set<string>();
  const files: VideoFile[] = [];
  const cachedFilesByDir = opts.force
    ? new Map<string, CachedFileRow[]>()
    : loadCachedFileIndex();

  let reused = 0;
  let rescanned = 0;

  for (const root of roots) {
    const stack: string[] = [root];
    while (stack.length > 0) {
      const dir = stack.pop();
      if (dir === undefined) break;

      // Overlapping roots: never list (and double-count) a directory twice.
      if (visitedDirs.has(dir)) continue;

      let dirStat: import("node:fs").Stats;
      let entries: import("node:fs").Dirent[];
      try {
        dirStat = await fs.stat(dir);
        entries = await fs.readdir(dir, { withFileTypes: true });
      } catch {
        // Dir vanished or is unreadable. It is deliberately NOT marked as
        // visited so its stale mtime entry is pruned rather than kept.
        continue;
      }
      visitedDirs.add(dir);

      // Subdirectories are queued regardless of this directory's cache state.
      for (const e of entries) {
        if (e.isDirectory()) stack.push(path.join(dir, e.name));
      }

      const cachedMtime = cachedMtimes.get(dir);
      const dirUnchanged = cachedMtime != null && cachedMtime === dirStat.mtimeMs;

      if (dirUnchanged) {
        // Reuse cached rows for files immediately in this directory.
        const cached = cachedFilesByDir.get(dir);
        if (cached) {
          for (const row of cached) {
            files.push({
              abs: row.abs_path,
              rel: path.relative(root, row.abs_path),
              filename: path.basename(row.abs_path),
              code: row.code,
              size: row.size_bytes,
              mtime: row.mtime_ms,
            });
          }
          reused += cached.length;
        }
        continue;
      }

      // Dir changed (or no cache entry yet): stat every matching file.
      rescanned++;
      for (const e of entries) {
        if (!e.isFile()) continue;
        const ext = path.extname(e.name).toLowerCase();
        if (!VIDEO_EXTENSIONS.has(ext)) continue;
        const stem = e.name.slice(0, e.name.length - ext.length);
        const code = extractCode(stem);
        if (!code) continue;
        const norm = normalizeCode(code);
        if (!norm) continue;
        const abs = path.join(dir, e.name);
        let st: import("node:fs").Stats;
        try {
          st = await fs.stat(abs);
        } catch {
          continue; // file vanished between readdir and stat
        }
        files.push({
          abs,
          rel: path.relative(root, abs),
          filename: e.name,
          code: norm,
          size: st.size,
          mtime: st.mtimeMs,
        });
      }
      // Record the mtime so the NEXT scan can treat this dir as unchanged.
      cachedMtimes.set(dir, dirStat.mtimeMs);
    }
  }

  // Persist updated mtime cache for next scan.
  saveDirMtimeCache(cachedMtimes, visitedDirs);

  // Stable order across rescans.
  files.sort((a, b) => a.code.localeCompare(b.code) || a.filename.localeCompare(b.filename));
  return { files, count: files.length, visitedDirs, reused, rescanned };
}
|
|
|
|
/** Read every `video_dir_mtimes` row and return a Map from `abs_dir` to its stored `mtime_ms`. */
function loadDirMtimeCache(): Map<string, number> {
  const statement = rawDb.prepare(`SELECT abs_dir, mtime_ms FROM video_dir_mtimes`);
  const stored = statement.all() as Array<{ abs_dir: string; mtime_ms: number }>;
  const byDir = new Map<string, number>();
  stored.forEach((row) => {
    byDir.set(row.abs_dir, row.mtime_ms);
  });
  return byDir;
}
|
|
|
|
/**
 * Bucket every `video_metadata` row by its `dir_path`, so reusing the rows
 * of an unchanged directory is one Map lookup. Reads the whole table in a
 * single query.
 */
function loadCachedFileIndex(): Map<string, CachedFileRow[]> {
  const allRows = rawDb.prepare(`
    SELECT abs_path, rel_path, code, size_bytes, mtime_ms, dir_path
    FROM video_metadata
  `).all() as Array<CachedFileRow & { dir_path: string }>;

  const byDir = new Map<string, CachedFileRow[]>();
  for (const row of allRows) {
    let bucket = byDir.get(row.dir_path);
    if (!bucket) {
      bucket = [];
      byDir.set(row.dir_path, bucket);
    }
    bucket.push(row);
  }
  return byDir;
}
|
|
|
|
/**
 * Write the dir-mtime cache back to `video_dir_mtimes` in one transaction:
 * upsert an entry for every visited directory that has a recorded mtime,
 * then delete rows for directories that were not visited this scan
 * (deleted, moved or renamed folders). Failures are logged, not thrown.
 *
 * @param mtimes  Directory → mtime (ms) values to persist.
 * @param visited Directories seen during the current scan.
 */
function saveDirMtimeCache(mtimes: Map<string, number>, visited: Set<string>): void {
  const upsertStmt = rawDb.prepare(`
    INSERT INTO video_dir_mtimes (abs_dir, mtime_ms, last_seen_at)
    VALUES (?, ?, ?)
    ON CONFLICT(abs_dir) DO UPDATE SET
      mtime_ms = excluded.mtime_ms,
      last_seen_at = excluded.last_seen_at
  `);
  const stamp = Date.now();

  const persist = rawDb.transaction(() => {
    // Entries for dirs that were not visited may be stale (moved/renamed),
    // so only visited dirs are written.
    mtimes.forEach((mtime, dir) => {
      if (visited.has(dir)) upsertStmt.run(dir, mtime, stamp);
    });

    // Drop every stored row whose directory was not seen this scan.
    const stored = rawDb.prepare(`SELECT abs_dir FROM video_dir_mtimes`).all() as Array<{ abs_dir: string }>;
    const deleteStmt = rawDb.prepare(`DELETE FROM video_dir_mtimes WHERE abs_dir = ?`);
    for (const { abs_dir } of stored) {
      if (visited.has(abs_dir)) continue;
      deleteStmt.run(abs_dir);
    }
  });

  try {
    persist();
  } catch (e) {
    console.error("[video] failed to save dir mtime cache:", e);
  }
}
|
|
|
|
/**
 * Report whether `token` occurs in `haystack` as a delimited token: each
 * side is either the string edge or a character outside `[a-z0-9]`.
 * Every occurrence is examined, not just the first one. Both arguments are
 * expected to be lower-cased already.
 */
function containsCodeToken(haystack: string, token: string): boolean {
  if (token.length === 0) return false;
  const isBoundary = (c: string | undefined): boolean => c === undefined || !/[a-z0-9]/.test(c);
  for (let idx = haystack.indexOf(token); idx >= 0; idx = haystack.indexOf(token, idx + 1)) {
    if (isBoundary(haystack[idx - 1]) && isBoundary(haystack[idx + token.length])) return true;
  }
  return false;
}

/**
 * Walk every place a sidecar subtitle could live and return the set of
 * canonical codes that have at least one. Cheap signal — no ffprobe.
 *
 * - Each video's own directory: a subtitle file counts when its name starts
 *   with `<video stem>.`, or when it contains the video's code as a
 *   delimited token. So `yuj-0011.srt` or `xyuj-001.srt` do not count for
 *   YUJ-001, while `notes-yuj-001-bad.srt` does (`-` is a delimiter).
 * - Each entry in `subtitleExtraPaths` (walked via `walkSubtitleRoot` with
 *   maxDepth 3) — the code is extracted from the filename directly.
 * - data/generated-subtitles/<code>/ — directory name IS the code.
 *
 * @param files Video files found by the current scan.
 * @returns Set of normalized codes with at least one discoverable subtitle.
 */
async function collectSubtitleCodes(files: VideoFile[]): Promise<Set<string>> {
  const codes = new Set<string>();

  // Same-folder scan. Each directory is listed once and reduced to its
  // lower-cased subtitle filenames, shared by all videos in that folder.
  const subtitleNamesByDir = new Map<string, string[]>();
  for (const file of files) {
    if (codes.has(file.code)) continue; // already satisfied by another file

    const dir = path.dirname(file.abs);
    let subtitleNames = subtitleNamesByDir.get(dir);
    if (!subtitleNames) {
      subtitleNames = [];
      try {
        const entries = await fs.readdir(dir, { withFileTypes: true });
        for (const e of entries) {
          if (e.isFile() && SUBTITLE_EXTENSIONS.has(path.extname(e.name).toLowerCase())) {
            subtitleNames.push(e.name.toLowerCase());
          }
        }
      } catch {
        // Unreadable directory: treated as having no sidecars.
      }
      subtitleNamesByDir.set(dir, subtitleNames);
    }

    const stem = file.filename.slice(0, file.filename.length - path.extname(file.filename).length);
    const stemPrefix = stem.toLowerCase() + ".";
    const codeLower = file.code.toLowerCase();
    const hasSidecar = subtitleNames.some(
      (name) => name.startsWith(stemPrefix) || containsCodeToken(name, codeLower),
    );
    if (hasSidecar) codes.add(file.code);
  }

  // Persistent subtitle library roots — extract codes from filenames.
  const extraRoots = (getAppSetting("subtitleExtraPaths") ?? []).filter(Boolean);
  for (const root of extraRoots) {
    await walkSubtitleRoot(root, codes, 3);
  }

  // data/generated-subtitles/<code>/ — directory name is the code.
  const generatedRoot = path.join(process.cwd(), "data", "generated-subtitles");
  try {
    const subdirs = await fs.readdir(generatedRoot, { withFileTypes: true });
    for (const d of subdirs) {
      if (!d.isDirectory()) continue;
      const dirAbs = path.join(generatedRoot, d.name);
      let entries: import("node:fs").Dirent[];
      try {
        entries = await fs.readdir(dirAbs, { withFileTypes: true });
      } catch {
        continue;
      }
      const hasSub = entries.some(
        (e) => e.isFile() && SUBTITLE_EXTENSIONS.has(path.extname(e.name).toLowerCase()),
      );
      if (hasSub) {
        const norm = normalizeCode(d.name);
        if (norm) codes.add(norm);
      }
    }
  } catch { /* generated-subtitles not present yet — fine */ }

  return codes;
}
|
|
|
|
/**
 * Depth-limited walk of a subtitle library folder. For every subtitle file
 * found, the code is extracted from its filename, normalized and added to
 * `out`. Directories that cannot be listed are skipped.
 *
 * @param root     Folder to start from (depth 0).
 * @param out      Set that receives the normalized codes (mutated in place).
 * @param maxDepth Subdirectories are descended into only while the current
 *                 depth is below this value.
 */
async function walkSubtitleRoot(root: string, out: Set<string>, maxDepth: number): Promise<void> {
  const pending: Array<{ dir: string; depth: number }> = [{ dir: root, depth: 0 }];

  while (pending.length > 0) {
    const frame = pending.pop()!;

    let listing: import("node:fs").Dirent[];
    try {
      listing = await fs.readdir(frame.dir, { withFileTypes: true });
    } catch {
      continue;
    }

    for (const entry of listing) {
      if (entry.isDirectory()) {
        if (frame.depth < maxDepth) {
          pending.push({ dir: path.join(frame.dir, entry.name), depth: frame.depth + 1 });
        }
        continue;
      }
      if (!entry.isFile()) continue;

      const ext = path.extname(entry.name).toLowerCase();
      if (!SUBTITLE_EXTENSIONS.has(ext)) continue;

      const rawCode = extractCode(entry.name.slice(0, entry.name.length - ext.length));
      if (!rawCode) continue;

      const normalized = normalizeCode(rawCode);
      if (normalized) out.add(normalized);
    }
  }
}
|
|
|
|
/**
 * Public accessor for the currently configured video roots, e.g. for
 * path-allowlist checks such as subtitle file resolution.
 *
 * @returns The main library path (if set) followed by the extra paths.
 */
export function getConfiguredVideoRoots(): string[] {
  const roots = configuredRoots();
  return roots;
}
|
|
|
|
/**
 * Read the video roots from app settings: the trimmed `videoLibraryPath`
 * first (when non-empty), then every non-empty trimmed entry of
 * `videoExtraPaths`, in their configured order.
 */
function configuredRoots(): string[] {
  const roots: string[] = [];

  const primary = (getAppSetting("videoLibraryPath") || "").trim();
  if (primary) roots.push(primary);

  for (const candidate of getAppSetting("videoExtraPaths") ?? []) {
    const trimmed = (candidate ?? "").trim();
    if (!trimmed) continue;
    roots.push(trimmed);
  }

  return roots;
}
|
|
|
|
/**
 * Order-sensitive equality of two root lists: true only when both have the
 * same length and hold identical strings at every position.
 *
 * @param a First list of roots (not mutated).
 * @param b Second list of roots (not mutated).
 */
function rootsEqual(a: readonly string[], b: readonly string[]): boolean {
  return a.length === b.length && a.every((root, i) => root === b[i]);
}
|
|
|
|
/**
 * Scan-state probe for API routes. Returns the cached scan state only when
 * at least one root is configured and the configured roots are exactly the
 * ones the last scan covered; otherwise returns the empty state so the
 * caller can trigger a rescan.
 */
export function getVideoIndex(): VideoIndex {
  const roots = configuredRoots();
  const cacheStillValid = roots.length > 0 && rootsEqual(cachedScanState.rootsScanned, roots);
  return cacheStillValid ? cachedScanState : EMPTY_INDEX;
}
|
|
|
|
/**
 * Rebuild the index from disk. Coalesces concurrent calls: while a scan is
 * running, every caller receives that scan's promise. Authoritative data
 * lands in the `video_metadata` table; this function returns only
 * scan-state metadata.
 *
 * Default mode is incremental — directories whose mtime hasn't changed
 * since the last scan reuse cached file rows. Pass `{force:true}` to bypass
 * the dir-mtime cache (e.g. after content edits that don't bump dir mtime).
 *
 * NOTE(review): a `force` request that arrives while a non-forced scan is in
 * flight is coalesced into that scan and is NOT forced — confirm callers
 * are fine with that.
 *
 * @param opts `force` — rescan every directory regardless of cached mtimes.
 * @returns The scan state after the (possibly shared) scan completes.
 */
export async function rescanVideoIndex(opts: { force?: boolean } = {}): Promise<VideoIndex> {
  if (scanInFlight) return scanInFlight;

  const roots = configuredRoots();

  const run = (async (): Promise<VideoIndex> => {
    const cleanRoots = roots.map((r) => (r ?? "").trim()).filter(Boolean);
    if (cleanRoots.length === 0) {
      cachedScanState = { ...EMPTY_INDEX };
      return cachedScanState;
    }

    const t0 = Date.now();
    const { files, count, reused, rescanned } = await walkAllRoots(cleanRoots, { force: opts.force });
    const walkMs = Date.now() - t0;
    console.log(
      `[video] rescan walk in ${walkMs}ms — ${count} files (${reused} reused, ${rescanned} dir(s) rewalked${opts.force ? ", forced" : ""})`,
    );

    // Persist the file table first — has_video / has_subtitle bulk
    // updates and metadata sync all run off it.
    await syncVideoMetadataIndex(files);
    syncHasVideoColumn(files);
    const subtitleCodes = await collectSubtitleCodes(files);
    syncHasSubtitleColumn(subtitleCodes);

    cachedScanState = {
      lastScannedAt: Date.now(),
      rootsScanned: cleanRoots,
      count,
    };
    return cachedScanState;
  })();

  // Clear the in-flight slot from a `.finally()` callback rather than a
  // `finally` block inside the async body. The no-roots path has no `await`,
  // so an inner `finally` would run synchronously BEFORE `scanInFlight` is
  // assigned below, leaving a settled promise stuck there and turning every
  // later rescan into a no-op. A `.finally()` callback always runs later.
  const tracked: Promise<VideoIndex> = run.finally(() => {
    if (scanInFlight === tracked) scanInFlight = null;
  });
  scanInFlight = tracked;
  return tracked;
}
|
|
|
|
/**
 * Mirror the codes of the freshly walked files into `images.has_video` so
 * SQL filters and counts can use the column directly. In one transaction,
 * every row currently flagged is reset to 0, then rows whose `upper(code)`
 * matches one of the walked codes are set to 1. Failures are logged, not
 * thrown.
 *
 * @param files Files found by the current scan.
 */
function syncHasVideoColumn(files: VideoFile[]): void {
  const uniqueCodes = [...new Set(files.map((file) => file.code))];

  // Kept well below SQLite's bind-parameter cap.
  const BATCH = 500;

  const apply = rawDb.transaction(() => {
    rawDb.prepare(`UPDATE images SET has_video = 0 WHERE has_video = 1`).run();

    let offset = 0;
    while (offset < uniqueCodes.length) {
      const batch = uniqueCodes.slice(offset, offset + BATCH);
      const placeholders = new Array<string>(batch.length).fill("?").join(",");
      rawDb.prepare(
        `UPDATE images SET has_video = 1 WHERE upper(code) IN (${placeholders})`,
      ).run(...batch);
      offset += BATCH;
    }
  });

  try {
    apply();
  } catch (e) {
    console.error("[video] failed to sync has_video column:", e);
  }
}
|
|
|
|
/**
 * Mirror the freshly collected subtitle code set into
 * `images.has_subtitle`. In one transaction, every row currently flagged is
 * reset to 0, then rows whose `upper(code)` matches one of the given codes
 * are set to 1. Failures are logged, not thrown.
 *
 * @param subtitleCodes Normalized codes that have at least one subtitle.
 */
function syncHasSubtitleColumn(subtitleCodes: Set<string>): void {
  const codeList = [...subtitleCodes];

  // Kept well below SQLite's bind-parameter cap.
  const BATCH = 500;

  const apply = rawDb.transaction(() => {
    rawDb.prepare(`UPDATE images SET has_subtitle = 0 WHERE has_subtitle = 1`).run();

    let offset = 0;
    while (offset < codeList.length) {
      const batch = codeList.slice(offset, offset + BATCH);
      const placeholders = new Array<string>(batch.length).fill("?").join(",");
      rawDb.prepare(
        `UPDATE images SET has_subtitle = 1 WHERE upper(code) IN (${placeholders})`,
      ).run(...batch);
      offset += BATCH;
    }
  });

  try {
    apply();
  } catch (e) {
    console.error("[video] failed to sync has_subtitle column:", e);
  }
}
|
|
|
|
/** Shape of a `video_metadata` row as selected by the lookup accessors. */
interface VideoMetaRow {
  /** Absolute path of the file. */
  abs_path: string;
  /** Path relative to its library root. */
  rel_path: string;
  /** Code stored for the file. */
  code: string;
  /** File size in bytes. */
  size_bytes: number;
  /** File last-modified time in milliseconds. */
  mtime_ms: number;
}
|
|
|
|
/**
 * Find the indexed video files for one code. Queries `video_metadata`
 * directly, so the answer reflects the most recent rescan.
 *
 * @param code Code to look up; it is normalized first, falling back to the
 *             upper-cased input when normalization yields null/undefined.
 * @returns Matching files ordered by relative path (case-insensitive);
 *          an empty array when `code` is empty, null or undefined.
 */
export function findVideosForCode(code: string | null | undefined): VideoFile[] {
  if (!code) return [];

  const lookup = normalizeCode(code) ?? code.toUpperCase();
  const statement = rawDb.prepare(`
    SELECT abs_path, rel_path, code, size_bytes, mtime_ms
    FROM video_metadata
    WHERE upper(code) = ?
    ORDER BY rel_path COLLATE NOCASE
  `);
  const matches = statement.all(lookup) as VideoMetaRow[];

  const found: VideoFile[] = [];
  for (const row of matches) {
    found.push({
      abs: row.abs_path,
      rel: row.rel_path,
      filename: path.basename(row.abs_path),
      code: row.code,
      size: row.size_bytes,
      mtime: row.mtime_ms,
    });
  }
  return found;
}
|
|
|
|
/** Upper-cased set of every distinct code in `video_metadata` — a fast existence check. */
export function getCodesWithVideos(): Set<string> {
  const statement = rawDb.prepare(`
    SELECT DISTINCT upper(code) AS code FROM video_metadata
  `);
  const found = new Set<string>();
  for (const { code } of statement.all() as Array<{ code: string }>) {
    found.add(code);
  }
  return found;
}
|
|
|
|
/**
 * Upper-cased set of every code flagged as having a subtitle. Reads the
 * `images.has_subtitle` column, which is populated at rescan time.
 */
export function getCodesWithSubtitles(): Set<string> {
  const statement = rawDb.prepare(`
    SELECT DISTINCT upper(code) AS code FROM images WHERE has_subtitle = 1 AND code IS NOT NULL
  `);
  const found = new Set<string>();
  for (const { code } of statement.all() as Array<{ code: string }>) {
    found.add(code);
  }
  return found;
}
|