Initial commit
This commit is contained in:
@@ -0,0 +1,59 @@
|
||||
import "server-only";
|
||||
import { spawn } from "node:child_process";
|
||||
|
||||
/** Successful probe results keyed by absolute path; kept for the lifetime of the process. */
const cache = new Map<string, number>();

/** Hard cap, in milliseconds, on how long one ffprobe invocation may run. */
const PROBE_TIMEOUT_MS = 10_000;

/**
 * Probe a video file's duration in seconds via ffprobe.
 *
 * Results are cached per path for the lifetime of the process — files
 * don't change duration on us. Only finite, positive durations are
 * cached; failures are never cached, so they are retried on the next call.
 *
 * The probe is capped at PROBE_TIMEOUT_MS and tied to an optional
 * AbortSignal so a hung ffprobe (network mount, weird codec, dead disk)
 * can't leave the request awaiting forever or zombie the subprocess.
 *
 * @param abs    Path handed to ffprobe as its input file.
 * @param signal Optional signal; aborting kills the subprocess and resolves null.
 * @returns Duration in seconds, or null if ffprobe fails, times out, is
 *          aborted, or prints something that is not a positive number.
 */
export async function probeDuration(abs: string, signal?: AbortSignal): Promise<number | null> {
  const cached = cache.get(abs);
  if (cached !== undefined) return cached;

  // Already aborted: don't fork a process just to kill it. Spawning first
  // and bailing out before the "error" listener is attached would also turn
  // a spawn failure into an unhandled 'error' event.
  if (signal?.aborted) return null;

  return new Promise<number | null>((resolve) => {
    let out = "";
    let settled = false;
    let timeoutId: ReturnType<typeof setTimeout> | undefined;

    const proc = spawn(
      "ffprobe",
      [
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        abs,
      ],
      // Only stdout is consumed. Leaving stdin/stderr as unread pipes lets a
      // chatty ffprobe block once the pipe buffer fills.
      { stdio: ["ignore", "pipe", "ignore"] },
    );

    const onAbort = (): void => kill();

    // Resolve exactly once and release the timer and abort listener.
    const settle = (n: number | null): void => {
      if (settled) return;
      settled = true;
      if (timeoutId !== undefined) clearTimeout(timeoutId);
      signal?.removeEventListener("abort", onAbort);
      if (n !== null && Number.isFinite(n) && n > 0) {
        cache.set(abs, n);
        resolve(n);
      } else {
        resolve(null);
      }
    };

    // Shared by the timeout and the abort path.
    function kill(): void {
      try {
        proc.kill("SIGKILL");
      } catch {
        /* process already gone — nothing to do */
      }
      settle(null);
    }

    // Attach process listeners before arming anything that can settle.
    proc.stdout?.on("data", (d: Buffer | string) => {
      out += d.toString();
    });
    proc.on("close", () => settle(Number(out.trim())));
    proc.on("error", () => settle(null));

    timeoutId = setTimeout(kill, PROBE_TIMEOUT_MS);
    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
|
||||
@@ -0,0 +1,539 @@
|
||||
import "server-only";
|
||||
import path from "node:path";
|
||||
import fs from "node:fs/promises";
|
||||
import { extractCode, normalizeCode } from "@/lib/jav/codeParser";
|
||||
import { getAppSetting } from "@/lib/db/appSettings";
|
||||
import { rawDb } from "@/lib/db/client";
|
||||
import { syncVideoMetadataIndex } from "./metadata";
|
||||
|
||||
/** Lower-case file extensions (leading dot included) that count as video files. */
export const VIDEO_EXTENSIONS = new Set<string>([
  ".mp4",
  ".mkv",
  ".m4v",
  ".mov",
  ".webm",
  ".avi",
  ".wmv",
  ".ts",
  ".mpg",
  ".mpeg",
  ".flv",
]);

/** Lower-case file extensions (leading dot included) that count as subtitle sidecars. */
const SUBTITLE_EXTENSIONS = new Set<string>([
  ".srt",
  ".vtt",
  ".ass",
  ".ssa",
]);
|
||||
|
||||
/** A single video file discovered on disk by the index walk. */
export interface VideoFile {
  /** Absolute on-disk path of the file. */
  abs: string;
  /** Path relative to the library root the file was found under. */
  rel: string;
  /** Base filename, extension included. */
  filename: string;
  /** Normalized JAV code derived from the filename. */
  code: string;
  /** Size of the file in bytes. */
  size: number;
  /** Last-modified time in milliseconds. */
  mtime: number;
}
|
||||
|
||||
/**
 * Summary of the most recent rescan. The file data itself is stored in
 * the `video_metadata` SQLite table and is read from there by the
 * accessors in this module; this record only describes the scan.
 */
interface VideoIndex {
  /** Timestamp of the last completed index build. */
  lastScannedAt: number;
  /**
   * Folder roots covered by that scan, main root first and extras after.
   * Shown in the UI and compared against settings to detect changes.
   */
  rootsScanned: string[];
  /** Number of files the most recent scan matched. */
  count: number;
}

/** Scan state used before any scan has run, or when no roots apply. */
const EMPTY_INDEX: VideoIndex = {
  count: 0,
  rootsScanned: [],
  lastScannedAt: 0,
};

/** Scan state produced by the latest rescan. */
let cachedScanState: VideoIndex = EMPTY_INDEX;

/** Promise of the rescan currently running, if any; used to coalesce callers. */
let scanInFlight: Promise<VideoIndex> | null = null;
|
||||
|
||||
/** Columns of a `video_metadata` row that the walker reuses for unchanged directories. */
interface CachedFileRow {
  abs_path: string;
  rel_path: string;
  code: string;
  size_bytes: number;
  mtime_ms: number;
}

/** Options accepted by the directory walk. */
interface WalkOpts {
  /**
   * When true, the dir-mtime cache is ignored and every directory is
   * listed again. Intended for edits that leave the directory mtime
   * untouched (e.g. a content rewrite without a rename).
   */
  force?: boolean;
}
|
||||
|
||||
/**
 * Walk the configured roots and produce a flat, sorted VideoFile[]. The
 * caller writes the result to the `video_metadata` table — nothing is held
 * in memory beyond the duration of one rescan.
 *
 * Incremental: each directory's mtime is compared to the value stored in
 * `video_dir_mtimes`. If unchanged, the file rows for that directory are
 * reused from `video_metadata` instead of stat-ing every file. Every
 * directory is still listed once so subdirectories can be discovered —
 * their mtimes change independently of the parent's.
 *
 * Two safety rules keep the cache from hiding files:
 *  - A directory counts as "visited" only after stat AND readdir succeed.
 *    An unreadable directory is therefore pruned from the mtime cache and
 *    gets a real rescan once it is reachable again.
 *  - An "unchanged" directory with no cached rows that nevertheless
 *    contains files with a video extension is rescanned rather than
 *    trusted.
 *
 * @param roots Library roots to walk, in order.
 * @param opts  `force: true` bypasses both caches and re-stats everything.
 * @returns The files found, their count, the directories read successfully,
 *          and counters for reused rows and rescanned directories.
 */
async function walkAllRoots(
  roots: string[],
  opts: WalkOpts = {},
): Promise<{ files: VideoFile[]; count: number; visitedDirs: Set<string>; reused: number; rescanned: number }> {
  const force = Boolean(opts.force);
  const cachedMtimes = force ? new Map<string, number>() : loadDirMtimeCache();
  const cachedFilesByDir = force ? new Map<string, CachedFileRow[]>() : loadCachedFileIndex();
  const visitedDirs = new Set<string>();
  const files: VideoFile[] = [];

  let reused = 0;
  let rescanned = 0;

  const looksLikeVideo = (e: import("node:fs").Dirent): boolean =>
    e.isFile() && VIDEO_EXTENSIONS.has(path.extname(e.name).toLowerCase());

  for (const root of roots) {
    const stack: string[] = [root];
    while (stack.length > 0) {
      const dir = stack.pop();
      if (dir === undefined) break;

      let dirStat: import("node:fs").Stats;
      let entries: import("node:fs").Dirent[];
      try {
        // Stat first: if the directory changes between the two calls we
        // record the older mtime and simply rescan it next time.
        dirStat = await fs.stat(dir);
        entries = await fs.readdir(dir, { withFileTypes: true });
      } catch {
        // Vanished or unreadable. Deliberately NOT marked visited, so its
        // mtime row is pruned and it cannot look "unchanged" later.
        continue;
      }
      visitedDirs.add(dir);

      // Subdirectories are always queued, regardless of cache state.
      for (const e of entries) {
        if (e.isDirectory()) stack.push(path.join(dir, e.name));
      }

      const cachedMtime = cachedMtimes.get(dir);
      const dirUnchanged = cachedMtime != null && cachedMtime === dirStat.mtimeMs;

      if (dirUnchanged) {
        const cached = cachedFilesByDir.get(dir);
        if (cached) {
          // Reuse cached rows for files directly in this directory.
          for (const row of cached) {
            files.push({
              abs: row.abs_path,
              rel: path.relative(root, row.abs_path),
              filename: path.basename(row.abs_path),
              code: row.code,
              size: row.size_bytes,
              mtime: row.mtime_ms,
            });
          }
          reused += cached.length;
          continue;
        }
        // No rows on record. Fine if the directory holds no videos;
        // otherwise the cache is out of step with disk — rescan below.
        if (!entries.some(looksLikeVideo)) continue;
      }

      // Directory changed, is new, or its cache could not be trusted.
      rescanned++;
      for (const e of entries) {
        if (!e.isFile()) continue;
        const ext = path.extname(e.name).toLowerCase();
        if (!VIDEO_EXTENSIONS.has(ext)) continue;
        const stem = e.name.slice(0, e.name.length - ext.length);
        const code = extractCode(stem);
        if (!code) continue;
        const norm = normalizeCode(code);
        if (!norm) continue;
        const abs = path.join(dir, e.name);
        let st: import("node:fs").Stats;
        try {
          st = await fs.stat(abs);
        } catch {
          continue; // file vanished mid-walk
        }
        files.push({
          abs,
          rel: path.relative(root, abs),
          filename: e.name,
          code: norm,
          size: st.size,
          mtime: st.mtimeMs,
        });
      }
      // Record the mtime so the NEXT scan can treat this dir as unchanged.
      cachedMtimes.set(dir, dirStat.mtimeMs);
    }
  }

  // Persist the updated mtime cache for the next scan.
  saveDirMtimeCache(cachedMtimes, visitedDirs);

  // Stable order across rescans.
  files.sort((a, b) => a.code.localeCompare(b.code) || a.filename.localeCompare(b.filename));
  return { files, count: files.length, visitedDirs, reused, rescanned };
}
|
||||
|
||||
/** Read every `video_dir_mtimes` row and index the stored mtime by `abs_dir`. */
function loadDirMtimeCache(): Map<string, number> {
  type DirMtimeRow = { abs_dir: string; mtime_ms: number };
  const stored = rawDb.prepare(`SELECT abs_dir, mtime_ms FROM video_dir_mtimes`).all() as DirMtimeRow[];
  return new Map(stored.map((row): [string, number] => [row.abs_dir, row.mtime_ms]));
}
|
||||
|
||||
/**
 * Bucket the whole `video_metadata` table by `dir_path` in one linear
 * pass, so dir-cache reuse needs only one in-memory lookup per directory.
 */
function loadCachedFileIndex(): Map<string, CachedFileRow[]> {
  type RowWithDir = CachedFileRow & { dir_path: string };
  const allRows = rawDb.prepare(`
    SELECT abs_path, rel_path, code, size_bytes, mtime_ms, dir_path
    FROM video_metadata
  `).all() as RowWithDir[];

  const byDir = new Map<string, CachedFileRow[]>();
  for (const row of allRows) {
    let bucket = byDir.get(row.dir_path);
    if (bucket === undefined) {
      bucket = [];
      byDir.set(row.dir_path, bucket);
    }
    bucket.push(row);
  }
  return byDir;
}
|
||||
|
||||
/**
 * Write the dir-mtime cache back to `video_dir_mtimes` in one transaction:
 * upsert the mtime of each directory visited this scan, then delete rows
 * for directories that were not visited (e.g. deleted folders). Failures
 * are logged and not rethrown.
 */
function saveDirMtimeCache(mtimes: Map<string, number>, visited: Set<string>): void {
  const upsertStmt = rawDb.prepare(`
    INSERT INTO video_dir_mtimes (abs_dir, mtime_ms, last_seen_at)
    VALUES (?, ?, ?)
    ON CONFLICT(abs_dir) DO UPDATE SET
      mtime_ms = excluded.mtime_ms,
      last_seen_at = excluded.last_seen_at
  `);
  const seenAt = Date.now();

  const persist = rawDb.transaction(() => {
    // Entries for directories not visited this scan may be stale
    // (moved or renamed), so only visited ones are written.
    mtimes.forEach((mtime, dir) => {
      if (visited.has(dir)) upsertStmt.run(dir, mtime, seenAt);
    });

    // Prune everything not seen this scan — one pass over the table.
    const stored = rawDb.prepare(`SELECT abs_dir FROM video_dir_mtimes`).all() as Array<{ abs_dir: string }>;
    const removeStmt = rawDb.prepare(`DELETE FROM video_dir_mtimes WHERE abs_dir = ?`);
    stored
      .filter((row) => !visited.has(row.abs_dir))
      .forEach((row) => removeStmt.run(row.abs_dir));
  });

  try {
    persist();
  } catch (e) {
    console.error("[video] failed to save dir mtime cache:", e);
  }
}
|
||||
|
||||
/**
 * Walk every place a sidecar subtitle could live and return the set of
 * canonical codes that have at least one. Cheap signal — no ffprobe.
 *
 *  - Each video's own directory. A subtitle file counts for a video when
 *    its name starts with `<video stem>.`, or when the video's code
 *    appears in the name as a delimited token (so a stray `OTHER-001.srt`
 *    next to `YUJ-001.mp4` doesn't taint YUJ-001).
 *  - Each entry in the `subtitleExtraPaths` setting, walked via
 *    walkSubtitleRoot with a depth limit of 3; codes come from filenames.
 *  - data/generated-subtitles/<code>/ — the directory name IS the code.
 *
 * The result is built fresh on every call; nothing is kept in memory.
 *
 * @param files Video files from the current walk.
 * @returns Codes for which at least one subtitle file was found.
 */
async function collectSubtitleCodes(files: VideoFile[]): Promise<Set<string>> {
  const codes = new Set<string>();

  // Hoisted so neither the regex nor the helpers are rebuilt per entry.
  const ALNUM = /[a-z0-9]/;
  const isBoundary = (c: string | undefined): boolean => c === undefined || !ALNUM.test(c);

  // True when `token` occurs in `haystack` bounded on both sides by the
  // string edge or a non-alphanumeric character. Every occurrence is
  // tested — a glued first hit must not mask a later, properly delimited
  // one. Both arguments are expected in lower case.
  const containsToken = (haystack: string, token: string): boolean => {
    if (token.length === 0) return false;
    for (let idx = haystack.indexOf(token); idx >= 0; idx = haystack.indexOf(token, idx + 1)) {
      const before = idx === 0 ? undefined : haystack[idx - 1];
      const after = haystack[idx + token.length];
      if (isBoundary(before) && isBoundary(after)) return true;
    }
    return false;
  };

  // Same-folder scan. Each directory is listed once and reduced to the
  // lower-cased names of its subtitle files, so a folder with N videos
  // does not re-filter the full listing N times.
  const subtitleNamesByDir = new Map<string, string[]>();
  for (const file of files) {
    if (codes.has(file.code)) continue; // another file already settled this code

    const dir = path.dirname(file.abs);
    let subtitleNames = subtitleNamesByDir.get(dir);
    if (subtitleNames === undefined) {
      subtitleNames = [];
      try {
        for (const e of await fs.readdir(dir, { withFileTypes: true })) {
          if (e.isFile() && SUBTITLE_EXTENSIONS.has(path.extname(e.name).toLowerCase())) {
            subtitleNames.push(e.name.toLowerCase());
          }
        }
      } catch {
        /* unreadable directory — treated as having no sidecars */
      }
      subtitleNamesByDir.set(dir, subtitleNames);
    }
    if (subtitleNames.length === 0) continue;

    const stem = file.filename.slice(0, file.filename.length - path.extname(file.filename).length);
    // `<stem>.<anything>` also covers the exact `<stem><ext>` case because
    // every subtitle extension begins with a dot.
    const stemPrefix = stem.toLowerCase() + ".";
    const codeLower = file.code.toLowerCase();

    if (subtitleNames.some((name) => name.startsWith(stemPrefix) || containsToken(name, codeLower))) {
      codes.add(file.code);
    }
  }

  // Persistent subtitle library roots — codes are extracted from filenames.
  const extraRoots = (getAppSetting("subtitleExtraPaths") ?? []).filter(Boolean);
  for (const root of extraRoots) {
    await walkSubtitleRoot(root, codes, 3);
  }

  // data/generated-subtitles/<code>/ — the directory name is the code.
  const generatedRoot = path.join(process.cwd(), "data", "generated-subtitles");
  try {
    const subdirs = await fs.readdir(generatedRoot, { withFileTypes: true });
    for (const d of subdirs) {
      if (!d.isDirectory()) continue;
      let entries: import("node:fs").Dirent[];
      try {
        entries = await fs.readdir(path.join(generatedRoot, d.name), { withFileTypes: true });
      } catch {
        continue;
      }
      const hasSub = entries.some(
        (e) => e.isFile() && SUBTITLE_EXTENSIONS.has(path.extname(e.name).toLowerCase()),
      );
      if (hasSub) {
        const norm = normalizeCode(d.name);
        if (norm) codes.add(norm);
      }
    }
  } catch { /* generated-subtitles not present yet — fine */ }

  return codes;
}
|
||||
|
||||
/**
 * Iteratively walk `root` and add to `out` the normalized code parsed from
 * every subtitle filename found. Subdirectories are followed only while
 * the current depth is below `maxDepth` (the root is depth 0). Unreadable
 * directories are skipped.
 */
async function walkSubtitleRoot(root: string, out: Set<string>, maxDepth: number): Promise<void> {
  const pending: Array<{ dir: string; depth: number }> = [{ dir: root, depth: 0 }];

  for (let frame = pending.pop(); frame !== undefined; frame = pending.pop()) {
    let listing: import("node:fs").Dirent[];
    try {
      listing = await fs.readdir(frame.dir, { withFileTypes: true });
    } catch {
      continue;
    }

    for (const item of listing) {
      if (item.isDirectory()) {
        if (frame.depth < maxDepth) {
          pending.push({ dir: path.join(frame.dir, item.name), depth: frame.depth + 1 });
        }
        continue;
      }
      if (!item.isFile()) continue;

      const extension = path.extname(item.name).toLowerCase();
      if (!SUBTITLE_EXTENSIONS.has(extension)) continue;

      const baseName = item.name.slice(0, item.name.length - extension.length);
      const rawCode = extractCode(baseName);
      if (!rawCode) continue;

      const canonical = normalizeCode(rawCode);
      if (canonical) out.add(canonical);
    }
  }
}
|
||||
|
||||
/**
 * Public accessor for the configured video roots, for callers doing
 * path-allowlist checks (e.g. subtitle file resolution).
 */
export function getConfiguredVideoRoots(): string[] {
  const roots = configuredRoots();
  return roots;
}
|
||||
|
||||
/**
 * Read the library roots from app settings: the trimmed
 * `videoLibraryPath` first, then each trimmed `videoExtraPaths` entry.
 * Blank values are dropped.
 */
function configuredRoots(): string[] {
  const primary = (getAppSetting("videoLibraryPath") || "").trim();
  const extras = getAppSetting("videoExtraPaths") ?? [];

  const roots: string[] = primary ? [primary] : [];
  for (const entry of extras) {
    const trimmed = (entry ?? "").trim();
    if (trimmed.length > 0) roots.push(trimmed);
  }
  return roots;
}
|
||||
|
||||
/** True when both lists hold the same strings in the same order. */
function rootsEqual(a: string[], b: string[]): boolean {
  return a.length === b.length && a.every((value, i) => value === b[i]);
}
|
||||
|
||||
/**
 * Scan-state probe for API routes: reports whether the cached scan still
 * matches the current settings. Returns the empty state when no roots are
 * configured or when the configured roots differ from the ones last
 * scanned, so the caller can trigger a rescan.
 */
export function getVideoIndex(): VideoIndex {
  const roots = configuredRoots();
  const stillCurrent = roots.length > 0 && rootsEqual(cachedScanState.rootsScanned, roots);
  return stillCurrent ? cachedScanState : EMPTY_INDEX;
}
|
||||
|
||||
/** True while the scan referenced by `scanInFlight` was started with `force`. */
let scanInFlightForced = false;

/**
 * Run one full rescan over `roots`: walk, persist the file table, refresh
 * the has_video / has_subtitle columns, then record the new scan state.
 */
async function runRescan(roots: string[], force: boolean): Promise<VideoIndex> {
  const t0 = Date.now();
  const { files, count, reused, rescanned } = await walkAllRoots(roots, { force });
  const walkMs = Date.now() - t0;
  console.log(
    `[video] rescan walk in ${walkMs}ms — ${count} files (${reused} reused, ${rescanned} dir(s) rewalked${force ? ", forced" : ""})`,
  );
  // Persist the file table first — has_video / has_subtitle bulk
  // updates and metadata sync all run off it.
  await syncVideoMetadataIndex(files);
  syncHasVideoColumn(files);
  const subtitleCodes = await collectSubtitleCodes(files);
  syncHasSubtitleColumn(subtitleCodes);

  cachedScanState = {
    lastScannedAt: Date.now(),
    rootsScanned: roots,
    count,
  };
  return cachedScanState;
}

/**
 * Rebuild the index from disk. Authoritative data lands in the
 * `video_metadata` table; this function returns only scan-state metadata.
 *
 * Default mode is incremental — directories whose mtime hasn't changed
 * since the last scan reuse cached file rows. Pass `{force:true}` to
 * bypass the dir-mtime cache (e.g. after content edits that don't bump
 * the directory mtime).
 *
 * Concurrent calls are coalesced onto the scan already running, with one
 * exception: a forced request that arrives during an incremental scan
 * waits for it to finish and then runs its own forced scan, so `force` is
 * never silently dropped.
 *
 * With no roots configured the state is reset to empty without starting a
 * scan. That case must not go through the in-flight promise: it would
 * finish synchronously, before the promise is stored, and leave a stale
 * "in flight" marker behind.
 */
export async function rescanVideoIndex(opts: { force?: boolean } = {}): Promise<VideoIndex> {
  const force = Boolean(opts.force);

  const pending = scanInFlight;
  if (pending) {
    if (!force || scanInFlightForced) return pending;
    try {
      await pending;
    } catch {
      /* that scan's own callers observe its failure; we only need it finished */
    }
    return rescanVideoIndex(opts);
  }

  const cleanRoots = configuredRoots()
    .map((r) => (r ?? "").trim())
    .filter(Boolean);
  if (cleanRoots.length === 0) {
    cachedScanState = { ...EMPTY_INDEX };
    return cachedScanState;
  }

  const run = runRescan(cleanRoots, force);
  scanInFlight = run;
  scanInFlightForced = force;

  // Release the slot only if it still belongs to this run. Registered
  // after the assignment, so it can never fire before the slot is set.
  const release = (): void => {
    if (scanInFlight === run) {
      scanInFlight = null;
      scanInFlightForced = false;
    }
  };
  void run.then(release, release);

  return run;
}
|
||||
|
||||
/**
 * Copy the set of codes found by the walk into `images.has_video`, so SQL
 * filters and counts can read the column directly. The flag is cleared on
 * every row and then set again for the current codes, all in one
 * transaction; errors are logged and not rethrown.
 */
function syncHasVideoColumn(files: VideoFile[]): void {
  // Keeps each statement well below SQLite's bind-parameter cap.
  const BATCH_SIZE = 500;
  const uniqueCodes = [...new Set(files.map((f) => f.code))];

  const apply = rawDb.transaction(() => {
    rawDb.prepare(`UPDATE images SET has_video = 0 WHERE has_video = 1`).run();
    for (let start = 0; start < uniqueCodes.length; start += BATCH_SIZE) {
      const batch = uniqueCodes.slice(start, start + BATCH_SIZE);
      const marks = new Array<string>(batch.length).fill("?").join(",");
      rawDb.prepare(
        `UPDATE images SET has_video = 1 WHERE upper(code) IN (${marks})`,
      ).run(...batch);
    }
  });

  try {
    apply();
  } catch (e) {
    console.error("[video] failed to sync has_video column:", e);
  }
}
|
||||
|
||||
/**
 * Copy the subtitle code set into `images.has_subtitle`: clear the flag
 * on every row, then set it for the given codes in batches, all in one
 * transaction. Errors are logged and not rethrown.
 */
function syncHasSubtitleColumn(subtitleCodes: Set<string>): void {
  const BATCH_SIZE = 500;
  const codeList = [...subtitleCodes];

  const apply = rawDb.transaction(() => {
    rawDb.prepare(`UPDATE images SET has_subtitle = 0 WHERE has_subtitle = 1`).run();
    for (let start = 0; start < codeList.length; start += BATCH_SIZE) {
      const batch = codeList.slice(start, start + BATCH_SIZE);
      const marks = new Array<string>(batch.length).fill("?").join(",");
      rawDb.prepare(
        `UPDATE images SET has_subtitle = 1 WHERE upper(code) IN (${marks})`,
      ).run(...batch);
    }
  });

  try {
    apply();
  } catch (e) {
    console.error("[video] failed to sync has_subtitle column:", e);
  }
}
|
||||
|
||||
/** Columns selected from `video_metadata` when listing files for a code. */
interface VideoMetaRow {
  abs_path: string;
  rel_path: string;
  code: string;
  size_bytes: number;
  mtime_ms: number;
}

/**
 * List the files recorded for one code. Queries `video_metadata`
 * directly, so the answer reflects the most recent rescan. The code is
 * normalized first (upper-cased when normalization yields nothing);
 * results are ordered by relative path, case-insensitively.
 *
 * @returns Matching files, or an empty array for a missing/empty code.
 */
export function findVideosForCode(code: string | null | undefined): VideoFile[] {
  if (!code) return [];

  const lookup = normalizeCode(code) ?? code.toUpperCase();
  const matches = rawDb.prepare(`
    SELECT abs_path, rel_path, code, size_bytes, mtime_ms
    FROM video_metadata
    WHERE upper(code) = ?
    ORDER BY rel_path COLLATE NOCASE
  `).all(lookup) as VideoMetaRow[];

  const found: VideoFile[] = [];
  for (const row of matches) {
    found.push({
      abs: row.abs_path,
      rel: row.rel_path,
      filename: path.basename(row.abs_path),
      code: row.code,
      size: row.size_bytes,
      mtime: row.mtime_ms,
    });
  }
  return found;
}
|
||||
|
||||
/** Upper-cased codes of every row in `video_metadata`, for fast existence checks. */
export function getCodesWithVideos(): Set<string> {
  const distinct = rawDb.prepare(`
    SELECT DISTINCT upper(code) AS code FROM video_metadata
  `).all() as Array<{ code: string }>;

  const codes = new Set<string>();
  for (const { code } of distinct) codes.add(code);
  return codes;
}
|
||||
|
||||
/**
 * Upper-cased codes of every `images` row flagged `has_subtitle = 1`.
 * The flag is filled in at rescan time.
 */
export function getCodesWithSubtitles(): Set<string> {
  const distinct = rawDb.prepare(`
    SELECT DISTINCT upper(code) AS code FROM images WHERE has_subtitle = 1 AND code IS NOT NULL
  `).all() as Array<{ code: string }>;

  const codes = new Set<string>();
  for (const { code } of distinct) codes.add(code);
  return codes;
}
|
||||
@@ -0,0 +1,58 @@
|
||||
import "server-only";
|
||||
import path from "node:path";
|
||||
import { rawDb } from "@/lib/db/client";
|
||||
|
||||
/** A manually attached subtitle file, as exposed to callers. */
export interface ManualSubtitle {
  code: string;
  partIdx: number;
  absPath: string;
  attachedAt: number;
}

/** Raw `manual_subtitles` row as read from the database. */
interface ManualSubtitleRow {
  code: string;
  part_idx: number;
  abs_path: string;
  attached_at: number;
}

/** Convert a snake_case database row into its camelCase public shape. */
function rowToEntry(r: ManualSubtitleRow): ManualSubtitle {
  const { code, part_idx: partIdx, abs_path: absPath, attached_at: attachedAt } = r;
  return { code, partIdx, absPath, attachedAt };
}
|
||||
|
||||
/** List the subtitles attached to one (code, part) pair, most recently attached first. */
export function listManualSubtitlesForVariant(code: string, partIdx: number): ManualSubtitle[] {
  const query = rawDb.prepare(`
    SELECT code, part_idx, abs_path, attached_at FROM manual_subtitles
    WHERE code = ? AND part_idx = ?
    ORDER BY attached_at DESC
  `);
  const matches = query.all(code, partIdx) as ManualSubtitleRow[];
  return matches.map((row) => rowToEntry(row));
}
|
||||
|
||||
/**
 * True iff the resolved form of `abs` is recorded against any (code, part).
 *
 * Windows paths are case-insensitive on disk but stored as typed, so on
 * win32 both sides are compared through LOWER(); elsewhere the comparison
 * is exact.
 */
export function isManualSubtitlePath(abs: string): boolean {
  const target = path.resolve(abs);
  const statement =
    process.platform === "win32"
      ? rawDb.prepare(`
      SELECT 1 FROM manual_subtitles WHERE LOWER(abs_path) = LOWER(?) LIMIT 1
    `)
      : rawDb.prepare(`SELECT 1 FROM manual_subtitles WHERE abs_path = ? LIMIT 1`);
  return Boolean(statement.get(target));
}
|
||||
|
||||
/**
 * Record a subtitle file against a (code, part) pair. The path is stored
 * in resolved form with the current time; a conflicting existing row is
 * replaced.
 */
export function attachManualSubtitle(code: string, partIdx: number, absPath: string): void {
  const insert = rawDb.prepare(`
    INSERT OR REPLACE INTO manual_subtitles (code, part_idx, abs_path, attached_at)
    VALUES (?, ?, ?, ?)
  `);
  const resolvedPath = path.resolve(absPath);
  insert.run(code, partIdx, resolvedPath, Date.now());
}
|
||||
|
||||
/** Delete the row matching this (code, part) pair and the resolved form of `absPath`. */
export function detachManualSubtitle(code: string, partIdx: number, absPath: string): void {
  const remove = rawDb.prepare(`
    DELETE FROM manual_subtitles WHERE code = ? AND part_idx = ? AND abs_path = ?
  `);
  const resolvedPath = path.resolve(absPath);
  remove.run(code, partIdx, resolvedPath);
}
|
||||
@@ -0,0 +1,580 @@
|
||||
import "server-only";
|
||||
import path from "node:path";
|
||||
import { spawn } from "node:child_process";
|
||||
import fs from "node:fs/promises";
|
||||
import { revalidatePath } from "next/cache";
|
||||
import { rawDb } from "@/lib/db/client";
|
||||
import { getAppSetting } from "@/lib/db/appSettings";
|
||||
import { classifyGroup, compilePatterns } from "./partClassify";
|
||||
import type { VideoFile } from "./index";
|
||||
|
||||
// Presumably the ffprobe timeout in milliseconds, going by the name; its use is outside this excerpt — TODO confirm.
const PROBE_TIMEOUT_MS = 10_000;
|
||||
|
||||
/** The two playback modes a stored row can carry. */
export type PlaybackMode = "direct" | "transcode";

/** camelCase view of one `video_metadata` row, with enum-like columns narrowed. */
export interface StoredVideoMetadata {
  absPath: string;
  relPath: string;
  code: string;
  sizeBytes: number;
  mtimeMs: number;
  probedAt: number | null;
  probeError: string | null;
  durationSec: number | null;
  videoCodec: string | null;
  videoBFrames: number | null;
  width: number | null;
  height: number | null;
  videoBitrate: number | null;
  playbackMode: PlaybackMode | null;
  partKind: "part" | "variant" | "single" | null;
  partIndex: number | null;
  variantGroup: string | null;
}

/** Raw `video_metadata` row as returned by the database driver. */
interface VideoMetadataRow {
  abs_path: string;
  rel_path: string;
  code: string;
  size_bytes: number;
  mtime_ms: number;
  probed_at: number | null;
  probe_error: string | null;
  duration_sec: number | null;
  video_codec: string | null;
  video_b_frames: number | null;
  width: number | null;
  height: number | null;
  video_bitrate: number | null;
  playback_mode: string | null;
  part_kind: string | null;
  part_index: number | null;
  variant_group: string | null;
}

/** The subset of ffprobe's JSON output that this module reads. */
interface FfprobeJson {
  streams?: Array<{
    codec_name?: string;
    width?: number;
    height?: number;
    bit_rate?: string;
    has_b_frames?: number;
  }>;
  format?: {
    duration?: string;
    bit_rate?: string;
  };
}

/**
 * Convert a database row into StoredVideoMetadata. `playback_mode` and
 * `part_kind` pass through only when they hold a recognized value;
 * anything else becomes null. A missing row yields null.
 */
function mapRow(row: VideoMetadataRow | undefined): StoredVideoMetadata | null {
  if (!row) return null;

  let playbackMode: StoredVideoMetadata["playbackMode"] = null;
  if (row.playback_mode === "direct" || row.playback_mode === "transcode") {
    playbackMode = row.playback_mode;
  }

  let partKind: StoredVideoMetadata["partKind"] = null;
  switch (row.part_kind) {
    case "part":
    case "variant":
    case "single":
      partKind = row.part_kind;
      break;
    default:
      break;
  }

  return {
    absPath: row.abs_path,
    relPath: row.rel_path,
    code: row.code,
    sizeBytes: row.size_bytes,
    mtimeMs: row.mtime_ms,
    probedAt: row.probed_at,
    probeError: row.probe_error,
    durationSec: row.duration_sec,
    videoCodec: row.video_codec,
    videoBFrames: row.video_b_frames,
    width: row.width,
    height: row.height,
    videoBitrate: row.video_bitrate,
    playbackMode,
    partKind,
    partIndex: row.part_index,
    variantGroup: row.variant_group,
  };
}
|
||||
|
||||
/**
 * Coerce a loosely typed value to a strictly positive finite number.
 * Returns null for null/undefined, the literal "N/A", anything that does
 * not convert to a finite number, and values that are zero or negative.
 */
function parseFiniteNumber(value: unknown): number | null {
  if (value === null || value === undefined || value === "N/A") return null;
  const parsed = typeof value === "number" ? value : Number(value);
  if (!Number.isFinite(parsed)) return null;
  return parsed > 0 ? parsed : null;
}
|
||||
|
||||
/**
 * Coerce a loosely typed value to a finite number that is zero or greater.
 *
 * Returns null for null/undefined, the literal "N/A", blank strings,
 * anything that does not convert to a finite number, and negative values.
 * Blank strings are rejected explicitly because `Number("")` is 0, which
 * would otherwise turn a missing field into a valid-looking zero.
 */
function parseNonNegativeNumber(value: unknown): number | null {
  if (value == null || value === "N/A") return null;
  if (typeof value === "string" && value.trim() === "") return null;
  const n = typeof value === "number" ? value : Number(value);
  return Number.isFinite(n) && n >= 0 ? n : null;
}
|
||||
|
||||
/**
 * True when a stored row still describes the file on disk: the size is
 * identical and the mtimes differ by less than one millisecond.
 */
function isStatMatch(row: StoredVideoMetadata, sizeBytes: number, mtimeMs: number): boolean {
  if (row.sizeBytes !== sizeBytes) return false;
  const drift = Math.abs(row.mtimeMs - mtimeMs);
  return drift < 1;
}
|
||||
|
||||
/** Fetch the `video_metadata` row whose `abs_path` equals `absPath`, or null when there is none. */
export function getStoredVideoMetadata(absPath: string): StoredVideoMetadata | null {
  const lookup = rawDb.prepare(`SELECT * FROM video_metadata WHERE abs_path = ?`);
  const row = lookup.get(absPath) as VideoMetadataRow | undefined;
  return mapRow(row);
}
|
||||
|
||||
/**
 * List every stored row whose code matches `code` case-insensitively,
 * ordered by relative path. Returns an empty array for a missing code.
 */
export function listStoredVideoMetadataForCode(code: string | null | undefined): StoredVideoMetadata[] {
  if (!code) return [];

  const matches = rawDb.prepare(`
    SELECT * FROM video_metadata
    WHERE upper(code) = upper(?)
    ORDER BY rel_path ASC
  `).all(code) as VideoMetadataRow[];

  const result: StoredVideoMetadata[] = [];
  for (const raw of matches) {
    const mapped = mapRow(raw);
    if (mapped !== null) result.push(mapped);
  }
  return result;
}
|
||||
|
||||
/**
 * Build a plain object holding exactly the known metadata fields, copied
 * one by one so any extra properties on the input are not carried over.
 * Returns null for null input.
 */
export function serializeVideoMetadata(meta: StoredVideoMetadata | null) {
  if (meta === null || !meta) return null;

  const {
    absPath,
    relPath,
    code,
    sizeBytes,
    mtimeMs,
    probedAt,
    probeError,
    durationSec,
    videoCodec,
    videoBFrames,
    width,
    height,
    videoBitrate,
    playbackMode,
    partKind,
    partIndex,
    variantGroup,
  } = meta;

  return {
    absPath,
    relPath,
    code,
    sizeBytes,
    mtimeMs,
    probedAt,
    probeError,
    durationSec,
    videoCodec,
    videoBFrames,
    width,
    height,
    videoBitrate,
    playbackMode,
    partKind,
    partIndex,
    variantGroup,
  };
}
|
||||
|
||||
/**
 * Make the `video_metadata` table match the files found by a walk.
 *
 * In one transaction, every file is upserted by `abs_path` and rows whose
 * path is not in `files` are deleted. When an existing row's size or mtime
 * differs from the incoming values, its probe-derived columns are reset to
 * NULL; otherwise they are kept. Files are then classified via
 * classifyAndPersist, and a background re-probe is started without being
 * awaited.
 *
 * `dir_path` is computed with `path.dirname`, so files sitting directly
 * in a filesystem or drive root get that root (`/`, `D:\`) as their
 * directory rather than an empty or truncated string.
 *
 * @param files Complete list of files from the current walk.
 */
export async function syncVideoMetadataIndex(files: VideoFile[]): Promise<void> {
  const found = new Set(files.map((file) => file.abs));

  // Columns derived from probing the file; stale once size or mtime changes.
  const probeColumns = [
    "probed_at",
    "probe_error",
    "duration_sec",
    "video_codec",
    "video_b_frames",
    "width",
    "height",
    "video_bitrate",
    "playback_mode",
  ];
  const statChanged =
    "video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms";
  const resetClauses = probeColumns
    .map((col) => `${col} = CASE WHEN ${statChanged} THEN NULL ELSE video_metadata.${col} END`)
    .join(",\n      ");

  // SET expressions read the pre-update row, so assigning size_bytes and
  // mtime_ms in the same statement does not affect the CASE comparisons.
  const upsert = rawDb.prepare(`
    INSERT INTO video_metadata (abs_path, rel_path, code, size_bytes, mtime_ms, dir_path)
    VALUES (?, ?, ?, ?, ?, ?)
    ON CONFLICT(abs_path) DO UPDATE SET
      rel_path = excluded.rel_path,
      code = excluded.code,
      dir_path = excluded.dir_path,
      ${resetClauses},
      size_bytes = excluded.size_bytes,
      mtime_ms = excluded.mtime_ms
  `);
  const deleteStale = rawDb.prepare(`DELETE FROM video_metadata WHERE abs_path = ?`);

  const tx = rawDb.transaction(() => {
    for (const file of files) {
      upsert.run(file.abs, file.rel, file.code, file.size, file.mtime, path.dirname(file.abs));
    }
    const rows = rawDb.prepare(`SELECT abs_path FROM video_metadata`).all() as Array<{ abs_path: string }>;
    for (const row of rows) {
      if (!found.has(row.abs_path)) deleteStale.run(row.abs_path);
    }
  });
  tx();
  classifyAndPersist(files);

  // Probe-data refresh runs in the background. Awaiting here used to
  // block rescan responses for minutes on libraries with many drifted
  // files (e.g. after a bulk rename). Nothing awaits this promise, so a
  // rejection is logged here rather than left unhandled.
  void reprobeDirtyFiles(files).catch((e: unknown) => {
    console.error("[video] background reprobe failed:", e);
  });
}
|
||||
|
||||
// Number of probe workers reprobeDirtyFiles runs side by side.
const REPROBE_CONCURRENCY = 2;
|
||||
|
||||
/**
 * Re-probe every scanned file whose `video_metadata` row has neither a
 * probe timestamp nor a recorded probe error.
 *
 * REPROBE_CONCURRENCY workers pull from one shared cursor. A failure is
 * logged per file and never propagates, so one bad file cannot stop the
 * rest of the queue.
 *
 * @param files Files from the current scan; only those the database also
 *   lists as un-probed are processed.
 */
async function reprobeDirtyFiles(files: VideoFile[]): Promise<void> {
  let pendingRows: Array<{ abs_path: string }>;
  try {
    const pendingQuery = rawDb
      .prepare(`SELECT abs_path FROM video_metadata WHERE probed_at IS NULL AND probe_error IS NULL`);
    pendingRows = pendingQuery.all() as Array<{ abs_path: string }>;
  } catch (e) {
    console.error("[video] reprobe-dirty query failed:", e);
    return;
  }
  if (pendingRows.length === 0) return;

  const pendingPaths = new Set<string>();
  for (const row of pendingRows) pendingPaths.add(row.abs_path);
  const queue = files.filter((candidate) => pendingPaths.has(candidate.abs));
  if (queue.length === 0) return;

  // Shared between workers: index of the next unclaimed queue entry.
  let next = 0;
  // Codes that already triggered a revalidation during this run.
  const revalidatedCodes = new Set<string>();

  const drain = async (): Promise<void> => {
    while (next < queue.length) {
      const file = queue[next++];
      if (!file) break;
      try {
        await probeVideoMetadata(file);
        if (revalidatedCodes.has(file.code)) continue;
        revalidatedCodes.add(file.code);
        // Revalidation is best effort; a failure must not fail the probe.
        try { revalidatePath("/id/[code]", "page"); } catch { /* ignore */ }
      } catch (e) {
        console.error(`[video] reprobe failed for ${file.abs}:`, e);
      }
    }
  };

  const workers: Promise<void>[] = [];
  for (let w = 0; w < REPROBE_CONCURRENCY; w++) workers.push(drain());
  await Promise.all(workers).catch(() => { /* swallowed */ });
}
|
||||
|
||||
/**
 * Re-derive part_kind / part_index / variant_group for every scanned file
 * from the current `partSuffixPatterns` setting and write the result to
 * `video_metadata`. Reads no probe columns, so it can run on every scan.
 *
 * @param files Files from the current scan; they are classified per code.
 */
function classifyAndPersist(files: VideoFile[]): void {
  const patterns = compilePatterns(getAppSetting("partSuffixPatterns") ?? []);

  // Classification works on groups of files that share one code.
  const groups = new Map<string, VideoFile[]>();
  for (const file of files) {
    const bucket = groups.get(file.code);
    if (bucket === undefined) groups.set(file.code, [file]);
    else bucket.push(file);
  }

  const writeClassification = rawDb.prepare(`
    UPDATE video_metadata SET part_kind = ?, part_index = ?, variant_group = ?
    WHERE abs_path = ?
  `);
  // One transaction for the whole scan rather than one per row.
  const persistAll = rawDb.transaction(() => {
    groups.forEach((members) => {
      const classified = classifyGroup(
        members.map((member) => ({ key: member.abs, stem: stemOf(member.filename) })),
        patterns,
      );
      for (const entry of classified) {
        writeClassification.run(entry.partKind, entry.partIndex, entry.variantGroup, entry.key);
      }
    });
  });
  persistAll();
}
|
||||
|
||||
/** Return `filename` without its extension (as reported by `path.extname`). */
function stemOf(filename: string): string {
  const extension = path.extname(filename);
  if (extension.length === 0) return filename;
  return filename.slice(0, -extension.length);
}
|
||||
|
||||
/** One subtitle stream of a container, as reported by runFfprobeSubtitles. */
export interface SubtitleStreamInfo {
  /**
   * Zero-based ordinal among the container's subtitle streams (the N in
   * ffmpeg's `0:s:N`), not the absolute stream index.
   */
  index: number;
  /** Lower-cased ffprobe codec name; "unknown" when ffprobe reports none. */
  codec: string;
  /** `language` stream tag, or null when absent. */
  language: string | null;
  /** `title` stream tag, or null when absent. */
  title: string | null;
  /** True when `codec` is one of IMAGE_SUBTITLE_CODECS. */
  isImageBased: boolean;
  /** True when `codec` is one of TEXT_SUBTITLE_CODECS. */
  isTextBased: boolean;
}
|
||||
|
||||
// Codec names that make runFfprobeSubtitles flag a stream as isTextBased.
const TEXT_SUBTITLE_CODECS = new Set(["subrip", "ass", "ssa", "mov_text", "webvtt", "text"]);
|
||||
// Codec names that make runFfprobeSubtitles flag a stream as isImageBased.
const IMAGE_SUBTITLE_CODECS = new Set(["hdmv_pgs_subtitle", "dvd_subtitle", "dvb_subtitle", "dvbsub", "pgssub"]);
|
||||
|
||||
/**
 * The subset of an ffprobe JSON stream entry that the subtitle probe reads.
 * Every field is optional because ffprobe output is not validated.
 */
interface FfprobeStream {
  /** Absolute stream index within the container. */
  index?: number;
  /** Stream category; only "subtitle" entries are kept. */
  codec_type?: string;
  /** Codec identifier as printed by ffprobe. */
  codec_name?: string;
  /** Free-form stream tags; only language and title are read. */
  tags?: { language?: string; title?: string };
}
|
||||
|
||||
/**
 * Enumerate the subtitle streams of a container via ffprobe.
 *
 * Computed on demand — not persisted, since users frequently remux subs
 * in/out and a stale list is worse than re-probing.
 *
 * @param absPath Absolute path of the media file to inspect.
 * @returns One entry per subtitle stream, in ffprobe order. Resolves to `[]`
 *   when ffprobe cannot be spawned, exits non-zero, runs longer than
 *   PROBE_TIMEOUT_MS, or prints output that is not valid JSON. Never rejects.
 */
export async function runFfprobeSubtitles(absPath: string): Promise<SubtitleStreamInfo[]> {
  return new Promise((resolve) => {
    const proc = spawn("ffprobe", [
      "-v", "error",
      "-select_streams", "s",
      "-show_entries", "stream=index,codec_name,codec_type:stream_tags=language,title",
      "-of", "json",
      absPath,
    ]);
    let out = "";
    let settled = false;
    // Resolve at most once and stop the watchdog timer.
    const settle = (val: SubtitleStreamInfo[]) => {
      if (settled) return;
      settled = true;
      clearTimeout(t);
      resolve(val);
    };
    // Watchdog: a hung ffprobe is killed and reported as "no subtitles".
    const t = setTimeout(() => {
      try { proc.kill("SIGKILL"); } catch { /* ignore */ }
      settle([]);
    }, PROBE_TIMEOUT_MS);

    // Decode at the stream level so a multi-byte UTF-8 character (e.g. a CJK
    // stream title) that straddles two chunks is not corrupted, which
    // per-chunk Buffer#toString() would do.
    proc.stdout?.setEncoding("utf8");
    proc.stdout?.on("data", (chunk: string) => { out += chunk; });
    proc.on("error", () => settle([]));
    proc.on("close", (code) => {
      if (code !== 0) { settle([]); return; }
      try {
        const json = JSON.parse(out) as { streams?: FfprobeStream[] };
        const streams = (json.streams ?? []).filter((s) => s.codec_type === "subtitle");
        const result: SubtitleStreamInfo[] = streams.map((s, i) => {
          const codec = (s.codec_name ?? "unknown").toLowerCase();
          return {
            // Use the per-codec_type ordinal — that's what ffmpeg's
            // 0:s:N mapping wants, NOT the absolute stream index.
            index: i,
            codec,
            language: typeof s.tags?.language === "string" ? s.tags.language : null,
            title: typeof s.tags?.title === "string" ? s.tags.title : null,
            isImageBased: IMAGE_SUBTITLE_CODECS.has(codec),
            isTextBased: TEXT_SUBTITLE_CODECS.has(codec),
          };
        });
        settle(result);
      } catch {
        settle([]);
      }
    });
  });
}
|
||||
|
||||
/**
 * Run ffprobe on the first video stream of `absPath` and extract duration,
 * codec, B-frame count, dimensions and bitrate.
 *
 * @param absPath Absolute path of the media file.
 * @param signal Optional abort signal; aborting kills the subprocess.
 * @returns Parsed fields; each is null when ffprobe did not report a usable
 *   value. `videoBitrate` falls back to the container bitrate when the
 *   stream has none.
 * @throws Rejects with an Error on spawn failure, non-zero exit (message is
 *   the trimmed stderr or `ffprobe exited <code>`), timeout after
 *   PROBE_TIMEOUT_MS ("ffprobe timed out"), abort ("ffprobe aborted"), or
 *   unparsable JSON output.
 */
async function runFfprobe(absPath: string, signal?: AbortSignal): Promise<{
  durationSec: number | null;
  videoCodec: string | null;
  videoBFrames: number | null;
  width: number | null;
  height: number | null;
  videoBitrate: number | null;
}> {
  return new Promise((resolve, reject) => {
    const proc = spawn("ffprobe", [
      "-v", "error",
      "-select_streams", "v:0",
      "-show_entries", "stream=codec_name,width,height,bit_rate,has_b_frames:format=duration,bit_rate",
      "-of", "json",
      absPath,
    ]);
    let out = "";
    let err = "";
    let settled = false;

    // Run `fn` (a resolve or reject) at most once, after releasing the
    // timer and the abort listener.
    const settle = (fn: () => void) => {
      if (settled) return;
      settled = true;
      clearTimeout(timeoutId);
      if (signal && abortHandler) signal.removeEventListener("abort", abortHandler);
      fn();
    };
    const kill = (message: string) => {
      try { proc.kill("SIGKILL"); } catch { /* ignore */ }
      settle(() => reject(new Error(message)));
    };

    const timeoutId = setTimeout(() => kill("ffprobe timed out"), PROBE_TIMEOUT_MS);
    const abortHandler = signal ? () => kill("ffprobe aborted") : null;
    if (signal && abortHandler) {
      if (signal.aborted) { kill("ffprobe aborted"); return; }
      signal.addEventListener("abort", abortHandler, { once: true });
    }

    // Decode at the stream level so multi-byte UTF-8 sequences split across
    // chunks (e.g. non-ASCII file paths in error text) are not corrupted.
    proc.stdout?.setEncoding("utf8");
    proc.stderr?.setEncoding("utf8");
    proc.stdout?.on("data", (chunk: string) => { out += chunk; });
    proc.stderr?.on("data", (chunk: string) => { err += chunk; });
    proc.on("error", (e) => settle(() => reject(e)));
    proc.on("close", (code) => {
      settle(() => {
        if (code !== 0) {
          reject(new Error(err.trim() || `ffprobe exited ${code}`));
          return;
        }
        try {
          const json = JSON.parse(out) as FfprobeJson;
          const stream = json.streams?.[0] ?? {};
          const streamBitrate = parseFiniteNumber(stream.bit_rate);
          const formatBitrate = parseFiniteNumber(json.format?.bit_rate);
          resolve({
            durationSec: parseFiniteNumber(json.format?.duration),
            videoCodec: typeof stream.codec_name === "string" ? stream.codec_name : null,
            videoBFrames: parseNonNegativeNumber(stream.has_b_frames),
            width: parseFiniteNumber(stream.width),
            height: parseFiniteNumber(stream.height),
            videoBitrate: streamBitrate ?? formatBitrate,
          });
        } catch (e) {
          reject(e);
        }
      });
    });
  });
}
|
||||
|
||||
/**
 * Return the stored metadata for `file`, probing it with ffprobe first when
 * no up-to-date result exists.
 *
 * A stored row is reused when its size and mtime match the file on disk and
 * it already carries either a probe timestamp or a probe error. Otherwise
 * ffprobe runs and the row is upserted with the outcome: the probed values
 * on success, or the error message (capped at 500 characters) on failure.
 *
 * When the caller's `signal` is aborted, nothing is written. An abort says
 * nothing about the file, and persisting it as `probe_error` would make the
 * reuse check above report the file as failed until its size or mtime
 * changes.
 *
 * @param file File to probe.
 * @param signal Optional abort signal forwarded to ffprobe.
 * @returns The stored row after the probe, or a placeholder with
 *   `probeError: "metadata unavailable"` when no row can be read back.
 * @throws Rejects if the file cannot be stat'ed.
 */
export async function probeVideoMetadata(file: VideoFile, signal?: AbortSignal): Promise<StoredVideoMetadata> {
  const stat = await fs.stat(file.abs);
  const existing = getStoredVideoMetadata(file.abs);
  if (existing && isStatMatch(existing, stat.size, stat.mtimeMs)) {
    if (existing.probeError || existing.probedAt != null) return existing;
  }

  const base = {
    absPath: file.abs,
    relPath: file.rel,
    code: file.code,
    sizeBytes: stat.size,
    mtimeMs: stat.mtimeMs,
    // Playback mode is user-chosen, so it survives a re-probe.
    playbackMode: existing?.playbackMode ?? null,
  };

  try {
    const probed = await runFfprobe(file.abs, signal);
    rawDb.prepare(`
      INSERT INTO video_metadata (
        abs_path, rel_path, code, size_bytes, mtime_ms, probed_at, probe_error,
        duration_sec, video_codec, video_b_frames, width, height, video_bitrate, playback_mode
      ) VALUES (?, ?, ?, ?, ?, ?, NULL, ?, ?, ?, ?, ?, ?, ?)
      ON CONFLICT(abs_path) DO UPDATE SET
        rel_path = excluded.rel_path,
        code = excluded.code,
        size_bytes = excluded.size_bytes,
        mtime_ms = excluded.mtime_ms,
        probed_at = excluded.probed_at,
        probe_error = NULL,
        duration_sec = excluded.duration_sec,
        video_codec = excluded.video_codec,
        video_b_frames = excluded.video_b_frames,
        width = excluded.width,
        height = excluded.height,
        video_bitrate = excluded.video_bitrate,
        playback_mode = excluded.playback_mode
    `).run(
      base.absPath, base.relPath, base.code, base.sizeBytes, base.mtimeMs, Date.now(),
      probed.durationSec, probed.videoCodec, probed.videoBFrames, probed.width, probed.height, probed.videoBitrate, base.playbackMode,
    );
  } catch (e) {
    // Skip persistence for caller-initiated aborts so the next request
    // probes the file again instead of reusing a bogus "ffprobe aborted".
    if (!signal?.aborted) {
      rawDb.prepare(`
        INSERT INTO video_metadata (
          abs_path, rel_path, code, size_bytes, mtime_ms, probed_at, probe_error, playback_mode
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(abs_path) DO UPDATE SET
          rel_path = excluded.rel_path,
          code = excluded.code,
          size_bytes = excluded.size_bytes,
          mtime_ms = excluded.mtime_ms,
          probed_at = excluded.probed_at,
          probe_error = excluded.probe_error,
          playback_mode = excluded.playback_mode
      `).run(
        base.absPath, base.relPath, base.code, base.sizeBytes, base.mtimeMs, Date.now(),
        e instanceof Error ? e.message.slice(0, 500) : "ffprobe failed",
        base.playbackMode,
      );
    }
  }

  return getStoredVideoMetadata(file.abs) ?? {
    ...base,
    probedAt: null,
    probeError: "metadata unavailable",
    durationSec: null,
    videoCodec: null,
    videoBFrames: null,
    width: null,
    height: null,
    videoBitrate: null,
    partKind: null,
    partIndex: null,
    variantGroup: null,
  };
}
|
||||
|
||||
/**
 * Store the playback mode chosen for `file`, creating its `video_metadata`
 * row when none exists. An existing row also has rel_path, code, size_bytes
 * and mtime_ms overwritten with the values carried by `file`.
 *
 * @param file File whose row is written.
 * @param mode New playback mode, or null to clear it.
 */
export function setVideoPlaybackMode(file: VideoFile, mode: PlaybackMode | null): void {
  const upsertMode = rawDb.prepare(`
    INSERT INTO video_metadata (abs_path, rel_path, code, size_bytes, mtime_ms, playback_mode)
    VALUES (?, ?, ?, ?, ?, ?)
    ON CONFLICT(abs_path) DO UPDATE SET
      rel_path = excluded.rel_path,
      code = excluded.code,
      size_bytes = excluded.size_bytes,
      mtime_ms = excluded.mtime_ms,
      playback_mode = excluded.playback_mode
  `);
  const { abs, rel, code, size, mtime } = file;
  upsertMode.run(abs, rel, code, size, mtime, mode);
}
|
||||
|
||||
/**
 * Render a duration in seconds as `m:ss`, or `h:mm:ss` from one hour up.
 * The value is rounded to whole seconds first.
 *
 * @returns The formatted string, or null for null/undefined, non-finite or
 *   non-positive input.
 */
export function formatDuration(sec: number | null | undefined): string | null {
  if (sec == null) return null;
  if (!Number.isFinite(sec) || sec <= 0) return null;

  const rounded = Math.round(sec);
  const hours = Math.floor(rounded / 3600);
  const minutes = Math.floor((rounded % 3600) / 60);
  const twoDigits = (value: number): string => String(value).padStart(2, "0");
  const secondsPart = twoDigits(rounded % 60);

  return hours > 0
    ? `${hours}:${twoDigits(minutes)}:${secondsPart}`
    : `${minutes}:${secondsPart}`;
}
|
||||
|
||||
/**
 * Render a bitrate given in bits per second with a decimal unit: one
 * fractional digit for Mbps, whole numbers for Kbps and bps.
 *
 * @returns The formatted string, or null for null/undefined, non-finite or
 *   non-positive input.
 */
export function formatBitrate(bps: number | null | undefined): string | null {
  if (bps == null) return null;
  if (!Number.isFinite(bps) || bps <= 0) return null;

  if (bps < 1_000) return `${Math.round(bps)} bps`;
  if (bps < 1_000_000) return `${Math.round(bps / 1_000)} Kbps`;
  return `${(bps / 1_000_000).toFixed(1)} Mbps`;
}
|
||||
|
||||
/**
 * Render a byte count with a binary-scaled unit (B through TB). Plain bytes
 * are rounded to an integer; scaled values get two fractional digits below
 * 10 and one from 10 upward.
 *
 * @returns The formatted string, or null for null/undefined, non-finite or
 *   negative input.
 */
export function formatBytes(bytes: number | null | undefined): string | null {
  if (bytes == null) return null;
  if (!Number.isFinite(bytes) || bytes < 0) return null;

  const units = ["B", "KB", "MB", "GB", "TB"];
  const lastUnit = units.length - 1;
  let scaled = bytes;
  let unitIndex = 0;
  for (; scaled >= 1024 && unitIndex < lastUnit; unitIndex++) {
    scaled /= 1024;
  }

  let amount: string | number;
  if (unitIndex === 0) amount = Math.round(scaled);
  else amount = scaled.toFixed(scaled >= 10 ? 1 : 2);
  return `${amount} ${units[unitIndex]}`;
}
|
||||
|
||||
/**
 * Render frame dimensions as `WIDTHxHEIGHT`.
 *
 * @returns The formatted string, or null when either side is missing, zero
 *   or NaN.
 */
export function formatResolution(width: number | null | undefined, height: number | null | undefined): string | null {
  if (width && height) return `${width}x${height}`;
  return null;
}
|
||||
|
||||
/** Display labels for the codec names that have a conventional spelling. */
const CODEC_LABELS: ReadonlyMap<string, string> = new Map([
  ["h264", "H.264"],
  ["hevc", "HEVC"],
  ["h265", "HEVC"],
  ["av1", "AV1"],
  ["vp9", "VP9"],
  ["mpeg4", "MPEG-4"],
]);

/**
 * Turn an ffprobe codec name into a display label.
 *
 * Known codecs map to their conventional spelling (case-insensitively);
 * anything else is upper-cased. The lookup goes through a Map, so names
 * that collide with Object.prototype members (e.g. "constructor") are
 * treated like any other unknown codec instead of returning an inherited
 * property.
 *
 * @returns The label, or null for null/undefined/empty input.
 */
export function formatCodec(codec: string | null | undefined): string | null {
  if (!codec) return null;
  return CODEC_LABELS.get(codec.toLowerCase()) ?? codec.toUpperCase();
}
|
||||
|
||||
/**
 * Build a one-line summary — resolution, codec, bitrate, size, duration —
 * from stored metadata, joined with " · ". Fields that cannot be formatted
 * are left out.
 *
 * @returns The summary, or null when `meta` is missing, carries a probe
 *   error, or yields no printable field.
 */
export function formatVideoSummary(meta: StoredVideoMetadata | null | undefined): string | null {
  if (!meta || meta.probeError) return null;

  const candidates = [
    formatResolution(meta.width, meta.height),
    formatCodec(meta.videoCodec),
    formatBitrate(meta.videoBitrate),
    formatBytes(meta.sizeBytes),
    formatDuration(meta.durationSec),
  ];
  const pieces: string[] = [];
  for (const candidate of candidates) {
    if (candidate) pieces.push(candidate);
  }
  return pieces.length === 0 ? null : pieces.join(" · ");
}
|
||||
@@ -0,0 +1,254 @@
|
||||
/**
|
||||
* Token-grammar classifier for video filenames in a JAVID group.
|
||||
*
|
||||
* Patterns use a simplified token grammar (option A1 from the mockups):
|
||||
* - `{N}` — one or more digits, captured as the part index
|
||||
* - `{L}` — single letter A–Z, captured (A=1, B=2, ...)
|
||||
* - everything else is a literal character
|
||||
*
|
||||
* Patterns match at the END of the filename stem (no extension),
|
||||
* case-insensitive.
|
||||
*
|
||||
* Classification rules for files sharing one normalized JAV code:
|
||||
* - "part" — stem ends with a configured pattern; index is the
|
||||
* captured numeric/letter value.
|
||||
 *   - "variant" — stem does NOT match any pattern but either equals a
 *                 matched part's variant group (the part's stem with the
 *                 matched suffix stripped) or starts with that group
 *                 followed by a dot. Variants belong to that group.
|
||||
* - "single" — lone file in its code group with no pattern match.
|
||||
*
|
||||
* Tiebreak for "default variant" (the one to play first): the file
|
||||
* whose stem equals the variant_group exactly. Otherwise the
|
||||
* alphabetically first stem in the group.
|
||||
*/
|
||||
/** A token-grammar pattern after compilation by compileToken. */
export interface CompiledPattern {
  /** The pattern text exactly as it was configured. */
  source: string;
  /** Case-insensitive regex anchored at the end of the stem; group 1 is the captured token. */
  re: RegExp;
  /** Which token produced the capture: `{N}` → "digits", `{L}` → "letter". */
  kind: "digits" | "letter";
}
|
||||
|
||||
/** What the classifier needs to know about one file. */
export interface ClassifyInput {
  /** Caller-chosen identifier; copied verbatim onto the matching result. */
  key: string;
  /** Filename without extension, spelled as on disk. */
  stem: string;
}
|
||||
|
||||
/** Classification of one file, keyed back to its ClassifyInput. */
export interface ClassifyResult {
  /** The `key` of the input this result belongs to. */
  key: string;
  /** Role of the file inside its code group. */
  partKind: "part" | "variant" | "single";
  /** 1-based position for parts; null for variants and singles. */
  partIndex: number | null;
  /** Group name shared by a part and its variants; null for singles. */
  variantGroup: string | null;
}
|
||||
|
||||
// Matches one `{N}` or `{L}` token.
// NOTE(review): nothing in the visible part of this module references this
// constant — confirm it is still needed.
const TOKEN_RE = /\{[NL]\}/g;
|
||||
|
||||
/**
 * Compile one token-grammar pattern into an end-anchored, case-insensitive
 * regex.
 *
 * `{N}` becomes a digit-run capture and `{L}` a single-letter capture; every
 * other character is matched literally. A pattern must contain exactly one
 * token.
 *
 * @returns The compiled pattern, or null (never throws) when `source` is
 *   empty, contains an unclosed `{`, an unknown `{...}` token, more than one
 *   token, or no token at all.
 */
export function compileToken(source: string): CompiledPattern | null {
  if (!source) return null;

  let captureKind: "digits" | "letter" | null = null;
  const pieces: string[] = [];
  let pos = 0;
  while (pos < source.length) {
    const ch = source[pos]!;
    if (ch !== "{") {
      // Literal character: escape anything the regex engine treats specially.
      pieces.push(ch.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
      pos += 1;
      continue;
    }

    const end = source.indexOf("}", pos);
    if (end < 0) return null;
    const token = source.slice(pos, end + 1);
    const isDigits = token === "{N}";
    if (!isDigits && token !== "{L}") return null;
    // Only one capture per pattern.
    if (captureKind != null) return null;

    pieces.push(isDigits ? "(\\d+)" : "([A-Za-z])");
    captureKind = isDigits ? "digits" : "letter";
    pos = end + 1;
  }

  if (captureKind == null) return null;
  return {
    source,
    re: new RegExp(pieces.join("") + "$", "i"),
    kind: captureKind,
  };
}
|
||||
|
||||
/**
 * Compile every pattern in `sources`, keeping the input order. Patterns that
 * compileToken rejects are dropped without an error.
 */
export function compilePatterns(sources: string[]): CompiledPattern[] {
  return sources.flatMap((source) => {
    const compiled = compileToken(source);
    return compiled ? [compiled] : [];
  });
}
|
||||
|
||||
/**
 * Convert the text captured by a pattern token into a 1-based part index.
 *
 * @param capture Text matched by the token's capture group.
 * @param kind "digits": parsed as a number and truncated; "letter": A/a → 1,
 *   B/b → 2, … Z/z → 26.
 * @returns The index, or null when the capture is not a positive number
 *   (digits) or does not start with a letter A–Z, including the empty
 *   string, which would otherwise yield NaN.
 */
function indexFromCapture(capture: string, kind: "digits" | "letter"): number | null {
  if (kind === "digits") {
    const n = Number(capture);
    return Number.isFinite(n) && n > 0 ? Math.trunc(n) : null;
  }
  // Letter: A=1, B=2, ...
  const code = capture.toUpperCase().charCodeAt(0);
  // Written as a positive range test so NaN (empty capture) is rejected;
  // `code < 65 || code > 90` lets NaN through.
  if (!(code >= 65 && code <= 90)) return null;
  return code - 64;
}
|
||||
|
||||
/** Outcome of a successful pattern match on one stem. */
interface PatternHit {
  /** 1-based index decoded from the captured token. */
  partIndex: number;
  /** The part of the stem that precedes the matched suffix. */
  variantGroup: string;
}
|
||||
|
||||
/**
 * Try the patterns in order against `stem` and return the first one that
 * both matches and yields a valid part index.
 *
 * @returns The decoded index plus the stem with the matched suffix cut off,
 *   or null when no pattern qualifies.
 */
function tryMatch(stem: string, patterns: CompiledPattern[]): PatternHit | null {
  for (const pattern of patterns) {
    const found = stem.match(pattern.re);
    if (found === null) continue;
    const partIndex = indexFromCapture(found[1] ?? "", pattern.kind);
    if (partIndex != null) {
      // found.index is where the suffix starts; everything before it is the group.
      return { partIndex, variantGroup: stem.slice(0, found.index!) };
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Classify the files that share one normalized code.
 *
 * Rules, in order:
 *   1. No files → `[]`. Exactly one file → "single", whatever its name.
 *   2. Each stem is tried against the patterns (see tryMatch). A stem that
 *      matches is a "part"; its variant group is the stem with the matched
 *      suffix removed.
 *   3. If no stem matched, every file is a "variant" of one shared group:
 *      the longest common prefix of all stems with trailing separators
 *      trimmed, or the file's own stem when that prefix is empty.
 *   4. Otherwise an unmatched stem becomes a "variant" of the longest part
 *      group `G` for which the stem equals `G` or starts with `G + "."`.
 *      Unmatched stems that attach to no group are "single".
 *
 * Example with pattern `-cd{N}`: `XXX-001-cd1` is part 1 of group `XXX-001`;
 * `XXX-001.fixed` is a variant of `XXX-001`; `XXX-001-cd1.fixed` attaches to
 * nothing and is a single.
 *
 * @param files Files of one code group.
 * @param patterns Compiled part-suffix patterns, tried in order.
 * @returns One result per input file, in input order.
 */
export function classifyGroup(
  files: ClassifyInput[],
  patterns: CompiledPattern[],
): ClassifyResult[] {
  if (files.length === 0) return [];
  if (files.length === 1) {
    const only = files[0]!;
    return [{ key: only.key, partKind: "single", partIndex: null, variantGroup: null }];
  }

  // Pass 1: which files carry a part suffix?
  const hits = new Map<string, PatternHit>();
  for (const file of files) {
    const hit = tryMatch(file.stem, patterns);
    if (hit) hits.set(file.key, hit);
  }

  if (hits.size === 0) {
    // No part suffix anywhere: the whole group is variants of one part.
    const shared = longestCommonPrefix(files.map((file) => file.stem));
    return files.map((file): ClassifyResult => ({
      key: file.key,
      partKind: "variant",
      partIndex: null,
      variantGroup: shared || file.stem,
    }));
  }

  // Pass 2: distinct part groups, longest first so the most specific binds.
  const groupsBySpecificity = [...new Set([...hits.values()].map((hit) => hit.variantGroup))];
  groupsBySpecificity.sort((a, b) => b.length - a.length);

  return files.map((file): ClassifyResult => {
    const hit = hits.get(file.key);
    if (hit) {
      return {
        key: file.key,
        partKind: "part",
        partIndex: hit.partIndex,
        variantGroup: hit.variantGroup,
      };
    }
    // Empty groups never adopt a file.
    const owner = groupsBySpecificity.find(
      (group) => group && (file.stem === group || file.stem.startsWith(group + ".")),
    );
    if (owner) {
      return { key: file.key, partKind: "variant", partIndex: null, variantGroup: owner };
    }
    // Stray file: nothing to attach to.
    return { key: file.key, partKind: "single", partIndex: null, variantGroup: null };
  });
}
|
||||
|
||||
/**
 * Longest prefix shared by every string in `strs`, with trailing whitespace,
 * dots, underscores and hyphens removed so the result does not end on a
 * separator such as the dot in "XXX-001.".
 *
 * @returns The trimmed prefix; "" for an empty list or when nothing is shared.
 */
function longestCommonPrefix(strs: string[]): string {
  const [first, ...rest] = strs;
  if (first === undefined) return "";

  let shared = first;
  for (const other of rest) {
    const limit = Math.min(shared.length, other.length);
    let k = 0;
    while (k < limit && shared[k] === other[k]) k += 1;
    shared = shared.slice(0, k);
    if (shared.length === 0) return "";
  }
  return shared.replace(/[\s._\-]+$/, "");
}
|
||||
|
||||
/**
 * Choose which file of a variant group plays by default.
 *
 * @param variants Files that all belong to `group`.
 * @param group The variant group name.
 * @returns The first file whose stem equals `group`; failing that, the file
 *   whose stem sorts first by `localeCompare`; null for an empty list.
 */
export function pickDefaultVariant<T extends { stem: string }>(
  variants: T[],
  group: string,
): T | null {
  if (variants.length === 0) return null;

  for (const candidate of variants) {
    if (candidate.stem === group) return candidate;
  }

  // Sort a copy so the caller's array keeps its order.
  const ordered = variants.slice();
  ordered.sort((left, right) => left.stem.localeCompare(right.stem));
  return ordered[0] ?? null;
}
|
||||
|
||||
/**
 * Short display label for a variant, relative to its group name.
 *
 * - stem equals the group → "original"
 * - stem is `group.` + rest → the rest ("XXX-001.fixed" → "fixed")
 * - stem merely starts with the group → the remainder with leading dots,
 *   underscores, hyphens and whitespace stripped
 * - anything else → the stem itself
 *
 * An empty remainder always falls back to "original".
 */
export function variantLabel(stem: string, group: string): string {
  if (stem === group) return "original";

  const dotted = group + ".";
  if (stem.startsWith(dotted)) {
    const rest = stem.slice(dotted.length);
    return rest || "original";
  }

  if (!stem.startsWith(group)) return stem;
  const remainder = stem.slice(group.length).replace(/^[._\-\s]+/, "");
  return remainder || "original";
}
|
||||
@@ -0,0 +1,81 @@
|
||||
import "server-only";
|
||||
import path from "node:path";
|
||||
import { getConfiguredVideoRoots } from "./index";
|
||||
import { getAppSetting } from "@/lib/db/appSettings";
|
||||
import { isManualSubtitlePath } from "./manualSubtitles";
|
||||
|
||||
/**
|
||||
* In-process set of subtitle paths the user picked via /api/pick-file
|
||||
* during this session. Covers the case where someone browses a .srt
|
||||
* sitting outside any indexed video root — the OS picker IS the
|
||||
* authorization. Entries time out after TTL_MS to bound how long an
|
||||
* old picked path remains servable.
|
||||
*/
|
||||
// How long a picked path stays trusted, in milliseconds.
const TTL_MS = 60 * 60 * 1000; // 1 hour
|
||||
// Normalized path → expiry time (ms since epoch, as produced by Date.now()).
const trusted = new Map<string, number>();
|
||||
|
||||
/** Remove every trusted path whose expiry time is at or before `now`. */
function pruneExpired(now: number): void {
  trusted.forEach((expiresAt, key) => {
    if (expiresAt <= now) trusted.delete(key);
  });
}
|
||||
|
||||
/**
 * Canonical map key for a path: always resolved to an absolute path, and
 * additionally lower-cased on Windows so a difference in letter case cannot
 * bypass the guard. Other platforms keep the original case.
 */
function normalize(p: string): string {
  const absolute = path.resolve(p);
  if (process.platform !== "win32") return absolute;
  return absolute.toLowerCase();
}
|
||||
|
||||
/**
 * Mark `abs` as trusted for the next TTL_MS milliseconds. Expired entries
 * are pruned first; trusting a path again restarts its timer.
 */
export function trustSubtitlePath(abs: string): void {
  pruneExpired(Date.now());
  const key = normalize(abs);
  const expiresAt = Date.now() + TTL_MS;
  trusted.set(key, expiresAt);
}
|
||||
|
||||
/**
 * Report whether `abs` was trusted via trustSubtitlePath and has not yet
 * expired. Expired entries are removed as a side effect.
 */
export function isSessionTrustedSubtitlePath(abs: string): boolean {
  const now = Date.now();
  pruneExpired(now);

  const key = normalize(abs);
  const expiresAt = trusted.get(key);
  if (expiresAt == null) return false;
  if (expiresAt > now) return true;

  trusted.delete(key);
  return false;
}
|
||||
|
||||
/**
 * Report whether `child` is `parent` itself or lies below it. Both paths are
 * resolved first (and lower-cased on Windows). The comparison ends on a path
 * separator, so `/a/bc` is not considered inside `/a/b`.
 */
function isInside(child: string, parent: string): boolean {
  const canonical = (p: string): string => {
    const absolute = path.resolve(p);
    return process.platform === "win32" ? absolute.toLowerCase() : absolute;
  };
  const childPath = canonical(child);
  const parentPath = canonical(parent);

  if (!parentPath) return false;
  if (childPath === parentPath) return true;

  // A root such as "/" already ends with the separator.
  const prefix = parentPath.endsWith(path.sep) ? parentPath : parentPath + path.sep;
  return childPath.startsWith(prefix);
}
|
||||
|
||||
/**
 * Decide whether a subtitle at `abs` may be served. The resolved path is
 * allowed when it is:
 *   - inside a configured video root,
 *   - inside an entry of the `subtitleExtraPaths` setting,
 *   - inside `<cwd>/data/generated-subtitles`,
 *   - a session-trusted pick-file path (exact match, not prefix), or
 *   - accepted by isManualSubtitlePath.
 * Checks run in that order and stop at the first hit.
 */
export function isAllowedSubtitlePath(abs: string): boolean {
  const target = path.resolve(abs);

  for (const videoRoot of getConfiguredVideoRoots()) {
    if (videoRoot && isInside(target, videoRoot)) return true;
  }

  const extraRoots = getAppSetting("subtitleExtraPaths") ?? [];
  for (const extraRoot of extraRoots) {
    if (extraRoot && isInside(target, extraRoot)) return true;
  }

  const generatedRoot = path.join(process.cwd(), "data", "generated-subtitles");
  if (
    isInside(target, generatedRoot) ||
    isSessionTrustedSubtitlePath(target) ||
    isManualSubtitlePath(target)
  ) {
    return true;
  }
  return false;
}
|
||||
@@ -0,0 +1,145 @@
|
||||
import "server-only";
|
||||
import path from "node:path";
|
||||
import fs from "node:fs";
|
||||
import fsp from "node:fs/promises";
|
||||
import crypto from "node:crypto";
|
||||
|
||||
/**
|
||||
* Bump on any change to srtToVtt, the ffmpeg arg recipe, or the
|
||||
* cache-key composition. Old entries become unreachable automatically.
|
||||
* v1 → initial.
|
||||
* v2 → added decodeSubtitleBuffer for non-UTF-8 SRTs/VTTs (cp936,
|
||||
* shift-jis, big5, UTF-16). Existing UTF-8-only entries would
|
||||
* still be correct but the version bump ensures any cached
|
||||
* output produced with a buggy decode path is regenerated.
|
||||
*/
|
||||
export const CONVERTER_VERSION = 2;
|
||||
|
||||
// Directory that holds the cached .vtt files, relative to the process
// working directory.
const CACHE_DIR = path.join(process.cwd(), "data", "subtitle-cache");
|
||||
// Module-load side effect: create the cache directory synchronously. With
// `recursive: true` an already existing directory is not an error.
fs.mkdirSync(CACHE_DIR, { recursive: true });
|
||||
|
||||
/** Source category of a cached conversion; it is part of the cache key. */
export type CacheKind = "embedded" | "srt" | "ass" | "ssa";
|
||||
|
||||
/** Everything that identifies one cached conversion (see cachePath). */
export interface CacheKeyInput {
  /** Path of the source file. */
  abs: string;
  /** Source file size. */
  size: number;
  /** Source file modification time in milliseconds; rounded before hashing. */
  mtimeMs: number;
  /** Source category. */
  kind: CacheKind;
  /** ffmpeg stream index for embedded subtitles; file extension for sidecar files. */
  streamOrExt: string | number;
}
|
||||
|
||||
/**
 * Compute the cache file path for one conversion.
 *
 * The name is the SHA-1 hex digest of the source path, size, rounded mtime,
 * kind, stream/extension and CONVERTER_VERSION joined with "|", plus ".vtt".
 * A change to any of these therefore selects a different cache file.
 */
export function cachePath(input: CacheKeyInput): string {
  const { abs, size, mtimeMs, kind, streamOrExt } = input;
  const keyMaterial = [
    abs,
    size,
    Math.round(mtimeMs),
    kind,
    String(streamOrExt),
    CONVERTER_VERSION,
  ].join("|");
  const digest = crypto.createHash("sha1").update(keyMaterial).digest("hex");
  return path.join(CACHE_DIR, `${digest}.vtt`);
}
|
||||
|
||||
/**
 * Read a cache file.
 *
 * On a hit the file's atime/mtime are bumped to "now" in the background so
 * the mtime-ordered prune treats it as recently used. A failing bump is
 * ignored.
 *
 * @returns The file contents, or null when the file cannot be read.
 */
export async function readCache(file: string): Promise<Buffer | null> {
  let contents: Buffer;
  try {
    contents = await fsp.readFile(file);
  } catch {
    return null;
  }
  // utimes takes seconds, Date.now() yields milliseconds.
  const nowSeconds = Date.now() / 1000;
  fsp.utimes(file, nowSeconds, nowSeconds).catch(() => { /* ignore */ });
  return contents;
}
|
||||
|
||||
// Successful writeCache calls since a prune was last triggered.
let writesSinceLastPrune = 0;
|
||||
// writeCache triggers a background prune after this many successful writes.
const PRUNE_WRITE_INTERVAL = 25;
|
||||
|
||||
/**
 * Write a cache entry atomically: the data goes to a temporary sibling file
 * that is then renamed over `file`, so readers never see a partial entry.
 *
 * The temporary name carries a random component in addition to pid and
 * timestamp, so two writes for the same key that start within the same
 * millisecond do not share (and then fight over) one temp file.
 *
 * Every PRUNE_WRITE_INTERVAL successful writes a prune is started in the
 * background. Its failure is logged and never affects the write.
 *
 * @param file Destination path, normally from cachePath.
 * @param data Entry contents.
 * @throws Re-throws the write/rename error after removing the temp file.
 */
export async function writeCache(file: string, data: Buffer | string): Promise<void> {
  const unique = crypto.randomBytes(6).toString("hex");
  const tmp = `${file}.${process.pid}.${Date.now()}.${unique}.tmp`;
  try {
    await fsp.writeFile(tmp, data);
    await fsp.rename(tmp, file);
  } catch (e) {
    try { await fsp.unlink(tmp); } catch { /* ignore */ }
    throw e;
  }
  writesSinceLastPrune++;
  if (writesSinceLastPrune >= PRUNE_WRITE_INTERVAL) {
    writesSinceLastPrune = 0;
    // Fire and forget, but never leave the rejection unhandled.
    void pruneSubtitleCacheIfNeeded().catch((e: unknown) => {
      console.error("[subtitle-cache] prune failed:", e);
    });
  }
}
|
||||
|
||||
/** Statistics of one pruneSubtitleCacheIfNeeded run. */
interface PruneResult {
  /** Number of cache files that could be stat'ed. */
  scanned: number;
  /** Number of cache files deleted. */
  removed: number;
  /** Total size of the scanned files before pruning, in bytes. */
  beforeBytes: number;
  /** Total size remaining after pruning, in bytes. */
  afterBytes: number;
}
|
||||
|
||||
/**
 * Size-bounded sweep of the subtitle cache, oldest mtime first.
 *
 * Sums the size of every `.vtt` file in the cache directory. When the total
 * exceeds the `subtitleCacheLimitMb` setting, files are deleted in ascending
 * mtime order until the total is at or below 80% of the limit. A limit that
 * is missing, non-numeric, zero or negative disables pruning.
 *
 * @returns Counters for the run; all zero when pruning is disabled or the
 *   directory cannot be listed.
 */
export async function pruneSubtitleCacheIfNeeded(): Promise<PruneResult> {
  const { getAppSetting } = await import("@/lib/db/appSettings");
  const limitMb = Number(getAppSetting("subtitleCacheLimitMb"));
  const result: PruneResult = { scanned: 0, removed: 0, beforeBytes: 0, afterBytes: 0 };
  if (!(Number.isFinite(limitMb) && limitMb > 0)) return result;

  const limitBytes = limitMb * 1024 * 1024;
  const lowWatermark = Math.floor(limitBytes * 0.8);

  let dirents: import("node:fs").Dirent[];
  try {
    dirents = await fsp.readdir(CACHE_DIR, { withFileTypes: true });
  } catch {
    return result;
  }

  type CacheEntry = { abs: string; size: number; mtimeMs: number };
  const candidates: CacheEntry[] = [];
  for (const dirent of dirents) {
    const isCacheFile = dirent.isFile() && dirent.name.endsWith(".vtt");
    if (!isCacheFile) continue;
    const abs = path.join(CACHE_DIR, dirent.name);
    try {
      const { size, mtimeMs } = await fsp.stat(abs);
      candidates.push({ abs, size, mtimeMs });
      result.scanned += 1;
      result.beforeBytes += size;
    } catch { /* file vanished mid-walk; skip */ }
  }

  if (result.beforeBytes <= limitBytes) {
    result.afterBytes = result.beforeBytes;
    return result;
  }

  // Least recently used (oldest mtime) goes first.
  candidates.sort((older, newer) => older.mtimeMs - newer.mtimeMs);
  let running = result.beforeBytes;
  for (const victim of candidates) {
    if (running <= lowWatermark) break;
    try {
      await fsp.unlink(victim.abs);
      running -= victim.size;
      result.removed += 1;
    } catch { /* concurrent delete; skip */ }
  }
  result.afterBytes = running;

  if (result.removed > 0) {
    console.log(
      `[subtitle-cache] pruned ${result.removed}/${result.scanned} files; ${(result.beforeBytes / 1_048_576).toFixed(1)}MB → ${(running / 1_048_576).toFixed(1)}MB`,
    );
  }
  return result;
}
|
||||
|
||||
/**
 * Bootstrap entrypoint — fire one sweep on module load, delayed by five
 * seconds so other startup work isn't blocked. The sweep is best-effort:
 * a failure is logged instead of surfacing as an unhandled rejection.
 */
function scheduleBootstrapPrune(): void {
  setTimeout(() => {
    void pruneSubtitleCacheIfNeeded().catch((err: unknown) => {
      console.warn("[subtitle-cache] bootstrap prune failed", err);
    });
  }, 5_000);
}
scheduleBootstrapPrune();
|
||||
@@ -0,0 +1,195 @@
|
||||
import "server-only";
|
||||
import path from "node:path";
|
||||
import fs from "node:fs/promises";
|
||||
import iconv from "iconv-lite";
|
||||
|
||||
/** Subtitle file extensions recognised by this module (lowercase, with leading dot). */
export const SUBTITLE_EXTS = [
  ".srt",
  ".vtt",
  ".ass",
  ".ssa",
] as const;

/** Union of the literal extensions listed in SUBTITLE_EXTS. */
export type SubtitleExt = (typeof SUBTITLE_EXTS)[number];

/** Set form of SUBTITLE_EXTS, typed as plain strings for membership tests. */
const SUBTITLE_EXT_SET = new Set<string>(SUBTITLE_EXTS);

/** Three-letter language codes this module can resolve to. */
export type LangIso =
  | "eng"
  | "zho"
  | "jpn";

/** Subtitle language preference values; "off" means no language. */
export type LangPref =
  | "EN"
  | "CN"
  | "JP"
  | "off";

/** A subtitle file found on disk. */
export interface SubtitleFileEntry {
  /** Full path of the file (directory joined with the entry name). */
  abs: string;
  /** Name of the file within its directory. */
  filename: string;
}
|
||||
|
||||
/**
 * Collect subtitle files under `root`, descending at most `maxDepth`
 * directory levels below it. Directories that cannot be read are
 * skipped silently. A file qualifies when its lowercased extension is
 * in SUBTITLE_EXT_SET.
 *
 * Visit order: a directory's own files first (in readdir order), then
 * its subdirectories, last-listed subdirectory first.
 *
 * @param root Directory to start from.
 * @param maxDepth How many levels of subdirectories to enter (default 2).
 * @returns Every matching file with its full path and bare filename.
 */
export async function walkSubtitles(root: string, maxDepth = 2): Promise<SubtitleFileEntry[]> {
  const found: SubtitleFileEntry[] = [];

  const visit = async (dir: string, depth: number): Promise<void> => {
    let children: import("node:fs").Dirent[];
    try {
      children = await fs.readdir(dir, { withFileTypes: true });
    } catch {
      return;
    }

    const subdirs: string[] = [];
    for (const child of children) {
      const full = path.join(dir, child.name);
      if (child.isDirectory()) {
        if (depth < maxDepth) subdirs.push(full);
        continue;
      }
      if (!child.isFile()) continue;
      if (SUBTITLE_EXT_SET.has(path.extname(child.name).toLowerCase())) {
        found.push({ abs: full, filename: child.name });
      }
    }

    // Last-listed subdirectory is entered first.
    for (const sub of subdirs.reverse()) {
      await visit(sub, depth + 1);
    }
  };

  await visit(root, 0);
  return found;
}
|
||||
|
||||
type ActiveLangPref = Exclude<LangPref, "off">;

/** Preference value → language code. */
const PREF_TO_ISO: Record<ActiveLangPref, LangIso> = {
  EN: "eng",
  CN: "zho",
  JP: "jpn",
};

/** Language code → preference value (inverse of PREF_TO_ISO). */
const ISO_TO_PREF: Record<LangIso, ActiveLangPref> = {
  eng: "EN",
  zho: "CN",
  jpn: "JP",
};

/**
 * Translate a preference value into its language code.
 * @returns The matching code, or null for "off".
 */
export function isoFromPref(pref: LangPref): LangIso | null {
  if (pref === "off") {
    return null;
  }
  return PREF_TO_ISO[pref];
}

/**
 * Translate a language code back into a preference value.
 * @returns The matching preference, or "off" when no code is given.
 */
export function prefFromIso(iso: LangIso | null): LangPref {
  if (iso == null) {
    return "off";
  }
  return ISO_TO_PREF[iso];
}
|
||||
|
||||
/** Lowercase tokens that identify English. */
const ENGLISH_TOKENS = new Set(["en", "eng", "english"]);
/** Lowercase tokens that identify Chinese (any script or region). */
const CHINESE_TOKENS = new Set([
  "zh", "zho", "chi", "chs", "cht", "chn", "cn", "chinese",
  "schinese", "tchinese", "simplified", "traditional",
  "zh-cn", "zh-tw", "zh-hans", "zh-hant",
]);
/** Lowercase tokens that identify Japanese. */
const JAPANESE_TOKENS = new Set(["ja", "jp", "jpn", "japanese", "jap"]);

/** Exact lookup of an already-lowercased token in the three token sets. */
function lookupLanguageToken(token: string): LangIso | null {
  if (ENGLISH_TOKENS.has(token)) return "eng";
  if (CHINESE_TOKENS.has(token)) return "zho";
  if (JAPANESE_TOKENS.has(token)) return "jpn";
  return null;
}

/**
 * Map a free-form language tag to one of the supported language codes.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. The
 * whole tag is tried first; if that fails and the tag has subtags
 * (`en-US`, `ja-JP`, `zh_CN`), the primary subtag before the first
 * separator is tried, so region- or script-qualified tags resolve to
 * their base language. Underscores are treated like hyphens.
 *
 * @param tag Tag to normalise; null, undefined and blank strings are accepted.
 * @returns "eng", "zho" or "jpn", or null when the tag is empty or unrecognised.
 */
export function normalizeLanguageTag(tag: string | null | undefined): LangIso | null {
  if (!tag) return null;
  const lower = tag.trim().toLowerCase().replace(/_/g, "-");
  if (!lower) return null;

  const exact = lookupLanguageToken(lower);
  if (exact) return exact;

  // Fall back to the primary subtag: "en-us" -> "en".
  const dash = lower.indexOf("-");
  return dash > 0 ? lookupLanguageToken(lower.slice(0, dash)) : null;
}
|
||||
|
||||
/**
 * Human-readable English name for a language code.
 * @returns "English", "Chinese" or "Japanese"; "Unknown" for anything else, including null.
 */
export function languageDisplay(iso: LangIso | null): string {
  switch (iso) {
    case "eng":
      return "English";
    case "zho":
      return "Chinese";
    case "jpn":
      return "Japanese";
    default:
      return "Unknown";
  }
}
|
||||
|
||||
/** Separators between filename tokens: whitespace, dots, underscores, hyphens, brackets, parentheses, `+`, `,`, `;`. */
const TOKEN_SPLIT = /[\s._\-\[\]()+,;]+/g;

/** Result of scanning a filename for language hints. */
export interface DetectedLanguage {
  /** Single ISO code if exactly one language was detected. */
  lang: LangIso | null;
  /** Display label — "English", "Chinese", "English/Chinese", "Unknown". */
  label: string;
}

/**
 * Look for language tokens in a filename's stem (the name minus its
 * extension). With exactly one language found, `lang` carries its code
 * and `label` its display name. With several, `label` joins the names
 * with "/" in English, Chinese, Japanese order while `lang` stays null,
 * so sticky-pref matching only ever resolves to a single language.
 * With none, the label is "Unknown".
 */
export function detectLanguageFromName(filename: string): DetectedLanguage {
  const ext = path.extname(filename).toLowerCase();
  const stem = ext.length > 0 ? filename.slice(0, -ext.length) : filename;

  const hits = new Set<LangIso>();
  for (const token of stem.toLowerCase().split(TOKEN_SPLIT)) {
    if (!token) continue;
    const iso = normalizeLanguageTag(token);
    if (iso !== null) hits.add(iso);
  }

  switch (hits.size) {
    case 0:
      return { lang: null, label: "Unknown" };
    case 1: {
      const only = [...hits][0]!;
      return { lang: only, label: languageDisplay(only) };
    }
    default: {
      // Fixed display order, independent of where the tokens appeared.
      const displayOrder: LangIso[] = ["eng", "zho", "jpn"];
      const label = displayOrder
        .filter((code) => hits.has(code))
        .map(languageDisplay)
        .join("/");
      return { lang: null, label };
    }
  }
}
|
||||
|
||||
/** A whole line containing the `-->` cue arrow, i.e. an SRT timing line. */
const SRT_TIMING_LINE = /^.*-->.*$/gm;
/** SRT timestamp: 1–2 digit hours, `MM:SS`, comma, three-digit milliseconds. */
const SRT_TIMESTAMP = /(\d{1,2}):(\d{2}:\d{2}),(\d{3})/g;

/**
 * Pure JS SRT → WebVTT converter. Strips a leading BOM, normalizes
 * CRLF/CR to LF, rewrites the timestamps on timing lines from
 * `H:MM:SS,mmm` to `HH:MM:SS.mmm`, and prepends the WEBVTT header.
 * No styling translation. Cheap; runs without spawning ffmpeg.
 *
 * Only lines containing `-->` are rewritten, so dialogue that happens
 * to contain a timestamp-looking string is left alone. Hours are
 * zero-padded to two digits because WebVTT does not accept a
 * single-digit hours field.
 *
 * @param srt Decoded SRT file contents.
 * @returns WebVTT text starting with `WEBVTT` and a blank line.
 */
export function srtToVtt(srt: string): string {
  let body = srt;
  if (body.charCodeAt(0) === 0xfeff) body = body.slice(1);
  body = body.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
  body = body.replace(SRT_TIMING_LINE, (line: string) =>
    line.replace(
      SRT_TIMESTAMP,
      (_match: string, hours: string, minSec: string, millis: string) =>
        `${hours.padStart(2, "0")}:${minSec}.${millis}`,
    ),
  );
  return `WEBVTT\n\n${body.trimStart()}`;
}
|
||||
|
||||
/**
 * Strip the extension (as reported by `path.extname`) from a filename.
 * Names without an extension are returned unchanged.
 */
export function stemOf(filename: string): string {
  const extLength = path.extname(filename).length;
  return filename.slice(0, filename.length - extLength);
}
|
||||
|
||||
/** U+FFFD, counted below as the marker of an undecodable byte sequence. */
const REPLACEMENT_CHAR = "\uFFFD";
/** Hiragana (U+3040–U+309F) and Katakana (U+30A0–U+30FF). */
const KANA_PATTERN = /[\u3040-\u30ff]/;

/**
 * Decode a subtitle file buffer to a JS string with best-effort
 * encoding detection. Many older Asian SRTs ship as cp936/GBK or
 * Shift-JIS — feeding them through `Buffer.toString("utf8")` produces
 * mojibake. Strategy:
 *   1. If a BOM is present (UTF-8 / UTF-16 LE / UTF-16 BE), strip it
 *      and decode with that encoding.
 *   2. Try UTF-8 strict. If it decodes without invalid sequences, use it.
 *   3. Otherwise decode as utf8 / shift_jis / gb18030 / big5 and pick
 *      the candidate with the lowest score, where the score is the
 *      number of replacement characters in the decoded text.
 *   4. Score adjustments: utf8 always gets a one-point bonus; shift_jis
 *      gets a one-point bonus when its decoded text contains hiragana
 *      or katakana. Equal scores go to the candidate listed first.
 *
 * NOTE(review): earlier documentation said gb18030 should win ties when
 * no kana is present, but shift_jis is listed first and therefore wins
 * them — confirm which is intended.
 *
 * @param buf Raw file contents.
 * @returns The decoded text, without any BOM.
 */
export function decodeSubtitleBuffer(buf: Buffer): string {
  // BOM detection — if present, the encoding is unambiguous.
  if (buf.length >= 3 && buf[0] === 0xef && buf[1] === 0xbb && buf[2] === 0xbf) {
    return buf.subarray(3).toString("utf8");
  }
  if (buf.length >= 2 && buf[0] === 0xff && buf[1] === 0xfe) {
    return iconv.decode(buf.subarray(2), "utf-16le");
  }
  if (buf.length >= 2 && buf[0] === 0xfe && buf[1] === 0xff) {
    return iconv.decode(buf.subarray(2), "utf-16be");
  }

  // UTF-8 strict — fast path for the common case.
  try {
    const decoder = new TextDecoder("utf-8", { fatal: true });
    return decoder.decode(buf);
  } catch { /* fall through to heuristic */ }

  // Compare candidate encodings by replacement-char count.
  const candidates: Array<"utf8" | "shift_jis" | "gb18030" | "big5"> = [
    "utf8", "shift_jis", "gb18030", "big5",
  ];
  let best: { encoding: typeof candidates[number]; text: string; score: number } | null = null;
  for (const encoding of candidates) {
    const text = iconv.decode(buf, encoding);
    let bad = 0;
    for (let i = 0; i < text.length; i++) {
      if (text[i] === REPLACEMENT_CHAR) bad++;
    }
    // Bonus for shift_jis when its decoded text contains kana, since
    // gb18030 happens to map many JP code points without errors but
    // produces gibberish that rep-count alone wouldn't catch. The kana
    // test must be a real code-point range: a class that also matches
    // "-" would fire on every "-->" timing line.
    const bonus =
      encoding === "utf8" || (encoding === "shift_jis" && KANA_PATTERN.test(text)) ? 1 : 0;
    const adjusted = bad - bonus;
    if (best == null || adjusted < best.score) {
      best = { encoding, text, score: adjusted };
    }
  }
  return best?.text ?? buf.toString("utf8");
}
|
||||
Reference in New Issue
Block a user