Initial commit

This commit is contained in:
admin
2026-05-26 22:46:00 +02:00
commit 7e2c2ff89c
256 changed files with 51523 additions and 0 deletions
+59
View File
@@ -0,0 +1,59 @@
import "server-only";
import { spawn } from "node:child_process";
/** Process-lifetime cache of successfully probed durations (seconds), keyed by the path passed in. */
const cache = new Map<string, number>();
/** Hard cap on how long one ffprobe invocation may run before it is killed. */
const PROBE_TIMEOUT_MS = 10_000;

/**
 * Probe a video file's duration in seconds via ffprobe. Cached per-path
 * for the lifetime of the process — files don't change duration on us.
 * Only positive, finite results are cached.
 *
 * Caps the probe at PROBE_TIMEOUT_MS and ties to an optional AbortSignal
 * so a hung ffprobe (network mount, weird codec, dead disk) can't leave
 * the request awaiting forever or zombie the subprocess.
 *
 * @param abs    Path of the file to probe (also the cache key).
 * @param signal Optional abort signal; aborting kills the subprocess.
 * @returns Duration in seconds, or null if ffprobe fails, times out, is
 *          aborted, or prints something that is not a positive number.
 */
export async function probeDuration(abs: string, signal?: AbortSignal): Promise<number | null> {
  const cached = cache.get(abs);
  if (cached !== undefined) return cached;

  // Caller already gave up: don't spawn a process just to kill it. This
  // also avoids leaving a child with no "error" listener, which would
  // surface a missing ffprobe binary as an unhandled "error" event.
  if (signal?.aborted) return null;

  return new Promise((resolve) => {
    const proc = spawn("ffprobe", [
      "-v", "error",
      "-show_entries", "format=duration",
      "-of", "default=noprint_wrappers=1:nokey=1",
      abs,
    ]);
    let out = "";
    let settled = false;
    let timeoutId: ReturnType<typeof setTimeout> | undefined;

    // Resolve exactly once and release the timer + abort listener.
    const settle = (n: number | null) => {
      if (settled) return;
      settled = true;
      if (timeoutId !== undefined) clearTimeout(timeoutId);
      signal?.removeEventListener("abort", kill);
      if (n != null && Number.isFinite(n) && n > 0) {
        cache.set(abs, n);
        resolve(n);
      } else {
        resolve(null);
      }
    };
    const kill = () => {
      try { proc.kill("SIGKILL"); } catch { /* ignore */ }
      settle(null);
    };

    // Attach process listeners before anything that could settle early,
    // so a spawn failure is always observed.
    proc.on("error", () => settle(null));
    proc.on("close", () => settle(Number(out.trim())));
    proc.stdout?.on("data", (d) => { out += d.toString(); });

    timeoutId = setTimeout(kill, PROBE_TIMEOUT_MS);
    signal?.addEventListener("abort", kill, { once: true });
  });
}
+539
View File
@@ -0,0 +1,539 @@
import "server-only";
import path from "node:path";
import fs from "node:fs/promises";
import { extractCode, normalizeCode } from "@/lib/jav/codeParser";
import { getAppSetting } from "@/lib/db/appSettings";
import { rawDb } from "@/lib/db/client";
import { syncVideoMetadataIndex } from "./metadata";
/** File extensions (lowercase, leading dot) that the library walk treats as video files. */
export const VIDEO_EXTENSIONS = new Set([
".mp4", ".mkv", ".m4v", ".mov", ".webm", ".avi", ".wmv", ".ts", ".mpg", ".mpeg", ".flv",
]);
/** File extensions (lowercase, leading dot) that count as sidecar subtitle files. */
const SUBTITLE_EXTENSIONS = new Set([".srt", ".vtt", ".ass", ".ssa"]);
/** One video file the index found on disk. */
export interface VideoFile {
/** Absolute path on disk. */
abs: string;
/** Path relative to the scan root the file was found under (the main
 * library path or one of the extra paths). */
rel: string;
/** Filename (with extension). */
filename: string;
/** Normalized JAV code parsed from the filename. */
code: string;
/** File size in bytes. */
size: number;
/** Last-modified timestamp (ms). */
mtime: number;
}
/**
 * Lightweight scan-state record. Authoritative file data lives in the
 * `video_metadata` SQLite table — accessors below query it directly,
 * so this struct holds only what describes the most recent rescan.
 */
interface VideoIndex {
/** When the index was last built (epoch ms from Date.now()); 0 if never scanned. */
lastScannedAt: number;
/** All folder roots that were scanned, in order: main first, extras after.
 * Used both to display in the UI and to detect setting changes. */
rootsScanned: string[];
/** Total files matched by the most recent scan. */
count: number;
}
/** Scan state reported when nothing has been scanned or the configured roots no longer match. */
const EMPTY_INDEX: VideoIndex = {
lastScannedAt: 0,
rootsScanned: [],
count: 0,
};
/** Scan state of the most recent completed rescan in this process. */
let cachedScanState: VideoIndex = EMPTY_INDEX;
/** Promise of the rescan currently running, if any; used to coalesce concurrent rescan calls. */
let scanInFlight: Promise<VideoIndex> | null = null;
/** Columns of a `video_metadata` row that the walk reuses for files in an unchanged directory. */
interface CachedFileRow {
abs_path: string;
rel_path: string;
code: string;
size_bytes: number;
mtime_ms: number;
}
/** Options for a library walk. */
interface WalkOpts {
/** When true, ignore the dir-mtime cache (and the cached file rows) and
 * re-stat every file in every directory. Use after file edits that
 * don't change the directory's mtime (e.g. content rewrite without rename). */
force?: boolean;
}
/**
 * Walk the configured roots and produce a flat VideoFile[]. The caller
 * writes the result to the `video_metadata` table — nothing is held in
 * memory beyond the duration of one rescan.
 *
 * Incremental: each directory's mtime is compared to a stored value
 * in `video_dir_mtimes`. If unchanged, the immediate-children file
 * rows for that dir are reused from `video_metadata` instead of
 * stat-ing every file. Subdirs are still walked (their mtimes may have
 * changed independently), so every directory is readdir'ed once.
 *
 * Roots are resolved to normalized absolute paths before walking so the
 * directory keys used for the caches never carry a trailing separator
 * or foreign slashes; otherwise they would not match the directory
 * portion of the file paths produced by `path.join`.
 *
 * A directory that cannot be stat'ed or listed is treated as not seen:
 * its cached mtime is dropped so the next scan re-reads it from disk
 * instead of reusing cached rows that this scan is about to lose.
 *
 * @param roots Scan roots, main library first.
 * @param opts  `force` bypasses both caches.
 * @returns The files found (sorted by code, then filename), their count,
 *          the set of directories successfully visited, the number of
 *          file rows reused from cache and the number of directories
 *          that were fully re-read.
 */
async function walkAllRoots(
  roots: string[],
  opts: WalkOpts = {},
): Promise<{ files: VideoFile[]; count: number; visitedDirs: Set<string>; reused: number; rescanned: number }> {
  const cachedMtimes = opts.force
    ? new Map<string, number>()
    : loadDirMtimeCache();
  const cachedFilesByDir = opts.force
    ? new Map<string, CachedFileRow[]>()
    : loadCachedFileIndex();
  const visitedDirs = new Set<string>();
  const files: VideoFile[] = [];
  let reused = 0;
  let rescanned = 0;

  for (const configuredRoot of roots) {
    // Normalize once so every derived directory key is canonical.
    const root = path.resolve(configuredRoot);
    const stack: string[] = [root];

    while (stack.length) {
      const dir = stack.pop()!;

      let dirStat: import("node:fs").Stats;
      let entries: import("node:fs").Dirent[];
      try {
        dirStat = await fs.stat(dir);
        entries = await fs.readdir(dir, { withFileTypes: true });
      } catch {
        // Vanished or unreadable. Forget its mtime and leave it out of
        // visitedDirs so the cache row is pruned and the directory is
        // re-read from disk once it is reachable again.
        cachedMtimes.delete(dir);
        continue;
      }
      visitedDirs.add(dir);

      // Always recurse — subdir mtimes are tracked independently.
      for (const e of entries) {
        if (e.isDirectory()) stack.push(path.join(dir, e.name));
      }

      const cachedMtime = cachedMtimes.get(dir);
      if (cachedMtime != null && cachedMtime === dirStat.mtimeMs) {
        // Unchanged: reuse cached rows for files immediately in this directory.
        const cached = cachedFilesByDir.get(dir);
        if (cached) {
          for (const row of cached) {
            files.push({
              abs: row.abs_path,
              rel: path.relative(root, row.abs_path),
              filename: path.basename(row.abs_path),
              code: row.code,
              size: row.size_bytes,
              mtime: row.mtime_ms,
            });
          }
          reused += cached.length;
        }
        continue;
      }

      // Dir changed (or no cache entry yet): stat each matching file.
      rescanned++;
      for (const e of entries) {
        if (!e.isFile()) continue;
        const ext = path.extname(e.name).toLowerCase();
        if (!VIDEO_EXTENSIONS.has(ext)) continue;
        const stem = e.name.slice(0, e.name.length - ext.length);
        const code = extractCode(stem);
        if (!code) continue;
        const norm = normalizeCode(code);
        if (!norm) continue;
        const abs = path.join(dir, e.name);
        let st: import("node:fs").Stats;
        try {
          st = await fs.stat(abs);
        } catch {
          continue; // file vanished between readdir and stat
        }
        files.push({
          abs,
          rel: path.relative(root, abs),
          filename: e.name,
          code: norm,
          size: st.size,
          mtime: st.mtimeMs,
        });
      }
      // Record the mtime so the NEXT scan sees this dir as fresh.
      cachedMtimes.set(dir, dirStat.mtimeMs);
    }
  }

  // Persist updated mtime cache for next scan.
  saveDirMtimeCache(cachedMtimes, visitedDirs);
  // Stable order across rescans.
  files.sort((a, b) => a.code.localeCompare(b.code) || a.filename.localeCompare(b.filename));
  return { files, count: files.length, visitedDirs, reused, rescanned };
}
/** Read every `video_dir_mtimes` row and index the stored mtime by its abs_dir. */
function loadDirMtimeCache(): Map<string, number> {
  const stored = rawDb.prepare(`SELECT abs_dir, mtime_ms FROM video_dir_mtimes`).all() as Array<{ abs_dir: string; mtime_ms: number }>;
  return new Map<string, number>(
    stored.map((row): [string, number] => [row.abs_dir, row.mtime_ms]),
  );
}
/** Bucket every `video_metadata` row under its dir_path, so the walk can
 * fetch all cached files of a directory with one Map lookup. Reads the
 * whole table once. */
function loadCachedFileIndex(): Map<string, CachedFileRow[]> {
  const allRows = rawDb.prepare(`
SELECT abs_path, rel_path, code, size_bytes, mtime_ms, dir_path
FROM video_metadata
`).all() as Array<CachedFileRow & { dir_path: string }>;
  const byDir = new Map<string, CachedFileRow[]>();
  for (const row of allRows) {
    let bucket = byDir.get(row.dir_path);
    if (!bucket) {
      bucket = [];
      byDir.set(row.dir_path, bucket);
    }
    bucket.push(row);
  }
  return byDir;
}
/**
 * Write the directory-mtime cache back to `video_dir_mtimes` in one
 * transaction: upsert the entry of every directory visited this scan,
 * then delete the rows of directories that were not visited (deleted or
 * moved folders). Failures are logged, never thrown.
 */
function saveDirMtimeCache(mtimes: Map<string, number>, visited: Set<string>): void {
  const upsertStmt = rawDb.prepare(`
INSERT INTO video_dir_mtimes (abs_dir, mtime_ms, last_seen_at)
VALUES (?, ?, ?)
ON CONFLICT(abs_dir) DO UPDATE SET
mtime_ms = excluded.mtime_ms,
last_seen_at = excluded.last_seen_at
`);
  const seenAt = Date.now();

  const persist = rawDb.transaction(() => {
    // Entries for directories we did not reach this scan may be stale
    // (moved/renamed), so they are not written back.
    mtimes.forEach((mtimeMs, absDir) => {
      if (visited.has(absDir)) upsertStmt.run(absDir, mtimeMs, seenAt);
    });

    // Drop rows for directories that were not seen this scan.
    const stored = rawDb.prepare(`SELECT abs_dir FROM video_dir_mtimes`).all() as Array<{ abs_dir: string }>;
    const removeStmt = rawDb.prepare(`DELETE FROM video_dir_mtimes WHERE abs_dir = ?`);
    stored
      .filter((row) => !visited.has(row.abs_dir))
      .forEach((row) => { removeStmt.run(row.abs_dir); });
  });

  try {
    persist();
  } catch (e) {
    console.error("[video] failed to save dir mtime cache:", e);
  }
}
/** Characters that belong to a code token; anything else (or a string edge) delimits it. */
const SUBTITLE_CODE_TOKEN_CHAR = /[a-z0-9]/;

/**
 * True when `token` occurs in `haystack` at least once as a delimited
 * token, i.e. with a non-alphanumeric character or a string edge on
 * both sides. Every occurrence is checked, not just the first. Both
 * arguments are expected to be lowercased already.
 */
function containsDelimitedToken(haystack: string, token: string): boolean {
  if (!token) return false;
  const isBoundary = (c: string | undefined) => c === undefined || !SUBTITLE_CODE_TOKEN_CHAR.test(c);
  for (let idx = haystack.indexOf(token); idx >= 0; idx = haystack.indexOf(token, idx + 1)) {
    if (isBoundary(haystack[idx - 1]) && isBoundary(haystack[idx + token.length])) return true;
  }
  return false;
}

/**
 * Walk every place a sidecar subtitle could live and return the set of
 * canonical codes that have at least one. Cheap signal — no ffprobe.
 *
 * - Each video's own directory: a subtitle file counts when its name
 *   starts with the video stem followed by a dot, or contains the
 *   video's code as a delimited token (so a stray `OTHER-001.srt` next
 *   to `YUJ-001.mp4` doesn't taint YUJ-001).
 * - Each entry in `subtitleExtraPaths` (recursive walk, depth 3) —
 *   extracts the code from the filename directly.
 * - data/generated-subtitles/<code>/ — directory name IS the code.
 *
 * @param files Video files found by the current scan.
 * @returns Set of codes with at least one discoverable subtitle.
 */
async function collectSubtitleCodes(files: VideoFile[]): Promise<Set<string>> {
  const codes = new Set<string>();

  // Same-folder scan: per video, look at sibling files. Cache directory
  // listings so a folder with N videos is only listed once.
  const dirCache = new Map<string, import("node:fs").Dirent[]>();
  for (const file of files) {
    // Another file of the same code (e.g. a second part) already matched.
    if (codes.has(file.code)) continue;

    const dir = path.dirname(file.abs);
    let entries = dirCache.get(dir);
    if (!entries) {
      try {
        entries = await fs.readdir(dir, { withFileTypes: true });
      } catch {
        entries = [];
      }
      dirCache.set(dir, entries);
    }

    const stem = file.filename.slice(0, file.filename.length - path.extname(file.filename).length);
    const stemPrefix = stem.toLowerCase() + ".";
    const codeLower = file.code.toLowerCase();

    for (const e of entries) {
      if (!e.isFile()) continue;
      const ext = path.extname(e.name).toLowerCase();
      if (!SUBTITLE_EXTENSIONS.has(ext)) continue;
      const lower = e.name.toLowerCase();
      // `<stem>.<anything>` covers both `<stem>.srt` and `<stem>.en.srt`.
      // The code match must be a delimited token — a bare `.includes`
      // would attribute `xyuj-0010.srt` to YUJ-001.
      if (lower.startsWith(stemPrefix) || containsDelimitedToken(lower, codeLower)) {
        codes.add(file.code);
        break;
      }
    }
  }

  // Persistent subtitle library roots — extract codes from filenames.
  const extraRoots = (getAppSetting("subtitleExtraPaths") ?? []).filter(Boolean);
  for (const root of extraRoots) {
    await walkSubtitleRoot(root, codes, 3);
  }

  // data/generated-subtitles/<code>/ — directory name is the code.
  const generatedRoot = path.join(process.cwd(), "data", "generated-subtitles");
  try {
    const subdirs = await fs.readdir(generatedRoot, { withFileTypes: true });
    for (const d of subdirs) {
      if (!d.isDirectory()) continue;
      const dirAbs = path.join(generatedRoot, d.name);
      let entries: import("node:fs").Dirent[];
      try {
        entries = await fs.readdir(dirAbs, { withFileTypes: true });
      } catch {
        continue;
      }
      const hasSub = entries.some(
        (e) => e.isFile() && SUBTITLE_EXTENSIONS.has(path.extname(e.name).toLowerCase()),
      );
      if (hasSub) {
        const norm = normalizeCode(d.name);
        if (norm) codes.add(norm);
      }
    }
  } catch { /* generated-subtitles not present yet — fine */ }

  return codes;
}
/**
 * Depth-limited walk of one subtitle library root. For every subtitle
 * file found, the code parsed from its filename stem is normalized and
 * added to `out`. Directories are entered while their parent's depth is
 * below `maxDepth` (the root itself is depth 0). Unreadable directories
 * are skipped silently.
 */
async function walkSubtitleRoot(root: string, out: Set<string>, maxDepth: number): Promise<void> {
  const pending: Array<{ dir: string; depth: number }> = [{ dir: root, depth: 0 }];

  while (pending.length > 0) {
    const current = pending.pop()!;
    let listing: import("node:fs").Dirent[];
    try {
      listing = await fs.readdir(current.dir, { withFileTypes: true });
    } catch {
      continue;
    }

    for (const entry of listing) {
      if (entry.isDirectory()) {
        if (current.depth < maxDepth) {
          pending.push({ dir: path.join(current.dir, entry.name), depth: current.depth + 1 });
        }
        continue;
      }
      if (!entry.isFile()) continue;

      const extension = path.extname(entry.name).toLowerCase();
      if (!SUBTITLE_EXTENSIONS.has(extension)) continue;

      const rawCode = extractCode(entry.name.slice(0, entry.name.length - extension.length));
      if (!rawCode) continue;
      const canonical = normalizeCode(rawCode);
      if (canonical) out.add(canonical);
    }
  }
}
/**
 * Exposed for path-allowlist checks (e.g. subtitle file resolution).
 *
 * @returns The currently configured scan roots, exactly as produced by
 *          `configuredRoots()`: main library path first, then the extras.
 */
export function getConfiguredVideoRoots(): string[] {
return configuredRoots();
}
/**
 * Read the scan roots from app settings: the trimmed `videoLibraryPath`
 * first, followed by each trimmed `videoExtraPaths` entry. Blank entries
 * are left out.
 */
function configuredRoots(): string[] {
  const primary = (getAppSetting("videoLibraryPath") || "").trim();
  const extras = getAppSetting("videoExtraPaths") ?? [];

  const candidates: string[] = [primary];
  for (const extra of extras) {
    candidates.push((extra ?? "").trim());
  }
  return candidates.filter((candidate) => candidate.length > 0);
}
/** True when both root lists have the same length and identical entries at every position. */
function rootsEqual(a: string[], b: string[]): boolean {
  return a.length === b.length && a.every((root, position) => root === b[position]);
}
/**
 * Scan-state probe for API routes. Hands back the cached scan state
 * only while it still describes the currently configured roots; when no
 * roots are configured, or they differ from the ones last scanned, the
 * empty state is returned so the caller can trigger a rescan.
 */
export function getVideoIndex(): VideoIndex {
  const currentRoots = configuredRoots();
  const stillValid =
    currentRoots.length !== 0 && rootsEqual(cachedScanState.rootsScanned, currentRoots);
  return stillValid ? cachedScanState : EMPTY_INDEX;
}
/** True while the scan tracked by `scanInFlight` was started with `force: true`. */
let scanInFlightForced = false;

/**
 * Rebuild the index from disk. Coalesces concurrent calls. Authoritative
 * data lands in the `video_metadata` table; this function returns only
 * scan-state metadata.
 *
 * Default mode is incremental — directories whose mtime hasn't
 * changed since the last scan reuse cached file rows without a stat
 * per file. Pass `{force:true}` to bypass the dir-mtime cache (e.g.
 * after content edits that don't bump dir mtime).
 *
 * Coalescing rules:
 * - A call that arrives while a scan is running shares that scan's
 *   promise, unless it asks for `force` and the running scan is not
 *   forced; then it waits for the running scan and starts a forced one,
 *   so the force request is never silently dropped.
 * - With no roots configured nothing is scanned and no in-flight promise
 *   is recorded. (Recording one there would never be cleared, because
 *   that path completes synchronously, and every later rescan would get
 *   the stale empty state back.)
 *
 * @param opts `force` bypasses the incremental caches.
 * @returns Scan-state metadata of the completed scan.
 */
export async function rescanVideoIndex(opts: { force?: boolean } = {}): Promise<VideoIndex> {
  const force = opts.force === true;

  if (scanInFlight) {
    if (!force || scanInFlightForced) return scanInFlight;
    try {
      await scanInFlight;
    } catch {
      /* the running scan's own callers observe its failure; we only wait for it to end */
    }
    return rescanVideoIndex(opts);
  }

  const cleanRoots = configuredRoots().map((r) => (r ?? "").trim()).filter(Boolean);
  if (cleanRoots.length === 0) {
    cachedScanState = { ...EMPTY_INDEX };
    return cachedScanState;
  }

  const run = (async (): Promise<VideoIndex> => {
    const t0 = Date.now();
    const { files, count, reused, rescanned } = await walkAllRoots(cleanRoots, { force });
    const walkMs = Date.now() - t0;
    console.log(
      `[video] rescan walk in ${walkMs}ms — ${count} files (${reused} reused, ${rescanned} dir(s) rewalked${force ? ", forced" : ""})`,
    );
    // Persist the file table first — has_video / has_subtitle bulk
    // updates and metadata sync all run off it.
    await syncVideoMetadataIndex(files);
    syncHasVideoColumn(files);
    const subtitleCodes = await collectSubtitleCodes(files);
    syncHasSubtitleColumn(subtitleCodes);
    cachedScanState = {
      lastScannedAt: Date.now(),
      rootsScanned: cleanRoots,
      count,
    };
    return cachedScanState;
  })();

  scanInFlight = run;
  scanInFlightForced = force;
  // Clear the in-flight marker once this scan settles. Registered before
  // the promise is handed out, so it runs ahead of any awaiting caller.
  const release = () => {
    if (scanInFlight === run) {
      scanInFlight = null;
      scanInFlightForced = false;
    }
  };
  run.then(release, release);
  return run;
}
/**
 * Rewrite images.has_video from the freshly-walked files so SQL filters
 * and counts can use the column directly: clear every set flag, then
 * set it for each distinct code in `files`. Runs in one transaction;
 * failures are logged, not thrown.
 */
function syncHasVideoColumn(files: VideoFile[]): void {
  const distinctCodes = [...new Set(files.map((file) => file.code))];
  // Batch size keeps each statement well below SQLite's bind-parameter cap.
  const BATCH = 500;

  const apply = rawDb.transaction(() => {
    rawDb.prepare(`UPDATE images SET has_video = 0 WHERE has_video = 1`).run();
    for (let start = 0; start < distinctCodes.length; start += BATCH) {
      const batch = distinctCodes.slice(start, start + BATCH);
      const marks = new Array<string>(batch.length).fill("?").join(",");
      rawDb.prepare(
        `UPDATE images SET has_video = 1 WHERE upper(code) IN (${marks})`,
      ).run(...batch);
    }
  });

  try {
    apply();
  } catch (e) {
    console.error("[video] failed to sync has_video column:", e);
  }
}
/**
 * Rewrite images.has_subtitle from the freshly-collected subtitle codes:
 * clear every set flag, then set it for each code in the set. Runs in
 * one transaction; failures are logged, not thrown.
 */
function syncHasSubtitleColumn(subtitleCodes: Set<string>): void {
  const codeList = [...subtitleCodes];
  // Batch size keeps each statement well below SQLite's bind-parameter cap.
  const BATCH = 500;

  const apply = rawDb.transaction(() => {
    rawDb.prepare(`UPDATE images SET has_subtitle = 0 WHERE has_subtitle = 1`).run();
    for (let start = 0; start < codeList.length; start += BATCH) {
      const batch = codeList.slice(start, start + BATCH);
      const marks = new Array<string>(batch.length).fill("?").join(",");
      rawDb.prepare(
        `UPDATE images SET has_subtitle = 1 WHERE upper(code) IN (${marks})`,
      ).run(...batch);
    }
  });

  try {
    apply();
  } catch (e) {
    console.error("[video] failed to sync has_subtitle column:", e);
  }
}
/** Columns selected from `video_metadata` when looking up the files of one code. */
interface VideoMetaRow {
abs_path: string;
rel_path: string;
code: string;
size_bytes: number;
mtime_ms: number;
}
/**
 * Fetch the video files recorded for one code, ordered by relative path
 * (case-insensitive). Queries `video_metadata` directly, so the answer
 * reflects the most recent rescan. The code is normalized first; if it
 * cannot be normalized its upper-cased form is used for the match.
 *
 * @returns The matching files, or [] for an empty/missing code.
 */
export function findVideosForCode(code: string | null | undefined): VideoFile[] {
  if (!code) return [];

  const lookupCode = normalizeCode(code) ?? code.toUpperCase();
  const matches = rawDb.prepare(`
SELECT abs_path, rel_path, code, size_bytes, mtime_ms
FROM video_metadata
WHERE upper(code) = ?
ORDER BY rel_path COLLATE NOCASE
`).all(lookupCode) as VideoMetaRow[];

  const result: VideoFile[] = [];
  for (const row of matches) {
    result.push({
      abs: row.abs_path,
      rel: row.rel_path,
      filename: path.basename(row.abs_path),
      code: row.code,
      size: row.size_bytes,
      mtime: row.mtime_ms,
    });
  }
  return result;
}
/** Upper-cased set of all codes that have at least one row in video_metadata — fast existence check. */
export function getCodesWithVideos(): Set<string> {
  const distinct = rawDb.prepare(`
SELECT DISTINCT upper(code) AS code FROM video_metadata
`).all() as Array<{ code: string }>;
  const codes = new Set<string>();
  for (const { code } of distinct) codes.add(code);
  return codes;
}
/** Upper-cased set of all codes flagged has_subtitle in the images
 * table; that column is populated at rescan time. */
export function getCodesWithSubtitles(): Set<string> {
  const distinct = rawDb.prepare(`
SELECT DISTINCT upper(code) AS code FROM images WHERE has_subtitle = 1 AND code IS NOT NULL
`).all() as Array<{ code: string }>;
  const codes = new Set<string>();
  for (const { code } of distinct) codes.add(code);
  return codes;
}
+58
View File
@@ -0,0 +1,58 @@
import "server-only";
import path from "node:path";
import { rawDb } from "@/lib/db/client";
/** A subtitle file manually attached to one (code, part) pair — camelCase view of a `manual_subtitles` row. */
export interface ManualSubtitle {
/** Code the subtitle is attached to. */
code: string;
/** Part index within that code. */
partIdx: number;
/** Path of the subtitle file; stored after `path.resolve` on attach. */
absPath: string;
/** Attachment time (epoch ms from Date.now()). */
attachedAt: number;
}
/** Raw `manual_subtitles` row as selected from SQLite (snake_case column names). */
interface ManualSubtitleRow {
code: string;
part_idx: number;
abs_path: string;
attached_at: number;
}
/** Convert a raw `manual_subtitles` row into its camelCase ManualSubtitle form. */
function rowToEntry(r: ManualSubtitleRow): ManualSubtitle {
  const { code, part_idx: partIdx, abs_path: absPath, attached_at: attachedAt } = r;
  return { code, partIdx, absPath, attachedAt };
}
/**
 * List the subtitles manually attached to one (code, part) pair, most
 * recently attached first. `code` is matched exactly as given.
 */
export function listManualSubtitlesForVariant(code: string, partIdx: number): ManualSubtitle[] {
  const select = rawDb.prepare(`
SELECT code, part_idx, abs_path, attached_at FROM manual_subtitles
WHERE code = ? AND part_idx = ?
ORDER BY attached_at DESC
`);
  const found = select.all(code, partIdx) as ManualSubtitleRow[];
  const entries: ManualSubtitle[] = [];
  for (const row of found) entries.push(rowToEntry(row));
  return entries;
}
/**
 * True iff this path (after `path.resolve`) is recorded against any
 * (code, part) in `manual_subtitles`.
 *
 * Windows paths are case-insensitive on disk but stored as typed, so on
 * win32 both sides are compared through SQL LOWER(); on other platforms
 * the comparison is exact.
 */
export function isManualSubtitlePath(abs: string): boolean {
  const target = path.resolve(abs);
  const lookup = process.platform === "win32"
    ? rawDb.prepare(`
SELECT 1 FROM manual_subtitles WHERE LOWER(abs_path) = LOWER(?) LIMIT 1
`)
    : rawDb.prepare(`SELECT 1 FROM manual_subtitles WHERE abs_path = ? LIMIT 1`);
  const hit = lookup.get(target);
  return Boolean(hit);
}
/**
 * Record a subtitle file against a (code, part) pair, stamped with the
 * current time. The path is stored after `path.resolve`. Uses
 * INSERT OR REPLACE, so a conflicting existing row is replaced.
 */
export function attachManualSubtitle(code: string, partIdx: number, absPath: string): void {
  const insert = rawDb.prepare(`
INSERT OR REPLACE INTO manual_subtitles (code, part_idx, abs_path, attached_at)
VALUES (?, ?, ?, ?)
`);
  const storedPath = path.resolve(absPath);
  insert.run(code, partIdx, storedPath, Date.now());
}
/**
 * Remove the record of a subtitle file attached to a (code, part) pair.
 * The path is resolved before matching. A no-op when no such row exists.
 *
 * Path matching follows the same platform rule as `isManualSubtitlePath`:
 * on win32, where paths are case-insensitive on disk but stored as
 * typed, the comparison goes through SQL LOWER(); elsewhere it is exact.
 * Without this, a path reported as attached on Windows could fail to
 * detach just because its letter case differs from the stored one.
 *
 * @param code    Code the subtitle is attached to (matched exactly).
 * @param partIdx Part index within that code.
 * @param absPath Path of the subtitle file to detach.
 */
export function detachManualSubtitle(code: string, partIdx: number, absPath: string): void {
  const resolved = path.resolve(absPath);
  const pathPredicate = process.platform === "win32"
    ? "LOWER(abs_path) = LOWER(?)"
    : "abs_path = ?";
  rawDb.prepare(
    `DELETE FROM manual_subtitles WHERE code = ? AND part_idx = ? AND ${pathPredicate}`,
  ).run(code, partIdx, resolved);
}
+580
View File
@@ -0,0 +1,580 @@
import "server-only";
import path from "node:path";
import { spawn } from "node:child_process";
import fs from "node:fs/promises";
import { revalidatePath } from "next/cache";
import { rawDb } from "@/lib/db/client";
import { getAppSetting } from "@/lib/db/appSettings";
import { classifyGroup, compilePatterns } from "./partClassify";
import type { VideoFile } from "./index";
/** Maximum time (ms) an ffprobe subprocess may run before it is killed. */
const PROBE_TIMEOUT_MS = 10_000;
/** Playback modes stored in `video_metadata.playback_mode`; any other stored value maps to null. */
export type PlaybackMode = "direct" | "transcode";
/**
 * camelCase view of one `video_metadata` row: file identity (path, code,
 * size, mtime), probe results, and part/variant classification.
 * The sync resets the probe fields (probedAt … playbackMode) to null
 * when a file's size or mtime changes.
 */
export interface StoredVideoMetadata {
absPath: string;
relPath: string;
code: string;
sizeBytes: number;
mtimeMs: number;
// null until the file has been probed — presumably an epoch-ms timestamp; confirm against the probe writer.
probedAt: number | null;
probeError: string | null;
durationSec: number | null;
videoCodec: string | null;
videoBFrames: number | null;
width: number | null;
height: number | null;
videoBitrate: number | null;
playbackMode: PlaybackMode | null;
// Classification written by classifyAndPersist from the filename stems of all files sharing a code.
partKind: "part" | "variant" | "single" | null;
partIndex: number | null;
variantGroup: string | null;
}
/**
 * Raw `video_metadata` row as returned by `SELECT *` (snake_case columns).
 * `playback_mode` and `part_kind` are plain strings here; `mapRow`
 * narrows them to their known values or null.
 */
interface VideoMetadataRow {
abs_path: string;
rel_path: string;
code: string;
size_bytes: number;
mtime_ms: number;
probed_at: number | null;
probe_error: string | null;
duration_sec: number | null;
video_codec: string | null;
video_b_frames: number | null;
width: number | null;
height: number | null;
video_bitrate: number | null;
playback_mode: string | null;
part_kind: string | null;
part_index: number | null;
variant_group: string | null;
}
/**
 * Shape of the JSON printed by ffprobe for the video-metadata probe
 * (`-of json` with the stream/format entries requested there). Every
 * field is optional; `bit_rate` and `duration` are typed as strings.
 */
interface FfprobeJson {
streams?: Array<{
codec_name?: string;
width?: number;
height?: number;
bit_rate?: string;
has_b_frames?: number;
}>;
format?: {
duration?: string;
bit_rate?: string;
};
}
/**
 * Translate a raw `video_metadata` row into StoredVideoMetadata.
 * `playback_mode` and `part_kind` are kept only when they hold one of
 * their known values; anything else becomes null.
 *
 * @returns The mapped record, or null when no row was given.
 */
function mapRow(row: VideoMetadataRow | undefined): StoredVideoMetadata | null {
  if (!row) return null;

  let playbackMode: StoredVideoMetadata["playbackMode"] = null;
  if (row.playback_mode === "direct" || row.playback_mode === "transcode") {
    playbackMode = row.playback_mode;
  }

  let partKind: StoredVideoMetadata["partKind"] = null;
  if (row.part_kind === "part" || row.part_kind === "variant" || row.part_kind === "single") {
    partKind = row.part_kind;
  }

  return {
    absPath: row.abs_path,
    relPath: row.rel_path,
    code: row.code,
    sizeBytes: row.size_bytes,
    mtimeMs: row.mtime_ms,
    probedAt: row.probed_at,
    probeError: row.probe_error,
    durationSec: row.duration_sec,
    videoCodec: row.video_codec,
    videoBFrames: row.video_b_frames,
    width: row.width,
    height: row.height,
    videoBitrate: row.video_bitrate,
    playbackMode,
    partKind,
    partIndex: row.part_index,
    variantGroup: row.variant_group,
  };
}
/**
 * Coerce a value to a strictly positive finite number.
 * Returns null for null/undefined, the literal "N/A", and anything that
 * does not convert to a finite number greater than zero.
 */
function parseFiniteNumber(value: unknown): number | null {
  if (value == null || value === "N/A") return null;
  const parsed = typeof value === "number" ? value : Number(value);
  if (!Number.isFinite(parsed)) return null;
  return parsed > 0 ? parsed : null;
}
/**
 * Coerce a value to a finite number that is zero or greater.
 * Returns null for null/undefined, the literal "N/A", and anything that
 * does not convert to a finite non-negative number.
 */
function parseNonNegativeNumber(value: unknown): number | null {
  if (value == null || value === "N/A") return null;
  const parsed = typeof value === "number" ? value : Number(value);
  if (!Number.isFinite(parsed)) return null;
  return parsed >= 0 ? parsed : null;
}
/** True when the stored record has exactly this size and an mtime within 1 ms of the given one. */
function isStatMatch(row: StoredVideoMetadata, sizeBytes: number, mtimeMs: number): boolean {
  if (row.sizeBytes !== sizeBytes) return false;
  const drift = Math.abs(row.mtimeMs - mtimeMs);
  return drift < 1;
}
/** Fetch the stored metadata for one file by its exact abs_path, or null when there is no such row. */
export function getStoredVideoMetadata(absPath: string): StoredVideoMetadata | null {
  const select = rawDb.prepare(`SELECT * FROM video_metadata WHERE abs_path = ?`);
  const found = select.get(absPath) as VideoMetadataRow | undefined;
  return mapRow(found);
}
/**
 * List the stored metadata of every file whose code matches `code`
 * case-insensitively, ordered by rel_path ascending.
 *
 * @returns The mapped records, or [] for an empty/missing code.
 */
export function listStoredVideoMetadataForCode(code: string | null | undefined): StoredVideoMetadata[] {
  if (!code) return [];

  const select = rawDb.prepare(`
SELECT * FROM video_metadata
WHERE upper(code) = upper(?)
ORDER BY rel_path ASC
`);
  const mapped: StoredVideoMetadata[] = [];
  for (const raw of select.all(code) as VideoMetadataRow[]) {
    const meta = mapRow(raw);
    if (meta !== null) mapped.push(meta);
  }
  return mapped;
}
/**
 * Produce a plain copy of a stored metadata record that contains
 * exactly the known fields (nothing else carried on the input object is
 * copied). Returns null when given null.
 */
export function serializeVideoMetadata(meta: StoredVideoMetadata | null) {
  if (!meta) return null;
  const {
    absPath, relPath, code, sizeBytes, mtimeMs,
    probedAt, probeError, durationSec,
    videoCodec, videoBFrames, width, height, videoBitrate,
    playbackMode, partKind, partIndex, variantGroup,
  } = meta;
  return {
    absPath,
    relPath,
    code,
    sizeBytes,
    mtimeMs,
    probedAt,
    probeError,
    durationSec,
    videoCodec,
    videoBFrames,
    width,
    height,
    videoBitrate,
    playbackMode,
    partKind,
    partIndex,
    variantGroup,
  };
}
/**
 * Make the `video_metadata` table mirror the files found by a scan.
 *
 * In one transaction: upsert a row per file (identity columns always
 * refreshed; all probe columns reset to NULL when the file's size or
 * mtime differs from the stored row), then delete rows whose abs_path
 * was not found. Afterwards part/variant classification is recomputed
 * and a background re-probe of un-probed rows is kicked off.
 *
 * `dir_path` is derived with `path.dirname` so it is exactly the
 * directory the file was joined onto. Hand-slicing at the last `/` or
 * `\` mis-splits POSIX filenames that contain a backslash and yields an
 * empty string for files directly under a filesystem root.
 *
 * @param files Every video file found by the current scan.
 * @throws Whatever the database layer throws from the upsert/delete
 *         transaction or from classification.
 */
export async function syncVideoMetadataIndex(files: VideoFile[]): Promise<void> {
  const found = new Set(files.map((file) => file.abs));
  const upsert = rawDb.prepare(`
INSERT INTO video_metadata (abs_path, rel_path, code, size_bytes, mtime_ms, dir_path)
VALUES (?, ?, ?, ?, ?, ?)
ON CONFLICT(abs_path) DO UPDATE SET
rel_path = excluded.rel_path,
code = excluded.code,
dir_path = excluded.dir_path,
probed_at = CASE
WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
ELSE video_metadata.probed_at
END,
probe_error = CASE
WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
ELSE video_metadata.probe_error
END,
duration_sec = CASE
WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
ELSE video_metadata.duration_sec
END,
video_codec = CASE
WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
ELSE video_metadata.video_codec
END,
video_b_frames = CASE
WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
ELSE video_metadata.video_b_frames
END,
width = CASE
WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
ELSE video_metadata.width
END,
height = CASE
WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
ELSE video_metadata.height
END,
video_bitrate = CASE
WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
ELSE video_metadata.video_bitrate
END,
playback_mode = CASE
WHEN video_metadata.size_bytes != excluded.size_bytes OR video_metadata.mtime_ms != excluded.mtime_ms THEN NULL
ELSE video_metadata.playback_mode
END,
size_bytes = excluded.size_bytes,
mtime_ms = excluded.mtime_ms
`);
  const deleteStale = rawDb.prepare(`DELETE FROM video_metadata WHERE abs_path = ?`);

  const tx = rawDb.transaction(() => {
    for (const file of files) {
      upsert.run(file.abs, file.rel, file.code, file.size, file.mtime, path.dirname(file.abs));
    }
    // Anything still in the table that this scan did not find is gone from disk.
    const rows = rawDb.prepare(`SELECT abs_path FROM video_metadata`).all() as Array<{ abs_path: string }>;
    for (const row of rows) {
      if (!found.has(row.abs_path)) deleteStale.run(row.abs_path);
    }
  });
  tx();

  classifyAndPersist(files);

  // Probe-data refresh runs in the background. Awaiting here used to
  // block rescan responses for minutes on libraries with many drifted
  // files (e.g. after a bulk rename). Page revalidation is triggered
  // from inside the re-probe as files complete — no batch-level waiting.
  void reprobeDirtyFiles(files);
}
/** Number of probes run in parallel by the background re-probe. */
const REPROBE_CONCURRENCY = 2;

/**
 * Background re-probe: probe every file from `files` whose
 * `video_metadata` row has neither probe data nor a probe error yet.
 *
 * REPROBE_CONCURRENCY workers pull from one shared queue position.
 * After a file is probed successfully, a revalidation of the
 * "/id/[code]" page route is requested, at most once per distinct code
 * within this run. Query failures and per-file probe failures are
 * logged; nothing is thrown to the caller.
 */
async function reprobeDirtyFiles(files: VideoFile[]): Promise<void> {
  let unprobed: Array<{ abs_path: string }>;
  try {
    unprobed = rawDb
      .prepare(`SELECT abs_path FROM video_metadata WHERE probed_at IS NULL AND probe_error IS NULL`)
      .all() as Array<{ abs_path: string }>;
  } catch (e) {
    console.error("[video] reprobe-dirty query failed:", e);
    return;
  }
  if (unprobed.length === 0) return;

  const unprobedPaths = new Set(unprobed.map((row) => row.abs_path));
  const queue = files.filter((candidate) => unprobedPaths.has(candidate.abs));
  if (queue.length === 0) return;

  // Shared between workers; each worker claims the next unclaimed slot.
  let nextSlot = 0;
  // Codes whose page revalidation was already requested in this run.
  const revalidatedCodes = new Set<string>();

  const drainQueue = async (): Promise<void> => {
    while (nextSlot < queue.length) {
      const file = queue[nextSlot++];
      if (!file) break;
      try {
        await probeVideoMetadata(file);
        if (revalidatedCodes.has(file.code)) continue;
        revalidatedCodes.add(file.code);
        try { revalidatePath("/id/[code]", "page"); } catch { /* ignore */ }
      } catch (e) {
        console.error(`[video] reprobe failed for ${file.abs}:`, e);
      }
    }
  };

  const workers: Promise<void>[] = Array.from({ length: REPROBE_CONCURRENCY }, () => drainQueue());
  await Promise.all(workers).catch(() => { /* swallowed */ });
}
/**
 * Recompute the part/variant classification of every file from the
 * current `partSuffixPatterns` setting and store it in `video_metadata`.
 * Files are classified per code group from their filename stems; probe
 * data is not involved, so this is safe to run on every scan.
 */
function classifyAndPersist(files: VideoFile[]): void {
  const patterns = compilePatterns(getAppSetting("partSuffixPatterns") ?? []);

  // Bucket the files by code; classification looks at each bucket as a whole.
  const groups = new Map<string, VideoFile[]>();
  for (const file of files) {
    let members = groups.get(file.code);
    if (!members) {
      members = [];
      groups.set(file.code, members);
    }
    members.push(file);
  }

  const persist = rawDb.prepare(`
UPDATE video_metadata SET part_kind = ?, part_index = ?, variant_group = ?
WHERE abs_path = ?
`);
  const applyAll = rawDb.transaction(() => {
    groups.forEach((members) => {
      const classified = classifyGroup(
        members.map((member) => ({ key: member.abs, stem: stemOf(member.filename) })),
        patterns,
      );
      for (const item of classified) {
        persist.run(item.partKind, item.partIndex, item.variantGroup, item.key);
      }
    });
  });
  applyAll();
}
/** Filename without its extension (as reported by `path.extname`); unchanged when there is no extension. */
function stemOf(filename: string): string {
  const extLength = path.extname(filename).length;
  return filename.slice(0, filename.length - extLength);
}
/** One subtitle stream embedded in a container, as reported by ffprobe. */
export interface SubtitleStreamInfo {
/** Ordinal among the container's subtitle streams (0-based), not the absolute stream index. */
index: number;
/** Lower-cased ffprobe codec name, or "unknown" when ffprobe reported none. */
codec: string;
/** `language` stream tag, if present. */
language: string | null;
/** `title` stream tag, if present. */
title: string | null;
/** True when the codec is in IMAGE_SUBTITLE_CODECS. */
isImageBased: boolean;
/** True when the codec is in TEXT_SUBTITLE_CODECS. */
isTextBased: boolean;
}
/** ffprobe codec names classified as text-based subtitles. */
const TEXT_SUBTITLE_CODECS = new Set(["subrip", "ass", "ssa", "mov_text", "webvtt", "text"]);
/** ffprobe codec names classified as image (bitmap) subtitles. */
const IMAGE_SUBTITLE_CODECS = new Set(["hdmv_pgs_subtitle", "dvd_subtitle", "dvb_subtitle", "dvbsub", "pgssub"]);
/** Subset of an ffprobe JSON stream entry read by the subtitle probe. */
interface FfprobeStream {
index?: number;
codec_type?: string;
codec_name?: string;
tags?: { language?: string; title?: string };
}
/**
 * Enumerate subtitle streams in a container. Computed on demand — not
 * persisted, since users frequently remux subs in/out and a stale list
 * is worse than re-probing.
 *
 * stdout is decoded as a UTF-8 stream rather than chunk by chunk, so a
 * multi-byte character (e.g. a non-ASCII stream title) that straddles a
 * chunk boundary is not corrupted.
 *
 * @param absPath Path of the container to inspect.
 * @returns One entry per subtitle stream, in container order. [] when
 *          ffprobe is missing, fails, exits non-zero, exceeds
 *          PROBE_TIMEOUT_MS, or prints unparsable JSON.
 */
export async function runFfprobeSubtitles(absPath: string): Promise<SubtitleStreamInfo[]> {
  return new Promise((resolve) => {
    const proc = spawn("ffprobe", [
      "-v", "error",
      "-select_streams", "s",
      "-show_entries", "stream=index,codec_name,codec_type:stream_tags=language,title",
      "-of", "json",
      absPath,
    ]);
    let out = "";
    let settled = false;
    // Resolve exactly once and stop the timeout.
    const settle = (val: SubtitleStreamInfo[]) => {
      if (settled) return;
      settled = true;
      clearTimeout(t);
      resolve(val);
    };
    const t = setTimeout(() => {
      try { proc.kill("SIGKILL"); } catch { /* ignore */ }
      settle([]);
    }, PROBE_TIMEOUT_MS);

    // Let the stream decoder carry partial multi-byte sequences across chunks.
    proc.stdout?.setEncoding("utf8");
    proc.stdout?.on("data", (chunk: string) => { out += chunk; });
    proc.on("error", () => settle([]));
    proc.on("close", (code) => {
      if (code !== 0) { settle([]); return; }
      try {
        const json = JSON.parse(out) as { streams?: FfprobeStream[] };
        const streams = (json.streams ?? []).filter((s) => s.codec_type === "subtitle");
        const result: SubtitleStreamInfo[] = streams.map((s, i) => {
          const codec = (s.codec_name ?? "unknown").toLowerCase();
          return {
            // Use the per-codec_type ordinal — that's what ffmpeg's
            // 0:s:N mapping wants, NOT the absolute stream index.
            index: i,
            codec,
            language: typeof s.tags?.language === "string" ? s.tags.language : null,
            title: typeof s.tags?.title === "string" ? s.tags.title : null,
            isImageBased: IMAGE_SUBTITLE_CODECS.has(codec),
            isTextBased: TEXT_SUBTITLE_CODECS.has(codec),
          };
        });
        settle(result);
      } catch {
        settle([]);
      }
    });
  });
}
/**
 * Probe the first video stream and the container of `absPath` with ffprobe.
 *
 * @param absPath Path passed to ffprobe as the input file.
 * @param signal Optional abort signal; aborting kills ffprobe and rejects.
 * @returns Duration, codec, B-frame count, dimensions and bitrate. The bitrate
 *   is the stream's own value, falling back to the container's when the
 *   stream value parses to null/undefined.
 * @throws Rejects with "ffprobe timed out" after PROBE_TIMEOUT_MS, with
 *   "ffprobe aborted" when `signal` is or becomes aborted, with the spawn
 *   error, with ffprobe's stderr text (or `ffprobe exited <code>`) on a
 *   non-zero exit, or with the JSON parse error.
 */
async function runFfprobe(absPath: string, signal?: AbortSignal): Promise<{
durationSec: number | null;
videoCodec: string | null;
videoBFrames: number | null;
width: number | null;
height: number | null;
videoBitrate: number | null;
}> {
return new Promise((resolve, reject) => {
const proc = spawn("ffprobe", [
"-v", "error",
"-select_streams", "v:0",
"-show_entries", "stream=codec_name,width,height,bit_rate,has_b_frames:format=duration,bit_rate",
"-of", "json",
absPath,
]);
let out = "";
let err = "";
let settled = false;
// Single exit point: the first outcome clears the timer, detaches the abort
// listener and runs `fn`; every later outcome is ignored.
// (timeoutId / abortHandler are declared below; settle is only invoked after
// both have been initialised.)
const settle = (fn: () => void) => {
if (settled) return;
settled = true;
clearTimeout(timeoutId);
if (signal && abortHandler) signal.removeEventListener("abort", abortHandler);
fn();
};
const kill = (message: string) => {
try { proc.kill("SIGKILL"); } catch {}
settle(() => reject(new Error(message)));
};
const timeoutId = setTimeout(() => kill("ffprobe timed out"), PROBE_TIMEOUT_MS);
const abortHandler = signal ? () => kill("ffprobe aborted") : null;
if (signal && abortHandler) {
// Already aborted: kill the process just spawned and skip wiring any listeners.
if (signal.aborted) { kill("ffprobe aborted"); return; }
signal.addEventListener("abort", abortHandler, { once: true });
}
proc.stdout?.on("data", (d) => { out += d.toString(); });
proc.stderr?.on("data", (d) => { err += d.toString(); });
proc.on("error", (e) => settle(() => reject(e)));
proc.on("close", (code) => {
settle(() => {
if (code !== 0) {
reject(new Error(err.trim() || `ffprobe exited ${code}`));
return;
}
try {
const json = JSON.parse(out) as FfprobeJson;
// A file without a video stream yields no entry; fall back to an empty object.
const stream = json.streams?.[0] ?? {};
const streamBitrate = parseFiniteNumber(stream.bit_rate);
const formatBitrate = parseFiniteNumber(json.format?.bit_rate);
resolve({
durationSec: parseFiniteNumber(json.format?.duration),
videoCodec: typeof stream.codec_name === "string" ? stream.codec_name : null,
videoBFrames: parseNonNegativeNumber(stream.has_b_frames),
width: parseFiniteNumber(stream.width),
height: parseFiniteNumber(stream.height),
videoBitrate: streamBitrate ?? formatBitrate,
});
} catch (e) {
reject(e);
}
});
});
});
}
/**
 * Return the stored metadata row for `file`, running ffprobe first unless a
 * stored row passes the isStatMatch check against the file's current size and
 * mtime and already carries either a probe timestamp or a probe error.
 *
 * Both a successful probe and a failed one are upserted into `video_metadata`
 * (keyed by abs_path); the row is then read back. If it cannot be read back,
 * an in-memory record with probeError "metadata unavailable" is returned.
 * The existing row's playback_mode is carried over on every upsert.
 *
 * @param file Video to inspect; `file.abs` is the row key.
 * @param signal Optional abort signal forwarded to runFfprobe.
 * @throws Whatever `fs.stat` rejects with. ffprobe failures are recorded in
 *   `probe_error` (message truncated to 500 characters), not thrown.
 *
 * NOTE(review): a probe rejected by the timeout or by an aborted signal is
 * persisted as `probe_error` like any other failure, and the early return
 * below then keeps serving that row while isStatMatch holds — confirm that
 * transient failures are meant to be sticky.
 */
export async function probeVideoMetadata(file: VideoFile, signal?: AbortSignal): Promise<StoredVideoMetadata> {
const stat = await fs.stat(file.abs);
const existing = getStoredVideoMetadata(file.abs);
if (existing && isStatMatch(existing, stat.size, stat.mtimeMs)) {
// Cached outcome (success or failure) is still considered current.
if (existing.probeError || existing.probedAt != null) return existing;
}
const base = {
absPath: file.abs,
relPath: file.rel,
code: file.code,
sizeBytes: stat.size,
mtimeMs: stat.mtimeMs,
playbackMode: existing?.playbackMode ?? null,
};
try {
const probed = await runFfprobe(file.abs, signal);
// Success: store the probe result and clear any previous probe_error.
rawDb.prepare(`
INSERT INTO video_metadata (
abs_path, rel_path, code, size_bytes, mtime_ms, probed_at, probe_error,
duration_sec, video_codec, video_b_frames, width, height, video_bitrate, playback_mode
) VALUES (?, ?, ?, ?, ?, ?, NULL, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(abs_path) DO UPDATE SET
rel_path = excluded.rel_path,
code = excluded.code,
size_bytes = excluded.size_bytes,
mtime_ms = excluded.mtime_ms,
probed_at = excluded.probed_at,
probe_error = NULL,
duration_sec = excluded.duration_sec,
video_codec = excluded.video_codec,
video_b_frames = excluded.video_b_frames,
width = excluded.width,
height = excluded.height,
video_bitrate = excluded.video_bitrate,
playback_mode = excluded.playback_mode
`).run(
base.absPath, base.relPath, base.code, base.sizeBytes, base.mtimeMs, Date.now(),
probed.durationSec, probed.videoCodec, probed.videoBFrames, probed.width, probed.height, probed.videoBitrate, base.playbackMode,
);
} catch (e) {
// Failure: record the error; the stream columns of an existing row are not
// part of this UPDATE and so keep their previous values.
rawDb.prepare(`
INSERT INTO video_metadata (
abs_path, rel_path, code, size_bytes, mtime_ms, probed_at, probe_error, playback_mode
) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(abs_path) DO UPDATE SET
rel_path = excluded.rel_path,
code = excluded.code,
size_bytes = excluded.size_bytes,
mtime_ms = excluded.mtime_ms,
probed_at = excluded.probed_at,
probe_error = excluded.probe_error,
playback_mode = excluded.playback_mode
`).run(
base.absPath, base.relPath, base.code, base.sizeBytes, base.mtimeMs, Date.now(),
e instanceof Error ? e.message.slice(0, 500) : "ffprobe failed",
base.playbackMode,
);
}
// Re-read so the caller sees exactly what was stored.
return getStoredVideoMetadata(file.abs) ?? {
...base,
probedAt: null,
probeError: "metadata unavailable",
durationSec: null,
videoCodec: null,
videoBFrames: null,
width: null,
height: null,
videoBitrate: null,
partKind: null,
partIndex: null,
variantGroup: null,
};
}
/**
 * Store the playback-mode override for a video, creating its
 * `video_metadata` row when none exists. On conflict only the identity
 * columns and `playback_mode` are rewritten; probe columns are untouched.
 *
 * @param file Video whose row (keyed by `file.abs`) is upserted.
 * @param mode New playback mode, or null to clear it.
 */
export function setVideoPlaybackMode(file: VideoFile, mode: PlaybackMode | null): void {
  const upsert = rawDb.prepare(`
INSERT INTO video_metadata (abs_path, rel_path, code, size_bytes, mtime_ms, playback_mode)
VALUES (?, ?, ?, ?, ?, ?)
ON CONFLICT(abs_path) DO UPDATE SET
rel_path = excluded.rel_path,
code = excluded.code,
size_bytes = excluded.size_bytes,
mtime_ms = excluded.mtime_ms,
playback_mode = excluded.playback_mode
`);
  upsert.run(file.abs, file.rel, file.code, file.size, file.mtime, mode);
}
/**
 * Render a duration in seconds as `M:SS`, or `H:MM:SS` once it reaches an hour.
 * The value is rounded to whole seconds first.
 *
 * @returns null for null/undefined, non-finite, zero or negative input.
 */
export function formatDuration(sec: number | null | undefined): string | null {
  if (sec == null) return null;
  if (!Number.isFinite(sec) || sec <= 0) return null;

  const twoDigits = (value: number): string => String(value).padStart(2, "0");
  const rounded = Math.round(sec);
  const hours = Math.floor(rounded / 3600);
  const minutes = Math.floor((rounded % 3600) / 60);
  const tail = `${twoDigits(rounded % 60)}`;

  return hours > 0
    ? `${hours}:${twoDigits(minutes)}:${tail}`
    : `${minutes}:${tail}`;
}
/**
 * Render a bitrate in bits per second as `N.N Mbps`, `N Kbps` or `N bps`.
 *
 * The unit is chosen from the value as it will be displayed, so inputs just
 * below a boundary roll over to the next unit (999 999 → "1.0 Mbps") instead
 * of printing "1000 Kbps" / "1000 bps".
 *
 * @returns null for null/undefined, non-finite, zero or negative input.
 */
export function formatBitrate(bps: number | null | undefined): string | null {
  if (bps == null || !Number.isFinite(bps) || bps <= 0) return null;
  const roundedKbps = Math.round(bps / 1_000);
  if (roundedKbps >= 1_000) return `${(bps / 1_000_000).toFixed(1)} Mbps`;
  const roundedBps = Math.round(bps);
  if (roundedBps >= 1_000) return `${roundedKbps} Kbps`;
  return `${roundedBps} bps`;
}
/**
 * Render a byte count with binary (1024-based) units from B up to TB.
 * Bytes are shown as a whole number; larger units use two decimals below 10
 * and one decimal from 10 upwards.
 *
 * If rounding for display would print 1024 of a unit (e.g. 1 048 575 bytes →
 * "1024.0 KB"), the value is promoted to the next unit instead ("1.00 MB").
 *
 * @returns null for null/undefined, non-finite or negative input.
 */
export function formatBytes(bytes: number | null | undefined): string | null {
  if (bytes == null || !Number.isFinite(bytes) || bytes < 0) return null;
  const units = ["B", "KB", "MB", "GB", "TB"];
  const lastUnit = units.length - 1;
  const render = (value: number, unit: number): string =>
    unit === 0 ? String(Math.round(value)) : value.toFixed(value >= 10 ? 1 : 2);

  let n = bytes;
  let i = 0;
  while (n >= 1024 && i < lastUnit) {
    n /= 1024;
    i++;
  }
  let text = render(n, i);
  // Rounding can push the displayed figure up to the unit boundary.
  if (i < lastUnit && Number(text) >= 1024) {
    n /= 1024;
    i++;
    text = render(n, i);
  }
  return `${text} ${units[i]}`;
}
/** Render `WIDTHxHEIGHT`; null when either dimension is missing, zero or NaN. */
export function formatResolution(width: number | null | undefined, height: number | null | undefined): string | null {
  const haveBoth = Boolean(width) && Boolean(height);
  return haveBoth ? `${width}x${height}` : null;
}
/**
 * Display names for codecs that should not simply be upper-cased.
 * A Map (rather than an object literal) so that codec strings such as
 * "constructor" or "__proto__" cannot resolve to inherited members.
 */
const CODEC_DISPLAY_NAMES: ReadonlyMap<string, string> = new Map([
  ["h264", "H.264"],
  ["hevc", "HEVC"],
  ["h265", "HEVC"],
  ["av1", "AV1"],
  ["vp9", "VP9"],
  ["mpeg4", "MPEG-4"],
]);

/**
 * Turn an ffprobe codec name into a display label.
 *
 * @param codec Codec name in any letter case.
 * @returns The curated label when one exists, otherwise the name upper-cased;
 *   null for null, undefined or an empty string.
 */
export function formatCodec(codec: string | null | undefined): string | null {
  if (!codec) return null;
  return CODEC_DISPLAY_NAMES.get(codec.toLowerCase()) ?? codec.toUpperCase();
}
/**
 * Build a one-line summary — resolution, codec, bitrate, size, duration —
 * joined with " · ", leaving out any part that has no value.
 *
 * @returns null when `meta` is missing, carries a probe error, or yields no parts.
 */
export function formatVideoSummary(meta: StoredVideoMetadata | null | undefined): string | null {
  if (!meta || meta.probeError) return null;
  const summary: string[] = [];
  const pieces = [
    formatResolution(meta.width, meta.height),
    formatCodec(meta.videoCodec),
    formatBitrate(meta.videoBitrate),
    formatBytes(meta.sizeBytes),
    formatDuration(meta.durationSec),
  ];
  for (const piece of pieces) {
    if (piece) summary.push(piece);
  }
  return summary.length === 0 ? null : summary.join(" · ");
}
+254
View File
@@ -0,0 +1,254 @@
/**
* Token-grammar classifier for video filenames in a JAVID group.
*
* Patterns use a simplified token grammar (option A1 from the mockups):
* - `{N}` — one or more digits, captured as the part index
* - `{L}` — single letter A–Z, captured (A=1, B=2, ...)
* - everything else is a literal character
*
* Patterns match at the END of the filename stem (no extension),
* case-insensitive.
*
* Classification rules for files sharing one normalized JAV code:
* - "part" — stem ends with a configured pattern; index is the
* captured numeric/letter value.
* - "variant" — stem does NOT match any pattern but its prefix
* (first dot-segment) equals a stem that DID match.
* Variants belong to the matching part.
* - "single" — lone file in its code group with no pattern match.
*
* Tiebreak for "default variant" (the one to play first): the file
* whose stem equals the variant_group exactly. Otherwise the
* alphabetically first stem in the group.
*/
/** A token-grammar pattern compiled by `compileToken`. */
export interface CompiledPattern {
/** Original token-grammar source. */
source: string;
/** Compiled regex anchored to end-of-stem (case-insensitive); capture group 1 holds the token text. */
re: RegExp;
/** What the captured token represents: "digits" for `{N}`, "letter" for `{L}`. */
kind: "digits" | "letter";
}
/** Minimal description of one file presented to the classifier. */
export interface ClassifyInput {
/** Stable identifier, opaque to the classifier. */
key: string;
/** Filename stem (no extension), as on disk. */
stem: string;
}
/** Classification of one input file, as returned by `classifyGroup`. */
export interface ClassifyResult {
/** The `key` of the ClassifyInput this result belongs to. */
key: string;
partKind: "part" | "variant" | "single";
/** Index captured from the matched pattern (1-based) for parts; null otherwise. */
partIndex: number | null;
/**
 * For parts: the stem with the matched suffix removed. For variants: the
 * group they were attached to, or — when no file in the group matched any
 * pattern — the shared stem prefix (the file's own stem if that prefix is
 * empty). Null for singles.
 */
variantGroup: string | null;
}
const TOKEN_RE = /\{[NL]\}/g;
/**
 * Compile one token-grammar pattern into an end-anchored, case-insensitive
 * regex whose single capture group holds the `{N}` / `{L}` text.
 *
 * Never throws. Returns null when `source` is empty, contains an
 * unterminated `{`, uses any brace token other than `{N}` or `{L}`,
 * contains more than one capture token, or contains none.
 */
export function compileToken(source: string): CompiledPattern | null {
  if (!source) return null;

  let captured: "digits" | "letter" | null = null;
  const pieces: string[] = [];
  let pos = 0;

  while (pos < source.length) {
    const ch = source[pos]!;
    if (ch !== "{") {
      // Literal character: escape whatever the regex engine treats specially.
      pieces.push(ch.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
      pos += 1;
      continue;
    }

    const end = source.indexOf("}", pos);
    if (end < 0) return null;
    // Any brace token after the first capture is invalid (duplicate or unknown).
    if (captured !== null) return null;

    switch (source.slice(pos, end + 1)) {
      case "{N}":
        pieces.push("(\\d+)");
        captured = "digits";
        break;
      case "{L}":
        pieces.push("([A-Za-z])");
        captured = "letter";
        break;
      default:
        return null;
    }
    pos = end + 1;
  }

  if (captured === null) return null;
  return { source, re: new RegExp(`${pieces.join("")}$`, "i"), kind: captured };
}
/** Compile every entry of `sources` in order, silently leaving out the ones `compileToken` rejects. */
export function compilePatterns(sources: string[]): CompiledPattern[] {
  return sources.flatMap((raw) => {
    const compiled = compileToken(raw);
    return compiled ? [compiled] : [];
  });
}
/**
 * Convert the text captured by a pattern token into a 1-based part index.
 *
 * @param capture Text of capture group 1.
 * @param kind "digits" parses a decimal number; "letter" maps A/a=1 … Z/z=26.
 * @returns The index, or null when the capture is empty, is not plain decimal
 *   digits / an ASCII letter, or evaluates to zero.
 */
function indexFromCapture(capture: string, kind: "digits" | "letter"): number | null {
  if (kind === "digits") {
    // Reject anything Number() would accept beyond plain digits ("0x10", "1e3", " 5").
    if (!/^\d+$/.test(capture)) return null;
    const n = Number(capture);
    return Number.isFinite(n) && n > 0 ? Math.trunc(n) : null;
  }
  // Letter: A=1, B=2, ... charCodeAt(0) is NaN for an empty string, and NaN
  // slips through "< 65 || > 90", so test for membership of the range instead.
  const code = capture.toUpperCase().charCodeAt(0);
  if (!(code >= 65 && code <= 90)) return null;
  return code - 64;
}
interface PatternHit {
partIndex: number;
/** Stem with the matched suffix removed. */
variantGroup: string;
}
/**
 * Find the first pattern, in list order, that matches the end of `stem` and
 * yields a usable index. Patterns that match but produce no index are passed over.
 *
 * @returns The part index plus the stem with the matched suffix cut off, or null.
 */
function tryMatch(stem: string, patterns: CompiledPattern[]): PatternHit | null {
  for (const { re, kind } of patterns) {
    const found = stem.match(re);
    const partIndex = found ? indexFromCapture(found[1] ?? "", kind) : null;
    if (found && partIndex != null) {
      return { partIndex, variantGroup: stem.slice(0, found.index!) };
    }
  }
  return null;
}
/**
 * Classify a group of files that share one normalized JAV code.
 *
 * Behaviour, in order:
 *  1. An empty list yields []. A lone file is always "single" with no
 *     variant group — patterns are not consulted for it.
 *  2. Every stem is tested against `patterns` (see tryMatch). A matching
 *     file becomes a "part" with the captured index and the
 *     suffix-stripped stem as its variantGroup.
 *  3. If nothing matched, every file becomes a "variant" sharing the
 *     punctuation-trimmed longest common prefix of all stems (each file's
 *     own stem when that prefix is empty).
 *  4. Otherwise an unmatched file becomes a "variant" of the longest
 *     matched variantGroup `G` such that its stem equals `G` or starts
 *     with `G.`; with no such group it is a stray "single".
 *
 * NOTE(review): rule 4 compares against the suffix-stripped group. With
 * pattern `-cd{N}`, `XXX-001.fixed` therefore attaches to the group of
 * `XXX-001-cd1`, while `XXX-001-cd1.fixed` does not and ends up "single".
 * The file-header description of "variant" (first dot-segment equals a
 * matched stem) reads differently — confirm which behaviour is intended.
 *
 * @returns One result per input file, in input order.
 */
export function classifyGroup(
files: ClassifyInput[],
patterns: CompiledPattern[],
): ClassifyResult[] {
if (files.length === 0) return [];
if (files.length === 1) {
const only = files[0]!;
return [{ key: only.key, partKind: "single", partIndex: null, variantGroup: null }];
}
// Pass 1: pattern match.
const hits = new Map<string, PatternHit>();
for (const f of files) {
const hit = tryMatch(f.stem, patterns);
if (hit) hits.set(f.key, hit);
}
if (hits.size === 0) {
// No part-style suffixes detected anywhere → treat the whole group
// as variants of one part.
const group = longestCommonPrefix(files.map((f) => f.stem));
return files.map((f) => ({
key: f.key,
partKind: "variant" as const,
partIndex: null,
variantGroup: group || f.stem,
}));
}
// Pass 2: attach unmatched stems to a matched part group.
// A non-matching stem `S` is a variant of part group `G` iff `S === G`
// or `S` starts with `G + "."`, where `G` is a matched stem with its
// part suffix already removed.
const matchedGroupKeys = Array.from(new Set(Array.from(hits.values()).map((h) => h.variantGroup)));
// Sort by length desc so longer (more specific) groups bind first.
matchedGroupKeys.sort((a, b) => b.length - a.length);
const out: ClassifyResult[] = [];
for (const f of files) {
const hit = hits.get(f.key);
if (hit) {
out.push({
key: f.key,
partKind: "part",
partIndex: hit.partIndex,
variantGroup: hit.variantGroup,
});
continue;
}
// Unmatched: try to attach to a part group via dot-prefix.
// Empty groups (a pattern that consumed the whole stem) are never attach targets.
const attached = matchedGroupKeys.find(
(g) => g && (f.stem === g || f.stem.startsWith(g + ".")),
);
if (attached) {
out.push({ key: f.key, partKind: "variant", partIndex: null, variantGroup: attached });
} else {
// No way to attach — the file is a stray. Mark single.
out.push({ key: f.key, partKind: "single", partIndex: null, variantGroup: null });
}
}
return out;
}
/**
 * Longest prefix shared by every string, with trailing whitespace, dots,
 * underscores and hyphens removed so the result does not end on a separator.
 * Returns "" for an empty list or when the strings share nothing.
 */
function longestCommonPrefix(strs: string[]): string {
  const [first, ...rest] = strs;
  if (first === undefined) return "";

  let shared = first.length;
  for (const other of rest) {
    const limit = Math.min(shared, other.length);
    let k = 0;
    while (k < limit && first[k] === other[k]) k++;
    shared = k;
    if (shared === 0) return "";
  }
  return first.slice(0, shared).replace(/[\s._\-]+$/, "");
}
/**
 * Choose which file of a variant group plays by default: the first one whose
 * stem equals `group` exactly, otherwise the one whose stem sorts first under
 * `localeCompare` (the earliest entry wins ties). Null for an empty list.
 */
export function pickDefaultVariant<T extends { stem: string }>(
  variants: T[],
  group: string,
): T | null {
  let fallback: T | null = null;
  for (const candidate of variants) {
    if (candidate.stem === group) return candidate;
    if (fallback === null || candidate.stem.localeCompare(fallback.stem) < 0) {
      fallback = candidate;
    }
  }
  return fallback;
}
/**
 * Short label for a variant relative to its group stem.
 *  - stem equal to the group → "original"
 *  - stem is `<group>.<rest>` → `<rest>` (exactly one dot removed)
 *  - stem starts with the group → the remainder minus leading `.`, `_`, `-`
 *    and whitespace
 *  - anything else → the stem unchanged
 * An empty remainder also yields "original".
 */
export function variantLabel(stem: string, group: string): string {
  if (stem === group) return "original";
  if (!stem.startsWith(group)) return stem;

  const tail = stem.slice(group.length);
  const label = tail.startsWith(".") ? tail.slice(1) : tail.replace(/^[._\-\s]+/, "");
  return label || "original";
}
+81
View File
@@ -0,0 +1,81 @@
import "server-only";
import path from "node:path";
import { getConfiguredVideoRoots } from "./index";
import { getAppSetting } from "@/lib/db/appSettings";
import { isManualSubtitlePath } from "./manualSubtitles";
/**
* In-process set of subtitle paths the user picked via /api/pick-file
* during this session. Covers the case where someone browses a .srt
* sitting outside any indexed video root — the OS picker IS the
* authorization. Entries time out after TTL_MS to bound how long an
* old picked path remains servable.
*/
const TTL_MS = 60 * 60 * 1000; // 1 hour
const trusted = new Map<string, number>();
/** Remove every trusted entry whose expiry time is at or before `now` (epoch ms). */
function pruneExpired(now: number): void {
  trusted.forEach((expiresAt, key) => {
    if (expiresAt <= now) trusted.delete(key);
  });
}
/**
 * Build the map key for a path: the resolved absolute path, lower-cased on
 * Windows so letter-case differences cannot sidestep the guard. Other
 * platforms keep the original case.
 */
function normalize(p: string): string {
  const absolute = path.resolve(p);
  if (process.platform !== "win32") return absolute;
  return absolute.toLowerCase();
}
/**
 * Mark `abs` as session-trusted for the next TTL_MS milliseconds, replacing
 * any earlier expiry for the same normalized path. Expired entries are
 * swept first.
 */
export function trustSubtitlePath(abs: string): void {
  pruneExpired(Date.now());
  const key = normalize(abs);
  const expiresAt = Date.now() + TTL_MS;
  trusted.set(key, expiresAt);
}
/**
 * True when `abs` (after normalization) was trusted via `trustSubtitlePath`
 * and that trust has not yet expired. Expired entries are removed on the way.
 */
export function isSessionTrustedSubtitlePath(abs: string): boolean {
  const now = Date.now();
  pruneExpired(now);

  const key = normalize(abs);
  const expiresAt = trusted.get(key);
  if (expiresAt == null) return false;

  const stillValid = expiresAt > now;
  if (!stillValid) trusted.delete(key);
  return stillValid;
}
/**
 * True when `child` resolves to `parent` itself or to a path beneath it.
 * Both sides are resolved first and lower-cased on Windows; the prefix test
 * includes a trailing separator so `/a/bc` is not considered inside `/a/b`.
 */
function isInside(child: string, parent: string): boolean {
  const fold = (p: string): string => {
    const absolute = path.resolve(p);
    return process.platform === "win32" ? absolute.toLowerCase() : absolute;
  };
  const candidate = fold(child);
  const root = fold(parent);
  if (!root) return false;
  if (candidate === root) return true;

  const prefix = root.endsWith(path.sep) ? root : root + path.sep;
  return candidate.startsWith(prefix);
}
/**
 * Decide whether a subtitle path may be used. `abs` is resolved, then
 * accepted when it is, in this order of checking:
 *   - inside a configured video root,
 *   - inside a configured `subtitleExtraPaths` entry,
 *   - inside `<cwd>/data/generated-subtitles` (WhisperJAV output),
 *   - a session-trusted pick-file path,
 *   - a path accepted by `isManualSubtitlePath`.
 * Empty root entries are ignored.
 *
 * NOTE(review): `path.resolve` collapses `..` segments but does not follow
 * symlinks — confirm a link inside an allowed root cannot lead elsewhere.
 */
export function isAllowedSubtitlePath(abs: string): boolean {
  const target = path.resolve(abs);
  const underAny = (roots: Iterable<string | null | undefined>): boolean => {
    for (const root of roots) {
      if (root && isInside(target, root)) return true;
    }
    return false;
  };

  if (underAny(getConfiguredVideoRoots())) return true;
  if (underAny(getAppSetting("subtitleExtraPaths") ?? [])) return true;
  if (isInside(target, path.join(process.cwd(), "data", "generated-subtitles"))) return true;
  if (isSessionTrustedSubtitlePath(target)) return true;
  if (isManualSubtitlePath(target)) return true;
  return false;
}
+145
View File
@@ -0,0 +1,145 @@
import "server-only";
import path from "node:path";
import fs from "node:fs";
import fsp from "node:fs/promises";
import crypto from "node:crypto";
/**
* Bump on any change to srtToVtt, the ffmpeg arg recipe, or the
* cache-key composition. Old entries become unreachable automatically.
* v1 → initial.
* v2 → added decodeSubtitleBuffer for non-UTF-8 SRTs/VTTs (cp936,
* shift-jis, big5, UTF-16). Existing UTF-8-only entries would
* still be correct but the version bump ensures any cached
* output produced with a buggy decode path is regenerated.
*/
export const CONVERTER_VERSION = 2;
const CACHE_DIR = path.join(process.cwd(), "data", "subtitle-cache");
fs.mkdirSync(CACHE_DIR, { recursive: true });
export type CacheKind = "embedded" | "srt" | "ass" | "ssa";
/** Everything that identifies one cached conversion; hashed by `cachePath`. */
export interface CacheKeyInput {
/** Path of the source file (video container or sidecar subtitle). */
abs: string;
/** Source file size; part of the key, so a changed size yields a different cache file. */
size: number;
/** Source file mtime in milliseconds; rounded to a whole number before hashing. */
mtimeMs: number;
kind: CacheKind;
/** ffmpeg stream index for embedded; ext for sidecar files. */
streamOrExt: string | number;
}
/**
 * Map a cache key to its file under CACHE_DIR. The name is the SHA-1 hex
 * digest of the key fields and CONVERTER_VERSION joined with "|", plus ".vtt".
 */
export function cachePath(input: CacheKeyInput): string {
  const { abs, size, mtimeMs, kind, streamOrExt } = input;
  const fields = [abs, size, Math.round(mtimeMs), kind, String(streamOrExt), CONVERTER_VERSION];
  const digest = crypto.createHash("sha1").update(fields.join("|")).digest("hex");
  return path.join(CACHE_DIR, `${digest}.vtt`);
}
/**
 * Read a cache file, returning null when it cannot be read.
 * A successful read also starts a best-effort update of the file's
 * atime/mtime to "now" so LRU pruning treats the entry as recently used;
 * that update is not awaited and its failure is ignored.
 */
export async function readCache(file: string): Promise<Buffer | null> {
  try {
    const contents = await fsp.readFile(file);
    const touchedAt = Date.now() / 1000; // utimes is given seconds here
    fsp.utimes(file, touchedAt, touchedAt).then(undefined, () => { /* ignore */ });
    return contents;
  } catch {
    return null;
  }
}
let writesSinceLastPrune = 0;
const PRUNE_WRITE_INTERVAL = 25;
/**
 * Write a cache entry atomically: the data goes to a uniquely named temp
 * file beside the target, which is then renamed over it. This avoids partial
 * files if the process is killed mid-write or two requests race on one key.
 *
 * Every PRUNE_WRITE_INTERVAL-th successful write starts a background prune.
 *
 * @throws The write/rename error, after a best-effort removal of the temp file.
 */
export async function writeCache(file: string, data: Buffer | string): Promise<void> {
  // pid + timestamp alone can repeat when two writes for the same key start
  // within the same millisecond; the random suffix keeps their temp files apart.
  const nonce = crypto.randomBytes(6).toString("hex");
  const tmp = `${file}.${process.pid}.${Date.now()}.${nonce}.tmp`;
  try {
    await fsp.writeFile(tmp, data);
    await fsp.rename(tmp, file);
  } catch (e) {
    try { await fsp.unlink(tmp); } catch { /* ignore */ }
    throw e;
  }
  writesSinceLastPrune++;
  if (writesSinceLastPrune >= PRUNE_WRITE_INTERVAL) {
    writesSinceLastPrune = 0;
    // Fire-and-forget, but never let a failed sweep surface as an unhandled rejection.
    pruneSubtitleCacheIfNeeded().catch((err: unknown) => {
      console.warn("[subtitle-cache] background prune failed:", err);
    });
  }
}
/** Outcome of one `pruneSubtitleCacheIfNeeded` sweep. */
interface PruneResult {
/** Number of `.vtt` cache files that were successfully stat'ed. */
scanned: number;
/** Number of files deleted by this sweep. */
removed: number;
/** Total size of the scanned files before any deletion. */
beforeBytes: number;
/** Total size after deletion, computed from the sizes seen during the scan. */
afterBytes: number;
}
/**
 * LRU sweep keyed on file mtime. Walks `data/subtitle-cache/`, sums the
 * size of its `.vtt` files, and if the total exceeds the configured limit,
 * deletes the oldest-mtime entries until the total is at or below 80% of
 * the limit.
 *
 * No-op (all counters zero) when the `subtitleCacheLimitMb` setting is not a
 * positive finite number, or when the cache directory cannot be listed.
 * Files that vanish or cannot be deleted mid-sweep are skipped.
 *
 * @returns Counts and byte totals for this sweep.
 */
export async function pruneSubtitleCacheIfNeeded(): Promise<PruneResult> {
// Settings module is loaded lazily, at call time.
const { getAppSetting } = await import("@/lib/db/appSettings");
const limitMb = Number(getAppSetting("subtitleCacheLimitMb"));
const result: PruneResult = { scanned: 0, removed: 0, beforeBytes: 0, afterBytes: 0 };
if (!Number.isFinite(limitMb) || limitMb <= 0) return result;
const limitBytes = limitMb * 1024 * 1024;
// Deleting down to 80% leaves headroom so the next few writes do not trigger another sweep.
const lowWatermark = Math.floor(limitBytes * 0.8);
let entries: import("node:fs").Dirent[];
try {
entries = await fsp.readdir(CACHE_DIR, { withFileTypes: true });
} catch {
return result;
}
type CacheEntry = { abs: string; size: number; mtimeMs: number };
const items: CacheEntry[] = [];
for (const e of entries) {
// Only finished cache entries count; anything not ending in ".vtt" is ignored.
if (!e.isFile() || !e.name.endsWith(".vtt")) continue;
const abs = path.join(CACHE_DIR, e.name);
try {
const stat = await fsp.stat(abs);
items.push({ abs, size: stat.size, mtimeMs: stat.mtimeMs });
result.scanned++;
result.beforeBytes += stat.size;
} catch { /* file vanished mid-walk; skip */ }
}
if (result.beforeBytes <= limitBytes) {
result.afterBytes = result.beforeBytes;
return result;
}
// Oldest first.
items.sort((a, b) => a.mtimeMs - b.mtimeMs);
let running = result.beforeBytes;
for (const it of items) {
if (running <= lowWatermark) break;
try {
await fsp.unlink(it.abs);
running -= it.size;
result.removed++;
} catch { /* concurrent delete; skip */ }
}
result.afterBytes = running;
if (result.removed > 0) {
console.log(
`[subtitle-cache] pruned ${result.removed}/${result.scanned} files; ${(result.beforeBytes / 1_048_576).toFixed(1)}MB → ${(running / 1_048_576).toFixed(1)}MB`,
);
}
return result;
}
/**
 * Bootstrap entrypoint — schedule one sweep five seconds after module load
 * so other startup work isn't blocked. A failing sweep is logged rather than
 * left as an unhandled promise rejection.
 */
function scheduleBootstrapPrune(): void {
  setTimeout(() => {
    pruneSubtitleCacheIfNeeded().catch((err: unknown) => {
      console.warn("[subtitle-cache] bootstrap prune failed:", err);
    });
  }, 5_000);
}
scheduleBootstrapPrune();
+195
View File
@@ -0,0 +1,195 @@
import "server-only";
import path from "node:path";
import fs from "node:fs/promises";
import iconv from "iconv-lite";
/** Sidecar subtitle extensions recognised by this module (lower-case, with leading dot). */
export const SUBTITLE_EXTS = [".srt", ".vtt", ".ass", ".ssa"] as const;
export type SubtitleExt = (typeof SUBTITLE_EXTS)[number];
// Set form of SUBTITLE_EXTS for membership tests on arbitrary strings.
const SUBTITLE_EXT_SET = new Set<string>(SUBTITLE_EXTS);
/** Three-letter language code used internally. */
export type LangIso = "eng" | "zho" | "jpn";
/** Language preference value; "off" corresponds to no language (null) in isoFromPref / prefFromIso. */
export type LangPref = "EN" | "CN" | "JP" | "off";
/** One subtitle file found by `walkSubtitles`. */
export interface SubtitleFileEntry {
/** Directory path joined with the file name. */
abs: string;
/** File name only, as returned by the directory listing. */
filename: string;
}
/**
 * Collect subtitle files (by extension, case-insensitive) under `root`.
 *
 * @param root Directory to start from (depth 0).
 * @param maxDepth How many directory levels below `root` are entered;
 *   directories at depth `maxDepth` are listed but their sub-directories are not.
 * @returns Matching files; directories that cannot be listed are skipped.
 */
export async function walkSubtitles(root: string, maxDepth = 2): Promise<SubtitleFileEntry[]> {
  const found: SubtitleFileEntry[] = [];
  const pending: Array<{ dir: string; depth: number }> = [{ dir: root, depth: 0 }];

  // LIFO traversal: the most recently discovered directory is listed next.
  for (let frame = pending.pop(); frame !== undefined; frame = pending.pop()) {
    const listing = await fs.readdir(frame.dir, { withFileTypes: true }).catch(() => null);
    if (listing === null) continue;

    for (const entry of listing) {
      const full = path.join(frame.dir, entry.name);
      if (entry.isDirectory()) {
        if (frame.depth < maxDepth) pending.push({ dir: full, depth: frame.depth + 1 });
        continue;
      }
      if (!entry.isFile()) continue;
      if (SUBTITLE_EXT_SET.has(path.extname(entry.name).toLowerCase())) {
        found.push({ abs: full, filename: entry.name });
      }
    }
  }
  return found;
}
/** Preference value → language code, for every preference except "off". */
const PREF_TO_ISO: Record<Exclude<LangPref, "off">, LangIso> = { EN: "eng", CN: "zho", JP: "jpn" };

/** Language code → preference value; the inverse of PREF_TO_ISO. */
const ISO_TO_PREF: Record<LangIso, Exclude<LangPref, "off">> = { eng: "EN", zho: "CN", jpn: "JP" };

/** Language code for a preference; "off" maps to null. */
export function isoFromPref(pref: LangPref): LangIso | null {
  if (pref === "off") return null;
  return PREF_TO_ISO[pref];
}

/** Preference for a language code; null maps to "off". */
export function prefFromIso(iso: LangIso | null): LangPref {
  if (iso == null) return "off";
  return ISO_TO_PREF[iso];
}
const ENGLISH_TOKENS = new Set(["en", "eng", "english"]);
const CHINESE_TOKENS = new Set([
  "zh", "zho", "chi", "chs", "cht", "chn", "cn", "chinese",
  "schinese", "tchinese", "simplified", "traditional",
  "zh-cn", "zh-tw", "zh-hans", "zh-hant",
]);
const JAPANESE_TOKENS = new Set(["ja", "jp", "jpn", "japanese", "jap"]);

/** Look one already-lower-cased token up in the three token sets. */
function matchLanguageToken(token: string): LangIso | null {
  if (ENGLISH_TOKENS.has(token)) return "eng";
  if (CHINESE_TOKENS.has(token)) return "zho";
  if (JAPANESE_TOKENS.has(token)) return "jpn";
  return null;
}

/**
 * Map a free-form language tag to one of the supported language codes.
 *
 * Matching ignores case and surrounding whitespace and treats `_` as `-`.
 * The whole tag is tried first; if that fails and the tag has a `-`
 * qualifier (region or script, e.g. "en-US", "ja_JP", "zh-Hant-TW"), the
 * part before the first `-` is tried on its own.
 *
 * @returns "eng", "zho" or "jpn"; null for empty, missing or unrecognised tags.
 */
export function normalizeLanguageTag(tag: string | null | undefined): LangIso | null {
  if (!tag) return null;
  const lower = tag.trim().toLowerCase().replace(/_/g, "-");
  if (!lower) return null;

  const exact = matchLanguageToken(lower);
  if (exact) return exact;

  const dash = lower.indexOf("-");
  return dash > 0 ? matchLanguageToken(lower.slice(0, dash)) : null;
}
/** English display name for a language code; "Unknown" for null or anything unrecognised. */
export function languageDisplay(iso: LangIso | null): string {
  switch (iso) {
    case "eng":
      return "English";
    case "zho":
      return "Chinese";
    case "jpn":
      return "Japanese";
    default:
      return "Unknown";
  }
}
const TOKEN_SPLIT = /[\s._\-\[\]()+,;]+/g;
export interface DetectedLanguage {
/** Single ISO code if exactly one language was detected. */
lang: LangIso | null;
/** Display label — "English", "Chinese", "English/Chinese", "Unknown". */
label: string;
}
/**
 * Look for language hints in a filename. The extension is removed, the rest
 * is lower-cased and split on TOKEN_SPLIT, and each token is passed to
 * `normalizeLanguageTag`.
 *
 *  - no hit    → `{ lang: null, label: "Unknown" }`
 *  - one hit   → that language and its display name
 *  - several   → `lang` stays null (so sticky-pref matching only ever
 *                resolves to a single language) and the label joins the
 *                display names with "/" in the fixed order English,
 *                Chinese, Japanese.
 */
export function detectLanguageFromName(filename: string): DetectedLanguage {
  const ext = path.extname(filename).toLowerCase();
  const stem = ext ? filename.slice(0, -ext.length) : filename;

  const hits = new Set<LangIso>();
  stem.toLowerCase().split(TOKEN_SPLIT).forEach((token) => {
    const iso = token ? normalizeLanguageTag(token) : null;
    if (iso) hits.add(iso);
  });
  if (hits.size === 0) return { lang: null, label: "Unknown" };

  const ordered = (["eng", "zho", "jpn"] as LangIso[]).filter((iso) => hits.has(iso));
  if (hits.size === 1) {
    const only = [...hits][0]!;
    return { lang: only, label: languageDisplay(only) };
  }
  return { lang: null, label: ordered.map(languageDisplay).join("/") };
}
const SRT_TIMESTAMP = /(\d{1,2}:\d{2}:\d{2}),(\d{3})/g;

/**
 * Pure JS SRT → WebVTT conversion: drop a leading BOM, turn CRLF and lone CR
 * into LF, replace the comma in every `H:MM:SS,mmm` timestamp with a dot,
 * trim leading whitespace and prepend the `WEBVTT` header. Styling markup is
 * passed through untouched. Cheap enough to run on every sidecar cache miss
 * without spawning ffmpeg.
 */
export function srtToVtt(srt: string): string {
  const withoutBom = srt.startsWith("\uFEFF") ? srt.slice(1) : srt;
  const cues = withoutBom
    .replace(/\r\n?/g, "\n")
    .replace(SRT_TIMESTAMP, (_whole, clock: string, millis: string) => `${clock}.${millis}`)
    .trimStart();
  return "WEBVTT\n\n" + cues;
}
/** Drop the trailing extension (as reported by `path.extname`) from a filename. */
export function stemOf(filename: string): string {
  const extLength = path.extname(filename).length;
  // extLength is 0 when there is no extension, which leaves the name whole.
  return filename.slice(0, filename.length - extLength);
}
/**
 * U+FFFD, the character decoders emit for bytes they cannot map. Written as
 * an escape so the constant survives tools that drop or mangle the raw
 * character — an empty string here would make every candidate score 0.
 */
const REPLACEMENT_CHAR = "\uFFFD";

/**
 * Decode a subtitle file buffer to a JS string with best-effort
 * encoding detection. Many older Asian SRTs ship as cp936/GBK or
 * Shift-JIS — feeding them through `Buffer.toString("utf8")` produces
 * mojibake. Strategy:
 *   1. A BOM (UTF-8 / UTF-16 LE / UTF-16 BE) decides the encoding outright
 *      and is stripped.
 *   2. Try strict UTF-8. If the whole buffer is valid, use it.
 *   3. Otherwise decode as utf8 / shift_jis / gb18030 / big5 and keep the
 *      candidate with the fewest replacement characters. Earlier candidates
 *      win ties.
 *   4. Bias: utf8 always gets a one-point bonus, and shift_jis gets one when
 *      its decoded text contains hiragana/katakana, because gb18030 maps many
 *      Japanese byte sequences without errors yet yields gibberish that the
 *      replacement count alone would not expose.
 */
export function decodeSubtitleBuffer(buf: Buffer): string {
  // BOM detection — if present, the encoding is unambiguous.
  if (buf.length >= 3 && buf[0] === 0xef && buf[1] === 0xbb && buf[2] === 0xbf) {
    return buf.subarray(3).toString("utf8");
  }
  if (buf.length >= 2 && buf[0] === 0xff && buf[1] === 0xfe) {
    return iconv.decode(buf.subarray(2), "utf-16le");
  }
  if (buf.length >= 2 && buf[0] === 0xfe && buf[1] === 0xff) {
    return iconv.decode(buf.subarray(2), "utf-16be");
  }

  // UTF-8 strict — fast path for the common case.
  try {
    return new TextDecoder("utf-8", { fatal: true }).decode(buf);
  } catch { /* fall through to heuristic */ }

  // Hiragana and Katakana blocks, U+3040–U+30FF.
  const kana = /[\u3040-\u30ff]/;
  const candidates = ["utf8", "shift_jis", "gb18030", "big5"] as const;
  let best: { text: string; score: number } | null = null;

  for (const encoding of candidates) {
    const text = iconv.decode(buf, encoding);
    let bad = 0;
    for (let i = 0; i < text.length; i++) {
      if (text[i] === REPLACEMENT_CHAR) bad++;
    }
    const bonus =
      encoding === "utf8" || (encoding === "shift_jis" && kana.test(text)) ? 1 : 0;
    const score = bad - bonus;
    if (best == null || score < best.score) {
      best = { text, score };
    }
  }
  return best?.text ?? buf.toString("utf8");
}