315 lines
10 KiB
TypeScript
315 lines
10 KiB
TypeScript
import "server-only";
|
|
import { spawn, type ChildProcess } from "node:child_process";
|
|
import path from "node:path";
|
|
import fs from "node:fs";
|
|
import fsp from "node:fs/promises";
|
|
import { randomUUID } from "node:crypto";
|
|
import type { WhisperJavSettings } from "@/lib/db/appSettings";
|
|
|
|
/**
 * How long, in milliseconds, `verifyCli` lets the `--version` probe run
 * before it kills the child and reports a timeout.
 */
const VERIFY_TIMEOUT_MS = 30 * 1_000;
|
|
|
|
/**
 * Matches a line that consists only of box-drawing characters and
 * whitespace. The empty string matches as well.
 */
const BANNER_CHARS = /^[╔╗╚╝═║│┌┐└┘─\s]*$/;

/** Patterns for output lines that `isNoise` classifies as noise. */
const NOISE_PATTERNS: RegExp[] = [
  // Dependency-warning chatter (case-insensitive on the warning name).
  /RequestsDependencyWarning/i,
  /urllib3 \(\d+\.\d+/,
  /chardet|charset_normalizer/,
  // Untrusted-repository notice text.
  /You are about to download and run code from an untrusted repository/,
  // Lines that start with a download announcement.
  /^Downloading: /,
  // Warning lines and their `warnings.warn` source echo.
  /UserWarning:/,
  /^\s*warnings\.warn/,
  /^_check_repo_is_trusted/,
  /^\s*$/, // blank
];

/**
 * Decides whether a single output line is noise.
 *
 * @param line One line of CLI output, without its line terminator.
 * @returns `true` when the line is a banner frame / blank line or matches
 *          any entry of `NOISE_PATTERNS`; `false` otherwise.
 */
function isNoise(line: string): boolean {
  return BANNER_CHARS.test(line) || NOISE_PATTERNS.some((pattern) => pattern.test(line));
}
|
|
|
|
/**
 * Matches `Step <index>/<total>: <label>` up to the end of the line. The
 * match may start anywhere in the line; whitespace around the slash and
 * after the colon is tolerated.
 */
const STAGE_RE = /Step\s+(\d+)\s*\/\s*(\d+):\s*(.+?)\s*$/;

/** A progress marker extracted from one line of output. */
export interface ParsedStage {
  /** The first number of the `Step a/b` pair, as printed. */
  index: number;
  /** The second number of the `Step a/b` pair, as printed. */
  total: number;
  /** The text after the colon, with surrounding whitespace removed. */
  stage: string;
}

/**
 * Extracts a stage marker from a line of output.
 *
 * @param line A single output line.
 * @returns The parsed marker, or `null` when the line carries no
 *          `Step a/b: label` marker or either number is not finite.
 */
export function parseStageLine(line: string): ParsedStage | null {
  const match = STAGE_RE.exec(line);
  if (match === null) return null;

  const [, rawIndex, rawTotal, rawStage] = match;
  const index = Number(rawIndex);
  const total = Number(rawTotal);
  const bothFinite = Number.isFinite(index) && Number.isFinite(total);

  return bothFinite ? { index, total, stage: rawStage!.trim() } : null;
}
|
|
|
|
/** Captures the `x.y.z` version from a banner such as `WhisperJAV 1.2.3`. */
const VERSION_RE = /WhisperJAV\s+(\d+\.\d+\.\d+)/;

/** Outcome of probing a CLI binary with `--version`. */
export interface VerifyResult {
  /** `true` when a version banner was found in the probe's output. */
  ok: boolean;
  /** The `x.y.z` version taken from the banner; set only when `ok`. */
  version?: string;
  /** The path that was probed; set only when `ok`. */
  resolvedPath?: string;
  /** Failure description; set only when not `ok`. */
  error?: string;
}

/**
 * Runs `<cliPath> --version` and looks for the `WhisperJAV x.y.z` banner.
 *
 * The banner is searched for in stdout and stderr combined, so other
 * stderr content (for example dependency warnings) does not affect the
 * result once the banner is found. The exit code is not consulted.
 *
 * @param cliPath Executable to probe.
 * @returns A promise that always resolves (never rejects). On failure
 *          `error` holds the spawn error message, `"verify timed out"`,
 *          the last three stderr lines, or `"no version detected in output"`.
 */
export function verifyCli(cliPath: string): Promise<VerifyResult> {
  return new Promise((resolve) => {
    const startedAt = Date.now();
    const elapsedMs = (): number => Date.now() - startedAt;
    // Split on LF or CRLF so Windows output does not leave stray "\r".
    const lastLines = (text: string, count: number): string[] =>
      text.trim().split(/\r?\n/).slice(-count);

    let proc: ChildProcess;
    try {
      proc = spawn(cliPath, ["--version"], { stdio: ["ignore", "pipe", "pipe"] });
    } catch (e) {
      const message = e instanceof Error ? e.message : String(e);
      console.error(`[whisperjav verify] spawn failed (${elapsedMs()}ms):`, message);
      resolve({ ok: false, error: message });
      return;
    }

    let stdout = "";
    let stderr = "";
    let settled = false;

    // First caller wins: timeout, 'error' and 'close' can all fire for one run.
    const settle = (val: VerifyResult): void => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      console.log(`[whisperjav verify] ${val.ok ? "ok" : "fail"} in ${elapsedMs()}ms`);
      resolve(val);
    };

    const timer = setTimeout(() => {
      try {
        proc.kill("SIGKILL");
      } catch {
        /* the process may already be gone */
      }
      console.error(
        `[whisperjav verify] timeout after ${VERIFY_TIMEOUT_MS}ms; stdout="${stdout.trim()}" stderr_tail="${lastLines(stderr, 3).join(" | ")}"`,
      );
      settle({ ok: false, error: "verify timed out" });
    }, VERIFY_TIMEOUT_MS);

    // Decode at the stream level so a multi-byte character that straddles
    // two chunks is not corrupted by per-chunk toString().
    proc.stdout?.setEncoding("utf8");
    proc.stderr?.setEncoding("utf8");
    proc.stdout?.on("data", (chunk: string) => {
      stdout += chunk;
    });
    proc.stderr?.on("data", (chunk: string) => {
      stderr += chunk;
    });

    proc.on("error", (e) => settle({ ok: false, error: e.message }));
    proc.on("close", () => {
      const version = VERSION_RE.exec(`${stdout}\n${stderr}`)?.[1];
      if (version !== undefined) {
        settle({ ok: true, version, resolvedPath: cliPath });
        return;
      }
      settle({
        ok: false,
        error: lastLines(stderr, 3).join("\n") || "no version detected in output",
      });
    });
  });
}
|
|
|
|
/**
 * Searches PATH for a `whisperjav` executable using the platform's lookup
 * tool (`where` on Windows, `which` elsewhere).
 *
 * @returns The first non-blank line the tool prints, trimmed, or `null`
 *          when the tool cannot be started or prints nothing. Never rejects.
 */
export async function autoDetectCli(): Promise<string | null> {
  const lookupTool = process.platform === "win32" ? "where" : "which";

  return new Promise<string | null>((resolve) => {
    let child: ChildProcess;
    try {
      child = spawn(lookupTool, ["whisperjav"], { stdio: ["ignore", "pipe", "ignore"] });
    } catch {
      resolve(null);
      return;
    }

    const pieces: string[] = [];
    child.stdout?.on("data", (d) => {
      pieces.push(d.toString());
    });

    child.on("error", () => resolve(null));
    child.on("close", () => {
      for (const rawLine of pieces.join("").split(/\r?\n/)) {
        const candidate = rawLine.trim();
        if (candidate) {
          resolve(candidate);
          return;
        }
      }
      resolve(null);
    });
  });
}
|
|
|
|
/** Maps the `quality` setting onto the value passed after `--mode`. */
const QUALITY_TO_MODE: Record<WhisperJavSettings["quality"], string> = {
  fast: "fast",
  balanced: "balanced",
  qwen: "qwen",
};

/**
 * Assembles the argument vector for a generation job. The program name is
 * not included.
 *
 * @param opts.videoAbs   Video path; emitted as the first, positional argument.
 * @param opts.outputDir  Value for `--output-dir`.
 * @param opts.statsPath  Value for `--stats-file`.
 * @param opts.settings   Supplies mode, languages, sensitivity and whether
 *                        `--no-signature` is appended.
 * @returns A new array of arguments in a fixed order.
 */
export function buildJobArgs(opts: {
  videoAbs: string;
  outputDir: string;
  statsPath: string;
  settings: WhisperJavSettings;
}): string[] {
  const { videoAbs, outputDir, statsPath, settings } = opts;

  const argv: string[] = [videoAbs];
  argv.push("--mode", QUALITY_TO_MODE[settings.quality]);
  argv.push("--language", settings.sourceLanguage);
  argv.push("--subs-language", settings.outputMode);
  argv.push("--sensitivity", settings.sensitivity);
  argv.push("--output-dir", outputDir);
  argv.push("--no-progress");
  argv.push("--verbosity", "summary");
  argv.push("--stats-file", statsPath);

  // Optional trailing flag.
  if (settings.noSignature) {
    argv.push("--no-signature");
  }
  return argv;
}
|
|
|
|
/** Handle returned by `spawnJob`. */
export interface SpawnedJob {
  /** The spawned child process; stdout and stderr are piped, stdin is not connected. */
  proc: ChildProcess;
  /**
   * Best-effort termination, delegated to `killTree` (which on Windows
   * cascades to child processes via `taskkill /T`).
   */
  kill: () => void;
}

/**
 * Starts a generation job. Nothing is attached to the child's output here;
 * the caller is expected to wire the stdout/stderr consumers.
 *
 * @param cliPath Executable to run.
 * @param args    Arguments passed to it verbatim.
 * @returns The child process together with a `kill` function for it.
 */
export function spawnJob(cliPath: string, args: string[]): SpawnedJob {
  const child = spawn(cliPath, args, { stdio: ["ignore", "pipe", "pipe"] });

  const job: SpawnedJob = {
    proc: child,
    kill: () => {
      killTree(child);
    },
  };
  return job;
}
|
|
|
|
/**
 * Best-effort termination of a spawned process.
 *
 * - Does nothing when the process has no pid or has already exited, so a
 *   stale (possibly reused) pid is never targeted.
 * - Windows: runs `taskkill /pid <pid> /T /F`, which cascades to child
 *   processes and forces termination. Fire-and-forget.
 * - Elsewhere: sends SIGTERM to the process and escalates to SIGKILL after
 *   a 3 second grace period. Only the process itself is signalled by this
 *   function. The escalation is cancelled as soon as the process exits.
 *
 * Never throws.
 */
function killTree(proc: ChildProcess): void {
  if (!proc.pid) return;
  if (proc.exitCode !== null || proc.signalCode !== null) return;

  if (process.platform === "win32") {
    try {
      const killer = spawn("taskkill", ["/pid", String(proc.pid), "/T", "/F"], { stdio: "ignore" });
      // Spawn failures are reported asynchronously; without a listener they
      // would surface as an unhandled 'error' event.
      killer.on("error", () => {
        /* best effort */
      });
    } catch {
      /* ignore */
    }
    return;
  }

  try {
    proc.kill("SIGTERM");
  } catch {
    /* ignore */
  }

  // Escalate after a short grace period, unless the process exits first.
  const escalation = setTimeout(() => {
    try {
      proc.kill("SIGKILL");
    } catch {
      /* ignore */
    }
  }, 3000);
  proc.once("exit", () => clearTimeout(escalation));
}
|
|
|
|
/**
 * The subset of one stats-file entry that `validateOutcome` inspects.
 * Every field is optional because the file's content is not validated
 * before it is read through this type.
 */
export interface StatsEntry {
  /** Outcome label; `validateOutcome` accepts only `"success"`. */
  status?: string;
  metadata?: {
    /** `final_srt` is the path of the finished subtitle file. */
    output_files?: {
      final_srt?: string;
    };
    /** `final_subtitles_refined` is reported to callers as the cue count. */
    summary?: {
      final_subtitles_refined?: number;
    };
    /** A non-empty list makes `validateOutcome` fail the run. */
    errors?: unknown[];
  };
}
|
|
|
|
/** Verdict produced by `validateOutcome` for a finished job. */
export interface ValidationResult {
  /**
   * `true` when the run is accepted: exit code 0 and either the stats
   * entry reports success with no errors and an existing final `.srt`, or
   * the stats file could not be read and exactly one `.srt` was found.
   */
  success: boolean;
  /**
   * Set together with `success` when the result deserves a second look:
   * the cue count is 0, or the run was accepted without stats.
   */
  warning: boolean;
  /** Path of the accepted subtitle file; `null` on failure. */
  finalSrtPath: string | null;
  /** Cue count taken from the stats; `null` when unavailable. */
  cueCount: number | null;
  /** Human-readable explanation for a failure or warning; `null` otherwise. */
  reason: string | null;
}
|
|
|
|
/**
 * Decides whether a finished job produced a usable subtitle file.
 *
 * Checks, in order:
 * 1. the exit code is 0;
 * 2. the stats file can be read — if not, the run is still accepted (with
 *    `warning`) when the job directory holds exactly one `.srt`;
 * 3. the stats file is JSON, an array, and its first element is an object;
 * 4. that entry has `status === "success"` and no listed errors;
 * 5. `metadata.output_files.final_srt` is a non-empty string naming an
 *    existing path.
 *
 * Values read from the stats file are type-checked at runtime: a cue count
 * that is not a number is reported as `null`.
 *
 * @param opts.exitCode  Exit code of the job (`null` fails validation).
 * @param opts.statsPath Location of the stats file.
 * @param opts.jobDir    Directory searched by the stats-missing fallback.
 * @returns The verdict. Expected failures are reported through `reason`
 *          rather than by rejecting.
 */
export async function validateOutcome(opts: {
  exitCode: number | null;
  statsPath: string;
  jobDir: string;
}): Promise<ValidationResult> {
  const fail = (reason: string): ValidationResult => ({
    success: false,
    warning: false,
    finalSrtPath: null,
    cueCount: null,
    reason,
  });

  if (opts.exitCode !== 0) {
    return fail(`exit code ${opts.exitCode}`);
  }

  let raw: string;
  try {
    raw = await fsp.readFile(opts.statsPath, "utf8");
  } catch {
    // Stats missing fallback: accept if exactly one .srt exists in jobDir.
    const stray = await findSingleSrt(opts.jobDir);
    if (stray === null) {
      return fail("stats.json missing and no .srt found");
    }
    return {
      success: true,
      warning: true,
      finalSrtPath: stray,
      cueCount: null,
      reason: "stats unavailable, accepted by file presence",
    };
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return fail("stats.json malformed");
  }

  // Only the first array element is consulted, and it must be an object.
  const first: unknown = Array.isArray(parsed) ? parsed[0] : undefined;
  if (typeof first !== "object" || first === null) {
    return fail("stats.json has no entries");
  }
  const entry = first as StatsEntry;

  if (entry.status !== "success") {
    return fail(`stats reports status=${entry.status}`);
  }

  const errors: unknown = entry.metadata?.errors;
  if (Array.isArray(errors) && errors.length > 0) {
    return fail(`stats reports ${errors.length} error(s)`);
  }

  const finalSrt: unknown = entry.metadata?.output_files?.final_srt;
  if (typeof finalSrt !== "string" || finalSrt === "") {
    return fail("final_srt missing from disk");
  }
  try {
    // Non-blocking existence check.
    await fsp.access(finalSrt);
  } catch {
    return fail("final_srt missing from disk");
  }

  const rawCount: unknown = entry.metadata?.summary?.final_subtitles_refined;
  const cueCount = typeof rawCount === "number" ? rawCount : null;
  const warning = cueCount === 0;

  return {
    success: true,
    warning,
    finalSrtPath: finalSrt,
    cueCount,
    reason: warning ? "0 cues — likely no speech" : null,
  };
}

/**
 * Returns the path of the only `.srt` file (case-insensitive extension)
 * directly inside `dir`, or `null` when the directory cannot be read or
 * holds zero or several such files. Subdirectories are not searched.
 */
async function findSingleSrt(dir: string): Promise<string | null> {
  let entries: import("node:fs").Dirent[];
  try {
    entries = await fsp.readdir(dir, { withFileTypes: true });
  } catch {
    return null;
  }

  const names: string[] = [];
  for (const item of entries) {
    if (item.isFile() && item.name.toLowerCase().endsWith(".srt")) {
      names.push(item.name);
    }
  }

  const [only, ...others] = names;
  return only !== undefined && others.length === 0 ? path.join(dir, only) : null;
}
|
|
|
|
/** Creates a new job identifier: a random UUID from `node:crypto`. */
export function newJobId(): string {
  const id = randomUUID();
  return id;
}
|
|
|
|
/**
 * Directory under which every job gets its own folder:
 * `<current working directory>/data/whisperjav-jobs`. The working directory
 * is read at call time.
 */
export function jobBaseDir(): string {
  const segments = ["data", "whisperjav-jobs"];
  return path.join(process.cwd(), ...segments);
}

/**
 * Folder for one job: `id` joined onto `jobBaseDir()`.
 *
 * @param id Job identifier, used as the folder name without any checks.
 */
export function jobDirFor(id: string): string {
  const base = jobBaseDir();
  return path.join(base, id);
}
|
|
|
|
/**
 * Moves a file, creating the destination's parent directories first.
 *
 * A plain rename is attempted; when it fails with `EXDEV` (source and
 * destination on different filesystems or drives) the file is copied and
 * the source is then removed on a best-effort basis.
 *
 * @param src  File to move.
 * @param dest Target path.
 * @throws Any rename error other than `EXDEV`, and any mkdir or copy error.
 *         A failure to delete the source after copying is ignored.
 */
export async function moveFile(src: string, dest: string): Promise<void> {
  await fsp.mkdir(path.dirname(dest), { recursive: true });

  let renamed = true;
  try {
    await fsp.rename(src, dest);
  } catch (err) {
    if ((err as NodeJS.ErrnoException).code !== "EXDEV") {
      throw err;
    }
    renamed = false;
  }
  if (renamed) return;

  await fsp.copyFile(src, dest);
  try {
    await fsp.unlink(src);
  } catch {
    /* best effort */
  }
}
|
|
|
|
/**
 * Heuristic writability check: creates an empty probe file inside `dir`
 * and removes it again.
 *
 * @param dir Directory to test.
 * @returns `true` when the probe file could be written (a failure to
 *          delete it afterwards is ignored), `false` when writing failed.
 */
export async function isDirWritable(dir: string): Promise<boolean> {
  // pid + timestamp keep the probe name distinct between processes and calls.
  const probeName = `.pinkudex-write-probe-${process.pid}-${Date.now()}`;
  const probePath = path.join(dir, probeName);

  try {
    await fsp.writeFile(probePath, "");
  } catch {
    return false;
  }

  try {
    await fsp.unlink(probePath);
  } catch {
    /* ignore */
  }
  return true;
}
|