Initial commit
This commit is contained in:
@@ -0,0 +1,202 @@
|
||||
import "server-only";
|
||||
import { rawDb } from "@/lib/db/client";
|
||||
import type { JobRow, JobStatus } from "./types";
|
||||
|
||||
/**
 * Raw shape of a `whisperjav_jobs` row as handed back by the SQLite driver.
 * Columns are snake_case; `rowFromDb` converts them to the camelCase `JobRow`.
 */
interface JobDbRow {
  // --- identity / inputs ---
  id: string;
  code: string;
  video_abs: string;
  job_dir: string;
  /** Inserted as NULL; written by `setStatus` via `targetSubtitlePath`. */
  target_subtitle_path: string | null;

  // --- lifecycle (timestamps are epoch milliseconds, written from Date.now()) ---
  status: JobStatus;
  enqueued_at: number;
  started_at: number | null;
  ended_at: number | null;
  exit_code: number | null;
  error: string | null;

  // --- progress, written by `updateProgress` ---
  stage: string | null;
  stage_index: number | null;
  stage_total: number | null;
  cue_count: number | null;

  // --- bookkeeping ---
  /** JSON-encoded argv array (callers store `JSON.stringify(args)`). */
  cli_args: string;
  log_path: string;
  stats_path: string | null;
  video_duration_sec: number | null;
  mode: string | null;
}
|
||||
|
||||
/** Translate a snake_case database row into the camelCase `JobRow` used by callers. */
function rowFromDb(r: JobDbRow): JobRow {
  const {
    video_abs: videoAbs,
    job_dir: jobDir,
    target_subtitle_path: targetSubtitlePath,
    enqueued_at: enqueuedAt,
    started_at: startedAt,
    ended_at: endedAt,
    exit_code: exitCode,
    stage_index: stageIndex,
    stage_total: stageTotal,
    cue_count: cueCount,
    cli_args: cliArgs,
    log_path: logPath,
    stats_path: statsPath,
    video_duration_sec: videoDurationSec,
  } = r;

  // Key order is kept identical to the column order of the table.
  return {
    id: r.id,
    code: r.code,
    videoAbs,
    jobDir,
    targetSubtitlePath,
    status: r.status,
    enqueuedAt,
    startedAt,
    endedAt,
    exitCode,
    error: r.error,
    stage: r.stage,
    stageIndex,
    stageTotal,
    cueCount,
    cliArgs,
    logPath,
    statsPath,
    videoDurationSec,
    mode: r.mode,
  };
}
|
||||
|
||||
/**
 * Insert a freshly enqueued job. Lifecycle and progress columns (target
 * subtitle path, start/end time, exit code, error, stage info, cue count)
 * are always written as NULL; they are filled in later by `setStatus` and
 * `updateProgress`.
 */
export function insertJob(row: Omit<JobRow, "startedAt" | "endedAt" | "exitCode" | "error" | "stage" | "stageIndex" | "stageTotal" | "cueCount" | "targetSubtitlePath">): void {
  const statement = rawDb.prepare(`
    INSERT INTO whisperjav_jobs (
      id, code, video_abs, job_dir, target_subtitle_path, status,
      enqueued_at, started_at, ended_at, exit_code, error,
      stage, stage_index, stage_total, cue_count,
      cli_args, log_path, stats_path, video_duration_sec, mode
    ) VALUES (?, ?, ?, ?, NULL, ?, ?, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, ?, ?, ?, ?, ?)
  `);
  // Positional parameters, in the same order as the `?` placeholders above.
  const params = [
    row.id,
    row.code,
    row.videoAbs,
    row.jobDir,
    row.status,
    row.enqueuedAt,
    row.cliArgs,
    row.logPath,
    row.statsPath,
    row.videoDurationSec,
    row.mode,
  ];
  statement.run(...params);
}
|
||||
|
||||
/** Per-mode seed multiplier used whenever no usable history exists. */
function seedRealtimeMultiplier(mode: string): number {
  if (mode === "fast") return 0.8;
  if (mode === "qwen") return 6.0;
  return 2.0; // balanced default
}

/**
 * Returns avg(elapsed_sec / video_duration_sec) over the 10 most recently
 * ended successful ('completed' or 'warning') jobs for the given mode. Used
 * to estimate remaining time for an in-flight job.
 *
 * Falls back to the per-mode seed when there is no history, and also when
 * every historical row has a non-positive elapsed time. Previously that
 * second case returned a flat 2.0 regardless of mode.
 *
 * @param mode Value of the `mode` column to filter on.
 * @returns Processing seconds per second of video.
 */
export function estimateRealtimeMultiplier(mode: string): number {
  const rows = rawDb
    .prepare(
      `SELECT started_at, ended_at, video_duration_sec
       FROM whisperjav_jobs
       WHERE status IN ('completed', 'warning')
         AND mode = ?
         AND started_at IS NOT NULL
         AND ended_at IS NOT NULL
         AND video_duration_sec IS NOT NULL
         AND video_duration_sec > 0
       ORDER BY ended_at DESC
       LIMIT 10`,
    )
    .all(mode) as Array<{ started_at: number; ended_at: number; video_duration_sec: number }>;

  let sum = 0;
  let n = 0;
  for (const r of rows) {
    // Timestamps are epoch milliseconds; convert the span to seconds.
    const elapsedSec = (r.ended_at - r.started_at) / 1000;
    // Skip clock-skewed or otherwise unusable samples (also rejects NaN).
    if (!(elapsedSec > 0)) continue;
    sum += elapsedSec / r.video_duration_sec;
    n++;
  }
  return n > 0 ? sum / n : seedRealtimeMultiplier(mode);
}
|
||||
|
||||
/** Look up a single job by id; returns null when no such row exists. */
export function getJob(id: string): JobRow | null {
  const found = rawDb
    .prepare(`SELECT * FROM whisperjav_jobs WHERE id = ?`)
    .get(id) as JobDbRow | undefined;
  if (!found) return null;
  return rowFromDb(found);
}
|
||||
|
||||
/** Most recently enqueued jobs for a code, newest first, capped at `limit` (default 5). */
export function listJobsForCode(code: string, limit = 5): JobRow[] {
  const statement = rawDb.prepare(`
    SELECT * FROM whisperjav_jobs WHERE code = ?
    ORDER BY enqueued_at DESC LIMIT ?
  `);
  const dbRows = statement.all(code, limit) as JobDbRow[];
  return dbRows.map((dbRow) => rowFromDb(dbRow));
}
|
||||
|
||||
/** The queued job with the smallest `enqueued_at`, across all codes; null when the queue is empty. */
export function nextQueuedJob(): JobRow | null {
  const head = rawDb
    .prepare(`
    SELECT * FROM whisperjav_jobs WHERE status = 'queued'
    ORDER BY enqueued_at ASC LIMIT 1
  `)
    .get() as JobDbRow | undefined;
  if (!head) return null;
  return rowFromDb(head);
}
|
||||
|
||||
/** Latest job for `code` that is still queued or running, or null if there is none. */
export function activeJobForCode(code: string): JobRow | null {
  const active = rawDb
    .prepare(`
    SELECT * FROM whisperjav_jobs
    WHERE code = ? AND status IN ('queued','running')
    ORDER BY enqueued_at DESC LIMIT 1
  `)
    .get(code) as JobDbRow | undefined;
  if (!active) return null;
  return rowFromDb(active);
}
|
||||
|
||||
/**
 * Set a job's status and, optionally, any of the listed lifecycle columns in
 * the same UPDATE. Only keys that are present on `fields` are written; a
 * present key whose value is `undefined` is stored as NULL.
 */
export function setStatus(id: string, status: JobStatus, fields: Partial<{
  startedAt: number | null;
  endedAt: number | null;
  exitCode: number | null;
  error: string | null;
  targetSubtitlePath: string | null;
  cueCount: number | null;
}> = {}): void {
  // [field key, column name], in the order the SET clauses are emitted.
  const columns: ReadonlyArray<readonly [string, string]> = [
    ["startedAt", "started_at"],
    ["endedAt", "ended_at"],
    ["exitCode", "exit_code"],
    ["error", "error"],
    ["targetSubtitlePath", "target_subtitle_path"],
    ["cueCount", "cue_count"],
  ];
  const supplied = fields as Record<string, string | number | null>;

  const assignments: string[] = ["status = ?"];
  const params: (string | number | null)[] = [status];
  for (const [key, column] of columns) {
    if (!(key in fields)) continue;
    assignments.push(`${column} = ?`);
    params.push(supplied[key] ?? null);
  }
  params.push(id);

  rawDb.prepare(`UPDATE whisperjav_jobs SET ${assignments.join(", ")} WHERE id = ?`).run(...params);
}
|
||||
|
||||
/** Persist the current pipeline stage (label, index, total) for a job. */
export function updateProgress(id: string, stage: string | null, idx: number | null, total: number | null): void {
  const statement = rawDb.prepare(`
    UPDATE whisperjav_jobs SET stage = ?, stage_index = ?, stage_total = ? WHERE id = ?
  `);
  statement.run(stage, idx, total, id);
}
|
||||
|
||||
/**
 * Failed or cancelled jobs whose end time (or enqueue time, when the job
 * never recorded an end) is earlier than `cutoffMs`. The retention sweep uses
 * the result to decide which job directories to delete.
 */
export function listAgedTerminalJobs(cutoffMs: number): Array<{ id: string; jobDir: string }> {
  const statement = rawDb.prepare(`
    SELECT id, job_dir FROM whisperjav_jobs
    WHERE status IN ('failed', 'cancelled')
      AND COALESCE(ended_at, enqueued_at) < ?
  `);
  const aged = statement.all(cutoffMs) as Array<{ id: string; job_dir: string }>;
  return aged.map(({ id, job_dir }) => ({ id, jobDir: job_dir }));
}
|
||||
|
||||
/** Every `job_dir` in the table, regardless of status. Used by the "Clear all job history" Settings action. */
export function listAllJobDirs(): string[] {
  const dirRows = rawDb
    .prepare(`SELECT job_dir FROM whisperjav_jobs`)
    .all() as Array<{ job_dir: string }>;
  return dirRows.map(({ job_dir }) => job_dir);
}
|
||||
|
||||
/** Delete every job row that is neither queued nor running; returns the number of rows removed. */
export function deleteAllJobs(): number {
  const { changes } = rawDb
    .prepare(`DELETE FROM whisperjav_jobs WHERE status NOT IN ('queued', 'running')`)
    .run();
  return changes ?? 0;
}
|
||||
|
||||
/**
 * Flip every `running` row to `failed`, stamping the current time as its end.
 * Intended for process start-up, when any child process from a prior run is
 * gone. Queued rows are left untouched so they still get their turn.
 *
 * @returns Number of rows that were marked failed.
 */
export function recoverOrphanedJobs(): number {
  const statement = rawDb.prepare(`
    UPDATE whisperjav_jobs
    SET status = 'failed',
        error = 'process did not survive restart',
        ended_at = ?
    WHERE status = 'running'
  `);
  const { changes } = statement.run(Date.now());
  return changes ?? 0;
}
|
||||
@@ -0,0 +1,438 @@
|
||||
import "server-only";
|
||||
import path from "node:path";
|
||||
import fs from "node:fs";
|
||||
import fsp from "node:fs/promises";
|
||||
import readline from "node:readline";
|
||||
import { Readable } from "node:stream";
|
||||
import { getAppSetting } from "@/lib/db/appSettings";
|
||||
import { rawDb } from "@/lib/db/client";
|
||||
import {
|
||||
insertJob,
|
||||
getJob,
|
||||
setStatus,
|
||||
updateProgress,
|
||||
nextQueuedJob,
|
||||
recoverOrphanedJobs,
|
||||
activeJobForCode,
|
||||
listAgedTerminalJobs,
|
||||
listAllJobDirs,
|
||||
deleteAllJobs,
|
||||
} from "./db";
|
||||
import {
|
||||
buildJobArgs,
|
||||
parseStageLine,
|
||||
spawnJob,
|
||||
validateOutcome,
|
||||
moveFile,
|
||||
isDirWritable,
|
||||
jobBaseDir,
|
||||
jobDirFor,
|
||||
newJobId,
|
||||
} from "./spawn";
|
||||
import { findVideosForCode, rescanVideoIndex } from "@/lib/video";
|
||||
import { getStoredVideoMetadata } from "@/lib/video/metadata";
|
||||
import type { JobRow } from "./types";
|
||||
|
||||
// Worker state lives on `global` rather than in module scope.
// NOTE(review): presumably so it survives module re-evaluation (e.g. dev hot reload) — confirm.
declare global {
  /** True once the restart sweep below has run in this process. */
  // eslint-disable-next-line no-var
  var __whisperjavWorkerStarted: boolean | undefined;
  /** Kill handle for the child of the currently running job, if any. */
  // eslint-disable-next-line no-var
  var __whisperjavRunningKill: ((reason: "cancel") => void) | undefined;
  /** Id of the currently running job, or null when the worker is idle. */
  // eslint-disable-next-line no-var
  var __whisperjavRunningId: string | null | undefined;
}

if (!global.__whisperjavWorkerStarted) {
  global.__whisperjavRunningId = null;
  global.__whisperjavRunningKill = undefined;
  global.__whisperjavWorkerStarted = true;

  // Restart sweep: any rows in `running` from a prior process are dead.
  try {
    const orphanCount = recoverOrphanedJobs();
    if (orphanCount > 0) {
      // eslint-disable-next-line no-console
      console.log(`[whisperjav] recovered ${orphanCount} orphaned job(s) on bootstrap`);
    }
  } catch (err) {
    // Recovery is best-effort; a failure here must not block module load.
    // eslint-disable-next-line no-console
    console.error("[whisperjav] orphan recovery failed:", err);
  }
}
|
||||
|
||||
/** Returned by `enqueueJob` when a new job row was created. */
export interface EnqueueResult {
  /** Id of the newly queued job. */
  jobId: string;
}

/** Returned by `enqueueJob` when a generated subtitle already exists and overwrite was not requested. */
export interface EnqueueAlreadyExists {
  /** Discriminator: always `true` for this variant. */
  alreadyExists: true;
  /** Absolute path of the subtitle file that was found. */
  abs: string;
}
|
||||
|
||||
/** Lines made up solely of box-drawing characters and whitespace (CLI banner frames). */
const BANNER_CHARS = /^[╔╗╚╝═║│┌┐└┘─\s]*$/;

/** Known-benign stderr chatter that should never be parsed for stage info. */
const NOISE = [
  /RequestsDependencyWarning/i,
  /urllib3 \(\d+\.\d+/,
  /chardet|charset_normalizer/,
  /You are about to download and run code from an untrusted repository/,
  /^Downloading: /,
  /UserWarning:/,
  /^\s*warnings\.warn/,
  /^_check_repo_is_trusted/,
];

/** True when `s` is blank, a banner frame line, or matches one of the NOISE patterns. */
function isNoiseLine(s: string): boolean {
  return s.trim() === "" || BANNER_CHARS.test(s) || NOISE.some((pattern) => pattern.test(s));
}
|
||||
|
||||
/** App-local directory for generated subtitles of `code`: `<cwd>/data/generated-subtitles/<code>`. */
function generatedSubtitlesDir(code: string): string {
  const segments = [process.cwd(), "data", "generated-subtitles", code];
  return path.join(...segments);
}
|
||||
|
||||
/**
 * Decide where the finished subtitle for `videoAbs` should land.
 *
 * When the `outputLocation` setting is "beside-video" and the video's
 * directory is writable, that directory is returned as-is. Otherwise the
 * app-local generated-subtitles directory for `code` is created if needed
 * and returned.
 *
 * @returns Absolute path of the destination directory.
 */
async function resolveDestDir(videoAbs: string, code: string): Promise<string> {
  const { outputLocation } = getAppSetting("whisperjav");
  if (outputLocation === "beside-video") {
    const besideVideo = path.dirname(videoAbs);
    const writable = await isDirWritable(besideVideo);
    if (writable) return besideVideo;
  }
  const fallbackDir = generatedSubtitlesDir(code);
  await fsp.mkdir(fallbackDir, { recursive: true });
  return fallbackDir;
}
|
||||
|
||||
/**
 * Stable leading part of the subtitle filename WhisperJAV emits:
 * `<videoStem>.<langTag>`. The CLI may append a further suffix, so pre-flight
 * idempotency checks match on this prefix rather than a full name.
 */
function expectedSubtitleStemPrefix(videoStem: string, langTag: string): string {
  return [videoStem, langTag].join(".");
}
|
||||
|
||||
/**
 * Language tag expected in the output filename for the current settings:
 * "en" when output mode is direct-to-english, otherwise the tag matching the
 * configured source language.
 */
function langTagForSettings(): "ja" | "ko" | "zh" | "en" {
  const { outputMode, sourceLanguage } = getAppSetting("whisperjav");
  if (outputMode === "direct-to-english") return "en";
  switch (sourceLanguage) {
    case "english":
      return "en";
    case "chinese":
      return "zh";
    case "korean":
      return "ko";
    case "japanese":
      return "ja";
  }
}
|
||||
|
||||
/**
 * Look for an already generated subtitle for `videoAbs`, first next to the
 * video and then in the app-local generated-subtitles directory for `code`.
 *
 * A file counts when it is a regular file ending in `.srt` whose
 * lower-cased name starts with `<stem>.<lang>.`. That covers both
 * `<stem>.<lang>.srt` and `<stem>.<lang>.whisperjav.srt`.
 *
 * @returns Absolute path of the first match, or null. Unreadable directories are skipped.
 */
async function existingGeneratedFor(videoAbs: string, code: string): Promise<string | null> {
  const langTag = langTagForSettings();
  const videoStem = path.basename(videoAbs, path.extname(videoAbs));
  // Trailing dot included: `<prefix>.srt` is itself a `<prefix>.`-prefixed name.
  const needle = `${expectedSubtitleStemPrefix(videoStem, langTag).toLowerCase()}.`;
  const searchDirs = [path.dirname(videoAbs), generatedSubtitlesDir(code)];

  for (const dir of searchDirs) {
    let entries: import("node:fs").Dirent[];
    try {
      entries = await fsp.readdir(dir, { withFileTypes: true });
    } catch {
      continue;
    }
    const hit = entries.find((entry) => {
      if (!entry.isFile()) return false;
      const lowered = entry.name.toLowerCase();
      return lowered.endsWith(".srt") && lowered.startsWith(needle);
    });
    if (hit) return path.join(dir, hit.name);
  }
  return null;
}
|
||||
|
||||
/**
 * Queue a subtitle-generation job for one video part of `code`.
 *
 * If a generated subtitle already exists and `overwrite` is not set, nothing
 * is queued and the existing path is reported. With `overwrite`, the existing
 * file is deleted up front so the worker's post-run move does not hit its
 * collision guard.
 *
 * @throws Error when the CLI path is not configured, no video matches
 *   `code`/`partIdx`, or the existing subtitle cannot be removed.
 */
export async function enqueueJob(opts: { code: string; partIdx: number; overwrite?: boolean }): Promise<EnqueueResult | EnqueueAlreadyExists> {
  const { code, partIdx, overwrite } = opts;

  const settings = getAppSetting("whisperjav");
  if (!settings.cliPath) {
    throw new Error("WhisperJAV CLI path not configured");
  }

  let videos = findVideosForCode(code);
  if (videos.length === 0) {
    // Index might be empty in dev — kick a rescan once.
    await rescanVideoIndex();
    videos = findVideosForCode(code);
  }
  const video = videos[partIdx];
  if (!video) throw new Error(`No video found for code=${code} part=${partIdx}`);

  const priorSubtitle = await existingGeneratedFor(video.abs, code);
  if (priorSubtitle && !overwrite) {
    return { alreadyExists: true, abs: priorSubtitle };
  }
  if (priorSubtitle) {
    // User confirmed overwrite — remove the prior generated file first.
    try {
      await fsp.unlink(priorSubtitle);
    } catch (e) {
      throw new Error(`Could not remove existing subtitle: ${(e as Error).message}`);
    }
  }

  // Each job gets its own working directory holding the CLI output, stats and log.
  const id = newJobId();
  const jobDir = jobDirFor(id);
  await fsp.mkdir(jobDir, { recursive: true });
  const statsPath = path.join(jobDir, "stats.json");
  const logPath = path.join(jobDir, "stderr.log");
  const args = buildJobArgs({ videoAbs: video.abs, outputDir: jobDir, statsPath, settings });

  // Duration + mode are captured now so the running-job UI can show an ETA.
  // Duration may be missing if the file hasn't been probed — ETA is best-effort.
  const probedDuration = getStoredVideoMetadata(video.abs)?.durationSec;
  const videoDurationSec = probedDuration && probedDuration > 0 ? probedDuration : null;

  insertJob({
    id,
    code,
    videoAbs: video.abs,
    jobDir,
    status: "queued",
    enqueuedAt: Date.now(),
    cliArgs: JSON.stringify(args),
    logPath,
    statsPath,
    videoDurationSec,
    mode: settings.quality,
  });

  scheduleTick();
  return { jobId: id };
}
|
||||
|
||||
/**
 * Cancel a job by id.
 *
 * A queued job is flipped to `cancelled` with a conditional UPDATE. A running
 * job is cancelled by invoking the kill handle published by the worker, but
 * only when the worker reports this same id as the one it is running.
 *
 * @returns true when the job was cancelled or a kill was issued; false when
 *   the job is unknown, already terminal, or not owned by this worker.
 */
export function cancelJob(id: string): boolean {
  const job = getJob(id);
  if (!job) return false;

  if (job.status === "queued") {
    // Atomic flip: if the worker picked the job up between our read and this
    // write, the WHERE clause matches nothing and we continue below.
    const flipped = rawDb
      .prepare(
        `UPDATE whisperjav_jobs SET status = 'cancelled', ended_at = ? WHERE id = ? AND status = 'queued'`,
      )
      .run(Date.now(), id);
    if (flipped.changes > 0) return true;
  }

  // Re-read: after a failed conditional update the status may now be 'running'.
  const current = getJob(id) ?? job;
  const ownedByWorker = current.status === "running" && global.__whisperjavRunningId === id;
  const kill = global.__whisperjavRunningKill;
  if (!ownedByWorker || !kill) return false;

  kill("cancel");
  return true;
}
|
||||
|
||||
/** True while a worker wake-up is already scheduled for the next loop turn. */
let tickPending = false;

/**
 * Wake the queue worker on the next event-loop turn. Multiple calls before
 * that turn coalesce into a single wake-up.
 */
function scheduleTick(): void {
  if (tickPending) return;
  tickPending = true;
  // Defer to next tick so callers' DB writes are visible.
  setImmediate(() => {
    tickPending = false;
    // Fire-and-forget, but never leave the rejection unhandled: an error
    // escaping the worker would otherwise surface as an unhandledRejection.
    runOne().catch((e: unknown) => {
      // eslint-disable-next-line no-console
      console.error("[whisperjav] queue worker stopped unexpectedly:", e);
    });
  });
}
|
||||
|
||||
/** Guards against two drain loops running at once in this module instance. */
let workerBusy = false;

/**
 * Drain the queue: process queued jobs one at a time, oldest first, until
 * none remain. A no-op when a drain is already in progress.
 *
 * An exception thrown while processing one job is logged and the job is
 * marked failed, so it neither blocks the jobs behind it nor stays in a
 * non-terminal state. If recording that failure throws too, the error
 * propagates and the loop stops, rather than re-picking the same job forever.
 */
async function runOne(): Promise<void> {
  if (workerBusy) return;
  workerBusy = true;
  try {
    for (let next = nextQueuedJob(); next; next = nextQueuedJob()) {
      try {
        await processJob(next);
      } catch (e) {
        const message = e instanceof Error ? e.message : String(e);
        // eslint-disable-next-line no-console
        console.error(`[whisperjav] job ${next.id} crashed in worker:`, e);
        setStatus(next.id, "failed", { endedAt: Date.now(), error: `worker error: ${message}` });
      }
    }
  } finally {
    workerBusy = false;
  }
}
|
||||
|
||||
/**
 * Run one queued job end to end: spawn the CLI, tee its output to the job
 * log, track stage progress, validate the outcome, move the resulting .srt to
 * its destination and record the final status.
 *
 * Every exit path records a terminal status (`failed`, `cancelled`,
 * `warning` or `completed`) for the job.
 *
 * Fixes relative to the previous version:
 *  - the CSI-stripping regex spells the ESC byte as `\x1b`, so it no longer
 *    depends on a raw control character surviving in the source file;
 *  - the log stream has an `error` listener, so a missing or unwritable log
 *    path no longer raises an uncaught exception;
 *  - a synchronous spawn failure marks the job failed instead of leaving it
 *    `running`, and an asynchronous child `error` is reported with its message;
 *  - stored `cli_args` must decode to a string array.
 */
async function processJob(job: JobRow): Promise<void> {
  const describe = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  const settings = getAppSetting("whisperjav");
  if (!settings.cliPath) {
    const now = Date.now();
    setStatus(job.id, "failed", { startedAt: now, endedAt: now, error: "WhisperJAV CLI path cleared while job was queued" });
    return;
  }

  setStatus(job.id, "running", { startedAt: Date.now() });

  let args: string[];
  try {
    const decoded: unknown = JSON.parse(job.cliArgs);
    if (!Array.isArray(decoded) || !decoded.every((a) => typeof a === "string")) {
      throw new Error("cli_args is not a string array");
    }
    args = decoded as string[];
  } catch {
    setStatus(job.id, "failed", { endedAt: Date.now(), error: "stored cli_args malformed" });
    return;
  }

  const logStream = fs.createWriteStream(job.logPath, { flags: "a" });
  // Logging is best-effort: without a listener, an 'error' event (e.g. the job
  // dir vanished) would be thrown as an uncaught exception.
  logStream.on("error", (e) => {
    // eslint-disable-next-line no-console
    console.error(`[whisperjav] log stream error for job ${job.id}:`, e);
  });
  const writeLog = (chunk: string | Uint8Array): void => {
    if (!logStream.destroyed) logStream.write(chunk);
  };
  // Resolves once the stream is closed, whether it finished or errored.
  const closeLog = (): Promise<void> =>
    new Promise<void>((res) => {
      if (logStream.destroyed) {
        res();
        return;
      }
      logStream.once("close", () => res());
      logStream.end();
    });

  let spawned: ReturnType<typeof spawnJob>;
  try {
    spawned = spawnJob(settings.cliPath, args);
  } catch (e) {
    await closeLog();
    setStatus(job.id, "failed", { endedAt: Date.now(), error: `spawn failed: ${describe(e)}` });
    return;
  }

  // Publish the kill handle so cancelJob() can reach the running child.
  global.__whisperjavRunningId = job.id;
  let cancelled = false;
  global.__whisperjavRunningKill = (reason) => {
    if (reason === "cancel") cancelled = true;
    spawned.kill();
  };

  // Stage parser — write-through on every match. WhisperJAV emits at most a
  // handful of "Step N/M:" lines per job; the prior debounce raced the rl
  // close drain and dropped the final stage.
  const rl = readline.createInterface({ input: spawned.proc.stderr ?? Readable.from([]) });
  rl.on("line", (raw: string) => {
    // Strip CSI colour sequences (optional ESC byte + "[...m").
    // eslint-disable-next-line no-control-regex
    const line = raw.replace(/\x1b?\[[0-9;]*m/g, "");
    writeLog(line + "\n");
    if (isNoiseLine(line)) return;
    const stage = parseStageLine(line);
    if (stage) {
      updateProgress(job.id, stage.stage, stage.index, stage.total);
    }
  });

  // We don't currently expect anything on stdout; tee anyway.
  spawned.proc.stdout?.on("data", (b) => {
    writeLog(b);
  });

  // Held in an object so the value assigned inside the listener stays visible
  // to the type checker after the await.
  const childFailure: { message: string | null } = { message: null };
  const exitCode: number | null = await new Promise((resolve) => {
    spawned.proc.on("close", (code) => resolve(code));
    spawned.proc.on("error", (e) => {
      childFailure.message = e.message;
      resolve(null);
    });
  });

  rl.close();
  await closeLog();

  global.__whisperjavRunningKill = undefined;
  global.__whisperjavRunningId = null;

  const fail = (error: string): void => {
    setStatus(job.id, "failed", { endedAt: Date.now(), exitCode, error });
  };

  if (cancelled) {
    setStatus(job.id, "cancelled", { endedAt: Date.now(), exitCode });
    // Keep job dir for diagnosis. Could prune later via retention sweep.
    return;
  }

  if (exitCode === null && childFailure.message !== null) {
    // The child reported an 'error' (e.g. executable not found): surface the
    // real cause instead of the generic "exit code null".
    fail(`child process error: ${childFailure.message}`);
    return;
  }

  const result = await validateOutcome({
    exitCode,
    statsPath: job.statsPath ?? path.join(job.jobDir, "stats.json"),
    jobDir: job.jobDir,
  });

  if (!result.success) {
    fail(result.reason ?? "validation failed");
    return;
  }

  // Move the .srt to its final destination, fail loudly on collision.
  if (!result.finalSrtPath) {
    fail("no final_srt path resolved");
    return;
  }
  let destDir: string;
  try {
    destDir = await resolveDestDir(job.videoAbs, job.code);
  } catch (e) {
    fail(`dest dir resolve failed: ${describe(e)}`);
    return;
  }
  const destFile = path.join(destDir, path.basename(result.finalSrtPath));
  if (fs.existsSync(destFile)) {
    fail(`Output already exists at ${destFile}`);
    return;
  }
  try {
    await moveFile(result.finalSrtPath, destFile);
  } catch (e) {
    fail(`move failed: ${describe(e)}`);
    return;
  }

  // Cleanup temp dir on success/warning.
  try {
    await fsp.rm(job.jobDir, { recursive: true, force: true });
  } catch {
    /* best effort */
  }

  setStatus(job.id, result.warning ? "warning" : "completed", {
    endedAt: Date.now(),
    exitCode,
    targetSubtitlePath: destFile,
    cueCount: result.cueCount,
    error: result.warning ? result.reason : null,
  });

  // Opportunistic retention sweep — keeps the disk tidy without a separate
  // scheduler. No-op when retention is 0. Failures are logged, never thrown.
  runRetentionSweep().catch((e: unknown) => {
    // eslint-disable-next-line no-console
    console.error("[whisperjav] retention sweep failed:", e);
  });
}
|
||||
|
||||
/**
 * Cancel every job that is still queued, stamping the current time as its
 * end. The job that is currently running is not touched. Backs the
 * "Stop Batch" button.
 *
 * @returns Number of jobs cancelled.
 */
export function cancelAllQueued(): number {
  const statement = rawDb.prepare(`
    UPDATE whisperjav_jobs SET status = 'cancelled', ended_at = ?
    WHERE status = 'queued'
  `);
  const { changes } = statement.run(Date.now());
  return changes ?? 0;
}
|
||||
|
||||
/**
 * Module-load side effect: make sure leftover queued jobs from a prior
 * process get picked up, and start a retention sweep. Safe to call
 * repeatedly.
 *
 * The sweep is fire-and-forget; a failure is logged rather than left as an
 * unhandled rejection at import time.
 */
export function bootstrapQueue(): void {
  scheduleTick();
  runRetentionSweep().catch((e: unknown) => {
    // eslint-disable-next-line no-console
    console.error("[whisperjav] retention sweep failed:", e);
  });
}
|
||||
|
||||
/**
 * Delete the working directories of failed/cancelled jobs older than the
 * `retentionDays` setting. No-op when retention is 0, negative or not a
 * finite number, or when no aged rows exist.
 *
 * The job rows themselves are kept, so the same rows are revisited on every
 * sweep. Only directories that actually existed are removed and counted;
 * previously every aged row was counted and logged again on each sweep.
 *
 * @returns Number of directories actually removed.
 */
export async function runRetentionSweep(): Promise<{ removed: number }> {
  const settings = getAppSetting("whisperjav");
  const days = Number(settings.retentionDays);
  if (!Number.isFinite(days) || days <= 0) return { removed: 0 };

  const cutoff = Date.now() - days * 24 * 60 * 60 * 1000;
  const aged = listAgedTerminalJobs(cutoff);

  let removed = 0;
  for (const row of aged) {
    // Already pruned by an earlier sweep (or never created): nothing to do.
    if (!fs.existsSync(row.jobDir)) continue;
    try {
      await fsp.rm(row.jobDir, { recursive: true, force: true });
      removed++;
    } catch (e) {
      console.error(`[whisperjav] failed to prune ${row.jobDir}:`, e);
    }
  }
  if (removed > 0) {
    console.log(`[whisperjav] retention sweep removed ${removed} job dir(s)`);
  }
  return { removed };
}
|
||||
|
||||
/**
 * Wipe the history of finished jobs: delete every job row that is neither
 * queued nor running, together with those jobs' working directories. Used by
 * the "Clear all job history" Settings action.
 *
 * Directories of queued or running jobs are left alone. Their rows are kept
 * by `deleteAllJobs`, and the worker still needs the directory for the log
 * file and CLI output. Previously those directories were deleted too.
 *
 * @returns `rows`: job rows deleted; `dirs`: directories that existed and were removed.
 */
export async function clearAllJobHistory(): Promise<{ rows: number; dirs: number }> {
  // Both queries run synchronously back to back, so no job can change state
  // between them.
  const allDirs = listAllJobDirs();
  const activeRows = rawDb
    .prepare(`SELECT job_dir FROM whisperjav_jobs WHERE status IN ('queued', 'running')`)
    .all() as Array<{ job_dir: string }>;
  const activeDirs = new Set(activeRows.map((r) => r.job_dir));

  let dirsRemoved = 0;
  for (const dir of allDirs) {
    if (activeDirs.has(dir)) continue;
    // Successful jobs already removed their dir; don't count those.
    if (!fs.existsSync(dir)) continue;
    try {
      await fsp.rm(dir, { recursive: true, force: true });
      dirsRemoved++;
    } catch {
      /* best effort */
    }
  }
  const rows = deleteAllJobs();
  return { rows, dirs: dirsRemoved };
}
|
||||
|
||||
// Module-load side effect: importing this module wakes the queue worker and
// starts a retention sweep (see bootstrapQueue).
bootstrapQueue();
|
||||
|
||||
export { activeJobForCode, getJob, jobBaseDir };
|
||||
@@ -0,0 +1,314 @@
|
||||
import "server-only";
|
||||
import { spawn, type ChildProcess } from "node:child_process";
|
||||
import path from "node:path";
|
||||
import fs from "node:fs";
|
||||
import fsp from "node:fs/promises";
|
||||
import { randomUUID } from "node:crypto";
|
||||
import type { WhisperJavSettings } from "@/lib/db/appSettings";
|
||||
|
||||
/** Maximum time (ms) verifyCli waits for `--version` before SIGKILLing the child and reporting a timeout. */
const VERIFY_TIMEOUT_MS = 30_000;
|
||||
|
||||
/** Lines made up solely of box-drawing characters and whitespace (CLI banner frames). */
const BANNER_CHARS = /^[╔╗╚╝═║│┌┐└┘─\s]*$/;

/** Known-benign CLI output lines, plus blank lines. */
const NOISE_PATTERNS: RegExp[] = [
  /RequestsDependencyWarning/i,
  /urllib3 \(\d+\.\d+/,
  /chardet|charset_normalizer/,
  /You are about to download and run code from an untrusted repository/,
  /^Downloading: /,
  /UserWarning:/,
  /^\s*warnings\.warn/,
  /^_check_repo_is_trusted/,
  /^\s*$/, // blank
];

/** True when `line` is a banner frame line or matches any NOISE_PATTERNS entry. */
function isNoise(line: string): boolean {
  return BANNER_CHARS.test(line) || NOISE_PATTERNS.some((pattern) => pattern.test(line));
}
|
||||
|
||||
/** Matches "Step N/M: <label>" anywhere in a line; the label runs to end of line, trailing whitespace excluded. */
const STAGE_RE = /Step\s+(\d+)\s*\/\s*(\d+):\s*(.+?)\s*$/;

/** A parsed "Step N/M: label" progress line. */
export interface ParsedStage {
  /** N — the step number as printed. */
  index: number;
  /** M — the total number of steps as printed. */
  total: number;
  /** The trimmed step label. */
  stage: string;
}

/**
 * Extract stage progress from one line of CLI output.
 *
 * @returns The parsed stage, or null when the line has no "Step N/M:" marker
 *   or either number is not finite.
 */
export function parseStageLine(line: string): ParsedStage | null {
  const match = STAGE_RE.exec(line);
  if (match === null) return null;

  const [, rawIndex, rawTotal, rawLabel] = match;
  const index = Number(rawIndex);
  const total = Number(rawTotal);
  if (!(Number.isFinite(index) && Number.isFinite(total))) return null;

  return { index, total, stage: rawLabel!.trim() };
}
|
||||
|
||||
/** Matches the "WhisperJAV x.y.z" version banner. */
const VERSION_RE = /WhisperJAV\s+(\d+\.\d+\.\d+)/;

/** Outcome of probing a CLI path with `--version`. */
export interface VerifyResult {
  ok: boolean;
  /** Parsed x.y.z version; set when `ok` is true. */
  version?: string;
  /** The path that was probed; set when `ok` is true. */
  resolvedPath?: string;
  /** Failure description; set when `ok` is false. */
  error?: string;
}

/**
 * Run `<cliPath> --version` and look for the version banner in the combined
 * stdout + stderr output. Never rejects: spawn failures, child errors and the
 * timeout all resolve to `{ ok: false, error }`. When no banner is found the
 * error is the last three stderr lines, or a generic message if stderr is empty.
 */
export function verifyCli(cliPath: string): Promise<VerifyResult> {
  return new Promise((resolve) => {
    const startedAt = Date.now();

    let child: ChildProcess;
    try {
      child = spawn(cliPath, ["--version"], { stdio: ["ignore", "pipe", "pipe"] });
    } catch (e) {
      console.error(`[whisperjav verify] spawn failed (${Date.now() - startedAt}ms):`, (e as Error).message);
      resolve({ ok: false, error: (e as Error).message });
      return;
    }

    let out = "";
    let err = "";
    let done = false;
    const stderrTail = (separator: string): string => err.trim().split("\n").slice(-3).join(separator);

    const timer = setTimeout(() => {
      try {
        child.kill("SIGKILL");
      } catch {}
      console.error(`[whisperjav verify] timeout after ${VERIFY_TIMEOUT_MS}ms; stdout="${out.trim()}" stderr_tail="${stderrTail(" | ")}"`);
      finish({ ok: false, error: "verify timed out" });
    }, VERIFY_TIMEOUT_MS);

    // First caller wins; later calls (e.g. 'close' after a timeout) are ignored.
    function finish(result: VerifyResult): void {
      if (done) return;
      done = true;
      clearTimeout(timer);
      console.log(`[whisperjav verify] ${result.ok ? "ok" : "fail"} in ${Date.now() - startedAt}ms`);
      resolve(result);
    }

    child.stdout?.on("data", (d) => {
      out += d.toString();
    });
    child.stderr?.on("data", (d) => {
      err += d.toString();
    });
    child.on("error", (e) => finish({ ok: false, error: e.message }));
    child.on("close", () => {
      const found = (out + "\n" + err).match(VERSION_RE);
      if (!found) {
        finish({ ok: false, error: stderrTail("\n") || "no version detected in output" });
        return;
      }
      finish({ ok: true, version: found[1], resolvedPath: cliPath });
    });
  });
}
|
||||
|
||||
/**
 * Locate `whisperjav` on PATH using `where` (Windows) or `which` (elsewhere).
 *
 * @returns The first non-empty line the locator prints, or null when it
 *   prints nothing or cannot be run.
 */
export async function autoDetectCli(): Promise<string | null> {
  const locator = process.platform === "win32" ? "where" : "which";
  return new Promise((resolve) => {
    let child: ChildProcess;
    try {
      child = spawn(locator, ["whisperjav"], { stdio: ["ignore", "pipe", "ignore"] });
    } catch {
      resolve(null);
      return;
    }

    const chunks: string[] = [];
    child.stdout?.on("data", (d) => {
      chunks.push(d.toString());
    });
    child.on("error", () => resolve(null));
    child.on("close", () => {
      for (const rawLine of chunks.join("").split(/\r?\n/)) {
        const candidate = rawLine.trim();
        if (candidate) {
          resolve(candidate);
          return;
        }
      }
      resolve(null);
    });
  });
}
|
||||
|
||||
/** Maps the `quality` setting to the value passed to the CLI's `--mode` flag. */
const QUALITY_TO_MODE: Record<WhisperJavSettings["quality"], string> = {
  fast: "fast",
  balanced: "balanced",
  qwen: "qwen",
};

/**
 * Build the argv (without the program name) for a generation job: the video
 * path first, then the flags derived from settings, then the fixed flags.
 * `--no-signature` is appended only when the setting is on.
 */
export function buildJobArgs(opts: {
  videoAbs: string;
  outputDir: string;
  statsPath: string;
  settings: WhisperJavSettings;
}): string[] {
  const { videoAbs, outputDir, statsPath, settings } = opts;

  const fromSettings: string[] = [
    "--mode", QUALITY_TO_MODE[settings.quality],
    "--language", settings.sourceLanguage,
    "--subs-language", settings.outputMode,
    "--sensitivity", settings.sensitivity,
  ];
  const fixed: string[] = [
    "--output-dir", outputDir,
    "--no-progress",
    "--verbosity", "summary",
    "--stats-file", statsPath,
  ];
  const optional: string[] = settings.noSignature ? ["--no-signature"] : [];

  return [videoAbs, ...fromSettings, ...fixed, ...optional];
}
|
||||
|
||||
/** Handle for a running generation job. */
export interface SpawnedJob {
  /** The child process; stdout and stderr are piped, stdin is ignored. */
  proc: ChildProcess;
  /**
   * Best-effort kill. On Windows this cascades to child processes via
   * `taskkill /T`; elsewhere only the direct child is signalled.
   */
  kill: () => void;
}

/** Spawn a generation job. The caller is responsible for consuming stderr/stdout. */
export function spawnJob(cliPath: string, args: string[]): SpawnedJob {
  const child = spawn(cliPath, args, { stdio: ["ignore", "pipe", "pipe"] });
  const kill = (): void => killTree(child);
  return { proc: child, kill };
}
|
||||
|
||||
/**
 * Best-effort termination of a child process. Does nothing when the child
 * has no pid.
 *
 * - Windows: fire-and-forget `taskkill /pid <pid> /T /F`, which also takes
 *   down the child's own children.
 * - Elsewhere: SIGTERM now, then SIGKILL after a 3 second grace period.
 *
 * All errors are swallowed.
 */
function killTree(proc: ChildProcess): void {
  const { pid } = proc;
  if (!pid) return;

  const signal = (name: "SIGTERM" | "SIGKILL"): void => {
    try {
      proc.kill(name);
    } catch {
      /* ignore */
    }
  };

  if (process.platform !== "win32") {
    signal("SIGTERM");
    // Escalate after a short grace period.
    setTimeout(() => signal("SIGKILL"), 3000);
    return;
  }

  // taskkill /T cascades to children. /F forces. Spawn fire-and-forget.
  try {
    spawn("taskkill", ["/pid", String(pid), "/T", "/F"], { stdio: "ignore" });
  } catch {
    /* ignore */
  }
}
|
||||
|
||||
/** The subset of one stats-file entry that outcome validation reads. */
export interface StatsEntry {
  /** Must equal "success" for the job to validate. */
  status?: string;
  metadata?: {
    /** `final_srt` is the path of the produced subtitle file. */
    output_files?: { final_srt?: string };
    /** `final_subtitles_refined` is recorded as the job's cue count. */
    summary?: { final_subtitles_refined?: number };
    /** Any non-empty array here fails validation. */
    errors?: unknown[];
  };
}

/** Result of `validateOutcome`. */
export interface ValidationResult {
  /**
   * True when the run is accepted: exit code 0 and either the stats file
   * reports success with no errors and an existing final .srt, or the stats
   * file could not be read and exactly one .srt is present in the job dir.
   */
  success: boolean;
  /** Set when success === true but the cue count is 0, or the result was accepted without stats. */
  warning: boolean;
  /** Path of the subtitle to move into place; null on failure. */
  finalSrtPath: string | null;
  /** Cue count from the stats file; null when unavailable. */
  cueCount: number | null;
  /** Human-readable reason for a failure or a warning; null otherwise. */
  reason: string | null;
}
|
||||
|
||||
/**
 * Apply the success criteria from the plan. Caller passes exit code and
 * the stats path; we read + parse + check.
 *
 * Checks run in this order, and the first one that fails decides the
 * `reason`:
 *   1. the exit code must be 0;
 *   2. the stats file must be readable — if it is not, the run is still
 *      accepted (with `warning: true`) when exactly one .srt exists in
 *      `jobDir`;
 *   3. the stats file must be a JSON array with a first entry;
 *   4. that entry must have `status === "success"` and no reported errors;
 *   5. `metadata.output_files.final_srt` must name an existing regular file.
 *
 * A cue count of exactly 0 is a success flagged with `warning: true`.
 *
 * @param opts.exitCode  Exit code of the finished process (null counts as failure).
 * @param opts.statsPath Path of the stats JSON file to read.
 * @param opts.jobDir    Directory searched for a lone .srt when stats are unreadable.
 * @returns The verdict; this function reports failures through the result
 *          rather than by throwing for the cases listed above.
 */
export async function validateOutcome(opts: {
  exitCode: number | null;
  statsPath: string;
  jobDir: string;
}): Promise<ValidationResult> {
  // Every rejection shares the same shape; only the reason differs.
  const fail = (reason: string): ValidationResult => ({
    success: false,
    warning: false,
    finalSrtPath: null,
    cueCount: null,
    reason,
  });

  if (opts.exitCode !== 0) {
    return fail(`exit code ${opts.exitCode}`);
  }

  let raw: string;
  try {
    raw = await fsp.readFile(opts.statsPath, "utf8");
  } catch {
    // Stats missing fallback: accept if exactly one .srt exists in jobDir.
    const stray = await findSingleSrt(opts.jobDir);
    if (stray) {
      return {
        success: true,
        warning: true,
        finalSrtPath: stray,
        cueCount: null,
        reason: "stats unavailable, accepted by file presence",
      };
    }
    return fail("stats.json missing and no .srt found");
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return fail("stats.json malformed");
  }

  const entry =
    Array.isArray(parsed) && parsed.length > 0 ? (parsed[0] as StatsEntry | null) : null;
  if (!entry) {
    return fail("stats.json has no entries");
  }
  if (entry.status !== "success") {
    return fail(`stats reports status=${entry.status}`);
  }

  const errors = entry.metadata?.errors ?? [];
  if (Array.isArray(errors) && errors.length > 0) {
    return fail(`stats reports ${errors.length} error(s)`);
  }

  // The entry is only a cast of parsed JSON, so check the runtime type
  // before trusting the path.
  const finalRaw: unknown = entry.metadata?.output_files?.final_srt;
  const final = typeof finalRaw === "string" && finalRaw.length > 0 ? finalRaw : null;

  // Non-blocking existence check that also rejects directories and other
  // non-regular files, which could not be a usable subtitle.
  let finalIsFile = false;
  if (final !== null) {
    try {
      finalIsFile = (await fsp.stat(final)).isFile();
    } catch {
      finalIsFile = false;
    }
  }
  if (final === null || !finalIsFile) {
    return fail("final_srt missing from disk");
  }

  // Same caution for the cue count: anything that is not a finite number
  // is reported as "unknown" instead of leaking into the typed result.
  const cueRaw: unknown = entry.metadata?.summary?.final_subtitles_refined;
  const cueCount = typeof cueRaw === "number" && Number.isFinite(cueRaw) ? cueRaw : null;
  const warning = cueCount === 0;

  return {
    success: true,
    warning,
    finalSrtPath: final,
    cueCount,
    reason: warning ? "0 cues — likely no speech" : null,
  };
}
|
||||
|
||||
/**
 * Look for a lone subtitle file directly inside `dir`.
 *
 * Only regular files whose name ends in ".srt" (compared
 * case-insensitively) are counted; subdirectories are not searched.
 *
 * @returns The joined path of the match when there is exactly one,
 *          otherwise null — including when the directory cannot be read.
 */
async function findSingleSrt(dir: string): Promise<string | null> {
  let listing: import("node:fs").Dirent[];
  try {
    listing = await fsp.readdir(dir, { withFileTypes: true });
  } catch {
    // Unreadable or missing directory counts as "nothing found".
    return null;
  }

  const matches: string[] = [];
  for (const item of listing) {
    if (!item.isFile()) continue;
    if (!item.name.toLowerCase().endsWith(".srt")) continue;
    matches.push(path.join(dir, item.name));
  }

  if (matches.length !== 1) {
    return null;
  }
  return matches[0]!;
}
|
||||
|
||||
/** Produce a fresh job identifier: a random UUID from `randomUUID()`. */
export function newJobId(): string {
  const id: string = randomUUID();
  return id;
}
|
||||
|
||||
/**
 * Root directory that holds every job's folder:
 * `<current working directory>/data/whisperjav-jobs`.
 * The working directory is read on each call.
 */
export function jobBaseDir(): string {
  const segments = ["data", "whisperjav-jobs"];
  return path.join(process.cwd(), ...segments);
}
|
||||
|
||||
/** Folder of a single job: the job base directory joined with `id`. */
export function jobDirFor(id: string): string {
  const base = jobBaseDir();
  return path.join(base, id);
}
|
||||
|
||||
/**
 * Cross-device-aware move. Falls back to copy + unlink when rename
 * hits EXDEV (different filesystems / drives).
 *
 * The destination's parent directory is created if needed. In the
 * fallback path a failed copy removes whatever was written to `dest`
 * before rethrowing, so no truncated file is left behind. Removing the
 * source after a successful copy is best effort: if it fails, the
 * move still resolves and the source simply stays in place.
 *
 * @param src  Path of the file to move.
 * @param dest Path the file should end up at.
 * @throws Any error from mkdir, from rename (other than EXDEV), or from
 *         the fallback copy.
 */
export async function moveFile(src: string, dest: string): Promise<void> {
  await fsp.mkdir(path.dirname(dest), { recursive: true });

  try {
    await fsp.rename(src, dest);
    return;
  } catch (e) {
    // Only a cross-device rename is recoverable; everything else is a real error.
    if ((e as NodeJS.ErrnoException).code !== "EXDEV") throw e;
  }

  try {
    await fsp.copyFile(src, dest);
  } catch (e) {
    // Do not leave a partially written destination behind.
    await fsp.rm(dest, { force: true }).catch(() => { /* best effort */ });
    throw e;
  }

  await fsp.unlink(src).catch(() => { /* best effort */ });
}
|
||||
|
||||
/**
 * Heuristic writability check: attempt to create an empty probe file
 * inside `dir`, then remove it again. Used to choose between
 * beside-video output and the data-folder fallback.
 *
 * @returns true when the probe file could be written (even if removing
 *          it afterwards failed); false when writing it failed.
 */
export async function isDirWritable(dir: string): Promise<boolean> {
  const probeName = `.pinkudex-write-probe-${process.pid}-${Date.now()}`;
  const probePath = path.join(dir, probeName);

  try {
    await fsp.writeFile(probePath, "");
  } catch {
    return false;
  }

  try {
    await fsp.unlink(probePath);
  } catch {
    /* ignore */
  }
  return true;
}
|
||||
@@ -0,0 +1,51 @@
|
||||
import "server-only";
|
||||
|
||||
/**
 * Status values a job can carry (the type of `JobRow.status`).
 * NOTE(review): "queued" and "running" look like in-flight states and the
 * other four like terminal outcomes — confirm against the job runner.
 */
export type JobStatus =
  | "queued"
  | "running"
  | "completed"
  | "warning"
  | "failed"
  | "cancelled";
|
||||
|
||||
/**
 * One job record as used by application code. Fields typed `| null` have
 * no value until some later point in the job's life.
 */
export interface JobRow {
  id: string;
  code: string;
  /** Source video path. NOTE(review): name suggests it is absolute — confirm. */
  videoAbs: string;
  jobDir: string;
  /** Final destination of the moved .srt, populated on success/warning. */
  targetSubtitlePath: string | null;
  status: JobStatus;
  /** NOTE(review): numeric timestamps; unit (ms vs s) is not visible here — confirm. */
  enqueuedAt: number;
  startedAt: number | null;
  endedAt: number | null;
  exitCode: number | null;
  error: string | null;
  /** Latest "Step X/Y: <description>" parsed from stderr. */
  stage: string | null;
  /** Presumably the X of the latest "Step X/Y" line — confirm. */
  stageIndex: number | null;
  /** Presumably the Y of the latest "Step X/Y" line — confirm. */
  stageTotal: number | null;
  cueCount: number | null;
  /** JSON-stringified args array passed to whisperjav. */
  cliArgs: string;
  logPath: string;
  statsPath: string | null;
  /** Source video duration (sec) at enqueue time — used for ETA. Null
   *  when the video hasn't been probed yet. */
  videoDurationSec: number | null;
  /** WhisperJAV --mode at enqueue time. Persisted so historical ETA
   *  multipliers can be grouped per mode. */
  mode: string | null;
}
|
||||
|
||||
/** Snapshot returned by the detail endpoint — JobRow plus a tail of log lines. */
export interface JobDetail extends JobRow {
  /** The tail of the job's log, one array element per line. */
  logTail: string[];
}
|
||||
|
||||
/** Input for enqueueing a new job. */
export interface EnqueueRequest {
  /** NOTE(review): presumably the same identifier stored as `JobRow.code` — confirm. */
  code: string;
  /** NOTE(review): presumably selects one video part of `code` — confirm base (0 or 1) with the enqueue handler. */
  partIdx: number;
  /** When true, allow overwriting an existing target subtitle file. */
  overwrite?: boolean;
}
|
||||
Reference in New Issue
Block a user