Files
2026-05-26 22:46:00 +02:00

390 lines
16 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import "server-only";
import path from "node:path";
import fs from "node:fs/promises";
import crypto from "node:crypto";
import sharp from "sharp";
import { db, rawDb } from "@/lib/db/client";
import { images } from "@/lib/db/schema";
import { eq } from "drizzle-orm";
import { sanitizeFilename, uniqueFilePath, letterBucket, canonicalThumbName } from "@/lib/filename";
import { extractCode, normalizeCode } from "@/lib/jav/codeParser";
import { computeDHash } from "@/lib/jav/phash";
import { parseNfo, type NfoMetadata } from "@/lib/jav/nfoParser";
import { upsertStudio, upsertSeries, upsertActress, upsertGenre } from "@/lib/jav/upsert";
/** Absolute path of the directory that holds ingested source files (`<cwd>/library`). */
const LIBRARY_ROOT = path.resolve("library");
/** Absolute path of the directory that holds generated thumbnails (`<cwd>/data/thumbs`). */
const THUMB_ROOT = path.resolve("data", "thumbs");
/** Absolute path where replaced files and thumbnails are parked (`<cwd>/library/.superseded`). */
const SUPERSEDED_ROOT = path.join(LIBRARY_ROOT, ".superseded");
/** How an incoming image compares to the existing image that shares its code,
 * as decided by classifyCollision from width, height and byte size. */
export type CollisionBucket = "upgrade" | "downgrade" | "sidegrade" | "mixed";
/** Side-by-side facts about an existing row and the incoming upload that
 * collided with it on canonical code. */
export interface CollisionInfo {
/** Row id of the existing image. */
existingId: number;
/** Stored filename of the existing image. */
existingFilename: string;
/** Stored pixel width of the existing image. */
existingWidth: number;
/** Stored pixel height of the existing image. */
existingHeight: number;
/** Stored byte size of the existing image. */
existingBytes: number;
/** Stored thumb_path of the existing image (resolved against the thumbs directory by ingestFile). */
existingThumbPath: string;
/** Pixel width of the incoming upload; 0 when the image metadata reported no width. */
incomingWidth: number;
/** Pixel height of the incoming upload; 0 when the image metadata reported no height. */
incomingHeight: number;
/** Length in bytes of the incoming upload buffer. */
incomingBytes: number;
/** Classification of the incoming upload relative to the existing image. */
bucket: CollisionBucket;
}
/** Outcome of a single ingestFile call. */
export interface IngestResult {
/** Id of the row this call resolved to: the newly inserted row, the already
 * existing row for a duplicate, or the colliding row for a collision. */
imageId: number;
/** True when an already-stored row was returned instead of storing the upload
 * (same SHA-256, or a collision resolved with "skip"). */
duplicate: boolean;
/** Filename associated with the returned row. */
filename: string;
/** Canonical code associated with the returned row, or null when there is none. */
code: string | null;
/** Present when the upload was deferred (onCollision "detect", the default)
 * because a live top-level row with the same canonical code already exists.
 * No DB write happened and the staged file and thumbnail were removed. The
 * caller must re-invoke ingestFile with onCollision: "replace" or "skip". */
collision?: CollisionInfo;
}
/**
 * Classifies an incoming image against the existing image it collides with.
 *
 * Rules are evaluated in order; the first match wins:
 *  1. "upgrade"   — incoming has strictly more pixels AND at least 1.5× the pixel area.
 *  2. "downgrade" — incoming is no larger in width, height and bytes, and strictly
 *                   smaller in at least one of them.
 *  3. "sidegrade" — width and height each within ±2px and bytes within ±15% of the old size.
 *  4. "mixed"     — anything else.
 *
 * @param oldW     Width of the existing image in pixels.
 * @param oldH     Height of the existing image in pixels.
 * @param oldBytes Byte size of the existing image.
 * @param newW     Width of the incoming image in pixels.
 * @param newH     Height of the incoming image in pixels.
 * @param newBytes Byte size of the incoming image.
 * @returns The bucket describing the incoming image relative to the existing one.
 */
function classifyCollision(
  oldW: number, oldH: number, oldBytes: number,
  newW: number, newH: number, newBytes: number,
): CollisionBucket {
  const oldPx = oldW * oldH;
  const newPx = newW * newH;

  // Dimensions can be stored as 0 when image metadata was unavailable. Without
  // the strict `>` check a 0-area incoming image would satisfy `0 >= 0 * 1.5`
  // and be reported as an upgrade over a 0-area existing image.
  if (newPx > oldPx && newPx >= oldPx * 1.5) return "upgrade";

  const noLarger = newW <= oldW && newH <= oldH && newBytes <= oldBytes;
  const strictlySmaller = newW < oldW || newH < oldH || newBytes < oldBytes;
  if (noLarger && strictlySmaller) return "downgrade";

  const dimsClose = Math.abs(newW - oldW) <= 2 && Math.abs(newH - oldH) <= 2;
  const bytesClose = Math.abs(newBytes - oldBytes) <= oldBytes * 0.15;
  if (dimsClose && bytesClose) return "sidegrade";

  return "mixed";
}
/** Best-effort removal of a staged source file and its thumbnail; removal errors are ignored. */
async function discardStaged(fileAbs: string, thumbAbs: string): Promise<void> {
  await fs.rm(fileAbs, { force: true }).catch(() => {});
  await fs.rm(thumbAbs, { force: true }).catch(() => {});
}

/** Links every non-blank name to the image, creating actresses as needed; existing links are no-ops. */
function linkActresses(imageId: number, names: readonly string[] | undefined): void {
  if (!names) return;
  for (const name of names) {
    const trimmed = name.trim();
    if (!trimmed) continue;
    const actressId = upsertActress(trimmed);
    rawDb
      .prepare(`INSERT OR IGNORE INTO image_actresses (image_id, actress_id) VALUES (?, ?)`)
      .run(imageId, actressId);
  }
}

/**
 * Ingests one uploaded image: deduplicates by SHA-256, stores the bytes under
 * the library directory, generates a WebP thumbnail (fit inside 768×768,
 * quality 82), and inserts or updates the `images` row.
 *
 * Flow:
 *  1. If a row with the same SHA-256 exists, return it as a duplicate
 *     (optionally re-binding an attachment, reviving a soft-deleted cover,
 *     merging actress names and applying `autoAssign`).
 *  2. Otherwise stage the file and thumbnail on disk.
 *  3. For a top-level image with a code, check for a live top-level row with
 *     the same code and resolve according to `opts.onCollision`.
 *  4. Otherwise insert a new row (with NFO metadata for top-level images).
 *
 * @param buffer           Raw bytes of the uploaded image.
 * @param originalFilename Filename supplied by the uploader; used for storage
 *                         unless `opts.targetFilename` is set, and as a
 *                         fallback source for code extraction.
 * @param opts             Optional behaviour switches, see field docs.
 * @returns The resolved row id plus duplicate/collision information.
 * @throws Error("Attachment parent not found") when `opts.parentImageId` does
 *         not reference a live top-level image. Errors from image processing,
 *         file writes and non-UNIQUE database failures are rethrown after the
 *         staged files are cleaned up.
 */
export async function ingestFile(
  buffer: Buffer,
  originalFilename: string,
  opts?: {
    /** Optional .nfo XML payload to seed metadata from. */
    nfoXml?: string;
    autoAssign?: { tagName?: string; collectionId?: number };
    /** When set, the new image is attached as an extra (back cover / still) of this parent. */
    parentImageId?: number;
    /** Override filename to store on disk and in the DB (e.g. "DDK-134.jpg"). */
    targetFilename?: string;
    /** Explicit actress names to attach (link existing or create-new). */
    actressNames?: string[];
    /** When set, controls how a same-code collision (different SHA) is
     * resolved. "detect" (default) returns a collision result without
     * writing. "replace" overwrites the existing row's bytes/sha/dims
     * in place, preserving relational state. "skip" returns the existing
     * row unchanged. */
    onCollision?: "detect" | "replace" | "skip";
  },
): Promise<IngestResult> {
  const sha = crypto.createHash("sha256").update(buffer).digest("hex");

  // ---- 1. Exact-content dedup -------------------------------------------
  const existing = db.select().from(images).where(eq(images.sha256, sha)).get();
  if (existing) {
    // If we're re-uploading an existing attachment, re-bind that attachment to
    // the requested parent. Do not turn an existing cover into its own child.
    if (opts?.parentImageId != null) {
      if (existing.parentImageId != null && existing.id !== opts.parentImageId) {
        rawDb.prepare(`
          UPDATE images
          SET parent_image_id = ?, deleted_at = NULL
          WHERE id = ?
        `).run(opts.parentImageId, existing.id);
      }
    } else if (existing.deletedAt != null) {
      // Plain re-upload of a soft-deleted cover: revive it.
      rawDb.prepare(`UPDATE images SET deleted_at = NULL WHERE id = ?`).run(existing.id);
    }
    // Re-uploads can carry fresh actress decisions; merge them so duplicates
    // aren't a dead end for metadata. Only top-level covers carry actresses.
    if (existing.parentImageId == null) linkActresses(existing.id, opts?.actressNames);
    if (opts?.autoAssign) applyAutoAssign(existing.id, opts.autoAssign);
    return { imageId: existing.id, duplicate: true, filename: existing.filename, code: existing.code };
  }

  // ---- 2. Resolve names, parent and code --------------------------------
  const filenameForStorage = opts?.targetFilename?.trim() || originalFilename;
  const { base, ext } = sanitizeFilename(filenameForStorage);

  // Metadata is resolved BEFORE choosing the bucket: the on-disk partition is
  // derived from the code (or the parent's code, for attached images).
  const isAttached = opts?.parentImageId != null;
  let parentCode: string | null = null;
  if (isAttached) {
    // One query both validates the parent (live, top-level) and fetches its code.
    const parent = rawDb.prepare(`
      SELECT id, code FROM images
      WHERE id = ? AND deleted_at IS NULL AND parent_image_id IS NULL
    `).get(opts.parentImageId) as { id: number; code: string | null } | undefined;
    if (!parent) throw new Error("Attachment parent not found");
    parentCode = parent.code ?? null;
  }

  const nfo = opts?.nfoXml ? parseNfo(opts.nfoXml) : null;
  const code = isAttached
    ? null
    : (normalizeCode(nfo?.code) ?? extractCode(filenameForStorage) ?? extractCode(originalFilename));
  // Attached images are bucketed and thumb-named after their parent's code.
  const bucketCode: string | null = isAttached ? parentCode : code;

  // ---- 3. Stage file + thumbnail on disk --------------------------------
  const dirRel = letterBucket(bucketCode).dirRel;
  const dirAbs = path.join(LIBRARY_ROOT, dirRel);
  await fs.mkdir(dirAbs, { recursive: true });
  const fileAbs = await uniqueFilePath(dirAbs, base, ext);
  const fileRel = path.posix.join(dirRel, path.basename(fileAbs));
  await fs.mkdir(THUMB_ROOT, { recursive: true });
  const thumbName = canonicalThumbName(bucketCode, sha);
  const thumbAbs = path.join(THUMB_ROOT, thumbName);

  let width = 0;
  let height = 0;
  let phash: string | null = null;
  try {
    await fs.writeFile(fileAbs, buffer);
    const meta = await sharp(buffer, { failOn: "none" }).metadata();
    width = meta.width ?? 0;
    height = meta.height ?? 0;
    await sharp(buffer, { failOn: "none" })
      .rotate()
      .resize({ width: 768, height: 768, fit: "inside", withoutEnlargement: true })
      .webp({ quality: 82 })
      .toFile(thumbAbs);
    // Perceptual hash is optional: on failure phash stays null.
    try { phash = await computeDHash(buffer); } catch { phash = null; }
  } catch (e) {
    // Don't leave an orphan in library/ with no DB row.
    await discardStaged(fileAbs, thumbAbs);
    throw e;
  }

  // ---- 4. Same-code collision handling ----------------------------------
  // A live primary cover with the same canonical code already exists. SHA
  // dedup already missed, so this is a different encode of the same release.
  if (!isAttached && code) {
    const collision = rawDb.prepare(`
      SELECT id, filename, rel_path, thumb_path, width, height, bytes
      FROM images
      WHERE code = ? AND parent_image_id IS NULL AND deleted_at IS NULL
      ORDER BY id LIMIT 1
    `).get(code) as
      | { id: number; filename: string; rel_path: string; thumb_path: string; width: number; height: number; bytes: number }
      | undefined;

    if (collision) {
      const mode = opts?.onCollision ?? "detect";

      if (mode === "skip") {
        await discardStaged(fileAbs, thumbAbs);
        return { imageId: collision.id, duplicate: true, filename: collision.filename, code };
      }

      if (mode === "replace") {
        // Park the old file + thumb in .superseded/ for recovery, then update
        // the existing row in place so all relational state keeps its row id.
        await fs.mkdir(SUPERSEDED_ROOT, { recursive: true });
        const stamp = Date.now();
        const oldExt = path.extname(collision.rel_path) || ".bin";
        const oldFileAbs = path.join(LIBRARY_ROOT, collision.rel_path);
        const oldThumbAbs = path.join(THUMB_ROOT, collision.thumb_path);
        const supersededFile = path.join(SUPERSEDED_ROOT, `${collision.id}-${stamp}${oldExt}`);
        const supersededThumb = path.join(SUPERSEDED_ROOT, `${collision.id}-${stamp}.thumb.webp`);

        // If the old file was missing on disk, the staged file may have been
        // given the very same path. Renaming "the old file" would then move
        // the NEW bytes into .superseded/ and leave the row pointing at
        // nothing, so skip the move when the paths coincide.
        const fileReused = path.resolve(oldFileAbs) === path.resolve(fileAbs);
        const thumbReused = path.resolve(oldThumbAbs) === path.resolve(thumbAbs);

        // Best-effort recovery copy; proceed even if the old files are missing.
        if (!fileReused) await fs.rename(oldFileAbs, supersededFile).catch(() => {});
        if (!thumbReused) await fs.rename(oldThumbAbs, supersededThumb).catch(() => {});

        try {
          rawDb.prepare(`
            UPDATE images SET
              filename = ?, rel_path = ?, thumb_path = ?, sha256 = ?,
              width = ?, height = ?, bytes = ?, phash = ?
            WHERE id = ?
          `).run(filenameForStorage, fileRel, thumbName, sha, width, height, buffer.length, phash, collision.id);
        } catch (e) {
          // Restore on failure (e.g. UNIQUE(sha256) clash with an unrelated row).
          if (!fileReused) await fs.rename(supersededFile, oldFileAbs).catch(() => {});
          if (!thumbReused) await fs.rename(supersededThumb, oldThumbAbs).catch(() => {});
          await discardStaged(fileAbs, thumbAbs);
          throw e;
        }

        // Replace also merges any fresh actress decisions, same as the dedup branch.
        linkActresses(collision.id, opts?.actressNames);
        if (opts?.autoAssign) applyAutoAssign(collision.id, opts.autoAssign);
        return { imageId: collision.id, duplicate: false, filename: filenameForStorage, code };
      }

      // mode === "detect": back out the staged files, report the collision
      // and let the caller decide. No DB write.
      await discardStaged(fileAbs, thumbAbs);
      return {
        imageId: collision.id,
        duplicate: false,
        filename: filenameForStorage,
        code,
        collision: {
          existingId: collision.id,
          existingFilename: collision.filename,
          existingWidth: collision.width,
          existingHeight: collision.height,
          existingBytes: collision.bytes,
          existingThumbPath: collision.thumb_path,
          incomingWidth: width,
          incomingHeight: height,
          incomingBytes: buffer.length,
          bucket: classifyCollision(
            collision.width, collision.height, collision.bytes,
            width, height, buffer.length,
          ),
        },
      };
    }
  }

  // ---- 5. Insert a new row ----------------------------------------------
  const studioId = !isAttached && nfo?.studio ? upsertStudio(nfo.studio) : null;
  const seriesId = !isAttached && nfo?.series ? upsertSeries(nfo.series) : null;

  const insert = rawDb.transaction(() => {
    const result = rawDb.prepare(`
      INSERT INTO images (
        filename, rel_path, thumb_path, sha256, width, height, bytes,
        parent_image_id, code, title, release_date, runtime_min, director,
        studio_id, series_id, notes, phash
      )
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `).run(
      filenameForStorage,
      fileRel,
      thumbName,
      sha,
      width,
      height,
      buffer.length,
      opts?.parentImageId ?? null,
      code,
      isAttached ? null : (nfo?.title ?? null),
      isAttached ? null : (nfo?.releaseDate ?? null),
      isAttached ? null : (nfo?.runtimeMin ?? null),
      isAttached ? null : (nfo?.director ?? null),
      studioId,
      seriesId,
      isAttached ? null : (nfo?.notes ?? null),
      phash,
    );
    const newId = Number(result.lastInsertRowid);
    // Descriptive metadata only belongs on top-level covers.
    if (!isAttached) {
      if (nfo) attachNfoChildren(newId, nfo);
      linkActresses(newId, opts?.actressNames);
    }
    return newId;
  });

  let imageId: number;
  try {
    imageId = insert();
  } catch (e: unknown) {
    // Concurrent uploads of the same file can race past the dedup check
    // above; the UNIQUE(sha256) / UNIQUE(rel_path) constraints catch the
    // loser. Treat it as a duplicate and clean up what we staged.
    const msg = e instanceof Error ? e.message : String(e);
    if (/UNIQUE constraint failed/i.test(msg)) {
      const winner = rawDb.prepare(`
        SELECT id, filename, code, rel_path, thumb_path FROM images WHERE sha256 = ?
      `).get(sha) as
        | { id: number; filename: string; code: string | null; rel_path: string; thumb_path: string }
        | undefined;
      // The thumbnail name is derived from (code, sha), and both racers may
      // have been handed the same file path, so the staged paths can be the
      // exact files the winning row references. Only delete what the winner
      // does not own.
      if (winner?.rel_path !== fileRel) await fs.rm(fileAbs, { force: true }).catch(() => {});
      if (winner?.thumb_path !== thumbName) await fs.rm(thumbAbs, { force: true }).catch(() => {});
      if (winner) {
        if (opts?.autoAssign) applyAutoAssign(winner.id, opts.autoAssign);
        return { imageId: winner.id, duplicate: true, filename: winner.filename, code: winner.code };
      }
    }
    throw e;
  }

  if (opts?.autoAssign) applyAutoAssign(imageId, opts.autoAssign);
  return { imageId, duplicate: false, filename: filenameForStorage, code };
}
/**
 * Links the actresses and genres listed in parsed NFO metadata to an image.
 * Each name is upserted first; link rows that already exist are left alone
 * (INSERT OR IGNORE).
 *
 * @param imageId Id of the image row to attach to.
 * @param nfo     Parsed NFO metadata; absent lists are skipped.
 */
function attachNfoChildren(imageId: number, nfo: NfoMetadata) {
  const { actresses, genres } = nfo;

  for (const actressName of actresses || []) {
    const actressId = upsertActress(actressName);
    rawDb
      .prepare(`INSERT OR IGNORE INTO image_actresses (image_id, actress_id) VALUES (?, ?)`)
      .run(imageId, actressId);
  }

  for (const genreName of genres || []) {
    const genreId = upsertGenre(genreName);
    rawDb
      .prepare(`INSERT OR IGNORE INTO image_genres (image_id, genre_id) VALUES (?, ?)`)
      .run(imageId, genreId);
  }
}
/**
 * Applies upload-time auto-assignment to an image: optionally tags it and
 * optionally appends it to a collection.
 *
 * @param imageId Id of the image row to assign.
 * @param opts    `tagName` is trimmed and lower-cased, then upserted and
 *                linked; `collectionId` appends the image at the position
 *                after the collection's current maximum. Existing links are
 *                left untouched (INSERT OR IGNORE).
 */
function applyAutoAssign(imageId: number, opts: { tagName?: string; collectionId?: number }) {
  const { tagName, collectionId } = opts;

  const normalizedTag = tagName ? tagName.trim().toLowerCase() : "";
  if (normalizedTag) {
    // The no-op DO UPDATE makes RETURNING yield the id for both new and existing tags.
    const tagRow = rawDb.prepare(`
INSERT INTO tags (name) VALUES (?) ON CONFLICT(name) DO UPDATE SET name=excluded.name RETURNING id
`).get(normalizedTag) as { id: number };
    rawDb
      .prepare(`INSERT OR IGNORE INTO image_tags (image_id, tag_id) VALUES (?, ?)`)
      .run(imageId, tagRow.id);
  }

  if (collectionId == null) return;

  // Read-max and insert run in one transaction so the computed position
  // belongs to the same snapshot as the insert.
  const appendToCollection = rawDb.transaction(() => {
    const current = rawDb
      .prepare(`SELECT COALESCE(MAX(position), -1) AS m FROM collection_images WHERE collection_id = ?`)
      .get(collectionId) as { m: number };
    const nextPosition = current.m + 1;
    rawDb.prepare(`
INSERT OR IGNORE INTO collection_images (collection_id, image_id, position) VALUES (?, ?, ?)
`).run(collectionId, imageId, nextPosition);
  });
  appendToCollection();
}