Files
2026-05-26 22:46:00 +02:00

621 lines
22 KiB
TypeScript

"use server";
import path from "node:path";
import fs from "node:fs/promises";
import sharp from "sharp";
import { rawDb } from "@/lib/db/client";
import { sanitizeFilename, uniqueFilePath, letterBucket, canonicalThumbName } from "@/lib/filename";
import { extractCode } from "@/lib/jav/codeParser";
import { computeDHash, hammingDistance } from "@/lib/jav/phash";
import { clearAppSettingsCache } from "@/lib/db/appSettings";
import { safeJoin } from "@/lib/safePath";
import { revalidatePath } from "next/cache";
// Absolute storage roots, all resolved against the process working directory.
// Files under LIBRARY_ROOT are matched against images.rel_path.
const LIBRARY_ROOT = path.join(process.cwd(), "library");
// Files under THUMB_ROOT are matched against images.thumb_path.
const THUMB_ROOT = path.join(process.cwd(), "data", "thumbs");
// Files under PORTRAIT_ROOT are matched against the actresses.portrait*_path columns.
const PORTRAIT_ROOT = path.join(process.cwd(), "data", "portraits");
// Files under CATEGORY_COVER_ROOT are matched against the tag_categories cover columns.
const CATEGORY_COVER_ROOT = path.join(process.cwd(), "data", "category-covers");
// Files under COLLECTION_COVER_ROOT are matched against the collections cover columns.
const COLLECTION_COVER_ROOT = path.join(process.cwd(), "data", "collection-covers");
// Lower-cased file names that walk() never reports, so they are neither
// counted as orphans nor deleted by the purge.
const SYSTEM_FILES = new Set([".ds_store", "thumbs.db", "desktop.ini"]);
/**
 * Result of findOrphans(): files present on disk that no database row
 * references, grouped by the storage root they were found under. Every
 * entry is an absolute path.
 */
interface OrphanReport {
/** Files under LIBRARY_ROOT with no matching images.rel_path. */
libraryFiles: string[];
/** Files under THUMB_ROOT with no matching images.thumb_path. */
thumbFiles: string[];
/** Files under PORTRAIT_ROOT not referenced by any actresses portrait column. */
portraitFiles: string[];
/** Files under CATEGORY_COVER_ROOT not referenced by any tag_categories cover column. */
categoryCoverFiles: string[];
/** Files under COLLECTION_COVER_ROOT not referenced by any collections cover column. */
collectionCoverFiles: string[];
/** Combined size of all listed files; files that fail to stat contribute 0. */
bytes: number;
}
/**
 * Recursively list every regular file beneath `dir`.
 *
 * Files whose lower-cased name is in SYSTEM_FILES are left out. A directory
 * that cannot be read (missing, permission denied, ...) contributes no
 * entries instead of throwing, so a root that does not exist yet yields `[]`.
 *
 * @param dir Absolute directory to scan.
 * @returns Absolute file paths in `readdir` order, with each subdirectory
 *   replaced in place by the files found beneath it.
 */
async function walk(dir: string): Promise<string[]> {
  let entries: import("node:fs").Dirent[];
  try {
    entries = await fs.readdir(dir, { withFileTypes: true });
  } catch {
    return [];
  }
  const perEntry = await Promise.all(
    entries.map(async (entry): Promise<string[]> => {
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) return walk(full);
      if (entry.isFile() && !SYSTEM_FILES.has(entry.name.toLowerCase())) return [full];
      // Symlinks, sockets, system files, ...: nothing to report.
      return [];
    }),
  );
  // Flatten rather than `out.push(...subtree)`: spreading a very large
  // subtree into call arguments can throw RangeError, and collecting
  // per-entry results keeps the order independent of I/O completion timing.
  return perEntry.flat();
}
/**
 * Compare the files on disk with the paths stored in the database and
 * report everything on disk that no row points at.
 *
 * Stored paths and on-disk paths (made relative to their root) are both
 * passed through `path.normalize` before comparison. The `images` queries
 * carry no WHERE clause, so every row counts as a reference.
 *
 * @returns The orphaned absolute paths per storage root plus their total size.
 */
async function findOrphans(): Promise<OrphanReport> {
  // Turn a list of stored paths into a normalized lookup set.
  const asKnownSet = (stored: string[]): Set<string> =>
    new Set(stored.map((stored_path) => path.normalize(stored_path)));
  // Run a query whose single result column is aliased `p`.
  const queryPaths = (sql: string): string[] =>
    (rawDb.prepare(sql).all() as Array<{ p: string }>).map((row) => row.p);
  // Keep only the files whose root-relative path is not in `known`.
  const unreferenced = (root: string, files: string[], known: Set<string>): string[] =>
    files.filter((abs) => !known.has(path.normalize(path.relative(root, abs))));

  const knownLibrary = asKnownSet(
    (rawDb.prepare(`SELECT rel_path FROM images`).all() as Array<{ rel_path: string }>)
      .map((row) => row.rel_path),
  );
  const knownThumbs = asKnownSet(
    (rawDb.prepare(`SELECT thumb_path FROM images`).all() as Array<{ thumb_path: string }>)
      .map((row) => row.thumb_path),
  );
  const knownPortraits = asKnownSet(queryPaths(`
SELECT portrait_path AS p FROM actresses WHERE portrait_path IS NOT NULL
UNION ALL SELECT portrait2_path FROM actresses WHERE portrait2_path IS NOT NULL
UNION ALL SELECT portrait3_path FROM actresses WHERE portrait3_path IS NOT NULL
UNION ALL SELECT portrait4_path FROM actresses WHERE portrait4_path IS NOT NULL
UNION ALL SELECT portraith_path FROM actresses WHERE portraith_path IS NOT NULL
`));
  const knownCategoryCovers = asKnownSet(queryPaths(`
SELECT cover_portrait_path AS p FROM tag_categories WHERE cover_portrait_path IS NOT NULL
UNION ALL SELECT cover_landscape_path FROM tag_categories WHERE cover_landscape_path IS NOT NULL
`));
  const knownCollectionCovers = asKnownSet(queryPaths(`
SELECT cover_portrait_path AS p FROM collections WHERE cover_portrait_path IS NOT NULL
UNION ALL SELECT cover_landscape_path FROM collections WHERE cover_landscape_path IS NOT NULL
`));

  // Scan all five roots concurrently.
  const [libFiles, thumbFiles, portraitFiles, categoryCoverFiles, collectionCoverFiles] = await Promise.all([
    walk(LIBRARY_ROOT),
    walk(THUMB_ROOT),
    walk(PORTRAIT_ROOT),
    walk(CATEGORY_COVER_ROOT),
    walk(COLLECTION_COVER_ROOT),
  ]);

  const libraryOrphans = unreferenced(LIBRARY_ROOT, libFiles, knownLibrary);
  const thumbOrphans = unreferenced(THUMB_ROOT, thumbFiles, knownThumbs);
  const portraitOrphans = unreferenced(PORTRAIT_ROOT, portraitFiles, knownPortraits);
  const categoryCoverOrphans = unreferenced(CATEGORY_COVER_ROOT, categoryCoverFiles, knownCategoryCovers);
  const collectionCoverOrphans = unreferenced(COLLECTION_COVER_ROOT, collectionCoverFiles, knownCollectionCovers);

  // Size each orphan; a file that cannot be stat'ed simply adds nothing.
  const everyOrphan = [
    ...libraryOrphans, ...thumbOrphans, ...portraitOrphans,
    ...categoryCoverOrphans, ...collectionCoverOrphans,
  ];
  const sizes = await Promise.all(everyOrphan.map(async (file) => {
    try {
      return (await fs.stat(file)).size;
    } catch {
      return 0;
    }
  }));
  const bytes = sizes.reduce((sum, size) => sum + size, 0);

  return {
    libraryFiles: libraryOrphans,
    thumbFiles: thumbOrphans,
    portraitFiles: portraitOrphans,
    categoryCoverFiles: categoryCoverOrphans,
    collectionCoverFiles: collectionCoverOrphans,
    bytes,
  };
}
/**
 * Dry run for the orphan purge: report how many unreferenced files exist
 * across all storage roots and how many bytes they occupy. Deletes nothing.
 *
 * @returns `count` of orphaned files and their combined size in `bytes`.
 */
export async function previewOrphanFiles(): Promise<{ count: number; bytes: number }> {
  const report = await findOrphans();
  const groups = [
    report.libraryFiles,
    report.thumbFiles,
    report.portraitFiles,
    report.categoryCoverFiles,
    report.collectionCoverFiles,
  ];
  const count = groups.reduce((sum, files) => sum + files.length, 0);
  return { count, bytes: report.bytes };
}
/**
 * Delete every file reported by findOrphans(), sweep the directories that
 * became empty, and revalidate the index routes that show file counts.
 *
 * Deletion is attempted for every orphan even when some removals fail;
 * the empty-directory sweep and the revalidation always run afterwards.
 * If any removal failed, the first failure is rethrown once that cleanup
 * is done.
 *
 * @returns Number of files removed and the bytes they occupied.
 * @throws The first `fs.rm` error encountered, after cleanup has run.
 */
export async function purgeOrphanFiles(): Promise<{ deleted: number; bytes: number }> {
  const report = await findOrphans();
  const all = [
    ...report.libraryFiles,
    ...report.thumbFiles,
    ...report.portraitFiles,
    ...report.categoryCoverFiles,
    ...report.collectionCoverFiles,
  ];
  // Bound concurrency: Promise.all over thousands of fs.rm calls can
  // exhaust file descriptors (EMFILE) on Windows / low-ulimit hosts.
  const CONCURRENCY = 32;
  let failed = false;
  let firstFailure: unknown;
  for (let i = 0; i < all.length; i += CONCURRENCY) {
    // allSettled so one undeletable file does not abandon the rest of the
    // purge half-way through.
    const outcomes = await Promise.allSettled(
      all.slice(i, i + CONCURRENCY).map((f) => fs.rm(f, { force: true })),
    );
    for (const outcome of outcomes) {
      if (outcome.status === "rejected" && !failed) {
        failed = true;
        firstFailure = outcome.reason;
      }
    }
  }
  // Sweep empty subdirs across every root that just shed files.
  await Promise.all([
    cleanEmptyDirs(LIBRARY_ROOT),
    cleanEmptyDirs(THUMB_ROOT),
    cleanEmptyDirs(PORTRAIT_ROOT),
    cleanEmptyDirs(CATEGORY_COVER_ROOT),
    cleanEmptyDirs(COLLECTION_COVER_ROOT),
  ]);
  // Indexes that show cover/portrait/thumb counts need to refetch.
  revalidatePath("/");
  revalidatePath("/category");
  revalidatePath("/collection");
  revalidatePath("/actress");
  // Surface the failure only now, so a partial purge still leaves the
  // directories swept and the caches refreshed.
  if (failed) throw firstFailure;
  return { deleted: all.length, bytes: report.bytes };
}
/** Dry-run summary returned by previewReorganize(). */
interface ReorganizePreview {
/** Number of rows in the images table. */
total: number;
/** Rows whose current directory differs from their planned letter-bucket directory. */
toMove: number;
}
/** The images columns loaded by loadAllImages() for the reorganize tools. */
interface ImageRow {
id: number;
/** Stored file name; sanitized and reused as the file name when the file is moved. */
filename: string;
/** Path relative to LIBRARY_ROOT; may contain backslashes, which callers convert to "/". */
rel_path: string;
/** Code that selects the letter bucket, or null when none is stored. */
code: string | null;
/** Set for attached images, which are bucketed by the parent row's code instead of their own. */
parent_image_id: number | null;
}
/**
 * Work out which letter-bucket directory (relative to the library root) a
 * row belongs in. A row with a parent is bucketed by the parent's code so
 * attached files land next to their parent; a parent id missing from the
 * map is treated as a parent without a code.
 *
 * @param row Image row to place.
 * @param parentCodeById Lookup from image id to that image's code.
 * @returns The `dirRel` of the bucket chosen by `letterBucket`.
 */
function plannedDirRel(row: ImageRow, parentCodeById: Map<number, string | null>): string {
  const bucketCode =
    row.parent_image_id == null
      ? row.code
      : (parentCodeById.get(row.parent_image_id) ?? null);
  return letterBucket(bucketCode).dirRel;
}
/**
 * Load every row of the images table together with an id → code lookup,
 * which plannedDirRel() uses to resolve the code of a row's parent.
 *
 * @returns All rows and the lookup built from those same rows.
 */
function loadAllImages(): { rows: ImageRow[]; parentCodeById: Map<number, string | null> } {
  const selectAll = rawDb.prepare(`SELECT id, filename, rel_path, code, parent_image_id FROM images`);
  const rows = selectAll.all() as ImageRow[];
  const parentCodeById = new Map<number, string | null>(
    rows.map((row): [number, string | null] => [row.id, row.code]),
  );
  return { rows, parentCodeById };
}
/**
 * Dry run for reorganizeFiles(): count the rows whose file currently sits
 * in a directory other than its planned letter bucket. Nothing is moved.
 *
 * @returns Total row count and how many of those rows would be moved.
 */
export async function previewReorganize(): Promise<ReorganizePreview> {
  const { rows, parentCodeById } = loadAllImages();
  const misplaced = rows.filter((row) => {
    // Compare in POSIX form so stored backslash paths still match.
    const currentDir = path.posix.dirname(row.rel_path.replace(/\\/g, "/"));
    return currentDir !== plannedDirRel(row, parentCodeById);
  });
  return { total: rows.length, toMove: misplaced.length };
}
/**
 * Move every library file that is not in its planned letter-bucket
 * directory and store the new location in `images.rel_path`.
 *
 * Per row:
 *  - already in the right directory → counted as `skipped`;
 *  - `rel_path` rejected by safeJoin, source file missing, or any failure
 *    while moving → counted in `errors`, and the loop continues;
 *  - otherwise the file is renamed to a unique name in the target
 *    directory and the row is updated → counted as `moved`.
 *
 * If the database update fails after the file was renamed, the rename is
 * undone (best effort) so the row and the file do not drift apart.
 * Directories left empty are removed afterwards and "/" is revalidated.
 *
 * @returns Counts of moved, skipped and failed rows.
 */
export async function reorganizeFiles(): Promise<{ moved: number; skipped: number; errors: number }> {
  const { rows, parentCodeById } = loadAllImages();
  // Prepared once and reused for every row.
  const setRelPath = rawDb.prepare(`UPDATE images SET rel_path = ? WHERE id = ?`);
  let moved = 0, skipped = 0, errors = 0;
  for (const r of rows) {
    const target = plannedDirRel(r, parentCodeById);
    const currentDir = path.posix.dirname(r.rel_path.replace(/\\/g, "/"));
    if (currentDir === target) { skipped++; continue; }
    // rel_path comes from the database; resolve it with the same guarded
    // join the other maintenance actions use instead of a bare path.join.
    const oldAbs = safeJoin(LIBRARY_ROOT, r.rel_path);
    if (!oldAbs) {
      errors++;
      continue;
    }
    try {
      await fs.access(oldAbs);
      const { base, ext } = sanitizeFilename(r.filename || `image${path.extname(r.rel_path)}`);
      const dirAbs = path.join(LIBRARY_ROOT, target);
      await fs.mkdir(dirAbs, { recursive: true });
      const newAbs = await uniqueFilePath(dirAbs, base, ext);
      await fs.rename(oldAbs, newAbs);
      const newRel = path.posix.join(target, path.basename(newAbs));
      try {
        setRelPath.run(newRel, r.id);
      } catch (dbError) {
        // Put the file back so rel_path keeps pointing at a real file.
        await fs.rename(newAbs, oldAbs).catch(() => {});
        throw dbError;
      }
      moved++;
    } catch {
      errors++;
    }
  }
  await cleanEmptyDirs(LIBRARY_ROOT);
  revalidatePath("/");
  return { moved, skipped, errors };
}
/**
 * Drop the cached app settings and ask Next.js to revalidate each of the
 * listing routes below.
 *
 * @returns `{ ok: true }` once every route has been revalidated.
 */
export async function clearCache(): Promise<{ ok: true }> {
  clearAppSettingsCache();
  const routes = ["/", "/collection", "/tag", "/category", "/actress", "/studios", "/series", "/genres", "/queue"];
  routes.forEach((route) => {
    revalidatePath(route);
  });
  return { ok: true };
}
/** One row returned by scanUndersizedCovers(); fields mirror the selected images columns. */
export interface UndersizedCover {
id: number;
code: string | null;
filename: string;
/** Stored pixel width, compared against the scan's minimum width. */
width: number;
/** Stored pixel height, compared against the scan's minimum height. */
height: number;
/** Value of the images.bytes column (presumably the file size — confirm against the ingest code). */
bytes: number;
/** The images.thumb_path column, aliased to camelCase by the query. */
thumbPath: string;
}
/**
 * List top-level, non-deleted covers whose stored dimensions fall below a
 * floor, smallest pixel area first (ties broken by id).
 *
 * Meant to catch thumbnails, web previews and other non-cover images that
 * were imported by accident. The default floor of 750x500 sits a little
 * under the usual 800x538 cover size so that real covers which are a few
 * pixels off are not flagged.
 *
 * @param opts.minWidth Rows narrower than this are reported (default 750).
 * @param opts.minHeight Rows shorter than this are reported (default 500).
 * @returns Matching rows; a row qualifies when either dimension is too small.
 */
export async function scanUndersizedCovers(opts?: {
  minWidth?: number;
  minHeight?: number;
}): Promise<UndersizedCover[]> {
  const widthFloor = opts?.minWidth ?? 750;
  const heightFloor = opts?.minHeight ?? 500;
  const query = rawDb.prepare(`
SELECT id, code, filename, width, height, bytes, thumb_path AS thumbPath
FROM images
WHERE parent_image_id IS NULL
AND deleted_at IS NULL
AND (width < ? OR height < ?)
ORDER BY (width * height) ASC, id ASC
`);
  const undersized = query.all(widthFloor, heightFloor) as UndersizedCover[];
  return undersized;
}
/** Dry-run summary returned by previewRegenThumbnails(). */
interface RegenThumbsPreview {
/** Number of non-deleted image rows examined. */
total: number;
/** Rows whose canonical thumbnail file is not accessible under THUMB_ROOT. */
missing: number;
/** Rows whose stored thumb_path differs from the canonical thumbnail name. */
staleNames: number;
}
/**
 * Compute the canonical thumbnail file name for a row. A row without a
 * parent uses its own code. An attached row looks up its parent's code in
 * the database, so it shares the parent's prefix; a parent that cannot be
 * found counts as having no code.
 *
 * @param row The row's hash, own code and optional parent id.
 * @returns The name produced by `canonicalThumbName`.
 */
function plannedThumbName(row: { sha256: string; code: string | null; parent_image_id: number | null }): string {
  if (row.parent_image_id == null) {
    return canonicalThumbName(row.code, row.sha256);
  }
  const parentRow = rawDb
    .prepare(`SELECT code FROM images WHERE id = ?`)
    .get(row.parent_image_id) as { code: string | null } | undefined;
  const inheritedCode = parentRow?.code ?? null;
  return canonicalThumbName(inheritedCode, row.sha256);
}
/**
 * Dry run for regenerateThumbnails(): for every non-deleted image, check
 * whether the stored thumbnail name is still the canonical one and whether
 * the canonical file exists on disk.
 *
 * @returns Rows examined, rows with no canonical file, rows with a stale name.
 */
export async function previewRegenThumbnails(): Promise<RegenThumbsPreview> {
  const rows = rawDb.prepare(`
SELECT thumb_path, sha256, code, parent_image_id FROM images WHERE deleted_at IS NULL
`).all() as Array<{ thumb_path: string; sha256: string; code: string | null; parent_image_id: number | null }>;

  // True when the path can be accessed, false on any error.
  const isOnDisk = async (abs: string): Promise<boolean> => {
    try {
      await fs.access(abs);
      return true;
    } catch {
      return false;
    }
  };

  let missing = 0;
  let staleNames = 0;
  // Checked one row at a time on purpose: firing every access() at once
  // can hit EMFILE on very large libraries.
  for (const row of rows) {
    const canonical = plannedThumbName(row);
    if (canonical !== row.thumb_path) staleNames += 1;
    if (!(await isOnDisk(path.join(THUMB_ROOT, canonical)))) missing += 1;
  }
  return { total: rows.length, missing, staleNames };
}
/**
 * Rebuild thumbnails for every non-deleted image. Per row:
 *  1. Canonical file already on disk → skip (unless `force`), syncing
 *     thumb_path if the database still holds an older name.
 *  2. A file exists at the stored, non-canonical thumb_path → rename it to
 *     the canonical name and update thumb_path. No re-encode; this is the
 *     migration path for libraries that predate the code-prefix naming.
 *  3. Otherwise (or always, with `force`) → encode a new thumbnail from
 *     the original in library/.
 *
 * Encoding writes to a temporary file that is renamed onto the canonical
 * path only after sharp finished, so a failed encode never leaves a
 * truncated file that later runs would accept as valid. In force mode the
 * legacy file is removed only after the replacement is in place and
 * recorded, so a failed encode keeps the existing thumbnail.
 *
 * @param opts.force Re-encode even when a usable thumbnail exists (default false).
 * @returns Counts of re-encoded, renamed, skipped and failed rows.
 */
export async function regenerateThumbnails(opts?: { force?: boolean }): Promise<{ regenerated: number; renamed: number; skipped: number; errors: number }> {
  const force = opts?.force ?? false;
  const rows = rawDb.prepare(`
SELECT id, rel_path, thumb_path, sha256, code, parent_image_id FROM images WHERE deleted_at IS NULL
`).all() as Array<{ id: number; rel_path: string; thumb_path: string; sha256: string; code: string | null; parent_image_id: number | null }>;
  // Prepared once and reused for every row.
  const setThumbPath = rawDb.prepare(`UPDATE images SET thumb_path = ? WHERE id = ?`);
  await fs.mkdir(THUMB_ROOT, { recursive: true });
  let regenerated = 0, renamed = 0, skipped = 0, errors = 0;
  for (const r of rows) {
    const target = plannedThumbName(r);
    const targetAbs = path.join(THUMB_ROOT, target);
    const nameIsStale = r.thumb_path !== target;

    if (!force) {
      try {
        await fs.access(targetAbs);
        // Canonical file exists. If the DB still has the legacy name,
        // sync the column so future operations don't drift.
        if (nameIsStale) setThumbPath.run(target, r.id);
        skipped++;
        continue;
      } catch { /* missing — fall through */ }
    }

    // Location of the thumb under its stored (legacy) name, when that name
    // differs from the canonical one and resolves inside THUMB_ROOT.
    const legacyAbs = nameIsStale ? safeJoin(THUMB_ROOT, r.thumb_path) : null;

    if (!force && legacyAbs) {
      // Renaming is faster than re-encoding and keeps the file as it was.
      try {
        await fs.access(legacyAbs);
        await fs.rename(legacyAbs, targetAbs);
        setThumbPath.run(target, r.id);
        renamed++;
        continue;
      } catch { /* legacy file missing — fall through to encode */ }
    }

    const libAbs = safeJoin(LIBRARY_ROOT, r.rel_path);
    if (!libAbs) {
      errors++;
      continue;
    }
    // Same directory as the target so the final rename stays on one
    // filesystem; the .webp suffix keeps the name unambiguous for sharp.
    const tmpAbs = `${targetAbs}.${process.pid}.tmp.webp`;
    try {
      // Pass the file path to sharp instead of reading into a buffer.
      // The library can contain multi-GB videos that were misclassified
      // as images; reading those into memory would OOM the server.
      // Mirrors lib/ingest/ingest.ts's resize pipeline.
      await sharp(libAbs, { failOn: "none" })
        .rotate()
        .resize({ width: 768, height: 768, fit: "inside", withoutEnlargement: true })
        .webp({ quality: 82 })
        .toFile(tmpAbs);
      await fs.rename(tmpAbs, targetAbs);
      if (nameIsStale) setThumbPath.run(target, r.id);
      // Force mode: the legacy file is obsolete now that the canonical one
      // exists and is recorded. Skip it if both names resolve to one file.
      if (force && legacyAbs && path.resolve(legacyAbs) !== path.resolve(targetAbs)) {
        await fs.rm(legacyAbs, { force: true }).catch(() => {});
      }
      regenerated++;
    } catch {
      // Remove whatever partial output the failed encode left behind.
      await fs.rm(tmpAbs, { force: true }).catch(() => {});
      errors++;
    }
  }
  revalidatePath("/");
  return { regenerated, renamed, skipped, errors };
}
/**
 * Remove every empty directory beneath `root`, deepest first, so a chain
 * of directories that only contained each other disappears in one pass.
 * `root` itself is never removed. An unreadable `root` and any failure to
 * inspect or remove a subdirectory are ignored.
 *
 * @param root Directory whose descendants should be pruned.
 */
async function cleanEmptyDirs(root: string): Promise<void> {
  let children: import("node:fs").Dirent[];
  try {
    children = await fs.readdir(root, { withFileTypes: true });
  } catch {
    return;
  }
  const subdirs = children
    .filter((child) => child.isDirectory())
    .map((child) => path.join(root, child.name));
  for (const subdir of subdirs) {
    // Prune below first; that may be what empties this directory.
    await cleanEmptyDirs(subdir);
    try {
      const leftover = await fs.readdir(subdir);
      if (leftover.length > 0) continue;
      await fs.rmdir(subdir);
    } catch {
      // Best effort: leave the directory in place if it cannot be handled.
    }
  }
}
/** Dry-run summary returned by previewReparseCodes(). */
export interface ReparseCodesPreview {
/** Number of top-level, non-deleted rows examined. */
total: number;
/** Rows with no code where extractCode now finds one — safe to fill. */
missing: number;
/** Rows where extractCode disagrees with the stored code — overwrite
* is destructive of any manual edit, so it's gated behind force=true. */
changed: number;
/** Sample of up to 20 changed rows for the preview UI. */
sampleChanges: Array<{ id: number; filename: string; oldCode: string; newCode: string }>;
}
/**
 * Dry run for reparseCodes(): re-run extractCode on the stored filename of
 * every top-level, non-deleted row and tally what would change. Rows for
 * which extractCode finds nothing are ignored.
 *
 * @returns Row total, rows that would gain a code, rows whose code would
 *   change, and up to 20 examples of the latter.
 */
export async function previewReparseCodes(): Promise<ReparseCodesPreview> {
  const rows = rawDb.prepare(`
SELECT id, filename, code FROM images
WHERE deleted_at IS NULL AND parent_image_id IS NULL
`).all() as Array<{ id: number; filename: string; code: string | null }>;
  const sampleChanges: ReparseCodesPreview["sampleChanges"] = [];
  let missing = 0;
  let changed = 0;
  for (const row of rows) {
    const parsed = extractCode(row.filename);
    if (!parsed) continue;
    if (row.code == null) {
      missing += 1;
      continue;
    }
    if (row.code === parsed) continue;
    changed += 1;
    // Keep the preview payload small: only the first 20 examples.
    if (sampleChanges.length < 20) {
      sampleChanges.push({ id: row.id, filename: row.filename, oldCode: row.code, newCode: parsed });
    }
  }
  return { total: rows.length, missing, changed, sampleChanges };
}
/**
 * Re-run extractCode on every top-level, non-deleted row and store the
 * result, all inside one transaction. Rows without a code are always
 * filled; rows whose stored code differs are overwritten only with
 * `force`, since that discards manual edits.
 *
 * Only the database changes here. Files stay in their old letter bucket
 * until Reorganize runs, and thumbnail names keep their old
 * `<CODE>-<sha>.webp` prefix until Regenerate Thumbnails runs.
 *
 * @param opts.force Also overwrite codes that disagree (default false).
 * @returns Rows filled, rows overwritten, and rows left untouched.
 */
export async function reparseCodes(opts?: { force?: boolean }): Promise<{ filled: number; updated: number; skipped: number }> {
  const force = opts?.force ?? false;
  const rows = rawDb.prepare(`
SELECT id, filename, code FROM images
WHERE deleted_at IS NULL AND parent_image_id IS NULL
`).all() as Array<{ id: number; filename: string; code: string | null }>;
  let filled = 0;
  let updated = 0;
  let skipped = 0;
  const applyAll = rawDb.transaction(() => {
    const setCode = rawDb.prepare(`UPDATE images SET code = ? WHERE id = ?`);
    for (const row of rows) {
      const parsed = extractCode(row.filename);
      // Nothing parsed, or the stored code already matches: leave it.
      if (!parsed || row.code === parsed) {
        skipped += 1;
        continue;
      }
      if (row.code == null) {
        setCode.run(parsed, row.id);
        filled += 1;
        continue;
      }
      // Stored code disagrees with the parsed one.
      if (!force) {
        skipped += 1;
        continue;
      }
      setCode.run(parsed, row.id);
      updated += 1;
    }
  });
  applyAll();
  revalidatePath("/");
  return { filled, updated, skipped };
}
/** Two covers reported by findNearDuplicates() as perceptually close. */
export interface NearDupePair {
/** The member that comes first in id order. */
a: { id: number; code: string | null; filename: string; thumbPath: string; width: number; height: number; bytes: number };
/** The member that comes later in id order. */
b: { id: number; code: string | null; filename: string; thumbPath: string; width: number; height: number; bytes: number };
/** Hamming distance between the two rows' phash values; lower means more alike. */
distance: number;
}
/** Hash-coverage stats returned by previewNearDupes(). */
export interface NearDupesPreview {
/** Number of non-deleted image rows. */
total: number;
/** Rows that already have a phash. */
hashed: number;
/** Rows still lacking a phash (total minus hashed). */
unhashed: number;
}
/**
 * Quick stats: how many non-deleted rows already have a phash and how
 * many still need backfilling.
 *
 * @returns Row total, hashed count and unhashed count; all three are 0
 *   when there are no non-deleted rows.
 */
export async function previewNearDupes(): Promise<NearDupesPreview> {
  const row = rawDb.prepare(`
SELECT
COUNT(*) AS total,
SUM(CASE WHEN phash IS NOT NULL THEN 1 ELSE 0 END) AS hashed
FROM images WHERE deleted_at IS NULL
`).get() as { total: number; hashed: number | null };
  // SUM() yields NULL (not 0) when no rows match, unlike COUNT(*).
  const hashed = row.hashed ?? 0;
  return {
    total: row.total,
    hashed,
    unhashed: row.total - hashed,
  };
}
/**
 * Backfill `phash` for every non-deleted row that doesn't have one yet.
 * Reads the library file, computes its dHash and writes it to the DB.
 *
 * Per row:
 *  - file missing on disk (ENOENT) → counted as `skipped`;
 *  - `rel_path` rejected by safeJoin, or any other read, hash or DB
 *    failure → counted in `errors`;
 *  - otherwise → counted as `hashed`.
 *
 * @returns Counts of hashed, skipped and failed rows.
 */
export async function backfillPhashes(): Promise<{ hashed: number; skipped: number; errors: number }> {
  const rows = rawDb.prepare(`
SELECT id, rel_path FROM images
WHERE deleted_at IS NULL AND phash IS NULL
`).all() as Array<{ id: number; rel_path: string }>;
  let hashed = 0, skipped = 0, errors = 0;
  const update = rawDb.prepare(`UPDATE images SET phash = ? WHERE id = ?`);
  for (const r of rows) {
    const abs = safeJoin(LIBRARY_ROOT, r.rel_path);
    if (!abs) { errors++; continue; }
    try {
      const buf = await fs.readFile(abs);
      const hash = await computeDHash(buf);
      update.run(hash, r.id);
      hashed++;
    } catch (err) {
      // A file that is simply gone is an expected condition, not a failure;
      // report it under `skipped` as the contract above promises.
      const isMissingFile =
        typeof err === "object" && err !== null && (err as { code?: unknown }).code === "ENOENT";
      if (isMissingFile) {
        skipped++;
      } else {
        errors++;
      }
    }
  }
  return { hashed, skipped, errors };
}
/**
 * Report pairs of top-level, non-deleted covers whose dHashes differ by at
 * most `threshold` bits.
 *
 * Every row is compared with every later row (brute force, O(n²)), which
 * is fine at personal-library scale. The scan stops as soon as `limit`
 * pairs have been collected, so with a small limit the result holds the
 * first matches in id order rather than the globally closest ones.
 *
 * Pairs with identical SHA-256 are left out (upload dedup handles those),
 * and attached images never take part because only rows without a parent
 * are loaded.
 *
 * @param opts.threshold Maximum Hamming distance, out of 64 bits (default 10).
 * @param opts.limit Maximum number of pairs to collect (default 200).
 * @returns Collected pairs, closest first, ties ordered by the pair's ids.
 */
export async function findNearDuplicates(opts?: { threshold?: number; limit?: number }): Promise<NearDupePair[]> {
  type HashedRow = {
    id: number; code: string | null; filename: string; rel_path: string; thumbPath: string;
    sha256: string; phash: string; width: number; height: number; bytes: number;
  };
  const threshold = opts?.threshold ?? 10;
  const limit = opts?.limit ?? 200;
  const rows = rawDb.prepare(`
SELECT id, code, filename, rel_path, thumb_path AS thumbPath, sha256, phash, width, height, bytes
FROM images
WHERE deleted_at IS NULL AND parent_image_id IS NULL AND phash IS NOT NULL
ORDER BY id ASC
`).all() as HashedRow[];

  // Project a row onto the fields exposed for each side of a pair.
  const toSide = (row: HashedRow): NearDupePair["a"] => ({
    id: row.id,
    code: row.code,
    filename: row.filename,
    thumbPath: row.thumbPath,
    width: row.width,
    height: row.height,
    bytes: row.bytes,
  });

  const pairs: NearDupePair[] = [];
  for (let i = 0; i < rows.length && pairs.length < limit; i++) {
    const left = rows[i];
    for (let j = i + 1; j < rows.length && pairs.length < limit; j++) {
      const right = rows[j];
      // Byte-identical files are handled elsewhere.
      if (left.sha256 === right.sha256) continue;
      const distance = hammingDistance(left.phash, right.phash);
      if (distance <= threshold) {
        pairs.push({ a: toSide(left), b: toSide(right), distance });
      }
    }
  }
  // Closest matches first; ids keep the order stable between runs.
  pairs.sort((x, y) => {
    const byDistance = x.distance - y.distance;
    if (byDistance) return byDistance;
    return x.a.id - y.a.id || x.b.id - y.b.id;
  });
  return pairs;
}