Initial commit
This commit is contained in:
@@ -0,0 +1,620 @@
|
||||
"use server";
|
||||
import path from "node:path";
|
||||
import fs from "node:fs/promises";
|
||||
import sharp from "sharp";
|
||||
import { rawDb } from "@/lib/db/client";
|
||||
import { sanitizeFilename, uniqueFilePath, letterBucket, canonicalThumbName } from "@/lib/filename";
|
||||
import { extractCode } from "@/lib/jav/codeParser";
|
||||
import { computeDHash, hammingDistance } from "@/lib/jav/phash";
|
||||
import { clearAppSettingsCache } from "@/lib/db/appSettings";
|
||||
import { safeJoin } from "@/lib/safePath";
|
||||
import { revalidatePath } from "next/cache";
|
||||
|
||||
/** Joins the given segments onto the server process's current working directory. */
const underCwd = (...segments: string[]): string => path.join(process.cwd(), ...segments);

// Storage roots. The orphan scan walks each one and compares what it finds
// with the database column(s) named in the root's comment.

/** Original files; compared with `images.rel_path`. */
const LIBRARY_ROOT = underCwd("library");
/** Thumbnails; compared with `images.thumb_path`. */
const THUMB_ROOT = underCwd("data", "thumbs");
/** Portraits; compared with the `actresses.portrait*_path` columns. */
const PORTRAIT_ROOT = underCwd("data", "portraits");
/** Category covers; compared with the `tag_categories.cover_*_path` columns. */
const CATEGORY_COVER_ROOT = underCwd("data", "category-covers");
/** Collection covers; compared with the `collections.cover_*_path` columns. */
const COLLECTION_COVER_ROOT = underCwd("data", "collection-covers");

/** Lower-cased names of OS-generated files that directory walks ignore. */
const SYSTEM_FILES = new Set<string>([".ds_store", "thumbs.db", "desktop.ini"]);
|
||||
|
||||
/**
 * Result of an orphan scan: for each storage root, the absolute paths of the
 * files found on disk that no database row references.
 */
interface OrphanReport {
  /** Unreferenced files under the library root. */
  libraryFiles: string[];
  /** Unreferenced files under the thumbnail root. */
  thumbFiles: string[];
  /** Unreferenced files under the portrait root. */
  portraitFiles: string[];
  /** Unreferenced files under the category-cover root. */
  categoryCoverFiles: string[];
  /** Unreferenced files under the collection-cover root. */
  collectionCoverFiles: string[];
  /** Combined size of every listed file that could be stat'ed. */
  bytes: number;
}
|
||||
|
||||
/**
 * Recursively lists the regular files below `dir`.
 *
 * Names in `SYSTEM_FILES` are ignored (case-insensitively). A directory that
 * cannot be read yields an empty list instead of an error. Sibling entries
 * are processed concurrently, so the order of the result is not guaranteed.
 *
 * @param dir Directory to scan.
 * @returns Paths formed by joining `dir` with each nested file name.
 */
async function walk(dir: string): Promise<string[]> {
  let dirents: import("node:fs").Dirent[];
  try {
    dirents = await fs.readdir(dir, { withFileTypes: true });
  } catch {
    return [];
  }

  const collected: string[] = [];
  const visits = dirents.map(async (entry) => {
    const entryPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      const nested = await walk(entryPath);
      collected.push(...nested);
      return;
    }
    if (!entry.isFile()) return;
    if (SYSTEM_FILES.has(entry.name.toLowerCase())) return;
    collected.push(entryPath);
  });
  await Promise.all(visits);
  return collected;
}
|
||||
|
||||
/**
 * Compares the files on disk under each storage root with the paths recorded
 * in the database and reports every file that no row references.
 *
 * Both sides are compared as root-relative paths passed through
 * `path.normalize`. `bytes` is the combined size of all reported files; a
 * file that cannot be stat'ed contributes nothing.
 */
async function findOrphans(): Promise<OrphanReport> {
  /** Builds a lookup set of normalized stored paths. */
  const asKnownSet = (stored: string[]): Set<string> =>
    new Set(stored.map((p) => path.normalize(p)));

  /** Runs a query whose single result column is named `p` and returns its values. */
  const selectPaths = (sql: string): string[] =>
    (rawDb.prepare(sql).all() as Array<{ p: string }>).map((row) => row.p);

  /** Keeps the files whose root-relative path is absent from `known`. */
  const unreferenced = (root: string, files: string[], known: Set<string>): string[] =>
    files.filter((abs) => !known.has(path.normalize(path.relative(root, abs))));

  const knownLibrary = asKnownSet(
    (rawDb.prepare(`SELECT rel_path FROM images`).all() as Array<{ rel_path: string }>)
      .map((row) => row.rel_path),
  );
  const knownThumbs = asKnownSet(
    (rawDb.prepare(`SELECT thumb_path FROM images`).all() as Array<{ thumb_path: string }>)
      .map((row) => row.thumb_path),
  );
  const knownPortraits = asKnownSet(selectPaths(`
      SELECT portrait_path AS p FROM actresses WHERE portrait_path IS NOT NULL
      UNION ALL SELECT portrait2_path FROM actresses WHERE portrait2_path IS NOT NULL
      UNION ALL SELECT portrait3_path FROM actresses WHERE portrait3_path IS NOT NULL
      UNION ALL SELECT portrait4_path FROM actresses WHERE portrait4_path IS NOT NULL
      UNION ALL SELECT portraith_path FROM actresses WHERE portraith_path IS NOT NULL
    `));
  const knownCategoryCovers = asKnownSet(selectPaths(`
      SELECT cover_portrait_path AS p FROM tag_categories WHERE cover_portrait_path IS NOT NULL
      UNION ALL SELECT cover_landscape_path FROM tag_categories WHERE cover_landscape_path IS NOT NULL
    `));
  const knownCollectionCovers = asKnownSet(selectPaths(`
      SELECT cover_portrait_path AS p FROM collections WHERE cover_portrait_path IS NOT NULL
      UNION ALL SELECT cover_landscape_path FROM collections WHERE cover_landscape_path IS NOT NULL
    `));

  // All five roots are scanned concurrently.
  const [onDiskLibrary, onDiskThumbs, onDiskPortraits, onDiskCategoryCovers, onDiskCollectionCovers] =
    await Promise.all([
      walk(LIBRARY_ROOT),
      walk(THUMB_ROOT),
      walk(PORTRAIT_ROOT),
      walk(CATEGORY_COVER_ROOT),
      walk(COLLECTION_COVER_ROOT),
    ]);

  const libraryFiles = unreferenced(LIBRARY_ROOT, onDiskLibrary, knownLibrary);
  const thumbFiles = unreferenced(THUMB_ROOT, onDiskThumbs, knownThumbs);
  const portraitFiles = unreferenced(PORTRAIT_ROOT, onDiskPortraits, knownPortraits);
  const categoryCoverFiles = unreferenced(CATEGORY_COVER_ROOT, onDiskCategoryCovers, knownCategoryCovers);
  const collectionCoverFiles = unreferenced(COLLECTION_COVER_ROOT, onDiskCollectionCovers, knownCollectionCovers);

  const everyOrphan = [
    ...libraryFiles,
    ...thumbFiles,
    ...portraitFiles,
    ...categoryCoverFiles,
    ...collectionCoverFiles,
  ];
  const sizes = await Promise.all(
    everyOrphan.map(async (file) => {
      try {
        return (await fs.stat(file)).size;
      } catch {
        // The file vanished or is unreadable: it adds nothing to the total.
        return 0;
      }
    }),
  );
  const bytes = sizes.reduce((sum, size) => sum + size, 0);

  return { libraryFiles, thumbFiles, portraitFiles, categoryCoverFiles, collectionCoverFiles, bytes };
}
|
||||
|
||||
/**
 * Dry run of the orphan purge.
 *
 * @returns How many unreferenced files exist across all storage roots and
 *          their combined size in bytes.
 */
export async function previewOrphanFiles(): Promise<{ count: number; bytes: number }> {
  const report = await findOrphans();
  const groups = [
    report.libraryFiles,
    report.thumbFiles,
    report.portraitFiles,
    report.categoryCoverFiles,
    report.collectionCoverFiles,
  ];
  const count = groups.reduce((sum, group) => sum + group.length, 0);
  return { count, bytes: report.bytes };
}
|
||||
|
||||
/**
 * Deletes every file reported by the orphan scan, removes directories left
 * empty, and revalidates the index pages.
 *
 * Deletion is best-effort: a file that cannot be removed (for example one
 * locked by another process) is logged and skipped, so it no longer aborts
 * the remaining batches, the empty-directory sweep, or the revalidation.
 *
 * @returns `deleted` — number of removals that completed without error
 *          (a file already gone counts, because `force: true` ignores it);
 *          `bytes` — the total orphan size measured by the scan, which
 *          overstates the space freed when some removals failed.
 */
export async function purgeOrphanFiles(): Promise<{ deleted: number; bytes: number }> {
  const report = await findOrphans();
  const all = [
    ...report.libraryFiles,
    ...report.thumbFiles,
    ...report.portraitFiles,
    ...report.categoryCoverFiles,
    ...report.collectionCoverFiles,
  ];

  // Bound concurrency: Promise.all over thousands of fs.rm calls can
  // exhaust file descriptors (EMFILE) on Windows / low-ulimit hosts.
  const CONCURRENCY = 32;
  let deleted = 0;
  let failed = 0;
  for (let i = 0; i < all.length; i += CONCURRENCY) {
    const batch = all.slice(i, i + CONCURRENCY);
    // allSettled so one failing removal cannot reject the whole purge.
    const outcomes = await Promise.allSettled(batch.map((f) => fs.rm(f, { force: true })));
    for (const outcome of outcomes) {
      if (outcome.status === "fulfilled") deleted++;
      else failed++;
    }
  }
  if (failed > 0) {
    console.warn(`purgeOrphanFiles: ${failed} of ${all.length} orphan files could not be removed`);
  }

  // Sweep empty subdirs across every root that just shed files.
  await Promise.all([
    cleanEmptyDirs(LIBRARY_ROOT),
    cleanEmptyDirs(THUMB_ROOT),
    cleanEmptyDirs(PORTRAIT_ROOT),
    cleanEmptyDirs(CATEGORY_COVER_ROOT),
    cleanEmptyDirs(COLLECTION_COVER_ROOT),
  ]);

  // Indexes that show cover/portrait/thumb counts need to refetch.
  revalidatePath("/");
  revalidatePath("/category");
  revalidatePath("/collection");
  revalidatePath("/actress");
  return { deleted, bytes: report.bytes };
}
|
||||
|
||||
/** Dry-run summary for the library reorganize action. */
interface ReorganizePreview {
  /** Number of image rows examined. */
  total: number;
  /** Rows whose current directory differs from their planned bucket directory. */
  toMove: number;
}
|
||||
|
||||
/** Columns of an `images` row that the reorganize logic reads. */
interface ImageRow {
  /** Primary key. */
  id: number;
  /** Stored file name; used to name the file when it is moved. */
  filename: string;
  /** Current location of the file, relative to the library root. */
  rel_path: string;
  /** Code that selects the letter bucket, or null when none is stored. */
  code: string | null;
  /** Id of the parent image when this row is an attached image, otherwise null. */
  parent_image_id: number | null;
}
|
||||
|
||||
/**
 * Computes the letter-bucket directory (relative to the library root) that a
 * row should live in.
 *
 * A row with `parent_image_id` set is bucketed by its parent's code, so
 * attached images end up next to their parent on disk. A parent id that is
 * not in the map is treated as having no code.
 *
 * @param row Image row to place.
 * @param parentCodeById Code of every image keyed by image id.
 */
function plannedDirRel(row: ImageRow, parentCodeById: Map<number, string | null>): string {
  const bucketCode =
    row.parent_image_id == null
      ? row.code
      : (parentCodeById.get(row.parent_image_id) ?? null);
  return letterBucket(bucketCode).dirRel;
}
|
||||
|
||||
/**
 * Loads every `images` row together with an id → code lookup, which lets
 * attached images resolve their parent's code without another query.
 */
function loadAllImages(): { rows: ImageRow[]; parentCodeById: Map<number, string | null> } {
  const statement = rawDb.prepare(`SELECT id, filename, rel_path, code, parent_image_id FROM images`);
  const rows = statement.all() as ImageRow[];
  const parentCodeById = new Map<number, string | null>(
    rows.map((row): [number, string | null] => [row.id, row.code]),
  );
  return { rows, parentCodeById };
}
|
||||
|
||||
/**
 * Dry run of the reorganize action: counts the rows whose current directory
 * is not their planned letter-bucket directory. Nothing is moved.
 */
export async function previewReorganize(): Promise<ReorganizePreview> {
  const { rows, parentCodeById } = loadAllImages();
  const misplaced = rows.filter((row) => {
    const plannedDir = plannedDirRel(row, parentCodeById);
    // Stored paths may use backslashes; compare in POSIX form.
    const currentDir = path.posix.dirname(row.rel_path.replace(/\\/g, "/"));
    return currentDir !== plannedDir;
  });
  return { total: rows.length, toMove: misplaced.length };
}
|
||||
|
||||
/**
 * Moves every library file into its planned letter-bucket directory and
 * updates `images.rel_path` to match, then removes directories left empty.
 *
 * Per row:
 *  - already in the planned directory → counted as `skipped`;
 *  - stored path escapes the library root, or the file is missing → `errors`;
 *  - otherwise the file is renamed to a unique name inside the target
 *    directory and the row is updated → `moved`.
 *
 * If the database update fails after the rename, the rename is rolled back.
 * Otherwise the file would sit at a path no row references, where the orphan
 * purge would later delete it.
 *
 * @returns Counts of moved, skipped and failed rows.
 */
export async function reorganizeFiles(): Promise<{ moved: number; skipped: number; errors: number }> {
  const { rows, parentCodeById } = loadAllImages();
  // Prepared once rather than once per moved row.
  const updateRelPath = rawDb.prepare(`UPDATE images SET rel_path = ? WHERE id = ?`);

  let moved = 0, skipped = 0, errors = 0;
  for (const r of rows) {
    const target = plannedDirRel(r, parentCodeById);
    const currentDir = path.posix.dirname(r.rel_path.replace(/\\/g, "/"));
    if (currentDir === target) { skipped++; continue; }

    // Resolve through safeJoin, like the other maintenance actions, so a
    // stored path cannot point outside the library root.
    const oldAbs = safeJoin(LIBRARY_ROOT, r.rel_path);
    if (!oldAbs) { errors++; continue; }
    try {
      await fs.access(oldAbs);
    } catch {
      errors++;
      continue;
    }

    const { base, ext } = sanitizeFilename(r.filename || `image${path.extname(r.rel_path)}`);
    const dirAbs = path.join(LIBRARY_ROOT, target);
    try {
      await fs.mkdir(dirAbs, { recursive: true });
      const newAbs = await uniqueFilePath(dirAbs, base, ext);
      await fs.rename(oldAbs, newAbs);
      const newRel = path.posix.join(target, path.basename(newAbs));
      try {
        updateRelPath.run(newRel, r.id);
      } catch (dbError: unknown) {
        // Keep disk and DB in agreement: put the file back (best effort).
        await fs.rename(newAbs, oldAbs).catch(() => {});
        throw dbError;
      }
      moved++;
    } catch {
      errors++;
    }
  }

  await cleanEmptyDirs(LIBRARY_ROOT);
  revalidatePath("/");
  return { moved, skipped, errors };
}
|
||||
|
||||
/**
 * Clears the cached app settings and revalidates each listed index route.
 *
 * @returns `{ ok: true }` once every route has been queued for revalidation.
 */
export async function clearCache(): Promise<{ ok: true }> {
  clearAppSettingsCache();
  const routes = [
    "/",
    "/collection",
    "/tag",
    "/category",
    "/actress",
    "/studios",
    "/series",
    "/genres",
    "/queue",
  ];
  routes.forEach((route) => {
    revalidatePath(route);
  });
  return { ok: true };
}
|
||||
|
||||
/** One row returned by the undersized-cover scan. */
export interface UndersizedCover {
  /** `images.id`. */
  id: number;
  /** Stored code, or null when the row has none. */
  code: string | null;
  /** Stored file name. */
  filename: string;
  /** Stored pixel width. */
  width: number;
  /** Stored pixel height. */
  height: number;
  /** Value of the row's `bytes` column. */
  bytes: number;
  /** Value of the row's `thumb_path` column. */
  thumbPath: string;
}
|
||||
|
||||
/**
 * Lists top-level, non-deleted covers whose stored width or height is below
 * the given floor — typically accidental imports of thumbnails, web previews
 * or other non-cover images.
 *
 * The floors default to 750x500, deliberately below the standard 800x538
 * cover size, because real scans/rips drift by a few pixels either way. The
 * 147x200 outlier that prompted this check falls far below them.
 *
 * @param opts.minWidth Width floor in pixels (default 750).
 * @param opts.minHeight Height floor in pixels (default 500).
 * @returns Matching rows, smallest pixel area first, ties broken by id.
 */
export async function scanUndersizedCovers(opts?: {
  minWidth?: number;
  minHeight?: number;
}): Promise<UndersizedCover[]> {
  const widthFloor = opts?.minWidth ?? 750;
  const heightFloor = opts?.minHeight ?? 500;
  const query = rawDb.prepare(`
    SELECT id, code, filename, width, height, bytes, thumb_path AS thumbPath
    FROM images
    WHERE parent_image_id IS NULL
    AND deleted_at IS NULL
    AND (width < ? OR height < ?)
    ORDER BY (width * height) ASC, id ASC
  `);
  const undersized = query.all(widthFloor, heightFloor) as UndersizedCover[];
  return undersized;
}
|
||||
|
||||
/** Dry-run summary for thumbnail regeneration. */
interface RegenThumbsPreview {
  /** Number of non-deleted image rows examined. */
  total: number;
  /** Rows whose canonical thumbnail file is not on disk. */
  missing: number;
  /** Rows whose stored `thumb_path` differs from the canonical name. */
  staleNames: number;
}
|
||||
|
||||
/**
 * Computes the canonical thumbnail file name for a row.
 *
 * An attached image (one with `parent_image_id` set) uses its parent's code,
 * looked up in the database, so back-covers inherit the parent's prefix. A
 * parent that is missing or has no code is treated as a null code.
 */
function plannedThumbName(row: { sha256: string; code: string | null; parent_image_id: number | null }): string {
  let effectiveCode = row.code;
  if (row.parent_image_id != null) {
    const parentRow = rawDb
      .prepare(`SELECT code FROM images WHERE id = ?`)
      .get(row.parent_image_id) as { code: string | null } | undefined;
    effectiveCode = parentRow?.code ?? null;
  }
  return canonicalThumbName(effectiveCode, row.sha256);
}
|
||||
|
||||
/**
 * Dry run of thumbnail regeneration over all non-deleted rows.
 *
 * `missing` counts rows whose canonical thumbnail file is absent from disk;
 * `staleNames` counts rows whose stored `thumb_path` is not the canonical
 * name. A row can count towards both.
 */
export async function previewRegenThumbnails(): Promise<RegenThumbsPreview> {
  const rows = rawDb.prepare(`
    SELECT thumb_path, sha256, code, parent_image_id FROM images WHERE deleted_at IS NULL
  `).all() as Array<{ thumb_path: string; sha256: string; code: string | null; parent_image_id: number | null }>;

  /** Resolves to true when the path is accessible. */
  const isOnDisk = async (absPath: string): Promise<boolean> => {
    try {
      await fs.access(absPath);
      return true;
    } catch {
      return false;
    }
  };

  let missing = 0;
  let staleNames = 0;
  // One row at a time: this is fine for personal-library scale, and a bulk
  // Promise.all here can fail with EMFILE on very large libraries.
  for (const row of rows) {
    const canonical = plannedThumbName(row);
    if (canonical !== row.thumb_path) staleNames += 1;
    const canonicalAbs = path.join(THUMB_ROOT, canonical);
    if (!(await isOnDisk(canonicalAbs))) missing += 1;
  }
  return { total: rows.length, missing, staleNames };
}
|
||||
|
||||
/**
 * Rebuild thumbnails. Three paths per row:
 *   1. Canonical file already on disk → skip (unless `force`), syncing
 *      `thumb_path` if the DB still holds a legacy name.
 *   2. Legacy file (stored name differs from canonical) is on disk → rename
 *      it to canonical and update `thumb_path`. No re-encode needed; this is
 *      the migration path for libraries that predate the code-prefix naming.
 *   3. Otherwise → read the original from library/ and encode from scratch.
 *
 * In `force` mode every row is re-encoded. A legacy file is removed only
 * AFTER the new thumbnail has been written and the row updated, so a failed
 * encode never leaves a row with no thumbnail and a `thumb_path` pointing at
 * a deleted file.
 *
 * @param opts.force Re-encode even when a usable thumbnail already exists.
 * @returns Counts of re-encoded, renamed, skipped and failed rows.
 */
export async function regenerateThumbnails(opts?: { force?: boolean }): Promise<{ regenerated: number; renamed: number; skipped: number; errors: number }> {
  const force = opts?.force ?? false;
  const rows = rawDb.prepare(`
    SELECT id, rel_path, thumb_path, sha256, code, parent_image_id FROM images WHERE deleted_at IS NULL
  `).all() as Array<{ id: number; rel_path: string; thumb_path: string; sha256: string; code: string | null; parent_image_id: number | null }>;
  // Prepared once rather than at each of the three update sites per row.
  const updateThumbPath = rawDb.prepare(`UPDATE images SET thumb_path = ? WHERE id = ?`);

  await fs.mkdir(THUMB_ROOT, { recursive: true });

  let regenerated = 0, renamed = 0, skipped = 0, errors = 0;
  for (const r of rows) {
    const target = plannedThumbName(r);
    const targetAbs = path.join(THUMB_ROOT, target);

    if (!force) {
      try {
        await fs.access(targetAbs);
        // Canonical file exists. If the DB still has the legacy name,
        // sync the column so future operations don't drift.
        if (r.thumb_path !== target) {
          updateThumbPath.run(target, r.id);
        }
        skipped++;
        continue;
      } catch { /* missing — fall through */ }
    }

    // Try the legacy/current path: if a thumb exists at the stored
    // thumb_path that's different from canonical, rename it instead of
    // re-encoding. Faster, lossless, preserves whatever the file already was.
    let supersededAbs: string | null = null;
    if (r.thumb_path !== target) {
      const oldAbs = safeJoin(THUMB_ROOT, r.thumb_path);
      if (oldAbs) {
        try {
          await fs.access(oldAbs);
          if (force) {
            // Force mode: re-encode at canonical; drop the old file only
            // once the replacement is safely on disk (see below).
            supersededAbs = oldAbs;
          } else {
            await fs.rename(oldAbs, targetAbs);
            updateThumbPath.run(target, r.id);
            renamed++;
            continue;
          }
        } catch { /* legacy file missing — fall through to encode */ }
      }
    }

    const libAbs = safeJoin(LIBRARY_ROOT, r.rel_path);
    if (!libAbs) {
      errors++;
      continue;
    }

    try {
      // Pass the file path to sharp instead of reading into a buffer.
      // The library can contain multi-GB videos that were misclassified
      // as images; reading those into memory would OOM the server.
      // Mirrors lib/ingest/ingest.ts's resize pipeline.
      await sharp(libAbs, { failOn: "none" })
        .rotate()
        .resize({ width: 768, height: 768, fit: "inside", withoutEnlargement: true })
        .webp({ quality: 82 })
        .toFile(targetAbs);
      if (r.thumb_path !== target) {
        updateThumbPath.run(target, r.id);
      }
      regenerated++;
    } catch {
      // Encode or DB update failed: keep the legacy thumbnail in place.
      errors++;
      continue;
    }

    // The replacement exists and the row points at it; the legacy file can go.
    // Skip when the two paths differ only by letter case: on a case-insensitive
    // filesystem they are the same file, and removing it would delete the
    // thumbnail that was just written.
    if (supersededAbs && supersededAbs.toLowerCase() !== targetAbs.toLowerCase()) {
      await fs.rm(supersededAbs, { force: true }).catch(() => {});
    }
  }

  revalidatePath("/");
  return { regenerated, renamed, skipped, errors };
}
|
||||
|
||||
/**
 * Removes every empty directory below `root`, deepest first, so a chain of
 * nested empty directories collapses in one call. `root` itself is never
 * removed. Best-effort: an unreadable root or a failed removal is ignored.
 *
 * @param root Directory whose descendants are swept.
 */
async function cleanEmptyDirs(root: string): Promise<void> {
  let children: import("node:fs").Dirent[];
  try {
    children = await fs.readdir(root, { withFileTypes: true });
  } catch {
    return;
  }

  const subdirs = children
    .filter((child) => child.isDirectory())
    .map((child) => path.join(root, child.name));

  for (const subdir of subdirs) {
    // Empty the subtree first so this directory can become empty too.
    await cleanEmptyDirs(subdir);
    try {
      const leftovers = await fs.readdir(subdir);
      if (leftovers.length > 0) continue;
      await fs.rmdir(subdir);
    } catch {
      // Ignored on purpose: the sweep is opportunistic.
    }
  }
}
|
||||
|
||||
/** Dry-run summary for re-parsing codes from stored filenames. */
export interface ReparseCodesPreview {
  /** Number of top-level, non-deleted rows examined. */
  total: number;
  /** Rows with no stored code for which extractCode now finds one — safe to fill. */
  missing: number;
  /**
   * Rows whose stored code disagrees with extractCode. Overwriting would
   * destroy any manual edit, so applying these is gated behind force=true.
   */
  changed: number;
  /** At most 20 of the changed rows, for display in the preview UI. */
  sampleChanges: Array<{ id: number; filename: string; oldCode: string; newCode: string }>;
}
|
||||
|
||||
/**
 * Dry run of the code re-parse. Runs extractCode against the stored filename
 * of every top-level cover (parent_image_id IS NULL, not soft-deleted) and
 * reports what applying the result would change. Rows where extractCode
 * finds nothing are counted in neither bucket.
 */
export async function previewReparseCodes(): Promise<ReparseCodesPreview> {
  const rows = rawDb.prepare(`
    SELECT id, filename, code FROM images
    WHERE deleted_at IS NULL AND parent_image_id IS NULL
  `).all() as Array<{ id: number; filename: string; code: string | null }>;

  const SAMPLE_CAP = 20;
  const sampleChanges: ReparseCodesPreview["sampleChanges"] = [];
  let missing = 0;
  let changed = 0;

  for (const row of rows) {
    const parsed = extractCode(row.filename);
    if (!parsed) continue;
    if (row.code == null) {
      missing += 1;
      continue;
    }
    if (row.code === parsed) continue;

    changed += 1;
    if (sampleChanges.length < SAMPLE_CAP) {
      sampleChanges.push({ id: row.id, filename: row.filename, oldCode: row.code, newCode: parsed });
    }
  }
  return { total: rows.length, missing, changed, sampleChanges };
}
|
||||
|
||||
/**
 * Applies the code re-parse inside a single transaction.
 *
 * By default only rows with a NULL code are filled (safe). Pass force=true
 * to also overwrite stored codes that disagree with extractCode.
 *
 * Note: only the database is updated. Files stay in their old letter buckets
 * until Reorganize runs, and the code prefix in `<CODE>-<sha>.webp`
 * thumbnail names stays unchanged until Regenerate Thumbnails runs.
 *
 * @returns `filled` — NULL codes that were set; `updated` — differing codes
 *          overwritten (force only); `skipped` — every other row.
 */
export async function reparseCodes(opts?: { force?: boolean }): Promise<{ filled: number; updated: number; skipped: number }> {
  const force = opts?.force ?? false;
  const rows = rawDb.prepare(`
    SELECT id, filename, code FROM images
    WHERE deleted_at IS NULL AND parent_image_id IS NULL
  `).all() as Array<{ id: number; filename: string; code: string | null }>;

  let filled = 0;
  let updated = 0;
  let skipped = 0;

  const applyAll = rawDb.transaction(() => {
    const setCode = rawDb.prepare(`UPDATE images SET code = ? WHERE id = ?`);
    for (const row of rows) {
      const parsed = extractCode(row.filename);
      if (!parsed) {
        skipped += 1;
        continue;
      }
      if (row.code == null) {
        setCode.run(parsed, row.id);
        filled += 1;
        continue;
      }
      if (force && row.code !== parsed) {
        setCode.run(parsed, row.id);
        updated += 1;
        continue;
      }
      // Either the code already matches, or it differs and force is off.
      skipped += 1;
    }
  });
  applyAll();

  revalidatePath("/");
  return { filled, updated, skipped };
}
|
||||
|
||||
/** Two covers whose perceptual hashes are within the requested distance. */
export interface NearDupePair {
  /** First cover of the pair. */
  a: {
    id: number;
    code: string | null;
    filename: string;
    thumbPath: string;
    width: number;
    height: number;
    bytes: number;
  };
  /** Second cover of the pair; same shape as `a`. */
  b: NearDupePair["a"];
  /** Hamming distance between the two covers' hashes. */
  distance: number;
}
|
||||
|
||||
/** Hash-coverage statistics shown before a near-duplicate scan. */
export interface NearDupesPreview {
  /** Number of non-deleted image rows. */
  total: number;
  /** Rows that already have a `phash`. */
  hashed: number;
  /** Rows still without a `phash` (`total - hashed`). */
  unhashed: number;
}
|
||||
|
||||
/**
 * Quick stats: how many non-deleted rows already have a phash and how many
 * still need backfilling.
 *
 * SQLite's SUM() yields NULL when the query matches no rows, so an empty
 * library would otherwise report `hashed: null`; it is coalesced to 0.
 */
export async function previewNearDupes(): Promise<NearDupesPreview> {
  const row = rawDb.prepare(`
    SELECT
      COUNT(*) AS total,
      SUM(CASE WHEN phash IS NOT NULL THEN 1 ELSE 0 END) AS hashed
    FROM images WHERE deleted_at IS NULL
  `).get() as { total: number; hashed: number | null };

  const hashed = row.hashed ?? 0;
  return {
    total: row.total,
    hashed,
    unhashed: row.total - hashed,
  };
}
|
||||
|
||||
/**
 * Backfill `phash` for every non-deleted row that doesn't have one yet.
 * Reads the library file, computes its dHash and writes it to the DB.
 *
 * Per row:
 *  - file missing on disk (ENOENT) → `skipped`, as documented; this was
 *    previously lumped into `errors`, leaving `skipped` always 0;
 *  - stored path rejected by safeJoin, or any other read/hash/DB failure →
 *    `errors`;
 *  - otherwise → `hashed`.
 *
 * @returns Counts of hashed, skipped and failed rows.
 */
export async function backfillPhashes(): Promise<{ hashed: number; skipped: number; errors: number }> {
  const rows = rawDb.prepare(`
    SELECT id, rel_path FROM images
    WHERE deleted_at IS NULL AND phash IS NULL
  `).all() as Array<{ id: number; rel_path: string }>;

  let hashed = 0, skipped = 0, errors = 0;
  const update = rawDb.prepare(`UPDATE images SET phash = ? WHERE id = ?`);
  for (const r of rows) {
    const abs = safeJoin(LIBRARY_ROOT, r.rel_path);
    if (!abs) { errors++; continue; }
    try {
      const buf = await fs.readFile(abs);
      const hash = await computeDHash(buf);
      update.run(hash, r.id);
      hashed++;
    } catch (err: unknown) {
      // Node reports a missing file as an error whose `code` is "ENOENT".
      const errCode = typeof err === "object" && err !== null
        ? (err as { code?: unknown }).code
        : undefined;
      if (errCode === "ENOENT") skipped++;
      else errors++;
    }
  }
  return { hashed, skipped, errors };
}
|
||||
|
||||
/**
 * Finds pairs of covers whose dHashes are within `threshold` Hamming
 * distance. Brute force O(n²); fine for personal-library scale (5k covers
 * ≈ 12.5M comparisons, runs in well under a second).
 *
 * Only top-level, non-deleted rows with a phash are compared, so attached
 * images (intentionally similar to their parent) never appear. Pairs with
 * identical SHA-256 are skipped; those are caught by upload dedup.
 *
 * The scan stops as soon as `limit` pairs have been collected, walking rows
 * in ascending id order; the collected pairs are then sorted tightest first.
 *
 * @param opts.threshold Maximum distance, out of 64 bits (default 10 — a
 *        strong "same image, different encode" signal).
 * @param opts.limit Maximum number of pairs to collect (default 200).
 */
export async function findNearDuplicates(opts?: { threshold?: number; limit?: number }): Promise<NearDupePair[]> {
  const threshold = opts?.threshold ?? 10;
  const limit = opts?.limit ?? 200;

  type HashedRow = {
    id: number; code: string | null; filename: string; rel_path: string; thumbPath: string;
    sha256: string; phash: string; width: number; height: number; bytes: number;
  };
  const rows = rawDb.prepare(`
    SELECT id, code, filename, rel_path, thumb_path AS thumbPath, sha256, phash, width, height, bytes
    FROM images
    WHERE deleted_at IS NULL AND parent_image_id IS NULL AND phash IS NOT NULL
    ORDER BY id ASC
  `).all() as HashedRow[];

  /** Picks the fields exposed for one side of a pair. */
  const summarize = (row: HashedRow): NearDupePair["a"] => ({
    id: row.id,
    code: row.code,
    filename: row.filename,
    thumbPath: row.thumbPath,
    width: row.width,
    height: row.height,
    bytes: row.bytes,
  });

  const pairs: NearDupePair[] = [];
  scan: for (let i = 0; i < rows.length; i++) {
    const first = rows[i];
    for (let j = i + 1; j < rows.length; j++) {
      // Stop the whole scan once the cap is reached.
      if (!(pairs.length < limit)) break scan;
      const second = rows[j];
      if (first.sha256 === second.sha256) continue; // SHA-identical pairs handled elsewhere
      const distance = hammingDistance(first.phash, second.phash);
      if (distance > threshold) continue;
      pairs.push({ a: summarize(first), b: summarize(second), distance });
    }
  }

  // Tightest matches first, then lowest id pair for a stable order.
  pairs.sort((x, y) => x.distance - y.distance || x.a.id - y.a.id || x.b.id - y.b.id);
  return pairs;
}
|
||||
Reference in New Issue
Block a user