"use server";

import path from "node:path";
import fs from "node:fs/promises";
import sharp from "sharp";
import { rawDb } from "@/lib/db/client";
import { sanitizeFilename, uniqueFilePath, letterBucket, canonicalThumbName } from "@/lib/filename";
import { extractCode } from "@/lib/jav/codeParser";
import { computeDHash, hammingDistance } from "@/lib/jav/phash";
import { clearAppSettingsCache } from "@/lib/db/appSettings";
import { safeJoin } from "@/lib/safePath";
import { revalidatePath } from "next/cache";

// On-disk roots managed by this module, all resolved against the process cwd.
const LIBRARY_ROOT = path.join(process.cwd(), "library");
const THUMB_ROOT = path.join(process.cwd(), "data", "thumbs");
const PORTRAIT_ROOT = path.join(process.cwd(), "data", "portraits");
const CATEGORY_COVER_ROOT = path.join(process.cwd(), "data", "category-covers");
const COLLECTION_COVER_ROOT = path.join(process.cwd(), "data", "collection-covers");

/** Lower-cased OS metadata filenames that `walk` never reports. */
const SYSTEM_FILES = new Set([".ds_store", "thumbs.db", "desktop.ini"]);

/** Absolute paths of files found on disk with no matching DB row, per root. */
interface OrphanReport {
  libraryFiles: string[];
  thumbFiles: string[];
  portraitFiles: string[];
  categoryCoverFiles: string[];
  collectionCoverFiles: string[];
  /** Combined size of every orphan that could be stat'ed. */
  bytes: number;
}

/**
 * Recursively list every regular file under `dir` as an absolute path,
 * ignoring SYSTEM_FILES. An unreadable or missing directory yields an
 * empty list. Siblings are visited concurrently, so the order of the
 * result is not stable.
 */
async function walk(dir: string): Promise<string[]> {
  let entries: import("node:fs").Dirent[] = [];
  try {
    entries = await fs.readdir(dir, { withFileTypes: true });
  } catch {
    return [];
  }
  const out: string[] = [];
  await Promise.all(entries.map(async (e) => {
    const full = path.join(dir, e.name);
    if (e.isDirectory()) {
      out.push(...(await walk(full)));
    } else if (e.isFile() && !SYSTEM_FILES.has(e.name.toLowerCase())) {
      out.push(full);
    }
  }));
  return out;
}

/** Run `sql` (which must expose its path column as `p`) and return the normalized paths. */
function loadKnownPaths(sql: string): Set<string> {
  const rows = rawDb.prepare(sql).all() as Array<{ p: string }>;
  return new Set(rows.map((r) => path.normalize(r.p)));
}

/** Keep the files whose path relative to `root` is absent from `known`. */
function filterOrphans(root: string, files: string[], known: Set<string>): string[] {
  return files.filter((abs) => !known.has(path.normalize(path.relative(root, abs))));
}

/**
 * Compare every managed root on disk against the paths recorded in the
 * database and report the files that no row references.
 */
async function findOrphans(): Promise<OrphanReport> {
  const knownLibrary = loadKnownPaths(`SELECT rel_path AS p FROM images`);
  const knownThumbs = loadKnownPaths(`SELECT thumb_path AS p FROM images`);
  const knownPortraits = loadKnownPaths(`
    SELECT portrait_path AS p FROM actresses WHERE portrait_path IS NOT NULL
    UNION ALL SELECT portrait2_path FROM actresses WHERE portrait2_path IS NOT NULL
    UNION ALL SELECT portrait3_path FROM actresses WHERE portrait3_path IS NOT NULL
    UNION ALL SELECT portrait4_path FROM actresses WHERE portrait4_path IS NOT NULL
    UNION ALL SELECT portraith_path FROM actresses WHERE portraith_path IS NOT NULL
  `);
  const knownCategoryCovers = loadKnownPaths(`
    SELECT cover_portrait_path AS p FROM tag_categories WHERE cover_portrait_path IS NOT NULL
    UNION ALL SELECT cover_landscape_path FROM tag_categories WHERE cover_landscape_path IS NOT NULL
  `);
  const knownCollectionCovers = loadKnownPaths(`
    SELECT cover_portrait_path AS p FROM collections WHERE cover_portrait_path IS NOT NULL
    UNION ALL SELECT cover_landscape_path FROM collections WHERE cover_landscape_path IS NOT NULL
  `);

  const [libFiles, thumbFiles, portraitFiles, categoryCoverFiles, collectionCoverFiles] = await Promise.all([
    walk(LIBRARY_ROOT),
    walk(THUMB_ROOT),
    walk(PORTRAIT_ROOT),
    walk(CATEGORY_COVER_ROOT),
    walk(COLLECTION_COVER_ROOT),
  ]);

  const libraryOrphans = filterOrphans(LIBRARY_ROOT, libFiles, knownLibrary);
  const thumbOrphans = filterOrphans(THUMB_ROOT, thumbFiles, knownThumbs);
  const portraitOrphans = filterOrphans(PORTRAIT_ROOT, portraitFiles, knownPortraits);
  const categoryCoverOrphans = filterOrphans(CATEGORY_COVER_ROOT, categoryCoverFiles, knownCategoryCovers);
  const collectionCoverOrphans = filterOrphans(COLLECTION_COVER_ROOT, collectionCoverFiles, knownCollectionCovers);

  // Files that vanish between the walk and the stat simply don't count.
  let bytes = 0;
  await Promise.all([
    ...libraryOrphans,
    ...thumbOrphans,
    ...portraitOrphans,
    ...categoryCoverOrphans,
    ...collectionCoverOrphans,
  ].map(async (f) => {
    try {
      bytes += (await fs.stat(f)).size;
    } catch {}
  }));

  return {
    libraryFiles: libraryOrphans,
    thumbFiles: thumbOrphans,
    portraitFiles: portraitOrphans,
    categoryCoverFiles: categoryCoverOrphans,
    collectionCoverFiles: collectionCoverOrphans,
    bytes,
  };
}

/** Report how many orphan files exist and how much space they occupy, without deleting anything. */
export async function previewOrphanFiles(): Promise<{ count: number; bytes: number }> {
  const report = await findOrphans();
  const count = report.libraryFiles.length
    + report.thumbFiles.length
    + report.portraitFiles.length
    + report.categoryCoverFiles.length
    + report.collectionCoverFiles.length;
  return { count, bytes: report.bytes };
}

/**
 * Delete every orphan file, prune directories left empty, and revalidate
 * the index pages. `deleted` is the number of files submitted for removal.
 */
export async function purgeOrphanFiles(): Promise<{ deleted: number; bytes: number }> {
  const report = await findOrphans();
  const all = [
    ...report.libraryFiles,
    ...report.thumbFiles,
    ...report.portraitFiles,
    ...report.categoryCoverFiles,
    ...report.collectionCoverFiles,
  ];
  // Bound concurrency: Promise.all over thousands of fs.rm calls can
  // exhaust file descriptors (EMFILE) on Windows / low-ulimit hosts.
  const CONCURRENCY = 32;
  for (let i = 0; i < all.length; i += CONCURRENCY) {
    await Promise.all(all.slice(i, i + CONCURRENCY).map((f) => fs.rm(f, { force: true })));
  }
  // Sweep empty subdirs across every root that just shed files.
  await Promise.all([
    cleanEmptyDirs(LIBRARY_ROOT),
    cleanEmptyDirs(THUMB_ROOT),
    cleanEmptyDirs(PORTRAIT_ROOT),
    cleanEmptyDirs(CATEGORY_COVER_ROOT),
    cleanEmptyDirs(COLLECTION_COVER_ROOT),
  ]);
  // Indexes that show cover/portrait/thumb counts need to refetch.
  revalidatePath("/");
  revalidatePath("/category");
  revalidatePath("/collection");
  revalidatePath("/actress");
  return { deleted: all.length, bytes: report.bytes };
}

interface ReorganizePreview {
  /** Number of image rows examined. */
  total: number;
  /** Rows whose current directory differs from the planned bucket. */
  toMove: number;
}

interface ImageRow {
  id: number;
  filename: string;
  rel_path: string;
  code: string | null;
  parent_image_id: number | null;
}

/**
 * Resolve the target letter-bucket directory for a row. Attached images
 * (parent_image_id set) bucket with their parent's code so related files
 * stay together on disk.
 */
function plannedDirRel(row: ImageRow, parentCodeById: Map<number, string | null>): string {
  if (row.parent_image_id != null) {
    const parentCode = parentCodeById.get(row.parent_image_id) ?? null;
    return letterBucket(parentCode).dirRel;
  }
  return letterBucket(row.code).dirRel;
}

/** Load every image row plus an id → code lookup used to resolve parent codes. */
function loadAllImages(): { rows: ImageRow[]; parentCodeById: Map<number, string | null> } {
  const rows = rawDb.prepare(`SELECT id, filename, rel_path, code, parent_image_id FROM images`).all() as ImageRow[];
  const parentCodeById = new Map<number, string | null>();
  for (const r of rows) parentCodeById.set(r.id, r.code);
  return { rows, parentCodeById };
}

/** Count how many library files sit outside their planned letter bucket. */
export async function previewReorganize(): Promise<ReorganizePreview> {
  const { rows, parentCodeById } = loadAllImages();
  let toMove = 0;
  for (const r of rows) {
    const target = plannedDirRel(r, parentCodeById);
    const currentDir = path.posix.dirname(r.rel_path.replace(/\\/g, "/"));
    if (currentDir !== target) toMove++;
  }
  return { total: rows.length, toMove };
}

/**
 * Move every library file into its planned letter bucket and update
 * `rel_path` to match. Rows already in place count as `skipped`; rows
 * whose file is missing, whose stored path is rejected by safeJoin, or
 * whose move fails count as `errors`.
 */
export async function reorganizeFiles(): Promise<{ moved: number; skipped: number; errors: number }> {
  const { rows, parentCodeById } = loadAllImages();
  const updateRelPath = rawDb.prepare(`UPDATE images SET rel_path = ? WHERE id = ?`);
  let moved = 0, skipped = 0, errors = 0;
  for (const r of rows) {
    const target = plannedDirRel(r, parentCodeById);
    const currentDir = path.posix.dirname(r.rel_path.replace(/\\/g, "/"));
    if (currentDir === target) {
      skipped++;
      continue;
    }
    // Same path guard the other maintenance actions apply to stored paths.
    const oldAbs = safeJoin(LIBRARY_ROOT, r.rel_path);
    if (!oldAbs) {
      errors++;
      continue;
    }
    try {
      await fs.access(oldAbs);
    } catch {
      errors++;
      continue;
    }
    const { base, ext } = sanitizeFilename(r.filename || `image${path.extname(r.rel_path)}`);
    const dirAbs = path.join(LIBRARY_ROOT, target);
    try {
      await fs.mkdir(dirAbs, { recursive: true });
      const newAbs = await uniqueFilePath(dirAbs, base, ext);
      await fs.rename(oldAbs, newAbs);
      const newRel = path.posix.join(target, path.basename(newAbs));
      try {
        updateRelPath.run(newRel, r.id);
      } catch (dbErr) {
        // The file moved but the row still points at the old location.
        // Left like this, the orphan purge would treat the moved file as
        // unreferenced and delete it, so put it back (best effort).
        await fs.rename(newAbs, oldAbs).catch(() => {});
        throw dbErr;
      }
      moved++;
    } catch {
      errors++;
    }
  }
  await cleanEmptyDirs(LIBRARY_ROOT);
  revalidatePath("/");
  return { moved, skipped, errors };
}

/** Drop the cached app settings and revalidate every listing route. */
export async function clearCache(): Promise<{ ok: true }> {
  clearAppSettingsCache();
  for (const p of ["/", "/collection", "/tag", "/category", "/actress", "/studios", "/series", "/genres", "/queue"]) {
    revalidatePath(p);
  }
  return { ok: true };
}

export interface UndersizedCover {
  id: number;
  code: string | null;
  filename: string;
  width: number;
  height: number;
  bytes: number;
  thumbPath: string;
}

/**
 * Scan top-level covers whose pixel dimensions look smaller than a
 * standard JAV cover (typically 800x538). Catches accidental imports of
 * thumbnails, web previews, or other non-cover images.
 *
 * Defaults are deliberately permissive — the standard is 800x538 but real
 * scans/rips drift by a few pixels in either direction. The 147x200
 * outlier the user spotted falls well below the floor.
 *
 * A row matches when EITHER dimension is below its floor; results are
 * ordered by pixel area ascending, then id.
 */
export async function scanUndersizedCovers(opts?: {
  minWidth?: number;
  minHeight?: number;
}): Promise<UndersizedCover[]> {
  const minW = opts?.minWidth ?? 750;
  const minH = opts?.minHeight ?? 500;
  return rawDb.prepare(`
    SELECT id, code, filename, width, height, bytes, thumb_path AS thumbPath
    FROM images
    WHERE parent_image_id IS NULL
      AND deleted_at IS NULL
      AND (width < ? OR height < ?)
    ORDER BY (width * height) ASC, id ASC
  `).all(minW, minH) as UndersizedCover[];
}

interface RegenThumbsPreview {
  /** Non-deleted image rows examined. */
  total: number;
  /** Rows whose canonical thumbnail file is absent from disk. */
  missing: number;
  /** Rows whose stored thumb_path differs from the canonical name. */
  staleNames: number;
}

/**
 * Resolve the planned canonical filename for a row: includes parent code
 * lookup for attached images so back-covers inherit the prefix.
 */
function plannedThumbName(row: { sha256: string; code: string | null; parent_image_id: number | null }): string {
  if (row.parent_image_id != null) {
    const parent = rawDb.prepare(`SELECT code FROM images WHERE id = ?`).get(row.parent_image_id) as
      | { code: string | null }
      | undefined;
    return canonicalThumbName(parent?.code ?? null, row.sha256);
  }
  return canonicalThumbName(row.code, row.sha256);
}

/** Count covers whose thumb file is missing on disk or whose stored name is stale. */
export async function previewRegenThumbnails(): Promise<RegenThumbsPreview> {
  const rows = rawDb.prepare(`
    SELECT thumb_path, sha256, code, parent_image_id FROM images WHERE deleted_at IS NULL
  `).all() as Array<{ thumb_path: string; sha256: string; code: string | null; parent_image_id: number | null }>;

  let missing = 0;
  let staleNames = 0;
  // Sequential is fine for personal-library scale; a bulk Promise.all here
  // can blow up with EMFILE on very large libraries.
  for (const r of rows) {
    const target = plannedThumbName(r);
    if (target !== r.thumb_path) staleNames++;
    // Existence is checked at the canonical path, not the stored one.
    const targetAbs = path.join(THUMB_ROOT, target);
    try {
      await fs.access(targetAbs);
    } catch {
      missing++;
    }
  }
  return { total: rows.length, missing, staleNames };
}

/**
 * Rebuild thumbnails. Three paths per row:
 *   1. Canonical file already on disk → skip (unless `force`).
 *   2. Legacy file (different name from canonical) is on disk → rename it
 *      to canonical and update thumb_path. No re-encode needed; this is
 *      the migration path for libraries that predate the code-prefix
 *      naming.
 *   3. Neither file is on disk → read original from library/ and encode
 *      from scratch.
 */
export async function regenerateThumbnails(opts?: { force?: boolean }): Promise<{ regenerated: number; renamed: number; skipped: number; errors: number }> {
  const force = opts?.force ?? false;
  const rows = rawDb.prepare(`
    SELECT id, rel_path, thumb_path, sha256, code, parent_image_id
    FROM images WHERE deleted_at IS NULL
  `).all() as Array<{ id: number; rel_path: string; thumb_path: string; sha256: string; code: string | null; parent_image_id: number | null }>;
  const setThumbPath = rawDb.prepare(`UPDATE images SET thumb_path = ? WHERE id = ?`);

  await fs.mkdir(THUMB_ROOT, { recursive: true });

  let regenerated = 0, renamed = 0, skipped = 0, errors = 0;
  for (const r of rows) {
    const target = plannedThumbName(r);
    const targetAbs = path.join(THUMB_ROOT, target);

    if (!force) {
      try {
        await fs.access(targetAbs);
        // Canonical file exists. If the DB still has the legacy name,
        // sync the column so future operations don't drift.
        if (r.thumb_path !== target) {
          setThumbPath.run(target, r.id);
        }
        skipped++;
        continue;
      } catch { /* missing — fall through */ }
    }

    // Try the legacy/current path: if a thumb exists at the stored
    // thumb_path that's different from canonical, rename it instead of
    // re-encoding. Faster, lossless, preserves whatever the file already
    // was.
    if (r.thumb_path !== target) {
      const oldAbs = safeJoin(THUMB_ROOT, r.thumb_path);
      if (oldAbs) {
        try {
          await fs.access(oldAbs);
          if (force) {
            // Force mode: drop the old file and re-encode at canonical.
            // The removal deliberately precedes the encode: if the two
            // names resolve to the same file (e.g. they differ only by
            // case on a case-insensitive filesystem), removing afterwards
            // would delete the fresh thumbnail.
            await fs.rm(oldAbs, { force: true }).catch(() => {});
          } else {
            await fs.rename(oldAbs, targetAbs);
            setThumbPath.run(target, r.id);
            renamed++;
            continue;
          }
        } catch { /* legacy file missing — fall through to encode */ }
      }
    }

    const libAbs = safeJoin(LIBRARY_ROOT, r.rel_path);
    if (!libAbs) {
      errors++;
      continue;
    }
    try {
      // Pass the file path to sharp instead of reading into a buffer.
      // The library can contain multi-GB videos that were misclassified
      // as images; reading those into memory would OOM the server.
      // sharp streams from disk and reports its own decode errors.
      // Mirrors lib/ingest/ingest.ts's resize pipeline.
      await sharp(libAbs, { failOn: "none" })
        .rotate()
        .resize({ width: 768, height: 768, fit: "inside", withoutEnlargement: true })
        .webp({ quality: 82 })
        .toFile(targetAbs);
      if (r.thumb_path !== target) {
        setThumbPath.run(target, r.id);
      }
      regenerated++;
    } catch {
      errors++;
    }
  }

  revalidatePath("/");
  return { regenerated, renamed, skipped, errors };
}

/**
 * Depth-first removal of empty directories beneath `root`. `root` itself
 * is never removed; unreadable directories and failed removals are
 * ignored.
 */
async function cleanEmptyDirs(root: string): Promise<void> {
  let entries: import("node:fs").Dirent[] = [];
  try {
    entries = await fs.readdir(root, { withFileTypes: true });
  } catch {
    return;
  }
  for (const e of entries) {
    if (!e.isDirectory()) continue;
    const dir = path.join(root, e.name);
    // Children first, so a directory holding only empty directories
    // is itself empty by the time it is checked.
    await cleanEmptyDirs(dir);
    try {
      const remaining = await fs.readdir(dir);
      if (remaining.length === 0) await fs.rmdir(dir);
    } catch {}
  }
}

export interface ReparseCodesPreview {
  total: number;
  /** Rows with no code where extractCode now finds one — safe to fill. */
  missing: number;
  /** Rows where extractCode disagrees with the stored code — overwrite
   *  is destructive of any manual edit, so it's gated behind force=true. */
  changed: number;
  /** Sample of up to 20 changed rows for the preview UI. */
  sampleChanges: Array<{ id: number; filename: string; oldCode: string; newCode: string }>;
}

/**
 * Walk every top-level cover (parent_image_id IS NULL, not soft-deleted)
 * and re-run extractCode against the stored filename. Reports how many
 * rows would change so the user can preview before committing.
 */
export async function previewReparseCodes(): Promise<ReparseCodesPreview> {
  const rows = rawDb.prepare(`
    SELECT id, filename, code FROM images
    WHERE deleted_at IS NULL AND parent_image_id IS NULL
  `).all() as Array<{ id: number; filename: string; code: string | null }>;

  let missing = 0, changed = 0;
  const sampleChanges: ReparseCodesPreview["sampleChanges"] = [];
  for (const r of rows) {
    const extracted = extractCode(r.filename);
    if (!extracted) continue;
    if (r.code == null) {
      missing++;
    } else if (r.code !== extracted) {
      changed++;
      if (sampleChanges.length < 20) {
        sampleChanges.push({ id: r.id, filename: r.filename, oldCode: r.code, newCode: extracted });
      }
    }
  }
  return { total: rows.length, missing, changed, sampleChanges };
}

/**
 * Apply the re-parse. By default only fills rows with NULL code (safe);
 * pass force=true to overwrite codes that disagree with extractCode.
 *
 * Note: this only updates the DB. Files won't move into their new
 * letter buckets until you also run Reorganize. Same for thumbnail
 * filenames — the code prefix in the canonical thumbnail name won't
 * update until Regenerate Thumbnails runs.
 */
export async function reparseCodes(opts?: { force?: boolean }): Promise<{ filled: number; updated: number; skipped: number }> {
  const overwrite = opts?.force ?? false;
  const candidates = rawDb.prepare(`
    SELECT id, filename, code FROM images
    WHERE deleted_at IS NULL AND parent_image_id IS NULL
  `).all() as Array<{ id: number; filename: string; code: string | null }>;

  const tally = { filled: 0, updated: 0, skipped: 0 };

  // All writes share one transaction, so the batch commits or rolls back as a unit.
  const applyAll = rawDb.transaction(() => {
    const setCode = rawDb.prepare(`UPDATE images SET code = ? WHERE id = ?`);
    for (const row of candidates) {
      const parsed = extractCode(row.filename);

      // Nothing recognisable in the filename: leave the row untouched.
      if (!parsed) {
        tally.skipped += 1;
        continue;
      }

      const isEmpty = row.code == null;

      // A stored code is kept when it already matches, or when it differs
      // but the caller did not ask for overwrites.
      if (!isEmpty && (row.code === parsed || !overwrite)) {
        tally.skipped += 1;
        continue;
      }

      setCode.run(parsed, row.id);
      if (isEmpty) {
        tally.filled += 1;
      } else {
        tally.updated += 1;
      }
    }
  });
  applyAll();

  revalidatePath("/");
  return { filled: tally.filled, updated: tally.updated, skipped: tally.skipped };
}

/** Display fields carried for each image in a near-duplicate match. */
interface NearDupeSide {
  id: number;
  code: string | null;
  filename: string;
  thumbPath: string;
  width: number;
  height: number;
  bytes: number;
}

/** Two images whose perceptual hashes are `distance` bits apart. */
export interface NearDupePair {
  a: NearDupeSide;
  b: NearDupeSide;
  distance: number;
}

export interface NearDupesPreview {
  /** Number of non-deleted image rows. */
  total: number;
  /** Rows among `total` that already store a phash. */
  hashed: number;
  /** Rows among `total` that still lack a phash (`total - hashed`). */
  unhashed: number;
}

/**
 * Turn the raw aggregate row into a preview. `hashed` arrives as NULL
 * when the aggregate ran over zero rows (SQLite's SUM yields NULL for an
 * empty input), so it is coerced to 0 here.
 */
function toNearDupesPreview(total: number, hashed: number | null): NearDupesPreview {
  const hashedCount = hashed ?? 0;
  return { total, hashed: hashedCount, unhashed: total - hashedCount };
}

/** Quick stats: how many rows already have a phash vs need backfilling. */
export async function previewNearDupes(): Promise<NearDupesPreview> {
  const row = rawDb.prepare(`
    SELECT
      COUNT(*) AS total,
      SUM(CASE WHEN phash IS NOT NULL THEN 1 ELSE 0 END) AS hashed
    FROM images WHERE deleted_at IS NULL
  `).get() as { total: number; hashed: number | null };
  return toNearDupesPreview(row.total, row.hashed);
}

/** True when `err` carries the Node.js "no such file or directory" code. */
function isMissingFileError(err: unknown): boolean {
  return typeof err === "object" && err !== null && (err as { code?: unknown }).code === "ENOENT";
}

/**
 * Backfill `phash` for every row that doesn't have one yet. Reads the
 * library file, computes dHash, writes to DB.
 *
 * Counters in the result:
 *   - `hashed`:  rows that received a phash.
 *   - `skipped`: rows whose library file is missing on disk (ENOENT).
 *   - `errors`:  rows whose stored path was rejected by safeJoin, or
 *                whose read, hash, or DB write failed for any other reason.
 */
export async function backfillPhashes(): Promise<{ hashed: number; skipped: number; errors: number }> {
  const rows = rawDb.prepare(`
    SELECT id, rel_path FROM images
    WHERE deleted_at IS NULL AND phash IS NULL
  `).all() as Array<{ id: number; rel_path: string }>;

  let hashed = 0, skipped = 0, errors = 0;
  const update = rawDb.prepare(`UPDATE images SET phash = ? WHERE id = ?`);
  for (const r of rows) {
    const abs = safeJoin(LIBRARY_ROOT, r.rel_path);
    if (!abs) { errors++; continue; }
    try {
      const buf = await fs.readFile(abs);
      const hash = await computeDHash(buf);
      update.run(hash, r.id);
      hashed++;
    } catch (err: unknown) {
      // A missing file is an expected state, not a failure: report it
      // separately so `errors` only reflects real problems.
      if (isMissingFileError(err)) {
        skipped++;
      } else {
        errors++;
      }
    }
  }
  return { hashed, skipped, errors };
}

/** Columns loaded for each cover that takes part in the near-duplicate scan. */
interface NearDupeCandidate {
  id: number;
  code: string | null;
  filename: string;
  rel_path: string;
  thumbPath: string;
  sha256: string;
  phash: string;
  width: number;
  height: number;
  bytes: number;
}

/**
 * Pairwise scan over `rows` (in the order given). Collects pairs whose
 * `distance(a.phash, b.phash)` is at most `threshold`, stops as soon as
 * `limit` pairs have been collected, then sorts what was collected by
 * distance, then by the ids of the pair.
 */
function collectNearPairs(
  rows: readonly NearDupeCandidate[],
  threshold: number,
  limit: number,
  distance: (a: string, b: string) => number,
): NearDupePair[] {
  const toSide = (r: NearDupeCandidate): NearDupePair["a"] => ({
    id: r.id,
    code: r.code,
    filename: r.filename,
    thumbPath: r.thumbPath,
    width: r.width,
    height: r.height,
    bytes: r.bytes,
  });

  const pairs: NearDupePair[] = [];
  for (let i = 0; i < rows.length && pairs.length < limit; i++) {
    const a = rows[i];
    for (let j = i + 1; j < rows.length && pairs.length < limit; j++) {
      const b = rows[j];
      if (a.sha256 === b.sha256) continue; // SHA-identical pairs handled elsewhere
      const d = distance(a.phash, b.phash);
      if (d <= threshold) {
        pairs.push({ a: toSide(a), b: toSide(b), distance: d });
      }
    }
  }
  // Sort tightest matches first, then by lowest id pair for stability.
  pairs.sort((x, y) => x.distance - y.distance || x.a.id - y.a.id || x.b.id - y.b.id);
  return pairs;
}

/**
 * Find pairs of covers whose dHashes are within `threshold` Hamming
 * distance. Brute force O(n²); fine for personal-library scale (5k
 * covers ≈ 12.5M comparisons, runs in well under a second).
 *
 * Excludes pairs that are already SHA-identical (those are caught by
 * upload dedup) and excludes attached-image pairs (those are
 * intentionally similar to their parent).
 *
 * Default threshold = 10 (out of 64 bits) is a strong "same image,
 * different encode" signal.
 *
 * `limit` (default 200) caps the number of pairs returned. The scan stops
 * once that many pairs have been found in ascending-id order, so when the
 * cap is hit the result is the first `limit` matches sorted by distance,
 * not necessarily the `limit` tightest matches in the whole library.
 */
export async function findNearDuplicates(opts?: { threshold?: number; limit?: number }): Promise<NearDupePair[]> {
  const threshold = opts?.threshold ?? 10;
  const limit = opts?.limit ?? 200;
  const rows = rawDb.prepare(`
    SELECT id, code, filename, rel_path, thumb_path AS thumbPath, sha256, phash, width, height, bytes
    FROM images
    WHERE deleted_at IS NULL AND parent_image_id IS NULL AND phash IS NOT NULL
    ORDER BY id ASC
  `).all() as NearDupeCandidate[];

  return collectNearPairs(rows, threshold, limit, (x, y) => hammingDistance(x, y));
}