pinkudex/app/actions/maintenance.ts

"use server";
import path from "node:path";
import fs from "node:fs/promises";
import sharp from "sharp";
import { rawDb } from "@/lib/db/client";
import { sanitizeFilename, uniqueFilePath, letterBucket, canonicalThumbName } from "@/lib/filename";
import { extractCode } from "@/lib/jav/codeParser";
import { computeDHash, hammingDistance } from "@/lib/jav/phash";
import { clearAppSettingsCache } from "@/lib/db/appSettings";
import { safeJoin } from "@/lib/safePath";
import { revalidatePath } from "next/cache";

// Absolute roots of the on-disk stores this module scans and cleans up.
// Each one is resolved against the server process's working directory.
const LIBRARY_ROOT = path.join(process.cwd(), "library");
const THUMB_ROOT = path.join(process.cwd(), "data", "thumbs");
const PORTRAIT_ROOT = path.join(process.cwd(), "data", "portraits");
const CATEGORY_COVER_ROOT = path.join(process.cwd(), "data", "category-covers");
const COLLECTION_COVER_ROOT = path.join(process.cwd(), "data", "collection-covers");

// Lower-cased names of OS-generated files that `walk` leaves out of its
// listing, so they are never reported (or purged) as orphans.
const SYSTEM_FILES = new Set([".ds_store", "thumbs.db", "desktop.ini"]);

/**
 * Result of an orphan scan: absolute paths of files found on disk under each
 * managed root that no database row references.
 */
interface OrphanReport {
  /** Orphans found under LIBRARY_ROOT. */
  libraryFiles: string[];
  /** Orphans found under THUMB_ROOT. */
  thumbFiles: string[];
  /** Orphans found under PORTRAIT_ROOT. */
  portraitFiles: string[];
  /** Orphans found under CATEGORY_COVER_ROOT. */
  categoryCoverFiles: string[];
  /** Orphans found under COLLECTION_COVER_ROOT. */
  collectionCoverFiles: string[];
  /** Size in bytes reported by the scan for the orphaned files. */
  bytes: number;
}

/**
 * Recursively list every regular file below `dir` as an absolute path.
 *
 * Files whose lower-cased name is in SYSTEM_FILES are left out, and entries
 * that are neither directories nor regular files are ignored. A directory
 * that cannot be read contributes nothing instead of failing the walk.
 * Sub-directories are walked concurrently, so the order of the result is
 * not guaranteed.
 */
async function walk(dir: string): Promise<string[]> {
  let dirents: import("node:fs").Dirent[];
  try {
    dirents = await fs.readdir(dir, { withFileTypes: true });
  } catch {
    return [];
  }

  const found: string[] = [];
  const pending: Promise<void>[] = [];
  for (const dirent of dirents) {
    const child = path.join(dir, dirent.name);
    if (dirent.isDirectory()) {
      // Start the nested walk right away; its files are appended once it resolves.
      pending.push(walk(child).then((nested) => { found.push(...nested); }));
      continue;
    }
    if (!dirent.isFile()) continue;
    if (SYSTEM_FILES.has(dirent.name.toLowerCase())) continue;
    found.push(child);
  }
  await Promise.all(pending);
  return found;
}

/**
 * Compare what is on disk under each managed root with what the database
 * references, and report the files that nothing points to.
 *
 * Paths are compared relative to their root after `path.normalize`, on both
 * the database side and the disk side.
 *
 * @returns the orphaned absolute paths grouped by root, plus their combined
 *          size in bytes (files that cannot be stat'ed count as 0).
 */
async function findOrphans(): Promise<OrphanReport> {
  const knownLibrary = new Set(
    (rawDb.prepare(`SELECT rel_path FROM images`).all() as Array<{ rel_path: string }>)
      .map((r) => path.normalize(r.rel_path)),
  );
  const knownThumbs = new Set(
    (rawDb.prepare(`SELECT thumb_path FROM images`).all() as Array<{ thumb_path: string }>)
      .map((r) => path.normalize(r.thumb_path)),
  );
  const knownPortraits = new Set(
    (rawDb
      .prepare(`
        SELECT portrait_path AS p FROM actresses WHERE portrait_path IS NOT NULL
        UNION ALL SELECT portrait2_path FROM actresses WHERE portrait2_path IS NOT NULL
        UNION ALL SELECT portrait3_path FROM actresses WHERE portrait3_path IS NOT NULL
        UNION ALL SELECT portrait4_path FROM actresses WHERE portrait4_path IS NOT NULL
        UNION ALL SELECT portraith_path FROM actresses WHERE portraith_path IS NOT NULL
      `)
      .all() as Array<{ p: string }>)
      .map((r) => path.normalize(r.p)),
  );
  const knownCategoryCovers = new Set(
    (rawDb
      .prepare(`
        SELECT cover_portrait_path AS p FROM tag_categories WHERE cover_portrait_path IS NOT NULL
        UNION ALL SELECT cover_landscape_path FROM tag_categories WHERE cover_landscape_path IS NOT NULL
      `)
      .all() as Array<{ p: string }>)
      .map((r) => path.normalize(r.p)),
  );
  const knownCollectionCovers = new Set(
    (rawDb
      .prepare(`
        SELECT cover_portrait_path AS p FROM collections WHERE cover_portrait_path IS NOT NULL
        UNION ALL SELECT cover_landscape_path FROM collections WHERE cover_landscape_path IS NOT NULL
      `)
      .all() as Array<{ p: string }>)
      .map((r) => path.normalize(r.p)),
  );

  const [libFiles, thumbFiles, portraitFiles, categoryCoverFiles, collectionCoverFiles] = await Promise.all([
    walk(LIBRARY_ROOT),
    walk(THUMB_ROOT),
    walk(PORTRAIT_ROOT),
    walk(CATEGORY_COVER_ROOT),
    walk(COLLECTION_COVER_ROOT),
  ]);

  // A file is an orphan when its root-relative path is not referenced by the DB.
  const unreferenced = (root: string, files: string[], known: Set<string>): string[] =>
    files.filter((abs) => !known.has(path.normalize(path.relative(root, abs))));

  const libraryOrphans = unreferenced(LIBRARY_ROOT, libFiles, knownLibrary);
  const thumbOrphans = unreferenced(THUMB_ROOT, thumbFiles, knownThumbs);
  const portraitOrphans = unreferenced(PORTRAIT_ROOT, portraitFiles, knownPortraits);
  const categoryCoverOrphans = unreferenced(CATEGORY_COVER_ROOT, categoryCoverFiles, knownCategoryCovers);
  const collectionCoverOrphans = unreferenced(COLLECTION_COVER_ROOT, collectionCoverFiles, knownCollectionCovers);

  // Collect the sizes first and add them up afterwards. Writing
  // `bytes += (await fs.stat(f)).size` inside concurrent callbacks reads
  // `bytes` BEFORE the await, so every callback would start from 0 and the
  // last one to finish would overwrite the total with a single file's size.
  const sizes = await Promise.all([
    ...libraryOrphans, ...thumbOrphans, ...portraitOrphans,
    ...categoryCoverOrphans, ...collectionCoverOrphans,
  ].map(async (f) => {
    try {
      return (await fs.stat(f)).size;
    } catch {
      // File vanished or is unreadable: it simply does not add to the total.
      return 0;
    }
  }));
  const bytes = sizes.reduce((sum, size) => sum + size, 0);

  return {
    libraryFiles: libraryOrphans,
    thumbFiles: thumbOrphans,
    portraitFiles: portraitOrphans,
    categoryCoverFiles: categoryCoverOrphans,
    collectionCoverFiles: collectionCoverOrphans,
    bytes,
  };
}

/**
 * Dry run of the orphan purge: scans all managed roots and reports how many
 * unreferenced files exist and the size the scan reports for them. Nothing
 * is deleted.
 */
export async function previewOrphanFiles(): Promise<{ count: number; bytes: number }> {
  const {
    libraryFiles,
    thumbFiles,
    portraitFiles,
    categoryCoverFiles,
    collectionCoverFiles,
    bytes,
  } = await findOrphans();
  const groups = [libraryFiles, thumbFiles, portraitFiles, categoryCoverFiles, collectionCoverFiles];
  const count = groups.reduce((total, group) => total + group.length, 0);
  return { count, bytes };
}

/**
 * Delete every file the orphan scan reports, then remove sub-directories
 * that were left empty and revalidate the index pages.
 *
 * Deletion is best-effort: a file that cannot be removed (locked, permission
 * denied, ...) no longer aborts the whole purge. The remaining files are
 * still processed and the directory sweep and revalidation still run.
 *
 * @returns `deleted` — number of files whose removal succeeded;
 *          `bytes` — the size total measured by the scan, which overstates
 *          the space actually freed if some deletions failed.
 */
export async function purgeOrphanFiles(): Promise<{ deleted: number; bytes: number }> {
  const report = await findOrphans();
  const all = [
    ...report.libraryFiles,
    ...report.thumbFiles,
    ...report.portraitFiles,
    ...report.categoryCoverFiles,
    ...report.collectionCoverFiles,
  ];
  // Bound concurrency: Promise.all over thousands of fs.rm calls can
  // exhaust file descriptors (EMFILE) on Windows / low-ulimit hosts.
  const CONCURRENCY = 32;
  let deleted = 0;
  let failed = 0;
  for (let i = 0; i < all.length; i += CONCURRENCY) {
    // allSettled instead of all: one failing removal must not reject the
    // batch and leave the rest of the purge undone.
    const results = await Promise.allSettled(
      all.slice(i, i + CONCURRENCY).map((f) => fs.rm(f, { force: true })),
    );
    for (const result of results) {
      if (result.status === "fulfilled") deleted++;
      else failed++;
    }
  }
  if (failed > 0) {
    console.warn(`purgeOrphanFiles: ${failed} file(s) could not be deleted`);
  }
  // Sweep empty subdirs across every root that just shed files.
  await Promise.all([
    cleanEmptyDirs(LIBRARY_ROOT),
    cleanEmptyDirs(THUMB_ROOT),
    cleanEmptyDirs(PORTRAIT_ROOT),
    cleanEmptyDirs(CATEGORY_COVER_ROOT),
    cleanEmptyDirs(COLLECTION_COVER_ROOT),
  ]);
  // Indexes that show cover/portrait/thumb counts need to refetch.
  revalidatePath("/");
  revalidatePath("/category");
  revalidatePath("/collection");
  revalidatePath("/actress");
  return { deleted, bytes: report.bytes };
}

/** Dry-run summary returned by `previewReorganize`. */
interface ReorganizePreview {
  /** Number of image rows examined. */
  total: number;
  /** Rows whose current directory differs from their planned bucket directory. */
  toMove: number;
}

/** Columns of the `images` table that the reorganize helpers read. */
interface ImageRow {
  id: number;
  /** Stored filename; used as the base name when the file is moved. */
  filename: string;
  /** Location of the file relative to LIBRARY_ROOT. */
  rel_path: string;
  /** Code used to pick the letter bucket; may be null. */
  code: string | null;
  /** Id of the parent image when this row is an attached image, else null. */
  parent_image_id: number | null;
}

/**
 * Work out which letter-bucket directory (relative to the library root) a
 * row belongs in.
 *
 * A row with a parent is bucketed by the parent's code, looked up in
 * `parentCodeById` (an unknown parent counts as "no code"), so attachments
 * end up beside their parent. Any other row is bucketed by its own code.
 */
function plannedDirRel(row: ImageRow, parentCodeById: Map<number, string | null>): string {
  const parentId = row.parent_image_id;
  const bucketCode = parentId == null
    ? row.code
    : (parentCodeById.get(parentId) ?? null);
  return letterBucket(bucketCode).dirRel;
}

/**
 * Load every row of `images` (only the columns in ImageRow) together with an
 * id → code lookup, which is used to resolve the code of a row's parent.
 */
function loadAllImages(): { rows: ImageRow[]; parentCodeById: Map<number, string | null> } {
  const statement = rawDb.prepare(`SELECT id, filename, rel_path, code, parent_image_id FROM images`);
  const rows = statement.all() as ImageRow[];
  const parentCodeById = new Map<number, string | null>(
    rows.map((row): [number, string | null] => [row.id, row.code]),
  );
  return { rows, parentCodeById };
}

/**
 * Dry run of `reorganizeFiles`: counts the rows whose stored directory
 * (compared with forward slashes) is not the planned bucket directory.
 * Touches neither the disk nor the database.
 */
export async function previewReorganize(): Promise<ReorganizePreview> {
  const { rows, parentCodeById } = loadAllImages();
  const misplaced = rows.filter((row) => {
    const plannedDir = plannedDirRel(row, parentCodeById);
    const storedDir = path.posix.dirname(row.rel_path.replace(/\\/g, "/"));
    return storedDir !== plannedDir;
  });
  return { total: rows.length, toMove: misplaced.length };
}

/**
 * Move every library file into its planned letter-bucket directory and
 * update `images.rel_path` to match.
 *
 * Per row:
 *   - already in the planned directory → counted as `skipped`;
 *   - stored path escapes the library root, or the file is missing on disk
 *     → counted as `errors`;
 *   - otherwise the file is renamed to a collision-free name in the target
 *     directory and the row is updated → counted as `moved`.
 *
 * If the database update fails after the rename, the rename is rolled back
 * (best effort) so the row keeps pointing at a file that exists and the
 * moved file is not left behind as an apparent orphan.
 *
 * Afterwards empty directories under the library root are removed and "/"
 * is revalidated.
 */
export async function reorganizeFiles(): Promise<{ moved: number; skipped: number; errors: number }> {
  const { rows, parentCodeById } = loadAllImages();
  // Prepared once; re-preparing the same statement per row is wasted work.
  const updateRelPath = rawDb.prepare(`UPDATE images SET rel_path = ? WHERE id = ?`);

  let moved = 0, skipped = 0, errors = 0;
  for (const r of rows) {
    const target = plannedDirRel(r, parentCodeById);
    const currentDir = path.posix.dirname(r.rel_path.replace(/\\/g, "/"));
    if (currentDir === target) { skipped++; continue; }

    // Same guard the other maintenance actions use: never touch a path that
    // resolves outside the library root.
    const oldAbs = safeJoin(LIBRARY_ROOT, r.rel_path);
    if (!oldAbs) {
      errors++;
      continue;
    }
    try {
      await fs.access(oldAbs);
    } catch {
      errors++;
      continue;
    }

    const { base, ext } = sanitizeFilename(r.filename || `image${path.extname(r.rel_path)}`);
    const dirAbs = path.join(LIBRARY_ROOT, target);
    // Set only once the rename has happened, so the catch knows whether
    // there is anything to roll back.
    let renamedTo: string | null = null;
    try {
      await fs.mkdir(dirAbs, { recursive: true });
      const newAbs = await uniqueFilePath(dirAbs, base, ext);
      await fs.rename(oldAbs, newAbs);
      renamedTo = newAbs;
      const newRel = path.posix.join(target, path.basename(newAbs));
      updateRelPath.run(newRel, r.id);
      moved++;
    } catch {
      errors++;
      if (renamedTo) {
        // DB update failed after the move: put the file back where the row
        // still says it is.
        await fs.rename(renamedTo, oldAbs).catch(() => {});
      }
    }
  }

  await cleanEmptyDirs(LIBRARY_ROOT);
  revalidatePath("/");
  return { moved, skipped, errors };
}

/**
 * Drop the cached app settings and ask Next.js to revalidate each of the
 * listed index routes.
 */
export async function clearCache(): Promise<{ ok: true }> {
  clearAppSettingsCache();
  const routes = ["/", "/collection", "/tag", "/category", "/actress", "/studios", "/series", "/genres", "/queue"];
  routes.forEach((route) => {
    revalidatePath(route);
  });
  return { ok: true };
}

/** One row returned by `scanUndersizedCovers`, read from the `images` table. */
export interface UndersizedCover {
  id: number;
  code: string | null;
  filename: string;
  /** Stored pixel width. */
  width: number;
  /** Stored pixel height. */
  height: number;
  bytes: number;
  /** The row's `thumb_path` column. */
  thumbPath: string;
}

/**
 * List top-level, non-deleted covers that are narrower than `minWidth` OR
 * shorter than `minHeight`, smallest pixel area first (ties broken by id).
 *
 * Intended to surface thumbnails, web previews and other non-cover images
 * that were imported by accident. A standard JAV cover is typically
 * 800x538; the default floors (750 wide, 500 high) are deliberately loose
 * because real scans and rips drift by a few pixels.
 */
export async function scanUndersizedCovers(opts?: {
  minWidth?: number;
  minHeight?: number;
}): Promise<UndersizedCover[]> {
  const widthFloor = opts?.minWidth ?? 750;
  const heightFloor = opts?.minHeight ?? 500;
  const query = rawDb.prepare(`
    SELECT id, code, filename, width, height, bytes, thumb_path AS thumbPath
    FROM images
    WHERE parent_image_id IS NULL
      AND deleted_at IS NULL
      AND (width < ? OR height < ?)
    ORDER BY (width * height) ASC, id ASC
  `);
  const undersized = query.all(widthFloor, heightFloor) as UndersizedCover[];
  return undersized;
}

/** Dry-run summary returned by `previewRegenThumbnails`. */
interface RegenThumbsPreview {
  /** Number of non-deleted image rows examined. */
  total: number;
  /** Rows whose canonical thumbnail file is not accessible on disk. */
  missing: number;
  /** Rows whose stored thumb_path differs from the canonical name. */
  staleNames: number;
}

/**
 * Compute the canonical thumbnail filename for a row.
 *
 * An attached image (one with a parent) takes the code of its parent row,
 * fetched from the database; if the parent row is not found the name is
 * built with no code. Every other row uses its own code.
 */
function plannedThumbName(row: { sha256: string; code: string | null; parent_image_id: number | null }): string {
  if (row.parent_image_id == null) {
    return canonicalThumbName(row.code, row.sha256);
  }
  const parentRow = rawDb.prepare(`SELECT code FROM images WHERE id = ?`).get(row.parent_image_id) as
    | { code: string | null }
    | undefined;
  const inheritedCode = parentRow?.code ?? null;
  return canonicalThumbName(inheritedCode, row.sha256);
}

/**
 * Dry run of thumbnail regeneration over all non-deleted rows.
 *
 * `staleNames` counts rows whose stored thumb_path is not the canonical
 * name; `missing` counts rows whose canonical file cannot be accessed under
 * the thumbnail root. A row can be counted in both.
 */
export async function previewRegenThumbnails(): Promise<RegenThumbsPreview> {
  type ThumbRow = { thumb_path: string; sha256: string; code: string | null; parent_image_id: number | null };
  const rows = rawDb.prepare(`
    SELECT thumb_path, sha256, code, parent_image_id FROM images WHERE deleted_at IS NULL
  `).all() as ThumbRow[];

  const preview: RegenThumbsPreview = { total: rows.length, missing: 0, staleNames: 0 };
  // One access check at a time: firing them all at once can hit EMFILE on
  // very large libraries, and sequential is quick enough at this scale.
  for (const row of rows) {
    const canonical = plannedThumbName(row);
    if (canonical !== row.thumb_path) preview.staleNames += 1;

    const canonicalAbs = path.join(THUMB_ROOT, canonical);
    let present = true;
    try {
      await fs.access(canonicalAbs);
    } catch {
      present = false;
    }
    if (!present) preview.missing += 1;
  }
  return preview;
}

/**
 * Rebuild thumbnails. Paths per row:
 *   1. Canonical file already on disk → skip (unless `force`); the stored
 *      thumb_path is synced to the canonical name if it had drifted.
 *   2. Legacy file (different name from canonical) is on disk → rename it
 *      to canonical and update thumb_path. No re-encode needed; this is
 *      the migration path for libraries that predate the code-prefix
 *      naming.
 *   3. Neither file is on disk, or `force` is set → read the original from
 *      library/ and encode from scratch.
 *
 * In force mode a legacy-named thumbnail is removed only AFTER the new one
 * has been encoded and the row updated, so a failed encode never leaves the
 * row without any thumbnail on disk.
 *
 * @param opts.force re-encode even when a canonical thumbnail exists.
 * @returns counts of re-encoded, renamed, skipped and failed rows.
 */
export async function regenerateThumbnails(opts?: { force?: boolean }): Promise<{ regenerated: number; renamed: number; skipped: number; errors: number }> {
  const force = opts?.force ?? false;
  const rows = rawDb.prepare(`
    SELECT id, rel_path, thumb_path, sha256, code, parent_image_id FROM images WHERE deleted_at IS NULL
  `).all() as Array<{ id: number; rel_path: string; thumb_path: string; sha256: string; code: string | null; parent_image_id: number | null }>;
  // Prepared once and reused for every row that needs its thumb_path synced.
  const setThumbPath = rawDb.prepare(`UPDATE images SET thumb_path = ? WHERE id = ?`);

  await fs.mkdir(THUMB_ROOT, { recursive: true });

  let regenerated = 0, renamed = 0, skipped = 0, errors = 0;
  for (const r of rows) {
    const target = plannedThumbName(r);
    const targetAbs = path.join(THUMB_ROOT, target);

    if (!force) {
      try {
        await fs.access(targetAbs);
        // Canonical file exists. If the DB still has the legacy name,
        // sync the column so future operations don't drift.
        if (r.thumb_path !== target) {
          setThumbPath.run(target, r.id);
        }
        skipped++;
        continue;
      } catch { /* missing — fall through */ }
    }

    // Try the legacy/current path: if a thumb exists at the stored
    // thumb_path that's different from canonical, rename it instead of
    // re-encoding. Faster, lossless, preserves whatever the file already
    // was.
    // In force mode the legacy file is only remembered here and deleted
    // once the replacement has been written.
    let legacyToRemove: string | null = null;
    if (r.thumb_path !== target) {
      const oldAbs = safeJoin(THUMB_ROOT, r.thumb_path);
      if (oldAbs) {
        try {
          await fs.access(oldAbs);
          if (force) {
            legacyToRemove = oldAbs;
          } else {
            await fs.rename(oldAbs, targetAbs);
            setThumbPath.run(target, r.id);
            renamed++;
            continue;
          }
        } catch { /* legacy file missing — fall through to encode */ }
      }
    }

    const libAbs = safeJoin(LIBRARY_ROOT, r.rel_path);
    if (!libAbs) {
      errors++;
      continue;
    }

    try {
      // Pass the file path to sharp instead of reading into a buffer.
      // The library can contain multi-GB videos that were misclassified
      // as images; reading those into memory would OOM the server.
      // sharp streams from disk and reports its own decode errors.
      // Mirrors lib/ingest/ingest.ts's resize pipeline.
      await sharp(libAbs, { failOn: "none" })
        .rotate()
        .resize({ width: 768, height: 768, fit: "inside", withoutEnlargement: true })
        .webp({ quality: 82 })
        .toFile(targetAbs);
      if (r.thumb_path !== target) {
        setThumbPath.run(target, r.id);
      }
      regenerated++;
    } catch {
      // Encode or DB sync failed: keep any legacy thumbnail in place.
      errors++;
      continue;
    }

    if (legacyToRemove) {
      // The row now points at the canonical file; the legacy one is redundant.
      await fs.rm(legacyToRemove, { force: true }).catch(() => {});
    }
  }

  revalidatePath("/");
  return { regenerated, renamed, skipped, errors };
}

/**
 * Remove every empty directory below `root`, deepest first, so a chain of
 * nested empty directories collapses in one pass. `root` itself is never
 * removed. Unreadable directories and failed removals are ignored.
 */
async function cleanEmptyDirs(root: string): Promise<void> {
  let children: import("node:fs").Dirent[];
  try {
    children = await fs.readdir(root, { withFileTypes: true });
  } catch {
    return;
  }

  const subdirs = children
    .filter((child) => child.isDirectory())
    .map((child) => path.join(root, child.name));

  for (const sub of subdirs) {
    // Empty the subtree first; that may be what makes `sub` itself empty.
    await cleanEmptyDirs(sub);
    try {
      const leftover = await fs.readdir(sub);
      if (leftover.length > 0) continue;
      await fs.rmdir(sub);
    } catch {
      // Best effort: leave the directory in place if it cannot be read or removed.
    }
  }
}

/** Dry-run summary returned by `previewReparseCodes`. */
export interface ReparseCodesPreview {
  /** Number of top-level, non-deleted rows examined. */
  total: number;
  /** Rows with no code where extractCode now finds one — safe to fill. */
  missing: number;
  /** Rows where extractCode disagrees with the stored code — overwrite
   *  is destructive of any manual edit, so it's gated behind force=true. */
  changed: number;
  /** Sample of up to 20 changed rows for the preview UI. */
  sampleChanges: Array<{ id: number; filename: string; oldCode: string; newCode: string }>;
}

/**
 * Dry run of `reparseCodes`. Re-runs extractCode on the stored filename of
 * every top-level, non-deleted cover and tallies what would happen:
 * rows with no code that would gain one (`missing`) and rows whose stored
 * code differs from the extracted one (`changed`, with up to 20 examples).
 * Rows where nothing can be extracted are not counted in either bucket.
 */
export async function previewReparseCodes(): Promise<ReparseCodesPreview> {
  const SAMPLE_CAP = 20;
  const rows = rawDb.prepare(`
    SELECT id, filename, code FROM images
    WHERE deleted_at IS NULL AND parent_image_id IS NULL
  `).all() as Array<{ id: number; filename: string; code: string | null }>;

  const preview: ReparseCodesPreview = { total: rows.length, missing: 0, changed: 0, sampleChanges: [] };
  for (const row of rows) {
    const parsed = extractCode(row.filename);
    if (!parsed) continue;
    if (row.code == null) {
      preview.missing += 1;
      continue;
    }
    if (row.code === parsed) continue;

    preview.changed += 1;
    if (preview.sampleChanges.length < SAMPLE_CAP) {
      preview.sampleChanges.push({
        id: row.id,
        filename: row.filename,
        oldCode: row.code,
        newCode: parsed,
      });
    }
  }
  return preview;
}

/**
 * Re-run extractCode on every top-level, non-deleted cover and write the
 * results back in a single transaction.
 *
 * Rows without a code are always filled. Rows whose stored code differs
 * from the extracted one are overwritten only when `force` is true;
 * otherwise they are skipped, as are rows that already match and rows
 * where nothing can be extracted.
 *
 * Only the database changes here: files stay in their current letter
 * buckets until Reorganize runs, and thumbnail filenames keep their old
 * `<CODE>-<sha>.webp` prefix until Regenerate Thumbnails runs.
 */
export async function reparseCodes(opts?: { force?: boolean }): Promise<{ filled: number; updated: number; skipped: number }> {
  const overwrite = opts?.force ?? false;
  const rows = rawDb.prepare(`
    SELECT id, filename, code FROM images
    WHERE deleted_at IS NULL AND parent_image_id IS NULL
  `).all() as Array<{ id: number; filename: string; code: string | null }>;

  const tally = { filled: 0, updated: 0, skipped: 0 };
  const applyAll = rawDb.transaction(() => {
    const setCode = rawDb.prepare(`UPDATE images SET code = ? WHERE id = ?`);
    for (const row of rows) {
      const parsed = extractCode(row.filename);
      if (!parsed) {
        tally.skipped += 1;
        continue;
      }
      if (row.code == null) {
        setCode.run(parsed, row.id);
        tally.filled += 1;
        continue;
      }
      const disagrees = row.code !== parsed;
      if (disagrees && overwrite) {
        setCode.run(parsed, row.id);
        tally.updated += 1;
        continue;
      }
      // Either already correct, or a disagreement we were not told to overwrite.
      tally.skipped += 1;
    }
  });
  applyAll();
  revalidatePath("/");
  return tally;
}

/** Two covers whose perceptual hashes are close, as reported by `findNearDuplicates`. */
export interface NearDupePair {
  /** The pair member that comes first in the id-ordered scan. */
  a: { id: number; code: string | null; filename: string; thumbPath: string; width: number; height: number; bytes: number };
  /** The pair member that comes later in the id-ordered scan. */
  b: { id: number; code: string | null; filename: string; thumbPath: string; width: number; height: number; bytes: number };
  /** Hamming distance between the two phash values. */
  distance: number;
}

/** Hash-coverage stats returned by `previewNearDupes`. */
export interface NearDupesPreview {
  /** Number of non-deleted image rows. */
  total: number;
  /** Rows that already have a phash. */
  hashed: number;
  /** Rows still without a phash (total minus hashed). */
  unhashed: number;
}

/**
 * Quick stats: how many non-deleted rows already have a phash vs need
 * backfilling.
 *
 * @returns `total`, `hashed` and `unhashed` (= total - hashed); all three
 *          are 0 for an empty library.
 */
export async function previewNearDupes(): Promise<NearDupesPreview> {
  const row = rawDb.prepare(`
    SELECT
      COUNT(*) AS total,
      SUM(CASE WHEN phash IS NOT NULL THEN 1 ELSE 0 END) AS hashed
    FROM images WHERE deleted_at IS NULL
  `).get() as { total: number; hashed: number | null };
  // SUM() over zero rows yields NULL in SQLite, so an empty library would
  // otherwise report `hashed: null` instead of 0.
  const hashed = row.hashed ?? 0;
  return {
    total: row.total,
    hashed,
    unhashed: row.total - hashed,
  };
}

/**
 * Backfill `phash` for every non-deleted row that doesn't have one yet.
 * Reads the library file, computes dHash, writes to DB.
 *
 * @returns `hashed` — rows that received a phash;
 *          `skipped` — rows whose file is missing on disk (ENOENT);
 *          `errors` — rows whose stored path escapes the library root, or
 *          whose file could not be read, hashed or saved for any other
 *          reason.
 */
export async function backfillPhashes(): Promise<{ hashed: number; skipped: number; errors: number }> {
  const rows = rawDb.prepare(`
    SELECT id, rel_path FROM images
    WHERE deleted_at IS NULL AND phash IS NULL
  `).all() as Array<{ id: number; rel_path: string }>;

  let hashed = 0, skipped = 0, errors = 0;
  const update = rawDb.prepare(`UPDATE images SET phash = ? WHERE id = ?`);
  for (const r of rows) {
    const abs = safeJoin(LIBRARY_ROOT, r.rel_path);
    if (!abs) { errors++; continue; }

    let buf: Buffer;
    try {
      buf = await fs.readFile(abs);
    } catch (err: unknown) {
      // A file that is simply gone is a skip, not a failure; anything else
      // (permissions, I/O error, ...) is a real error.
      const code = (err as { code?: unknown } | null)?.code;
      if (code === "ENOENT") skipped++;
      else errors++;
      continue;
    }

    try {
      const hash = await computeDHash(buf);
      update.run(hash, r.id);
      hashed++;
    } catch {
      errors++;
    }
  }
  return { hashed, skipped, errors };
}

/**
 * Report pairs of top-level, non-deleted covers whose dHashes differ by at
 * most `threshold` bits (Hamming distance).
 *
 * The comparison is a brute-force O(n²) sweep in ascending id order, which
 * is fine at personal-library scale (5k covers ≈ 12.5M comparisons).
 * Attached images are excluded by the query, since they are intentionally
 * similar to their parent. Pairs with identical SHA-256 are skipped because
 * upload dedup already catches those.
 *
 * The sweep stops as soon as `limit` pairs have been collected; the
 * collected pairs are then sorted tightest first, with ids as tie-breakers.
 * The result is therefore the first `limit` pairs found, not necessarily
 * the `limit` tightest pairs in the library.
 *
 * @param opts.threshold maximum distance out of 64 bits; default 10, a
 *        strong "same image, different encode" signal.
 * @param opts.limit maximum number of pairs to collect; default 200.
 */
export async function findNearDuplicates(opts?: { threshold?: number; limit?: number }): Promise<NearDupePair[]> {
  const maxDistance = opts?.threshold ?? 10;
  const maxPairs = opts?.limit ?? 200;

  type Candidate = {
    id: number; code: string | null; filename: string; rel_path: string; thumbPath: string;
    sha256: string; phash: string; width: number; height: number; bytes: number;
  };
  const candidates = rawDb.prepare(`
    SELECT id, code, filename, rel_path, thumb_path AS thumbPath, sha256, phash, width, height, bytes
    FROM images
    WHERE deleted_at IS NULL AND parent_image_id IS NULL AND phash IS NOT NULL
    ORDER BY id ASC
  `).all() as Candidate[];

  // Keep only the fields the pair payload exposes.
  const toSide = (c: Candidate): NearDupePair["a"] => ({
    id: c.id,
    code: c.code,
    filename: c.filename,
    thumbPath: c.thumbPath,
    width: c.width,
    height: c.height,
    bytes: c.bytes,
  });

  const found: NearDupePair[] = [];
  scan: for (let i = 0; i < candidates.length; i++) {
    for (let j = i + 1; j < candidates.length; j++) {
      if (!(found.length < maxPairs)) break scan;
      const first = candidates[i];
      const second = candidates[j];
      if (first.sha256 === second.sha256) continue; // SHA-identical pairs handled elsewhere
      const distance = hammingDistance(first.phash, second.phash);
      if (distance > maxDistance) continue;
      found.push({ a: toSide(first), b: toSide(second), distance });
    }
  }

  // Tightest matches first, then lowest id pair for a stable order.
  found.sort((x, y) => x.distance - y.distance || x.a.id - y.a.id || x.b.id - y.b.id);
  return found;
}