Step 10i: rc-jav.py becomes a thin shim; main() lives in rcjav/cli.py

The real entrypoint moved into rcjav/cli.py (845 lines: imports, the remaining top-level glue, the collectors, and main()). rc-jav.py is now a 25-line shim that does two things:
- `from rcjav import *`, to re-export the package surface for callers that load this script via importlib.spec_from_file_location (tests/test_rules.py, fixtures/run.py, the native-messaging host via importlib).
- `from rcjav.cli import main`, which it calls under `__main__`.

Verified all four entry points:
- python rc-jav.py --help → ok (legacy CLI invocation)
- python -m rcjav.cli --help → ok (package-direct)
- python fixtures/run.py → 17/17 cases pass
- python -m unittest tests.test_rules → 5/5 OK

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-22 22:01:52 +02:00
parent fb5700cdab
commit 1cc2c38128
2 changed files with 858 additions and 833 deletions
@@ -0,0 +1,845 @@
+#!/usr/bin/env python3
+"""Scan rclone remotes for duplicate JAV files grouped by ID."""
+from __future__ import annotations
+
+import argparse
+import csv
+import fnmatch
+import json
+import os
+import re
+import subprocess
+import sys
+import threading
+import time
+import xml.etree.ElementTree as ET
+from dataclasses import dataclass, asdict
+from datetime import datetime
+from pathlib import Path
+from typing import Iterable
+
+from rich.console import Console
+from rich.panel import Panel
+from rich.progress import (
+    BarColumn,
+    MofNCompleteColumn,
+    Progress,
+    SpinnerColumn,
+    TextColumn,
+    TimeElapsedColumn,
+    TimeRemainingColumn,
+)
+from rich.table import Table
+from rich.text import Text
+
+from rcjav.model import FileEntry
+from rcjav import ids as _rcjav_ids
+from rcjav.ids import (
+    PRIMARY_ID_RE,
+    FALLBACK_ID_RE,
+    COMPOUND_ID_RE,
+    RANGE_RE,
+    BUILTIN_PART_RES,
+    configure_part_patterns,
+    detect_part,
+    detect_part_from_stem,
+    part_key,
+    extract_id,
+    normalize_id,
+    describe_id_match,
+    expand_range,
+    _VARIANT_SUFFIX_RE,
+    _RES_LABEL_RE,
+    _RESOLUTION_TAG_RE,
+    _BRACKET_ID_RE,
+    _NOHYPHEN_ID_RE,
+    _VIDEO_EXTS,
+    _LOWEST_KEEP_PRIORITY_EXTS,
+)
+
+
+# PART_RES is rebound by configure_part_patterns(); always read it dynamically
+# from the rcjav.ids module rather than capturing a stale binding at import time.
+def _current_part_res():
+    return _rcjav_ids.PART_RES
+
+
+from rcjav.rclone_io import (
+    RCLONE_BIN,
+    DURATION_RE,
+    set_basic as _set_rclone_basic,
+    set_rclone_bin as _set_rclone_bin,
+    quick_search_remote,
+    choose_search_mode,
+    name_to_include_patterns,
+    name_match,
+    query_to_include_patterns,
+    remote_file_count,
+    parse_duration,
+    walk_remote,
+)
+from rcjav import output as _output
+from rcjav.output import (
+    human_size,
+    ansi,
+    ANSI_RESET,
+    ANSI_GREEN,
+    ANSI_RED,
+    ANSI_YELLOW,
+    ANSI_CYAN,
+    ANSI_DIM,
+    ANSI_BOLD,
+    strip_markup,
+    BasicProgress,
+    make_progress,
+    render_banner,
+    render_search,
+    render_name_matches,
+    render_name_matches_plain,
+    render_dupes,
+    render_banner_plain,
+    render_search_plain,
+    render_dupes_plain,
+    write_txt,
+    write_csv,
+    write_json,
+    describe_skipped_id,
+    dupes_to_obj,
+    set_use_ansi as _set_output_use_ansi,
+    set_basic as _set_output_basic,
+    set_console_no_color as _set_output_no_color,
+)
+
+# rc-jav.py keeps its own local rich Console for the prints that haven't
+# moved to rcjav.output yet (collectors, main()). When --no-color is in
+# play we rebind both this and rcjav.output's console.
+console = Console()
+
+
+# Mirror of rcjav.rclone_io.BASIC for in-tree readers that haven't been
+# updated yet (output renderers, BasicProgress checks in main()). Set in
+# main() via both this name and _set_rclone_basic().
+BASIC = False  # set by --basic
+console = Console()  # replaced in main() if --no-color
+
+
+# Default remotes. main() falls back to DEFAULT_TARGET when --search, --name
+# or --scan is used without any --source/--target; --clearjav fills in both
+# DEFAULT_SOURCE and DEFAULT_TARGET. main() overrides these from config.json
+# ("default_source" / "default_target") before they are consulted.
+DEFAULT_SOURCE = ["cq:personal-files/ClearJAV"]
+DEFAULT_TARGET = ["cq:personal-files/JAV/TMP"]
+
+# Default WinCatalog export folder (or specific files). Folder entries auto-discover *.csv / *.xml.
+# Used by main() when no --catalog is passed; overridden by config.json's
+# "default_catalog".
+# NOTE(review): Path(__file__) is this module's own location, so the default
+# resolves to a "wincatalog" folder beside this file — confirm that is still
+# the intended place now that the code no longer lives in rc-jav.py.
+DEFAULT_CATALOG: list[str] = [str(Path(__file__).resolve().parent / "wincatalog")]
+
+from rcjav.catalog import (
+    CATALOG_COL_NAME,
+    CATALOG_COL_PATH,
+    CATALOG_COL_SIZE,
+    CATALOG_COL_DISC,
+    normalize_catalog_path,
+    load_catalog_csv,
+    load_catalog_xml,
+    load_catalogs,
+    _expand_catalog_paths,
+)
+from rcjav.cache import (
+    CACHE_PATH,
+    CACHE_VERSION,
+    CACHE_STALE_HOURS,
+    load_cache,
+    save_cache,
+    cache_age_hours,
+    fmt_age,
+)
+
+from rcjav.dupes import (
+    DEFAULT_KEEP_RANKING,
+    set_keep_ranking,
+    decide_keep_with_reason,
+    decide_keep,
+    find_dupes,
+    describe_dupe_risks,
+    find_variant_alerts,
+)
+from rcjav.library import (
+    find_library_issues,
+    rename_file_in_remote,
+    rename_files_batch,
+    _bracket_to_canonical,
+    _nohyphen_to_canonical,
+)
+
+# Location of the persisted defaults file: read by load_config(), written by
+# save_config() (the --save flag).
+# NOTE(review): this resolves to the directory containing this module, while
+# the --save help text says "config.json next to the script" — confirm which
+# directory is intended now that this code is no longer in rc-jav.py.
+CONFIG_PATH = Path(__file__).resolve().parent / "config.json"
+
+def load_config() -> dict:
+    if not CONFIG_PATH.exists():
+        return {}
+    try:
+        data = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
+        if not isinstance(data, dict):
+            return {}
+        return data
+    except (json.JSONDecodeError, OSError):
+        return {}
+
+
+def save_config(cfg: dict) -> None:
+    tmp = CONFIG_PATH.with_suffix(CONFIG_PATH.suffix + ".tmp")
+    tmp.write_text(json.dumps(cfg, indent=2), encoding="utf-8")
+    os.replace(tmp, CONFIG_PATH)
+
+
+# ---------- collectors ----------
+
+def collect_with_progress(remotes_by_label: list[tuple[str, str]],
+                          skipped: list[tuple[str, str]]
+                          ) -> list[FileEntry]:
+    """Dupe-mode collect — every remote freshly walked with progress."""
+    out: list[FileEntry] = []
+    if not remotes_by_label:
+        return out
+    with make_progress() as progress:
+        tasks = {(label, r): progress.add_task(f"{label} {r}", total=1)
+                 for label, r in remotes_by_label}
+        for (label, r), tid in tasks.items():
+            entries, _ = walk_remote(r, label, skipped, progress, tid)
+            out.extend(entries)
+    return out
+
+
+def cached_collect(remotes: list[str], source_label: str,
+                   skipped: list[tuple[str, str]],
+                   cache: dict, use_cache: bool, force_update: bool,
+                   cache_meta: dict[str, dict],
+                   scan_since: str | None = None) -> list[FileEntry]:
+    """Search-mode collect with cache. Always recursive.
+
+    Each remote is routed to one of three paths:
+      * cached      - a cache entry exists and is usable (use_cache and not
+                      force_update): FileEntry objects are rebuilt from it.
+      * full scan   - no usable entry: the remote is walked with walk_remote()
+                      and, when use_cache is set, its cache entry is replaced.
+      * incremental - scan_since, force_update and use_cache are all set and a
+                      prior entry exists: only the recent window is walked and
+                      merged on top of the existing entry.
+
+    scan_since: rclone duration string (`24h`, `7d`). When set during a forced
+    update, only files modified within the window are walked and merged on top
+    of the existing cache entry; files older than the window keep their cached
+    record. If there's no prior cache entry for a remote, falls through to a
+    full scan.
+
+    Args:
+        remotes: remote paths to collect.
+        source_label: stored as FileEntry.source and used in progress task
+            names and the BASIC-mode stderr events.
+        skipped: shared list; (remote, name) tuples are appended here and the
+            list is also passed to walk_remote().
+        cache: cache dict; entries are read from and written to
+            cache["remotes"]. It is mutated in place but not saved here.
+        use_cache: allow reading and updating cache entries.
+        force_update: ignore existing entries and re-walk.
+        cache_meta: filled with one status dict per remote ("cached", "age",
+            "stale", "file_count", plus "added_or_updated" for incremental).
+        scan_since: see above.
+
+    Returns:
+        All collected entries: cached remotes first, then fully scanned
+        remotes, then incrementally scanned remotes.
+    """
+    out: list[FileEntry] = []
+    to_scan: list[str] = []
+    to_incremental: list[tuple[str, dict]] = []  # (remote, existing_entry)
+    # Pass 1: classify each remote; cached ones are materialised immediately.
+    for r in remotes:
+        if scan_since and force_update and use_cache:
+            existing = cache["remotes"].get(r)
+            if existing:
+                to_incremental.append((r, existing))
+                continue
+            # No prior cache for this remote -> can't be incremental, fall back.
+        entry = cache["remotes"].get(r) if use_cache and not force_update else None
+        if entry:
+            age = cache_age_hours(entry["scanned_at"])
+            age_str = fmt_age(age) if age is not None else "?"
+            stale = age is not None and age > CACHE_STALE_HOURS
+            cache_meta[r] = {"cached": True, "age": age_str, "stale": stale,
+                             "file_count": len(entry["files"])}
+            for f in entry["files"]:
+                out.append(FileEntry(source=source_label, remote=r, path=f["path"],
+                                     size=f["size"], mod_time=f.get("mod_time", ""),
+                                     jav_id=f["jav_id"]))
+            # Replay the skipped names recorded when this entry was scanned.
+            for s in entry.get("skipped", []):
+                skipped.append((r, s))
+        else:
+            to_scan.append(r)
+
+    # Pass 2: full walk of every remote without a usable cache entry.
+    if to_scan:
+        with make_progress() as progress:
+            tids = {r: progress.add_task(f"{source_label} {r}", total=1) for r in to_scan}
+            for r_idx, r in enumerate(to_scan):
+                _total: int | None = None
+                if BASIC:
+                    # Emit SCAN_REMOTE_START immediately so the UI shows the remote name.
+                    # Then probe the file count; once known, emit SCAN_REMOTE_COUNTED so
+                    # the UI can show "N / total" without waiting for the first 100 files.
+                    sys.stderr.write("SCAN_REMOTE_START " + json.dumps({
+                        "remote": r, "label": source_label,
+                        "index": r_idx + 1, "of": len(to_scan),
+                        "total": None,
+                    }) + "\n")
+                    sys.stderr.flush()
+                    _total = remote_file_count(r)
+                    sys.stderr.write("SCAN_REMOTE_COUNTED " + json.dumps({
+                        "remote": r, "total": _total,
+                    }) + "\n")
+                    sys.stderr.flush()
+                # walk_remote() returns (entries, names skipped for this remote).
+                fresh, local_skipped = walk_remote(r, source_label, skipped, progress, tids[r],
+                                                   _total_override=_total)
+                out.extend(fresh)
+                cache_meta[r] = {"cached": False, "age": "fresh", "stale": False,
+                                 "file_count": len(fresh)}
+                if use_cache:
+                    # Replace the remote's cache entry wholesale; the caller saves it.
+                    cache["remotes"][r] = {
+                        "scanned_at": datetime.now().astimezone().isoformat(),
+                        "recursive": True,
+                        "files": [{"path": e.path, "size": e.size, "mod_time": e.mod_time,
+                                   "jav_id": e.jav_id} for e in fresh],
+                        "skipped": local_skipped,
+                    }
+                if BASIC:
+                    # "files" is this remote's count; "files_total" is the running
+                    # size of `out` (everything collected so far in this call).
+                    sys.stderr.write("SCAN_PROGRESS " + json.dumps({
+                        "remote": r, "label": source_label,
+                        "files": len(fresh), "files_total": len(out),
+                    }) + "\n")
+                    sys.stderr.flush()
+
+    # Pass 3: incremental walk (only the scan_since window) merged onto the
+    # existing cache entry.
+    if to_incremental:
+        with make_progress() as progress:
+            tids = {r: progress.add_task(f"{source_label} {r} (since {scan_since})", total=1)
+                    for r, _ in to_incremental}
+            for r_idx, (r, existing) in enumerate(to_incremental):
+                if BASIC:
+                    sys.stderr.write("SCAN_REMOTE_START " + json.dumps({
+                        "remote": r, "label": source_label,
+                        "index": r_idx + 1, "of": len(to_incremental),
+                        "total": None, "incremental": True,
+                    }) + "\n")
+                    sys.stderr.flush()
+                fresh, local_skipped = walk_remote(
+                    r, source_label, skipped, progress, tids[r], max_age=scan_since,
+                )
+                # Merge: replace entries at paths we just walked, keep all others.
+                # Cached paths that were not re-walked are kept as they are.
+                new_paths = {e.path for e in fresh}
+                old_files = [f for f in existing.get("files", [])
+                             if f["path"] not in new_paths]
+                merged_files = old_files + [
+                    {"path": e.path, "size": e.size, "mod_time": e.mod_time,
+                     "jav_id": e.jav_id} for e in fresh
+                ]
+                # Merge skipped lists (de-dupe).
+                old_skipped = set(existing.get("skipped", []))
+                old_skipped.update(local_skipped)
+                # Emit FileEntry for everything (old + new) so the caller sees the
+                # full set, not just deltas.
+                for f in merged_files:
+                    out.append(FileEntry(source=source_label, remote=r, path=f["path"],
+                                         size=f["size"], mod_time=f.get("mod_time", ""),
+                                         jav_id=f["jav_id"]))
+                # NOTE(review): `skipped` was also handed to walk_remote() above; if
+                # walk_remote() appends to it itself, the names in local_skipped are
+                # added a second time here — confirm against walk_remote().
+                # Iteration is over a set, so the append order is not fixed.
+                for s in old_skipped:
+                    skipped.append((r, s))
+                cache_meta[r] = {
+                    "cached": False, "age": f"incremental {scan_since}",
+                    "stale": False, "file_count": len(merged_files),
+                    "added_or_updated": len(fresh),
+                }
+                if use_cache:
+                    cache["remotes"][r] = {
+                        "scanned_at": datetime.now().astimezone().isoformat(),
+                        "recursive": True,
+                        "files": merged_files,
+                        "skipped": sorted(old_skipped),
+                    }
+                if BASIC:
+                    sys.stderr.write("SCAN_PROGRESS " + json.dumps({
+                        "remote": r, "label": source_label,
+                        "files": len(fresh), "files_total": len(out),
+                        "incremental": True,
+                        "file_count": len(merged_files),
+                    }) + "\n")
+                    sys.stderr.flush()
+    return out
+
+
+# ---------- main ----------
+
+def main():
+    ap = argparse.ArgumentParser(description="Report duplicate JAV files across rclone remotes (read-only).")
+    ap.add_argument("--source", "-s", action="append", default=[], metavar="REMOTE",
+                    help="Source remote path (priority — wins dupes regardless of size). Repeatable.")
+    ap.add_argument("--target", "-t", action="append", default=[], metavar="REMOTE",
+                    help="Target remote path (non-priority — largest size wins among targets). Repeatable.")
+    ap.add_argument("--format", choices=["console", "txt", "csv", "json", "all"],
+                    default="console")
+    ap.add_argument("--output-dir", default="./reports", help="Where to write txt/csv/json.")
+    ap.add_argument("--no-color", action="store_true")
+    ap.add_argument("--rclone-bin", default="rclone",
+                    help="Path to rclone executable (default: 'rclone' on PATH).")
+    ap.add_argument("--search", action="append", default=[], metavar="ID",
+                    help="Search mode: look up a JAV ID (e.g. SSIS-001). Repeatable. "
+                         "If no --source/--target given, default target is used.")
+    ap.add_argument("--name", action="append", default=[], metavar="STR",
+                    help="Substring/glob search against filename. Case-insensitive. "
+                         "Repeatable; OR semantics (any token match = hit). "
+                         "Supports * and ? wildcards. Use quotes for spaces.")
+    ap.add_argument("--update", "-u", action="store_true",
+                    help="Search mode: force re-scan and overwrite cache for requested remotes.")
+    ap.add_argument("--no-cache", action="store_true",
+                    help="Search mode: bypass cache entirely (no read, no write).")
+    ap.add_argument("--quick", "-q", action="store_true",
+                    help="Force quick mode: skip cache, query rclone directly with --include glob. "
+                         "Default is auto: single exact IDs use quick, wildcards/ranges/multi use cached.")
+    ap.add_argument("--cache", action="store_true",
+                    help="Force cached mode (opposite of --quick).")
+    ap.add_argument("--save", action="store_true",
+                    help="Persist the --source / --target / --catalog values you passed "
+                         "as new defaults in config.json next to the script. "
+                         "Only keys you explicitly passed are saved.")
+    ap.add_argument("--scan", action="store_true",
+                    help="Walk configured remotes, refresh cache, exit. No search/dupe output. "
+                         "Default scope: DEFAULT_TARGET. Override with --source/--target. "
+                         "Always overwrites cache. Suitable for Task Scheduler / cron.")
+    ap.add_argument("--scan-since", metavar="DURATION",
+                    help="Incremental scan: only walk files modified within DURATION "
+                         "(e.g. 24h, 7d, 30m, 90s). Merges new/changed entries on top of "
+                         "the existing cache; old entries are preserved. Falls back to a "
+                         "full scan if there's no prior cache for a remote. Requires --scan.")
+    ap.add_argument("--catalog", action="append", default=[], metavar="PATH",
+                    help="Path to a WinCatalog CSV or XML export. Repeatable. "
+                         "Listed under 'Catalog' in results (informational, never KEEP/DELETE?).")
+    ap.add_argument("--part-pattern", action="append", default=[], metavar="REGEX",
+                    help="Extra multipart filename regex. Repeatable; first capture group must be the part number. "
+                         "Patterns run against the filename stem after built-in part detectors.")
+    ap.add_argument("--library-issues", action="store_true",
+                    help="Report non-canonical filenames (bracket-wrapped IDs, no-hyphen IDs). "
+                         "Reads from cache. Outputs JSON when --format json, plain otherwise.")
+    ap.add_argument("--rename-file", action="store_true",
+                    help="Rename one file in a remote and patch cache. "
+                         "Requires --remote, --old-path, --new-path. Outputs JSON.")
+    ap.add_argument("--rename-files-batch", action="store_true",
+                    help="Rename multiple files in one call, writing cache once. "
+                         "Reads JSON array of {remote, old_path, new_path} from stdin. Outputs JSON.")
+    ap.add_argument("--remote", metavar="REMOTE",
+                    help="Remote path root for --rename-file (e.g. cq:JAV).")
+    ap.add_argument("--old-path", metavar="PATH",
+                    help="Relative path of the file to rename (within --remote).")
+    ap.add_argument("--new-path", metavar="PATH",
+                    help="New relative path after rename (within --remote).")
+    ap.add_argument("--basic", action="store_true",
+                    help="Plain text output, no rich tables/panels/progress bars. "
+                         "Useful for piping or low-bandwidth terminals.")
+    ap.add_argument("--clearjav", action="store_true",
+                    help="Shortcut: use DEFAULT_SOURCE as --source and DEFAULT_TARGET as --target, "
+                         "Equivalent to "
+                         "`--source cq:personal-files/ClearJAV --target cq:personal-files/JAV/TMP`.")
+    args = ap.parse_args()
+
+    global console, BASIC, DEFAULT_SOURCE, DEFAULT_TARGET, DEFAULT_CATALOG
+    _set_rclone_bin(args.rclone_bin)
+    BASIC = args.basic or args.format == "json"
+    _set_output_basic(BASIC)
+
+    # Apply persisted config overrides BEFORE defaults are consulted.
+    cfg = load_config()
+    if "default_source" in cfg:
+        DEFAULT_SOURCE = list(cfg["default_source"])
+    if "default_target" in cfg:
+        DEFAULT_TARGET = list(cfg["default_target"])
+    if "default_catalog" in cfg:
+        DEFAULT_CATALOG = list(cfg["default_catalog"])
+    set_keep_ranking(cfg.get("keep_ranking") or {})
+    part_patterns = list(cfg.get("part_patterns") or []) + list(args.part_pattern)
+    pattern_errors = configure_part_patterns(part_patterns)
+    if pattern_errors:
+        for err in pattern_errors:
+            console.print(f"[red]invalid part pattern:[/] {err}")
+        sys.exit(2)
+
+    # --save: persist explicitly-passed values, exit.
+    if args.save:
+        if not (args.source or args.target or args.catalog or args.part_pattern):
+            console.print("[red]--save needs at least one --source/--target/--catalog/--part-pattern value to persist.[/]")
+            sys.exit(2)
+        new_cfg = dict(cfg)
+        if args.source:
+            new_cfg["default_source"] = list(args.source)
+        if args.target:
+            new_cfg["default_target"] = list(args.target)
+        if args.catalog:
+            new_cfg["default_catalog"] = list(args.catalog)
+        if args.part_pattern:
+            new_cfg["part_patterns"] = list(args.part_pattern)
+        save_config(new_cfg)
+        console.print(f"[green]Saved to {CONFIG_PATH}:[/]")
+        for k in ("default_source", "default_target", "default_catalog", "part_patterns"):
+            if k in new_cfg:
+                console.print(f"  {k} = {new_cfg[k]}")
+        sys.exit(0)
+    _set_output_use_ansi(not args.no_color)
+    if args.no_color or BASIC:
+        console = Console(no_color=True, color_system=None, highlight=False)
+        _set_output_no_color()
+
+    # Search mode: defaults kick in if no remotes specified.
+    if args.clearjav:
+        if not args.source:
+            args.source = list(DEFAULT_SOURCE)
+        if not args.target:
+            args.target = list(DEFAULT_TARGET)
+
+    if args.search and not args.source and not args.target:
+        args.target = list(DEFAULT_TARGET)
+
+    # --scan: default to DEFAULT_TARGET only, always overwrite cache.
+    if args.scan:
+        if not args.source and not args.target:
+            args.target = list(DEFAULT_TARGET)
+        args.update = True
+
+    # Use default catalog(s) if user passed none.
+    if not args.catalog and DEFAULT_CATALOG:
+        args.catalog = list(DEFAULT_CATALOG)
+
+    # --library-issues: read-only cache scan for non-canonical filenames.
+    if args.library_issues:
+        cache = load_cache()
+        issues = find_library_issues(cache)
+        if args.format == "json" or BASIC:
+            print(json.dumps({"ok": True, **issues}))
+        else:
+            bracket = issues["bracket_names"]
+            nohyphen = issues["nohyphen_names"]
+            total = len(bracket) + len(nohyphen)
+            if not total:
+                console.print(Panel("[bold green]No library issues found.[/]", title="Library Issues"))
+            else:
+                from rich.table import Table
+                t = Table(title=f"Library Issues ({total} file(s))", show_lines=True)
+                t.add_column("Issue", style="yellow", width=14)
+                t.add_column("Current Name")
+                t.add_column("Canonical Name", style="green")
+                t.add_column("Remote", style="dim")
+                for e in bracket:
+                    t.add_row("bracket ID", Path(e["path"]).name,
+                              e["canonical_name"], e["remote"])
+                for e in nohyphen:
+                    t.add_row("no hyphen", Path(e["path"]).name,
+                              e["canonical_name"], e["remote"])
+                console.print(t)
+        sys.exit(0)
+
+    # --rename-files-batch: rename multiple files, single cache write.
+    if args.rename_files_batch:
+        try:
+            renames = json.loads(sys.stdin.read())
+        except json.JSONDecodeError as e:
+            print(json.dumps({"ok": False, "error": f"Invalid JSON on stdin: {e}"}))
+            sys.exit(1)
+        if not isinstance(renames, list):
+            print(json.dumps({"ok": False, "error": "stdin must be a JSON array"}))
+            sys.exit(1)
+        cache = load_cache()
+        results = rename_files_batch(renames, cache, rclone_bin=RCLONE_BIN)
+        ok = any(r["ok"] for r in results)
+        print(json.dumps({"ok": ok, "results": results}))
+        sys.exit(0 if ok else 1)
+
+    # --rename-file: rename one file in a remote and patch cache.
+    if args.rename_file:
+        if not args.remote or not args.old_path or not args.new_path:
+            ap.error("--rename-file requires --remote, --old-path, and --new-path.")
+        cache = load_cache()
+        result = rename_file_in_remote(
+            args.remote, args.old_path, args.new_path, cache, rclone_bin=RCLONE_BIN
+        )
+        print(json.dumps(result))
+        sys.exit(0 if result["ok"] else 1)
+
+    if not args.source and not args.target and not args.catalog:
+        ap.error("Provide at least one --source, --target, or --catalog.")
+
+    # Scan-only mode: walk remotes, write cache, summary, exit.
+    if args.scan:
+        scan_since = None
+        if args.scan_since:
+            scan_since = parse_duration(args.scan_since)
+            if not scan_since:
+                console.print(f"[red]invalid --scan-since value: {args.scan_since!r} "
+                              f"(expected e.g. 24h, 7d, 30m, 90s)[/]")
+                sys.exit(2)
+        cache = load_cache()
+        cache_meta: dict[str, dict] = {}
+        skipped: list[tuple[str, str]] = []
+        t0 = time.perf_counter()
+        if BASIC:
+            # `--scan` resolves its default target above. Report only the
+            # remotes that this scan will actually walk; falling back here to
+            # DEFAULT_SOURCE would resurrect retired source roots in job UI.
+            _all_remotes = list(args.source) + list(args.target)
+            sys.stderr.write("SCAN_START " + json.dumps({
+                "remotes": _all_remotes, "total": len(_all_remotes),
+            }) + "\n")
+            sys.stderr.flush()
+        entries = (cached_collect(args.source, "Source", skipped, cache,
+                                  use_cache=not args.no_cache, force_update=True,
+                                  cache_meta=cache_meta, scan_since=scan_since)
+                   + cached_collect(args.target, "Target", skipped, cache,
+                                    use_cache=not args.no_cache, force_update=True,
+                                    cache_meta=cache_meta, scan_since=scan_since))
+        if not args.no_cache:
+            save_cache(cache)
+        elapsed = time.perf_counter() - t0
+        if BASIC:
+            sys.stderr.write(f"Scan complete: {len(entries)} files in {elapsed:.2f}s\n")
+            sys.stderr.write(f"Cache: {CACHE_PATH}\n" if not args.no_cache
+                             else "Cache: (skipped, --no-cache)\n")
+        else:
+            console.print(f"[bold green]Scan complete:[/] {len(entries)} files in {elapsed:.2f}s")
+            if not args.no_cache:
+                console.print(f"[dim]Cache: {CACHE_PATH}[/]")
+            else:
+                console.print("[dim]Cache: (skipped, --no-cache)[/]")
+        sys.exit(0)
+
+    skipped: list[tuple[str, str]] = []
+    t0 = time.perf_counter()
+
+    if args.search or args.name:
+        search_timings: dict[str, int] = {}
+        # If --name was passed without explicit remotes, fall back to default target
+        # (catalog default already injected earlier; don't let it suppress remote defaulting).
+        if args.name and not args.search and not args.source and not args.target:
+            args.target = list(DEFAULT_TARGET)
+        # Substring name search can't be server-side filtered on most backends — cache wins.
+        # Only the ID search shape benefits from quick (server-side prefix glob).
+        if args.name and not args.quick:
+            mode, reason = "cached", "name substring search — cache is faster than rclone --include"
+        else:
+            combined = list(args.search) + list(args.name)
+            mode, reason = choose_search_mode(combined, args.quick, args.cache)
+        if BASIC:
+            sys.stderr.write(f"Mode: {mode}  ({reason})\n")
+        else:
+            mode_color = "green" if mode == "quick" else "cyan"
+            console.print(f"[{mode_color}]Mode: {mode}[/]  [dim]({reason})[/]")
+
+        phase_t0 = time.perf_counter()
+        cache = load_cache()
+        search_timings["cache_load_ms"] = round((time.perf_counter() - phase_t0) * 1000)
+        use_cache = not args.no_cache and mode == "cached"
+        cache_meta: dict[str, dict] = {}
+        phase_t0 = time.perf_counter()
+        if mode == "quick":
+            all_patterns: list[str] = []
+            for raw in args.search:
+                all_patterns.extend(query_to_include_patterns(raw))
+            all_patterns.extend(name_to_include_patterns(args.name))
+            entries = []
+            for r in args.source:
+                cache_meta[r] = {"cached": False, "age": "quick", "stale": False, "file_count": 0}
+                got = quick_search_remote(r, "Source", all_patterns, skipped)
+                entries.extend(got)
+                cache_meta[r]["file_count"] = len(got)
+            for r in args.target:
+                cache_meta[r] = {"cached": False, "age": "quick", "stale": False, "file_count": 0}
+                got = quick_search_remote(r, "Target", all_patterns, skipped)
+                entries.extend(got)
+                cache_meta[r]["file_count"] = len(got)
+        else:
+            entries = (cached_collect(args.source, "Source", skipped, cache,
+                                      use_cache, args.update, cache_meta)
+                       + cached_collect(args.target, "Target", skipped, cache,
+                                        use_cache, args.update, cache_meta))
+        search_timings["entry_collect_ms"] = round((time.perf_counter() - phase_t0) * 1000)
+        # Load each catalog separately so cache_meta gets the per-catalog count
+        # (was global total — every catalog reported the sum across all).
+        catalog_entries: list[FileEntry] = []
+        phase_t0 = time.perf_counter()
+        for cp_str in args.catalog:
+            for cp in _expand_catalog_paths([cp_str], default_paths=DEFAULT_CATALOG):
+                ext = cp.suffix.lower()
+                if ext == ".csv":
+                    one = load_catalog_csv(cp, skipped)
+                elif ext == ".xml":
+                    one = load_catalog_xml(cp, skipped)
+                else:
+                    console.print(f"[yellow]WARN: unknown catalog format '{ext}' for {cp}; skipping.[/]")
+                    continue
+                catalog_entries.extend(one)
+                cache_meta[f"catalog:{cp.name}"] = {
+                    "cached": False, "age": "loaded", "stale": False,
+                    "file_count": len(one),
+                }
+        entries.extend(catalog_entries)
+        search_timings["catalog_load_ms"] = round((time.perf_counter() - phase_t0) * 1000)
+        if use_cache and args.update:
+            save_cache(cache)
+    else:
+        if args.cache and not args.no_cache:
+            cache = load_cache()
+            cache_meta: dict[str, dict] = {}
+            entries = (cached_collect(args.source, "Source", skipped, cache,
+                                      use_cache=True, force_update=False,
+                                      cache_meta=cache_meta)
+                       + cached_collect(args.target, "Target", skipped, cache,
+                                        use_cache=True, force_update=False,
+                                        cache_meta=cache_meta))
+        else:
+            remotes_by_label = ([("Source", r) for r in args.source]
+                                + [("Target", r) for r in args.target])
+            entries = collect_with_progress(remotes_by_label, skipped)
+        entries.extend(load_catalogs(args.catalog, skipped, default_paths=DEFAULT_CATALOG))
+
+    elapsed = time.perf_counter() - t0
+    if BASIC:
+        sys.stderr.write(f"Scanned/loaded {len(entries)} file(s) in {elapsed:.2f}s\n")
+    else:
+        console.print(f"[dim]Scanned/loaded {len(entries)} file(s) in {elapsed:.2f}s[/]")
+
+    if args.search or args.name:
+        # query_expansions: original_raw -> list of normalized IDs / wildcard patterns to look up
+        query_expansions: dict[str, list[str]] = {}
+        queries: list[str] = []
+        for raw in args.search:
+            if RANGE_RE.search(raw):
+                expanded = expand_range(raw) or []
+                normed: list[str] = []
+                for r in expanded:
+                    n = normalize_id(r)
+                    if n:
+                        normed.append(n)
+                if not normed:
+                    console.print(f"[yellow]WARN: range '{raw}' produced no valid IDs.[/]")
+                    continue
+                queries.append(raw)
+                query_expansions[raw] = normed
+                continue
+            if "*" in raw or "?" in raw:
+                q = raw.upper()
+                queries.append(q)
+                query_expansions[q] = [q]
+                continue
+            norm = normalize_id(raw)
+            if not norm:
+                console.print(f"[yellow]WARN: cannot parse '{raw}' as a JAV ID, skipping.[/]")
+                continue
+            # Use the raw (upper-cased) form for display so leading zeros are preserved
+            # (e.g. user types PRTD-027 — keep it, don't show PRTD-27). Lookup still uses
+            # the normalized form internally.
+            display = raw.upper()
+            queries.append(display)
+            query_expansions[display] = [norm]
+        phase_t0 = time.perf_counter()
+        index: dict[str, list[FileEntry]] = {}
+        for e in entries:
+            index.setdefault(e.jav_id, []).append(e)
+        search_timings["index_ms"] = round((time.perf_counter() - phase_t0) * 1000)
+        phase_t0 = time.perf_counter()
+        matches: dict[str, list[FileEntry]] = {}
+        match_traces: dict[str, dict[int, dict[str, str]]] = {}
+        for q in queries:
+            expansions = query_expansions.get(q, [q])
+            hits: list[FileEntry] = []
+            seen: set[int] = set()
+            traces: dict[int, dict[str, str]] = {}
+
+            def add_hit(entry: FileEntry, matched_query: str) -> None:  # Record entry as a hit for the current query q, at most once per query.
+                key = id(entry)  # Dedupe on object identity rather than on field values.
+                if key in seen:
+                    return
+                seen.add(key)
+                hits.append(entry)
+                traces[key] = describe_id_match(q, matched_query, entry.jav_id, len(expansions))  # Stored under id(entry); the JSON output reads it back via id(e).
+
+            for sub in expansions:
+                if "*" in sub or "?" in sub:
+                    pat = sub if "#PART" in sub.upper() else sub + "*"
+                    for k, v in index.items():
+                        if fnmatch.fnmatchcase(k, pat):
+                            for e in v:
+                                add_hit(e, sub)
+                elif "#part" in sub:
+                    for e in index.get(sub, []):
+                        add_hit(e, sub)
+                else:
+                    for e in index.get(sub, []):
+                        add_hit(e, sub)
+                    for k, v in index.items():
+                        if k.startswith(sub + "#part"):
+                            for e in v:
+                                add_hit(e, sub)
+            matches[q] = hits
+            match_traces[q] = traces
+        search_timings["match_ms"] = round((time.perf_counter() - phase_t0) * 1000)
+        if args.format == "json":
+            # Structured output for tools that consume search results (e.g. the rclonex
+            # Brave extension). Includes everything needed to drive a UI: per-query hits
+            # with source/remote/path/size/mod_time, plus name-match block + skipped.
+            name_hits_json: list[FileEntry] = []
+            if args.name:
+                for e in entries:
+                    if name_match(Path(e.path).stem, args.name):
+                        name_hits_json.append(e)
+            out_obj = {
+                "queries": [
+                    {
+                        "query": q,
+                        "hits": [
+                            {"source": e.source, "remote": e.remote, "path": e.path,
+                             "full_path": e.full_path, "size": e.size,
+                             "size_human": human_size(e.size),
+                             "mod_time": e.mod_time, "jav_id": e.jav_id,
+                             **match_traces.get(q, {}).get(id(e), {})}
+                            for e in sorted(matches.get(q, []), key=lambda x: (x.jav_id, x.path.lower()))
+                        ],
+                    }
+                    for q in queries
+                ],
+                "name_matches": [
+                    {"source": e.source, "remote": e.remote, "path": e.path,
+                     "full_path": e.full_path, "size": e.size,
+                     "size_human": human_size(e.size), "mod_time": e.mod_time,
+                     "jav_id": e.jav_id, "match_kind": "name",
+                     "match_reason": "Filename search", "match_confidence": "broad",
+                     "matched_query": ", ".join(args.name), "matched_id": e.jav_id}
+                    for e in sorted(name_hits_json, key=lambda x: (x.jav_id, x.path.lower()))
+                ],
+                "name_tokens": list(args.name),
+                "cache_meta": cache_meta,
+                "skipped_count": len(skipped),
+                "elapsed_sec": round(time.perf_counter() - t0, 3),
+                "timings": search_timings,
+            }
+            print(json.dumps(out_obj))
+            id_ok = (not queries) or all(matches.values())
+            name_ok = (not args.name) or bool(name_hits_json)
+            sys.exit(0 if (id_ok and name_ok) else 1)
+        if queries:
+            if BASIC:
+                print(render_search_plain(matches, queries, cache_meta))
+            else:
+                render_search(matches, queries, cache_meta)
+        # --name results as a separate block
+        name_hits: list[FileEntry] = []
+        if args.name:
+            for e in entries:
+                if name_match(Path(e.path).stem, args.name):
+                    name_hits.append(e)
+            if BASIC:
+                print(render_name_matches_plain(name_hits, args.name, cache_meta))
+            else:
+                render_name_matches(name_hits, args.name, cache_meta)
+        # Exit code: 0 if every search query had hits AND name-search (if used) returned hits.
+        id_ok = (not queries) or all(matches.values())
+        name_ok = (not args.name) or bool(name_hits)
+        sys.exit(0 if (id_ok and name_ok) else 1)
+
+    dupes = find_dupes(entries)
+    variant_alerts = find_variant_alerts(entries)
+    if args.format == "json" and BASIC:
+        print(json.dumps(dupes_to_obj(dupes, skipped, variant_alerts)))
+        sys.exit(0)
+    if BASIC:
+        print(render_dupes_plain(dupes, skipped, variant_alerts))
+    else:
+        render_dupes(dupes, skipped, variant_alerts)
+
+    if args.format != "console":
+        out_dir = Path(args.output_dir)
+        out_dir.mkdir(parents=True, exist_ok=True)
+        stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+        targets = {"txt", "csv", "json"} if args.format == "all" else {args.format}
+        if "txt" in targets:
+            write_txt(out_dir / f"dupes-{stamp}.txt", dupes, skipped)
+        if "csv" in targets:
+            write_csv(out_dir / f"dupes-{stamp}.csv", dupes)
+        if "json" in targets:
+            write_json(out_dir / f"dupes-{stamp}.json", dupes, skipped, variant_alerts)
+        console.print(f"[dim]Reports written to {out_dir}[/]")
+
+
+if __name__ == "__main__":  # Run the CLI only when this module is executed as a script.
+    try:
+        main()
+    except KeyboardInterrupt:  # Ctrl+C: print a short notice instead of a traceback.
+        console.print("\n[yellow]Aborted by user (Ctrl+C). Cache not written for in-flight scans.[/]")
+        sys.exit(130)  # 130 = 128 + 2 (SIGINT), the conventional shell status for an interrupted process.