Step 10c: extract cache I/O into rcjav/cache.py

Pulls CACHE_PATH, CACHE_VERSION, CACHE_STALE_HOURS, load_cache,
save_cache, cache_age_hours, and fmt_age out of rc-jav.py and into a
new self-contained module. No behavior change.

rc-jav.py: 2019 → 1972 lines.

The new module's `CACHE_PATH = Path(__file__).resolve().parents[1] /
"cache.json"` keeps the file at the repo root next to rc-jav.py (one
directory above the package), matching the legacy `Path(__file__).
resolve().parent / "cache.json"` location.

rcjav/__init__.py now re-exports the cache public surface alongside
the model and ids surface.

Verified:
  - python rc-jav.py --help              → ok
  - python fixtures/run.py               → 17/17 cases pass
  - python -m unittest tests.test_rules  → 5/5 OK

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
admin
2026-05-22 21:46:20 +02:00
parent ba57b7fd21
commit f03d032336
3 changed files with 94 additions and 56 deletions
+9 -56
View File
@@ -161,9 +161,15 @@ CATALOG_COL_PATH = ("path", "full path", "location", "folder")
# Accepted header spellings for catalog columns. Presumably matched against
# lower-cased CSV headers by _pick_col — verify against the caller.
CATALOG_COL_SIZE = ("size", "file size", "bytes", "size (bytes)")
CATALOG_COL_DISC = ("disc", "disc name", "disc label", "volume", "source", "catalog", "media")
# cache.json sits in the same directory as this file.
CACHE_PATH = Path(__file__).resolve().parent / "cache.json"
# Schema version stored in cache.json; load_cache discards any cache whose
# stored version differs from this value.
CACHE_VERSION = 3 # bumped: extract_id handles bracket-wrapped IDs + no-hyphen fallback
# Presumably the age (in hours) after which a cached scan counts as stale;
# its use is not visible in this section — confirm at the call sites.
CACHE_STALE_HOURS = 24
from rcjav.cache import (
CACHE_PATH,
CACHE_VERSION,
CACHE_STALE_HOURS,
load_cache,
save_cache,
cache_age_hours,
fmt_age,
)
DEFAULT_KEEP_RANKING: dict = {
"priority_folders": ["ClearJAV"],
@@ -202,59 +208,6 @@ def save_config(cfg: dict) -> None:
os.replace(tmp, CONFIG_PATH)
def load_cache() -> dict:
    """Load the scan cache from CACHE_PATH, falling back to an empty cache.

    Returns:
        The parsed cache dict when the file exists, decodes as UTF-8 JSON,
        is a dict, carries ``version == CACHE_VERSION`` and has a dict under
        ``"remotes"``. In every other case a fresh
        ``{"version": CACHE_VERSION, "remotes": {}}`` is returned.

    A warning is written to stderr only when the file holds a dict with a
    ``"version"`` key whose value differs from CACHE_VERSION.
    """
    empty: dict = {"version": CACHE_VERSION, "remotes": {}}

    try:
        data = json.loads(CACHE_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError, so a cache file with
        # invalid UTF-8 bytes is treated like any other corrupt cache
        # instead of crashing the caller.
        return empty

    if not isinstance(data, dict):
        return empty

    found_version = data.get("version")
    if found_version != CACHE_VERSION:
        # Only warn when a version was actually recorded; a dict without a
        # "version" key is silently treated as an empty cache.
        if "version" in data:
            sys.stderr.write(
                f"[warn] cache version mismatch (got {found_version}, "
                f"expected {CACHE_VERSION}); forcing full rescan.\n"
            )
        return empty

    if not isinstance(data.get("remotes"), dict):
        return empty
    return data
def save_cache(cache: dict) -> None:
    """Persist *cache* to CACHE_PATH as indented JSON via an atomic replace.

    The JSON is first written to a sibling ``<name>.tmp`` file and then moved
    over the real cache with ``os.replace``. An interrupted write (Ctrl-C,
    power loss, concurrent --scan) therefore cannot leave a truncated
    cache.json behind, which load_cache would read as invalid JSON and
    answer with an empty cache, forcing a full re-scan.
    """
    staging = CACHE_PATH.with_suffix(f"{CACHE_PATH.suffix}.tmp")
    serialized = json.dumps(cache, indent=2)
    staging.write_text(serialized, encoding="utf-8")

    try:
        os.replace(staging, CACHE_PATH)
    except PermissionError:
        # Windows: the destination may be briefly locked by antivirus or a
        # concurrent reader. Wait a moment and try exactly once more; a
        # second failure propagates to the caller.
        time.sleep(0.5)
        os.replace(staging, CACHE_PATH)
def cache_age_hours(scanned_at: str) -> float | None:
try:
dt = datetime.fromisoformat(scanned_at.replace("Z", "+00:00"))
except ValueError:
return None
now = datetime.now(dt.tzinfo) if dt.tzinfo else datetime.now()
return (now - dt).total_seconds() / 3600.0
def fmt_age(hours: float) -> str:
    """Format an age given in hours as a short human-readable string.

    Under one hour the result is whole minutes (``"30m"``, truncated toward
    zero); under 24 hours it is hours with one decimal (``"5.5h"``);
    otherwise it is days with one decimal (``"1.5d"``).
    """
    if hours < 1:
        whole_minutes = int(hours * 60)
        return f"{whole_minutes}m"
    # Both remaining cases share the one-decimal format and differ only in
    # the scaled value and the unit suffix.
    amount, unit = (hours, "h") if hours < 24 else (hours / 24, "d")
    return f"{amount:.1f}{unit}"
# ---------- WinCatalog ingest ----------
def _pick_col(headers_lower: list[str], synonyms: tuple[str, ...]) -> str | None: