From f03d032336063e1e68d04505ecbb219706bdcfee Mon Sep 17 00:00:00 2001 From: admin Date: Fri, 22 May 2026 21:46:20 +0200 Subject: [PATCH] Step 10c: extract cache I/O into rcjav/cache.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pulls CACHE_PATH, CACHE_VERSION, CACHE_STALE_HOURS, load_cache, save_cache, cache_age_hours, and fmt_age out of rc-jav.py and into a new self-contained module. No behavior change. rc-jav.py: 2019 → 1972 lines. The new module's `CACHE_PATH = Path(__file__).resolve().parents[1] / "cache.json"` keeps the file at the repo root next to rc-jav.py (one directory above the package), matching the legacy `Path(__file__).resolve().parent / "cache.json"` location. rcjav/__init__.py now re-exports the cache public surface alongside the model and ids surface. Verified: - python rc-jav.py --help → ok - python fixtures/run.py → 17/17 cases pass - python -m unittest tests.test_rules → 5/5 OK Co-Authored-By: Claude Opus 4.7 --- rc-jav.py | 65 ++++++---------------------------------- rcjav/__init__.py | 9 ++++++ rcjav/cache.py | 76 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 56 deletions(-) create mode 100644 rcjav/cache.py diff --git a/rc-jav.py b/rc-jav.py index bbc49a5..0c84afe 100644 --- a/rc-jav.py +++ b/rc-jav.py @@ -161,9 +161,15 @@ CATALOG_COL_PATH = ("path", "full path", "location", "folder") CATALOG_COL_SIZE = ("size", "file size", "bytes", "size (bytes)") CATALOG_COL_DISC = ("disc", "disc name", "disc label", "volume", "source", "catalog", "media") -CACHE_PATH = Path(__file__).resolve().parent / "cache.json" -CACHE_VERSION = 3 # bumped: extract_id handles bracket-wrapped IDs + no-hyphen fallback -CACHE_STALE_HOURS = 24 +from rcjav.cache import ( + CACHE_PATH, + CACHE_VERSION, + CACHE_STALE_HOURS, + load_cache, + save_cache, + cache_age_hours, + fmt_age, +) DEFAULT_KEEP_RANKING: dict = { "priority_folders": ["ClearJAV"], @@ -202,59 +208,6 @@ def save_config(cfg: 
dict) -> None: os.replace(tmp, CONFIG_PATH) -def load_cache() -> dict: - if not CACHE_PATH.exists(): - return {"version": CACHE_VERSION, "remotes": {}} - try: - data = json.loads(CACHE_PATH.read_text(encoding="utf-8")) - if ( - not isinstance(data, dict) - or data.get("version") != CACHE_VERSION - or not isinstance(data.get("remotes"), dict) - ): - if isinstance(data, dict) and "version" in data and data["version"] != CACHE_VERSION: - sys.stderr.write( - f"[warn] cache version mismatch (got {data['version']}, " - f"expected {CACHE_VERSION}); forcing full rescan.\n" - ) - return {"version": CACHE_VERSION, "remotes": {}} - return data - except (json.JSONDecodeError, OSError): - return {"version": CACHE_VERSION, "remotes": {}} - - -def save_cache(cache: dict) -> None: - # Write to a sibling tmp file then atomically replace, so a killed mid-write - # (Ctrl-C, power loss, concurrent --scan) can't leave a half-written - # cache.json — load_cache would otherwise see invalid JSON and fall back to - # an empty cache, forcing a full re-scan. - tmp = CACHE_PATH.with_suffix(CACHE_PATH.suffix + ".tmp") - tmp.write_text(json.dumps(cache, indent=2), encoding="utf-8") - try: - os.replace(tmp, CACHE_PATH) - except PermissionError: - # Windows: destination may be briefly locked by antivirus or a concurrent reader. 
- time.sleep(0.5) - os.replace(tmp, CACHE_PATH) - - -def cache_age_hours(scanned_at: str) -> float | None: - try: - dt = datetime.fromisoformat(scanned_at.replace("Z", "+00:00")) - except ValueError: - return None - now = datetime.now(dt.tzinfo) if dt.tzinfo else datetime.now() - return (now - dt).total_seconds() / 3600.0 - - -def fmt_age(hours: float) -> str: - if hours < 1: - return f"{int(hours * 60)}m" - if hours < 24: - return f"{hours:.1f}h" - return f"{hours / 24:.1f}d" - - # ---------- WinCatalog ingest ---------- def _pick_col(headers_lower: list[str], synonyms: tuple[str, ...]) -> str | None: diff --git a/rcjav/__init__.py b/rcjav/__init__.py index b146b17..7e66a4f 100644 --- a/rcjav/__init__.py +++ b/rcjav/__init__.py @@ -6,6 +6,15 @@ find at the top level. Adding a new submodule does not change the public surface — only this file does. """ from rcjav.model import FileEntry # noqa: F401 +from rcjav.cache import ( # noqa: F401 + CACHE_PATH, + CACHE_VERSION, + CACHE_STALE_HOURS, + load_cache, + save_cache, + cache_age_hours, + fmt_age, +) from rcjav.ids import ( # noqa: F401 PRIMARY_ID_RE, FALLBACK_ID_RE, diff --git a/rcjav/cache.py b/rcjav/cache.py new file mode 100644 index 0000000..da0c87b --- /dev/null +++ b/rcjav/cache.py @@ -0,0 +1,76 @@ +"""cache.json I/O. + +This module owns the on-disk cache contract: where the file lives, +what the header looks like, and how mismatches are handled. The +current shape predates the two-tier `cache_schema` + `id_rules` split +documented in docs/CACHE_CONTRACT.md (extension repo) — step 10j +implements that contract; until then this is the legacy +`version: 3` reader. +""" +from __future__ import annotations + +import json +import os +import sys +import time +from datetime import datetime +from pathlib import Path + + +# Lives next to rc-jav.py at the repo root. 
+CACHE_PATH = Path(__file__).resolve().parents[1] / "cache.json" +CACHE_VERSION = 3 # bumped: extract_id handles bracket-wrapped IDs + no-hyphen fallback +CACHE_STALE_HOURS = 24 + + +def load_cache() -> dict: + if not CACHE_PATH.exists(): + return {"version": CACHE_VERSION, "remotes": {}} + try: + data = json.loads(CACHE_PATH.read_text(encoding="utf-8")) + if ( + not isinstance(data, dict) + or data.get("version") != CACHE_VERSION + or not isinstance(data.get("remotes"), dict) + ): + if isinstance(data, dict) and "version" in data and data["version"] != CACHE_VERSION: + sys.stderr.write( + f"[warn] cache version mismatch (got {data['version']}, " + f"expected {CACHE_VERSION}); forcing full rescan.\n" + ) + return {"version": CACHE_VERSION, "remotes": {}} + return data + except (json.JSONDecodeError, OSError): + return {"version": CACHE_VERSION, "remotes": {}} + + +def save_cache(cache: dict) -> None: + # Write to a sibling tmp file then atomically replace, so a killed mid-write + # (Ctrl-C, power loss, concurrent --scan) can't leave a half-written + # cache.json — load_cache would otherwise see invalid JSON and fall back to + # an empty cache, forcing a full re-scan. + tmp = CACHE_PATH.with_suffix(CACHE_PATH.suffix + ".tmp") + tmp.write_text(json.dumps(cache, indent=2), encoding="utf-8") + try: + os.replace(tmp, CACHE_PATH) + except PermissionError: + # Windows: destination may be briefly locked by antivirus or a concurrent reader. + time.sleep(0.5) + os.replace(tmp, CACHE_PATH) + + +def cache_age_hours(scanned_at: str) -> float | None: + try: + dt = datetime.fromisoformat(scanned_at.replace("Z", "+00:00")) + except ValueError: + return None + now = datetime.now(dt.tzinfo) if dt.tzinfo else datetime.now() + return (now - dt).total_seconds() / 3600.0 + + +def fmt_age(hours: float) -> str: + if hours < 1: + return f"{int(hours * 60)}m" + if hours < 24: + return f"{hours:.1f}h" + return f"{hours / 24:.1f}d"