Step 10c: extract cache I/O into rcjav/cache.py
Pulls CACHE_PATH, CACHE_VERSION, CACHE_STALE_HOURS, load_cache, save_cache, cache_age_hours, and fmt_age out of rc-jav.py and into a new self-contained module. No behavior change. rc-jav.py: 2019 → 1972 lines. The new module's `CACHE_PATH = Path(__file__).resolve().parents[1] / "cache.json"` keeps the file at the repo root next to rc-jav.py (one directory above the package), matching the legacy `Path(__file__).resolve().parent / "cache.json"` location. rcjav/__init__.py now re-exports the cache public surface alongside the model and ids surface. Verified: - python rc-jav.py --help → ok - python fixtures/run.py → 17/17 cases pass - python -m unittest tests.test_rules → 5/5 OK Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -6,6 +6,15 @@ find at the top level. Adding a new submodule does not change the
|
||||
public surface — only this file does.
|
||||
"""
|
||||
from rcjav.model import FileEntry # noqa: F401
|
||||
from rcjav.cache import ( # noqa: F401
|
||||
CACHE_PATH,
|
||||
CACHE_VERSION,
|
||||
CACHE_STALE_HOURS,
|
||||
load_cache,
|
||||
save_cache,
|
||||
cache_age_hours,
|
||||
fmt_age,
|
||||
)
|
||||
from rcjav.ids import ( # noqa: F401
|
||||
PRIMARY_ID_RE,
|
||||
FALLBACK_ID_RE,
|
||||
|
||||
@@ -0,0 +1,76 @@
|
||||
"""cache.json I/O.
|
||||
|
||||
This module owns the on-disk cache contract: where the file lives,
|
||||
what the header looks like, and how mismatches are handled. The
|
||||
current shape predates the two-tier `cache_schema` + `id_rules` split
|
||||
documented in docs/CACHE_CONTRACT.md (extension repo) — step 10j
|
||||
implements that contract; until then this is the legacy
|
||||
`version: 3` reader.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# cache.json is resolved one directory above this module's own directory,
# which places it next to rc-jav.py at the repo root.
CACHE_PATH = Path(__file__).resolve().parents[1].joinpath("cache.json")

# Bumped: extract_id handles bracket-wrapped IDs + no-hyphen fallback.
CACHE_VERSION = 3

CACHE_STALE_HOURS = 24
|
||||
|
||||
|
||||
def load_cache() -> dict:
    """Read cache.json and return its contents, or a fresh empty cache.

    The parsed document is returned only when it is a dict whose
    ``version`` equals ``CACHE_VERSION`` and whose ``remotes`` value is a
    dict. In every other case (missing or unreadable file, bytes that are
    not valid UTF-8, invalid JSON, wrong shape, version mismatch) a new
    ``{"version": CACHE_VERSION, "remotes": {}}`` dict is returned instead.

    A warning is written to stderr only when the file is a dict that
    carries a ``version`` key with a different value.
    """
    try:
        data = json.loads(CACHE_PATH.read_text(encoding="utf-8"))
    except (ValueError, OSError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError,
        # so a corrupt file falls back to an empty cache instead of crashing.
        # OSError covers a missing or unreadable file.
        return {"version": CACHE_VERSION, "remotes": {}}

    is_mapping = isinstance(data, dict)
    if (
        is_mapping
        and data.get("version") == CACHE_VERSION
        and isinstance(data.get("remotes"), dict)
    ):
        return data

    # Only an explicit, different version number earns a warning; a missing
    # version key or malformed shape is discarded silently.
    if is_mapping and "version" in data and data["version"] != CACHE_VERSION:
        sys.stderr.write(
            f"[warn] cache version mismatch (got {data['version']}, "
            f"expected {CACHE_VERSION}); forcing full rescan.\n"
        )
    return {"version": CACHE_VERSION, "remotes": {}}
|
||||
|
||||
|
||||
def save_cache(cache: dict) -> None:
    """Serialize *cache* as indented JSON and write it to ``CACHE_PATH``.

    The data is first written to a sibling ``.tmp`` file and then moved over
    the real file with ``os.replace``, so an interrupted write (Ctrl-C, power
    loss, concurrent --scan) can't leave a half-written cache.json behind;
    load_cache would otherwise see invalid JSON and fall back to an empty
    cache, forcing a full re-scan.

    If the replace raises ``PermissionError`` it is retried once after half a
    second; a second failure propagates to the caller.
    """
    payload = json.dumps(cache, indent=2)
    staging = CACHE_PATH.with_name(CACHE_PATH.name + ".tmp")
    staging.write_text(payload, encoding="utf-8")
    try:
        os.replace(staging, CACHE_PATH)
    except PermissionError:
        # Windows: destination may be briefly locked by antivirus or a
        # concurrent reader. Wait a moment and try exactly once more.
        time.sleep(0.5)
        os.replace(staging, CACHE_PATH)
|
||||
|
||||
|
||||
def cache_age_hours(scanned_at: str) -> float | None:
    """Return how many hours ago the ISO-8601 timestamp *scanned_at* was.

    A trailing ``Z`` is accepted by rewriting it to ``+00:00`` before
    parsing. Timezone-aware timestamps are compared against the current time
    in the same timezone; naive timestamps are compared against the naive
    local time.

    Returns ``None`` when *scanned_at* is not a string or cannot be parsed as
    an ISO-8601 timestamp.
    """
    # A missing or malformed cache field may arrive as None or another
    # non-string value; treat it like any other unparseable timestamp.
    if not isinstance(scanned_at, str):
        return None
    try:
        dt = datetime.fromisoformat(scanned_at.replace("Z", "+00:00"))
    except ValueError:
        return None
    # Never mix naive and aware datetimes: match "now" to the parsed value.
    now = datetime.now(dt.tzinfo) if dt.tzinfo else datetime.now()
    return (now - dt).total_seconds() / 3600.0
|
||||
|
||||
|
||||
def fmt_age(hours: float) -> str:
    """Format an age given in hours as a short human-readable string.

    Under one hour the result is whole minutes (truncated) with an ``m``
    suffix; under 24 hours it is hours with one decimal and an ``h`` suffix;
    otherwise it is days with one decimal and a ``d`` suffix.
    """
    if hours < 1:
        minutes = int(hours * 60)
        return f"{minutes}m"
    amount, unit = (hours, "h") if hours < 24 else (hours / 24, "d")
    return f"{amount:.1f}{unit}"
|
||||
Reference in New Issue
Block a user