Step 10c: extract cache I/O into rcjav/cache.py
Pulls CACHE_PATH, CACHE_VERSION, CACHE_STALE_HOURS, load_cache, save_cache, cache_age_hours, and fmt_age out of rc-jav.py and into a new self-contained module, rcjav/cache.py. No behavior change. rc-jav.py: 2019 → 1972 lines. The new module's `CACHE_PATH = Path(__file__).resolve().parents[1] / "cache.json"` keeps the file at the repo root next to rc-jav.py (one directory above the package), matching the legacy `Path(__file__).resolve().parent / "cache.json"` location. rcjav/__init__.py now re-exports the cache public surface alongside the model and ids surface. Verified: (1) python rc-jav.py --help → ok; (2) python fixtures/run.py → 17/17 cases pass; (3) python -m unittest tests.test_rules → 5/5 OK. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -161,9 +161,15 @@ CATALOG_COL_PATH = ("path", "full path", "location", "folder")
|
|||||||
CATALOG_COL_SIZE = ("size", "file size", "bytes", "size (bytes)")
|
CATALOG_COL_SIZE = ("size", "file size", "bytes", "size (bytes)")
|
||||||
CATALOG_COL_DISC = ("disc", "disc name", "disc label", "volume", "source", "catalog", "media")
|
CATALOG_COL_DISC = ("disc", "disc name", "disc label", "volume", "source", "catalog", "media")
|
||||||
|
|
||||||
CACHE_PATH = Path(__file__).resolve().parent / "cache.json"
|
from rcjav.cache import (
|
||||||
CACHE_VERSION = 3 # bumped: extract_id handles bracket-wrapped IDs + no-hyphen fallback
|
CACHE_PATH,
|
||||||
CACHE_STALE_HOURS = 24
|
CACHE_VERSION,
|
||||||
|
CACHE_STALE_HOURS,
|
||||||
|
load_cache,
|
||||||
|
save_cache,
|
||||||
|
cache_age_hours,
|
||||||
|
fmt_age,
|
||||||
|
)
|
||||||
|
|
||||||
DEFAULT_KEEP_RANKING: dict = {
|
DEFAULT_KEEP_RANKING: dict = {
|
||||||
"priority_folders": ["ClearJAV"],
|
"priority_folders": ["ClearJAV"],
|
||||||
@@ -202,59 +208,6 @@ def save_config(cfg: dict) -> None:
|
|||||||
os.replace(tmp, CONFIG_PATH)
|
os.replace(tmp, CONFIG_PATH)
|
||||||
|
|
||||||
|
|
||||||
def load_cache() -> dict:
|
|
||||||
if not CACHE_PATH.exists():
|
|
||||||
return {"version": CACHE_VERSION, "remotes": {}}
|
|
||||||
try:
|
|
||||||
data = json.loads(CACHE_PATH.read_text(encoding="utf-8"))
|
|
||||||
if (
|
|
||||||
not isinstance(data, dict)
|
|
||||||
or data.get("version") != CACHE_VERSION
|
|
||||||
or not isinstance(data.get("remotes"), dict)
|
|
||||||
):
|
|
||||||
if isinstance(data, dict) and "version" in data and data["version"] != CACHE_VERSION:
|
|
||||||
sys.stderr.write(
|
|
||||||
f"[warn] cache version mismatch (got {data['version']}, "
|
|
||||||
f"expected {CACHE_VERSION}); forcing full rescan.\n"
|
|
||||||
)
|
|
||||||
return {"version": CACHE_VERSION, "remotes": {}}
|
|
||||||
return data
|
|
||||||
except (json.JSONDecodeError, OSError):
|
|
||||||
return {"version": CACHE_VERSION, "remotes": {}}
|
|
||||||
|
|
||||||
|
|
||||||
def save_cache(cache: dict) -> None:
|
|
||||||
# Write to a sibling tmp file then atomically replace, so a killed mid-write
|
|
||||||
# (Ctrl-C, power loss, concurrent --scan) can't leave a half-written
|
|
||||||
# cache.json — load_cache would otherwise see invalid JSON and fall back to
|
|
||||||
# an empty cache, forcing a full re-scan.
|
|
||||||
tmp = CACHE_PATH.with_suffix(CACHE_PATH.suffix + ".tmp")
|
|
||||||
tmp.write_text(json.dumps(cache, indent=2), encoding="utf-8")
|
|
||||||
try:
|
|
||||||
os.replace(tmp, CACHE_PATH)
|
|
||||||
except PermissionError:
|
|
||||||
# Windows: destination may be briefly locked by antivirus or a concurrent reader.
|
|
||||||
time.sleep(0.5)
|
|
||||||
os.replace(tmp, CACHE_PATH)
|
|
||||||
|
|
||||||
|
|
||||||
def cache_age_hours(scanned_at: str) -> float | None:
|
|
||||||
try:
|
|
||||||
dt = datetime.fromisoformat(scanned_at.replace("Z", "+00:00"))
|
|
||||||
except ValueError:
|
|
||||||
return None
|
|
||||||
now = datetime.now(dt.tzinfo) if dt.tzinfo else datetime.now()
|
|
||||||
return (now - dt).total_seconds() / 3600.0
|
|
||||||
|
|
||||||
|
|
||||||
def fmt_age(hours: float) -> str:
|
|
||||||
if hours < 1:
|
|
||||||
return f"{int(hours * 60)}m"
|
|
||||||
if hours < 24:
|
|
||||||
return f"{hours:.1f}h"
|
|
||||||
return f"{hours / 24:.1f}d"
|
|
||||||
|
|
||||||
|
|
||||||
# ---------- WinCatalog ingest ----------
|
# ---------- WinCatalog ingest ----------
|
||||||
|
|
||||||
def _pick_col(headers_lower: list[str], synonyms: tuple[str, ...]) -> str | None:
|
def _pick_col(headers_lower: list[str], synonyms: tuple[str, ...]) -> str | None:
|
||||||
|
|||||||
@@ -6,6 +6,15 @@ find at the top level. Adding a new submodule does not change the
|
|||||||
public surface — only this file does.
|
public surface — only this file does.
|
||||||
"""
|
"""
|
||||||
from rcjav.model import FileEntry # noqa: F401
|
from rcjav.model import FileEntry # noqa: F401
|
||||||
|
from rcjav.cache import ( # noqa: F401
|
||||||
|
CACHE_PATH,
|
||||||
|
CACHE_VERSION,
|
||||||
|
CACHE_STALE_HOURS,
|
||||||
|
load_cache,
|
||||||
|
save_cache,
|
||||||
|
cache_age_hours,
|
||||||
|
fmt_age,
|
||||||
|
)
|
||||||
from rcjav.ids import ( # noqa: F401
|
from rcjav.ids import ( # noqa: F401
|
||||||
PRIMARY_ID_RE,
|
PRIMARY_ID_RE,
|
||||||
FALLBACK_ID_RE,
|
FALLBACK_ID_RE,
|
||||||
|
|||||||
@@ -0,0 +1,76 @@
|
|||||||
|
"""cache.json I/O.
|
||||||
|
|
||||||
|
This module owns the on-disk cache contract: where the file lives,
|
||||||
|
what the header looks like, and how mismatches are handled. The
|
||||||
|
current shape predates the two-tier `cache_schema` + `id_rules` split
|
||||||
|
documented in docs/CACHE_CONTRACT.md (extension repo) — step 10j
|
||||||
|
implements that contract; until then this is the legacy
|
||||||
|
`version: 3` reader.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
# Lives next to rc-jav.py at the repo root.
|
||||||
|
CACHE_PATH = Path(__file__).resolve().parents[1] / "cache.json"
|
||||||
|
CACHE_VERSION = 3 # bumped: extract_id handles bracket-wrapped IDs + no-hyphen fallback
|
||||||
|
CACHE_STALE_HOURS = 24
|
||||||
|
|
||||||
|
|
||||||
|
def load_cache() -> dict:
    """Load cache.json, falling back to an empty cache when it is unusable.

    Returns:
        The parsed cache dict when the file exists, parses as JSON, is a
        dict whose ``"version"`` equals ``CACHE_VERSION`` and whose
        ``"remotes"`` is a dict. Otherwise a fresh
        ``{"version": CACHE_VERSION, "remotes": {}}``.

    A warning is written to stderr only when the file carries a
    ``"version"`` key that differs from ``CACHE_VERSION``; every other
    fallback is silent.
    """
    fresh = {"version": CACHE_VERSION, "remotes": {}}
    try:
        data = json.loads(CACHE_PATH.read_text(encoding="utf-8"))
    except (ValueError, OSError):
        # ValueError covers both json.JSONDecodeError (malformed JSON) and
        # UnicodeDecodeError (bytes that are not valid UTF-8); OSError covers
        # a missing or unreadable file. All of them mean "no usable cache".
        return fresh

    if not isinstance(data, dict):
        return fresh

    if data.get("version") != CACHE_VERSION:
        # Only warn when a version is actually present; a file with no
        # version key at all is treated as silently invalid.
        if "version" in data:
            sys.stderr.write(
                f"[warn] cache version mismatch (got {data['version']}, "
                f"expected {CACHE_VERSION}); forcing full rescan.\n"
            )
        return fresh

    if not isinstance(data.get("remotes"), dict):
        return fresh

    return data
|
||||||
|
|
||||||
|
|
||||||
|
def save_cache(cache: dict) -> None:
    """Serialize *cache* to cache.json via a temp file and atomic replace.

    The JSON is first written to a sibling ``cache.json.tmp`` and then moved
    over ``CACHE_PATH`` with ``os.replace``, so a killed mid-write (Ctrl-C,
    power loss, concurrent --scan) can't leave a half-written cache.json —
    load_cache would otherwise see invalid JSON and fall back to an empty
    cache, forcing a full re-scan.

    Raises:
        OSError: if the temp file cannot be written, or the replace still
            fails after one retry. The temp file is removed before the
            error propagates.
        TypeError / ValueError: if *cache* is not JSON-serializable.
    """
    tmp = CACHE_PATH.with_suffix(CACHE_PATH.suffix + ".tmp")
    try:
        tmp.write_text(json.dumps(cache, indent=2), encoding="utf-8")
        try:
            os.replace(tmp, CACHE_PATH)
        except PermissionError:
            # Windows: destination may be briefly locked by antivirus or a
            # concurrent reader. Wait once and retry.
            time.sleep(0.5)
            os.replace(tmp, CACHE_PATH)
    except BaseException:
        # Best-effort cleanup so a failed save does not leave a stale
        # cache.json.tmp behind; the original error is always re-raised.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
|
||||||
|
|
||||||
|
|
||||||
|
def cache_age_hours(scanned_at: str) -> float | None:
|
||||||
|
try:
|
||||||
|
dt = datetime.fromisoformat(scanned_at.replace("Z", "+00:00"))
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
now = datetime.now(dt.tzinfo) if dt.tzinfo else datetime.now()
|
||||||
|
return (now - dt).total_seconds() / 3600.0
|
||||||
|
|
||||||
|
|
||||||
|
def fmt_age(hours: float) -> str:
    """Format an age in hours as a short human-readable string.

    Args:
        hours: Age in hours. Negative values (a timestamp in the future,
            e.g. from clock skew) are clamped to zero.

    Returns:
        Whole minutes (``"30m"``) below one hour, hours with one decimal
        (``"5.5h"``) below one day, otherwise days with one decimal
        (``"1.5d"``).
    """
    # Without the clamp a future timestamp would fall into the minutes
    # branch and render as e.g. "-120m".
    hours = max(hours, 0.0)
    if hours < 1:
        return f"{int(hours * 60)}m"
    if hours < 24:
        return f"{hours:.1f}h"
    return f"{hours / 24:.1f}d"
|
||||||
Reference in New Issue
Block a user