33c495ad57
Implements the two-tier contract from docs/CACHE_CONTRACT.md (extension
repo, locked at step 9):
- cache_schema: on-disk shape; mismatch -> force rebuild
- id_rules: bumps when extraction rules change
- id_rules_signature: sha256 over canonical rule text; catches drift
  when the integer bump is forgotten
New constants in rcjav/cache.py:
CACHE_SCHEMA_VERSION = 1
ID_RULES_VERSION = 1 (the legacy "version: 3" cache reads as
id_rules: 0 after in-place migration)
New helpers:
rcjav.ids.current_rules_signature()
Sha256 over the canonical text of every rule that influences
a jav_id: built-in regexes, BUILTIN_PART_RES, PART_RES (which
captures user-added part patterns), FC2 handling.
rcjav.cache.load_cache(signature=None)
Reads cache.json. Legacy `version: 3` headers get an in-place
header upgrade with no forced rescan; the cache is stamped as
`id_rules: 0` + signature "legacy" so it surfaces as
"stale by rules" in cache_state. Schema mismatch on the new
header still forces a rebuild.
rcjav.cache.cache_state(cache, signature)
Classifies a cache as "fresh" / "stale_by_rules" /
"schema_mismatch". Drives the three-state extension UX.
rcjav.cache.stamp_current_rules(cache, signature)
Updates id_rules and id_rules_signature in place. Called after
a successful full scan or --reextract.
New CLI command:
rc-jav.py --reextract
Walks `cache["remotes"][r]["files"]` against the live rule set and
updates `jav_id` in place. No rclone calls — fast path (seconds on
a 7k-file cache). Reports changed/unchanged/dropped per remote.
Stamps current rules into the saved cache.
--scan (full, no --scan-since) now also stamps current rules.
--scan --scan-since deliberately does NOT stamp: it only re-walks
recently-modified files, so older entries may still carry jav_ids
from previous rules; cache stays "stale by rules" until a full scan
or --reextract.
Verified:
- python rc-jav.py --reextract --format json on the live 7124-file
cache → 0 changes (existing IDs already canonical), cache.json
rewritten with new header
- cache_state on the post-migration cache (after the --reextract run above has stamped the current rules) → "fresh"
- tests + fixtures + --help all pass
Extension-side (host's cache_status response + options-cache.js
three-state UX + Re-extract IDs button) ships in a separate commit
in the extension repo.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
112 lines
2.4 KiB
Python
112 lines
2.4 KiB
Python
"""rcjav — internal package split out of rc-jav.py.
|
|
|
|
This file re-exports the names that external callers (tests, fixtures
|
|
runner, native messaging host, in-tree code in rc-jav.py) expect to
|
|
find at the top level. Adding a new submodule does not change the
|
|
public surface — only this file does.
|
|
"""
|
|
from rcjav.model import FileEntry # noqa: F401
|
|
from rcjav.output import ( # noqa: F401
|
|
USE_ANSI,
|
|
ANSI_RESET,
|
|
ANSI_GREEN,
|
|
ANSI_RED,
|
|
ANSI_YELLOW,
|
|
ANSI_CYAN,
|
|
ANSI_DIM,
|
|
ANSI_BOLD,
|
|
set_use_ansi,
|
|
set_basic,
|
|
ansi,
|
|
console,
|
|
set_console_no_color,
|
|
strip_markup,
|
|
human_size,
|
|
BasicProgress,
|
|
make_progress,
|
|
render_banner,
|
|
render_search,
|
|
render_name_matches,
|
|
render_name_matches_plain,
|
|
render_dupes,
|
|
render_banner_plain,
|
|
render_search_plain,
|
|
render_dupes_plain,
|
|
write_txt,
|
|
write_csv,
|
|
describe_skipped_id,
|
|
dupes_to_obj,
|
|
write_json,
|
|
)
|
|
from rcjav.library import ( # noqa: F401
|
|
find_library_issues,
|
|
rename_file_in_remote,
|
|
rename_files_batch,
|
|
)
|
|
from rcjav.rclone_io import ( # noqa: F401
|
|
RCLONE_BIN,
|
|
CANCEL_FLAG,
|
|
CANCEL_CHECK_INTERVAL,
|
|
DURATION_RE,
|
|
# NOTE(review): `set_basic` is also imported from rcjav.output earlier in
# this file. This later import rebinds the package-level name, so unless
# both modules expose the same object, `rcjav.set_basic` resolves to the
# rclone_io one — confirm that is intended (alias one if both are needed).
set_basic,
|
|
set_rclone_bin,
|
|
quick_search_remote,
|
|
choose_search_mode,
|
|
name_to_include_patterns,
|
|
name_match,
|
|
query_to_include_patterns,
|
|
remote_file_count,
|
|
parse_duration,
|
|
walk_remote,
|
|
)
|
|
from rcjav.catalog import ( # noqa: F401
|
|
CATALOG_COL_NAME,
|
|
CATALOG_COL_PATH,
|
|
CATALOG_COL_SIZE,
|
|
CATALOG_COL_DISC,
|
|
normalize_catalog_path,
|
|
load_catalog_csv,
|
|
load_catalog_xml,
|
|
load_catalogs,
|
|
)
|
|
from rcjav.dupes import ( # noqa: F401
|
|
DEFAULT_KEEP_RANKING,
|
|
set_keep_ranking,
|
|
get_keep_ranking,
|
|
decide_keep_with_reason,
|
|
decide_keep,
|
|
find_dupes,
|
|
describe_dupe_risks,
|
|
find_variant_alerts,
|
|
)
|
|
from rcjav.cache import ( # noqa: F401
|
|
CACHE_PATH,
|
|
CACHE_VERSION,
|
|
CACHE_SCHEMA_VERSION,
|
|
ID_RULES_VERSION,
|
|
CACHE_STALE_HOURS,
|
|
load_cache,
|
|
save_cache,
|
|
cache_age_hours,
|
|
fmt_age,
|
|
cache_state,
|
|
stamp_current_rules,
|
|
)
|
|
from rcjav.ids import ( # noqa: F401
|
|
PRIMARY_ID_RE,
|
|
FALLBACK_ID_RE,
|
|
COMPOUND_ID_RE,
|
|
RANGE_RE,
|
|
BUILTIN_PART_RES,
|
|
PART_RES,
|
|
configure_part_patterns,
|
|
detect_part,
|
|
detect_part_from_stem,
|
|
part_key,
|
|
extract_id,
|
|
normalize_id,
|
|
describe_id_match,
|
|
expand_range,
|
|
current_rules_signature,
|
|
)
|