From 550482a7a2c3b8441c5ea12ed9ef43579b2648c8 Mon Sep 17 00:00:00 2001 From: admin Date: Fri, 22 May 2026 21:54:49 +0200 Subject: [PATCH] Step 10g: extract library issues + renaming into rcjav/library.py --- rc-jav.py | 156 ++-------------------------------------- rcjav/__init__.py | 5 ++ rcjav/library.py | 176 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 188 insertions(+), 149 deletions(-) create mode 100644 rcjav/library.py diff --git a/rc-jav.py b/rc-jav.py index cf8d242..4b7c893 100644 --- a/rc-jav.py +++ b/rc-jav.py @@ -203,6 +203,13 @@ from rcjav.dupes import ( describe_dupe_risks, find_variant_alerts, ) +from rcjav.library import ( + find_library_issues, + rename_file_in_remote, + rename_files_batch, + _bracket_to_canonical, + _nohyphen_to_canonical, +) CONFIG_PATH = Path(__file__).resolve().parent / "config.json" @@ -558,155 +565,6 @@ def render_dupes(dupes: dict[str, list[FileEntry]], console.print(tbl) console.print() - - - -# ---------- library issues (non-canonical filenames) ---------- - -def _bracket_to_canonical(filename: str) -> str: - """[REAL-779].mp4 → REAL-779.mp4 | [HODV-21076] Saki [1080p].mkv → HODV-21076 Saki [1080p].mkv""" - stem = Path(filename).stem - suffix = Path(filename).suffix - bm = _BRACKET_ID_RE.match(stem) - if not bm: - return filename - inner = bm.group(1).strip() - rest = stem[bm.end():].strip() - new_stem = f"{inner} {rest}".strip() if rest else inner - return f"{new_stem}{suffix}" - - -def _nohyphen_to_canonical(filename: str) -> str: - """MVSD312 [576p].avi → MVSD-312 [576p].avi""" - stem = Path(filename).stem - suffix = Path(filename).suffix - m = _NOHYPHEN_ID_RE.match(stem) - if not m: - return filename - prefix = m.group(1).upper() - num_str = m.group(2) - rest = stem[m.end():] - return f"{prefix}-{num_str}{rest}{suffix}" - - -def find_library_issues(cache: dict) -> dict: - """Scan cache for files with non-canonical names. - - Returns: - {"bracket_names": [...], "nohyphen_names": [...]} - Each entry: {remote, path, size, mod_time, jav_id, canonical_name, issue} - """ - bracket: list[dict] = [] - nohyphen: list[dict] = [] - for remote, remote_data in cache.get("remotes", {}).items(): - for f in remote_data.get("files", []): - fname = Path(f["path"]).name - stem = Path(fname).stem - if stem.startswith("[") and _BRACKET_ID_RE.match(stem): - bracket.append({ - "remote": remote, - "path": f["path"], - "size": f.get("size", 0), - "size_human": human_size(f.get("size", 0)), - "mod_time": f.get("mod_time", ""), - "jav_id": f.get("jav_id", ""), - "canonical_name": _bracket_to_canonical(fname), - "issue": "bracket_id", - }) - elif (not PRIMARY_ID_RE.match(stem) - and not COMPOUND_ID_RE.match(stem) - and not FALLBACK_ID_RE.match(stem) - and _NOHYPHEN_ID_RE.match(stem)): - nohyphen.append({ - "remote": remote, - "path": f["path"], - "size": f.get("size", 0), - "size_human": human_size(f.get("size", 0)), - "mod_time": f.get("mod_time", ""), - "jav_id": f.get("jav_id", ""), - "canonical_name": _nohyphen_to_canonical(fname), - "issue": "nohyphen_id", - }) - return {"bracket_names": bracket, "nohyphen_names": nohyphen} - - -def rename_file_in_remote( - remote: str, - old_rel_path: str, - new_rel_path: str, - cache: dict, - rclone_bin: str = "rclone", - save: bool = True, -) -> dict: - """Rename one file via rclone moveto and patch cache.json. - - Returns {"ok": True, "old_path": ..., "new_path": ...} - or {"ok": False, "error": ..., "conflict": bool} - - Pass save=False when batching — caller is responsible for calling save_cache() once. - """ - sep = "" if remote.endswith("/") else "/" - old_full = f"{remote}{sep}{old_rel_path}" - new_full = f"{remote}{sep}{new_rel_path}" - - # Collision check — does target already exist? - check = subprocess.run( - [rclone_bin, "lsf", new_full], - capture_output=True, text=True, - ) - if check.returncode == 0 and check.stdout.strip(): - return {"ok": False, "error": f"Target already exists: {new_full}", "conflict": True} - - # Perform rename - result = subprocess.run( - [rclone_bin, "moveto", old_full, new_full], - capture_output=True, text=True, - ) - if result.returncode != 0: - return {"ok": False, "error": (result.stderr or result.stdout).strip(), "conflict": False} - - # Patch cache — update path + jav_id for the renamed entry - remote_data = cache.get("remotes", {}).get(remote) - if remote_data: - for f in remote_data.get("files", []): - if f["path"] == old_rel_path: - f["path"] = new_rel_path - f["jav_id"] = extract_id(Path(new_rel_path).name) or f["jav_id"] - break - remote_data["skipped"] = [s for s in remote_data.get("skipped", []) if s != old_rel_path] - if save: - save_cache(cache) - - return {"ok": True, "old_path": old_full, "new_path": new_full} - - -def rename_files_batch( - renames: list[dict], - cache: dict, - rclone_bin: str = "rclone", -) -> list[dict]: - """Rename multiple files, writing cache once at the end. - - Each item in renames: {remote, old_path, new_path} - Returns list of per-file results with old_path/new_path echoed back. - """ - results = [] - cache_dirty = False - for r in renames: - res = rename_file_in_remote( - r["remote"], r["old_path"], r["new_path"], - cache, rclone_bin=rclone_bin, save=False, - ) - res["old_path"] = r["old_path"] - res["new_path"] = r["new_path"] - results.append(res) - if res["ok"]: - cache_dirty = True - if cache_dirty: - save_cache(cache) - return results - - # ---------- plain renderers (--basic) ---------- def render_banner_plain(cache_meta: dict[str, dict], mode: str) -> str: diff --git a/rcjav/__init__.py b/rcjav/__init__.py index 344e496..8a58c10 100644 --- a/rcjav/__init__.py +++ b/rcjav/__init__.py @@ -6,6 +6,11 @@ find at the top level. Adding a new submodule does not change the public surface — only this file does. """ from rcjav.model import FileEntry # noqa: F401 +from rcjav.library import ( # noqa: F401 + find_library_issues, + rename_file_in_remote, + rename_files_batch, +) from rcjav.rclone_io import ( # noqa: F401 RCLONE_BIN, CANCEL_FLAG, diff --git a/rcjav/library.py b/rcjav/library.py new file mode 100644 index 0000000..aac534c --- /dev/null +++ b/rcjav/library.py @@ -0,0 +1,176 @@ +"""Library-issue detection (non-canonical filenames) + safe renaming. + +Scans the cache (not the live remote) for files whose names violate +the canonical `{ID}[ - actress][ [resolution]].ext` shape: + +- Bracket-wrapped IDs: `[REAL-779].mp4` -> `REAL-779.mp4` +- No-hyphen IDs: `MVSD312 [576p].avi` -> `MVSD-312 [576p].avi` + +`rename_file_in_remote` performs the rclone moveto and patches the +cache in place. `rename_files_batch` writes the cache once after a +batch of renames. +""" +from __future__ import annotations + +import subprocess +from pathlib import Path + +from rcjav.cache import save_cache +from rcjav.ids import ( + _BRACKET_ID_RE, + _NOHYPHEN_ID_RE, + COMPOUND_ID_RE, + FALLBACK_ID_RE, + PRIMARY_ID_RE, + extract_id, +) + + +def _human_size(n: int) -> str: + nf = float(max(0, n)) + for unit in ("B", "KiB", "MiB", "GiB", "TiB"): + if nf < 1024: + return f"{int(nf)} B" if unit == "B" else f"{nf:.2f} {unit}" + nf /= 1024 + return f"{nf:.2f} PiB" + + +def _bracket_to_canonical(filename: str) -> str: + """[REAL-779].mp4 -> REAL-779.mp4 | [HODV-21076] Saki [1080p].mkv -> HODV-21076 Saki [1080p].mkv""" + stem = Path(filename).stem + suffix = Path(filename).suffix + bm = _BRACKET_ID_RE.match(stem) + if not bm: + return filename + inner = bm.group(1).strip() + rest = stem[bm.end():].strip() + new_stem = f"{inner} {rest}".strip() if rest else inner + return f"{new_stem}{suffix}" + + +def _nohyphen_to_canonical(filename: str) -> str: + """MVSD312 [576p].avi -> MVSD-312 [576p].avi""" + stem = Path(filename).stem + suffix = Path(filename).suffix + m = _NOHYPHEN_ID_RE.match(stem) + if not m: + return filename + prefix = m.group(1).upper() + num_str = m.group(2) + rest = stem[m.end():] + return f"{prefix}-{num_str}{rest}{suffix}" + + +def find_library_issues(cache: dict) -> dict: + """Scan cache for files with non-canonical names. + + Returns: + {"bracket_names": [...], "nohyphen_names": [...]} + Each entry: {remote, path, size, mod_time, jav_id, canonical_name, issue} + """ + bracket: list[dict] = [] + nohyphen: list[dict] = [] + for remote, remote_data in cache.get("remotes", {}).items(): + for f in remote_data.get("files", []): + fname = Path(f["path"]).name + stem = Path(fname).stem + if stem.startswith("[") and _BRACKET_ID_RE.match(stem): + bracket.append({ + "remote": remote, + "path": f["path"], + "size": f.get("size", 0), + "size_human": _human_size(f.get("size", 0)), + "mod_time": f.get("mod_time", ""), + "jav_id": f.get("jav_id", ""), + "canonical_name": _bracket_to_canonical(fname), + "issue": "bracket_id", + }) + elif (not PRIMARY_ID_RE.match(stem) + and not COMPOUND_ID_RE.match(stem) + and not FALLBACK_ID_RE.match(stem) + and _NOHYPHEN_ID_RE.match(stem)): + nohyphen.append({ + "remote": remote, + "path": f["path"], + "size": f.get("size", 0), + "size_human": _human_size(f.get("size", 0)), + "mod_time": f.get("mod_time", ""), + "jav_id": f.get("jav_id", ""), + "canonical_name": _nohyphen_to_canonical(fname), + "issue": "nohyphen_id", + }) + return {"bracket_names": bracket, "nohyphen_names": nohyphen} + + +def rename_file_in_remote( + remote: str, + old_rel_path: str, + new_rel_path: str, + cache: dict, + rclone_bin: str = "rclone", + save: bool = True, +) -> dict: + """Rename one file via rclone moveto and patch cache.json. + + Returns {"ok": True, "old_path": ..., "new_path": ...} + or {"ok": False, "error": ..., "conflict": bool} + + Pass save=False when batching — caller is responsible for calling save_cache() once. + """ + sep = "" if remote.endswith("/") else "/" + old_full = f"{remote}{sep}{old_rel_path}" + new_full = f"{remote}{sep}{new_rel_path}" + + check = subprocess.run( + [rclone_bin, "lsf", new_full], + capture_output=True, text=True, + ) + if check.returncode == 0 and check.stdout.strip(): + return {"ok": False, "error": f"Target already exists: {new_full}", "conflict": True} + + result = subprocess.run( + [rclone_bin, "moveto", old_full, new_full], + capture_output=True, text=True, + ) + if result.returncode != 0: + return {"ok": False, "error": (result.stderr or result.stdout).strip(), "conflict": False} + + remote_data = cache.get("remotes", {}).get(remote) + if remote_data: + for f in remote_data.get("files", []): + if f["path"] == old_rel_path: + f["path"] = new_rel_path + f["jav_id"] = extract_id(Path(new_rel_path).name) or f["jav_id"] + break + remote_data["skipped"] = [s for s in remote_data.get("skipped", []) if s != old_rel_path] + if save: + save_cache(cache) + + return {"ok": True, "old_path": old_full, "new_path": new_full} + + +def rename_files_batch( + renames: list[dict], + cache: dict, + rclone_bin: str = "rclone", +) -> list[dict]: + """Rename multiple files, writing cache once at the end. + + Each item in renames: {remote, old_path, new_path} + Returns list of per-file results with old_path/new_path echoed back. + """ + results = [] + cache_dirty = False + for r in renames: + res = rename_file_in_remote( + r["remote"], r["old_path"], r["new_path"], + cache, rclone_bin=rclone_bin, save=False, + ) + res["old_path"] = r["old_path"] + res["new_path"] = r["new_path"] + results.append(res) + if res["ok"]: + cache_dirty = True + if cache_dirty: + save_cache(cache) + return results