Step 10g: extract library issues + renaming into rcjav/library.py
This commit is contained in:
@@ -203,6 +203,13 @@ from rcjav.dupes import (
|
|||||||
describe_dupe_risks,
|
describe_dupe_risks,
|
||||||
find_variant_alerts,
|
find_variant_alerts,
|
||||||
)
|
)
|
||||||
|
from rcjav.library import (
|
||||||
|
find_library_issues,
|
||||||
|
rename_file_in_remote,
|
||||||
|
rename_files_batch,
|
||||||
|
_bracket_to_canonical,
|
||||||
|
_nohyphen_to_canonical,
|
||||||
|
)
|
||||||
|
|
||||||
CONFIG_PATH = Path(__file__).resolve().parent / "config.json"
|
CONFIG_PATH = Path(__file__).resolve().parent / "config.json"
|
||||||
|
|
||||||
@@ -558,155 +565,6 @@ def render_dupes(dupes: dict[str, list[FileEntry]],
|
|||||||
console.print(tbl)
|
console.print(tbl)
|
||||||
console.print()
|
console.print()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ---------- library issues (non-canonical filenames) ----------
|
|
||||||
|
|
||||||
def _bracket_to_canonical(filename: str) -> str:
    """Drop the square brackets wrapping a leading ID in *filename*.

    Examples:
        [REAL-779].mp4 -> REAL-779.mp4
        [HODV-21076] Saki [1080p].mkv -> HODV-21076 Saki [1080p].mkv

    The name is returned unchanged when its stem does not match
    ``_BRACKET_ID_RE``.
    """
    parsed = Path(filename)
    match = _BRACKET_ID_RE.match(parsed.stem)
    if match is None:
        return filename

    ident = match.group(1).strip()
    tail = parsed.stem[match.end():].strip()
    if not tail:
        return f"{ident}{parsed.suffix}"
    # Join ID and remainder with one space; the outer strip removes the
    # separator again if the ID part turned out to be empty.
    return f"{ident} {tail}".strip() + parsed.suffix
|
|
||||||
|
|
||||||
|
|
||||||
def _nohyphen_to_canonical(filename: str) -> str:
    """Insert the missing hyphen into a leading ID and upper-case its prefix.

    Example: MVSD312 [576p].avi -> MVSD-312 [576p].avi

    The name is returned unchanged when its stem does not match
    ``_NOHYPHEN_ID_RE``.
    """
    parsed = Path(filename)
    stem = parsed.stem
    hit = _NOHYPHEN_ID_RE.match(stem)
    if hit is None:
        return filename

    letters = hit.group(1).upper()
    digits = hit.group(2)
    # Everything after the matched ID is carried over untouched.
    remainder = stem[hit.end():]
    return f"{letters}-{digits}{remainder}{parsed.suffix}"
|
|
||||||
|
|
||||||
|
|
||||||
def find_library_issues(cache: dict) -> dict:
    """Collect cached files whose names are not in canonical form.

    Only the ``cache`` mapping is inspected (``cache["remotes"][*]["files"]``).

    Returns:
        {"bracket_names": [...], "nohyphen_names": [...]}
        Each entry: {remote, path, size, size_human, mod_time, jav_id,
        canonical_name, issue}
    """

    def describe(remote_name, record, base_name, fixer, tag):
        # One report row; key order is identical for both issue kinds.
        byte_count = record.get("size", 0)
        return {
            "remote": remote_name,
            "path": record["path"],
            "size": byte_count,
            "size_human": human_size(byte_count),
            "mod_time": record.get("mod_time", ""),
            "jav_id": record.get("jav_id", ""),
            "canonical_name": fixer(base_name),
            "issue": tag,
        }

    bracketed: list[dict] = []
    unhyphenated: list[dict] = []

    for remote_name, remote_data in cache.get("remotes", {}).items():
        for record in remote_data.get("files", []):
            base_name = Path(record["path"]).name
            stem = Path(base_name).stem

            if stem.startswith("[") and _BRACKET_ID_RE.match(stem):
                bracketed.append(
                    describe(remote_name, record, base_name,
                             _bracket_to_canonical, "bracket_id")
                )
                continue

            # A stem that any of the regular ID patterns accepts is not
            # reported as a no-hyphen name.
            if (PRIMARY_ID_RE.match(stem)
                    or COMPOUND_ID_RE.match(stem)
                    or FALLBACK_ID_RE.match(stem)):
                continue

            if _NOHYPHEN_ID_RE.match(stem):
                unhyphenated.append(
                    describe(remote_name, record, base_name,
                             _nohyphen_to_canonical, "nohyphen_id")
                )

    return {"bracket_names": bracketed, "nohyphen_names": unhyphenated}
|
|
||||||
|
|
||||||
|
|
||||||
def rename_file_in_remote(
    remote: str,
    old_rel_path: str,
    new_rel_path: str,
    cache: dict,
    rclone_bin: str = "rclone",
    save: bool = True,
) -> dict:
    """Rename one file via ``rclone moveto`` and patch the cache in place.

    Args:
        remote: Remote root; a "/" is appended before the relative path
            unless it already ends with one.
        old_rel_path: Current path of the file, relative to ``remote``.
        new_rel_path: Desired path of the file, relative to ``remote``.
        cache: Cache mapping; the entry under ``cache["remotes"][remote]``
            is updated after a successful move.
        rclone_bin: rclone executable to invoke.
        save: Pass ``False`` when batching — the caller is then responsible
            for calling ``save_cache()`` once.

    Returns:
        {"ok": True, "old_path": ..., "new_path": ...}
        or {"ok": False, "error": ..., "conflict": bool}
    """
    sep = "" if remote.endswith("/") else "/"
    old_full = f"{remote}{sep}{old_rel_path}"
    new_full = f"{remote}{sep}{new_rel_path}"

    # Collision check — a successful listing with non-empty output is
    # treated as "target already exists".
    check = subprocess.run(
        [rclone_bin, "lsf", new_full],
        capture_output=True, text=True,
    )
    if check.returncode == 0 and check.stdout.strip():
        return {"ok": False, "error": f"Target already exists: {new_full}", "conflict": True}

    # Perform the rename.
    result = subprocess.run(
        [rclone_bin, "moveto", old_full, new_full],
        capture_output=True, text=True,
    )
    if result.returncode != 0:
        return {"ok": False, "error": (result.stderr or result.stdout).strip(), "conflict": False}

    # Patch cache — update path + jav_id for the renamed entry.
    remote_data = cache.get("remotes", {}).get(remote)
    if remote_data:
        for f in remote_data.get("files", []):
            if f["path"] == old_rel_path:
                f["path"] = new_rel_path
                new_id = extract_id(Path(new_rel_path).name)
                # Only overwrite when an ID was extracted. An entry without
                # a "jav_id" key must not raise here: the remote move has
                # already happened and the cache has to stay consistent.
                if new_id:
                    f["jav_id"] = new_id
                break
        remote_data["skipped"] = [s for s in remote_data.get("skipped", []) if s != old_rel_path]

    if save:
        save_cache(cache)

    return {"ok": True, "old_path": old_full, "new_path": new_full}
|
|
||||||
|
|
||||||
|
|
||||||
def rename_files_batch(
    renames: list[dict],
    cache: dict,
    rclone_bin: str = "rclone",
) -> list[dict]:
    """Rename multiple files, writing the cache once at the end.

    Args:
        renames: Items of the form {remote, old_path, new_path}.
        cache: Cache mapping patched in place by each successful rename.
        rclone_bin: rclone executable to invoke.

    Returns:
        List of per-file results with the item's old_path/new_path echoed
        back (replacing the full paths reported by the single-file rename).

    If an item raises part-way through the batch, the cache is still saved
    for the renames that already succeeded before the exception propagates.
    """
    results: list[dict] = []
    cache_dirty = False
    try:
        for r in renames:
            res = rename_file_in_remote(
                r["remote"], r["old_path"], r["new_path"],
                cache, rclone_bin=rclone_bin, save=False,
            )
            if res["ok"]:
                cache_dirty = True
            res["old_path"] = r["old_path"]
            res["new_path"] = r["new_path"]
            results.append(res)
    finally:
        # Files renamed so far exist under their new names on the remote;
        # persist the matching cache patches even on an aborted batch.
        if cache_dirty:
            save_cache(cache)
    return results
|
|
||||||
|
|
||||||
|
|
||||||
# ---------- plain renderers (--basic) ----------
|
# ---------- plain renderers (--basic) ----------
|
||||||
|
|
||||||
def render_banner_plain(cache_meta: dict[str, dict], mode: str) -> str:
|
def render_banner_plain(cache_meta: dict[str, dict], mode: str) -> str:
|
||||||
|
|||||||
@@ -6,6 +6,11 @@ find at the top level. Adding a new submodule does not change the
|
|||||||
public surface — only this file does.
|
public surface — only this file does.
|
||||||
"""
|
"""
|
||||||
from rcjav.model import FileEntry # noqa: F401
|
from rcjav.model import FileEntry # noqa: F401
|
||||||
|
from rcjav.library import ( # noqa: F401
|
||||||
|
find_library_issues,
|
||||||
|
rename_file_in_remote,
|
||||||
|
rename_files_batch,
|
||||||
|
)
|
||||||
from rcjav.rclone_io import ( # noqa: F401
|
from rcjav.rclone_io import ( # noqa: F401
|
||||||
RCLONE_BIN,
|
RCLONE_BIN,
|
||||||
CANCEL_FLAG,
|
CANCEL_FLAG,
|
||||||
|
|||||||
@@ -0,0 +1,176 @@
|
|||||||
|
"""Library-issue detection (non-canonical filenames) + safe renaming.
|
||||||
|
|
||||||
|
Scans the cache (not the live remote) for files whose names violate
|
||||||
|
the canonical `{ID}[ - actress][ [resolution]].ext` shape:
|
||||||
|
|
||||||
|
- Bracket-wrapped IDs: `[REAL-779].mp4` -> `REAL-779.mp4`
|
||||||
|
- No-hyphen IDs: `MVSD312 [576p].avi` -> `MVSD-312 [576p].avi`
|
||||||
|
|
||||||
|
`rename_file_in_remote` performs the rclone moveto and patches the
|
||||||
|
cache in place. `rename_files_batch` writes the cache once after a
|
||||||
|
batch of renames.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from rcjav.cache import save_cache
|
||||||
|
from rcjav.ids import (
|
||||||
|
_BRACKET_ID_RE,
|
||||||
|
_NOHYPHEN_ID_RE,
|
||||||
|
COMPOUND_ID_RE,
|
||||||
|
FALLBACK_ID_RE,
|
||||||
|
PRIMARY_ID_RE,
|
||||||
|
extract_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _human_size(n: int) -> str:
|
||||||
|
nf = float(max(0, n))
|
||||||
|
for unit in ("B", "KiB", "MiB", "GiB", "TiB"):
|
||||||
|
if nf < 1024:
|
||||||
|
return f"{int(nf)} B" if unit == "B" else f"{nf:.2f} {unit}"
|
||||||
|
nf /= 1024
|
||||||
|
return f"{nf:.2f} PiB"
|
||||||
|
|
||||||
|
|
||||||
|
def _bracket_to_canonical(filename: str) -> str:
    """Drop the square brackets wrapping a leading ID in *filename*.

    Examples:
        [REAL-779].mp4 -> REAL-779.mp4
        [HODV-21076] Saki [1080p].mkv -> HODV-21076 Saki [1080p].mkv

    The name is returned unchanged when its stem does not match
    ``_BRACKET_ID_RE``.
    """
    parsed = Path(filename)
    match = _BRACKET_ID_RE.match(parsed.stem)
    if match is None:
        return filename

    ident = match.group(1).strip()
    tail = parsed.stem[match.end():].strip()
    if not tail:
        return f"{ident}{parsed.suffix}"
    # Join ID and remainder with one space; the outer strip removes the
    # separator again if the ID part turned out to be empty.
    return f"{ident} {tail}".strip() + parsed.suffix
|
||||||
|
|
||||||
|
|
||||||
|
def _nohyphen_to_canonical(filename: str) -> str:
    """Insert the missing hyphen into a leading ID and upper-case its prefix.

    Example: MVSD312 [576p].avi -> MVSD-312 [576p].avi

    The name is returned unchanged when its stem does not match
    ``_NOHYPHEN_ID_RE``.
    """
    parsed = Path(filename)
    stem = parsed.stem
    hit = _NOHYPHEN_ID_RE.match(stem)
    if hit is None:
        return filename

    letters = hit.group(1).upper()
    digits = hit.group(2)
    # Everything after the matched ID is carried over untouched.
    remainder = stem[hit.end():]
    return f"{letters}-{digits}{remainder}{parsed.suffix}"
|
||||||
|
|
||||||
|
|
||||||
|
def find_library_issues(cache: dict) -> dict:
    """Collect cached files whose names are not in canonical form.

    Only the ``cache`` mapping is inspected (``cache["remotes"][*]["files"]``).

    Returns:
        {"bracket_names": [...], "nohyphen_names": [...]}
        Each entry: {remote, path, size, size_human, mod_time, jav_id,
        canonical_name, issue}
    """

    def describe(remote_name, record, base_name, fixer, tag):
        # One report row; key order is identical for both issue kinds.
        byte_count = record.get("size", 0)
        return {
            "remote": remote_name,
            "path": record["path"],
            "size": byte_count,
            "size_human": _human_size(byte_count),
            "mod_time": record.get("mod_time", ""),
            "jav_id": record.get("jav_id", ""),
            "canonical_name": fixer(base_name),
            "issue": tag,
        }

    bracketed: list[dict] = []
    unhyphenated: list[dict] = []

    for remote_name, remote_data in cache.get("remotes", {}).items():
        for record in remote_data.get("files", []):
            base_name = Path(record["path"]).name
            stem = Path(base_name).stem

            if stem.startswith("[") and _BRACKET_ID_RE.match(stem):
                bracketed.append(
                    describe(remote_name, record, base_name,
                             _bracket_to_canonical, "bracket_id")
                )
                continue

            # A stem that any of the regular ID patterns accepts is not
            # reported as a no-hyphen name.
            if (PRIMARY_ID_RE.match(stem)
                    or COMPOUND_ID_RE.match(stem)
                    or FALLBACK_ID_RE.match(stem)):
                continue

            if _NOHYPHEN_ID_RE.match(stem):
                unhyphenated.append(
                    describe(remote_name, record, base_name,
                             _nohyphen_to_canonical, "nohyphen_id")
                )

    return {"bracket_names": bracketed, "nohyphen_names": unhyphenated}
|
||||||
|
|
||||||
|
|
||||||
|
def rename_file_in_remote(
    remote: str,
    old_rel_path: str,
    new_rel_path: str,
    cache: dict,
    rclone_bin: str = "rclone",
    save: bool = True,
) -> dict:
    """Rename one file via ``rclone moveto`` and patch the cache in place.

    Args:
        remote: Remote root; a "/" is appended before the relative path
            unless it already ends with one.
        old_rel_path: Current path of the file, relative to ``remote``.
        new_rel_path: Desired path of the file, relative to ``remote``.
        cache: Cache mapping; the entry under ``cache["remotes"][remote]``
            is updated after a successful move.
        rclone_bin: rclone executable to invoke.
        save: Pass ``False`` when batching — the caller is then responsible
            for calling ``save_cache()`` once.

    Returns:
        {"ok": True, "old_path": ..., "new_path": ...}
        or {"ok": False, "error": ..., "conflict": bool}
    """
    sep = "" if remote.endswith("/") else "/"
    old_full = f"{remote}{sep}{old_rel_path}"
    new_full = f"{remote}{sep}{new_rel_path}"

    # Collision check — a successful listing with non-empty output is
    # treated as "target already exists".
    check = subprocess.run(
        [rclone_bin, "lsf", new_full],
        capture_output=True, text=True,
    )
    if check.returncode == 0 and check.stdout.strip():
        return {"ok": False, "error": f"Target already exists: {new_full}", "conflict": True}

    # Perform the rename.
    result = subprocess.run(
        [rclone_bin, "moveto", old_full, new_full],
        capture_output=True, text=True,
    )
    if result.returncode != 0:
        return {"ok": False, "error": (result.stderr or result.stdout).strip(), "conflict": False}

    # Patch cache — update path + jav_id for the renamed entry.
    remote_data = cache.get("remotes", {}).get(remote)
    if remote_data:
        for f in remote_data.get("files", []):
            if f["path"] == old_rel_path:
                f["path"] = new_rel_path
                new_id = extract_id(Path(new_rel_path).name)
                # Only overwrite when an ID was extracted. An entry without
                # a "jav_id" key must not raise here: the remote move has
                # already happened and the cache has to stay consistent.
                if new_id:
                    f["jav_id"] = new_id
                break
        remote_data["skipped"] = [s for s in remote_data.get("skipped", []) if s != old_rel_path]

    if save:
        save_cache(cache)

    return {"ok": True, "old_path": old_full, "new_path": new_full}
|
||||||
|
|
||||||
|
|
||||||
|
def rename_files_batch(
    renames: list[dict],
    cache: dict,
    rclone_bin: str = "rclone",
) -> list[dict]:
    """Rename multiple files, writing the cache once at the end.

    Args:
        renames: Items of the form {remote, old_path, new_path}.
        cache: Cache mapping patched in place by each successful rename.
        rclone_bin: rclone executable to invoke.

    Returns:
        List of per-file results with the item's old_path/new_path echoed
        back (replacing the full paths reported by the single-file rename).

    If an item raises part-way through the batch, the cache is still saved
    for the renames that already succeeded before the exception propagates.
    """
    results: list[dict] = []
    cache_dirty = False
    try:
        for r in renames:
            res = rename_file_in_remote(
                r["remote"], r["old_path"], r["new_path"],
                cache, rclone_bin=rclone_bin, save=False,
            )
            if res["ok"]:
                cache_dirty = True
            res["old_path"] = r["old_path"]
            res["new_path"] = r["new_path"]
            results.append(res)
    finally:
        # Files renamed so far exist under their new names on the remote;
        # persist the matching cache patches even on an aborted batch.
        if cache_dirty:
            save_cache(cache)
    return results
|
||||||
Reference in New Issue
Block a user