"""Measure host fast-path vs subprocess rc-jav.py for cached single-ID search.

Step 11 of the console-consolidation roadmap asks: does the host's
`handle_cached_search_fast` actually save meaningful latency vs just
shelling out to `rc-jav.py --search ID --format json --quick`? If yes,
under what conditions (idle Python vs Python actively scanning)?

This script runs both paths N times against a set of query IDs and
reports min / median / mean / p95 / max in milliseconds.

Usage:
    python benchmarks/host-fast-path.py [--queries Q1 Q2 ...] [--iterations N]

To measure (b) Python-actively-scanning, kick off a `rc-jav.py --scan` in
another terminal, then run this script while the scan runs.

The fast-path implementation is replicated inline here (not imported from
the host module) so the benchmark is self-contained.
"""
from __future__ import annotations

import argparse
import json
import statistics
import subprocess
import sys
import time
from pathlib import Path

# Repository root: this file is expected to live one directory below it.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    # Make the project-local `rcjav` package importable when run as a script.
    sys.path.insert(0, str(ROOT))

from rcjav.cache import load_cache  # noqa: E402
from rcjav.ids import current_rules_signature, normalize_id  # noqa: E402

DEFAULT_QUERIES = ["SSIS-001", "ABP-100", "FC2-1841460", "MIDD-500", "IBW-902"]
DEFAULT_ITERATIONS = 20


def fast_path_search(cache: dict, query: str) -> int:
    """Replicate handle_cached_search_fast minus the response shape.

    Walks every remote's ``files`` list looking for a ``jav_id`` matching the
    normalized query, either exactly or as a ``<id>#part...`` entry.

    Args:
        cache: Mapping with an optional ``"remotes"`` key; each remote entry
            may carry a ``"files"`` list of dict-like items.
            NOTE(review): the exact cache schema lives in ``rcjav.cache`` --
            confirm against ``load_cache``.
        query: Raw ID as typed by the user; normalized via ``normalize_id``.

    Returns:
        Number of matching file entries, or 0 when the query does not
        normalize to a truthy ID.
    """
    norm = normalize_id(query)
    if not norm:
        return 0
    # Loop-invariant: build the multi-part prefix once, not once per file.
    part_prefix = norm + "#part"
    hits = 0
    # Only the entries are needed; remote names are irrelevant to the count.
    for entry in (cache.get("remotes") or {}).values():
        for item in entry.get("files") or []:
            jid = item.get("jav_id", "")
            if jid == norm or (isinstance(jid, str) and jid.startswith(part_prefix)):
                hits += 1
    return hits


def time_fast_path(query: str, iterations: int, cache: dict | None = None) -> list[float]:
    """Time ``iterations`` in-process fast-path lookups of ``query``.

    Args:
        query: ID to look up.
        iterations: Number of timed lookups to perform.
        cache: Already-loaded cache to search. When ``None`` the cache is
            loaded here via ``load_cache(current_rules_signature())``, which
            lets a caller that benchmarks several queries load it only once.

    Returns:
        Per-lookup wall-clock durations in milliseconds. Loading the cache is
        never included in the timings.
    """
    if cache is None:
        cache = load_cache(current_rules_signature())
    durations: list[float] = []
    for _ in range(iterations):
        t0 = time.perf_counter()
        fast_path_search(cache, query)
        durations.append((time.perf_counter() - t0) * 1000)
    return durations


def time_subprocess(query: str, iterations: int) -> list[float]:
    """Time ``iterations`` full ``rc-jav.py --search`` subprocess invocations.

    Each run spawns a fresh interpreter (``sys.executable``) on ``rc-jav.py``
    under ``ROOT`` and captures its output. Runs whose exit code is neither 0
    nor 1 are reported on stderr; their duration is still recorded.

    Args:
        query: ID to search for.
        iterations: Number of subprocess runs.

    Returns:
        Per-run wall-clock durations in milliseconds.
    """
    # NOTE(review): the module docstring mentions `--quick`, while this
    # command passes `--cache` / `--basic` -- confirm which flags are intended.
    cmd = [
        sys.executable,
        str(ROOT / "rc-jav.py"),
        "--search", query,
        "--cache",  # force cache mode (no rclone)
        "--format", "json",
        "--basic",
        "--no-color",
    ]
    durations: list[float] = []
    for _ in range(iterations):
        t0 = time.perf_counter()
        proc = subprocess.run(cmd, capture_output=True, text=True,
                              encoding="utf-8", errors="replace")
        durations.append((time.perf_counter() - t0) * 1000)
        if proc.returncode not in (0, 1):  # 1 = no hits, still valid
            sys.stderr.write(
                f"subprocess returned {proc.returncode}; stderr={proc.stderr[:200]!r}\n"
            )
    return durations


def percentile(values: list[float], p: float) -> float:
    """Return the ``p``-quantile of ``values`` using linear interpolation.

    Args:
        values: Samples; need not be sorted.
        p: Fraction in ``[0, 1]`` (e.g. ``0.95`` for p95).

    Returns:
        The interpolated quantile, or ``0.0`` when ``values`` is empty.

    Raises:
        ValueError: If ``values`` is non-empty and ``p`` is outside ``[0, 1]``.
            Without this check ``p > 1`` would raise an opaque IndexError and
            ``p < 0`` would silently index from the end of the sorted list.
    """
    if not values:
        return 0.0
    if not 0.0 <= p <= 1.0:
        raise ValueError(f"p must be within [0, 1], got {p!r}")
    s = sorted(values)
    k = (len(s) - 1) * p  # fractional rank
    f = int(k)  # lower neighbour index
    c = min(f + 1, len(s) - 1)  # upper neighbour index, clamped to the last
    return s[f] + (s[c] - s[f]) * (k - f)


def summarize(label: str, values: list[float]) -> None:
    """Print n / min / median / mean / p95 / max (in ms) for ``values``.

    Prints a ``(no data)`` line instead when ``values`` is empty.
    """
    if not values:
        print(f" {label}: (no data)")
        return
    print(f" {label}:")
    print(f" n={len(values)} min={min(values):.2f}ms median={statistics.median(values):.2f}ms "
          f"mean={statistics.mean(values):.2f}ms p95={percentile(values, 0.95):.2f}ms max={max(values):.2f}ms")


def main() -> int:
    """Parse CLI arguments, benchmark both paths per query, print a report.

    Returns:
        Process exit code (always 0).
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--queries", nargs="+", default=DEFAULT_QUERIES,
                    help=f"JAV IDs to search (default: {DEFAULT_QUERIES})")
    ap.add_argument("--iterations", type=int, default=DEFAULT_ITERATIONS,
                    help=f"Iterations per query per path (default: {DEFAULT_ITERATIONS})")
    args = ap.parse_args()

    print("Host fast-path vs subprocess rc-jav.py benchmark")
    print(f"queries: {args.queries}")
    print(f"iterations per path: {args.iterations}")
    print(f"cache: {ROOT / 'cache.json'}")
    print()

    # Load the cache once and share it across queries instead of re-reading
    # it for every query; loading is not part of the timed section anyway.
    cache = load_cache(current_rules_signature())

    all_fast: list[float] = []
    all_sub: list[float] = []
    for q in args.queries:
        print(f"[{q}]")
        fast = time_fast_path(q, args.iterations, cache)
        summarize("fast-path (in-process dict walk)", fast)
        sub = time_subprocess(q, args.iterations)
        summarize("subprocess rc-jav.py --search --cache", sub)
        all_fast.extend(fast)
        all_sub.extend(sub)
        if fast and sub:
            # Clamp the denominator so a ~0 ms median cannot divide by zero.
            speedup = statistics.median(sub) / max(statistics.median(fast), 0.001)
            print(f" speedup (median sub / median fast): {speedup:.1f}x")
        print()

    print("=== aggregate ===")
    summarize("fast-path total", all_fast)
    summarize("subprocess total", all_sub)
    if all_fast and all_sub:
        med_speedup = statistics.median(all_sub) / max(statistics.median(all_fast), 0.001)
        p95_speedup = percentile(all_sub, 0.95) / max(percentile(all_fast, 0.95), 0.001)
        print(f" median speedup: {med_speedup:.1f}x")
        print(f" p95 speedup: {p95_speedup:.1f}x")
    return 0


if __name__ == "__main__":
    sys.exit(main())