{ "version": 1, "domain": "query", "description": "Page text / title -> canonical JAV ID. Consumed by the browser extension (content.js normalizeId). Differs from filename inputs in its looser context (sentences, mixed punctuation, site chrome). Includes forms (e.g. FC2PPV compact) that Python extract_id does NOT handle, by design — see fixtures/README.md.", "case_schema": { "name": "human label", "input": "raw page text", "expected": "canonical ID without part suffix (extension never emits #partN), or null when no ID found" }, "cases": [ { "name": "title with site chrome", "input": "SSIS-001 — JAV.tube", "expected": "SSIS-001" }, { "name": "title with description", "input": "Watch SSIS-001 1080p HD Online", "expected": "SSIS-001" }, { "name": "trailing letter variant", "input": "IBW-902z Full Movie", "expected": "IBW-902" }, { "name": "no hyphen in title", "input": "MVSD312 stream", "expected": "MVSD-312" }, { "name": "FC2 PPV compact", "input": "FC2PPV-1841460 — preview", "expected": "FC2-PPV-1841460" }, { "name": "FC2 plain digits", "input": "FC2-1841460 thumbnail", "expected": "FC2-PPV-1841460" }, { "name": "FC2-PPV explicit", "input": "FC2-PPV-1841460 Full", "expected": "FC2-PPV-1841460" }, { "name": "leading zeros preserved", "input": "ABF-042 — sample", "expected": "ABF-042" }, { "name": "long numeric tail (7 digits)", "input": "BLK-4748520 stream", "expected": "BLK-4748520" }, { "name": "no ID present", "input": "JAV Database · home", "expected": null } ] }