Initial snapshot before step 10 package split
This commit is contained in:
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"version": 1,
|
||||
"domain": "query",
|
||||
"description": "Page text / title -> canonical JAV ID. Consumed by the browser extension (content.js normalizeId). Difference from filename: looser context (sentences, mixed punctuation, site chrome). Includes forms (e.g. FC2PPV compact) that Python extract_id does NOT handle, by design — see fixtures/README.md.",
|
||||
"case_schema": {
|
||||
"name": "human label",
|
||||
"input": "raw page text",
|
||||
"expected": "canonical ID without part suffix (extension never emits #partN), or null when no ID found"
|
||||
},
|
||||
"cases": [
|
||||
{ "name": "title with site chrome", "input": "SSIS-001 — JAV.tube", "expected": "SSIS-001" },
|
||||
{ "name": "title with description", "input": "Watch SSIS-001 1080p HD Online", "expected": "SSIS-001" },
|
||||
{ "name": "trailing letter variant", "input": "IBW-902z Full Movie", "expected": "IBW-902" },
|
||||
{ "name": "no hyphen in title", "input": "MVSD312 stream", "expected": "MVSD-312" },
|
||||
{ "name": "FC2 PPV compact", "input": "FC2PPV-1841460 — preview", "expected": "FC2-PPV-1841460" },
|
||||
{ "name": "FC2 plain digits", "input": "FC2-1841460 thumbnail", "expected": "FC2-PPV-1841460" },
|
||||
{ "name": "FC2-PPV explicit", "input": "FC2-PPV-1841460 Full", "expected": "FC2-PPV-1841460" },
|
||||
{ "name": "leading zeros preserved", "input": "ABF-042 — sample", "expected": "ABF-042" },
|
||||
{ "name": "long numeric tail (7 digits)", "input": "BLK-4748520 stream", "expected": "BLK-4748520" },
|
||||
{ "name": "no ID present", "input": "JAV Database · home", "expected": null }
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user