Initial snapshot before step 10 package split

This commit is contained in:
admin
2026-05-22 21:39:09 +02:00
commit e029e898e9
16 changed files with 3955 additions and 0 deletions
+22
View File
@@ -0,0 +1,22 @@
{
"version": 1,
"domain": "query",
"description": "Maps page text or a page title to the canonical JAV ID. Consumed by the browser extension (content.js normalizeId). Differs from filename inputs in its looser context (sentences, mixed punctuation, site chrome). Includes forms (e.g. FC2PPV compact) that Python extract_id does NOT handle, by design — see fixtures/README.md.",
"case_schema": {
"name": "human label",
"input": "raw page text",
"expected": "canonical ID without part suffix (extension never emits #partN), or null when no ID found"
},
"cases": [
{ "name": "title with site chrome", "input": "SSIS-001 — JAV.tube", "expected": "SSIS-001" },
{ "name": "title with description", "input": "Watch SSIS-001 1080p HD Online", "expected": "SSIS-001" },
{ "name": "trailing letter variant", "input": "IBW-902z Full Movie", "expected": "IBW-902" },
{ "name": "no hyphen in title", "input": "MVSD312 stream", "expected": "MVSD-312" },
{ "name": "FC2PPV compact", "input": "FC2PPV-1841460 — preview", "expected": "FC2-PPV-1841460" },
{ "name": "FC2 plain digits", "input": "FC2-1841460 thumbnail", "expected": "FC2-PPV-1841460" },
{ "name": "FC2-PPV explicit", "input": "FC2-PPV-1841460 Full", "expected": "FC2-PPV-1841460" },
{ "name": "leading zeros preserved", "input": "ABF-042 — sample", "expected": "ABF-042" },
{ "name": "long numeric tail (7 digits)", "input": "BLK-4748520 stream", "expected": "BLK-4748520" },
{ "name": "no ID present", "input": "JAV Database · home", "expected": null }
]
}