diff --git a/fixtures/README.md b/fixtures/README.md index 2a00990..0b011cc 100644 --- a/fixtures/README.md +++ b/fixtures/README.md @@ -40,13 +40,21 @@ The runner imports `rc-jav.py` in place, exercises `extract_id` against ## Running the extension side -No automated runner today. `content.js` lives inside an IIFE that the -browser injects into pages, so importing it from Node would require -either an extraction refactor or a duplicated copy of the regex. Until -that lands, treat `query-extraction.json` and `shared-normalization.json` -as the canonical specification: if you touch `ID_RE_DASHED`, -`ID_RE_UNDASHED`, or `BUILTIN_ID_NORMALIZERS` in content.js, eyeball -this corpus and confirm the cases still describe expected behavior. +```bash +node fixtures/run-node.mjs +``` + +The Node runner exercises `query-extraction.json` and +`shared-normalization.json` against a hand-mirrored copy of +`normalizeId` from `content.js`. Because `content.js` lives inside an +injected IIFE in the extension repo, it can't be imported directly — +the runner duplicates the regexes (`ID_RE_DASHED`, `ID_RE_UNDASHED`, +`BUILTIN_ID_NORMALIZERS`). + +If you change any of those in `content.js`, mirror the change at the +top of `fixtures/run-node.mjs`. `shared-normalization.json` catches +silent cross-side drift because both Python and Node exercise it; a +case that passes Python but fails Node (or vice versa) is the canary. ## Adding a case diff --git a/fixtures/run-node.mjs b/fixtures/run-node.mjs new file mode 100644 index 0000000..ace4842 --- /dev/null +++ b/fixtures/run-node.mjs @@ -0,0 +1,102 @@ +// Node-side fixture runner — mirrors content.js normalizeId() for +// query-extraction.json + shared-normalization.json. +// +// IMPORTANT: this file *replicates* the regexes from content.js by hand. +// content.js lives inside an injected IIFE in the extension, so a real +// import isn't feasible without restructuring it. 
// If you touch ID_RE_DASHED, ID_RE_UNDASHED, or BUILTIN_ID_NORMALIZERS in
// content.js, update them here too. fixtures/shared-normalization.json
// catches cross-side drift because Python and this runner both exercise it.
//
// Usage:
//   node fixtures/run-node.mjs
//
// Exits non-zero on any fixture case failure, and also when a fixture file
// cannot be read/parsed or does not contain a `cases` array.

import { readFileSync } from "node:fs";
import { fileURLToPath } from "node:url";
import { dirname, join } from "node:path";

// Directory containing this script; fixture files are resolved against it.
const __dirname = dirname(fileURLToPath(import.meta.url));

// ---------- mirror of content.js ----------

// Dashed form: a letter followed by 1+ alphanumerics, a dash, then 2-7 digits.
// A single trailing letter is tolerated by the match but is not captured.
const ID_RE_DASHED = /\b([A-Za-z][A-Za-z0-9]{1,})-(\d{2,7})[a-zA-Z]?\b/;

// Undashed form: same prefix shape immediately followed by 3-5 digits.
// NOTE(review): the prefix group is greedy and may itself contain digits, so
// a long digit run is split leaving only the last three digits in group 2
// (e.g. "ABC12345" yields prefix "ABC12" and number "345"). Kept as-is because
// this file must mirror content.js — confirm that is the intended behavior.
const ID_RE_UNDASHED = /\b([A-Za-z][A-Za-z0-9]{1,})(\d{3,5})[a-zA-Z]?\b/;

// Rules are tried in order and the first match wins. `fmt` may reference
// capture groups as $0-$9.
const BUILTIN_ID_NORMALIZERS = [
  // FC2-PPV in any dash configuration: FC2PPV12345, FC2-PPV12345, FC2-PPV-12345
  { re: /\bFC2-?PPV-?(\d{4,})\b/i, fmt: "FC2-PPV-$1" },
  // Some sites display FC2 IDs without the PPV segment: FC2-1841460.
  { re: /\bFC2-(\d{4,})\b/i, fmt: "FC2-PPV-$1" },
];

// Fixture files to execute, paired with the label printed for the function
// under test.
const FIXTURES = [
  ["query-extraction.json", "normalizeId"],
  ["shared-normalization.json", "normalizeId"],
];

/**
 * Apply the first matching normalizer rule to `text`.
 *
 * @param {string} text - Text to search.
 * @param {Array<{re: RegExp|string, fmt: string}>} [userList=[]] - Extra
 *   rules, tried before the built-in ones. A string `re` is compiled
 *   case-insensitively.
 * @returns {string|null} `fmt` with each `$N` replaced by capture group N
 *   (empty string when the group is absent), or null when no rule matches.
 */
function applyNormalizers(text, userList = []) {
  for (const rule of [...userList, ...BUILTIN_ID_NORMALIZERS]) {
    let pattern;
    try {
      pattern = rule.re instanceof RegExp ? rule.re : new RegExp(rule.re, "i");
    } catch {
      // A pattern source that does not compile is skipped rather than
      // aborting the lookup; the remaining rules are still tried.
      continue;
    }
    const match = text.match(pattern);
    if (match) {
      return rule.fmt.replace(/\$(\d)/g, (_, index) => match[+index] || "");
    }
  }
  return null;
}

/**
 * Extract a canonical ID from free text.
 *
 * Normalizer rules take precedence; otherwise the dashed pattern is tried
 * before the undashed one.
 *
 * @param {string} text - Text to search; any falsy value yields null.
 * @returns {string|null} Upper-cased normalizer output, or `PREFIX-digits`
 *   with the prefix upper-cased, or null when nothing matches.
 */
function normalizeId(text) {
  if (!text) return null;

  const fromNormalizer = applyNormalizers(text);
  if (fromNormalizer) return fromNormalizer.toUpperCase();

  const match = text.match(ID_RE_DASHED) || text.match(ID_RE_UNDASHED);
  if (!match) return null;
  return `${match[1].toUpperCase()}-${match[2]}`;
}

// ---------- harness ----------

/**
 * Read and parse a JSON fixture that sits next to this script.
 *
 * @param {string} name - Fixture file name.
 * @returns {*} The parsed JSON document.
 * @throws {Error} When the file cannot be read or is not valid JSON.
 */
function load(name) {
  return JSON.parse(readFileSync(join(__dirname, name), "utf8"));
}

/**
 * Load a fixture and return its `cases` array.
 *
 * A document without a `cases` array is rejected instead of being treated as
 * "zero cases": otherwise a renamed or misspelled key would make the whole
 * fixture pass without running anything.
 *
 * @param {string} name - Fixture file name.
 * @returns {Array<object>} The fixture's cases.
 * @throws {Error} When loading fails or `cases` is not an array.
 */
function loadCases(name) {
  const doc = load(name);
  const cases = doc && doc.cases;
  if (!Array.isArray(cases)) {
    throw new Error(`${name} has no "cases" array`);
  }
  return cases;
}

/**
 * Run every case through normalizeId and print a report for each mismatch.
 *
 * @param {string} label - Tag printed in failure lines.
 * @param {Array<{name: *, input: *, expected: *}>} cases - Cases to check;
 *   `expected` is compared to the result with strict equality.
 * @returns {{passed: number, failed: number}} Case counts.
 */
function run(label, cases) {
  let passed = 0;
  let failed = 0;
  for (const c of cases) {
    const got = normalizeId(c.input);
    if (got === c.expected) {
      passed += 1;
      continue;
    }
    failed += 1;
    console.log(`  FAIL [${label}] ${JSON.stringify(c.name)}`);
    console.log(`    input    = ${JSON.stringify(c.input)}`);
    console.log(`    expected = ${JSON.stringify(c.expected)}`);
    console.log(`    got      = ${JSON.stringify(got)}`);
  }
  return { passed, failed };
}

let totalPassed = 0;
let totalFailed = 0;
let unusableFixtures = 0;

for (const [filename, fnLabel] of FIXTURES) {
  let cases;
  try {
    cases = loadCases(filename);
  } catch (err) {
    // Report the broken fixture and keep going so the other fixture's
    // results are still shown; the run as a whole still fails.
    unusableFixtures += 1;
    console.log(`\n${filename} -> node.${fnLabel}`);
    console.log(`  ERROR could not load fixture: ${err.message}`);
    continue;
  }
  console.log(`\n${filename} -> node.${fnLabel} (${cases.length} cases)`);
  const { passed, failed } = run(filename, cases);
  totalPassed += passed;
  totalFailed += failed;
  console.log(`  ${passed} passed | ${failed} failed`);
}

console.log();
if (totalFailed > 0) {
  console.log(`FAILED: ${totalFailed} of ${totalPassed + totalFailed} cases`);
}
if (unusableFixtures > 0) {
  console.log(`FAILED: ${unusableFixtures} fixture file(s) could not be loaded`);
}
if (totalFailed > 0 || unusableFixtures > 0) {
  // Set the exit code instead of calling process.exit(): exit() terminates
  // immediately and may cut off stdout writes that have not completed yet.
  process.exitCode = 1;
} else {
  console.log(`OK: all ${totalPassed} cases passed`);
}