Add Node-side fixture runner — both sides now exercise the corpus

Mirrors `content.js` normalizeId() in a self-contained
`fixtures/run-node.mjs`. Loads `query-extraction.json` and
`shared-normalization.json` and asserts each case the same way the
Python runner does.

content.js can't be imported directly — it lives inside an injected
IIFE in the extension — so the runner duplicates the regexes
(ID_RE_DASHED, ID_RE_UNDASHED, BUILTIN_ID_NORMALIZERS). Inline
comment + README update flag that they must be kept in sync.

Why this matters: `shared-normalization.json` now actually catches
cross-side drift. A case that passes one side but fails the other is
the canary — without a Node runner, the contract was aspirational.

Verified:
  $ node fixtures/run-node.mjs
  query-extraction.json     -> normalizeId (10 cases): 10 passed
  shared-normalization.json -> normalizeId (5 cases):  5 passed
  OK: all 15 cases passed

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
admin
2026-05-23 11:18:52 +02:00
parent b9a24b3fb5
commit 8d6bdb81af
2 changed files with 117 additions and 7 deletions
+102
View File
@@ -0,0 +1,102 @@
// Node-side fixture runner — mirrors content.js normalizeId() for
// query-extraction.json + shared-normalization.json.
//
// IMPORTANT: this file *replicates* the regexes from content.js by hand.
// content.js lives inside an injected IIFE in the extension, so a real
// import isn't feasible without restructuring it. If you touch ID_RE_DASHED,
// ID_RE_UNDASHED, or BUILTIN_ID_NORMALIZERS in content.js, update them
// here too. fixtures/shared-normalization.json catches cross-side drift
// because Python and this runner both exercise it.
//
// Usage:
// node fixtures/run-node.mjs
//
// Exits non-zero on any fixture case failure.
import { readFileSync } from "node:fs";
import { fileURLToPath } from "node:url";
import { dirname, join } from "node:path";
// ES modules do not provide a built-in __dirname, so derive this file's
// directory from import.meta.url. load() joins fixture file names onto it.
const __dirname = dirname(fileURLToPath(import.meta.url));
// ---------- mirror of content.js ----------
// Dashed ID: a word-bounded prefix (one letter followed by at least one more
// letter or digit), a literal dash, 2-7 digits, then an optional single
// trailing letter. Capture 1 is the prefix, capture 2 the digits; the trailing
// letter is matched but not captured.
const ID_RE_DASHED = /\b([A-Za-z][A-Za-z0-9]{1,})-(\d{2,7})[a-zA-Z]?\b/;
// Undashed ID: same prefix shape, immediately followed by 3-5 digits and an
// optional single trailing letter. Because the prefix itself may contain
// digits, the split between capture 1 and capture 2 is decided by regex
// backtracking.
const ID_RE_UNDASHED = /\b([A-Za-z][A-Za-z0-9]{1,})(\d{3,5})[a-zA-Z]?\b/;
// Special-case rewrites consulted by applyNormalizers() in array order. Each
// entry pairs a pattern (`re`) with an output template (`fmt`) in which `$N`
// is replaced by capture group N of the match.
const BUILTIN_ID_NORMALIZERS = [
// FC2-PPV in any dash configuration: FC2PPV12345, FC2-PPV12345, FC2-PPV-12345
{ re: /\bFC2-?PPV-?(\d{4,})\b/i, fmt: "FC2-PPV-$1" },
// Some sites display FC2 IDs without the PPV segment: FC2-1841460.
{ re: /\bFC2-(\d{4,})\b/i, fmt: "FC2-PPV-$1" },
];
/**
 * Try each normalizer (caller-supplied ones first, then the built-ins) and
 * format the first one whose pattern matches.
 *
 * @param {string} text - Text to search.
 * @param {Array<{re: RegExp|string, fmt: string}>} [userList=[]] - Extra
 *   normalizers tried before the built-in list. String patterns are compiled
 *   with the "i" flag.
 * @returns {string|null} `fmt` with each `$N` replaced by capture group N
 *   (empty string when the group is missing or empty), or null if nothing
 *   matched.
 */
function applyNormalizers(text, userList = []) {
  // Resolve an entry to a RegExp. Any failure while doing so (unreadable
  // entry, invalid pattern source) yields null so the entry is skipped
  // instead of aborting the whole lookup.
  const compile = (normalizer) => {
    try {
      return normalizer.re instanceof RegExp
        ? normalizer.re
        : new RegExp(normalizer.re, "i");
    } catch {
      return null;
    }
  };

  for (const normalizer of [...userList, ...BUILTIN_ID_NORMALIZERS]) {
    const pattern = compile(normalizer);
    if (pattern === null) continue;

    const match = text.match(pattern);
    if (!match) continue;

    return normalizer.fmt.replace(
      /\$(\d)/g,
      (_placeholder, groupIndex) => match[Number(groupIndex)] || "",
    );
  }
  return null;
}
/**
 * Extract a canonical, upper-cased ID from free text.
 *
 * The special-case normalizers are consulted first; otherwise the dashed
 * pattern is tried, then the undashed one.
 *
 * @param {string} text - Text that may contain an ID.
 * @returns {string|null} `PREFIX-digits`, or null when `text` is falsy or no
 *   pattern matches.
 */
function normalizeId(text) {
  if (!text) return null;

  const special = applyNormalizers(text);
  if (special) return special.toUpperCase();

  // The dashed form wins; the undashed pattern is only a fallback.
  const match = text.match(ID_RE_DASHED) || text.match(ID_RE_UNDASHED);
  if (!match) return null;

  const [, prefix, digits] = match;
  return `${prefix.toUpperCase()}-${digits}`;
}
// ---------- harness ----------
/**
 * Read and parse a JSON fixture that sits next to this script.
 *
 * @param {string} name - Fixture file name, resolved against this file's directory.
 * @returns {*} The parsed JSON document.
 */
function load(name) {
  const fixturePath = join(__dirname, name);
  const rawJson = readFileSync(fixturePath, "utf8");
  return JSON.parse(rawJson);
}
/**
 * Run normalizeId() over every fixture case and report mismatches.
 *
 * A case passes when the result is strictly equal to `expected`. Each failure
 * is printed with console.log as four lines: name, input, expected and actual
 * value, all JSON-encoded.
 *
 * @param {string} label - Tag printed in failure lines.
 * @param {Iterable<{name: *, input: *, expected: *}>} cases - Fixture cases.
 * @returns {{passed: number, failed: number}} Tally for this batch.
 */
function run(label, cases) {
  const tally = { passed: 0, failed: 0 };

  for (const fixture of cases) {
    const actual = normalizeId(fixture.input);

    if (actual === fixture.expected) {
      tally.passed += 1;
      continue;
    }

    tally.failed += 1;
    console.log(` FAIL [${label}] ${JSON.stringify(fixture.name)}`);
    console.log(` input = ${JSON.stringify(fixture.input)}`);
    console.log(` expected = ${JSON.stringify(fixture.expected)}`);
    console.log(` got = ${JSON.stringify(actual)}`);
  }

  return tally;
}
// Driver: run every fixture file through run(), print a per-file summary,
// then a grand total. The process exits with status 1 if any case failed.
let totalPassed = 0;
let totalFailed = 0;

// [fixture file name, label of the function under test shown in the report]
const fixtureFiles = [
  ["query-extraction.json", "normalizeId"],
  ["shared-normalization.json", "normalizeId"],
];

for (const [filename, fnLabel] of fixtureFiles) {
  // A fixture document without a `cases` entry is treated as having none.
  const cases = load(filename).cases || [];
  console.log(`\n${filename} -> node.${fnLabel} (${cases.length} cases)`);

  const outcome = run(filename, cases);
  totalPassed += outcome.passed;
  totalFailed += outcome.failed;
  console.log(` ${outcome.passed} passed | ${outcome.failed} failed`);
}

console.log();
if (totalFailed === 0) {
  console.log(`OK: all ${totalPassed} cases passed`);
} else {
  console.log(`FAILED: ${totalFailed} of ${totalPassed + totalFailed} cases`);
  process.exit(1);
}