Initial commit
This commit is contained in:
@@ -0,0 +1,254 @@
|
||||
/**
 * Token-grammar classifier for video filenames in a JAVID group.
 *
 * Patterns use a simplified token grammar (option A1 from the mockups):
 *   - `{N}` — one or more digits, captured as the part index
 *   - `{L}` — single letter A–Z, captured (A=1, B=2, ...)
 *   - everything else is a literal character
 *
 * Patterns match at the END of the filename stem (no extension),
 * case-insensitive.
 *
 * Classification rules for files sharing one normalized JAV code:
 *   - "part"    — stem ends with a configured pattern; index is the
 *                 captured numeric/letter value.
 *   - "variant" — stem does NOT match any pattern but its prefix
 *                 (first dot-segment) equals a stem that DID match.
 *                 Variants belong to the matching part.
 *   - "single"  — lone file in its code group with no pattern match.
 *
 * Tiebreak for "default variant" (the one to play first): the file
 * whose stem equals the variant_group exactly. Otherwise the
 * alphabetically first stem in the group.
 */
|
||||
/** One token-grammar pattern after compilation. */
export interface CompiledPattern {
  /** Original token-grammar source. */
  source: string;
  /** Compiled regex anchored to end-of-stem (case-insensitive). */
  re: RegExp;
  /** What the captured token represents. */
  kind: "digits" | "letter";
}
|
||||
|
||||
/** Minimal description of one file presented to the classifier. */
export interface ClassifyInput {
  /** Stable identifier, opaque to the classifier. */
  key: string;
  /** Filename stem (no extension), as on disk. */
  stem: string;
}
|
||||
|
||||
/** Classification of one input file, identified by the same `key` it was submitted with. */
export interface ClassifyResult {
  key: string;
  partKind: "part" | "variant" | "single";
  /** 1-based sort index for parts; null otherwise. */
  partIndex: number | null;
  /** Stem-with-suffix-stripped — variants share this with their part. */
  variantGroup: string | null;
}
|
||||
|
||||
// Matches every `{N}` or `{L}` token in a pattern source (global flag).
// NOTE(review): nothing in the visible code references this constant — confirm it is still needed.
const TOKEN_RE = /\{[NL]\}/g;
|
||||
|
||||
/**
 * Compile one token-grammar pattern into an end-anchored, case-insensitive regex.
 *
 * Grammar: `{N}` captures one or more digits, `{L}` captures a single ASCII
 * letter, and every other character is matched literally (regex
 * metacharacters are escaped). A `}` that does not close a token is treated
 * as a literal.
 *
 * @param source Pattern in token-grammar form, e.g. `-cd{N}`.
 * @returns The compiled pattern, or `null` (this function reports bad input by
 *   returning `null` rather than throwing) when `source` is empty, contains an
 *   unterminated or unknown `{...}` token, contains more than one capture
 *   token, or contains no capture token at all.
 */
export function compileToken(source: string): CompiledPattern | null {
  if (!source) return null;

  let kind: "digits" | "letter" | null = null;
  let body = "";
  let pos = 0;

  while (pos < source.length) {
    const ch = source.charAt(pos);

    if (ch !== "{") {
      // Literal character: escape it if it is a regex metacharacter.
      body += ch.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
      pos++;
      continue;
    }

    // A `{` must open one of the two known tokens.
    const close = source.indexOf("}", pos);
    if (close < 0) return null;
    const token = source.slice(pos, close + 1);
    if (token !== "{N}" && token !== "{L}") return null;

    // Only one capture per pattern.
    if (kind !== null) return null;

    if (token === "{N}") {
      body += "(\\d+)";
      kind = "digits";
    } else {
      body += "([A-Za-z])";
      kind = "letter";
    }
    pos = close + 1;
  }

  // A pattern without a capture token cannot yield a part index.
  if (kind === null) return null;

  return {
    source,
    re: new RegExp(body + "$", "i"),
    kind,
  };
}
|
||||
|
||||
/**
 * Compile a list of token-grammar patterns, preserving input order.
 *
 * Sources that `compileToken` rejects (it returns `null` for them) are
 * silently dropped, so the result may be shorter than `sources`.
 *
 * @param sources Patterns in token-grammar form.
 * @returns The successfully compiled patterns.
 */
export function compilePatterns(sources: string[]): CompiledPattern[] {
  const compiled: CompiledPattern[] = [];
  for (const source of sources) {
    const pattern = compileToken(source);
    if (pattern !== null) compiled.push(pattern);
  }
  return compiled;
}
|
||||
|
||||
/**
 * Convert a captured token into a 1-based part index.
 *
 * - `"digits"`: the capture must consist solely of ASCII digits and denote a
 *   value greater than zero (`"007"` → 7, `"0"` → null).
 * - `"letter"`: the first character must be an ASCII letter; A/a = 1 … Z/z = 26.
 *
 * @param capture Text captured by the pattern's single capture group.
 * @param kind Which token produced the capture.
 * @returns The index, or `null` when the capture is empty or not a valid
 *   value for `kind`.
 */
function indexFromCapture(capture: string, kind: "digits" | "letter"): number | null {
  if (kind === "digits") {
    // Reject anything Number() would otherwise coerce ("0x10", "1.5", " 3 ").
    if (!/^\d+$/.test(capture)) return null;
    const n = Number(capture);
    return Number.isFinite(n) && n > 0 ? n : null;
  }

  // Letter: A=1, B=2, ...
  // Validate before upper-casing: "" would give NaN, and "ß".toUpperCase()
  // is "SS", which would otherwise be read as "S".
  const first = capture.charAt(0);
  if (!/^[A-Za-z]$/.test(first)) return null;
  return first.toUpperCase().charCodeAt(0) - 64;
}
|
||||
|
||||
/** Result of successfully matching one stem against a compiled pattern. */
interface PatternHit {
  /** Index derived from the pattern's captured token. */
  partIndex: number;
  /** Stem with the matched suffix removed. */
  variantGroup: string;
}
|
||||
|
||||
/**
 * Match `stem` against `patterns` in array order and return the first usable hit.
 *
 * A pattern is skipped when its regex does not match or when the captured
 * token does not convert to a valid index; later patterns are then tried.
 *
 * @param stem Filename stem (no extension).
 * @param patterns Compiled patterns, tried in order.
 * @returns The part index plus the stem with the matched suffix removed, or
 *   `null` when no pattern yields a hit.
 */
function tryMatch(stem: string, patterns: CompiledPattern[]): PatternHit | null {
  for (const pattern of patterns) {
    const m = stem.match(pattern.re);
    // `index` is only absent for a global regex, whose match array carries no
    // capture groups either, so such a result cannot be used.
    if (m === null || m.index === undefined) continue;

    const partIndex = indexFromCapture(m[1] ?? "", pattern.kind);
    if (partIndex == null) continue;

    return {
      partIndex,
      variantGroup: stem.slice(0, m.index),
    };
  }
  return null;
}
|
||||
|
||||
/**
 * Classify a group of files that share one normalized JAV code.
 *
 * Algorithm:
 *   0. An empty group yields `[]`; a group of exactly one file is always
 *      "single" (patterns are not consulted).
 *   1. Try the patterns against every stem. A stem that matches is a "part";
 *      its `variantGroup` is the stem with the matched suffix removed.
 *   2. If nothing matched, every file is a "variant" of one shared group: the
 *      longest common prefix of all stems with trailing whitespace / `.` /
 *      `_` / `-` removed (or the file's own stem when that prefix is empty).
 *   3. Otherwise each unmatched stem `S` is attached as a "variant" when
 *        a. `S` starts with a matched part's stem followed by `.`
 *           (`XXX-001-cd1.fixed` next to `XXX-001-cd1`), or
 *        b. `S` equals a part's group, or starts with that group followed
 *           by `.` (`XXX-001.fixed` next to group `XXX-001`).
 *      Rule (a) is checked first and longer candidates win. The variant
 *      receives the part's `variantGroup`; its `partIndex` stays null.
 *      Parts whose group is the empty string never attract variants.
 *   4. Anything still unattached is a stray and is marked "single".
 *
 * Comparisons in steps 3a/3b are exact (case-sensitive).
 *
 * @param files Files of one code group; results are returned in the same order.
 * @param patterns Compiled part-suffix patterns, tried in order.
 * @returns One result per input file.
 */
export function classifyGroup(
  files: ClassifyInput[],
  patterns: CompiledPattern[],
): ClassifyResult[] {
  if (files.length === 0) return [];
  if (files.length === 1) {
    const only = files[0]!;
    return [{ key: only.key, partKind: "single", partIndex: null, variantGroup: null }];
  }

  // Pass 1: pattern match. The hits are index-aligned with `files`, so two
  // inputs that happen to share a key cannot overwrite each other's result.
  const hits = files.map((f) => tryMatch(f.stem, patterns));

  if (hits.every((hit) => hit === null)) {
    // No part-style suffixes detected anywhere → treat the whole group
    // as variants of one part.
    const group = longestCommonPrefix(files.map((f) => f.stem));
    return files.map((f) => ({
      key: f.key,
      partKind: "variant" as const,
      partIndex: null,
      variantGroup: group || f.stem,
    }));
  }

  // Pass 2 preparation: collect what an unmatched stem may attach to.
  const groupByPartStem = new Map<string, string>();
  const groups = new Set<string>();
  files.forEach((f, i) => {
    const hit = hits[i];
    // An empty group (pattern consumed the whole stem) cannot anchor variants.
    if (!hit || !hit.variantGroup) return;
    groupByPartStem.set(f.stem, hit.variantGroup);
    groups.add(hit.variantGroup);
  });

  // Longest first so the most specific candidate binds.
  const longestFirst = (a: string, b: string): number => b.length - a.length;
  const partStems = Array.from(groupByPartStem.keys()).sort(longestFirst);
  const groupKeys = Array.from(groups).sort(longestFirst);

  return files.map((f, i): ClassifyResult => {
    const hit = hits[i];
    if (hit) {
      return {
        key: f.key,
        partKind: "part",
        partIndex: hit.partIndex,
        variantGroup: hit.variantGroup,
      };
    }

    // Unmatched: first try "matched stem + '.'" (alt encode of one part),
    // then fall back to the suffix-stripped group itself.
    const partStem = partStems.find((s) => f.stem.startsWith(s + "."));
    const attached =
      partStem !== undefined
        ? groupByPartStem.get(partStem)
        : groupKeys.find((g) => f.stem === g || f.stem.startsWith(g + "."));

    if (attached) {
      return { key: f.key, partKind: "variant", partIndex: null, variantGroup: attached };
    }
    // No way to attach — the file is a stray. Mark single.
    return { key: f.key, partKind: "single", partIndex: null, variantGroup: null };
  });
}
|
||||
|
||||
/**
 * Longest prefix shared by every string in `strs`, with trailing whitespace,
 * `.`, `_` and `-` removed.
 *
 * The comparison is exact (case-sensitive) and works on UTF-16 code units; if
 * the shared prefix would end between the two halves of a surrogate pair, the
 * dangling high surrogate is dropped so the result is always well-formed.
 *
 * Only trailing punctuation is trimmed: a prefix that ends in the middle of a
 * word (`XXX-001.fi` for `XXX-001.fixed` / `XXX-001.final`) is returned as is.
 *
 * @param strs Strings to compare.
 * @returns The trimmed common prefix; `""` for an empty list or when nothing is shared.
 */
function longestCommonPrefix(strs: string[]): string {
  if (strs.length === 0) return "";

  const [first = "", ...others] = strs;
  let end = first.length;
  for (const s of others) {
    let j = 0;
    while (j < end && j < s.length && first[j] === s[j]) j++;
    end = j;
    if (end === 0) return "";
  }

  // Do not cut a surrogate pair in half.
  if (end > 0 && end < first.length) {
    const last = first.charCodeAt(end - 1);
    const next = first.charCodeAt(end);
    const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    if (splitsPair) end--;
  }

  // Trim trailing punctuation so we don't end on a separator like "XXX-001.".
  return first.slice(0, end).replace(/[\s._\-]+$/, "");
}
|
||||
|
||||
/**
 * From a set of files all sharing the same variantGroup, pick the one
 * to play by default.
 *
 * Rule: the first variant whose stem equals `group` exactly; otherwise the
 * variant whose stem sorts first under `String.prototype.localeCompare`
 * (the earliest one in `variants` wins a tie). `variants` is not modified.
 *
 * @param variants Candidate files.
 * @param group The variant group the candidates belong to.
 * @returns The chosen variant, or `null` when `variants` is empty.
 */
export function pickDefaultVariant<T extends { stem: string }>(
  variants: T[],
  group: string,
): T | null {
  let first: T | null = null;
  for (const candidate of variants) {
    if (candidate.stem === group) return candidate;
    // Strict `< 0` keeps the earliest element among equal stems.
    if (first === null || candidate.stem.localeCompare(first.stem) < 0) {
      first = candidate;
    }
  }
  return first;
}
|
||||
|
||||
/**
 * Compute a short label for a variant relative to its group stem.
 *
 * - stem equals the group → `original`
 * - stem is `group` + `.` + rest → rest (`XXX-001.fixed` / `XXX-001` → `fixed`)
 * - stem merely starts with the group → the remainder with leading `.`, `_`,
 *   `-` and whitespace stripped (`XXX-001-4k` / `XXX-001` → `4k`)
 * - stem does not start with the group → the stem itself
 *
 * An empty remainder falls back to `original`.
 *
 * @param stem Filename stem of the variant.
 * @param group Variant group the stem belongs to.
 * @returns The label to display.
 */
export function variantLabel(stem: string, group: string): string {
  if (stem === group) return "original";
  if (!stem.startsWith(group)) return stem;

  const rest = stem.slice(group.length);
  // After a dot separator exactly one "." is removed; otherwise every leading
  // separator character is stripped.
  const label = rest.startsWith(".") ? rest.slice(1) : rest.replace(/^[._\-\s]+/, "");
  return label || "original";
}
|
||||
Reference in New Issue
Block a user