import sharp from "sharp";

/** Matches exactly 16 hex digits, i.e. one 64-bit dHash (either letter case). */
const DHASH_HEX_PATTERN = /^[0-9a-f]{16}$/i;

/**
 * Difference-hash (dHash) implementation. Resizes the input to 9×8 grayscale
 * and emits one bit per adjacent-pixel comparison: if the left pixel is
 * brighter than the right, bit = 1. Result is 64 bits, encoded as a 16-char
 * lowercase hex string.
 *
 * dHash is robust to scaling, mild JPEG/WebP recompression, brightness
 * tweaks, and small crops — exactly the cases the SHA-256 dedup misses
 * (same picture, different bytes). Hamming-distance ≤ ~10 between two
 * hashes is a strong "same image, different encode" signal in practice.
 *
 * @param input - Encoded image bytes in any format sharp can decode.
 * @returns The 64-bit hash as 16 lowercase hex characters, one byte per row
 *   (top row first, leftmost comparison in the most significant bit).
 */
export async function computeDHash(input: Buffer): Promise<string> {
  // 9 wide × 8 tall, grayscale, raw pixel bytes. `rotate()` with no argument
  // applies the EXIF orientation, and `fit: "fill"` ignores the aspect ratio
  // so the grid is always exactly 9×8.
  const { data, info } = await sharp(input, { failOn: "none" })
    .rotate()
    .grayscale()
    .resize(9, 8, { fit: "fill" })
    .raw()
    .toBuffer({ resolveWithObject: true });

  // The raw output may carry more than one channel per pixel (e.g. grey +
  // alpha for images with transparency), in which case samples are
  // interleaved. Step by the reported channel count and read only the first
  // (luminance) channel of each pixel.
  const stride = info.channels;

  let hex = "";
  for (let row = 0; row < 8; row++) {
    let byte = 0;
    for (let col = 0; col < 8; col++) {
      const offset = (row * 9 + col) * stride;
      const left = data[offset];
      const right = data[offset + stride];
      if (left > right) byte |= 1 << (7 - col);
    }
    hex += byte.toString(16).padStart(2, "0");
  }
  return hex;
}

/**
 * Hamming distance between two 16-char hex strings. Returns Infinity if
 * the inputs aren't both well-formed 64-bit hashes.
 *
 * Validation is done up front against the whole string because `parseInt`
 * accepts partially valid input ("0g" parses as 0, "-1" as -1), which would
 * otherwise let malformed hashes produce a finite, meaningless distance.
 *
 * @param a - First hash, 16 hex digits (case-insensitive).
 * @param b - Second hash, 16 hex digits (case-insensitive).
 * @returns Number of differing bits (0–64), or Infinity for malformed input.
 */
export function hammingDistance(a: string, b: string): number {
  if (!DHASH_HEX_PATTERN.test(a) || !DHASH_HEX_PATTERN.test(b)) {
    return Infinity;
  }

  let dist = 0;
  for (let i = 0; i < 16; i += 2) {
    const byteA = parseInt(a.slice(i, i + 2), 16);
    const byteB = parseInt(b.slice(i, i + 2), 16);
    let xor = byteA ^ byteB;
    // popcount on a single byte: each iteration clears the lowest set bit.
    while (xor !== 0) {
      xor &= xor - 1;
      dist++;
    }
  }
  return dist;
}