Files
2026-05-26 22:46:00 +02:00

56 lines
1.8 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import sharp from "sharp";
/**
 * Computes a 64-bit difference hash (dHash) of an image.
 *
 * The image is reduced to a 9x8 grayscale thumbnail. Every row then
 * contributes 8 bits, one per pair of horizontally adjacent pixels: the bit
 * is 1 when the left pixel is brighter than its right neighbour, 0 otherwise.
 * Rows are emitted top to bottom, most significant bit first, giving
 * 8 bytes = 16 lowercase hex characters.
 *
 * Because only relative brightness of neighbours is recorded, the hash is
 * meant to stay stable across rescaling, mild recompression and brightness
 * tweaks, i.e. "same picture, different bytes" cases that a byte-level
 * SHA-256 dedup misses. A Hamming distance of roughly 10 or less between two
 * hashes is treated as a strong "same image" signal.
 *
 * @param input - Encoded image bytes in any format sharp can decode.
 * @returns The hash as a 16-character lowercase hex string.
 */
export async function computeDHash(input: Buffer): Promise<string> {
  // One extra column so that each of the 8 output bits per row has a
  // right-hand neighbour to compare against.
  const pipeline = sharp(input, { failOn: "none" })
    .rotate()
    .grayscale()
    .resize(9, 8, { fit: "fill" })
    .raw();
  // NOTE(review): the indexing below assumes one byte per pixel in the raw
  // output (9 * 8 = 72 bytes) — confirm against sharp's raw/greyscale docs.
  const pixels = await pipeline.toBuffer();

  const hexPairs: string[] = [];
  for (let y = 0; y < 8; y++) {
    const rowStart = y * 9;
    let bits = 0;
    for (let x = 0; x < 8; x++) {
      // Shift-in from the right: the first comparison ends up in bit 7.
      const brighterOnLeft = pixels[rowStart + x] > pixels[rowStart + x + 1];
      bits = (bits << 1) | (brighterOnLeft ? 1 : 0);
    }
    hexPairs.push(bits.toString(16).padStart(2, "0"));
  }
  return hexPairs.join("");
}
/**
 * Hamming distance (number of differing bits) between two 64-bit hashes
 * encoded as 16-character hex strings.
 *
 * @param a - First hash: exactly 16 hex digits, upper or lower case.
 * @param b - Second hash, same format.
 * @returns An integer in the range 0..64, or `Infinity` when either input is
 *   not exactly 16 hex digits.
 */
export function hammingDistance(a: string, b: string): number {
  // Validate the whole string before parsing. parseInt alone is too lenient:
  // it stops silently at the first non-hex character and accepts a sign or
  // leading whitespace, so "0g", "-1" or " 1" would be treated as valid bytes.
  const wellFormed = /^[0-9a-f]{16}$/i;
  if (!wellFormed.test(a) || !wellFormed.test(b)) return Infinity;

  let dist = 0;
  for (let i = 0; i < 16; i += 2) {
    const byteA = parseInt(a.slice(i, i + 2), 16);
    const byteB = parseInt(b.slice(i, i + 2), 16);
    // Bits set in the XOR are exactly the positions where the bytes differ.
    let xor = byteA ^ byteB;
    // Popcount on a single byte (at most 8 iterations).
    while (xor) {
      dist += xor & 1;
      xor >>>= 1;
    }
  }
  return dist;
}