56 lines
1.8 KiB
TypeScript
56 lines
1.8 KiB
TypeScript
import sharp from "sharp";
|
||
|
||
/**
 * Computes a 64-bit difference hash (dHash) of an image.
 *
 * The image is auto-oriented, converted to grayscale and squashed to a
 * 9×8 grid (aspect ratio ignored). Within each of the 8 rows, every pixel
 * is compared to its right-hand neighbour, giving 8 comparisons per row;
 * a comparison contributes a 1 bit when the left pixel is strictly
 * brighter than the right one. Rows are emitted top to bottom, leftmost
 * comparison in the most significant bit, as a 16-character lowercase hex
 * string.
 *
 * dHash tolerates rescaling, mild JPEG/WebP recompression, brightness
 * adjustments and small crops — the "same picture, different bytes" cases
 * that a SHA-256 based dedup cannot catch. In practice a Hamming distance
 * of roughly 10 or less between two hashes is a strong indication of the
 * same image encoded differently.
 *
 * @param input - Encoded image bytes in any format sharp can decode.
 * @returns The hash as 16 lowercase hex characters.
 */
export async function computeDHash(input: Buffer): Promise<string> {
  // One extra column so that each row yields exactly 8 neighbour pairs.
  const gridWidth = 9;
  const gridHeight = 8;

  // Raw output: the loop below reads it as one byte per grayscale pixel,
  // laid out row by row.
  const pixels = await sharp(input, { failOn: "none" })
    .rotate()
    .grayscale()
    .resize(gridWidth, gridHeight, { fit: "fill" })
    .raw()
    .toBuffer();

  const rowHex: string[] = [];
  for (let y = 0; y < gridHeight; y++) {
    const rowStart = y * gridWidth;
    let bits = 0;
    // Shift-accumulate: the first comparison ends up in the top bit.
    for (let x = 0; x < gridWidth - 1; x++) {
      const brighterOnLeft = pixels[rowStart + x] > pixels[rowStart + x + 1];
      bits = (bits << 1) | (brighterOnLeft ? 1 : 0);
    }
    rowHex.push(bits.toString(16).padStart(2, "0"));
  }
  return rowHex.join("");
}
|
||
|
||
/**
 * Hamming distance (number of differing bits) between two 64-bit hashes
 * given as 16-character hex strings.
 *
 * Both inputs must consist of exactly 16 hexadecimal digits; upper- and
 * lowercase digits are accepted. Anything else — wrong length, signs,
 * whitespace, non-hex characters — is treated as malformed.
 *
 * @param a - First hash, 16 hex digits.
 * @param b - Second hash, 16 hex digits.
 * @returns An integer in the range 0–64, or `Infinity` if either input is
 *   not a well-formed 64-bit hex hash.
 */
export function hammingDistance(a: string, b: string): number {
  // Validate up front with a strict pattern. Relying on parseInt to detect
  // bad input is not enough: it stops at the first invalid character
  // ("1g" -> 1), skips leading whitespace, and accepts signs ("-1" -> -1),
  // all of which would yield a plausible-looking but meaningless distance.
  const hashPattern = /^[0-9a-f]{16}$/i;
  if (!hashPattern.test(a) || !hashPattern.test(b)) return Infinity;

  let dist = 0;
  // Compare one byte (two hex digits) at a time.
  for (let i = 0; i < 16; i += 2) {
    let diff = parseInt(a.slice(i, i + 2), 16) ^ parseInt(b.slice(i, i + 2), 16);
    // Kernighan popcount: each iteration clears the lowest set bit.
    while (diff !== 0) {
      diff &= diff - 1;
      dist++;
    }
  }
  return dist;
}
|