import { hash, compare, findDelta } from 'lavinhash';
import { readFile } from 'fs/promises';
// Functional approach with pipe composition
/**
 * Composes unary functions left to right: `pipe(f, g)(x)` evaluates `g(f(x))`.
 *
 * Chains of one to three stages are fully typed, so the composed function
 * keeps the input type of the first stage and the output type of the last.
 * Any other arity (including zero stages, which yields the identity function)
 * falls back to an untyped signature.
 *
 * @param fns Stages to apply in order; each receives the previous stage's result.
 * @returns A function that feeds its argument through every stage.
 */
const pipe: {
  <A, B>(f1: (a: A) => B): (a: A) => B;
  <A, B, C>(f1: (a: A) => B, f2: (b: B) => C): (a: A) => C;
  <A, B, C, D>(f1: (a: A) => B, f2: (b: B) => C, f3: (c: C) => D): (a: A) => D;
  // Fallback for zero or more than three stages; types cannot be tracked here.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  (...fns: Array<(value: any) => any>): (x: any) => any;
} =
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  (...fns: Array<(value: any) => any>) =>
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  (x: any) =>
    fns.reduce((acc, fn) => fn(acc), x);
/**
 * Copies the contents of a Node.js `Buffer` into a new, plain `Uint8Array`.
 *
 * @param buffer Source bytes; not modified.
 * @returns A fresh `Uint8Array` holding the same byte values.
 */
const toUint8Array = (buffer: Buffer) => {
  const bytes = new Uint8Array(buffer.length);
  bytes.set(buffer);
  return bytes;
};
/**
 * Fingerprints a byte array with `hash` and returns it alongside the input.
 *
 * @param data Bytes to fingerprint.
 * @returns `{ data, fingerprint }`, where `fingerprint` is the value returned by `hash(data)`.
 */
const computeHash = (data: Uint8Array) => {
  const fingerprint = hash(data);
  return { data, fingerprint };
};
// Process single file
/**
 * Turns the raw contents of one file into `{ data, fingerprint }` by
 * converting the buffer to a `Uint8Array` and then fingerprinting it.
 */
const processFile = pipe(
  (buffer: Buffer) => toUint8Array(buffer),
  (bytes: Uint8Array) => computeHash(bytes)
);
// Compare files functionally
/**
 * Reads two files, fingerprints both, and reports how they differ.
 *
 * Both files are read concurrently. The result combines the value returned by
 * `compare` for the two fingerprints with the change counters reported by
 * `findDelta`.
 *
 * @param path1 Path of the first file.
 * @param path2 Path of the second file.
 * @returns `{ similarity, changes: { total, added, deleted } }`.
 */
const compareFiles = async (path1: string, path2: string) => {
  // Read a file and run it through the single-file pipeline.
  const load = async (path: string) => processFile(await readFile(path));

  // Both reads are started before either is awaited.
  const [left, right] = await Promise.all([load(path1), load(path2)]);

  const similarity = compare(left.fingerprint, right.fingerprint);
  const { totalChanges, bytesAdded, bytesDeleted } = findDelta(
    left.fingerprint,
    right.fingerprint,
    left.data,
    right.data
  );

  return {
    similarity,
    changes: { total: totalChanges, added: bytesAdded, deleted: bytesDeleted }
  };
};
// Usage
// Compare two versions of a binary and print a two-line report.
const result = await compareFiles('v1.bin', 'v2.bin');
const { similarity, changes } = result;
console.log(`Similarity: ${similarity}%`);
console.log(`Delta: +${changes.added} -${changes.deleted}`);
// Advanced: Streaming pipeline with backpressure
import { hash, compare, Config } from 'lavinhash';
import { createReadStream } from 'fs';
import { pipeline } from 'stream/promises';
import { Transform } from 'stream';
// Custom configuration with builder pattern
/**
 * Builds the `Config` used by the streaming example: alpha 0.4 and a window
 * size of 128, applied in that order through the builder methods.
 *
 * NOTE(review): the meaning and valid ranges of alpha and window size are
 * defined by lavinhash — confirm against its documentation before tuning.
 */
const createConfig = () => {
  const defaults = new Config();
  const withAlpha = defaults.withAlpha(0.4);
  return withAlpha.withWindowSize(128);
};
// Stream-based processing for large files
/**
 * Object-mode transform stream that adds a fingerprint to each chunk.
 *
 * Each incoming chunk is `{ path, data }`; the outgoing object is the same
 * chunk with an added `fingerprint` computed by `hash(data, config)`.
 */
class HashTransform extends Transform {
  /**
   * @param config Configuration passed to every `hash` call.
   */
  constructor(private readonly config: Config) {
    super({ objectMode: true });
  }

  /**
   * Fingerprints one chunk and forwards it downstream.
   *
   * @param chunk File path plus the bytes to fingerprint.
   * @param _encoding Unused; chunks are objects, not strings.
   * @param callback Stream callback; receives the error or the enriched chunk.
   */
  _transform(
    chunk: { path: string; data: Uint8Array },
    _encoding: string,
    callback: (error?: Error | null, data?: unknown) => void
  ) {
    let fingerprint;
    try {
      fingerprint = hash(chunk.data, this.config);
    } catch (error: unknown) {
      // Report failures through the stream instead of throwing out of
      // _transform, so the stream is errored and pipeline() can clean up.
      callback(error instanceof Error ? error : new Error(String(error)));
      return;
    }
    callback(null, { ...chunk, fingerprint });
  }
}
// Functional similarity matrix computation
/**
 * Compares every unordered pair of fingerprints exactly once.
 *
 * @param fingerprints Fingerprints to compare; positions in this array are the
 *   indices reported in each result's `pair`.
 * @returns One entry per pair `i < j`, ordered by `i` then `j`, each holding
 *   `pair: [i, j]` and the `compare` result for those two fingerprints.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- fingerprint type is owned by lavinhash
const computeSimilarityMatrix = (fingerprints: any[]) =>
  fingerprints.flatMap((fp1, i) =>
    // `offset` is the position inside the slice, so the absolute index of fp2
    // is i + 1 + offset. Deriving it from the position (rather than searching
    // for the value) stays correct when two fingerprints are equal.
    fingerprints.slice(i + 1).map((fp2, offset) => ({
      pair: [i, i + 1 + offset],
      similarity: compare(fp1, fp2)
    }))
  );
// Find clusters using transitive closure
/**
 * Groups item indices into clusters connected by sufficiently similar pairs.
 *
 * Two indices end up in the same cluster when a chain of pairs, each with
 * `similarity >= threshold`, links them. Indices that appear in no qualifying
 * pair are omitted.
 *
 * @param matrix Pairwise results; each entry carries `pair: [a, b]` and a `similarity` score.
 * @param threshold Minimum similarity for a pair to link its two indices (default 80).
 * @returns Clusters in order of first appearance; each cluster lists its
 *   member indices once, in ascending order.
 */
const findClusters = (
  matrix: ReadonlyArray<{ pair: readonly number[]; similarity: number }>,
  threshold = 80
): number[][] => {
  // Disjoint-set forest: maps each seen index to its parent; roots map to themselves.
  const parent = new Map<number, number>();

  const find = (node: number): number => {
    let root = node;
    for (let up = parent.get(root); up !== undefined && up !== root; up = parent.get(root)) {
      root = up;
    }
    // Path compression: point every node on the walked path straight at the root.
    let cursor = node;
    while (cursor !== root) {
      const next = parent.get(cursor) ?? root;
      parent.set(cursor, root);
      cursor = next;
    }
    return root;
  };

  for (const { pair, similarity } of matrix) {
    if (!(similarity >= threshold)) continue;
    const [a, b] = pair;
    if (a === undefined || b === undefined) continue;
    if (!parent.has(a)) parent.set(a, a);
    if (!parent.has(b)) parent.set(b, b);
    const rootA = find(a);
    const rootB = find(b);
    // Merging roots is what joins two previously separate clusters.
    if (rootA !== rootB) parent.set(rootB, rootA);
  }

  const members = new Map<number, number[]>();
  for (const node of [...parent.keys()]) {
    const root = find(node);
    const group = members.get(root);
    if (group) {
      group.push(node);
    } else {
      members.set(root, [node]);
    }
  }

  return [...members.values()].map((group) => group.sort((x, y) => x - y));
};
// Usage
// Fingerprint a set of files, score every pair, then group similar files.
const files = ['file1.bin', 'file2.bin', 'file3.bin'];
// NOTE(review): processFiles is not defined in this snippet — presumably it
// runs each path through HashTransform and resolves to objects carrying a
// `fingerprint`; confirm where it is meant to come from.
const results = await processFiles(files);
const matrix = computeSimilarityMatrix(
  results.map(({ fingerprint }) => fingerprint)
);
// Uses the default similarity threshold of findClusters.
const clusters = findClusters(matrix);
// Note: All language bindings follow the same API design for consistency. The core functionality (hash, compare, findDelta) works identically across all platforms.