From install to advanced setups: cooperative search, worker pool, big-corpus tiering, and integration in React / Angular.
npm install albex # or pnpm add albex
import { AlbexEngine } from "albex";
// 1. Construct + init. The WASM ships with the package and your bundler
// resolves it through import.meta.url — Vite, Webpack 5+, Next, esbuild,
// Rollup, Parcel 2, Bun all handle this automatically. No assets to copy.
const engine = new AlbexEngine();
await engine.init();

// 2. Index files from drag-and-drop or <input type="file">.
const input = document.querySelector("input[type=file]");
input.addEventListener("change", async () => {
  for (const file of input.files) {
    // Handle failures per file: a rejection inside an async event listener
    // would otherwise surface only as an unhandled promise rejection and
    // would stop the remaining selected files from being indexed.
    try {
      const doc = await engine.indexFile(file);
      console.log(`Indexed ${doc.name}: ${doc.chunks} chunks`);
    } catch (err) {
      console.error(`Failed to index ${file.name}:`, err);
    }
  }
});

// 3. Search. This query combines a quoted phrase with an OR'd single term.
const results = engine.search('"contrato marco" | rescisión', { windowed: true });
for (const r of results) {
  console.log(`[${r.score}] ${r.documentName} — ${r.snippet}`);
}

// Tweak relevance.
engine.setMaxErrors(1); // tighter fuzziness
engine.setThreshold(400); // only return strong hits
engine.setMaxResults(100); // presumably caps how many hits are returned — confirm in the API reference
engine.setLanguage("es"); // light Spanish stemming on queries
// Inspect what was loaded.
console.log(engine.tier); // 'std' by default
console.log(engine.simdEnabled); // boolean
console.log(engine.gpuEngaged); // true once first search uses WebGPU
// Want tier auto-selection? Serve the 6 binaries yourself and pass wasmBaseUrl.
// The engine then picks mini/std/pro based on navigator.deviceMemory.
// new AlbexEngine({ wasmBaseUrl: '/assets', tier: 'auto', simd: 'auto' })

`word` — Fuzzy single token — up to 3 character edits (auto-adjusted by query length).
`a b c` — AND: all tokens must appear in the same chunk (proximity scored).
`"a b"` — Phrase: tokens must appear in order and adjacent.
`a | b` — OR: union of two independent searches, merged by score.
`"a b" | c` — Mix phrase and OR.

`.docx` — Native Rust/WASM streaming XML parser (word/document.xml).
`.xlsx` — Native Rust/WASM parser (shared strings + inline strings).
`.pdf` — Separate albex_pdf.wasm module, loaded on demand.
`.html` `.htm` — <script>/<style> stripped, paragraph at block-level boundaries.
`.md` `.markdown` — CommonMark markers stripped (code fences, headings, links, lists).
`.json` — Recursive walk; every string key and leaf indexed.
`.csv` — RFC 4180 lite; one row per chunk (location = row number).
`.eml` — MIME-lite: From/To/Subject + first text/plain body part.
`.rtf` — Control words and groups stripped, text runs preserved.
`.txt` — Plain text split on double newlines.
`.xml` — Tag-stripped, entity-decoded.