| "use strict"; |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| Object.defineProperty(exports, "__esModule", { value: true }); |
| exports.AdamOptimizer = exports.DotProductAttention = exports.DualSpaceAttention = exports.EdgeFeaturedAttention = exports.GraphRoPeAttention = exports.MoEAttention = exports.LocalGlobalAttention = exports.LinearAttention = exports.HyperbolicAttention = exports.FlashAttention = exports.MultiHeadAttention = void 0; |
| exports.projectToPoincareBall = projectToPoincareBall; |
| exports.poincareDistance = poincareDistance; |
| exports.mobiusAddition = mobiusAddition; |
| exports.expMap = expMap; |
| exports.logMap = logMap; |
| exports.isAttentionAvailable = isAttentionAvailable; |
| exports.getAttentionVersion = getAttentionVersion; |
| exports.parallelAttentionCompute = parallelAttentionCompute; |
| exports.batchAttentionCompute = batchAttentionCompute; |
| exports.computeFlashAttentionAsync = computeFlashAttentionAsync; |
| exports.computeHyperbolicAttentionAsync = computeHyperbolicAttentionAsync; |
| exports.infoNceLoss = infoNceLoss; |
| exports.mineHardNegatives = mineHardNegatives; |
| exports.benchmarkAttention = benchmarkAttention; |
| |
// Cached handle to the native addon, set on the first successful require().
let attentionModule = null;
// Cached load failure; once loading fails the same error is rethrown on every
// later call instead of retrying the require.
let loadError = null;
/**
 * Lazily load the native `@ruvector/attention` addon.
 *
 * The outcome is memoized: repeated calls return the same module instance,
 * and a failed load keeps throwing the same wrapped error.
 *
 * @returns {object} The loaded native module.
 * @throws {Error} If the addon is not installed or fails to load. The original
 *   load error is attached as `cause` so its stack/detail is not lost, and the
 *   message guards against non-Error throwables (no `.message`).
 */
function getAttentionModule() {
    if (attentionModule)
        return attentionModule;
    if (loadError)
        throw loadError;
    try {
        attentionModule = require('@ruvector/attention');
        return attentionModule;
    }
    catch (e) {
        loadError = new Error(`@ruvector/attention is not installed or failed to load: ${e?.message ?? e}\n` +
            `Install with: npm install @ruvector/attention`, { cause: e });
        throw loadError;
    }
}
| |
| |
| |
/**
 * Coerce a numeric vector into a Float32Array, reusing the input unchanged
 * when it is already one (no copy is made in that case).
 * @param {Float32Array|number[]} input
 * @returns {Float32Array}
 */
function toFloat32Array(input) {
    return input instanceof Float32Array ? input : new Float32Array(input);
}
| |
| |
| |
/**
 * Coerce every vector in a list to Float32Array.
 * @param {Array<Float32Array|number[]>} inputs
 * @returns {Float32Array[]}
 */
function toFloat32Arrays(inputs) {
    const converted = [];
    for (const vec of inputs) {
        converted.push(toFloat32Array(vec));
    }
    return converted;
}
| |
| |
| |
/**
 * Convert a Float32Array back to a plain number[].
 * @param {Float32Array} input
 * @returns {number[]}
 */
function fromFloat32Array(input) {
    return [...input];
}
| |
| |
| |
| |
| |
/**
 * Standard multi-head attention backed by the native addon.
 *
 * Inputs may be plain number arrays or Float32Arrays; `compute` returns the
 * result both as a plain array (`values`) and as the raw Float32Array (`raw`).
 */
class MultiHeadAttention {
    /**
     * @param {number} dim - Total embedding dimension.
     * @param {number} numHeads - Number of attention heads `dim` is split across.
     * @throws {Error} If the native addon cannot be loaded.
     */
    constructor(dim, numHeads) {
        const native = getAttentionModule();
        this.dim = dim;
        this.numHeads = numHeads;
        this.inner = new native.MultiHeadAttention(dim, numHeads);
    }
    /**
     * Run attention for one query over a set of keys/values.
     * @returns {{values: number[], raw: Float32Array}}
     */
    compute(query, keys, values) {
        const q = toFloat32Array(query);
        const k = toFloat32Arrays(keys);
        const v = toFloat32Arrays(values);
        const raw = this.inner.compute(q, k, v);
        return { values: fromFloat32Array(raw), raw };
    }
    /**
     * Zero-conversion variant: arguments go straight to the native binding
     * (caller is responsible for supplying Float32Array data).
     */
    computeRaw(query, keys, values) {
        return this.inner.compute(query, keys, values);
    }
    /** Per-head dimensionality, i.e. `dim / numHeads`. */
    get headDim() {
        return this.dim / this.numHeads;
    }
}
exports.MultiHeadAttention = MultiHeadAttention;
| |
| |
| |
/**
 * Block-wise (flash-style) attention backed by the native addon.
 */
class FlashAttention {
    /**
     * @param {number} dim - Embedding dimension.
     * @param {number} [blockSize=512] - Tile size used by the native kernel.
     * @throws {Error} If the native addon cannot be loaded.
     */
    constructor(dim, blockSize = 512) {
        const native = getAttentionModule();
        this.dim = dim;
        this.blockSize = blockSize;
        this.inner = new native.FlashAttention(dim, blockSize);
    }
    /**
     * Run attention for one query over a set of keys/values.
     * @returns {{values: number[], raw: Float32Array}}
     */
    compute(query, keys, values) {
        const raw = this.inner.compute(toFloat32Array(query), toFloat32Arrays(keys), toFloat32Arrays(values));
        return { values: fromFloat32Array(raw), raw };
    }
    /** Pass-through variant with no array conversion. */
    computeRaw(query, keys, values) {
        return this.inner.compute(query, keys, values);
    }
}
exports.FlashAttention = FlashAttention;
| |
| |
| |
/**
 * Attention computed in hyperbolic (Poincaré ball) space, backed by the
 * native addon.
 */
class HyperbolicAttention {
    /**
     * @param {number} dim - Embedding dimension.
     * @param {number} [curvature=1.0] - Ball curvature parameter.
     * @throws {Error} If the native addon cannot be loaded.
     */
    constructor(dim, curvature = 1.0) {
        const native = getAttentionModule();
        this.dim = dim;
        this.curvature = curvature;
        this.inner = new native.HyperbolicAttention(dim, curvature);
    }
    /**
     * Run attention for one query over a set of keys/values.
     * @returns {{values: number[], raw: Float32Array}}
     */
    compute(query, keys, values) {
        const raw = this.inner.compute(toFloat32Array(query), toFloat32Arrays(keys), toFloat32Arrays(values));
        return { values: fromFloat32Array(raw), raw };
    }
    /** Pass-through variant with no array conversion. */
    computeRaw(query, keys, values) {
        return this.inner.compute(query, keys, values);
    }
}
exports.HyperbolicAttention = HyperbolicAttention;
| |
| |
| |
/**
 * Linear (kernel feature map) attention backed by the native addon.
 */
class LinearAttention {
    /**
     * @param {number} dim - Embedding dimension.
     * @param {number} numFeatures - Number of random features for the kernel map.
     * @throws {Error} If the native addon cannot be loaded.
     */
    constructor(dim, numFeatures) {
        const native = getAttentionModule();
        this.dim = dim;
        this.numFeatures = numFeatures;
        this.inner = new native.LinearAttention(dim, numFeatures);
    }
    /**
     * Run attention for one query over a set of keys/values.
     * @returns {{values: number[], raw: Float32Array}}
     */
    compute(query, keys, values) {
        const raw = this.inner.compute(toFloat32Array(query), toFloat32Arrays(keys), toFloat32Arrays(values));
        return { values: fromFloat32Array(raw), raw };
    }
    /** Pass-through variant with no array conversion. */
    computeRaw(query, keys, values) {
        return this.inner.compute(query, keys, values);
    }
}
exports.LinearAttention = LinearAttention;
| |
| |
| |
/**
 * Sparse attention mixing a sliding local window with a set of global tokens,
 * backed by the native addon.
 */
class LocalGlobalAttention {
    /**
     * @param {number} dim - Embedding dimension.
     * @param {number} localWindow - Size of the local attention window.
     * @param {number} globalTokens - Number of globally-attended tokens.
     * @throws {Error} If the native addon cannot be loaded.
     */
    constructor(dim, localWindow, globalTokens) {
        const native = getAttentionModule();
        this.dim = dim;
        this.localWindow = localWindow;
        this.globalTokens = globalTokens;
        this.inner = new native.LocalGlobalAttention(dim, localWindow, globalTokens);
    }
    /**
     * Run attention for one query over a set of keys/values.
     * @returns {{values: number[], raw: Float32Array}}
     */
    compute(query, keys, values) {
        const raw = this.inner.compute(toFloat32Array(query), toFloat32Arrays(keys), toFloat32Arrays(values));
        return { values: fromFloat32Array(raw), raw };
    }
    /** Pass-through variant with no array conversion. */
    computeRaw(query, keys, values) {
        return this.inner.compute(query, keys, values);
    }
}
exports.LocalGlobalAttention = LocalGlobalAttention;
| |
| |
| |
/**
 * Mixture-of-experts attention backed by the native addon.
 */
class MoEAttention {
    /**
     * @param {{dim: number, numExperts: number, topK: number, expertCapacity?: number}} config
     *   Configuration; `expertCapacity` defaults to 1.25 when omitted.
     * @throws {Error} If the native addon cannot be loaded.
     */
    constructor(config) {
        const native = getAttentionModule();
        // The native layer expects snake_case keys, so translate from the
        // camelCase public config here.
        const nativeConfig = {
            dim: config.dim,
            num_experts: config.numExperts,
            top_k: config.topK,
            expert_capacity: config.expertCapacity ?? 1.25,
        };
        this.inner = new native.MoEAttention(nativeConfig);
        this.config = config;
    }
    /**
     * Convenience factory using the default expert capacity.
     * @returns {MoEAttention}
     */
    static simple(dim, numExperts, topK) {
        return new MoEAttention({ dim, numExperts, topK });
    }
    /**
     * Run attention for one query over a set of keys/values.
     * @returns {{values: number[], raw: Float32Array}}
     */
    compute(query, keys, values) {
        const raw = this.inner.compute(toFloat32Array(query), toFloat32Arrays(keys), toFloat32Arrays(values));
        return { values: fromFloat32Array(raw), raw };
    }
    /** Pass-through variant with no array conversion. */
    computeRaw(query, keys, values) {
        return this.inner.compute(query, keys, values);
    }
}
exports.MoEAttention = MoEAttention;
| |
| |
| |
| |
/**
 * Project a vector into the Poincaré ball of the given curvature.
 * @param {Float32Array|number[]} vector
 * @param {number} [curvature=1.0]
 * @returns {number[]}
 * @throws {Error} If the native addon cannot be loaded.
 */
function projectToPoincareBall(vector, curvature = 1.0) {
    const projected = getAttentionModule().projectToPoincareBall(toFloat32Array(vector), curvature);
    return fromFloat32Array(projected);
}
| |
| |
| |
/**
 * Hyperbolic distance between two points in the Poincaré ball.
 * @param {Float32Array|number[]} a
 * @param {Float32Array|number[]} b
 * @param {number} [curvature=1.0]
 * @returns {number}
 * @throws {Error} If the native addon cannot be loaded.
 */
function poincareDistance(a, b, curvature = 1.0) {
    return getAttentionModule().poincareDistance(toFloat32Array(a), toFloat32Array(b), curvature);
}
| |
| |
| |
/**
 * Möbius addition of two points in the Poincaré ball.
 * @param {Float32Array|number[]} a
 * @param {Float32Array|number[]} b
 * @param {number} [curvature=1.0]
 * @returns {number[]}
 * @throws {Error} If the native addon cannot be loaded.
 */
function mobiusAddition(a, b, curvature = 1.0) {
    const sum = getAttentionModule().mobiusAddition(toFloat32Array(a), toFloat32Array(b), curvature);
    return fromFloat32Array(sum);
}
| |
| |
| |
/**
 * Exponential map: move from `base` along `tangent` onto the manifold.
 * @param {Float32Array|number[]} base
 * @param {Float32Array|number[]} tangent
 * @param {number} [curvature=1.0]
 * @returns {number[]}
 * @throws {Error} If the native addon cannot be loaded.
 */
function expMap(base, tangent, curvature = 1.0) {
    const mapped = getAttentionModule().expMap(toFloat32Array(base), toFloat32Array(tangent), curvature);
    return fromFloat32Array(mapped);
}
| |
| |
| |
/**
 * Logarithmic map: tangent vector at `base` pointing toward `point`
 * (inverse of `expMap`).
 * @param {Float32Array|number[]} base
 * @param {Float32Array|number[]} point
 * @param {number} [curvature=1.0]
 * @returns {number[]}
 * @throws {Error} If the native addon cannot be loaded.
 */
function logMap(base, point, curvature = 1.0) {
    const mapped = getAttentionModule().logMap(toFloat32Array(base), toFloat32Array(point), curvature);
    return fromFloat32Array(mapped);
}
| |
| |
| |
/**
 * Probe whether the native `@ruvector/attention` addon can be loaded.
 * Never throws.
 * @returns {boolean}
 */
function isAttentionAvailable() {
    let available = true;
    try {
        getAttentionModule();
    }
    catch {
        available = false;
    }
    return available;
}
| |
| |
| |
/**
 * Report the native addon's version string, or null when the addon is
 * unavailable or exposes no `version()` function. Never throws.
 * @returns {string|null}
 */
function getAttentionVersion() {
    try {
        return getAttentionModule().version?.() ?? null;
    }
    catch {
        return null;
    }
}
| |
| |
| |
| |
| |
| |
| |
/**
 * Graph attention with rotary position embeddings, backed by the native addon.
 */
class GraphRoPeAttention {
    /**
     * @param {number} dim - Embedding dimension.
     * @param {number} [numHeads=4] - Number of attention heads.
     * @param {number} [maxSeqLen=4096] - Maximum sequence length for the RoPE table.
     * @throws {Error} If the native addon cannot be loaded.
     */
    constructor(dim, numHeads = 4, maxSeqLen = 4096) {
        const native = getAttentionModule();
        this.dim = dim;
        this.numHeads = numHeads;
        this.maxSeqLen = maxSeqLen;
        this.inner = new native.GraphRoPeAttention(dim, numHeads, maxSeqLen);
    }
    /**
     * Run attention; `positions` (optional integer list) selects the rotary
     * positions and is forwarded as an Int32Array.
     * @returns {{values: number[], raw: Float32Array}}
     */
    compute(query, keys, values, positions) {
        const pos = positions ? new Int32Array(positions) : undefined;
        const raw = this.inner.compute(toFloat32Array(query), toFloat32Arrays(keys), toFloat32Arrays(values), pos);
        return { values: fromFloat32Array(raw), raw };
    }
}
exports.GraphRoPeAttention = GraphRoPeAttention;
| |
| |
| |
| |
/**
 * Graph attention conditioned on per-edge feature vectors, backed by the
 * native addon.
 */
class EdgeFeaturedAttention {
    /**
     * @param {number} dim - Node embedding dimension.
     * @param {number} [edgeDim=16] - Edge feature dimension.
     * @throws {Error} If the native addon cannot be loaded.
     */
    constructor(dim, edgeDim = 16) {
        const native = getAttentionModule();
        this.dim = dim;
        this.edgeDim = edgeDim;
        this.inner = new native.EdgeFeaturedAttention(dim, edgeDim);
    }
    /**
     * Run attention; optional `edgeFeatures` (one vector per key) are
     * converted to Float32Arrays before forwarding.
     * @returns {{values: number[], raw: Float32Array}}
     */
    compute(query, keys, values, edgeFeatures) {
        const edges = edgeFeatures ? toFloat32Arrays(edgeFeatures) : undefined;
        const raw = this.inner.compute(toFloat32Array(query), toFloat32Arrays(keys), toFloat32Arrays(values), edges);
        return { values: fromFloat32Array(raw), raw };
    }
}
exports.EdgeFeaturedAttention = EdgeFeaturedAttention;
| |
| |
| |
| |
/**
 * Attention blending Euclidean and hyperbolic scores, backed by the native
 * addon.
 */
class DualSpaceAttention {
    /**
     * @param {number} dim - Embedding dimension.
     * @param {number} [curvature=1.0] - Hyperbolic curvature parameter.
     * @param {number} [alpha=0.5] - Mixing weight between the two spaces.
     * @throws {Error} If the native addon cannot be loaded.
     */
    constructor(dim, curvature = 1.0, alpha = 0.5) {
        const native = getAttentionModule();
        this.dim = dim;
        this.curvature = curvature;
        this.alpha = alpha;
        this.inner = new native.DualSpaceAttention(dim, curvature, alpha);
    }
    /**
     * Run attention for one query over a set of keys/values.
     * @returns {{values: number[], raw: Float32Array}}
     */
    compute(query, keys, values) {
        const raw = this.inner.compute(toFloat32Array(query), toFloat32Arrays(keys), toFloat32Arrays(values));
        return { values: fromFloat32Array(raw), raw };
    }
}
exports.DualSpaceAttention = DualSpaceAttention;
| |
| |
| |
/**
 * Plain scaled dot-product attention backed by the native addon.
 */
class DotProductAttention {
    /**
     * @param {number} dim - Embedding dimension.
     * @throws {Error} If the native addon cannot be loaded.
     */
    constructor(dim) {
        const native = getAttentionModule();
        this.dim = dim;
        this.inner = new native.DotProductAttention(dim);
    }
    /**
     * Run attention for one query over a set of keys/values.
     * @returns {{values: number[], raw: Float32Array}}
     */
    compute(query, keys, values) {
        const raw = this.inner.compute(toFloat32Array(query), toFloat32Arrays(keys), toFloat32Arrays(values));
        return { values: fromFloat32Array(raw), raw };
    }
}
exports.DotProductAttention = DotProductAttention;
| |
| |
| |
| |
| |
| |
/**
 * Compute attention for many queries in parallel on the native side.
 * @param {Array<Float32Array|number[]>} queries
 * @param {Array<Float32Array|number[]>} keys
 * @param {Array<Float32Array|number[]>} values
 * @param {string} [attentionType='multi-head'] - Native mechanism selector.
 * @returns {Promise<number[][]>} One result vector per query.
 * @throws {Error} If the native addon cannot be loaded.
 */
async function parallelAttentionCompute(queries, keys, values, attentionType = 'multi-head') {
    const native = getAttentionModule();
    const raw = await native.parallelAttentionCompute(toFloat32Arrays(queries), toFloat32Arrays(keys), toFloat32Arrays(values), attentionType);
    return raw.map((vec) => fromFloat32Array(vec));
}
| |
| |
| |
/**
 * Compute attention over a list of independent {query, keys, values} batches.
 * @param {Array<{query: Float32Array|number[], keys: Array, values: Array}>} batches
 * @param {string} [attentionType='multi-head'] - Native mechanism selector.
 * @returns {Promise<number[][]>} One result vector per batch.
 * @throws {Error} If the native addon cannot be loaded.
 */
async function batchAttentionCompute(batches, attentionType = 'multi-head') {
    const native = getAttentionModule();
    const nativeBatches = batches.map(({ query, keys, values }) => ({
        query: toFloat32Array(query),
        keys: toFloat32Arrays(keys),
        values: toFloat32Arrays(values),
    }));
    const raw = await native.batchAttentionCompute(nativeBatches, attentionType);
    return raw.map((vec) => fromFloat32Array(vec));
}
| |
| |
| |
/**
 * Promise adapter over the native callback-style flash attention entry point.
 * @param {Float32Array|number[]} query
 * @param {Array<Float32Array|number[]>} keys
 * @param {Array<Float32Array|number[]>} values
 * @returns {Promise<number[]>}
 * @throws {Error} If the native addon cannot be loaded.
 */
function computeFlashAttentionAsync(query, keys, values) {
    const native = getAttentionModule();
    return new Promise((resolve, reject) => {
        const done = (err, result) => {
            if (err) {
                reject(err);
            }
            else {
                resolve(fromFloat32Array(result));
            }
        };
        native.computeFlashAttentionAsync(toFloat32Array(query), toFloat32Arrays(keys), toFloat32Arrays(values), done);
    });
}
| |
| |
| |
/**
 * Promise adapter over the native callback-style hyperbolic attention entry
 * point.
 * @param {Float32Array|number[]} query
 * @param {Array<Float32Array|number[]>} keys
 * @param {Array<Float32Array|number[]>} values
 * @param {number} [curvature=1.0]
 * @returns {Promise<number[]>}
 * @throws {Error} If the native addon cannot be loaded.
 */
function computeHyperbolicAttentionAsync(query, keys, values, curvature = 1.0) {
    const native = getAttentionModule();
    return new Promise((resolve, reject) => {
        const done = (err, result) => {
            if (err) {
                reject(err);
            }
            else {
                resolve(fromFloat32Array(result));
            }
        };
        native.computeHyperbolicAttentionAsync(toFloat32Array(query), toFloat32Arrays(keys), toFloat32Arrays(values), curvature, done);
    });
}
| |
| |
| |
| |
| |
| |
/**
 * Adam optimizer backed by the native addon; moment state lives natively.
 */
class AdamOptimizer {
    /**
     * @param {number} [learningRate=0.001]
     * @param {number} [beta1=0.9] - First-moment decay rate.
     * @param {number} [beta2=0.999] - Second-moment decay rate.
     * @throws {Error} If the native addon cannot be loaded.
     */
    constructor(learningRate = 0.001, beta1 = 0.9, beta2 = 0.999) {
        const native = getAttentionModule();
        this.inner = new native.AdamOptimizer(learningRate, beta1, beta2);
    }
    /**
     * Apply one update step.
     * @param {Float32Array|number[]} gradients
     * @param {Float32Array|number[]} params - Current parameter values.
     * @returns {number[]} Updated parameters.
     */
    step(gradients, params) {
        const updated = this.inner.step(toFloat32Array(gradients), toFloat32Array(params));
        return fromFloat32Array(updated);
    }
}
exports.AdamOptimizer = AdamOptimizer;
| |
| |
| |
/**
 * InfoNCE contrastive loss for one anchor/positive pair against a set of
 * negatives, computed natively.
 * @param {Float32Array|number[]} anchor
 * @param {Float32Array|number[]} positive
 * @param {Array<Float32Array|number[]>} negatives
 * @param {number} [temperature=0.07]
 * @returns {number}
 * @throws {Error} If the native addon cannot be loaded.
 */
function infoNceLoss(anchor, positive, negatives, temperature = 0.07) {
    const native = getAttentionModule();
    return native.InfoNceLoss.compute(toFloat32Array(anchor), toFloat32Array(positive), toFloat32Arrays(negatives), temperature);
}
| |
| |
| |
/**
 * Select the `topK` hardest negatives for an anchor from a candidate pool
 * using the native miner.
 * @param {Float32Array|number[]} anchor
 * @param {Array<Float32Array|number[]>} candidates
 * @param {number} [topK=5]
 * @returns {number[][]} The mined negative vectors.
 * @throws {Error} If the native addon cannot be loaded.
 */
function mineHardNegatives(anchor, candidates, topK = 5) {
    const miner = new (getAttentionModule()).HardNegativeMiner(topK);
    const mined = miner.mine(toFloat32Array(anchor), toFloat32Arrays(candidates));
    return mined.map((vec) => fromFloat32Array(vec));
}
| |
| |
| |
| |
| |
| |
/**
 * Run the native attention benchmark.
 * @param {number} dim - Embedding dimension.
 * @param {number} seqLen - Sequence length to benchmark.
 * @param {number} [iterations=100] - Number of timed iterations.
 * @returns {Promise<*>} Whatever the native benchmark reports.
 * @throws {Error} If the native addon cannot be loaded.
 */
async function benchmarkAttention(dim, seqLen, iterations = 100) {
    return getAttentionModule().benchmarkAttention(dim, seqLen, iterations);
}
// Default export mirrors the named exports for `import attention from ...`
// interop consumers.
exports.default = {
    // Core attention mechanisms
    DotProductAttention,
    MultiHeadAttention,
    FlashAttention,
    HyperbolicAttention,
    LinearAttention,
    LocalGlobalAttention,
    MoEAttention,
    // Graph-aware attention variants
    GraphRoPeAttention,
    EdgeFeaturedAttention,
    DualSpaceAttention,
    // Parallel / batched / callback-adapted compute helpers
    parallelAttentionCompute,
    batchAttentionCompute,
    computeFlashAttentionAsync,
    computeHyperbolicAttentionAsync,
    // Training utilities
    AdamOptimizer,
    infoNceLoss,
    mineHardNegatives,
    // Hyperbolic (Poincaré ball) math helpers
    projectToPoincareBall,
    poincareDistance,
    mobiusAddition,
    expMap,
    logMap,
    // Introspection and benchmarking
    isAttentionAvailable,
    getAttentionVersion,
    benchmarkAttention,
};
|
|