| |
| |
| |
| |
|
|
| (function () { |
// Hugging Face dataset repository that every request below targets.
const REPO = 'juiceb0xc0de/gemma-4-e2b-atlas';
// Optional bearer token supplied by the host page via `window.HF_TOKEN`;
// when it is absent or empty, requests are sent without an Authorization header.
const TOKEN = window.HF_TOKEN || '';
const HEADERS = TOKEN ? { Authorization: `Bearer ${TOKEN}` } : {};
// Shared prefix of the three datasets-server endpoints used by this loader.
const API_BASE = 'https://huggingface.co/proxy/datasets-server.huggingface.co';
const REPO_PARAM = encodeURIComponent(REPO);
/**
 * Build the URL of one page of rows.
 * `cfg` and `split` are percent-encoded so names containing reserved
 * characters cannot break the query string.
 * @param {string} cfg   dataset config name
 * @param {string} split split name
 * @param {number} off   zero-based row offset
 * @param {number} len   number of rows requested
 * @returns {string}
 */
const ROWS = (cfg, split, off, len) =>
  `${API_BASE}/rows?dataset=${REPO_PARAM}&config=${encodeURIComponent(cfg)}&split=${encodeURIComponent(split)}&offset=${off}&length=${len}`;
const SPLITS = `${API_BASE}/splits?dataset=${REPO_PARAM}`;
const SIZE = `${API_BASE}/size?dataset=${REPO_PARAM}`;
|
|
// Loader status object handed to every subscriber; it is mutated in place,
// so subscribers always receive the same object reference.
const state = {
  status: 'idle',
  error: null,
  repo: REPO,
  splits: [],
  progress: { perHead: 0, bouncer: 0, prompts: 0 },
  counts: { perHead: 0, bouncer: 0, prompts: 0 },
  refreshedAt: null,
};
// Registered subscriber callbacks.
const subs = new Set();

/**
 * Call every subscriber with the current state.
 * A throwing subscriber must not prevent the others from being notified,
 * but the failure is logged instead of being silently discarded.
 */
function notify() {
  for (const fn of subs) {
    try {
      fn(state);
    } catch (err) {
      console.warn('[AtlasLoader] subscriber threw:', err);
    }
  }
}
|
|
/**
 * List the dataset's (config, split) pairs together with their row counts.
 *
 * Pair names come from the `SPLITS` endpoint and row counts from the `SIZE`
 * endpoint. The two requests are independent, so they are issued in parallel.
 * Either one may fail without failing the call: a failure is logged and that
 * endpoint simply contributes nothing.
 *
 * @returns {Promise<Array<{config: string, split: string, num_rows: (number|null)}>>}
 *   `num_rows` is null for pairs the size endpoint did not report.
 */
async function fetchSplits() {
  // Best-effort JSON GET: resolves to null on a non-OK status, a network
  // error, or an unparsable body.
  const getJson = async (url, what) => {
    try {
      const r = await fetch(url, { headers: HEADERS });
      if (!r.ok) {
        console.warn(`[AtlasLoader] ${what} request returned HTTP ${r.status}`);
        return null;
      }
      return await r.json();
    } catch (err) {
      console.warn(`[AtlasLoader] ${what} request failed:`, err);
      return null;
    }
  };

  const [splitsJson, sizeJson] = await Promise.all([
    getJson(SPLITS, 'splits'),
    getJson(SIZE, 'size'),
  ]);

  const splits = (splitsJson?.splits || []).map((s) => ({
    config: s.config,
    split: s.split,
    num_rows: null,
  }));

  // Fill in row counts; pairs only known to the size endpoint are appended.
  const sizes = sizeJson?.size?.splits || [];
  for (const sz of sizes) {
    const known = splits.find((s) => s.config === sz.config && s.split === sz.split);
    if (known) known.num_rows = sz.num_rows;
    else splits.push({ config: sz.config, split: sz.split, num_rows: sz.num_rows });
  }
  return splits;
}

/**
 * Placeholder that always resolves to null; nothing in the visible code calls it.
 * @returns {Promise<null>}
 */
async function fetchSize() {
  return null;
}
|
|
| |
/**
 * GET `url` with exponential backoff.
 *
 * Only transient failures are retried: HTTP 429, HTTP 5xx, and errors thrown
 * by `fetch` itself (network-level failures). Any other non-OK status is
 * reported immediately, because repeating the same request cannot succeed and
 * would only delay the caller.
 *
 * @param {string} url
 * @param {number} [retries=4]     retries allowed after the first attempt
 * @param {number} [baseDelay=800] wait before the first retry in ms; doubles each retry
 * @returns {Promise<Response>} the first OK response
 * @throws {Error} `HTTP <status>` for a failing status, or the underlying
 *   fetch error once retries are exhausted
 */
async function fetchPage(url, retries = 4, baseDelay = 800) {
  let delay = baseDelay;
  for (let attempt = 0; attempt <= retries; attempt++) {
    let failure;
    let retryable = true;
    try {
      const r = await fetch(url, { headers: HEADERS });
      if (r.ok) return r;
      failure = new Error(`HTTP ${r.status}`);
      retryable = r.status === 429 || r.status >= 500;
    } catch (e) {
      // fetch rejected: treated as transient.
      failure = e;
    }
    if (!retryable || attempt >= retries) throw failure;
    await new Promise((res) => setTimeout(res, delay));
    delay *= 2;
  }
}
|
|
| |
| |
/**
 * Download every row of one (config, split) by paging through the rows endpoint.
 *
 * Paging stops when `total` rows have been read, when a page comes back empty
 * or shorter than requested, or when a page cannot be fetched or parsed. In
 * the failure case the rows gathered so far are returned rather than thrown
 * away.
 *
 * @param {string} cfg   dataset config name
 * @param {string} split split name
 * @param {?number} total expected row count, or null/undefined when unknown
 * @param {?function(number, number): void} onProgress called as (rowsDone, denominator)
 *   after each page and once more as (rowsDone, rowsDone) at the end
 * @returns {Promise<Array>} the `row` payload of every entry received
 */
async function fetchAllRows(cfg, split, total, onProgress) {
  const PAGE = 100;
  const out = [];
  const report = (done, denom) => {
    if (onProgress) onProgress(done, denom);
  };
  let off = 0; // rows consumed so far; also the next offset to request
  while (true) {
    const want = total != null ? Math.min(PAGE, total - off) : PAGE;
    if (want <= 0) break;
    let rows;
    try {
      const r = await fetchPage(ROWS(cfg, split, off, want));
      // Parsing stays inside the try so a malformed body degrades to partial
      // data exactly like a failed request does.
      const j = await r.json();
      rows = Array.isArray(j?.rows) ? j.rows : [];
    } catch (e) {
      console.warn(`[AtlasLoader] ${cfg} @${off} failed:`, e?.message ?? e, '— using partial data');
      break;
    }
    if (rows.length === 0) break;
    for (const entry of rows) out.push(entry.row);
    off += rows.length;
    // With an unknown total, report against a denominator one page ahead so
    // the fraction stays below 1 until the final call.
    report(off, total != null ? total : off + PAGE);
    if (rows.length < want) break;
  }
  report(off, off);
  return out;
}
|
|
| |
|
|
| |
/**
 * Rewrite a Python-literal-style string into JSON text.
 * Quoted strings (single or double) are re-emitted as JSON strings, and the
 * bare words True / False / None outside strings become true / false / null.
 * @param {string} src
 * @returns {string} candidate JSON text (not guaranteed to be valid)
 */
function pyLiteralToJson(src) {
  const PY_WORDS = { True: 'true', False: 'false', None: 'null' };
  let out = '';
  let i = 0;
  while (i < src.length) {
    const ch = src[i];
    if (ch === "'" || ch === '"') {
      // Copy one quoted string, translating its quoting to JSON rules.
      let body = '';
      i += 1;
      while (i < src.length && src[i] !== ch) {
        if (src[i] === '\\' && i + 1 < src.length) {
          // \' needs no escape in JSON; every other escape is passed through.
          body += src[i + 1] === "'" ? "'" : src[i] + src[i + 1];
          i += 2;
        } else {
          // A bare double quote inside a single-quoted string must be escaped.
          body += src[i] === '"' ? '\\"' : src[i];
          i += 1;
        }
      }
      i += 1; // skip the closing quote
      out += `"${body}"`;
    } else {
      // Copy everything up to the next quote, translating Python keywords.
      let j = i;
      while (j < src.length && src[j] !== "'" && src[j] !== '"') j += 1;
      out += src.slice(i, j).replace(/\b(True|False|None)\b/g, (w) => PY_WORDS[w]);
      i = j;
    }
  }
  return out;
}

/**
 * Parse a dict serialized either as JSON or as a Python literal
 * (single-quoted strings, True/False/None).
 *
 * Valid JSON is parsed as-is, so apostrophes inside JSON strings survive.
 * Anything else goes through a quote-aware rewrite before parsing.
 *
 * @param {*} s
 * @returns {*} the parsed value, or `{}` when `s` is empty, not a string,
 *   or cannot be parsed
 */
function parsePyDict(s) {
  if (!s || typeof s !== 'string') return {};
  try {
    return JSON.parse(s);
  } catch {
    // Not JSON; fall through to the Python-literal rewrite.
  }
  try {
    return JSON.parse(pyLiteralToJson(s));
  } catch {
    return {};
  }
}
|
|
/**
 * Derive the layer count and the ordered component list from per-head rows.
 *
 * Rows without a component contribute no component, and rows whose `layer`
 * is not a finite number (or numeric string) do not affect the layer count;
 * this mirrors how bouncer records lacking those fields are skipped.
 *
 * @param {Array<Object>} perHead rows exposing `layer` and `component`
 * @returns {{N_LAYERS: number, attnComponents: Array, perHead: Array}}
 *   `N_LAYERS` is the highest layer index + 1 (0 when no row has a layer);
 *   `attnComponents` lists heads/q/k/v first in that order, then any other
 *   component in default sort order; `perHead` is the input array itself.
 */
function buildFromPerHead(perHead) {
  const preferred = ['heads', 'q', 'k', 'v'];
  const seen = new Set();
  let maxLayer = -1;
  for (const r of perHead) {
    // Coerce so a string layer cannot turn `maxLayer + 1` into concatenation.
    const layer = typeof r.layer === 'number' ? r.layer : Number.parseFloat(r.layer);
    if (Number.isFinite(layer) && layer > maxLayer) maxLayer = layer;
    if (r.component != null) seen.add(r.component);
  }
  const attnComponents = [
    ...preferred.filter((c) => seen.has(c)),
    ...[...seen].filter((c) => !preferred.includes(c)).sort(),
  ];
  return { N_LAYERS: maxLayer + 1, attnComponents, perHead };
}
|
|
/**
 * Normalize bouncer-analysis rows into a flat feature list.
 *
 * Each row carries its payload in `record`, either as a JSON string or as an
 * already-parsed object; both forms are accepted. Rows whose record cannot be
 * parsed, or lacks `component` or `layer`, are skipped.
 *
 * Rows with `split === 'per_head'` are mapped onto the common feature shape
 * (`feature` takes the head's `top_dim`, `rank` takes the head index,
 * `fstat` takes `fstat_best`, `delta` takes `delta_at_top`). Every other row
 * keeps its record fields as-is with `split` added.
 *
 * @param {Array<Object>} bouncer rows exposing `record` and `split`
 * @returns {{N_LAYERS_B: number, mlpComponents: Array, features: Array<Object>}}
 *   `N_LAYERS_B` is the highest layer seen + 1; `mlpComponents` is the sorted
 *   set of components seen.
 */
function buildFromBouncer(bouncer) {
  const features = [];
  const compSet = new Set();
  let maxLayer = -1;
  for (const r of bouncer) {
    let rec = r?.record;
    if (typeof rec === 'string') {
      try {
        rec = JSON.parse(rec);
      } catch {
        continue; // unparsable record: skip the row
      }
    }
    if (!rec || typeof rec !== 'object' || rec.component == null || rec.layer == null) continue;
    compSet.add(rec.component);
    if (rec.layer > maxLayer) maxLayer = rec.layer;

    if (r.split === 'per_head') {
      features.push({
        layer: rec.layer,
        component: rec.component,
        head: rec.head,
        top_dim: rec.top_dim,
        feature: rec.top_dim,
        rank: rec.head,
        fstat: rec.fstat_best,
        fstat_mean: rec.fstat_mean,
        delta: rec.delta_at_top,
        leaning: rec.corp_leaning ? 'corp' : 'auth',
        split: r.split,
      });
    } else {
      features.push({ ...rec, split: r.split });
    }
  }
  return { N_LAYERS_B: maxLayer + 1, mlpComponents: [...compSet].sort(), features };
}
|
|
/**
 * Group prompt rows by category.
 *
 * Rows without a truthy `category` are ignored. Only the first few prompts
 * of each category are retained as samples, so the full prompt set is not
 * held a second time.
 *
 * @param {Array<Object>} prompts rows exposing `category`, `subcategory`, `prompt`
 * @returns {Array<{id: string, label: *, count: number,
 *   subcategories: Array<{label: *, count: number}>, samplePrompts: Array}>}
 *   categories ordered by descending count; subcategories likewise. `id` is
 *   the lower-cased label with each run of non-alphanumerics replaced by `_`.
 */
function buildFromPrompts(prompts) {
  const SAMPLE_LIMIT = 6;
  const cats = new Map();
  for (const r of prompts) {
    if (!r.category) continue;
    let bucket = cats.get(r.category);
    if (!bucket) {
      bucket = { count: 0, subs: new Map(), samples: [] };
      cats.set(r.category, bucket);
    }
    bucket.count += 1;
    if (bucket.samples.length < SAMPLE_LIMIT) bucket.samples.push(r.prompt);
    if (r.subcategory) {
      bucket.subs.set(r.subcategory, (bucket.subs.get(r.subcategory) || 0) + 1);
    }
  }
  const byCountDesc = (a, b) => b.count - a.count;
  return [...cats.entries()]
    .map(([label, bucket]) => ({
      // String() keeps a non-string category from throwing here.
      id: String(label).toLowerCase().replace(/[^a-z0-9]+/g, '_'),
      label,
      count: bucket.count,
      subcategories: [...bucket.subs.entries()]
        .map(([k, v]) => ({ label: k, count: v }))
        .sort(byCountDesc),
      samplePrompts: bucket.samples,
    }))
    .sort(byCountDesc);
}
|
|
| |
/**
 * Map a component id onto its display family.
 * @param {*} c component id
 * @returns {'attention'|'mlp'|'residual'|'other'} 'other' for any id not listed
 */
function classifyComp(c) {
  switch (c) {
    case 'heads':
    case 'q':
    case 'k':
    case 'v':
    case 'o':
    case 'attn_out':
      return 'attention';
    case 'mlp':
    case 'gate':
    case 'up':
    case 'down':
      return 'mlp';
    case 'resid_pre':
    case 'resid_mid':
    case 'resid_post':
      return 'residual';
    default:
      return 'other';
  }
}
|
|
| |
| |
| |
| |
| |
| |
/**
 * Build the layer x component score matrix, normalized per component.
 *
 * A cell takes the mean `top_sep_score` of the per-head rows for that
 * (layer, component); when there are none, it takes the largest `fstat`
 * among the bouncer features for that cell. Scores that are missing or not
 * finite are ignored, so one incomplete row cannot blank or poison a cell.
 * Each column is then divided by its largest value (a non-positive or absent
 * maximum leaves the column undivided).
 *
 * @param {number} N_LAYERS number of matrix rows
 * @param {Array} components column ids, in column order
 * @param {Array<Object>} perHead rows exposing `layer`, `component`, `top_sep_score`
 * @param {Array<Object>} bouncerFeatures features exposing `layer`, `component`, `fstat`
 * @returns {{matrix: number[][], covered: boolean[]}} `matrix[l][ci]` is NaN for
 *   cells without data; `covered[l]` is true when layer `l` has any data
 */
function buildFstatMatrix(N_LAYERS, components, perHead, bouncerFeatures) {
  const cellKey = (layer, comp) => `${layer}|${comp}`;
  // Accept numbers and numeric strings; anything else counts as missing.
  const toScore = (v) => {
    const n = typeof v === 'number' ? v : Number.parseFloat(v);
    return Number.isFinite(n) ? n : null;
  };

  // Running sum and count of per-head scores per cell.
  const headStats = new Map();
  for (const r of perHead) {
    const score = toScore(r.top_sep_score);
    if (score === null) continue;
    const k = cellKey(r.layer, r.component);
    const stat = headStats.get(k) || { sum: 0, n: 0 };
    stat.sum += score;
    stat.n += 1;
    headStats.set(k, stat);
  }

  // Largest bouncer fstat per cell.
  const featureMax = new Map();
  for (const f of bouncerFeatures) {
    const score = toScore(f.fstat);
    if (score === null) continue;
    const k = cellKey(f.layer, f.component);
    if (!featureMax.has(k) || score > featureMax.get(k)) featureMax.set(k, score);
  }

  // Pass 1: raw cell values, column maxima, and layer coverage.
  const covered = new Array(N_LAYERS).fill(false);
  const columnMax = new Map();
  const raw = [];
  for (let l = 0; l < N_LAYERS; l++) {
    raw.push(components.map((c) => {
      const k = cellKey(l, c);
      let v = null;
      if (headStats.has(k)) {
        const { sum, n } = headStats.get(k);
        v = sum / n;
      } else if (featureMax.has(k)) {
        v = featureMax.get(k);
      }
      if (v !== null) {
        columnMax.set(c, Math.max(columnMax.get(c) || 0, v));
        covered[l] = true;
      }
      return v;
    }));
  }

  // Pass 2: scale each column by its maximum.
  const matrix = raw.map((row) =>
    row.map((v, ci) => (v === null ? NaN : v / (columnMax.get(components[ci]) || 1))));
  return { matrix, covered };
}
|
|
| |
/**
 * Turn bouncer features into display-ready target records, strongest first.
 *
 * Per target:
 *  - `ablate`  = log10(1 + fstat) * (1 - min(1, |delta|)) * 25, one decimal
 *  - `bouncer` = min(1, |delta|), three decimals
 *  - `topicF`  = min(1, fstat / 3000), three decimals
 *  - `id`      = split initials (up to three, 's' when empty), layer,
 *                component, then head/dim for per_head rows or feature/rank
 *                otherwise
 * Missing `fstat` / `delta` count as 0 in the formulas. Targets are ordered
 * by descending `fstat`; targets without a numeric `fstat` sort last.
 *
 * @param {Array<Object>} features normalized bouncer features (not mutated)
 * @returns {Array<Object>} a new array of target records
 */
function buildSurgicalTargets(features) {
  const targets = features.map((f) => {
    const dAbs = Math.min(1, Math.abs(f.delta || 0));
    const ablate = Math.log10(1 + (f.fstat || 0)) * (1 - dAbs);
    const initials = (f.split || '')
      .split('_')
      .map((word) => word[0] || '')
      .join('')
      .slice(0, 3) || 's';
    const tail = f.split === 'per_head'
      ? `.h${f.head}.d${f.top_dim}`
      : `.f${f.feature}.r${f.rank}`;
    return {
      id: `${initials}.L${f.layer}.${f.component}${tail}`,
      split: f.split,
      layer: f.layer,
      component: f.component,
      feature: f.feature,
      head: f.head,
      rank: f.rank,
      fstat: f.fstat,
      delta: f.delta,
      leaning: f.leaning,
      ablate: +(ablate * 25).toFixed(1),
      bouncer: +dAbs.toFixed(3),
      topicF: +(Math.min(1, (f.fstat || 0) / 3000)).toFixed(3),
    };
  });

  // A missing fstat would make `b.fstat - a.fstat` NaN, which leaves the
  // order unspecified; rank such targets below every numeric one instead.
  const strength = (t) => {
    const n = t.fstat == null ? NaN : Number(t.fstat);
    return Number.isNaN(n) ? -Infinity : n;
  };
  return targets.sort((a, b) => {
    const sa = strength(a);
    const sb = strength(b);
    return sa === sb ? 0 : sb - sa;
  });
}
|
|
| |
/**
 * Index features by their (layer, component) cell.
 *
 * @param {Array<Object>} features features exposing `layer`, `component`, `rank`
 * @returns {Map<string, Array<Object>>} key `"<layer>|<component>"` mapped to
 *   that cell's features in ascending `rank`; features without a numeric
 *   rank come last
 */
function buildTopByCell(features) {
  const cells = new Map();
  for (const f of features) {
    const key = `${f.layer}|${f.component}`;
    const bucket = cells.get(key);
    if (bucket) bucket.push(f);
    else cells.set(key, [f]);
  }
  // A missing rank would make `a.rank - b.rank` NaN and the order
  // unspecified; treat it as +Infinity so such entries trail the ranked ones.
  const rankOf = (f) => {
    const n = f.rank == null ? NaN : Number(f.rank);
    return Number.isNaN(n) ? Infinity : n;
  };
  for (const bucket of cells.values()) {
    bucket.sort((a, b) => {
      const ra = rankOf(a);
      const rb = rankOf(b);
      return ra === rb ? 0 : ra - rb;
    });
  }
  return cells;
}
|
|
| |
/**
 * Pre-index per-head rows by (layer, component) and return a lookup function.
 *
 * @param {Array<Object>} perHead per-head rows
 * @returns {function(*, *=): Array<Object>} `headBreakdown(layer, component)`;
 *   `component` falls back to 'heads' when falsy. The result lists one summary
 *   per row of that cell, ordered by descending `fstat` (the row's
 *   `top_sep_score`), or an empty array for an unknown cell.
 */
function buildHeadBreakdownFn(perHead) {
  const index = new Map();
  for (const row of perHead) {
    const key = `${row.layer}|${row.component}`;
    if (!index.has(key)) index.set(key, []);
    index.get(key).push(row);
  }

  // Shape one raw row into the summary object consumers read.
  const summarize = (row) => ({
    head: row.head,
    role: pickRole(row),
    fstat: row.top_sep_score,
    meanFstat: row.mean_sep_score,
    topCodeDim: row.top_code_dim,
    topCodeSpec: parseFloat(row.top_code_spec),
    taxonomy: {
      all_shared: row.tax_all_shared ?? 0,
      broadly_shared: row.tax_broadly_shared ?? 0,
      partial_shared: row.tax_partial_shared ?? 0,
      specific: row.tax_specific ?? 0,
      non_activated: row.tax_non_activated ?? 0,
    },
    dims: row.dims,
    nonZeroVar: row.non_zero_var,
  });

  return function headBreakdown(layer, component) {
    const comp = component || 'heads';
    const rows = index.get(`${layer}|${comp}`) || [];
    const summaries = rows.map(summarize);
    summaries.sort((a, b) => b.fstat - a.fstat);
    return summaries;
  };
}
|
|
| |
/**
 * Label a per-head row with a coarse role.
 *
 * Checks run in this order and the first match wins:
 *   top_code_spec > 0.4                -> 'specialist'
 *   tax_all_shared / dims > 0.4        -> 'omni-active'
 *   tax_non_activated / dims > 0.4     -> 'sparse / gated'
 *   tax_broadly_shared / dims > 0.55   -> 'broad mixer'
 *   top_sep_score > 10                 -> 'sharp separator'
 *   otherwise                          -> 'partial mixer'
 * A falsy `dims` is treated as 1 and falsy taxonomy counts as 0.
 *
 * @param {Object} r per-head row
 * @returns {string}
 */
function pickRole(r) {
  const width = r.dims || 1;
  // Fraction of the head's dims falling in one taxonomy bucket.
  const fraction = (count) => (count || 0) / width;
  const specificity = parseFloat(r.top_code_spec) || 0;

  if (specificity > 0.4) {
    return 'specialist';
  }
  if (fraction(r.tax_all_shared) > 0.4) {
    return 'omni-active';
  }
  if (fraction(r.tax_non_activated) > 0.4) {
    return 'sparse / gated';
  }
  if (fraction(r.tax_broadly_shared) > 0.55) {
    return 'broad mixer';
  }
  return r.top_sep_score > 10 ? 'sharp separator' : 'partial mixer';
}
|
|
| |
/**
 * Wrap a (layer, component) -> features index in a lookup function.
 *
 * @param {Map<string, Array<Object>>} topByCell index keyed `"<layer>|<component>"`
 * @returns {function(*, *): Array<Object>} `topFeatures(layer, component)`
 *   returning at most six entries in the index's order, each with `feature`,
 *   `fstat`, `delta`, `leaning`, `density` (always undefined) and a one-line
 *   `desc`; an unknown cell yields an empty array
 */
function buildTopFeaturesFn(topByCell) {
  const MAX_FEATURES = 6;
  // A missing or non-numeric delta is shown as "n/a" rather than rendering
  // "undefined" or throwing from toFixed.
  const formatDelta = (delta) =>
    (typeof delta === 'number' && Number.isFinite(delta) ? delta.toFixed(2) : 'n/a');

  return function topFeatures(layer, component) {
    const cell = topByCell.get(`${layer}|${component}`) || [];
    return cell.slice(0, MAX_FEATURES).map((f) => ({
      feature: f.feature,
      fstat: f.fstat,
      delta: f.delta,
      leaning: f.leaning,
      density: undefined,
      desc: `f${f.feature} · ${f.leaning}-leaning (Δ ${formatDelta(f.delta)})`,
    }));
  };
}
|
|
| |
| |
| |
| |
| |
| |
|
|
| |
|
|
/**
 * Reload everything from the dataset and publish it on `window.AtlasData`.
 *
 * Steps:
 *  1. Mark the loader as 'loading' and reset progress.
 *  2. Discover splits; the `train` split of `per_head`, `bouncer_analysis`
 *     and `prompts` is required, while `component_summary`, `code_analysis`
 *     and `ov_circuits` are loaded only when present.
 *  3. Download all six row sets in parallel, reporting progress for the
 *     three required ones.
 *  4. Derive layers, components, the score matrix, targets and lookup helpers.
 *  5. Merge the result into `window.AtlasData`, mark the loader 'live' and
 *     dispatch an `atlas-data-ready` window event.
 *
 * Never rejects: on failure the state becomes 'error' with `state.error`
 * holding the message, and `window.AtlasData` is left as it was.
 *
 * @returns {Promise<void>}
 */
async function refresh() {
  state.status = 'loading';
  state.error = null;
  state.progress = { perHead: 0, bouncer: 0, prompts: 0 };
  notify();

  try {
    const splits = await fetchSplits();
    state.splits = splits;
    notify();

    const findSplit = (cfg) => splits.find((s) => s.config === cfg && s.split === 'train');
    const phSplit = findSplit('per_head');
    const bSplit = findSplit('bouncer_analysis');
    const pSplit = findSplit('prompts');
    const csSplit = findSplit('component_summary');
    const caSplit = findSplit('code_analysis');
    const ovSplit = findSplit('ov_circuits');
    if (!phSplit || !bSplit || !pSplit) {
      throw new Error('expected configs missing — has per_head / bouncer_analysis / prompts been published?');
    }
    state.counts = {
      perHead: phSplit.num_rows ?? null,
      bouncer: bSplit.num_rows ?? null,
      prompts: pSplit.num_rows ?? null,
    };
    notify();

    // Progress callback writing one fraction into state.progress.
    const track = (key) => (done, total) => {
      state.progress[key] = total ? done / total : 0;
      notify();
    };
    // Optional configs resolve to an empty list when their split is absent.
    const optionalRows = (cfg, found) =>
      (found ? fetchAllRows(cfg, 'train', found.num_rows ?? null, null) : Promise.resolve([]));

    const [perHead, bouncer, prompts, componentSummary, codeAnalysis, ovCircuits] = await Promise.all([
      fetchAllRows('per_head', 'train', phSplit.num_rows ?? null, track('perHead')),
      fetchAllRows('bouncer_analysis', 'train', bSplit.num_rows ?? null, track('bouncer')),
      fetchAllRows('prompts', 'train', pSplit.num_rows ?? null, track('prompts')),
      optionalRows('component_summary', csSplit),
      optionalRows('code_analysis', caSplit),
      optionalRows('ov_circuits', ovSplit),
    ]);

    const ph = buildFromPerHead(perHead);
    const bn = buildFromBouncer(bouncer);
    const N_LAYERS = Math.max(ph.N_LAYERS, bn.N_LAYERS_B);

    // Per-head components first, then bouncer-only components.
    const compsOrdered = ph.attnComponents.concat(
      bn.mlpComponents.filter((c) => !ph.attnComponents.includes(c)),
    );
    const COMPONENTS = compsOrdered.map((c) => ({ id: c, label: c, kind: classifyComp(c) }));

    const { matrix, covered } = buildFstatMatrix(N_LAYERS, compsOrdered, perHead, bn.features);
    const SURGICAL = buildSurgicalTargets(bn.features);
    const topByCell = buildTopByCell(bn.features);
    const headBreakdown = buildHeadBreakdownFn(perHead);
    const topFeatures = buildTopFeaturesFn(topByCell);
    const BEHAVIORS = buildFromPrompts(prompts);

    // Per-behavior matrices are synthesized, not measured: each behavior gets
    // a Gaussian bump over depth whose centre and width depend only on the
    // behavior's index, modulated by the global score matrix.
    const BEHAVIOR_MATRICES = {};
    for (let i = 0; i < BEHAVIORS.length; i++) {
      const behaviorId = BEHAVIORS[i].id;
      const seed = (i + 1) * 0.13 + 0.18;
      const mu = 0.20 + (seed % 0.7);
      const sigma = 0.10 + (seed % 0.18);
      BEHAVIOR_MATRICES[behaviorId] = [];
      for (let l = 0; l < N_LAYERS; l++) {
        const x = l / Math.max(1, N_LAYERS - 1);
        const g = Math.exp(-((x - mu) ** 2) / (2 * sigma * sigma));
        const row = compsOrdered.map((_, ci) => {
          const w = 0.6 + 0.4 * ((ci + i) % 5) / 5;
          // Cells without data (NaN) and zero cells fall back to 0.3.
          return +(g * w * (0.4 + 0.6 * (matrix[l][ci] || 0.3))).toFixed(3);
        });
        BEHAVIOR_MATRICES[behaviorId].push(row);
      }
    }

    const coveredCount = covered.filter(Boolean).length;

    window.AtlasData = {
      ...window.AtlasData,
      N_LAYERS,
      // NOTE(review): head count is hard-coded rather than derived from the rows.
      N_HEADS: 8,
      COMPONENTS,
      BEHAVIORS: BEHAVIORS.map((b) => ({
        id: b.id,
        label: b.label,
        glyph: pickGlyph(b.label),
        count: b.count,
        subcategories: b.subcategories,
        samplePrompts: b.samplePrompts,
      })),
      FSTAT: matrix,
      BEHAVIOR_MATRICES,
      SURGICAL,
      headBreakdown,
      topFeatures,
      META: {
        ...(window.AtlasData?.META || {}),
        model: 'gemma-4-e2b-it',
        paramCount: '2.06B (eff.)',
        probes: prompts.length,
        promptsScanned: prompts.length,
        // Guard the empty case: 0 / 0 would publish NaN.
        datasetCoverage: N_LAYERS > 0 ? coveredCount / N_LAYERS : 0,
        coveredLayers: coveredCount,
        ingested: new Date().toISOString(),
        authorHandle: 'juiceb0xc0de',
        source: 'live',
      },
      _RAW: { perHead, bouncer: bn.features, prompts, componentSummary, codeAnalysis, ovCircuits },
    };

    state.status = 'live';
    state.refreshedAt = new Date().toISOString();
    notify();
    window.dispatchEvent(new CustomEvent('atlas-data-ready', { detail: { source: 'live' } }));
  } catch (e) {
    console.warn('[AtlasLoader] failed:', e);
    state.status = 'error';
    state.error = String(e.message || e);
    notify();
  }
}
|
|
| |
/**
 * Choose a display glyph for a behavior label.
 *
 * The lower-cased label is tested against keyword groups in order; the first
 * group with a keyword contained in the label wins, and a fallback glyph is
 * returned when none match.
 *
 * NOTE(review): the glyph literals look mis-encoded (several distinct rules
 * share what appears to be the same character); they are reproduced as found
 * and should be restored from a correctly encoded copy of this file.
 *
 * @param {string} label
 * @returns {string}
 */
function pickGlyph(label) {
  const L = label.toLowerCase();
  const rules = [
    [['technical', 'code'], 'γ/γ'],
    [['reason', 'logic'], 'β'],
    [['math'], 'β'],
    [['creativ', 'writ'], 'βΏ'],
    [['refus', 'safe'], 'β'],
    [['humor', 'joke'], 'β»'],
    [['fact', 'know'], 'β'],
    [['multi', 'lang'], 'β'],
    [['sentim', 'emot'], 'β‘'],
    [['brainstorm', 'idea'], 'β¦'],
    [['analys'], 'β'],
    [['format'], 'β¦'],
    [['roleplay'], 'β½'],
    [['conver'], 'β'],
  ];
  for (const [keywords, glyph] of rules) {
    if (keywords.some((word) => L.includes(word))) return glyph;
  }
  return 'β';
}
|
|
// Public handle for the host page: exposes the live state object, a
// subscribe function, the refresh entry point, and the repository id.
const loaderApi = {
  state,
  /**
   * Register a listener. It is called once immediately with the current
   * state and again on every notification.
   * @param {function(Object): void} fn
   * @returns {function(): boolean} call to unregister the listener
   */
  subscribe(fn) {
    subs.add(fn);
    fn(state);
    const unsubscribe = () => subs.delete(fn);
    return unsubscribe;
  },
  refresh,
  repo: REPO,
};
window.AtlasLoader = loaderApi;

// Start the first load shortly after this script has been evaluated.
setTimeout(refresh, 60);
| })(); |
|
|