// Provenance: commit 7f0c854 (verified) by juiceb0xc0de —
// "Convert to Gradio space with token injection".
// Synthetic but plausible Gemma-4-E2B-it activation atlas data.
// 35 layers × 8 components. Generated deterministically so re-renders are stable.
(function () {
// The 8 per-layer components of the atlas grid (the column axis of every
// [layer][component] matrix built in this file).
//   id    - key used by compAffinity lookups and embedded in surgical-target ids
//   label - short text, presumably for display (note 'attn_patt' abbreviates its id)
//   kind  - 'residual' | 'attention' | 'mlp'; selects the kind bias in buildFstatMatrix
const COMPONENTS = [
{ id: 'resid_pre', label: 'resid_pre', kind: 'residual' },
{ id: 'attn_out', label: 'attn_out', kind: 'attention' },
{ id: 'attn_pattern', label: 'attn_patt', kind: 'attention' },
{ id: 'mlp_pre', label: 'mlp_pre', kind: 'mlp' },
{ id: 'mlp_gate', label: 'mlp_gate', kind: 'mlp' },
{ id: 'mlp_out', label: 'mlp_out', kind: 'mlp' },
{ id: 'resid_mid', label: 'resid_mid', kind: 'residual' },
{ id: 'resid_post', label: 'resid_post', kind: 'residual' },
];
// Number of layers: the row count of every matrix built below and the
// denominator used to normalise a layer index to 0..1.
const N_LAYERS = 35;
// Number of attention heads reported per layer by headBreakdown.
const N_HEADS = 16;
// Behaviors that get their own heatmap. `id` keys behaviorCurve, compAffinity
// and BEHAVIOR_MATRICES; `label` and `glyph` are not read in this file
// (presumably consumed by the UI).
const BEHAVIORS = [
{ id: 'coding', label: 'Coding', glyph: '〈/〉' },
{ id: 'reasoning', label: 'Reasoning', glyph: '⇄' },
{ id: 'math', label: 'Math', glyph: '∑' },
{ id: 'creative', label: 'Creative', glyph: '✿' },
{ id: 'refusal', label: 'Refusal', glyph: '◐' },
{ id: 'humor', label: 'Humor', glyph: '☻' },
{ id: 'factual', label: 'Factual recall',glyph: '◇' },
{ id: 'multiling', label: 'Multilingual', glyph: '⌘' },
{ id: 'sentiment', label: 'Sentiment', glyph: '♡' },
{ id: 'safety', label: 'Safety / harm', glyph: '✦' },
];
/**
 * Create a seeded pseudo-random generator (mulberry32) so the synthetic data
 * comes out identical on every load.
 *
 * @param {number} seed - Any number; coerced to an unsigned 32-bit integer.
 * @returns {function(): number} Generator returning a float in [0, 1).
 */
function rng(seed) {
  // The state is kept as a 32-bit integer. Letting it accumulate as a plain
  // double loses low bits once it passes 2^53 (after roughly 4.9M draws),
  // at which point the stream silently stops being mulberry32.
  let state = seed >>> 0;
  return function () {
    state = (state + 0x6D2B79F5) | 0;
    let r = state;
    r = Math.imul(r ^ (r >>> 15), r | 1);
    r ^= r + Math.imul(r ^ (r >>> 7), r | 61);
    return ((r ^ (r >>> 14)) >>> 0) / 4294967296;
  };
}
// Layer-wise activation profiles, one per behavior: a constant baseline plus
// one or two Gaussian bumps given as [amplitude, centre, width], with centre
// and width expressed in normalised depth (0 = first layer, 1 = last layer).
// Intended shape across the stack - early: tokenization, surface;
// mid: semantics; late: task / refusal / safety.
const CURVE_PROFILES = new Map([
  ['coding', { base: 0.18, bumps: [[0.85, 0.62, 0.18]] }],
  ['reasoning', { base: 0.10, bumps: [[0.95, 0.74, 0.14], [0.25, 0.45, 0.10]] }],
  ['math', { base: 0.12, bumps: [[0.80, 0.68, 0.13]] }],
  ['creative', { base: 0.22, bumps: [[0.70, 0.55, 0.22]] }],
  ['refusal', { base: 0.05, bumps: [[1.00, 0.88, 0.09]] }],
  ['humor', { base: 0.18, bumps: [[0.55, 0.50, 0.20], [0.40, 0.80, 0.08]] }],
  ['factual', { base: 0.30, bumps: [[0.65, 0.40, 0.16], [0.30, 0.72, 0.10]] }],
  ['multiling', { base: 0.40, bumps: [[0.55, 0.20, 0.13], [0.35, 0.60, 0.14]] }],
  ['sentiment', { base: 0.20, bumps: [[0.70, 0.30, 0.18], [0.30, 0.78, 0.10]] }],
  ['safety', { base: 0.08, bumps: [[0.90, 0.92, 0.08], [0.30, 0.55, 0.10]] }],
]);

/**
 * Synthetic activation strength of one behavior at one layer.
 *
 * @param {string} behaviorId - One of the BEHAVIORS ids.
 * @param {number} layer - Layer index; divided by N_LAYERS - 1 to get depth.
 * @returns {number} Baseline plus Gaussian bumps, or 0.30 for an unknown id.
 */
function behaviorCurve(behaviorId, layer) {
  const profile = CURVE_PROFILES.get(behaviorId);
  if (profile === undefined) {
    return 0.30;
  }

  const depth = layer / (N_LAYERS - 1);
  let strength = profile.base;
  // Bumps are added left to right so the floating-point sum is reproducible.
  for (const [amplitude, centre, width] of profile.bumps) {
    const gaussian = Math.exp(-((depth - centre) ** 2) / (2 * width * width));
    strength = strength + amplitude * gaussian;
  }
  return strength;
}
// Per-component affinity for each behavior: COMPONENT_AFFINITY[behaviorId][compId].
// Built once instead of on every compAffinity call.
const COMPONENT_AFFINITY = {
  coding: { mlp_gate: 1.0, mlp_out: 0.92, attn_pattern: 0.7, attn_out: 0.65, resid_post: 0.55, mlp_pre: 0.5, resid_mid: 0.45, resid_pre: 0.25 },
  reasoning: { attn_pattern: 1.0, attn_out: 0.95, resid_post: 0.75, mlp_gate: 0.65, mlp_out: 0.55, resid_mid: 0.55, mlp_pre: 0.40, resid_pre: 0.25 },
  math: { mlp_gate: 0.95, mlp_out: 0.90, attn_pattern: 0.78, attn_out: 0.62, resid_post: 0.50, resid_mid: 0.42, mlp_pre: 0.42, resid_pre: 0.20 },
  creative: { mlp_out: 0.85, resid_post: 0.78, mlp_gate: 0.70, attn_out: 0.62, resid_mid: 0.55, attn_pattern: 0.48, mlp_pre: 0.40, resid_pre: 0.35 },
  refusal: { resid_post: 1.0, attn_out: 0.85, mlp_out: 0.80, mlp_gate: 0.65, attn_pattern: 0.62, resid_mid: 0.55, mlp_pre: 0.35, resid_pre: 0.20 },
  humor: { mlp_out: 0.85, resid_post: 0.78, mlp_gate: 0.68, attn_pattern: 0.58, attn_out: 0.55, resid_mid: 0.50, mlp_pre: 0.40, resid_pre: 0.32 },
  factual: { mlp_gate: 0.95, mlp_out: 0.88, mlp_pre: 0.65, attn_pattern: 0.55, attn_out: 0.50, resid_mid: 0.55, resid_post: 0.62, resid_pre: 0.30 },
  multiling: { resid_pre: 0.78, mlp_gate: 0.85, mlp_out: 0.78, attn_out: 0.55, attn_pattern: 0.50, resid_mid: 0.62, mlp_pre: 0.55, resid_post: 0.50 },
  sentiment: { attn_out: 0.82, mlp_out: 0.78, resid_post: 0.72, attn_pattern: 0.70, mlp_gate: 0.62, resid_mid: 0.50, mlp_pre: 0.42, resid_pre: 0.32 },
  safety: { resid_post: 1.0, attn_out: 0.90, mlp_out: 0.78, mlp_gate: 0.62, resid_mid: 0.60, attn_pattern: 0.62, mlp_pre: 0.40, resid_pre: 0.22 },
};

// Affinity returned when the behavior or the component is not in the table.
const DEFAULT_AFFINITY = 0.4;

/**
 * Look up how strongly a component participates in a behavior.
 *
 * Only the table's own keys are consulted, so names inherited from
 * Object.prototype ('constructor', 'toString', ...) fall back to the default
 * instead of leaking a function or string to the caller.
 *
 * @param {string} compId - Component id (see COMPONENTS).
 * @param {string} behaviorId - Behavior id (see BEHAVIORS).
 * @returns {number} Affinity weight, or 0.4 when the pair is not listed.
 */
function compAffinity(compId, behaviorId) {
  const hasOwn = Object.prototype.hasOwnProperty;
  if (!hasOwn.call(COMPONENT_AFFINITY, behaviorId)) {
    return DEFAULT_AFFINITY;
  }
  const weights = COMPONENT_AFFINITY[behaviorId];
  return hasOwn.call(weights, compId) ? weights[compId] : DEFAULT_AFFINITY;
}
/**
 * Build the overall "activity richness" (F-statistic intensity) matrix.
 *
 * Each cell is the mean over all behaviors of curve x affinity, scaled by
 * 1.45, plus a component-kind bias, a mid-stack boost and seeded noise,
 * clamped to [0.02, 1.0] and rounded to 3 decimals.
 *
 * @returns {number[][]} Matrix indexed as [layer][component].
 */
function buildFstatMatrix() {
  const nextRandom = rng(2042);

  // MLP components get a lift, attention a smaller one, everything else a dip.
  const biasForKind = (kind) => {
    if (kind === 'attention') return 0.05;
    if (kind === 'mlp') return 0.10;
    return -0.05;
  };
  const clamp = (value) => Math.max(0.02, Math.min(1.0, value));

  return Array.from({ length: N_LAYERS }, (_, layer) => {
    // Middle layers are made richer: Gaussian boost centred at depth 0.55.
    const depth = layer / (N_LAYERS - 1);
    const midStackBoost = 0.18 * Math.exp(-((depth - 0.55) ** 2) / 0.18);

    return COMPONENTS.map((component) => {
      const total = BEHAVIORS.reduce(
        (sum, behavior) =>
          sum + behaviorCurve(behavior.id, layer) * compAffinity(component.id, behavior.id),
        0,
      );
      const meanEnergy = total / BEHAVIORS.length;
      // Exactly one draw per cell, in row-major order, keeps the noise stable.
      const jitter = (nextRandom() - 0.5) * 0.18;
      const intensity = clamp(
        meanEnergy * 1.45 + biasForKind(component.kind) + midStackBoost + jitter,
      );
      return Number(intensity.toFixed(3));
    });
  });
}
/**
 * Build the heatmap for a single behavior.
 *
 * Each cell is behaviorCurve x compAffinity plus seeded noise of at most
 * +/-0.05, clamped to [0, 1] and rounded to 3 decimals. The noise seed is
 * 7 plus the sum of the id's UTF-16 code units, so every behavior keeps its
 * own repeatable noise stream.
 *
 * @param {string} behaviorId - Behavior id (see BEHAVIORS).
 * @returns {number[][]} Matrix indexed as [layer][component].
 */
function buildBehaviorMatrix(behaviorId) {
  let seed = 7;
  for (const unit of behaviorId.split('')) {
    seed += unit.charCodeAt(0);
  }
  const nextRandom = rng(seed);

  return Array.from({ length: N_LAYERS }, (_, layer) => {
    const curve = behaviorCurve(behaviorId, layer);
    return COMPONENTS.map(({ id }) => {
      const signal = curve * compAffinity(id, behaviorId);
      const jitter = (nextRandom() - 0.5) * 0.10;
      const clamped = Math.max(0.0, Math.min(1.0, signal + jitter));
      return Number(clamped.toFixed(3));
    });
  });
}
// Matrices are computed once at load time; the seeded generators make them
// identical on every run.
const FSTAT = buildFstatMatrix();
// One [layer][component] heatmap per behavior, keyed by behavior id.
const BEHAVIOR_MATRICES = {};
for (const { id } of BEHAVIORS) {
  BEHAVIOR_MATRICES[id] = buildBehaviorMatrix(id);
}
// ------- Surgical targets (synthetic features) -------
// Pool of human-readable descriptions for synthetic features.
// buildSurgicalTargets emits exactly one row per entry (in this order, before
// sorting); topFeatures samples entries from this list at random.
const FEATURE_DESCRIPTIONS = [
'cleanup of stale punctuation after parenthetical aside',
'tracks pronoun antecedent across sentence boundary',
'detects code-block opening fence',
'sycophantic agreement preamble ("Great question!")',
'numeric magnitude estimation, base-10',
'currency symbol context (USD/EUR/JPY)',
'detection of ALL-CAPS shouting register',
'over-cautious refusal preamble for benign cooking q',
'list-continuation bullet bias',
'german compound-noun segmentation',
'sentiment flip on "however"',
'detection of jailbreak roleplay framing',
'enforces JSON brace closure',
'tracks SQL identifier scope',
'emoji-as-bullet substitution',
'detects mathematical proof step boundary',
'meta-commentary about being an AI',
'apology cascade after correction',
'inline-citation pattern (Author, year)',
'french elision before vowel',
'detects polite imperative vs command',
'over-formal register lock-in',
'em-dash overuse driver',
'trailing-summary-paragraph compulsion',
'rhetorical "but more importantly" pivot',
'rust borrow-checker hint emission',
'detects whitespace-significant language (Python/YAML)',
'gendered pronoun default ("he" for engineer)',
'detects question-vs-statement intonation in text',
'parses ISO 8601 dates',
];
/**
 * Generate one synthetic "surgical target" row per feature description.
 *
 * Every row gets a random layer, component and feature index plus two scores:
 * `bouncer` in [0.55, 0.99] (higher = safer to ablate) and `topicF` in
 * [0.02, 0.22] (low = behavior-narrow). `ablate` is (bouncer - topicF) * 100
 * rounded to one decimal.
 *
 * @returns {Array<Object>} Rows sorted by `ablate`, highest first.
 */
function buildSurgicalTargets() {
  const nextRandom = rng(31415);
  const randomIndex = (limit) => Math.floor(nextRandom() * limit);
  const toDecimals = (value, digits) => Number(value.toFixed(digits));
  const byAblateDescending = (first, second) => second.ablate - first.ablate;

  const targets = FEATURE_DESCRIPTIONS.map((desc) => {
    // The draw order below is part of the contract: changing it would change
    // every generated row.
    const layer = randomIndex(N_LAYERS);
    const { id: component } = COMPONENTS[randomIndex(COMPONENTS.length)];
    const feature = randomIndex(65536);
    const bouncer = toDecimals(0.55 + nextRandom() * 0.44, 2);
    const topicF = toDecimals(0.02 + nextRandom() * 0.20, 2);
    const ablate = toDecimals((bouncer - topicF) * 100, 1);
    const examples = Math.floor(40 + nextRandom() * 380);

    return {
      id: `L${layer}.${component}.f${feature}`,
      layer,
      component,
      feature,
      desc,
      bouncer,
      topicF,
      ablate,
      examples,
    };
  });

  return targets.sort(byAblateDescending);
}
/**
 * Synthetic per-head attention breakdown for one layer.
 *
 * Each of the N_HEADS heads is given a randomly chosen role label (labels can
 * repeat within a layer) and an F-statistic in [0.15, 1.00]. The generator is
 * seeded from the layer index, so a layer always yields the same list.
 *
 * @param {number} layer - Layer index used to derive the seed.
 * @returns {Array<{head: number, role: string, fstat: number}>} Heads sorted
 *   by `fstat`, highest first.
 */
function headBreakdown(layer) {
  const HEAD_ROLES = [
    'previous-token', 'induction', 'name-mover', 'duplicate-token',
    'punctuation', 'subject-verb', 'syntax-bracket', 'positional',
    'topic', 'refusal-routing', 'numeric', 'multilingual',
    'code-scope', 'list-tracking', 'sentiment', 'self-reference',
  ];
  const nextRandom = rng(layer * 977 + 11);

  const breakdown = Array.from({ length: N_HEADS }, (_, head) => {
    // Role is drawn before the score; the order fixes the output.
    const role = HEAD_ROLES[Math.floor(nextRandom() * HEAD_ROLES.length)];
    const fstat = Number((0.15 + nextRandom() * 0.85).toFixed(2));
    return { head, role, fstat };
  });

  breakdown.sort((first, second) => second.fstat - first.fstat);
  return breakdown;
}
/**
 * Synthetic "top features" list for a (layer, component) cell.
 *
 * The generator is seeded with an FNV-1a style hash of the layer and the
 * whole component id. Seeding from only the first character and the length
 * of the id gave 'mlp_pre'/'mlp_out' and 'resid_pre'/'resid_mid' the same
 * seed, so those components showed identical features at every layer.
 * Output is still fully deterministic per (layer, compId).
 *
 * @param {number} layer - Layer index.
 * @param {string} compId - Component id (see COMPONENTS).
 * @returns {Array<{feature: number, desc: string, fstat: number, density: number}>}
 *   Six features sorted by `fstat`, highest first.
 */
function topFeatures(layer, compId) {
  // Fold the layer into the FNV offset basis first: each later step is a
  // bijection on 32-bit values, so for a given component distinct layers can
  // never share a seed.
  let seed = (0x811C9DC5 ^ layer) >>> 0;
  for (let i = 0; i < compId.length; i++) {
    seed = Math.imul(seed ^ compId.charCodeAt(i), 0x01000193) >>> 0;
  }

  const rand = rng(seed);
  const out = [];
  for (let i = 0; i < 6; i++) {
    // Draw order (description, feature id, fstat, density) fixes the output.
    const desc = FEATURE_DESCRIPTIONS[Math.floor(rand() * FEATURE_DESCRIPTIONS.length)];
    out.push({
      feature: Math.floor(rand() * 65536),
      desc,
      fstat: +(0.30 + rand() * 0.70).toFixed(2),
      density: +(0.001 + rand() * 0.18).toFixed(3),
    });
  }
  return out.sort((a, b) => b.fstat - a.fstat);
}
// Publish the atlas on window.AtlasData. The matrices and SURGICAL are
// computed once at load time; headBreakdown and topFeatures are exposed as
// functions and computed on demand.
window.AtlasData = {
N_LAYERS, N_HEADS,
COMPONENTS, BEHAVIORS,
FSTAT, BEHAVIOR_MATRICES,
SURGICAL: buildSurgicalTargets(),
headBreakdown, topFeatures,
// Static descriptive metadata; none of these values are read elsewhere in this file.
META: {
model: 'gemma-4-e2b-it',
paramCount: '2.06B (eff.)',
probes: 7421,
promptsScanned: 184_320,
datasetCoverage: 0.51, // NOTE(review): this comment previously read "50% per user" but the value is 0.51 - confirm which is intended
ingested: '2026-05-15T07:22Z',
authorHandle: 'juiceb0xc0de',
},
};
})();