// Live loader for juiceb0xc0de/gemma-4-e2b-atlas.
// Paginates the HuggingFace datasets-server /rows endpoint and rebuilds
// window.AtlasData with real fields. Falls back to the synthetic preview
// already on window.AtlasData if anything fails.

(function () {
  // Dataset repository on the HuggingFace Hub that this loader reads.
  const REPO    = 'juiceb0xc0de/gemma-4-e2b-atlas';
  // Optional access token, read from window.HF_TOKEN when the page defines it.
  const TOKEN   = window.HF_TOKEN || '';
  // An Authorization header is only sent when a token is configured.
  const HEADERS = TOKEN ? { Authorization: `Bearer ${TOKEN}` } : {};
  // URL of one /rows page. Every interpolated value is percent-encoded (not
  // just the repo id) so a config or split name containing reserved
  // characters cannot corrupt the query string.
  const ROWS    = (cfg, split, off, len) =>
    `https://datasets-server.huggingface.co/rows?dataset=${encodeURIComponent(REPO)}&config=${encodeURIComponent(cfg)}&split=${encodeURIComponent(split)}&offset=${encodeURIComponent(off)}&length=${encodeURIComponent(len)}`;
  // Endpoints listing the dataset's configs/splits and their row counts.
  const SPLITS  = `https://datasets-server.huggingface.co/splits?dataset=${encodeURIComponent(REPO)}`;
  const SIZE    = `https://datasets-server.huggingface.co/size?dataset=${encodeURIComponent(REPO)}`;

  // Observable loader state. The object is mutated in place and handed to
  // every subscriber on each notify().
  const state = {
    status: 'idle',          // idle | loading | live | error
    error: null,
    repo: REPO,
    splits: [],              // [{config, split, num_rows}]
    progress: { perHead: 0, bouncer: 0, prompts: 0 },
    counts:   { perHead: 0, bouncer: 0, prompts: 0 },
    refreshedAt: null,
  };
  // Registered listener callbacks.
  const subs = new Set();
  // Call every subscriber with the current state. A throwing subscriber must
  // not stop the others, but its error is logged rather than silently dropped
  // so a broken listener can be diagnosed.
  function notify() {
    for (const fn of subs) {
      try {
        fn(state);
      } catch (err) {
        console.warn('[AtlasLoader] subscriber threw:', err);
      }
    }
  }

  async function fetchSplits() {
    // Resolves to [{config, split, num_rows}]. /splits lists configs+splits
    // but NOT row counts; /size supplies the counts (and any split that
    // /splits did not list). num_rows stays null when /size has no entry.
    // Never rejects: a failing endpoint is logged and contributes nothing.

    // Fetch one endpoint as JSON; resolves to null (after logging why) on a
    // network error, a non-2xx status, or an unparsable body.
    const getJson = async (url, label) => {
      try {
        const r = await fetch(url, { headers: HEADERS });
        if (!r.ok) {
          console.warn(`[AtlasLoader] ${label} request failed: HTTP ${r.status}`);
          return null;
        }
        return await r.json();
      } catch (err) {
        console.warn(`[AtlasLoader] ${label} request failed:`, err);
        return null;
      }
    };

    // The two requests are independent, so issue them together.
    const [splitsJson, sizeJson] = await Promise.all([
      getJson(SPLITS, '/splits'),
      getJson(SIZE, '/size'),
    ]);

    const splits = [];
    const index = new Map(); // "[config,split]" -> entry, for O(1) merging
    const keyOf = (config, split) => JSON.stringify([config, split]);

    const listed = Array.isArray(splitsJson?.splits) ? splitsJson.splits : [];
    for (const s of listed) {
      if (!s) continue;
      const entry = { config: s.config, split: s.split, num_rows: null };
      splits.push(entry);
      const k = keyOf(s.config, s.split);
      if (!index.has(k)) index.set(k, entry);
    }

    // augment with row counts from /size
    const sizes = Array.isArray(sizeJson?.size?.splits) ? sizeJson.size.splits : [];
    for (const sz of sizes) {
      if (!sz) continue;
      const k = keyOf(sz.config, sz.split);
      const known = index.get(k);
      if (known) {
        known.num_rows = sz.num_rows;
      } else {
        const entry = { config: sz.config, split: sz.split, num_rows: sz.num_rows };
        splits.push(entry);
        index.set(k, entry);
      }
    }
    return splits;
  }
  async function fetchSize() {
    // Row counts are gathered inside fetchSplits; this stub keeps the old
    // name resolvable and always resolves to null.
    return null;
  }

  // Fetch one page with up to `retries` retries and exponential back-off.
  async function fetchPage(url, retries = 4) {
    // Resolves to the first OK Response. Retries (800 ms, doubling each time)
    // only on failures that can be transient: network errors, HTTP 429 and
    // HTTP 5xx. Any other HTTP status (e.g. 401/404) is rejected immediately
    // as Error("HTTP <status>") — waiting cannot fix those, and retrying them
    // only delays the fallback path.
    let delay = 800;
    for (let attempt = 0; attempt <= retries; attempt++) {
      let failure;
      let retryable = true; // network errors are assumed transient
      try {
        const r = await fetch(url, { headers: HEADERS });
        if (r.ok) return r;
        retryable = r.status === 429 || r.status >= 500;
        failure = new Error(`HTTP ${r.status}`);
      } catch (e) {
        failure = e;
      }
      if (!retryable || attempt >= retries) throw failure;
      await new Promise(res => setTimeout(res, delay));
      delay *= 2;
    }
  }

  // Paginate /rows. If `total` is null, pages until an empty page comes back.
  // Returns whatever rows were fetched even if a page fails mid-way.
  async function fetchAllRows(cfg, split, total, onProgress) {
    // Resolves to an array of the `row` payloads of every fetched entry.
    // `total` (row count, or null when unknown) bounds the paging; with null
    // it pages until an empty or short page comes back. `onProgress(done,
    // denom)` is optional and is called after each page and once at the end
    // with (done, done).
    // Any failure while loading a page — the request itself OR decoding its
    // JSON body — stops paging and returns the rows collected so far.
    const PAGE = 100;
    const out = [];
    let off = 0; // rows fetched so far == offset of the next page
    while (true) {
      const want = total != null ? Math.min(PAGE, total - off) : PAGE;
      if (want <= 0) break;
      let rows;
      try {
        const r = await fetchPage(ROWS(cfg, split, off, want));
        const j = await r.json();
        rows = Array.isArray(j?.rows) ? j.rows : [];
      } catch (e) {
        console.warn(`[AtlasLoader] ${cfg} @${off} failed after retries:`, e.message, '— using partial data');
        break;
      }
      if (rows.length === 0) break;
      for (const row of rows) out.push(row.row);
      off += rows.length;
      // With an unknown total, report against a moving estimate one page ahead.
      const denom = total != null ? total : (off + PAGE);
      onProgress && onProgress(off, denom);
      if (rows.length < want) break; // short page: nothing left on the server
    }
    onProgress && onProgress(off, off);
    return out;
  }

  // ---- Builders ----------------------------------------------------------

  // Parse a Python-ish repr like  {'broadly_shared': 140, 'partial_shared': 109}
  function parsePyDict(s) {
    // Best-effort conversion of a Python repr into a JS value via JSON.
    // Returns {} for non-string/empty input or when the converted text is
    // still not valid JSON.
    //
    // The text is rewritten token by token instead of blindly swapping every
    // quote, so that:
    //   - 'single-quoted' strings become JSON strings (\' unescaped, bare "
    //     escaped);
    //   - "double-quoted" strings pass through untouched, apostrophes included;
    //   - bare True / False / None become true / false / null, while the same
    //     words inside a quoted string are left alone.
    if (!s || typeof s !== 'string') return {};
    const PY_LITERALS = { True: 'true', False: 'false', None: 'null' };
    const json = s.replace(
      /'((?:[^'\\]|\\.)*)'|"(?:[^"\\]|\\.)*"|\b(?:True|False|None)\b/g,
      (token, singleQuoted) => {
        if (singleQuoted !== undefined) {
          const inner = singleQuoted.replace(/\\(.)|"/g, (m, esc) => {
            if (esc === undefined) return '\\"';   // bare double quote
            return esc === "'" ? "'" : m;          // \' -> ', keep other escapes
          });
          return `"${inner}"`;
        }
        if (token[0] === '"') return token;
        return PY_LITERALS[token];
      }
    );
    try {
      return JSON.parse(json);
    } catch {
      return {};
    }
  }

  function buildFromPerHead(perHead) {
    // One pass over the rows to find the highest layer index and the set of
    // component names. Returns { N_LAYERS, attnComponents, perHead } where
    // N_LAYERS is (highest layer + 1) and perHead is the input array itself.
    const canonical = ['heads', 'q', 'k', 'v'];
    const seen = new Set();
    let topLayer = -1;
    perHead.forEach((row) => {
      seen.add(row.component);
      if (row.layer > topLayer) topLayer = row.layer;
    });

    // Canonical names first (in canonical order), then the rest alphabetically.
    const known = canonical.filter((name) => seen.has(name));
    const extras = Array.from(seen)
      .filter((name) => canonical.indexOf(name) === -1)
      .sort();

    return {
      N_LAYERS: topLayer + 1,
      attnComponents: [...known, ...extras],
      perHead,
    };
  }

  function buildFromBouncer(bouncer) {
    // Every input row carries `split` and `record` (a JSON string). Rows whose
    // record does not parse, or lacks a component or layer, are skipped.
    // Returns { N_LAYERS_B, mlpComponents, features }: highest layer + 1, the
    // sorted distinct component names, and one normalized feature per kept row.

    // per_head records use their own field names; map them onto the shape the
    // other splits already have so downstream code can treat all alike.
    const fromPerHead = (rec, split) => ({
      layer:    rec.layer,
      component:rec.component,
      head:     rec.head,
      top_dim:  rec.top_dim,
      feature:  rec.top_dim,       // the best dim doubles as the feature key
      rank:     rec.head,
      fstat:    rec.fstat_best,
      fstat_mean: rec.fstat_mean,
      delta:    rec.delta_at_top,
      leaning:  rec.corp_leaning ? 'corp' : 'auth',
      split,
    });

    const names = new Set();
    const features = [];
    let topLayer = -1;

    bouncer.forEach((row) => {
      let rec;
      try {
        rec = JSON.parse(row.record);
      } catch {
        return;
      }
      if (!rec || rec.component == null || rec.layer == null) return;

      names.add(rec.component);
      if (rec.layer > topLayer) topLayer = rec.layer;

      features.push(
        row.split === 'per_head'
          ? fromPerHead(rec, row.split)
          : { ...rec, split: row.split } // other splits: record fields as-is
      );
    });

    return {
      N_LAYERS_B: topLayer + 1,
      mlpComponents: Array.from(names).sort(),
      features,
    };
  }

  function buildFromPrompts(prompts) {
    // Group prompt rows by `category` (rows without one are ignored) and
    // return one summary per category, most populous first:
    //   { id, label, count, subcategories: [{label, count}], samplePrompts }
    // Subcategories are likewise ordered by descending count; samplePrompts
    // holds at most the first six prompts seen for the category.
    const byCategory = new Map(); // label -> { count, subs: Map, prompts }

    prompts.forEach((row) => {
      if (!row.category) return;
      let bucket = byCategory.get(row.category);
      if (bucket === undefined) {
        bucket = { count: 0, subs: new Map(), prompts: [] };
        byCategory.set(row.category, bucket);
      }
      bucket.count += 1;
      bucket.prompts.push(row.prompt);
      if (row.subcategory) {
        const soFar = bucket.subs.get(row.subcategory) || 0;
        bucket.subs.set(row.subcategory, soFar + 1);
      }
    });

    const byCountDesc = (x, y) => y.count - x.count;
    const summaries = [];
    for (const [label, bucket] of byCategory) {
      const subcategories = [];
      for (const [subLabel, n] of bucket.subs) {
        subcategories.push({ label: subLabel, count: n });
      }
      subcategories.sort(byCountDesc);
      summaries.push({
        id: label.toLowerCase().replace(/[^a-z0-9]+/g, '_'),
        label,
        count: bucket.count,
        subcategories,
        samplePrompts: bucket.prompts.slice(0, 6),
      });
    }
    return summaries.sort(byCountDesc);
  }

  // Component-kind classification — used for grouping + tints.
  function classifyComp(c) {
    // Map a component name to its kind: 'attention', 'mlp', 'residual', or
    // 'other' for any name not listed below.
    switch (c) {
      case 'heads':
      case 'q':
      case 'k':
      case 'v':
      case 'o':
      case 'attn_out':
        return 'attention';
      case 'mlp':
      case 'gate':
      case 'up':
      case 'down':
        return 'mlp';
      case 'resid_pre':
      case 'resid_mid':
      case 'resid_post':
        return 'residual';
      default:
        return 'other';
    }
  }

  // ---- Build the F-stat heatmap from real data ---------------------------
  // For attention components: aggregate top_sep_score per (layer, component)
  //   by taking the mean over every per_head row in that cell
  //   (e.g. "heads" / "q": mean across their heads; "k" / "v": a single row).
  // For mlp/gate (cells with no per_head rows): use the largest fstat among
  //   the bouncer features of that cell.
  function buildFstatMatrix(N_LAYERS, components, perHead, bouncerFeatures) {
    // Returns { matrix, covered }:
    //   matrix[l][ci] – cell intensity divided by the largest intensity seen
    //                   for that component, or NaN when the cell has no data;
    //   covered[l]    – true when layer l has at least one cell with data.
    // Cell intensity is the mean top_sep_score of the cell's per_head rows,
    // else the largest fstat among the cell's bouncer features.
    //
    // Only finite numbers count as data. A single row with a missing score
    // used to turn its cell mean into NaN, which then poisoned the
    // per-component maximum and blanked the whole column.
    const isScore = (x) => typeof x === 'number' && Number.isFinite(x);

    // bucket per_head scores by layer/component
    const headScores = new Map(); // `${l}|${c}` -> [top_sep_score, ...]
    for (const r of perHead) {
      if (!isScore(r.top_sep_score)) continue;
      const k = `${r.layer}|${r.component}`;
      const bucket = headScores.get(k);
      if (bucket) bucket.push(r.top_sep_score);
      else headScores.set(k, [r.top_sep_score]);
    }

    // largest bouncer fstat per layer/component
    const bouncerPeak = new Map();
    for (const f of bouncerFeatures) {
      if (!isScore(f.fstat)) continue;
      const k = `${f.layer}|${f.component}`;
      const prev = bouncerPeak.get(k);
      if (prev === undefined || f.fstat > prev) bouncerPeak.set(k, f.fstat);
    }

    const cellValue = (l, c) => {
      const k = `${l}|${c}`;
      const scores = headScores.get(k);
      if (scores) return scores.reduce((a, b) => a + b, 0) / scores.length;
      return bouncerPeak.has(k) ? bouncerPeak.get(k) : null;
    };

    // Pass 1: raw intensities (computed once), layer coverage, and the
    // per-component maximum. Normalizing per component keeps attention
    // (sep_score ~5–15) and mlp/gate (fstat ~hundreds–thousands) on the same
    // 0..1 scale.
    const covered = new Array(N_LAYERS).fill(false);
    const perComponentMax = new Map();
    const raw = [];
    for (let l = 0; l < N_LAYERS; l++) {
      const rowRaw = components.map((c) => {
        const v = cellValue(l, c);
        if (v != null) {
          perComponentMax.set(c, Math.max(perComponentMax.get(c) || 0, v));
          covered[l] = true;
        }
        return v;
      });
      raw.push(rowRaw);
    }

    // Pass 2: normalize. A maximum of 0 (or none) divides by 1 instead.
    const matrix = raw.map((rowRaw) =>
      rowRaw.map((v, ci) =>
        (v == null) ? NaN : v / (perComponentMax.get(components[ci]) || 1)
      )
    );
    return { matrix, covered };
  }

  // ---- Feature inventory from real compliance features ----------------------
  function buildSurgicalTargets(features) {
    // Build one descriptive inventory entry per feature, strongest F-stat
    // first. The input array is not modified.
    //
    // Selectivity (`ablate`) = log10(1 + F-stat) × (1 − min(1, |Δ|)) × 25,
    // rounded to one decimal. Purely descriptive — no intervention implied.
    // `bouncer` is min(1, |Δ|) and `topicF` is min(1, F-stat / 3000), both
    // rounded to three decimals. A missing F-stat or Δ counts as 0 in these
    // formulas.
    //
    // Entries whose F-stat is missing or not finite sort last; subtracting
    // them directly would make the comparator return NaN and leave the order
    // inconsistent.
    const sortKey = (t) => (Number.isFinite(t.fstat) ? t.fstat : -Infinity);

    return features.map((f) => {
      const dAbs = Math.min(1, Math.abs(f.delta || 0));
      const ablate = Math.log10(1 + (f.fstat || 0)) * (1 - dAbs);
      // split prefix (initials of the split name, max 3) keeps IDs unique
      // across the three sub-splits
      const sp = (f.split || '').split('_').map(w => w[0] || '').join('').slice(0, 3) || 's';
      // per_head rows discriminate by head + top_dim; the others by feature + rank
      const tail = f.split === 'per_head'
        ? `.h${f.head}.d${f.top_dim}`
        : `.f${f.feature}.r${f.rank}`;
      return {
        id: `${sp}.L${f.layer}.${f.component}${tail}`,
        split: f.split,
        layer: f.layer, component: f.component, feature: f.feature, head: f.head,
        rank: f.rank, fstat: f.fstat, delta: f.delta, leaning: f.leaning,
        ablate: +(ablate * 25).toFixed(1),         // scale to ~0–100
        bouncer: +dAbs.toFixed(3),                 // direction strength
        topicF:  +(Math.min(1, (f.fstat||0)/3000)).toFixed(3),
      };
    }).sort((a, b) => {
      const ka = sortKey(a);
      const kb = sortKey(b);
      return ka === kb ? 0 : kb - ka;
    });
  }

  // ---- Top-N features per (layer, component) -----------------------------
  function buildTopByCell(features) {
    // Group features into a Map keyed by `${layer}|${component}`; each value
    // is that cell's features ordered by ascending rank.
    const cells = new Map();
    features.forEach((feat) => {
      const key = `${feat.layer}|${feat.component}`;
      const bucket = cells.get(key);
      if (bucket === undefined) {
        cells.set(key, [feat]);
      } else {
        bucket.push(feat);
      }
    });
    cells.forEach((bucket) => {
      bucket.sort((x, y) => x.rank - y.rank);
    });
    return cells;
  }

  // ---- Per-head breakdown for a layer ------------------------------------
  function buildHeadBreakdownFn(perHead) {
    // Index the per_head rows by `${layer}|${component}` once, then return a
    // lookup headBreakdown(layer, component) that describes every head in
    // that cell, highest top_sep_score first. `component` falls back to
    // 'heads' when falsy; an unknown cell yields [].
    const index = new Map();
    perHead.forEach((row) => {
      const key = `${row.layer}|${row.component}`;
      if (!index.has(key)) index.set(key, []);
      index.get(key).push(row);
    });

    // Shape one raw row into the object the UI consumes.
    const describe = (row) => {
      const taxonomy = {
        all_shared:    row.tax_all_shared     ?? 0,
        broadly_shared:row.tax_broadly_shared ?? 0,
        partial_shared:row.tax_partial_shared ?? 0,
        specific:      row.tax_specific       ?? 0,
        non_activated: row.tax_non_activated  ?? 0,
      };
      return {
        head: row.head,
        role: pickRole(row),
        fstat: row.top_sep_score,
        meanFstat: row.mean_sep_score,
        topCodeDim: row.top_code_dim,
        topCodeSpec: parseFloat(row.top_code_spec),
        taxonomy,
        dims: row.dims,
        nonZeroVar: row.non_zero_var,
      };
    };

    return function headBreakdown(layer, component) {
      const comp = component || 'heads';
      const rows = index.get(`${layer}|${comp}`) || [];
      const described = rows.map(describe);
      described.sort((x, y) => y.fstat - x.fstat);
      return described;
    };
  }

  // Derive an interpretable role from taxonomy + spec.
  function pickRole(r) {
    // Label a per_head row from its top-code specificity, the fraction of its
    // dims in each taxonomy bucket, and its top_sep_score. The checks run in
    // a fixed priority order; the first one that matches wins.
    const total = r.dims || 1;
    // Fraction of dims in a bucket; a missing or falsy count is treated as 0.
    const share = (count) => (count || 0) / total;
    const specificity = parseFloat(r.top_code_spec) || 0;

    const rules = [
      ['specialist',      specificity > 0.4],
      ['omni-active',     share(r.tax_all_shared) > 0.4],
      ['sparse / gated',  share(r.tax_non_activated) > 0.4],
      ['broad mixer',     share(r.tax_broadly_shared) > 0.55],
      ['sharp separator', r.top_sep_score > 10],
    ];
    const hit = rules.find(([, matched]) => matched);
    return hit ? hit[0] : 'partial mixer';
  }

  // ---- Top features for layer deep-dive ----------------------------------
  function buildTopFeaturesFn(topByCell) {
    // Returns topFeatures(layer, component): the first six entries stored in
    // `topByCell` (a Map keyed by `${layer}|${component}`) for that cell,
    // reshaped for display. Unknown cells give [].
    const LIMIT = 6;
    const present = (f) => {
      const shownDelta = f.delta?.toFixed(2);
      return {
        feature: f.feature,
        fstat: f.fstat,
        delta: f.delta,
        leaning: f.leaning,
        density: undefined,  // not in this dataset
        desc: `f${f.feature} · ${f.leaning}-leaning (Δ ${shownDelta})`,
      };
    };
    return function topFeatures(layer, component) {
      const key = `${layer}|${component}`;
      const cell = topByCell.has(key) ? topByCell.get(key) : undefined;
      return (cell || []).slice(0, LIMIT).map(present);
    };
  }

  // ---- Behavior matrix proxy: per-category proxy across layers -----------
  // The dataset doesn't ship a per-category × (layer, component) matrix yet.
  // Until it does, we surface the real category list with their prompt
  // counts; the heatmap overlay is left empty (handled in UI).
  // We DO compute a synthetic curve per behavior off the F-stat matrix so the
  // mini-curves in the picker are aesthetically alive but clearly labelled.

  // ---- Top-level refresh -------------------------------------------------

  async function refresh() {
    // Load every config from the datasets-server, rebuild window.AtlasData
    // from the rows, and publish progress through `state` + notify().
    //
    // On success: state.status becomes 'live' and an 'atlas-data-ready' event
    // is dispatched on window. On any failure: state.status becomes 'error',
    // state.error holds the message, and window.AtlasData is left untouched
    // so whatever was there before (the synthetic preview) stays in place.
    // Never rejects.
    state.status = 'loading';
    state.error  = null;
    state.progress = { perHead: 0, bouncer: 0, prompts: 0 };
    notify();

    try {
      const splits = await fetchSplits();
      state.splits = splits;
      notify();

      const findSplit = (cfg) => splits.find(s => s.config === cfg && s.split === 'train');
      const phSplit = findSplit('per_head');
      const bSplit  = findSplit('bouncer_analysis');
      const pSplit  = findSplit('prompts');
      // The remaining three configs are optional extras.
      const csSplit = findSplit('component_summary');
      const caSplit = findSplit('code_analysis');
      const ovSplit = findSplit('ov_circuits');
      if (!phSplit || !bSplit || !pSplit) {
        throw new Error('expected configs missing — has per_head / bouncer_analysis / prompts been published?');
      }
      state.counts = {
        perHead: phSplit.num_rows ?? null,
        bouncer: bSplit.num_rows ?? null,
        prompts: pSplit.num_rows ?? null,
      };
      notify();

      const [perHead, bouncer, prompts, componentSummary, codeAnalysis, ovCircuits] = await Promise.all([
        fetchAllRows('per_head',          'train', phSplit.num_rows  ?? null, (d, t) => { state.progress.perHead = t ? d/t : 0; notify(); }),
        fetchAllRows('bouncer_analysis',  'train', bSplit.num_rows   ?? null, (d, t) => { state.progress.bouncer = t ? d/t : 0; notify(); }),
        fetchAllRows('prompts',           'train', pSplit.num_rows   ?? null, (d, t) => { state.progress.prompts = t ? d/t : 0; notify(); }),
        csSplit ? fetchAllRows('component_summary', 'train', csSplit.num_rows ?? null, null) : Promise.resolve([]),
        caSplit ? fetchAllRows('code_analysis',     'train', caSplit.num_rows ?? null, null) : Promise.resolve([]),
        ovSplit ? fetchAllRows('ov_circuits',        'train', ovSplit.num_rows  ?? null, null) : Promise.resolve([]),
      ]);

      // ---- assemble ----
      const ph = buildFromPerHead(perHead);
      const bn = buildFromBouncer(bouncer);
      const N_LAYERS = Math.max(ph.N_LAYERS, bn.N_LAYERS_B);

      // Row fetching degrades to partial (possibly empty) results instead of
      // throwing. With no usable per_head or bouncer rows there is nothing to
      // draw, so treat that as a failure: it keeps the existing AtlasData
      // instead of replacing it with an empty "live" atlas, and avoids the
      // 0/0 coverage ratio below.
      if (N_LAYERS === 0) {
        throw new Error('no per_head / bouncer_analysis rows could be loaded');
      }

      // union of components — attention first (canonical), then mlp/gate, then anything else
      const compsOrdered = ph.attnComponents.concat(
        bn.mlpComponents.filter(c => !ph.attnComponents.includes(c))
      );
      const COMPONENTS = compsOrdered.map(c => ({ id: c, label: c, kind: classifyComp(c) }));

      const { matrix, covered } = buildFstatMatrix(N_LAYERS, compsOrdered, perHead, bn.features);
      const SURGICAL = buildSurgicalTargets(bn.features);
      const topByCell = buildTopByCell(bn.features);
      const headBreakdown = buildHeadBreakdownFn(perHead);
      const topFeatures   = buildTopFeaturesFn(topByCell);
      const BEHAVIORS = buildFromPrompts(prompts);

      // synthesize a soft per-behavior curve as a *placeholder visual* — the
      // real per-category × cell data isn't in the dataset yet. Each behavior
      // gets a Gaussian bump over normalized layer depth whose centre and
      // width are derived deterministically from its index.
      const BEHAVIOR_MATRICES = {};
      for (let i = 0; i < BEHAVIORS.length; i++) {
        const b = BEHAVIORS[i];
        const seed = (i + 1) * 0.13 + 0.18;
        const mu = (0.20 + seed % 0.7);
        const sigma = 0.10 + (seed % 0.18);
        BEHAVIOR_MATRICES[b.id] = [];
        for (let l = 0; l < N_LAYERS; l++) {
          const x = l / Math.max(1, N_LAYERS - 1);
          const g = Math.exp(-((x - mu) ** 2) / (2 * sigma * sigma));
          const row = compsOrdered.map((_, ci) => {
            const w = 0.6 + 0.4 * ((ci + i) % 5) / 5;
            // cells without data (NaN) and zero cells fall back to 0.3
            return +(g * w * (0.4 + 0.6 * (matrix[l][ci] || 0.3))).toFixed(3);
          });
          BEHAVIOR_MATRICES[b.id].push(row);
        }
      }

      // estimate covered layers (those with any data)
      const coveredCount = covered.filter(Boolean).length;

      // ---- replace AtlasData, keeping any existing keys not listed here ----
      window.AtlasData = {
        ...window.AtlasData,
        N_LAYERS,
        N_HEADS: 8,
        COMPONENTS,
        BEHAVIORS: BEHAVIORS.map(b => ({ id: b.id, label: b.label, glyph: pickGlyph(b.label),
                                          count: b.count, subcategories: b.subcategories,
                                          samplePrompts: b.samplePrompts })),
        FSTAT: matrix,
        BEHAVIOR_MATRICES,
        SURGICAL,
        headBreakdown,
        topFeatures,
        META: {
          ...(window.AtlasData?.META || {}),
          model: 'gemma-4-e2b-it',
          paramCount: '2.06B (eff.)',
          probes: prompts.length,
          promptsScanned: prompts.length,
          datasetCoverage: coveredCount / N_LAYERS,
          coveredLayers: coveredCount,
          ingested: new Date().toISOString(),
          authorHandle: 'juiceb0xc0de',
          source: 'live',
        },
        _RAW: { perHead, bouncer: bn.features, prompts, componentSummary, codeAnalysis, ovCircuits }, // for SQL box
      };

      state.status = 'live';
      state.refreshedAt = new Date().toISOString();
      notify();
      window.dispatchEvent(new CustomEvent('atlas-data-ready', { detail: { source: 'live' } }));
    } catch (e) {
      console.warn('[AtlasLoader] failed:', e);
      state.status = 'error';
      state.error  = String(e.message || e);
      notify();
    }
  }

  // Map category label -> a small glyph
  function pickGlyph(label) {
    // Choose a glyph by case-insensitive substring match on the label. Rules
    // are tried top to bottom and the first rule with any matching needle
    // wins; '○' is the fallback.
    const rules = [
      [['technical', 'code'],  '〈/〉'],
      [['reason', 'logic'],    '⇄'],
      [['math'],               '∑'],
      [['creativ', 'writ'],    '✿'],
      [['refus', 'safe'],      '◐'],
      [['humor', 'joke'],      '☻'],
      [['fact', 'know'],       '◇'],
      [['multi', 'lang'],      '⌘'],
      [['sentim', 'emot'],     '♡'],
      [['brainstorm', 'idea'], '✦'],
      [['analys'],             '◈'],
      [['format'],             '▦'],
      [['roleplay'],           '☽'],
      [['conver'],             '◊'],
    ];
    const lowered = label.toLowerCase();
    for (const [needles, glyph] of rules) {
      if (needles.some((needle) => lowered.includes(needle))) return glyph;
    }
    return '○';
  }

  // Public handle exposed on window for the rest of the page.
  const loaderApi = {
    state,
    // Register a listener: it is called once right away with the current
    // state and again on every notify(). Returns an unsubscribe function.
    subscribe(fn) {
      subs.add(fn);
      fn(state);
      return () => subs.delete(fn);
    },
    refresh,
    repo: REPO,
  };
  window.AtlasLoader = loaderApi;

  // Start the first load shortly after this script has been evaluated.
  setTimeout(refresh, 60);
})();