A
Alpha
historic_20260226183010_viycstalechat · 17.44M params · 1h 49m elapsed · Updated 46d ago
8L / 384D / 8H · helios · bpe-4k · adamw · Created Feb 26, 2026 6:31 PM
Step 6,214 / 50,000 · 12.4%
4.9274
Loss?
4.7771
Best Loss?
-43.7% from start
5.3095
Val Loss?
best: 5.2107
4.88e-5
Learning Rate?
1,422
Throughput?
tok/s (avg)
7,518
Speed?
ms/iter (avg)
1.200
Grad Norm?
avg: 2.177
17.55M
Tokens
processed
394ms
Forward
5% of step
7050ms
Backward
94% of step
13ms
GPU Sync
0% of step
893
GPU Ops
per step
0.5%
MFU
model FLOPS util
17.9x
Bwd/Fwd
ratio
Loss Curve ? click any chart to add markers
?
?
?
?
Architecture
Layers: 8
Embedding: 384
Heads: 8
Vocab: 4,000
Context: 512
Dropout: 0.1
Parameters: 17.44M
Training Config
Total iters: 50,000
Batch size: 20
Max LR: 0.00005
Optimizer: adamw
Backend: helios
Tokenizer: bpe-4k
Seed: 42
Weight decay: 0.1
Grad clip: 1
Eval interval: 500
Throughput (tok/s)
Step Time (ms/iter)
GPU & VRAM
Perplexity
Train/Val Gap
Learning Rate
Grad Norm
Smoothed Loss (EMA)
Loss Velocity
Gradient Clipping
GPU Operations
Step Time Breakdown
Forward
Backward
Grad Norm
Optimizer
GPU Sync
Data
Timing Phase Lines
Backward / Forward Ratio

Evolutionary Analysis (Symbiogenesis)

1.83
Wt Entropy
bits
20.0
Eff. Rank
4.9541
Free Energy
3.910
Pop Entropy
nats
0.0768
Complexity
0.0917
Fitness
1705
CUSUM
alerts
-
Batch Size
adaptive
CUSUM Statistical Monitors
Information Bottleneck (MI)
MI Analysis Pending

Transformer Layer Analysis

Gradient Norm Heatmap
Per-Layer Gradient Evolution
Checkpoints (0) ?
No checkpoints saved
Sample Generations (3)
# · Checkpoint · Prompt (preview) · Generated
1 · - · <|user|> Hello, how are you? <|assistant|> · 46d ago
Prompt
<|user|> Hello, how are you? <|assistant|>
Output
<|user|> Hello, how are you? <|assistant|>I we yet I without a all even in the your the gods and justice — and Rome branfrom men men Is Justice, where to all of their to to I to the and that that your Pnot not the that in the in to the in the to fear of that by men
2 · - · <|user|> What do you like to do for fun? <|assistant|> · 46d ago
Prompt
<|user|> What do you like to do for fun? <|assistant|>
Output
<|user|> What do you like to do for fun? <|assistant|>t not without the philosopher. <|user|> In of Gyet I soence. <|user|> Iand your the Sjeck a in and the true that and we I the wise undo to all I that in the solto for the your powhen from in and I to
3 · - · <|user|> Tell me about yourself. <|assistant|> · 46d ago
Prompt
<|user|> Tell me about yourself. <|assistant|>
Output
<|user|> Tell me about yourself. <|assistant|>on it virtue I to men in the truth without both consider the if er law for its without us not all but to of to to to to to what to and to all men to in the men the divine or to that is the for in to both what who to
Model Config (JSON)
{
  "vocabSize": 4000,
  "blockSize": 512,
  "nLayer": 8,
  "nEmbd": 384,
  "nHead": 8,
  "dropout": 0.1,
  "ffnActivation": "swiglu",
  "ffnDim": 1024
}
Training Config (JSON)
{
  "iters": 50000,
  "batchSize": 20,
  "lr": 0.00005,
  "lrMin": 0.000005,
  "warmupIters": 1000,
  "beta1": 0.9,
  "beta2": 0.95,
  "eps": 0.000001,
  "weightDecay": 0.1,
  "gradClip": 1,
  "evalInterval": 500,
  "evalIters": 10,
  "seed": 42,
  "backend": "helios",
  "tokenizer": "bpe-4k",
  "optimizer": "adamw",
  "logLevel": "info",
  "trace": false,
  "gradAccumSteps": 1,
  "sampleInterval": 300,
  "spikeThreshold": 10,
  "syncEvery": 1,
  "gcEvery": 0,
  "packed": false,
  "symbio": true,
  "symbioConfig": {
    "cusumSensitivity": 4,
    "cusumBaselineWindow": 5,
    "metricsInterval": 10,
    "trackWeightEntropy": true,
    "trackEffectiveRank": true,
    "trackFreeEnergy": true,
    "trackMIProfiles": false,
    "trackPopulationMetrics": true,
    "freeEnergyBeta": 0.01,
    "miNumBins": 30,
    "adaptiveBatch": false,
    "batchMin": 8,
    "batchMax": 64,
    "batchStep": 4,
    "calmStepsBeforeRestore": 200,
    "populationAdaptation": true,
    "populationScaleMin": 0.5,
    "populationScaleMax": 2,
    "populationScaleStep": 0.125,
    "populationAdaptationCooldown": 10,
    "mutationRateMin": 0.2,
    "mutationRateMax": 0.95,
    "fitnessAlpha": 1,
    "complexityMode": "entropy",
    "diversityBonus": 0.1,
    "diversityDecay": "cosine",
    "searchMode": "composed-activation-search",
    "activationPool": [
      "gelu",
      "relu",
      "silu",
      "swiglu",
      "universal",
      "kan_spline"
    ],
    "searchStrategy": "evolutionary",
    "populationSize": 8,
    "generations": 250,
    "selectionStrategy": "topk",
    "tournamentK": 3,
    "mutationRate": 0.7,
    "stepsPerCandidate": 25,
    "rankBy": "valLoss",
    "perfWeight": 0,
    "stabilityWeight": 0,
    "preserveWeightsAcrossCandidates": true,
    "carryOptimizerStateAcrossCandidates": true,
    "constantFfnDimAcrossCandidates": true,
    "fuseWeightsEachStep": true,
    "fusionShadowEma": 0.02,
    "fusionBaseStrength": 0.0015,
    "fusionMaxStrength": 0.02,
    "kuramotoCoupling": 0.7,
    "kuramotoDt": 0.1,
    "kuramotoDamping": 0.05,
    "writeReport": true,
    "writeCandidates": true,
    "writeSummary": true,
    "basisPool": [
      "silu",
      "relu",
      "gelu",
      "identity",
      "square"
    ],
    "maxGraphDepth": 4,
    "maxGraphNodes": 10
  }
}