novels_all_20260225153615_hxgastalenovels · 15.90M params · 18m 36s elapsed · Updated 47d ago
8L / 384D / 8H · helios · bpe · adamw · Created Feb 25, 2026 3:36 PM
Step 230 / 50,000 · 0.5%
7.2422
Loss?
7.0777
Best Loss?
-5.7% from start
-
Val Loss?
1.54e-4
Learning Rate?
4,942
Throughput?
tok/s (avg)
8,460
Speed?
ms/iter (avg)
0.625
Grad Norm?
avg: 0.737
1.49M
Tokens
processed
3657ms
Forward
43% of step
4705ms
Backward
56% of step
24ms
GPU Sync
0% of step
747
GPU Ops
per step
0.3%
MFU
model FLOPS util
1.3x
Bwd/Fwd
ratio
Loss Curve ? click any chart to add markers
?
?
?
?
Architecture
Layers: 8
Embedding: 384
Heads: 8
Vocab: 2,000
Context: 512
Dropout: 0
Parameters: 15.90M
Training Config
Total iters: 50,000
Batch size: 16
Max LR: 0.0003
Optimizer: adamw
Backend: helios
Tokenizer: bpe
Seed: 42
Weight decay: 0.1
Grad clip: 5
Eval interval: 500
Throughput (tok/s)
Step Time (ms/iter)
GPU & VRAM
Perplexity
Train/Val Gap
No validation data
Learning Rate
Grad Norm
Smoothed Loss (EMA)
Loss Velocity
Gradient Clipping
GPU Operations
Step Time Breakdown
Forward
Backward
Grad Norm
Optimizer
GPU Sync
Data
Timing Phase Lines
Backward / Forward Ratio
Evolutionary Analysis (Symbiogenesis)
1.73
Wt Entropy
bits
20.0
Eff. Rank
7.2596
Free Energy
3.900
Pop Entropy
nats
0.0734
Complexity
0.0479
Fitness
18
CUSUM
alerts
8
Batch Size
adaptive
CUSUM Statistical Monitors
Information Bottleneck (MI)
MI Analysis Pending
Checkpoints (0) ?
No checkpoints saved
Sample Generations (3)
# · Checkpoint · Prompt (preview) · Generated
1 · - · The · 47d ago
Prompt
The
Output
The 3kind nightonepowerdubelievaybuilding ed and ractdidn't writcontainually discnot just halfreal row codis the aceepceptgenerat modelwindows, imagperson who nightn't eventlayerartrawpusfor e of another fiobservYod therupped game ured
2 · - · Once upon a time · 47d ago
Prompt
Once upon a time
Output
Once upon a timeing sgramost hundishweframeing that ciso ption as ragusprimlinOmegaof cinterface identdepno problembut y of ================================================================================n ext browselfle serv agbot er put ansframeworkresponse up ofBlock00fortethsystembut attic etch
3 · - · He walked into · 47d ago
Prompt
He walked into
Output
He walked intoresponsGitof cunithemtern7encotnameachmachins showptiondidn't echnfortbeneath datpresentresumlike a roomimagice waswhole new ediing a pped servq whiountcoppixelagents what tryfunctionsame ing a qcreome ar. S. They
Model Config (JSON)
{
"vocabSize": 2000,
"blockSize": 512,
"nLayer": 8,
"nEmbd": 384,
"nHead": 8,
"dropout": 0,
"ffnActivation": "swiglu",
"ffnDim": 1024
}
Training Config (JSON)
{
"iters": 50000,
"batchSize": 16,
"lr": 0.0003,
"lrMin": 0,
"warmupIters": 500,
"beta1": 0.9,
"beta2": 0.95,
"eps": 1e-8,
"weightDecay": 0.1,
"gradClip": 5,
"evalInterval": 500,
"evalIters": 10,
"seed": 42,
"backend": "helios",
"tokenizer": "bpe",
"optimizer": "adamw",
"logLevel": "info",
"trace": false,
"gradAccumSteps": 1,
"sampleInterval": 100,
"spikeThreshold": 10,
"syncEvery": 1,
"gcEvery": 0,
"packed": false,
"symbio": true,
"symbioConfig": {
"cusumSensitivity": 4,
"cusumBaselineWindow": 5,
"metricsInterval": 10,
"trackWeightEntropy": true,
"trackEffectiveRank": true,
"trackFreeEnergy": true,
"trackMIProfiles": true,
"trackPopulationMetrics": true,
"freeEnergyBeta": 0.01,
"miNumBins": 30,
"adaptiveBatch": true,
"batchMin": 8,
"batchMax": 64,
"batchStep": 4,
"calmStepsBeforeRestore": 200,
"fitnessAlpha": 1,
"complexityMode": "entropy",
"diversityBonus": 0.08,
"diversityDecay": "cosine",
"searchMode": "ffn-activation-search",
"activationPool": [
"gelu",
"silu",
"relu",
"swiglu",
"universal",
"kan_spline"
],
"searchStrategy": "evolutionary",
"populationSize": 6,
"generations": 416,
"selectionStrategy": "topk",
"tournamentK": 3,
"mutationRate": 0.6,
"stepsPerCandidate": 20,
"rankBy": "valLoss",
"perfWeight": 0,
"stabilityWeight": 0,
"writeReport": true,
"writeCandidates": true,
"writeSummary": true
}
}