novels_all_20260225162239_kx12 · stale · novels · 7.21M params · 15m 34s elapsed · Updated 47d ago
6L / 288D / 6H · helios · bpe · adamw · Created Feb 25, 2026 4:22 PM
Step 654 / 50,000 · 1.3%
7.5762
Loss?
6.9151
Best Loss?
-1.0% from start
7.6607
Val Loss?
best: 7.6550
3.00e-4
Learning Rate?
4,920
Throughput?
tok/s (avg)
1,067
Speed?
ms/iter (avg)
0.916
Grad Norm?
avg: 0.582
3.33M
Tokens
processed
125ms
Forward
12% of step
866ms
Backward
81% of step
19ms
GPU Sync
2% of step
584
GPU Ops
per step
0.7%
MFU
model FLOPS util
6.9x
Bwd/Fwd
ratio
Loss Curve ? click any chart to add markers
?
?
?
?
Architecture
Layers: 6
Embedding: 288
Heads: 6
Vocab: 2,000
Context: 256
Dropout: 0
Parameters: 7.21M
Training Config
Total iters: 50,000
Batch size: 20
Max LR: 0.0003
Optimizer: adamw
Backend: helios
Tokenizer: bpe
Seed: 42
Weight decay: 0.1
Grad clip: 5
Eval interval: 100
Throughput (tok/s)
Step Time (ms/iter)
GPU & VRAM
Perplexity
Train/Val Gap
Learning Rate
Grad Norm
Smoothed Loss (EMA)
Loss Velocity
Gradient Clipping
GPU Operations
Step Time Breakdown
Forward
Backward
Grad Norm
Optimizer
GPU Sync
Data
Timing Phase Lines
Backward / Forward Ratio
Evolutionary Analysis (Symbiogenesis)
1.78
Wt Entropy
bits
20.0
Eff. Rank
7.0170
Free Energy
3.894
Pop Entropy
nats
0.0791
Complexity
0.0459
Fitness
638
CUSUM
alerts
12
Batch Size
adaptive
CUSUM Statistical Monitors
Information Bottleneck (MI)
MI Analysis Pending
Checkpoints (0) ?
No checkpoints saved
Sample Generations (3)
# | Checkpoint | Prompt (preview) | Generated
1 | - | The | 47d ago
Prompt
The
Output
The something thought contwasforERations intelligcould decreferimple people en the s to le n'trequbeforGPheiter couldyouof sembctustill ely sput . They 't with phwas alsoaxbookpresentchestembtheme t as a crebackgerlimwesu
2 | - | Once upon a time | 47d ago
Prompt
Once upon a time
Output
Once upon a timebre there promptdescriTraves ativfirste. The that was ese they were pter eathot on ction- storFetchastwas slolininstESode ================================otword whiTHE , andINsomeone is s."
afterorgphrolternpiies Febru andt ite datoll
3 | - | He walked into | 47d ago
Prompt
He walked into
Output
He walked into callpurOptionrun turnhaddro.
It anc," ing that lessum thanust ed.
anddata place ponentexact. Nboth .
It ind Cdatect for a atmod. Bator if was the him - illits? artplac whatations ownwrite ed on The ridually
Model Config (JSON)
{
"vocabSize": 2000,
"blockSize": 256,
"nLayer": 6,
"nEmbd": 288,
"nHead": 6,
"dropout": 0,
"ffnActivation": "swiglu",
"ffnDim": 768
}
Training Config (JSON)
{
"iters": 50000,
"batchSize": 20,
"lr": 0.0003,
"lrMin": 0,
"warmupIters": 500,
"beta1": 0.9,
"beta2": 0.95,
"eps": 1e-8,
"weightDecay": 0.1,
"gradClip": 5,
"evalInterval": 100,
"evalIters": 10,
"seed": 42,
"backend": "helios",
"tokenizer": "bpe",
"optimizer": "adamw",
"logLevel": "info",
"trace": false,
"gradAccumSteps": 1,
"sampleInterval": 100,
"spikeThreshold": 10,
"syncEvery": 1,
"gcEvery": 0,
"packed": false,
"symbio": true,
"symbioConfig": {
"cusumSensitivity": 4,
"cusumBaselineWindow": 5,
"metricsInterval": 10,
"trackWeightEntropy": true,
"trackEffectiveRank": true,
"trackFreeEnergy": true,
"trackMIProfiles": false,
"trackPopulationMetrics": true,
"freeEnergyBeta": 0.01,
"miNumBins": 30,
"adaptiveBatch": true,
"batchMin": 8,
"batchMax": 64,
"batchStep": 4,
"calmStepsBeforeRestore": 200,
"fitnessAlpha": 1,
"complexityMode": "entropy",
"diversityBonus": 0.1,
"diversityDecay": "cosine",
"searchMode": "composed-activation-search",
"activationPool": [
"gelu",
"relu",
"silu",
"swiglu",
"universal",
"kan_spline"
],
"searchStrategy": "evolutionary",
"populationSize": 8,
"generations": 250,
"selectionStrategy": "topk",
"tournamentK": 3,
"mutationRate": 0.7,
"stepsPerCandidate": 25,
"rankBy": "valLoss",
"perfWeight": 0,
"stabilityWeight": 0,
"writeReport": true,
"writeCandidates": true,
"writeSummary": true,
"basisPool": [
"silu",
"relu",
"gelu",
"identity",
"square"
],
"maxGraphDepth": 4,
"maxGraphNodes": 10
}
}