nn.optimizers
fn adagrad #
fn adagrad[T](config AdaGradOptimizerConfig) &AdaGradOptimizer[T]
adagrad creates a new AdaGradOptimizer.
fn adam_optimizer #
fn adam_optimizer[T](config AdamOptimizerConfig) &AdamOptimizer[T]
adam_optimizer creates a new AdamOptimizer with the given configuration.
Example
import vtl.nn.optimizers
opt := optimizers.adam_optimizer[f64](learning_rate: 0.001)
opt.build_params(model.layers())
// inside training loop:
opt.update()!
fn adam_step_f32_cpu #
fn adam_step_f32_cpu(grad []f32, mut theta []f32, mut m []f32, mut v []f32, p AdamStepParams)
adam_step_f32_cpu performs one Adam update on flat f32 buffers (same math as napply path).
fn adam_step_f64 #
fn adam_step_f64(grad []f64, mut theta []f64, mut m []f64, mut v []f64, p AdamStepParams,
session voidptr, slot int)
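adam_step_f64 performs one Adam update on flat f64 buffers. In builds without CUDA it delegates to the CPU implementation; session is passed as an opaque voidptr so that this module does not have to import autograd_cuda.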
fn adam_step_f64_cpu #
fn adam_step_f64_cpu(grad []f64, mut theta []f64, mut m []f64, mut v []f64, p AdamStepParams)
adam_step_f64_cpu performs one Adam update on flat CPU buffers.
fn adam_use_cuda_optimizer #
fn adam_use_cuda_optimizer() bool
adam_use_cuda_optimizer returns a bool. Judging by its name, it reports whether the CUDA-backed Adam update path is used (confirm against the implementation).
fn adam_use_vulkan_optimizer #
fn adam_use_vulkan_optimizer() bool
adam_use_vulkan_optimizer returns a bool. Judging by its name, it reports whether the Vulkan-backed Adam update path is used (confirm against the implementation).
fn adamw #
fn adamw[T](config AdamWOptimizerConfig) &AdamWOptimizer[T]
adamw creates a new AdamWOptimizer.
fn cosine_annealing_lr #
fn cosine_annealing_lr[T](t_max int, lrd f64) &CosineAnnealingLR[T]
cosine_annealing_lr creates a CosineAnnealingLR scheduler: LR decays from current_lr to lrd following a cosine schedule over t_max steps.
fn exponential_lr #
fn exponential_lr[T](gamma f64) &ExponentialLR[T]
exponential_lr creates an ExponentialLR scheduler: LR decays by gamma at every step. new_lr = current_lr * gamma^step
fn reduce_lr_on_plateau #
fn reduce_lr_on_plateau[T](config ReduceLROnPlateauConfig) &ReduceLROnPlateau[T]
reduce_lr_on_plateau creates a ReduceLROnPlateau scheduler. Reduces LR by factor when the monitored metric stops improving for patience steps. Pass metric_delta to next_lr each step: negative = improvement.
fn rmsprop #
fn rmsprop[T](config RMSPropOptimizerConfig) &RMSPropOptimizer[T]
rmsprop creates a new RMSPropOptimizer.
fn sgd #
fn sgd[T](config SgdOptimizerConfig) &SgdOptimizer[T]
sgd creates a new SgdOptimizer.
fn step_lr #
fn step_lr[T](step_size int, gamma f64) &StepLR[T]
step_lr creates a StepLR scheduler: LR decays by gamma every step_size steps. e.g. step_size=30, gamma=0.1 → LR is 10× smaller after each 30 steps.
fn try_adam_update_f32_vulkan #
fn try_adam_update_f32_vulkan(mut v autograd.Variable[f32], mut m_tensor vtl.Tensor[f32],
mut v_tensor vtl.Tensor[f32], step AdamStepParams) bool
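try_adam_update_f32_vulkan attempts an Adam update of the f32 variable v through the Vulkan path, using m_tensor and v_tensor as the moment tensors and step as the scalar step parameters. It returns a bool; presumably true means the Vulkan update was applied and false means the caller should fall back to another path (confirm against the implementation).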
interface Scheduler #
interface Scheduler[T] {
next_lr(current_lr f64, step int) f64
}
Scheduler is the interface for learning rate schedulers. Implementations update the learning rate based on the current training step.
fn (AdaGradOptimizer[T]) build_params #
fn (mut o AdaGradOptimizer[T]) build_params(layers []types.Layer[T])
build_params registers all trainable variables from layers. Call once before training.
fn (AdaGradOptimizer[T]) update #
fn (mut o AdaGradOptimizer[T]) update() !
update performs one AdaGrad parameter update and zeros all gradients.
fn (AdamOptimizer[T]) build_params #
fn (mut o AdamOptimizer[T]) build_params(layers []types.Layer[T])
build_params registers all trainable variables from layers into the optimizer. Call once after constructing the model, before the first update().
fn (AdamOptimizer[T]) update #
fn (mut o AdamOptimizer[T]) update() !
update performs one Adam parameter update step and zeros all gradients. Must be called after loss.backward().
fn (AdamWOptimizer[T]) build_params #
fn (mut o AdamWOptimizer[T]) build_params(layers []types.Layer[T])
build_params registers all trainable variables from layers. Call once before training.
fn (AdamWOptimizer[T]) update #
fn (mut o AdamWOptimizer[T]) update() !
update performs one AdamW parameter update and zeros all gradients.
fn (CosineAnnealingLR[T]) next_lr #
fn (s &CosineAnnealingLR[T]) next_lr(current_lr f64, step int) f64
next_lr returns the learning rate for the given step under the cosine annealing schedule, which decays from current_lr towards lrd over t_max steps.
fn (ExponentialLR[T]) next_lr #
fn (s &ExponentialLR[T]) next_lr(current_lr f64, step int) f64
next_lr returns the learning rate for the given step under exponential decay: current_lr * gamma^step.
fn (RMSPropOptimizer[T]) build_params #
fn (mut o RMSPropOptimizer[T]) build_params(layers []types.Layer[T])
build_params registers all trainable variables from layers. Call once before training.
fn (RMSPropOptimizer[T]) update #
fn (mut o RMSPropOptimizer[T]) update() !
update performs one RMSProp parameter update and zeros all gradients.
fn (ReduceLROnPlateau[T]) next_lr #
fn (mut s ReduceLROnPlateau[T]) next_lr(current_lr f64, step int, metric_delta f64) f64
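next_lr returns the learning rate to use after observing metric_delta for this step (negative = improvement). The LR is reduced by factor when the metric has stopped improving for patience steps. Note that this method takes an extra metric_delta argument compared with Scheduler.next_lr.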
fn (SgdOptimizer[T]) build_params #
fn (mut o SgdOptimizer[T]) build_params(layers []types.Layer[T])
build_params registers all trainable variables from layers. Call once before training.
fn (SgdOptimizer[T]) update #
fn (mut o SgdOptimizer[T]) update() !
update performs one SGD parameter update and zeros all gradients.
fn (StepLR[T]) next_lr #
fn (s &StepLR[T]) next_lr(current_lr f64, step int) f64
next_lr returns the learning rate for the given step under the step schedule, in which the LR decays by gamma every step_size steps.
struct AdaGradOptimizer #
struct AdaGradOptimizer[T] {
learning_rate f64
epsilon f64
pub mut:
weight_decay f64
params []&autograd.Variable[T]
accumulated_sq_grads []&vtl.Tensor[T]
}
AdaGradOptimizer implements the AdaGrad (Adaptive Gradient) algorithm. Accumulates squared gradients and adapts the learning rate per parameter.
struct AdaGradOptimizerConfig #
struct AdaGradOptimizerConfig {
pub:
learning_rate f64 = 0.01
epsilon f64 = 1e-8
weight_decay f64 = 0.0
}
AdaGradOptimizerConfig holds the options accepted by adagrad: learning_rate (default 0.01), epsilon (default 1e-8) and weight_decay (default 0.0).
struct AdamOptimizer #
struct AdamOptimizer[T] {
learning_rate f64
epsilon f64
pub mut:
beta1 f64
beta2 f64
beta1_t f64
beta2_t f64
params []&autograd.Variable[T]
first_moments []&vtl.Tensor[T]
second_moments []&vtl.Tensor[T]
}
AdamOptimizer implements the Adam optimiser (Adaptive Moment Estimation).
Maintains per-parameter first-moment (mean) and second-moment (uncentred variance) moving averages of the gradients, with bias correction applied at each step.
Update rule:
m = β₁·m + (1-β₁)·g
v = β₂·v + (1-β₂)·g²
θ = θ - lr · √(1-β₂ᵗ) / (1-β₁ᵗ) · m / (√v + ε)
Reference: Kingma & Ba, "Adam: A Method for Stochastic Optimization" (2014).
struct AdamOptimizerConfig #
struct AdamOptimizerConfig {
pub:
learning_rate f64 = 0.001
beta1 f64 = 0.9
beta2 f64 = 0.999
epsilon f64 = 1e-8
}
AdamOptimizerConfig holds the options accepted by adam_optimizer: learning_rate (default 0.001), beta1 (default 0.9), beta2 (default 0.999) and epsilon (default 1e-8).
struct AdamStepParams #
struct AdamStepParams {
pub:
beta1 f64
beta2 f64
lr_t f64
epsilon f64
}
AdamStepParams holds scalar Adam state for one flat parameter vector.
struct AdamWOptimizer #
struct AdamWOptimizer[T] {
learning_rate f64
epsilon f64
pub mut:
beta1 f64
beta2 f64
beta1_t f64
beta2_t f64
weight_decay f64
params []&autograd.Variable[T]
first_moments []&vtl.Tensor[T]
second_moments []&vtl.Tensor[T]
}
AdamWOptimizer implements AdamW (Adam with Decoupled Weight Decay).
Identical to Adam but weight decay is applied directly to the parameters (not through the gradient), which typically gives better generalisation.
Update rule (after bias correction, where m̂ and v̂ denote the bias-corrected first and second moments): θ = θ - lr · (m̂ / (√v̂ + ε) + weight_decay · θ)
Reference: Loshchilov & Hutter, "Decoupled Weight Decay Regularization" (2017).
struct AdamWOptimizerConfig #
struct AdamWOptimizerConfig {
pub:
learning_rate f64 = 0.001
beta1 f64 = 0.9
beta2 f64 = 0.999
epsilon f64 = 1e-8
weight_decay f64 = 0.01
}
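AdamWOptimizerConfig holds the options accepted by adamw: learning_rate (default 0.001), beta1 (default 0.9), beta2 (default 0.999), epsilon (default 1e-8) and weight_decay (default 0.01).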
struct CosineAnnealingLR #
struct CosineAnnealingLR[T] {
pub:
t_max int // maximum number of iterations
lrd f64 // lower bound lr (default: 0)
}
CosineAnnealingLR decays the learning rate from its current value down to the lower bound lrd, following a cosine schedule over t_max steps.
struct ExponentialLR #
struct ExponentialLR[T] {
gamma f64
}
ExponentialLR decays the learning rate by gamma at every step.
struct RMSPropOptimizer #
struct RMSPropOptimizer[T] {
learning_rate f64
epsilon f64
pub mut:
alpha f64 // smoothing constant
weight_decay f64
params []&autograd.Variable[T]
sq_avg []&vtl.Tensor[T]
}
RMSPropOptimizer implements the RMSProp optimiser.
Maintains a running average of the squared gradient per parameter and normalises the update by it, allowing different effective learning rates per parameter.
Update rule:
sq_avg = α·sq_avg + (1-α)·g²
θ = θ - lr · (g / (√sq_avg + ε) + wd·θ)
Reference: Hinton, "Neural Networks for Machine Learning", Lecture 6e.
struct RMSPropOptimizerConfig #
struct RMSPropOptimizerConfig {
pub:
learning_rate f64 = 0.001
alpha f64 = 0.99
epsilon f64 = 1e-8
weight_decay f64 = 0.0
}
RMSPropOptimizerConfig holds the options accepted by rmsprop: learning_rate (default 0.001), alpha (default 0.99), epsilon (default 1e-8) and weight_decay (default 0.0).
struct ReduceLROnPlateau #
struct ReduceLROnPlateau[T] {
factor f64
patience int
threshold f64
epsilon f64
cooldown int
pub mut:
wait int
current_lr f64
}
ReduceLROnPlateau reduces LR when a metric has stopped improving.
struct ReduceLROnPlateauConfig #
struct ReduceLROnPlateauConfig {
pub:
factor f64 = 0.1
patience int = 10
threshold f64 = 1e-4
epsilon f64 = 1e-8
cooldown int
}
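ReduceLROnPlateauConfig holds the options accepted by reduce_lr_on_plateau: factor (default 0.1), patience (default 10), threshold (default 1e-4), epsilon (default 1e-8) and cooldown (no explicit default, so it takes V's zero value, 0).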
struct SgdOptimizer #
struct SgdOptimizer[T] {
learning_rate f64
pub mut:
params []&autograd.Variable[T]
}
SgdOptimizer implements vanilla Stochastic Gradient Descent. The struct and its config expose only a learning rate; no momentum option is available here.
struct SgdOptimizerConfig #
struct SgdOptimizerConfig {
pub:
learning_rate f64 = 0.001
}
SgdOptimizerConfig holds the options accepted by sgd: learning_rate (default 0.001).
struct StepLR #
struct StepLR[T] {
step_size int
gamma f64
}
StepLR decays the learning rate by gamma every step_size steps.
- fn adagrad
- fn adam_optimizer
- fn adam_step_f32_cpu
- fn adam_step_f64
- fn adam_step_f64_cpu
- fn adam_use_cuda_optimizer
- fn adam_use_vulkan_optimizer
- fn adamw
- fn cosine_annealing_lr
- fn exponential_lr
- fn reduce_lr_on_plateau
- fn rmsprop
- fn sgd
- fn step_lr
- fn try_adam_update_f32_vulkan
- interface Scheduler
- type AdaGradOptimizer[T]
- type AdamOptimizer[T]
- type AdamWOptimizer[T]
- type CosineAnnealingLR[T]
- type ExponentialLR[T]
- type RMSPropOptimizer[T]
- type ReduceLROnPlateau[T]
- type SgdOptimizer[T]
- type StepLR[T]
- struct AdaGradOptimizer
- struct AdaGradOptimizerConfig
- struct AdamOptimizer
- struct AdamOptimizerConfig
- struct AdamStepParams
- struct AdamWOptimizer
- struct AdamWOptimizerConfig
- struct CosineAnnealingLR
- struct ExponentialLR
- struct RMSPropOptimizer
- struct RMSPropOptimizerConfig
- struct ReduceLROnPlateau
- struct ReduceLROnPlateauConfig
- struct SgdOptimizer
- struct SgdOptimizerConfig
- struct StepLR