autograd_cuda
fn attach_context_session #
fn attach_context_session(mut ctx autograd.Context[f64])
attach_context_session initializes Context[f64].device_session when nil.
fn cuda_backward_enabled #
fn cuda_backward_enabled() bool
cuda_backward_enabled returns true when the CUDA backward path is enabled.
fn cuda_optimizer_enabled #
fn cuda_optimizer_enabled() bool
cuda_optimizer_enabled returns true when the CUDA optimizer path is enabled.
fn gpu_activations_enabled #
fn gpu_activations_enabled() bool
gpu_activations_enabled returns true when GPU activations are enabled.
fn linear_backward_f64 #
fn linear_backward_f64(grad &vtl.Tensor[f64], input &vtl.Tensor[f64], weight &vtl.Tensor[f64],
bias_needs_grad bool, mut session DeviceSession) ![]&vtl.Tensor[f64]
linear_backward_f64 computes the Linear gate gradients for f64 tensors using the given DeviceSession and returns the gradient tensors, or an error.
fn linear_backward_f64_cpu #
fn linear_backward_f64_cpu(grad &vtl.Tensor[f64], input &vtl.Tensor[f64],
weight &vtl.Tensor[f64], bias_needs_grad bool) ![]&vtl.Tensor[f64]
linear_backward_f64_cpu implements Linear gate gradients on CPU.
fn linear_forward_f64_cpu #
fn linear_forward_f64_cpu(x &vtl.Tensor[f64], weights &vtl.Tensor[f64], bias &vtl.Tensor[f64]) !&vtl.Tensor[f64]
linear_forward_f64_cpu is the CPU fallback used by all builds. It uses an explicit Wᵀ buffer because weights.t() is a non-contiguous view and the copy matmul makes of it is unsafe.
fn new_device_session #
fn new_device_session() &DeviceSession
new_device_session creates an empty session (CUDA init is build-specific).
fn new_device_session_ptr #
fn new_device_session_ptr() voidptr
new_device_session_ptr returns a session as voidptr for Context[f64] (avoids autograd↔autograd_cuda cycle).
fn session_bind_gpu_activation #
fn session_bind_gpu_activation(mut s DeviceSession, act_field &voidptr)
session_bind_gpu_activation exposes this operation as part of the public API.
struct DeviceSession #
// DeviceSession groups the per-session device state declared below: an enabled
// flag, an opaque GPU activation chain pointer, GEMM staging buffers, and an
// opaque optimizer-state pointer. All fields are public and mutable.
struct DeviceSession {
pub mut:
// NOTE(review): presumably set once the CUDA device has been initialised for
// this session; confirm against init_device.
enabled bool
// Phase 2: opaque GPU activation chain (`DeviceGpuChain` in CUDA builds).
// Defaults to nil.
gpu_chain voidptr = unsafe { nil }
// Staging buffers for cuBLAS GEMM (column-major staging, row-major output)
// NOTE(review): judging by the names, the `_col` buffers stage the input and
// the weights, and `gemm_out_row` receives the result; confirm.
gemm_x_col []f64
gemm_w_col []f64
gemm_out_row []f64
// Phase 4 (#106): opaque DeviceOptimizerState in CUDA builds.
// Defaults to nil.
optimizer_state voidptr = unsafe { nil }
}
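DeviceSession holds per-session device state: an enabled flag, an opaque GPU activation chain pointer, staging buffers for cuBLAS GEMM, and an opaque optimizer-state pointer.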
fn (DeviceSession) init_device #
fn (mut s DeviceSession) init_device()
init_device is a no-op without -d cuda.
fn (DeviceSession) linear_forward_f64 #
fn (mut s DeviceSession) linear_forward_f64(x &vtl.Tensor[f64], weights &vtl.Tensor[f64],
bias &vtl.Tensor[f64], _input_gpu voidptr) !&vtl.Tensor[f64]
linear_forward_f64 always returns an error in builds without CUDA, so callers fall back to the CPU path.