Skip to content

autograd_cuda

fn attach_context_session #

fn attach_context_session(mut ctx autograd.Context[f64])

attach_context_session initializes Context[f64].device_session when nil.

fn cuda_backward_enabled #

fn cuda_backward_enabled() bool

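cuda_backward_enabled takes no arguments and returns a bool; judging by its name, it reports whether the CUDA backward path is enabled in the current build (confirm against the implementation).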

fn cuda_optimizer_enabled #

fn cuda_optimizer_enabled() bool

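cuda_optimizer_enabled takes no arguments and returns a bool; judging by its name, it reports whether the CUDA optimizer path is enabled in the current build (confirm against the implementation).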

fn gpu_activations_enabled #

fn gpu_activations_enabled() bool

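gpu_activations_enabled takes no arguments and returns a bool; judging by its name, it reports whether GPU activations are enabled in the current build (confirm against the implementation).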

fn linear_backward_f64 #

fn linear_backward_f64(grad &vtl.Tensor[f64], input &vtl.Tensor[f64], weight &vtl.Tensor[f64],
	bias_needs_grad bool, mut session DeviceSession) ![]&vtl.Tensor[f64]

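linear_backward_f64 is the session-aware counterpart of linear_backward_f64_cpu: it takes the same grad, input and weight tensors and bias_needs_grad flag, plus a mutable DeviceSession, and returns an array of f64 tensors or an error.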

fn linear_backward_f64_cpu #

fn linear_backward_f64_cpu(grad &vtl.Tensor[f64], input &vtl.Tensor[f64],
	weight &vtl.Tensor[f64], bias_needs_grad bool) ![]&vtl.Tensor[f64]

linear_backward_f64_cpu computes the gradients of the Linear gate on the CPU and returns them as an array of f64 tensors, or an error.

fn linear_forward_f64_cpu #

fn linear_forward_f64_cpu(x &vtl.Tensor[f64], weights &vtl.Tensor[f64], bias &vtl.Tensor[f64]) !&vtl.Tensor[f64]

linear_forward_f64_cpu is the CPU fallback used by all builds. Uses an explicit Wᵀ buffer (weights.t() is a non-contiguous view; matmul copy is unsafe).

fn new_device_session #

fn new_device_session() &DeviceSession

new_device_session creates an empty session (CUDA init is build-specific).

fn new_device_session_ptr #

fn new_device_session_ptr() voidptr

new_device_session_ptr returns a session as voidptr for Context[f64] (avoids autograd↔autograd_cuda cycle).

fn session_bind_gpu_activation #

fn session_bind_gpu_activation(mut s DeviceSession, act_field &voidptr)

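session_bind_gpu_activation takes a mutable DeviceSession and a reference to a voidptr field (act_field) and returns nothing; judging by its name, it binds a GPU activation to that field through the session (confirm against the implementation).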

struct DeviceSession #

// DeviceSession groups the device-related state declared below: an enable
// flag, two opaque pointers and three f64 staging buffers. All fields are
// public and mutable (`pub mut:`).
// `@[heap]` tells V to always allocate instances of this struct on the heap.
@[heap]
struct DeviceSession {
pub mut:
	// NOTE(review): presumably true when the device path is active for this
	// session — confirm against init_device in CUDA builds.
	enabled bool
	// Phase 2: opaque GPU activation chain (`DeviceGpuChain` in CUDA builds).
	// Stored as voidptr and nil by default.
	gpu_chain voidptr = unsafe { nil }
	// Staging buffers for cuBLAS GEMM (column-major staging, row-major output).
	// NOTE(review): by name, `gemm_x_col` / `gemm_w_col` look like the
	// column-major copies of the input and weights, and `gemm_out_row` the
	// row-major result — confirm against the CUDA implementation.
	gemm_x_col   []f64
	gemm_w_col   []f64
	gemm_out_row []f64
	// Phase 4 (#106): opaque DeviceOptimizerState in CUDA builds.
	// Stored as voidptr and nil by default.
	optimizer_state voidptr = unsafe { nil }
}

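DeviceSession holds the per-session device state: an enabled flag, an opaque GPU activation chain pointer, staging buffers for cuBLAS GEMM, and an opaque optimizer state pointer. Both pointers default to nil.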

fn (DeviceSession) init_device #

fn (mut s DeviceSession) init_device()

init_device is a no-op without -d cuda.

fn (DeviceSession) linear_forward_f64 #

fn (mut s DeviceSession) linear_forward_f64(x &vtl.Tensor[f64], weights &vtl.Tensor[f64],
	bias &vtl.Tensor[f64], _input_gpu voidptr) !&vtl.Tensor[f64]

In builds without CUDA, linear_forward_f64 always returns an error so that callers fall back to the CPU path.