
nn.layers #

Stub implementations for non-Vulkan builds: conv2d_forward_vulkan, embedding_forward_vulkan, and LayerNormLayerVulkan fall through to CPU; the softmax stubs' real implementations live in softmax_vulkan_d_vulkan.v.

fn attention_apply_values_vulkan #

fn attention_apply_values_vulkan[T](weights &vtl.Tensor[T], v &vtl.Tensor[T], head_dim int) !&vtl.Tensor[T]

fn attention_forward_vulkan #

fn attention_forward_vulkan[T](q &vtl.Tensor[T], k &vtl.Tensor[T], v &vtl.Tensor[T], head_dim int) !&vtl.Tensor[T]

attention_forward_vulkan stub: not available without Vulkan.

fn attention_gate #

fn attention_gate[T](input &vtl.Tensor[T], w_q &vtl.Tensor[T], w_k &vtl.Tensor[T], w_v &vtl.Tensor[T], w_o &vtl.Tensor[T], num_heads int, head_dim int) &AttentionGate[T]

fn attention_scores_vulkan #

fn attention_scores_vulkan[T](q &vtl.Tensor[T], k &vtl.Tensor[T], head_dim int) !&vtl.Tensor[T]

fn avgpool2d_forward_vulkan #

fn avgpool2d_forward_vulkan[T](input &vtl.Tensor[T], kernel_size [2]int, stride [2]int, padding [2]int, dev voidptr) !&vtl.Tensor[T]

avgpool2d_forward_vulkan stub

fn avgpool2d_gate #

fn avgpool2d_gate[T](input &vtl.Tensor[T], kernel []int, padding []int, stride []int) &AvgPool2DGate[T]

fn avgpool2d_layer #

fn avgpool2d_layer[T](ctx &autograd.Context[T], input_shape []int, kernel []int, padding []int, stride []int) types.Layer[T]

avgpool2d_layer creates an AveragePool2DLayer.

fn batchnorm1d_forward_vulkan #

fn batchnorm1d_forward_vulkan[T](input &vtl.Tensor[T], eps f32, dev voidptr) !&vtl.Tensor[T]

fn batchnorm1d_gate #

fn batchnorm1d_gate[T](input &vtl.Tensor[T], gamma &vtl.Tensor[T], beta &vtl.Tensor[T], mean &vtl.Tensor[T], var_ &vtl.Tensor[T], eps f64) &BatchNorm1DGate[T]

fn batchnorm1d_layer #

fn batchnorm1d_layer[T](ctx &autograd.Context[T], num_features int, config BatchNorm1DConfig) types.Layer[T]

fn conv2d_forward_vulkan #

fn conv2d_forward_vulkan[T](input &vtl.Tensor[T],
	weight &vtl.Tensor[T],
	bias &vtl.Tensor[T],
	kernel_size []int,
	config Conv2DConfig) !&vtl.Tensor[T]

fn conv2d_gate #

fn conv2d_gate[T](input &vtl.Tensor[T], weight &vtl.Tensor[T], bias &vtl.Tensor[T], kernel_size []int, config Conv2DConfig) &Conv2DGate[T]

fn conv2d_layer #

fn conv2d_layer[T](ctx &autograd.Context[T], in_ch int, out_ch int, kernel_size []int, config Conv2DConfig) types.Layer[T]

conv2d_layer creates a Conv2DLayer.

fn dropout_layer #

fn dropout_layer[T](ctx &autograd.Context[T], output_shape []int, data DropoutLayerConfig) types.Layer[T]

fn elu_layer #

fn elu_layer[T](ctx &autograd.Context[T], output_shape []int, data EluLayerConfig) types.Layer[T]

fn embedding_forward_vulkan #

fn embedding_forward_vulkan[T](input &vtl.Tensor[T], weight &vtl.Tensor[T]) !&vtl.Tensor[T]

fn embedding_gate #

fn embedding_gate[T](input &vtl.Tensor[T], weight &vtl.Tensor[T]) &EmbeddingGate[T]

fn embedding_layer #

fn embedding_layer[T](ctx &autograd.Context[T], vocab_size int, embedding_dim int) types.Layer[T]

embedding_layer creates an EmbeddingLayer.

fn flatten_layer #

fn flatten_layer[T](ctx &autograd.Context[T], shape []int) types.Layer[T]

fn gelu_forward_vulkan #

fn gelu_forward_vulkan[T](x &vtl.Tensor[T], params storage.VulkanStorageParams) !&vtl.Tensor[T]

fn gelu_layer #

fn gelu_layer[T](ctx &autograd.Context[T], output_shape []int) types.Layer[T]

fn global_avgpool2d_forward_vulkan #

fn global_avgpool2d_forward_vulkan[T](input &vtl.Tensor[T], dev voidptr) !&vtl.Tensor[T]

global_avgpool2d_forward_vulkan stub

fn global_avgpool2d_gate #

fn global_avgpool2d_gate[T](input &vtl.Tensor[T]) &GlobalAvgPool2DGate[T]

fn global_avgpool2d_layer #

fn global_avgpool2d_layer[T](ctx &autograd.Context[T]) types.Layer[T]

global_avgpool2d_layer creates a GlobalAvgPool2DLayer.

fn gru_layer #

fn gru_layer[T](ctx &autograd.Context[T], input_size int, hidden_size int) types.Layer[T]

gru_layer creates a GRULayer with Kaiming-initialized weights.

fn input_layer #

fn input_layer[T](ctx &autograd.Context[T], shape []int) types.Layer[T]

fn layer_norm_layer #

fn layer_norm_layer[T](ctx &autograd.Context[T], normalized_shape []int, config LayerNormConfig) types.Layer[T]

layer_norm_layer creates a LayerNormLayer.

fn layer_norm_vulkan_layer #

fn layer_norm_vulkan_layer[T](ctx &autograd.Context[T], normalized_shape []int, params storage.VulkanStorageParams, config LayerNormVulkanConfig) types.Layer[T]

fn layernorm_forward_vulkan #

fn layernorm_forward_vulkan[T](x &vtl.Tensor[T], eps f32, params storage.VulkanStorageParams) !&vtl.Tensor[T]

fn layernorm_gate #

fn layernorm_gate[T](input &vtl.Tensor[T], gamma &vtl.Tensor[T], beta &vtl.Tensor[T], eps f64) &LayerNormGate[T]

fn leaky_relu_layer #

fn leaky_relu_layer[T](ctx &autograd.Context[T], output_shape []int, data LeakyReluLayerConfig) types.Layer[T]

fn linear_forward_vcl #

fn linear_forward_vcl[T](_ &vtl.Tensor[T], _ &vtl.Tensor[T], _ &vtl.Tensor[T], _ storage.VclStorageParams) !&vtl.Tensor[T]

Stubs when compiling without -d vcl (mirrors linear_vulkan_notd_vulkan.v pattern).

fn linear_forward_vulkan #

fn linear_forward_vulkan[T](_ &vtl.Tensor[T], _ &vtl.Tensor[T], _ &vtl.Tensor[T], _ storage.VulkanStorageParams) !&vtl.Tensor[T]

Stubs when compiling without -d vulkan (mirrors nn + tensor notd patterns).

fn linear_layer #

fn linear_layer[T](ctx &autograd.Context[T], input_dim int, output_dim int) types.Layer[T]

linear_layer creates a LinearLayer.

fn lstm_layer #

fn lstm_layer[T](ctx &autograd.Context[T], input_size int, hidden_size int, num_layers int) types.Layer[T]

lstm_layer creates an LSTMLayer with Kaiming-initialized weights.

fn maxpool2d_forward_vulkan #

fn maxpool2d_forward_vulkan[T](input &vtl.Tensor[T], kernel_size [2]int, stride [2]int, padding [2]int, dev voidptr) !&vtl.Tensor[T]

fn maxpool2d_layer #

fn maxpool2d_layer[T](ctx &autograd.Context[T], input_shape []int, kernel []int, padding []int, stride []int) types.Layer[T]

fn mish_layer #

fn mish_layer[T](ctx &autograd.Context[T], output_shape []int) types.Layer[T]

fn multihead_attention_layer #

fn multihead_attention_layer[T](ctx &autograd.Context[T], embed_dim int, num_heads int) types.Layer[T]

multihead_attention_layer creates a MultiHeadAttentionLayer.

fn positional_encoding_layer #

fn positional_encoding_layer[T](ctx &autograd.Context[T], embed_dim int, max_len int) !types.Layer[T]

positional_encoding_layer creates a PositionalEncodingLayer.

fn reduce_sum_vulkan #

fn reduce_sum_vulkan[T](x &vtl.Tensor[T], params storage.VulkanStorageParams) ![]T

fn relu_forward_vcl #

fn relu_forward_vcl[T](_ &vtl.Tensor[T], _ storage.VclStorageParams) !&vtl.Tensor[T]

fn relu_forward_vulkan #

fn relu_forward_vulkan[T](_ &vtl.Tensor[T], _ storage.VulkanStorageParams) !&vtl.Tensor[T]

fn relu_layer #

fn relu_layer[T](ctx &autograd.Context[T], output_shape []int) types.Layer[T]

fn sigmoid_forward_vcl #

fn sigmoid_forward_vcl[T](_ &vtl.Tensor[T], _ storage.VclStorageParams) !&vtl.Tensor[T]

fn sigmoid_forward_vulkan #

fn sigmoid_forward_vulkan[T](_ &vtl.Tensor[T], _ storage.VulkanStorageParams) !&vtl.Tensor[T]

fn sigmoid_layer #

fn sigmoid_layer[T](ctx &autograd.Context[T], output_shape []int) types.Layer[T]

fn softmax_forward_vulkan #

fn softmax_forward_vulkan[T](x &vtl.Tensor[T], params storage.VulkanStorageParams) !&vtl.Tensor[T]

fn softmax_layer #

fn softmax_layer[T](ctx &autograd.Context[T], config SoftmaxLayerConfig) types.Layer[T]

fn swish_layer #

fn swish_layer[T](ctx &autograd.Context[T], output_shape []int) types.Layer[T]

fn tanh_layer #

fn tanh_layer[T](ctx &autograd.Context[T], output_shape []int) types.Layer[T]

fn (AttentionGate[T]) backward #

fn (g &AttentionGate[T]) backward[T](payload &autograd.Payload[T]) ![]&vtl.Tensor[T]

fn (AttentionGate[T]) cache #

fn (g &AttentionGate[T]) cache[T](mut result autograd.Variable[T], args ...autograd.CacheParam) !

fn (AveragePool2DLayer[T]) output_shape #

fn (layer &AveragePool2DLayer[T]) output_shape() []int

fn (AveragePool2DLayer[T]) variables #

fn (layer &AveragePool2DLayer[T]) variables() []&autograd.Variable[T]

fn (AveragePool2DLayer[T]) forward #

fn (layer &AveragePool2DLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (AvgPool2DGate[T]) backward #

fn (g &AvgPool2DGate[T]) backward[T](payload &autograd.Payload[T]) ![]&vtl.Tensor[T]

fn (AvgPool2DGate[T]) cache #

fn (g &AvgPool2DGate[T]) cache[T](mut result autograd.Variable[T], args ...autograd.CacheParam) !

fn (AvgPool2DLayerVulkan[T]) forward #

fn (layer &AvgPool2DLayerVulkan[T]) forward(input &vtl.Tensor[T]) !&vtl.Tensor[T]

forward stub

fn (BatchNorm1DGate[T]) backward #

fn (g &BatchNorm1DGate[T]) backward[T](payload &autograd.Payload[T]) ![]&vtl.Tensor[T]

fn (BatchNorm1DGate[T]) cache #

fn (g &BatchNorm1DGate[T]) cache[T](mut result autograd.Variable[T], args ...autograd.CacheParam) !

fn (BatchNorm1DLayerVulkan[T]) forward #

fn (layer &BatchNorm1DLayerVulkan[T]) forward(input &vtl.Tensor[T]) !&vtl.Tensor[T]

fn (BatchNorm1DLayer[T]) output_shape #

fn (layer &BatchNorm1DLayer[T]) output_shape() []int

fn (BatchNorm1DLayer[T]) variables #

fn (layer &BatchNorm1DLayer[T]) variables() []&autograd.Variable[T]

fn (BatchNorm1DLayer[T]) forward #

fn (layer &BatchNorm1DLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (Conv2DGate[T]) backward #

fn (g &Conv2DGate[T]) backward[T](payload &autograd.Payload[T]) ![]&vtl.Tensor[T]

fn (Conv2DGate[T]) cache #

fn (g &Conv2DGate[T]) cache[T](mut result autograd.Variable[T], args ...autograd.CacheParam) !

fn (Conv2DLayer[T]) output_shape #

fn (layer &Conv2DLayer[T]) output_shape() []int

fn (Conv2DLayer[T]) variables #

fn (layer &Conv2DLayer[T]) variables() []&autograd.Variable[T]

fn (Conv2DLayer[T]) forward #

fn (layer &Conv2DLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (DropoutLayer[T]) output_shape #

fn (layer &DropoutLayer[T]) output_shape() []int

fn (DropoutLayer[T]) variables #

fn (_ &DropoutLayer[T]) variables() []&autograd.Variable[T]

fn (DropoutLayer[T]) forward #

fn (layer &DropoutLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (EluLayer[T]) output_shape #

fn (layer &EluLayer[T]) output_shape() []int

fn (EluLayer[T]) variables #

fn (_ &EluLayer[T]) variables() []&autograd.Variable[T]

fn (EluLayer[T]) forward #

fn (layer &EluLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (EmbeddingGate[T]) backward #

fn (g &EmbeddingGate[T]) backward[T](payload &autograd.Payload[T]) ![]&vtl.Tensor[T]

fn (EmbeddingGate[T]) cache #

fn (g &EmbeddingGate[T]) cache[T](mut result autograd.Variable[T], args ...autograd.CacheParam) !

fn (EmbeddingLayer[T]) output_shape #

fn (layer &EmbeddingLayer[T]) output_shape() []int

fn (EmbeddingLayer[T]) variables #

fn (layer &EmbeddingLayer[T]) variables() []&autograd.Variable[T]

fn (EmbeddingLayer[T]) forward #

fn (layer &EmbeddingLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (FlattenLayer[T]) output_shape #

fn (layer &FlattenLayer[T]) output_shape() []int

fn (FlattenLayer[T]) variables #

fn (_ &FlattenLayer[T]) variables() []&autograd.Variable[T]

fn (FlattenLayer[T]) forward #

fn (layer &FlattenLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (GELULayer[T]) output_shape #

fn (layer &GELULayer[T]) output_shape() []int

fn (GELULayer[T]) variables #

fn (_ &GELULayer[T]) variables() []&autograd.Variable[T]

fn (GELULayer[T]) forward #

fn (layer &GELULayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (GlobalAvgPool2DGate[T]) backward #

fn (g &GlobalAvgPool2DGate[T]) backward[T](payload &autograd.Payload[T]) ![]&vtl.Tensor[T]

fn (GlobalAvgPool2DGate[T]) cache #

fn (g &GlobalAvgPool2DGate[T]) cache[T](mut result autograd.Variable[T], args ...autograd.CacheParam) !

fn (GlobalAvgPool2DLayerVulkan[T]) forward #

fn (layer &GlobalAvgPool2DLayerVulkan[T]) forward(input &vtl.Tensor[T]) !&vtl.Tensor[T]

forward stub

fn (GlobalAvgPool2DLayer[T]) output_shape #

fn (layer &GlobalAvgPool2DLayer[T]) output_shape() []int

fn (GlobalAvgPool2DLayer[T]) variables #

fn (layer &GlobalAvgPool2DLayer[T]) variables() []&autograd.Variable[T]

fn (GlobalAvgPool2DLayer[T]) forward #

fn (layer &GlobalAvgPool2DLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (InputLayer[T]) output_shape #

fn (layer &InputLayer[T]) output_shape() []int

fn (InputLayer[T]) variables #

fn (_ &InputLayer[T]) variables() []&autograd.Variable[T]

fn (InputLayer[T]) forward #

fn (layer &InputLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (LayerNormGate[T]) backward #

fn (g &LayerNormGate[T]) backward[T](payload &autograd.Payload[T]) ![]&vtl.Tensor[T]

fn (LayerNormGate[T]) cache #

fn (g &LayerNormGate[T]) cache[T](mut result autograd.Variable[T], args ...autograd.CacheParam) !

fn (LayerNormLayerVulkan[T]) output_shape #

fn (layer &LayerNormLayerVulkan[T]) output_shape() []int

fn (LayerNormLayerVulkan[T]) variables #

fn (layer &LayerNormLayerVulkan[T]) variables() []&autograd.Variable[T]

fn (LayerNormLayerVulkan[T]) forward #

fn (layer &LayerNormLayerVulkan[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (LayerNormLayer[T]) output_shape #

fn (layer &LayerNormLayer[T]) output_shape() []int

fn (LayerNormLayer[T]) variables #

fn (layer &LayerNormLayer[T]) variables() []&autograd.Variable[T]

fn (LayerNormLayer[T]) forward #

fn (layer &LayerNormLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (LeakyReluLayer[T]) output_shape #

fn (layer &LeakyReluLayer[T]) output_shape() []int

fn (LeakyReluLayer[T]) variables #

fn (_ &LeakyReluLayer[T]) variables() []&autograd.Variable[T]

fn (LeakyReluLayer[T]) forward #

fn (layer &LeakyReluLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (LinearLayer[T]) output_shape #

fn (layer &LinearLayer[T]) output_shape() []int

fn (LinearLayer[T]) variables #

fn (layer &LinearLayer[T]) variables() []&autograd.Variable[T]

fn (LinearLayer[T]) forward #

fn (layer &LinearLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (MaxPool2DLayerVulkan[T]) forward #

fn (layer &MaxPool2DLayerVulkan[T]) forward(input &vtl.Tensor[T]) !&vtl.Tensor[T]

fn (MaxPool2DLayer[T]) output_shape #

fn (layer &MaxPool2DLayer[T]) output_shape() []int

fn (MaxPool2DLayer[T]) variables #

fn (layer &MaxPool2DLayer[T]) variables() []&autograd.Variable[T]

fn (MaxPool2DLayer[T]) forward #

fn (layer &MaxPool2DLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (MishLayer[T]) output_shape #

fn (layer &MishLayer[T]) output_shape() []int

fn (MishLayer[T]) variables #

fn (_ &MishLayer[T]) variables() []&autograd.Variable[T]

fn (MishLayer[T]) forward #

fn (layer &MishLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (MultiHeadAttentionLayer[T]) output_shape #

fn (layer &MultiHeadAttentionLayer[T]) output_shape() []int

fn (MultiHeadAttentionLayer[T]) variables #

fn (layer &MultiHeadAttentionLayer[T]) variables() []&autograd.Variable[T]

fn (MultiHeadAttentionLayer[T]) forward #

fn (layer &MultiHeadAttentionLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (PositionalEncodingLayer[T]) output_shape #

fn (layer &PositionalEncodingLayer[T]) output_shape() []int

fn (PositionalEncodingLayer[T]) variables #

fn (layer &PositionalEncodingLayer[T]) variables() []&autograd.Variable[T]

fn (PositionalEncodingLayer[T]) forward #

fn (layer &PositionalEncodingLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (ReLULayer[T]) output_shape #

fn (layer &ReLULayer[T]) output_shape() []int

fn (ReLULayer[T]) variables #

fn (_ &ReLULayer[T]) variables() []&autograd.Variable[T]

fn (ReLULayer[T]) forward #

fn (layer &ReLULayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (SigmoidLayer[T]) output_shape #

fn (layer &SigmoidLayer[T]) output_shape() []int

fn (SigmoidLayer[T]) variables #

fn (_ &SigmoidLayer[T]) variables() []&autograd.Variable[T]

fn (SigmoidLayer[T]) forward #

fn (layer &SigmoidLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (SoftmaxLayer[T]) output_shape #

fn (layer &SoftmaxLayer[T]) output_shape() []int

fn (SoftmaxLayer[T]) variables #

fn (layer &SoftmaxLayer[T]) variables() []&autograd.Variable[T]

fn (SoftmaxLayer[T]) forward #

fn (layer &SoftmaxLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (SwishLayer[T]) output_shape #

fn (layer &SwishLayer[T]) output_shape() []int

fn (SwishLayer[T]) variables #

fn (_ &SwishLayer[T]) variables() []&autograd.Variable[T]

fn (SwishLayer[T]) forward #

fn (layer &SwishLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

fn (TanhLayer[T]) output_shape #

fn (layer &TanhLayer[T]) output_shape() []int

fn (TanhLayer[T]) variables #

fn (_ &TanhLayer[T]) variables() []&autograd.Variable[T]

fn (TanhLayer[T]) forward #

fn (layer &TanhLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]

struct AttentionGate #

struct AttentionGate[T] {
	input     &vtl.Tensor[T] = unsafe { nil }
	w_q       &vtl.Tensor[T] = unsafe { nil }
	w_k       &vtl.Tensor[T] = unsafe { nil }
	w_v       &vtl.Tensor[T] = unsafe { nil }
	w_o       &vtl.Tensor[T] = unsafe { nil }
	num_heads int
	head_dim  int
}

struct AveragePool2DLayer #

struct AveragePool2DLayer[T] {
	kernel      []int
	padding     []int
	stride      []int
	input_shape []int
}

AveragePool2DLayer applies 2D average pooling over a 4D input.

Input: [batch, channels, H, W]
Output: [batch, channels, out_H, out_W]

Config options:

  • kernel — pool window size in (H, W) (default: determined by input_shape)

  • padding — zero-border padding before pooling (default: [0,0])
  • stride — pool stride in (H, W) (default: same as kernel)
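
The out_H/out_W above follow the usual pooling arithmetic. A minimal, self-contained sketch of that formula (illustrative only, not this module's internal code):

fn pool_out_size(in_size int, kernel int, padding int, stride int) int {
	// out = floor((in + 2*padding - kernel) / stride) + 1
	return (in_size + 2 * padding - kernel) / stride + 1
}

fn main() {
	// 28x28 input, 2x2 kernel, stride 2, no padding -> 14x14 output
	println(pool_out_size(28, 2, 0, 2)) // 14
}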

struct AvgPool2DGate #

struct AvgPool2DGate[T] {
	input   &vtl.Tensor[T] = unsafe { nil }
	kernel  []int
	padding []int
	stride  []int
}

struct AvgPool2DLayerVulkan #

struct AvgPool2DLayerVulkan[T] {
pub mut:
	kernel_size [2]int
	stride      [2]int
	padding     [2]int
	device      voidptr
}

AvgPool2DLayerVulkan stub for when Vulkan is not enabled

struct BatchNorm1DConfig #

@[params]
struct BatchNorm1DConfig {
	eps      f64  = 1e-5
	momentum f64  = 0.1
	affine   bool = true
}

BatchNorm1D normalizes a 2D input [batch, features]. Tracks running mean and variance for inference.
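
The running statistics are typically maintained as an exponential moving average of the per-batch statistics. A sketch assuming the common PyTorch-style momentum convention; the exact update rule of this layer is not documented on this page:

// assumed convention: running = (1 - momentum) * running + momentum * batch_stat
fn update_running_stat(running f64, batch_stat f64, momentum f64) f64 {
	return (1.0 - momentum) * running + momentum * batch_stat
}

fn main() {
	mut running_mean := 0.0
	running_mean = update_running_stat(running_mean, 0.5, 0.1) // default momentum = 0.1
	println(running_mean) // 0.05
}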

struct BatchNorm1DGate #

struct BatchNorm1DGate[T] {
	input &vtl.Tensor[T] = unsafe { nil }
	gamma &vtl.Tensor[T] = unsafe { nil }
	beta  &vtl.Tensor[T] = unsafe { nil }
	mean  &vtl.Tensor[T] = unsafe { nil }
	var_  &vtl.Tensor[T] = unsafe { nil }
	eps   f64
}

struct BatchNorm1DLayer #

struct BatchNorm1DLayer[T] {
	eps      f64
	momentum f64
pub mut:
	gamma               &autograd.Variable[T] = unsafe { nil }
	beta                &autograd.Variable[T] = unsafe { nil }
	running_mean        &vtl.Tensor[T]        = unsafe { nil }
	running_var         &vtl.Tensor[T]        = unsafe { nil }
	num_batches_tracked int
}

struct BatchNorm1DLayerVulkan #

struct BatchNorm1DLayerVulkan[T] {
	eps    f32
	device voidptr = unsafe { nil }
}

struct Conv2DConfig #

@[params]
struct Conv2DConfig {
	padding  []int = [0, 0]
	stride   []int = [1, 1]
	dilation []int = [1, 1]
	groups   int   = 1
}

Conv2D layer: 2D convolution over a 4D input tensor [batch, in_channels, H, W]. Produces [batch, out_channels, out_H, out_W] output.

struct Conv2DGate #

struct Conv2DGate[T] {
	input       &vtl.Tensor[T] = unsafe { nil }
	weight      &vtl.Tensor[T] = unsafe { nil }
	bias        &vtl.Tensor[T] = unsafe { nil }
	kernel_size []int
	config      Conv2DConfig
}

struct Conv2DLayer #

struct Conv2DLayer[T] {
	in_channels  int
	out_channels int
	kernel_size  []int
	config       Conv2DConfig
pub mut:
	weight &autograd.Variable[T] = unsafe { nil }
	bias   &autograd.Variable[T] = unsafe { nil }
}

Conv2DLayer applies a 2D convolution over a 4D input tensor.

Input: [batch, in_channels, H, W]
Output: [batch, out_channels, out_H, out_W]

Config options (via Conv2DConfig):

  • padding — zero-padding added to input borders (default: [0,0])
  • stride — sampling stride in H and W dimensions (default: [1,1])
  • dilation — spacing between kernel elements (default: [1,1])
  • groups — number of groups the input channels are split into (default: 1)
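
out_H and out_W follow the standard convolution output-size arithmetic, which also accounts for dilation. A self-contained sketch of that formula (illustrative only, not the layer's internal code):

fn conv_out_size(in_size int, kernel int, padding int, stride int, dilation int) int {
	// out = floor((in + 2*padding - dilation*(kernel - 1) - 1) / stride) + 1
	return (in_size + 2 * padding - dilation * (kernel - 1) - 1) / stride + 1
}

fn main() {
	// 32x32 input, 3x3 kernel, padding 1, stride 1, dilation 1 -> 32x32 output
	println(conv_out_size(32, 3, 1, 1, 1)) // 32
}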

struct DropoutLayer #

struct DropoutLayer[T] {
	output_shape []int
	prob         f64
}

DropoutLayer applies dropout: elements of the input are randomly zeroed with probability prob.

struct DropoutLayerConfig #

@[params]
struct DropoutLayerConfig {
	prob f64 = 0.5
}

struct EluLayer #

struct EluLayer[T] {
	output_shape []int
	alpha        f64
}

EluLayer is an activation layer that applies the element-wise function f(x) = x > 0 ? x : alpha * (exp(x) - 1)

struct EluLayerConfig #

@[params]
struct EluLayerConfig {
	alpha f64 = 0.01
}

struct EmbeddingGate #

struct EmbeddingGate[T] {
	input  &vtl.Tensor[T] = unsafe { nil }
	weight &vtl.Tensor[T] = unsafe { nil }
}

struct EmbeddingLayer #

struct EmbeddingLayer[T] {
	vocab_size    int
	embedding_dim int
pub mut:
	weight &autograd.Variable[T] = unsafe { nil }
}

EmbeddingLayer maps integer token indices to dense embedding vectors.

Input: [batch, seq_len] — integer indices in [0, vocab_size)
Output: [batch, seq_len, embedding_dim]

Weight shape: [vocab_size, embedding_dim]
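
A hypothetical construction call based on the embedding_layer signature above; the import paths and the autograd.ctx helper are assumptions, while the constructor arguments and weight shape come from this page:

// assumed imports; adjust to your project layout
import vtl.autograd
import vtl.nn.layers

fn main() {
	ctx := autograd.ctx[f64]() // assumed context constructor
	// vocab of 10_000 tokens, 256-dimensional embeddings
	// -> weight shape [10_000, 256], as documented above
	emb := layers.embedding_layer[f64](ctx, 10_000, 256)
	println(typeof(emb).name)
}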

struct FlattenLayer #

struct FlattenLayer[T] {
	shape []int
}

FlattenLayer is a layer that flattens its input tensor.

struct GELULayer #

struct GELULayer[T] {
	output_shape []int
}

GELULayer applies the Gaussian Error Linear Unit (GELU) activation using the lower-cost tanh approximation: GELU(x) = 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))), where 0.7978845608 is the value of sqrt(2/pi) used by the approximation.
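
A self-contained sketch of the tanh approximation above (math only, independent of the layer's implementation):

import math

fn gelu_approx(x f64) f64 {
	// 0.7978845608 approximates sqrt(2/pi)
	return 0.5 * x * (1.0 + math.tanh(0.7978845608 * (x + 0.044715 * x * x * x)))
}

fn main() {
	println(gelu_approx(1.0)) // approx. 0.8412
}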

struct GRULayer #

struct GRULayer[T] {
mut:
	w_ih &autograd.Variable[T]
	w_hh &autograd.Variable[T]
	b_ih &autograd.Variable[T]
	b_hh &autograd.Variable[T]
pub:
	ctx         &autograd.Context[T]
	input_size  int
	hidden_size int
}

GRULayer implements a Gated Recurrent Unit layer.

Implements the standard GRU equations (PyTorch/CuDNN compatible):
  r_t = sigmoid(x_t @ W_ir^T + h_{t-1} @ W_hr^T + b_ir + b_hr)
  z_t = sigmoid(x_t @ W_iz^T + h_{t-1} @ W_hz^T + b_iz + b_hz)
  n_t = tanh(x_t @ W_in^T + b_in + r_t * (h_{t-1} @ W_hn^T + b_hn))
  h_t = (1 - z_t) * n_t + z_t * h_{t-1}

Input: [batch, seq_len, input_size]
Output: [batch, hidden_size] (final hidden state)

Compared to LSTM, GRU:

  • Has 3 gates instead of 4 (no separate cell state)
  • Uses fewer parameters (3*hidden_size vs 4*hidden_size weights)
  • Often trains faster while achieving comparable performance

Contains learnable weights:
  w_ih: [3*hidden_size, input_size] — input-to-hidden for r, z, n gates
  w_hh: [3*hidden_size, hidden_size] — hidden-to-hidden for r, z, n gates
  b_ih: [3*hidden_size] — input bias
  b_hh: [3*hidden_size] — hidden bias
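
A hypothetical construction call based on the gru_layer signature above, with the parameter count implied by the documented weight shapes; the import paths and autograd.ctx are assumptions:

// assumed imports; adjust to your project layout
import vtl.autograd
import vtl.nn.layers

fn main() {
	ctx := autograd.ctx[f32]() // assumed context constructor
	// input_size = 64, hidden_size = 128
	gru := layers.gru_layer[f32](ctx, 64, 128)
	// parameters from the shapes above:
	// 3*128*64 (w_ih) + 3*128*128 (w_hh) + 3*128 (b_ih) + 3*128 (b_hh) = 74496
	println(typeof(gru).name)
}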

struct GlobalAvgPool2DGate #

struct GlobalAvgPool2DGate[T] {
	input &vtl.Tensor[T] = unsafe { nil }
}

struct GlobalAvgPool2DLayer #

struct GlobalAvgPool2DLayer[T] {}

GlobalAvgPool2DLayer applies global average pooling over spatial dimensions.

Input: [batch, channels, H, W]
Output: [batch, channels, 1, 1] — one value per channel per sample

struct GlobalAvgPool2DLayerVulkan #

struct GlobalAvgPool2DLayerVulkan[T] {
pub mut:
	device voidptr
}

GlobalAvgPool2DLayerVulkan stub for when Vulkan is not enabled

struct InputLayer #

struct InputLayer[T] {
	shape []int
}

InputLayer is a layer that takes a single input tensor and returns the same tensor.

This layer is used as the first layer in a model.

struct LSTMLayer #

struct LSTMLayer[T] {
mut:
	w_ih &autograd.Variable[T]
	w_hh &autograd.Variable[T]
	b_ih &autograd.Variable[T]
	b_hh &autograd.Variable[T]
pub:
	ctx         &autograd.Context[T]
	input_size  int
	hidden_size int
	num_layers  int
}

LSTMLayer implements a Long Short-Term Memory layer.

Implements the standard LSTM equations per timestep:
  i_t = sigmoid(x_t @ W_ii^T + h_{t-1} @ W_hi^T + b_ii + b_hi)
  f_t = sigmoid(x_t @ W_if^T + h_{t-1} @ W_hf^T + b_if + b_hf)
  g_t = tanh(x_t @ W_ig^T + h_{t-1} @ W_hg^T + b_ig + b_hg)
  o_t = sigmoid(x_t @ W_io^T + h_{t-1} @ W_ho^T + b_io + b_ho)
  c_t = f_t * c_{t-1} + i_t * g_t
  h_t = o_t * tanh(c_t)

Input: [batch, seq_len, input_size]
Output: [batch, hidden_size] (final hidden state)

Contains learnable weights:
  w_ih: [4*hidden_size, input_size] — input-to-hidden for all 4 gates
  w_hh: [4*hidden_size, hidden_size] — hidden-to-hidden for all 4 gates
  b_ih: [4*hidden_size] — input bias
  b_hh: [4*hidden_size] — hidden bias
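
From the weight shapes above, a single-layer LSTM (num_layers = 1) has 4*hidden_size*(input_size + hidden_size + 2) parameters. A tiny self-contained sketch of that count:

fn lstm_param_count(input_size int, hidden_size int) int {
	// 4*h*in (w_ih) + 4*h*h (w_hh) + 4*h (b_ih) + 4*h (b_hh)
	return 4 * hidden_size * (input_size + hidden_size + 2)
}

fn main() {
	println(lstm_param_count(64, 128)) // 99328
}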

struct LayerNormConfig #

@[params]
struct LayerNormConfig {
	eps    f64  = 1e-5
	affine bool = true
}

LayerNorm normalizes over the trailing dimensions given by normalized_shape. E.g. for an input of shape [..., D] with normalized_shape = [D], the mean and variance are computed over the last dimension.
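
A self-contained sketch of the normalization over a single vector (math only; gamma/beta correspond to the affine scale and shift):

import math

fn layer_norm(x []f64, gamma []f64, beta []f64, eps f64) []f64 {
	mut mean := 0.0
	for v in x {
		mean += v
	}
	mean /= f64(x.len)
	mut variance := 0.0
	for v in x {
		variance += (v - mean) * (v - mean)
	}
	variance /= f64(x.len)
	mut out := []f64{len: x.len}
	for i, v in x {
		out[i] = gamma[i] * (v - mean) / math.sqrt(variance + eps) + beta[i]
	}
	return out
}

fn main() {
	println(layer_norm([1.0, 2.0, 3.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], 1e-5))
}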

struct LayerNormGate #

struct LayerNormGate[T] {
	input &vtl.Tensor[T] = unsafe { nil }
	gamma &vtl.Tensor[T] = unsafe { nil }
	beta  &vtl.Tensor[T] = unsafe { nil }
	eps   f64
}

struct LayerNormLayer #

struct LayerNormLayer[T] {
	normalized_shape []int
	eps              f64
pub mut:
	gamma &autograd.Variable[T] = unsafe { nil }
	beta  &autograd.Variable[T] = unsafe { nil }
}

struct LayerNormLayerVulkan #

struct LayerNormLayerVulkan[T] {
	normalized_shape []int
	eps              f32
	params           storage.VulkanStorageParams
pub mut:
	gamma &autograd.Variable[T] = unsafe { nil }
	beta  &autograd.Variable[T] = unsafe { nil }
}

struct LayerNormVulkanConfig #

@[params]
struct LayerNormVulkanConfig {
	eps    f32  = 1e-5
	affine bool = true
}

struct LeakyReluLayer #

struct LeakyReluLayer[T] {
	output_shape []int
	slope        f64
}

LeakyReluLayer is an activation layer that applies the leaky relu function to the input.

struct LeakyReluLayerConfig #

@[params]
struct LeakyReluLayerConfig {
	slope f64 = 0.01
}

struct LinearLayer #

struct LinearLayer[T] {
	weights &autograd.Variable[T] = unsafe { nil }
	bias    &autograd.Variable[T] = unsafe { nil }
}

LinearLayer applies a linear transformation: y = x·Wᵀ + b

Input: [..., in_features]
Output: [..., out_features]

Weights shape: [out_features, in_features]
Bias shape: [1, out_features]
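
A self-contained sketch of the transformation for one input row, using the weight layout documented above (illustrative only, not the layer's internal code):

// y = x*W^T + b for a single row; w has shape [out_features][in_features]
fn linear_forward_row(x []f64, w [][]f64, b []f64) []f64 {
	mut y := []f64{len: w.len}
	for o in 0 .. w.len {
		mut acc := b[o]
		for i in 0 .. x.len {
			acc += x[i] * w[o][i]
		}
		y[o] = acc
	}
	return y
}

fn main() {
	// 2 inputs -> 1 output: y = 1*0.5 + 2*0.25 + 0.1 = 1.1
	println(linear_forward_row([1.0, 2.0], [[0.5, 0.25]], [0.1]))
}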

struct MaxPool2DLayer #

struct MaxPool2DLayer[T] {
	input_shape []int
	kernel      []int
	padding     []int
	stride      []int
}

MaxPool2DLayer is a layer that implements the 2D max-pooling operation.

struct MaxPool2DLayerVulkan #

struct MaxPool2DLayerVulkan[T] {
pub mut:
	kernel_size [2]int
	stride      [2]int
	padding     [2]int
	device      voidptr = unsafe { nil }
}

struct MishLayer #

struct MishLayer[T] {
	output_shape []int
}

MishLayer applies the Mish activation: Mish(x) = x * tanh(softplus(x)), where softplus(x) = log(1 + exp(x)).

struct MultiHeadAttentionLayer #

struct MultiHeadAttentionLayer[T] {
pub:
	embed_dim int
	num_heads int
	head_dim  int
pub mut:
	w_q &autograd.Variable[T] = unsafe { nil }
	w_k &autograd.Variable[T] = unsafe { nil }
	w_v &autograd.Variable[T] = unsafe { nil }
	w_o &autograd.Variable[T] = unsafe { nil }
}

MultiHeadAttentionLayer implements scaled dot-product multi-head attention.

Input: [batch, seq_len, embed_dim]
Output: [batch, seq_len, embed_dim]

Computes attention across num_heads heads and projects back to embed_dim.

Config options (via constructor parameters):

  • embed_dim — model dimension (must be divisible by num_heads)

  • num_heads — number of parallel attention heads
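
A hypothetical construction call based on the multihead_attention_layer signature above; the import paths and autograd.ctx are assumptions:

// assumed imports; adjust to your project layout
import vtl.autograd
import vtl.nn.layers

fn main() {
	ctx := autograd.ctx[f32]() // assumed context constructor
	// embed_dim = 512 is divisible by num_heads = 8, so head_dim = 64
	mha := layers.multihead_attention_layer[f32](ctx, 512, 8)
	println(typeof(mha).name)
}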

struct PositionalEncodingLayer #

struct PositionalEncodingLayer[T] {
	max_len   int
	embed_dim int
pub mut:
	pe &vtl.Tensor[T] = unsafe { nil }
}

PositionalEncodingLayer adds fixed sinusoidal positional encodings to an embedding.

Does not contain learnable parameters. Encodings follow the original Transformer formulation (Attention is All You Need, §3.5).

Input: [batch, seq_len, embed_dim]
Output: [batch, seq_len, embed_dim] — input + positional encoding
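
A self-contained sketch of the sinusoidal encodings from the Transformer paper (math only; the layer stores an equivalent precomputed pe tensor):

import math

// PE(pos, 2i)   = sin(pos / 10000^(2i/embed_dim))
// PE(pos, 2i+1) = cos(pos / 10000^(2i/embed_dim))
fn sinusoidal_encoding(max_len int, embed_dim int) [][]f64 {
	mut pe := [][]f64{len: max_len, init: []f64{len: embed_dim}}
	for pos in 0 .. max_len {
		for i := 0; i < embed_dim; i += 2 {
			freq := math.pow(10000.0, -f64(i) / f64(embed_dim))
			pe[pos][i] = math.sin(f64(pos) * freq)
			if i + 1 < embed_dim {
				pe[pos][i + 1] = math.cos(f64(pos) * freq)
			}
		}
	}
	return pe
}

fn main() {
	pe := sinusoidal_encoding(4, 8)
	println(pe[1][0]) // sin(1) = 0.8414...
}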

struct ReLULayer #

struct ReLULayer[T] {
	output_shape []int
}

ReLULayer is a layer that applies the rectified linear unit function element-wise.

struct SigmoidLayer #

struct SigmoidLayer[T] {
	output_shape []int
}

SigmoidLayer is a layer that applies the sigmoid function to its input.

struct SoftmaxLayer #

struct SoftmaxLayer[T] {
	dim int
}

SoftmaxLayer applies the softmax activation along the configured dimension (the last dimension by default). Input shape: [..., n_classes] → output shape: [..., n_classes]; the output sums to 1 along the softmax dimension.
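
A self-contained sketch of a numerically stable softmax over one vector (math only):

import math

fn softmax(x []f64) []f64 {
	// subtract the max for numerical stability; the result still sums to 1
	mut max_v := x[0]
	for v in x {
		if v > max_v {
			max_v = v
		}
	}
	mut exps := []f64{len: x.len}
	mut sum := 0.0
	for i, v in x {
		exps[i] = math.exp(v - max_v)
		sum += exps[i]
	}
	mut out := []f64{len: x.len}
	for i in 0 .. x.len {
		out[i] = exps[i] / sum
	}
	return out
}

fn main() {
	println(softmax([1.0, 2.0, 3.0])) // approx. [0.0900, 0.2447, 0.6652]
}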

struct SoftmaxLayerConfig #

@[params]
struct SoftmaxLayerConfig {
	dim int = -1 // dimension to apply softmax over; -1 means last dimension
}

struct SwishLayer #

struct SwishLayer[T] {
	output_shape []int
}

SwishLayer applies the Swish activation Swish(x) = x * sigmoid(beta * x) with beta = 1 (standard Swish), i.e. x * sigmoid(x).

struct TanhLayer #

struct TanhLayer[T] {
	output_shape []int
}

TanhLayer is a layer that applies the tanh activation function to its input.