nn.layers #
Stub implementations for non-Vulkan builds: conv2d_forward_vulkan, embedding_forward_vulkan, and LayerNormLayerVulkan fall through to their CPU paths, while the remaining Vulkan entry points report that they are unavailable. The real implementations live in the corresponding _d_vulkan.v files (e.g. softmax_vulkan_d_vulkan.v).
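These stubs rely on V's compile-flag file suffixes: a file ending in _d_vulkan.v is compiled only under `v -d vulkan`, and a file ending in _notd_vulkan.v in every other build. A minimal sketch of the pattern (file and function names here are hypothetical):

```v
// example_notd_vulkan.v (hypothetical file name): compiled only WITHOUT
// `-d vulkan`. The stub keeps the symbol defined so callers type-check
// in builds that lack the GPU backend.
module layers

fn example_forward_vulkan() !f64 {
	// Some stubs instead fall through to the CPU implementation.
	return error('example_forward_vulkan: not available without -d vulkan')
}
```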
fn attention_apply_values_vulkan #
fn attention_apply_values_vulkan[T](weights &vtl.Tensor[T], v &vtl.Tensor[T], head_dim int) !&vtl.Tensor[T]
fn attention_forward_vulkan #
fn attention_forward_vulkan[T](q &vtl.Tensor[T], k &vtl.Tensor[T], v &vtl.Tensor[T], head_dim int) !&vtl.Tensor[T]
attention_forward_vulkan stub: not available without Vulkan.
fn attention_gate #
fn attention_gate[T](input &vtl.Tensor[T], w_q &vtl.Tensor[T], w_k &vtl.Tensor[T], w_v &vtl.Tensor[T], w_o &vtl.Tensor[T], num_heads int, head_dim int) &AttentionGate[T]
fn attention_scores_vulkan #
fn attention_scores_vulkan[T](q &vtl.Tensor[T], k &vtl.Tensor[T], head_dim int) !&vtl.Tensor[T]
fn avgpool2d_forward_vulkan #
fn avgpool2d_forward_vulkan[T](input &vtl.Tensor[T], kernel_size [2]int, stride [2]int, padding [2]int, dev voidptr) !&vtl.Tensor[T]
avgpool2d_forward_vulkan stub: not available without Vulkan.
fn avgpool2d_gate #
fn avgpool2d_gate[T](input &vtl.Tensor[T], kernel []int, padding []int, stride []int) &AvgPool2DGate[T]
fn avgpool2d_layer #
fn avgpool2d_layer[T](ctx &autograd.Context[T], input_shape []int, kernel []int, padding []int, stride []int) types.Layer[T]
avgpool2d_layer creates an AveragePool2DLayer.
fn batchnorm1d_forward_vulkan #
fn batchnorm1d_forward_vulkan[T](input &vtl.Tensor[T], eps f32, dev voidptr) !&vtl.Tensor[T]
fn batchnorm1d_gate #
fn batchnorm1d_gate[T](input &vtl.Tensor[T], gamma &vtl.Tensor[T], beta &vtl.Tensor[T], mean &vtl.Tensor[T], var_ &vtl.Tensor[T], eps f64) &BatchNorm1DGate[T]
fn batchnorm1d_layer #
fn batchnorm1d_layer[T](ctx &autograd.Context[T], num_features int, config BatchNorm1DConfig) types.Layer[T]
fn conv2d_forward_vulkan #
fn conv2d_forward_vulkan[T](input &vtl.Tensor[T], weight &vtl.Tensor[T], bias &vtl.Tensor[T], kernel_size []int, config Conv2DConfig) !&vtl.Tensor[T]
fn conv2d_gate #
fn conv2d_gate[T](input &vtl.Tensor[T], weight &vtl.Tensor[T], bias &vtl.Tensor[T], kernel_size []int, config Conv2DConfig) &Conv2DGate[T]
fn conv2d_layer #
fn conv2d_layer[T](ctx &autograd.Context[T], in_ch int, out_ch int, kernel_size []int, config Conv2DConfig) types.Layer[T]
conv2d_layer creates a Conv2DLayer.
fn dropout_layer #
fn dropout_layer[T](ctx &autograd.Context[T], output_shape []int, data DropoutLayerConfig) types.Layer[T]
fn elu_layer #
fn elu_layer[T](ctx &autograd.Context[T], output_shape []int, data EluLayerConfig) types.Layer[T]
fn embedding_forward_vulkan #
fn embedding_forward_vulkan[T](input &vtl.Tensor[T], weight &vtl.Tensor[T]) !&vtl.Tensor[T]
fn embedding_gate #
fn embedding_gate[T](input &vtl.Tensor[T], weight &vtl.Tensor[T]) &EmbeddingGate[T]
fn embedding_layer #
fn embedding_layer[T](ctx &autograd.Context[T], vocab_size int, embedding_dim int) types.Layer[T]
embedding_layer creates an EmbeddingLayer.
fn flatten_layer #
fn flatten_layer[T](ctx &autograd.Context[T], shape []int) types.Layer[T]
fn gelu_forward_vulkan #
fn gelu_forward_vulkan[T](x &vtl.Tensor[T], params storage.VulkanStorageParams) !&vtl.Tensor[T]
fn gelu_layer #
fn gelu_layer[T](ctx &autograd.Context[T], output_shape []int) types.Layer[T]
fn global_avgpool2d_forward_vulkan #
fn global_avgpool2d_forward_vulkan[T](input &vtl.Tensor[T], dev voidptr) !&vtl.Tensor[T]
global_avgpool2d_forward_vulkan stub: not available without Vulkan.
fn global_avgpool2d_gate #
fn global_avgpool2d_gate[T](input &vtl.Tensor[T]) &GlobalAvgPool2DGate[T]
fn global_avgpool2d_layer #
fn global_avgpool2d_layer[T](ctx &autograd.Context[T]) types.Layer[T]
global_avgpool2d_layer creates a GlobalAvgPool2DLayer.
fn gru_layer #
fn gru_layer[T](ctx &autograd.Context[T], input_size int, hidden_size int) types.Layer[T]
gru_layer creates a GRULayer with Kaiming-initialized weights.
fn input_layer #
fn input_layer[T](ctx &autograd.Context[T], shape []int) types.Layer[T]
fn layer_norm_layer #
fn layer_norm_layer[T](ctx &autograd.Context[T], normalized_shape []int, config LayerNormConfig) types.Layer[T]
layer_norm_layer creates a LayerNormLayer.
fn layer_norm_vulkan_layer #
fn layer_norm_vulkan_layer[T](ctx &autograd.Context[T], normalized_shape []int, params storage.VulkanStorageParams, config LayerNormVulkanConfig) types.Layer[T]
fn layernorm_forward_vulkan #
fn layernorm_forward_vulkan[T](x &vtl.Tensor[T], eps f32, params storage.VulkanStorageParams) !&vtl.Tensor[T]
fn layernorm_gate #
fn layernorm_gate[T](input &vtl.Tensor[T], gamma &vtl.Tensor[T], beta &vtl.Tensor[T], eps f64) &LayerNormGate[T]
fn leaky_relu_layer #
fn leaky_relu_layer[T](ctx &autograd.Context[T], output_shape []int, data LeakyReluLayerConfig) types.Layer[T]
fn linear_forward_vcl #
fn linear_forward_vcl[T](_ &vtl.Tensor[T], _ &vtl.Tensor[T], _ &vtl.Tensor[T], _ storage.VclStorageParams) !&vtl.Tensor[T]
Stubs when compiling without -d vcl (mirrors linear_vulkan_notd_vulkan.v pattern).
fn linear_forward_vulkan #
fn linear_forward_vulkan[T](_ &vtl.Tensor[T], _ &vtl.Tensor[T], _ &vtl.Tensor[T], _ storage.VulkanStorageParams) !&vtl.Tensor[T]
Stubs when compiling without -d vulkan (mirrors nn + tensor notd patterns).
fn linear_layer #
fn linear_layer[T](ctx &autograd.Context[T], input_dim int, output_dim int) types.Layer[T]
linear_layer creates a LinearLayer.
fn lstm_layer #
fn lstm_layer[T](ctx &autograd.Context[T], input_size int, hidden_size int, num_layers int) types.Layer[T]
lstm_layer creates an LSTMLayer with Kaiming-initialized weights.
fn maxpool2d_forward_vulkan #
fn maxpool2d_forward_vulkan[T](input &vtl.Tensor[T], kernel_size [2]int, stride [2]int, padding [2]int, dev voidptr) !&vtl.Tensor[T]
fn maxpool2d_layer #
fn maxpool2d_layer[T](ctx &autograd.Context[T], input_shape []int, kernel []int, padding []int, stride []int) types.Layer[T]
fn mish_layer #
fn mish_layer[T](ctx &autograd.Context[T], output_shape []int) types.Layer[T]
fn multihead_attention_layer #
fn multihead_attention_layer[T](ctx &autograd.Context[T], embed_dim int, num_heads int) types.Layer[T]
multihead_attention_layer creates a MultiHeadAttentionLayer.
fn positional_encoding_layer #
fn positional_encoding_layer[T](ctx &autograd.Context[T], embed_dim int, max_len int) !types.Layer[T]
positional_encoding_layer creates a PositionalEncodingLayer.
fn reduce_sum_vulkan #
fn reduce_sum_vulkan[T](x &vtl.Tensor[T], params storage.VulkanStorageParams) ![]T
fn relu_forward_vcl #
fn relu_forward_vcl[T](_ &vtl.Tensor[T], _ storage.VclStorageParams) !&vtl.Tensor[T]
fn relu_forward_vulkan #
fn relu_forward_vulkan[T](_ &vtl.Tensor[T], _ storage.VulkanStorageParams) !&vtl.Tensor[T]
fn relu_layer #
fn relu_layer[T](ctx &autograd.Context[T], output_shape []int) types.Layer[T]
fn sigmoid_forward_vcl #
fn sigmoid_forward_vcl[T](_ &vtl.Tensor[T], _ storage.VclStorageParams) !&vtl.Tensor[T]
fn sigmoid_forward_vulkan #
fn sigmoid_forward_vulkan[T](_ &vtl.Tensor[T], _ storage.VulkanStorageParams) !&vtl.Tensor[T]
fn sigmoid_layer #
fn sigmoid_layer[T](ctx &autograd.Context[T], output_shape []int) types.Layer[T]
fn softmax_forward_vulkan #
fn softmax_forward_vulkan[T](x &vtl.Tensor[T], params storage.VulkanStorageParams) !&vtl.Tensor[T]
fn softmax_layer #
fn softmax_layer[T](ctx &autograd.Context[T], config SoftmaxLayerConfig) types.Layer[T]
fn swish_layer #
fn swish_layer[T](ctx &autograd.Context[T], output_shape []int) types.Layer[T]
fn tanh_layer #
fn tanh_layer[T](ctx &autograd.Context[T], output_shape []int) types.Layer[T]
fn (AttentionGate[T]) backward #
fn (g &AttentionGate[T]) backward[T](payload &autograd.Payload[T]) ![]&vtl.Tensor[T]
fn (AttentionGate[T]) cache #
fn (g &AttentionGate[T]) cache[T](mut result autograd.Variable[T], args ...autograd.CacheParam) !
fn (AveragePool2DLayer[T]) output_shape #
fn (layer &AveragePool2DLayer[T]) output_shape() []int
fn (AveragePool2DLayer[T]) variables #
fn (layer &AveragePool2DLayer[T]) variables() []&autograd.Variable[T]
fn (AveragePool2DLayer[T]) forward #
fn (layer &AveragePool2DLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (AvgPool2DGate[T]) backward #
fn (g &AvgPool2DGate[T]) backward[T](payload &autograd.Payload[T]) ![]&vtl.Tensor[T]
fn (AvgPool2DGate[T]) cache #
fn (g &AvgPool2DGate[T]) cache[T](mut result autograd.Variable[T], args ...autograd.CacheParam) !
fn (AvgPool2DLayerVulkan[T]) forward #
fn (layer &AvgPool2DLayerVulkan[T]) forward(input &vtl.Tensor[T]) !&vtl.Tensor[T]
forward stub: not available without Vulkan.
fn (BatchNorm1DGate[T]) backward #
fn (g &BatchNorm1DGate[T]) backward[T](payload &autograd.Payload[T]) ![]&vtl.Tensor[T]
fn (BatchNorm1DGate[T]) cache #
fn (g &BatchNorm1DGate[T]) cache[T](mut result autograd.Variable[T], args ...autograd.CacheParam) !
fn (BatchNorm1DLayerVulkan[T]) forward #
fn (layer &BatchNorm1DLayerVulkan[T]) forward(input &vtl.Tensor[T]) !&vtl.Tensor[T]
fn (BatchNorm1DLayer[T]) output_shape #
fn (layer &BatchNorm1DLayer[T]) output_shape() []int
fn (BatchNorm1DLayer[T]) variables #
fn (layer &BatchNorm1DLayer[T]) variables() []&autograd.Variable[T]
fn (BatchNorm1DLayer[T]) forward #
fn (layer &BatchNorm1DLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (Conv2DGate[T]) backward #
fn (g &Conv2DGate[T]) backward[T](payload &autograd.Payload[T]) ![]&vtl.Tensor[T]
fn (Conv2DGate[T]) cache #
fn (g &Conv2DGate[T]) cache[T](mut result autograd.Variable[T], args ...autograd.CacheParam) !
fn (Conv2DLayer[T]) output_shape #
fn (layer &Conv2DLayer[T]) output_shape() []int
fn (Conv2DLayer[T]) variables #
fn (layer &Conv2DLayer[T]) variables() []&autograd.Variable[T]
fn (Conv2DLayer[T]) forward #
fn (layer &Conv2DLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (DropoutLayer[T]) output_shape #
fn (layer &DropoutLayer[T]) output_shape() []int
fn (DropoutLayer[T]) variables #
fn (_ &DropoutLayer[T]) variables() []&autograd.Variable[T]
fn (DropoutLayer[T]) forward #
fn (layer &DropoutLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (EluLayer[T]) output_shape #
fn (layer &EluLayer[T]) output_shape() []int
fn (EluLayer[T]) variables #
fn (_ &EluLayer[T]) variables() []&autograd.Variable[T]
fn (EluLayer[T]) forward #
fn (layer &EluLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (EmbeddingGate[T]) backward #
fn (g &EmbeddingGate[T]) backward[T](payload &autograd.Payload[T]) ![]&vtl.Tensor[T]
fn (EmbeddingGate[T]) cache #
fn (g &EmbeddingGate[T]) cache[T](mut result autograd.Variable[T], args ...autograd.CacheParam) !
fn (EmbeddingLayer[T]) output_shape #
fn (layer &EmbeddingLayer[T]) output_shape() []int
fn (EmbeddingLayer[T]) variables #
fn (layer &EmbeddingLayer[T]) variables() []&autograd.Variable[T]
fn (EmbeddingLayer[T]) forward #
fn (layer &EmbeddingLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (FlattenLayer[T]) output_shape #
fn (layer &FlattenLayer[T]) output_shape() []int
fn (FlattenLayer[T]) variables #
fn (_ &FlattenLayer[T]) variables() []&autograd.Variable[T]
fn (FlattenLayer[T]) forward #
fn (layer &FlattenLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (GELULayer[T]) output_shape #
fn (layer &GELULayer[T]) output_shape() []int
fn (GELULayer[T]) variables #
fn (_ &GELULayer[T]) variables() []&autograd.Variable[T]
fn (GELULayer[T]) forward #
fn (layer &GELULayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (GlobalAvgPool2DGate[T]) backward #
fn (g &GlobalAvgPool2DGate[T]) backward[T](payload &autograd.Payload[T]) ![]&vtl.Tensor[T]
fn (GlobalAvgPool2DGate[T]) cache #
fn (g &GlobalAvgPool2DGate[T]) cache[T](mut result autograd.Variable[T], args ...autograd.CacheParam) !
fn (GlobalAvgPool2DLayerVulkan[T]) forward #
fn (layer &GlobalAvgPool2DLayerVulkan[T]) forward(input &vtl.Tensor[T]) !&vtl.Tensor[T]
forward stub: not available without Vulkan.
fn (GlobalAvgPool2DLayer[T]) output_shape #
fn (layer &GlobalAvgPool2DLayer[T]) output_shape() []int
fn (GlobalAvgPool2DLayer[T]) variables #
fn (layer &GlobalAvgPool2DLayer[T]) variables() []&autograd.Variable[T]
fn (GlobalAvgPool2DLayer[T]) forward #
fn (layer &GlobalAvgPool2DLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (InputLayer[T]) output_shape #
fn (layer &InputLayer[T]) output_shape() []int
fn (InputLayer[T]) variables #
fn (_ &InputLayer[T]) variables() []&autograd.Variable[T]
fn (InputLayer[T]) forward #
fn (layer &InputLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (LayerNormGate[T]) backward #
fn (g &LayerNormGate[T]) backward[T](payload &autograd.Payload[T]) ![]&vtl.Tensor[T]
fn (LayerNormGate[T]) cache #
fn (g &LayerNormGate[T]) cache[T](mut result autograd.Variable[T], args ...autograd.CacheParam) !
fn (LayerNormLayerVulkan[T]) output_shape #
fn (layer &LayerNormLayerVulkan[T]) output_shape() []int
fn (LayerNormLayerVulkan[T]) variables #
fn (layer &LayerNormLayerVulkan[T]) variables() []&autograd.Variable[T]
fn (LayerNormLayerVulkan[T]) forward #
fn (layer &LayerNormLayerVulkan[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (LayerNormLayer[T]) output_shape #
fn (layer &LayerNormLayer[T]) output_shape() []int
fn (LayerNormLayer[T]) variables #
fn (layer &LayerNormLayer[T]) variables() []&autograd.Variable[T]
fn (LayerNormLayer[T]) forward #
fn (layer &LayerNormLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (LeakyReluLayer[T]) output_shape #
fn (layer &LeakyReluLayer[T]) output_shape() []int
fn (LeakyReluLayer[T]) variables #
fn (_ &LeakyReluLayer[T]) variables() []&autograd.Variable[T]
fn (LeakyReluLayer[T]) forward #
fn (layer &LeakyReluLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (LinearLayer[T]) output_shape #
fn (layer &LinearLayer[T]) output_shape() []int
fn (LinearLayer[T]) variables #
fn (layer &LinearLayer[T]) variables() []&autograd.Variable[T]
fn (LinearLayer[T]) forward #
fn (layer &LinearLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (MaxPool2DLayerVulkan[T]) forward #
fn (layer &MaxPool2DLayerVulkan[T]) forward(input &vtl.Tensor[T]) !&vtl.Tensor[T]
fn (MaxPool2DLayer[T]) output_shape #
fn (layer &MaxPool2DLayer[T]) output_shape() []int
fn (MaxPool2DLayer[T]) variables #
fn (layer &MaxPool2DLayer[T]) variables() []&autograd.Variable[T]
fn (MaxPool2DLayer[T]) forward #
fn (layer &MaxPool2DLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (MishLayer[T]) output_shape #
fn (layer &MishLayer[T]) output_shape() []int
fn (MishLayer[T]) variables #
fn (_ &MishLayer[T]) variables() []&autograd.Variable[T]
fn (MishLayer[T]) forward #
fn (layer &MishLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (MultiHeadAttentionLayer[T]) output_shape #
fn (layer &MultiHeadAttentionLayer[T]) output_shape() []int
fn (MultiHeadAttentionLayer[T]) variables #
fn (layer &MultiHeadAttentionLayer[T]) variables() []&autograd.Variable[T]
fn (MultiHeadAttentionLayer[T]) forward #
fn (layer &MultiHeadAttentionLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (PositionalEncodingLayer[T]) output_shape #
fn (layer &PositionalEncodingLayer[T]) output_shape() []int
fn (PositionalEncodingLayer[T]) variables #
fn (layer &PositionalEncodingLayer[T]) variables() []&autograd.Variable[T]
fn (PositionalEncodingLayer[T]) forward #
fn (layer &PositionalEncodingLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (ReLULayer[T]) output_shape #
fn (layer &ReLULayer[T]) output_shape() []int
fn (ReLULayer[T]) variables #
fn (_ &ReLULayer[T]) variables() []&autograd.Variable[T]
fn (ReLULayer[T]) forward #
fn (layer &ReLULayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (SigmoidLayer[T]) output_shape #
fn (layer &SigmoidLayer[T]) output_shape() []int
fn (SigmoidLayer[T]) variables #
fn (_ &SigmoidLayer[T]) variables() []&autograd.Variable[T]
fn (SigmoidLayer[T]) forward #
fn (layer &SigmoidLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (SoftmaxLayer[T]) output_shape #
fn (layer &SoftmaxLayer[T]) output_shape() []int
fn (SoftmaxLayer[T]) variables #
fn (layer &SoftmaxLayer[T]) variables() []&autograd.Variable[T]
fn (SoftmaxLayer[T]) forward #
fn (layer &SoftmaxLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (SwishLayer[T]) output_shape #
fn (layer &SwishLayer[T]) output_shape() []int
fn (SwishLayer[T]) variables #
fn (_ &SwishLayer[T]) variables() []&autograd.Variable[T]
fn (SwishLayer[T]) forward #
fn (layer &SwishLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
fn (TanhLayer[T]) output_shape #
fn (layer &TanhLayer[T]) output_shape() []int
fn (TanhLayer[T]) variables #
fn (_ &TanhLayer[T]) variables() []&autograd.Variable[T]
fn (TanhLayer[T]) forward #
fn (layer &TanhLayer[T]) forward(input &autograd.Variable[T]) !&autograd.Variable[T]
struct AttentionGate #
struct AttentionGate[T] {
input &vtl.Tensor[T] = unsafe { nil }
w_q &vtl.Tensor[T] = unsafe { nil }
w_k &vtl.Tensor[T] = unsafe { nil }
w_v &vtl.Tensor[T] = unsafe { nil }
w_o &vtl.Tensor[T] = unsafe { nil }
num_heads int
head_dim int
}
struct AveragePool2DLayer #
struct AveragePool2DLayer[T] {
kernel []int
padding []int
stride []int
input_shape []int
}
AveragePool2DLayer applies 2D average pooling over a 4D input.
Input: [batch, channels, H, W] Output: [batch, channels, out_H, out_W]
Config options:
- kernel — pool window size in (H, W) (default: determined by input_shape)
- padding — zero-border padding before pooling (default: [0, 0])
- stride — pool stride in (H, W) (default: same as kernel)
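The pooled output size per spatial dimension follows the usual floor-division rule; a minimal sketch (the rounding convention is assumed, matching common frameworks):

```v
// Pooled output size per spatial dim: out = (in + 2*pad - k) / stride + 1.
// V's integer division floors, matching the usual pooling convention.
fn pooled_dim(in_size int, k int, pad int, stride int) int {
	return (in_size + 2 * pad - k) / stride + 1
}

fn main() {
	// 28x28 input, 2x2 kernel, no padding, stride 2 -> 14x14 output
	println(pooled_dim(28, 2, 0, 2)) // 14
}
```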
struct AvgPool2DGate #
struct AvgPool2DGate[T] {
input &vtl.Tensor[T] = unsafe { nil }
kernel []int
padding []int
stride []int
}
struct AvgPool2DLayerVulkan #
struct AvgPool2DLayerVulkan[T] {
pub mut:
kernel_size [2]int
stride [2]int
padding [2]int
device voidptr
}
AvgPool2DLayerVulkan stub for when Vulkan is not enabled
struct BatchNorm1DConfig #
struct BatchNorm1DConfig {
eps f64 = 1e-5
momentum f64 = 0.1
affine bool = true
}
BatchNorm1D normalizes a 2D input [batch, features]. Tracks running mean and variance for inference.
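The momentum field suggests an exponential update of the running statistics. A minimal sketch, assuming the common PyTorch-style convention (the exact update rule is an assumption, not confirmed by this doc):

```v
// Assumed PyTorch-style update:
// running = (1 - momentum) * running + momentum * batch_stat
fn update_running(running f64, batch_stat f64, momentum f64) f64 {
	return (1.0 - momentum) * running + momentum * batch_stat
}

fn main() {
	mut running_mean := 0.0
	for batch_mean in [0.50, 0.60, 0.55] { // per-batch means seen in training
		running_mean = update_running(running_mean, batch_mean, 0.1)
	}
	println(running_mean) // smoothed estimate used at inference time
}
```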
struct BatchNorm1DGate #
struct BatchNorm1DGate[T] {
input &vtl.Tensor[T] = unsafe { nil }
gamma &vtl.Tensor[T] = unsafe { nil }
beta &vtl.Tensor[T] = unsafe { nil }
mean &vtl.Tensor[T] = unsafe { nil }
var_ &vtl.Tensor[T] = unsafe { nil }
eps f64
}
struct BatchNorm1DLayer #
struct BatchNorm1DLayer[T] {
eps f64
momentum f64
pub mut:
gamma &autograd.Variable[T] = unsafe { nil }
beta &autograd.Variable[T] = unsafe { nil }
running_mean &vtl.Tensor[T] = unsafe { nil }
running_var &vtl.Tensor[T] = unsafe { nil }
num_batches_tracked int
}
struct BatchNorm1DLayerVulkan #
struct BatchNorm1DLayerVulkan[T] {
eps f32
device voidptr = unsafe { nil }
}
struct Conv2DConfig #
struct Conv2DConfig {
padding []int = [0, 0]
stride []int = [1, 1]
dilation []int = [1, 1]
groups int = 1
}
Conv2D layer: 2D convolution over a 4D input tensor [batch, in_channels, H, W]. Produces [batch, out_channels, out_H, out_W] output.
struct Conv2DGate #
struct Conv2DGate[T] {
input &vtl.Tensor[T] = unsafe { nil }
weight &vtl.Tensor[T] = unsafe { nil }
bias &vtl.Tensor[T] = unsafe { nil }
kernel_size []int
config Conv2DConfig
}
struct Conv2DLayer #
struct Conv2DLayer[T] {
in_channels int
out_channels int
kernel_size []int
config Conv2DConfig
pub mut:
weight &autograd.Variable[T] = unsafe { nil }
bias &autograd.Variable[T] = unsafe { nil }
}
Conv2DLayer applies a 2D convolution over a 4D input tensor.
Input: [batch, in_channels, H, W] Output: [batch, out_channels, out_H, out_W]
Config options (via Conv2DConfig):
- padding — zero-padding added to input borders (default: [0, 0])
- stride — sampling stride in H and W dimensions (default: [1, 1])
- dilation — spacing between kernel elements (default: [1, 1])
- groups — split input channels into groups groups (default: 1)
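Generalizing the pooling formula with dilation, the output size per spatial dimension is (in + 2*pad - dilation*(kernel - 1) - 1) / stride + 1 (floor division assumed); a quick check:

```v
// Convolution output size per spatial dim, including dilation.
fn conv_out_dim(in_size int, k int, pad int, stride int, dilation int) int {
	return (in_size + 2 * pad - dilation * (k - 1) - 1) / stride + 1
}

fn main() {
	// 32x32 input, 3x3 kernel, padding 1, stride 1, dilation 1 -> "same" size
	println(conv_out_dim(32, 3, 1, 1, 1)) // 32
}
```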
struct DropoutLayer #
struct DropoutLayer[T] {
output_shape []int
prob f64
}
DropoutLayer randomly zeroes elements of its input with probability prob during training (standard dropout).
struct DropoutLayerConfig #
struct DropoutLayerConfig {
prob f64 = 0.5
}
struct EluLayer #
struct EluLayer[T] {
output_shape []int
alpha f64
}
EluLayer is an activation layer that applies the element-wise function f(x) = x > 0 ? x : alpha * (exp(x) - 1)
struct EluLayerConfig #
struct EluLayerConfig {
alpha f64 = 0.01
}
struct EmbeddingGate #
struct EmbeddingGate[T] {
input &vtl.Tensor[T] = unsafe { nil }
weight &vtl.Tensor[T] = unsafe { nil }
}
struct EmbeddingLayer #
struct EmbeddingLayer[T] {
vocab_size int
embedding_dim int
pub mut:
weight &autograd.Variable[T] = unsafe { nil }
}
EmbeddingLayer maps integer token indices to dense embedding vectors.
Input: [batch, seq_len] — integer indices in [0, vocab_size) Output: [batch, seq_len, embedding_dim]
Weight shape: [vocab_size, embedding_dim]
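Conceptually, the forward pass is a row gather from the weight matrix: out[b][t] = weight[input[b][t]]. A minimal sketch over one flattened sequence, with plain arrays standing in for vtl tensors:

```v
// Embedding lookup as a row gather from the weight matrix.
fn embed_lookup(indices []int, weight [][]f64) [][]f64 {
	mut out := [][]f64{cap: indices.len}
	for idx in indices {
		out << weight[idx].clone() // row idx is the embedding vector for token idx
	}
	return out
}

fn main() {
	// vocab_size = 3, embedding_dim = 2
	weight := [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]
	println(embed_lookup([2, 0], weight)) // [[0.5, 0.6], [0.1, 0.2]]
}
```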
struct FlattenLayer #
struct FlattenLayer[T] {
shape []int
}
FlattenLayer is a layer that flattens its input into a single feature vector per sample.
struct GELULayer #
struct GELULayer[T] {
output_shape []int
}
GELULayer applies the Gaussian Error Linear Unit (GELU) activation using the lower-cost tanh approximation: GELU(x) = 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))), where sqrt(2/pi) ≈ 0.7978845608.
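A minimal scalar sketch of the tanh approximation above:

```v
import math

// Tanh approximation of GELU; 0.7978845608 ~ sqrt(2 / pi).
fn gelu(x f64) f64 {
	return 0.5 * x * (1.0 + math.tanh(0.7978845608 * (x + 0.044715 * x * x * x)))
}

fn main() {
	println(gelu(1.0)) // ~0.8412
}
```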
struct GRULayer #
struct GRULayer[T] {
mut:
w_ih &autograd.Variable[T]
w_hh &autograd.Variable[T]
b_ih &autograd.Variable[T]
b_hh &autograd.Variable[T]
pub:
ctx &autograd.Context[T]
input_size int
hidden_size int
}
GRULayer implements a Gated Recurrent Unit layer.
Implements the standard GRU equations (PyTorch/CuDNN compatible):
  r_t = sigmoid(x_t @ W_ir^T + h_{t-1} @ W_hr^T + b_ir + b_hr)
  z_t = sigmoid(x_t @ W_iz^T + h_{t-1} @ W_hz^T + b_iz + b_hz)
  n_t = tanh(x_t @ W_in^T + b_in + r_t * (h_{t-1} @ W_hn^T + b_hn))
  h_t = (1 - z_t) * n_t + z_t * h_{t-1}
Input: [batch, seq_len, input_size] Output: [batch, hidden_size] (final hidden state)
Compared to LSTM, GRU:
- has 3 gates instead of 4 (no separate cell state)
- uses fewer parameters (3*hidden_size vs 4*hidden_size weights)
- often trains faster while achieving comparable performance

Contains learnable weights:
- w_ih: [3*hidden_size, input_size] — input-to-hidden for r, z, n gates
- w_hh: [3*hidden_size, hidden_size] — hidden-to-hidden for r, z, n gates
- b_ih: [3*hidden_size] — input bias
- b_hh: [3*hidden_size] — hidden bias
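A scalar sketch of one timestep (hidden_size = input_size = 1, biases omitted); the layer itself applies the same equations with the packed weight matrices listed above:

```v
import math

fn sigmoid(x f64) f64 {
	return 1.0 / (1.0 + math.exp(-x))
}

// One GRU step with scalar weights, tracing the equations above.
fn gru_step(x f64, h_prev f64, w_ir f64, w_hr f64, w_iz f64, w_hz f64, w_in f64, w_hn f64) f64 {
	r := sigmoid(x * w_ir + h_prev * w_hr) // reset gate
	z := sigmoid(x * w_iz + h_prev * w_hz) // update gate
	n := math.tanh(x * w_in + r * (h_prev * w_hn)) // candidate state
	return (1.0 - z) * n + z * h_prev // blend old and new state
}

fn main() {
	mut h := 0.0
	for x in [1.0, -0.5, 0.25] { // a toy 3-step sequence
		h = gru_step(x, h, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5)
	}
	println(h) // final hidden state
}
```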
struct GlobalAvgPool2DGate #
struct GlobalAvgPool2DGate[T] {
input &vtl.Tensor[T] = unsafe { nil }
}
struct GlobalAvgPool2DLayer #
struct GlobalAvgPool2DLayer[T] {}
GlobalAvgPool2DLayer applies global average pooling over spatial dimensions.
Input: [batch, channels, H, W] Output: [batch, channels, 1, 1] — one value per channel per sample
struct GlobalAvgPool2DLayerVulkan #
struct GlobalAvgPool2DLayerVulkan[T] {
pub mut:
device voidptr
}
GlobalAvgPool2DLayerVulkan stub for when Vulkan is not enabled
struct InputLayer #
struct InputLayer[T] {
shape []int
}
InputLayer is a layer that takes a single input tensor and returns the same tensor.
This layer is used as the first layer in a model.
struct LSTMLayer #
struct LSTMLayer[T] {
mut:
w_ih &autograd.Variable[T]
w_hh &autograd.Variable[T]
b_ih &autograd.Variable[T]
b_hh &autograd.Variable[T]
pub:
ctx &autograd.Context[T]
input_size int
hidden_size int
num_layers int
}
LSTMLayer implements a Long Short-Term Memory layer.
Implements the standard LSTM equations per timestep:
  i_t = sigmoid(x_t @ W_ii^T + h_{t-1} @ W_hi^T + b_ii + b_hi)
  f_t = sigmoid(x_t @ W_if^T + h_{t-1} @ W_hf^T + b_if + b_hf)
  g_t = tanh(x_t @ W_ig^T + h_{t-1} @ W_hg^T + b_ig + b_hg)
  o_t = sigmoid(x_t @ W_io^T + h_{t-1} @ W_ho^T + b_io + b_ho)
  c_t = f_t * c_{t-1} + i_t * g_t
  h_t = o_t * tanh(c_t)
Input: [batch, seq_len, input_size] Output: [batch, hidden_size] (final hidden state)
Contains learnable weights:
- w_ih: [4*hidden_size, input_size] — input-to-hidden for all 4 gates
- w_hh: [4*hidden_size, hidden_size] — hidden-to-hidden for all 4 gates
- b_ih: [4*hidden_size] — input bias
- b_hh: [4*hidden_size] — hidden bias
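From the shapes above, the per-layer parameter count works out to 4*hidden_size*(input_size + hidden_size + 2); a quick check:

```v
// 4*H*I (w_ih) + 4*H*H (w_hh) + 4*H (b_ih) + 4*H (b_hh)
fn lstm_param_count(input_size int, hidden_size int) int {
	return 4 * hidden_size * (input_size + hidden_size + 2)
}

fn main() {
	println(lstm_param_count(128, 256)) // 4*256*(128+256+2) = 395264
}
```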
struct LayerNormConfig #
struct LayerNormConfig {
eps f64 = 1e-5
affine bool = true
}
LayerNorm normalizes over the trailing dimensions given by normalized_shape: for each sample, mean and variance are computed over those dimensions. E.g. for input [batch, D] with normalized_shape = [D], each row is normalized independently.
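A minimal sketch of the computation over one normalized vector, with gamma and beta playing the affine role from LayerNormConfig:

```v
import math

// LayerNorm over a single vector: zero mean, unit variance, then scale/shift.
fn layer_norm(x []f64, gamma []f64, beta []f64, eps f64) []f64 {
	mut mean := 0.0
	for v in x {
		mean += v
	}
	mean /= f64(x.len)
	mut variance := 0.0
	for v in x {
		variance += (v - mean) * (v - mean)
	}
	variance /= f64(x.len)
	mut out := []f64{len: x.len}
	for i, v in x {
		out[i] = gamma[i] * (v - mean) / math.sqrt(variance + eps) + beta[i]
	}
	return out
}

fn main() {
	println(layer_norm([1.0, 2.0, 3.0], [1.0, 1.0, 1.0], [0.0, 0.0, 0.0], 1e-5))
}
```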
struct LayerNormGate #
struct LayerNormGate[T] {
input &vtl.Tensor[T] = unsafe { nil }
gamma &vtl.Tensor[T] = unsafe { nil }
beta &vtl.Tensor[T] = unsafe { nil }
eps f64
}
struct LayerNormLayer #
struct LayerNormLayer[T] {
normalized_shape []int
eps f64
pub mut:
gamma &autograd.Variable[T] = unsafe { nil }
beta &autograd.Variable[T] = unsafe { nil }
}
struct LayerNormLayerVulkan #
struct LayerNormLayerVulkan[T] {
normalized_shape []int
eps f32
params storage.VulkanStorageParams
pub mut:
gamma &autograd.Variable[T] = unsafe { nil }
beta &autograd.Variable[T] = unsafe { nil }
}
struct LayerNormVulkanConfig #
struct LayerNormVulkanConfig {
eps f32 = 1e-5
affine bool = true
}
struct LeakyReluLayer #
struct LeakyReluLayer[T] {
output_shape []int
slope f64
}
LeakyReluLayer is an activation layer that applies the element-wise function f(x) = x > 0 ? x : slope * x.
struct LeakyReluLayerConfig #
struct LeakyReluLayerConfig {
slope f64 = 0.01
}
struct LinearLayer #
struct LinearLayer[T] {
weights &autograd.Variable[T] = unsafe { nil }
bias &autograd.Variable[T] = unsafe { nil }
}
LinearLayer applies a linear transformation: y = x·Wᵀ + b
Input: [..., in_features] Output: [..., out_features]
Weights shape: [out_features, in_features] Bias shape: [1, out_features]
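A minimal sketch of the forward pass with plain arrays in place of vtl tensors, following the weight layout above:

```v
// y = x @ W^T + b with W: [out_features, in_features].
fn linear_forward(x []f64, w [][]f64, b []f64) []f64 {
	mut y := []f64{len: w.len}
	for o in 0 .. w.len {
		mut acc := b[o]
		for i in 0 .. x.len {
			acc += x[i] * w[o][i]
		}
		y[o] = acc
	}
	return y
}

fn main() {
	// in_features = 2, out_features = 3
	w := [[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]
	println(linear_forward([2.0, 3.0], w, [0.0, 0.0, 1.0])) // [2.0, 3.0, 6.0]
}
```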
struct MaxPool2DLayer #
struct MaxPool2DLayer[T] {
input_shape []int
kernel []int
padding []int
stride []int
}
MaxPool2DLayer is a layer that implements the maxpooling operation.
struct MaxPool2DLayerVulkan #
struct MaxPool2DLayerVulkan[T] {
pub mut:
kernel_size [2]int
stride [2]int
padding [2]int
device voidptr = unsafe { nil }
}
struct MishLayer #
struct MishLayer[T] {
output_shape []int
}
MishLayer applies the Mish activation: Mish(x) = x * tanh(softplus(x)), where softplus(x) = log(1 + exp(x)).
struct MultiHeadAttentionLayer #
struct MultiHeadAttentionLayer[T] {
pub:
embed_dim int
num_heads int
head_dim int
pub mut:
w_q &autograd.Variable[T] = unsafe { nil }
w_k &autograd.Variable[T] = unsafe { nil }
w_v &autograd.Variable[T] = unsafe { nil }
w_o &autograd.Variable[T] = unsafe { nil }
}
MultiHeadAttentionLayer implements scaled dot-product multi-head attention.
Input: [batch, seq_len, embed_dim] Output: [batch, seq_len, embed_dim]
Computes attention across num_heads heads and projects back to embed_dim.
Config options (via constructor parameters):
- embed_dim — model dimension (must be divisible by num_heads)
- num_heads — number of parallel attention heads
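A minimal sketch of the two core quantities, the per-head dimension and the scaled dot-product score, with plain arrays standing in for vtl tensors:

```v
import math

// Scaled dot-product score between one query and one key vector:
// score = (q . k) / sqrt(head_dim)
fn attn_score(q []f64, k []f64) f64 {
	mut dot := 0.0
	for i in 0 .. q.len {
		dot += q[i] * k[i]
	}
	return dot / math.sqrt(f64(q.len))
}

fn main() {
	embed_dim, num_heads := 8, 2
	head_dim := embed_dim / num_heads // must divide evenly
	println(head_dim) // 4
	println(attn_score([1.0, 0.0, 1.0, 0.0], [1.0, 1.0, 1.0, 0.0])) // 2/sqrt(4) = 1.0
}
```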
struct PositionalEncodingLayer #
struct PositionalEncodingLayer[T] {
max_len int
embed_dim int
pub mut:
pe &vtl.Tensor[T] = unsafe { nil }
}
PositionalEncodingLayer adds fixed sinusoidal positional encodings to an embedding.
Does not contain learnable parameters. Encodings follow the original Transformer formulation (Attention is All You Need, §3.5).
Input: [batch, seq_len, embed_dim] Output: [batch, seq_len, embed_dim] — input + positional encoding
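A minimal sketch of the table the layer precomputes, following the cited formulation:

```v
import math

// Sinusoidal encodings from 'Attention Is All You Need' §3.5:
//   pe[pos][2i]   = sin(pos / 10000^(2i / embed_dim))
//   pe[pos][2i+1] = cos(pos / 10000^(2i / embed_dim))
fn positional_encoding(max_len int, embed_dim int) [][]f64 {
	mut pe := [][]f64{cap: max_len}
	for pos in 0 .. max_len {
		mut row := []f64{len: embed_dim}
		for i := 0; i < embed_dim; i += 2 {
			freq := math.pow(10000.0, f64(i) / f64(embed_dim))
			row[i] = math.sin(f64(pos) / freq)
			if i + 1 < embed_dim {
				row[i + 1] = math.cos(f64(pos) / freq)
			}
		}
		pe << row
	}
	return pe
}

fn main() {
	println(positional_encoding(3, 4)[1]) // the encoding added at position 1
}
```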
struct ReLULayer #
struct ReLULayer[T] {
output_shape []int
}
ReLULayer is a layer that applies the rectified linear unit function element-wise.
struct SigmoidLayer #
struct SigmoidLayer[T] {
output_shape []int
}
SigmoidLayer is a layer that applies the sigmoid function to its input.
struct SoftmaxLayer #
struct SoftmaxLayer[T] {
dim int
}
SoftmaxLayer applies the softmax activation over a configurable dimension (default: the last). Input shape: [..., n_classes] → output shape: [..., n_classes]; the output sums to 1 along the softmax dimension.
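A minimal sketch over one row; subtracting the row maximum is the standard trick for numerical stability (whether this layer uses it is not stated here):

```v
import math

// Numerically stable softmax: subtracting the max prevents exp() overflow
// without changing the result.
fn softmax(x []f64) []f64 {
	mut mx := x[0]
	for v in x {
		if v > mx {
			mx = v
		}
	}
	mut exps := []f64{len: x.len}
	mut sum := 0.0
	for i, v in x {
		exps[i] = math.exp(v - mx)
		sum += exps[i]
	}
	for i in 0 .. exps.len {
		exps[i] /= sum
	}
	return exps
}

fn main() {
	println(softmax([1.0, 2.0, 3.0])) // sums to 1
}
```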
struct SoftmaxLayerConfig #
struct SoftmaxLayerConfig {
dim int = -1 // dimension to apply softmax over; -1 means last dimension
}
struct SwishLayer #
struct SwishLayer[T] {
output_shape []int
}
SwishLayer applies the Swish activation Swish(x) = x * sigmoid(beta * x) with beta = 1 (standard Swish), i.e. x * sigmoid(x).
struct TanhLayer #
struct TanhLayer[T] {
output_shape []int
}
TanhLayer is a layer that applies the tanh activation function to its input.