Skip to content

compute

fn adam_step_vulkan_f32 #

fn adam_step_vulkan_f32(dev &vulkan.Device, grad []f32, mut theta []f32, mut m []f32, mut v []f32,
	p AdamStepParams) !

adam_step_vulkan_f32 performs one fused Adam update on host f32 arrays via GPU.

fn add_scalar_vulkan #

fn add_scalar_vulkan(dev &vulkan.Device, x_data []f64, s f64) ![]f64

add_scalar_vulkan adds the scalar s to each element of x_data using the scale pipeline. Because the scale pipeline computes dst = alpha * src (not dst = src + s), the addition is emulated as dst = src * 1.0 + s in two passes: first a copy, then the addition of the scalar constant.

fn add_vec_vulkan #

fn add_vec_vulkan(dev &vulkan.Device, a_data []f64, b_data []f64) ![]f64

add_vec_vulkan computes element-wise addition via vector_add pipeline.

fn conv2d_backward_cpu_nchw #

fn conv2d_backward_cpu_nchw(grad_out []f64, input []f64, kernel []f64, batch int, in_h int, in_w int, in_ch int, out_ch int, k_h int, k_w int, stride_h int, stride_w int, pad_h int, pad_w int) !Conv2DBwdFlat

conv2d_backward_cpu_nchw computes Conv2D gradients on CPU (groups=1, dilation=1).

fn conv2d_backward_vulkan #

fn conv2d_backward_vulkan(dev &vulkan.Device, grad_out []f64, input []f64, kernel []f64, batch int, in_h int, in_w int, in_ch int, out_ch int, k_h int, k_w int, stride_h int, stride_w int, pad_h int, pad_w int) !Conv2DBwdFlat

conv2d_backward_vulkan computes Conv2D gradients with a hybrid scheme: d_weight is computed with a GPU GEMM, while d_input is computed on the CPU (col2im via the reference backward pass).

fn conv2d_cpu_nchw #

fn conv2d_cpu_nchw(input []f64, kernel []f64, batch int, in_h int, in_w int, in_ch int, out_ch int, k_h int, k_w int, stride_h int, stride_w int) ![]f64

conv2d_cpu_nchw is the CPU reference (no padding).

fn conv2d_vulkan #

fn conv2d_vulkan(dev &vulkan.Device, input []f64, kernel []f64, batch int, in_h int, in_w int, in_ch int, out_ch int, k_h int, k_w int, stride_h int, stride_w int, pad_h int, pad_w int) ![]f64

conv2d_vulkan: NCHW forward via im2col + GEMM (stride >= 1, dilation=1). input: [batch, in_ch, in_h, in_w], kernel: [out_ch, in_ch, k_h, k_w] row-major flat.

fn gemm_vulkan #

fn gemm_vulkan(dev &vulkan.Device, a_data []f64, b_data []f64, m int, n int, k int) ![]f64

gemm_vulkan computes C = A * B with row-major inputs/outputs in f64 API. Internally uses f32 Vulkan kernels and converts back to f64.

fn gemm_vulkan_f32 #

fn gemm_vulkan_f32(dev &vulkan.Device, a_data []f32, b_data []f32, m int, n int, k int) ![]f32

gemm_vulkan_f32 computes C = A * B with row-major inputs/outputs. A is [m x k], B is [k x n], result is [m x n].

fn gemv_vulkan #

fn gemv_vulkan(dev &vulkan.Device, a_data []f64, x_data []f64, m int, n int) ![]f64

gemv_vulkan computes the matrix-vector product of a_data and x_data on the Vulkan device through the f64 API; m and n give the matrix dimensions.

fn gemv_vulkan_f32 #

fn gemv_vulkan_f32(dev &vulkan.Device, a_data []f32, x_data []f32, m int, n int) ![]f32

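gemv_vulkan_f32 is the f32 counterpart of gemv_vulkan: it computes the matrix-vector product of a_data and x_data on the Vulkan device, with m and n giving the matrix dimensions.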

fn im2col_cpu_nchw #

fn im2col_cpu_nchw(input []f64, batch int, in_ch int, in_h int, in_w int, k_h int, k_w int, oh int, ow int, pad_h int, pad_w int, stride_h int, stride_w int) []f32

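im2col_cpu_nchw returns an [out_total x k_total] row-major f32 matrix (spatial-major rows; column index t = ic*k_h*k_w + kh*k_w + kw, where ic is the input-channel index and kh, kw are the row and column indices within the kernel).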

fn layernorm_vulkan #

fn layernorm_vulkan(dev &vulkan.Device, x_data []f64, gamma []f64, beta []f64) ![]f64

layernorm_vulkan applies row-wise layer normalization via the layernorm pipeline. Gamma/beta are applied on CPU since the Vulkan layernorm op does not include affine transform.

fn mul_scalar_vulkan #

fn mul_scalar_vulkan(dev &vulkan.Device, x_data []f64, s f64) ![]f64

mul_scalar_vulkan multiplies each element by scalar via scale pipeline.

fn mul_vec_vulkan #

fn mul_vec_vulkan(dev &vulkan.Device, a_data []f64, b_data []f64) ![]f64

mul_vec_vulkan computes element-wise a * b via vector_mul pipeline.

fn new_vulkan_backend #

fn new_vulkan_backend(dev &vulkan.Device) VulkanBackend

new_vulkan_backend creates a VulkanBackend for the given Vulkan device.

fn relu_vulkan #

fn relu_vulkan(dev &vulkan.Device, x_data []f64) ![]f64

relu_vulkan applies the ReLU activation element-wise to x_data on the Vulkan device (f64 API).

fn relu_vulkan_f32 #

fn relu_vulkan_f32(dev &vulkan.Device, x_data []f32) ![]f32

relu_vulkan_f32 applies the ReLU activation element-wise to x_data on the Vulkan device (f32 API).

fn sigmoid_vulkan #

fn sigmoid_vulkan(dev &vulkan.Device, x_data []f64) ![]f64

sigmoid_vulkan applies the sigmoid activation element-wise to x_data on the Vulkan device (f64 API).

fn sigmoid_vulkan_f32 #

fn sigmoid_vulkan_f32(dev &vulkan.Device, x_data []f32) ![]f32

sigmoid_vulkan_f32 applies the sigmoid activation element-wise to x_data on the Vulkan device (f32 API).

fn softmax_vulkan #

fn softmax_vulkan(dev &vulkan.Device, x_data []f64) ![]f64

softmax_vulkan applies row-wise softmax via the softmax pipeline.

fn tanh_vulkan #

fn tanh_vulkan(dev &vulkan.Device, x_data []f64) ![]f64

tanh_vulkan runs the GELU shader (which contains a tanh term) as a stand-in for tanh. WARNING: the result is the GELU activation, not pure tanh. For an accurate tanh, use a CPU fallback.

struct AdamStepParams #

struct AdamStepParams {
pub:
	beta1   f64
	beta2   f64
	lr_t    f64
	epsilon f64
}

AdamStepParams mirrors VTL optimizers.AdamStepParams (f64 scalars).

struct Conv2DBwdFlat #

struct Conv2DBwdFlat {
pub:
	d_input  []f64
	d_weight []f64
}

Conv2DBwdFlat holds flat NCHW gradients for input and filter.

struct VulkanBackend #

struct VulkanBackend {
mut:
	dev &vulkan.Device
}

VulkanBackend implements ComputeBackend using the Vulkan compute API.

fn (VulkanBackend) name #

fn (b &VulkanBackend) name() string

name returns the backend's name as a string.

fn (VulkanBackend) supports #

fn (b &VulkanBackend) supports(op string) bool

supports reports whether this backend supports the operation named by op.

fn (VulkanBackend) gemm #

fn (b &VulkanBackend) gemm(a_data []f64, b_data []f64, m int, n int, k int) ![]f64

gemm is the VulkanBackend matrix-multiplication method; its parameters match gemm_vulkan without the device argument.

fn (VulkanBackend) gemv #

fn (b &VulkanBackend) gemv(a_data []f64, x_data []f64, m int, n int) ![]f64

gemv is the VulkanBackend matrix-vector multiplication method; its parameters match gemv_vulkan without the device argument.

fn (VulkanBackend) relu #

fn (b &VulkanBackend) relu(x_data []f64) ![]f64

relu is the VulkanBackend ReLU activation method; its parameters match relu_vulkan without the device argument.

fn (VulkanBackend) sigmoid #

fn (b &VulkanBackend) sigmoid(x_data []f64) ![]f64

sigmoid is the VulkanBackend sigmoid activation method; its parameters match sigmoid_vulkan without the device argument.

fn (VulkanBackend) tanh #

fn (b &VulkanBackend) tanh(x_data []f64) ![]f64

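tanh is the VulkanBackend tanh activation method; its parameters match tanh_vulkan without the device argument. NOTE: if it delegates to tanh_vulkan, the GELU caveat documented on that function applies here as well.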

fn (VulkanBackend) add_vec #

fn (b &VulkanBackend) add_vec(a_data []f64, b_data []f64) ![]f64

add_vec is the VulkanBackend element-wise addition method; its parameters match add_vec_vulkan without the device argument.

fn (VulkanBackend) mul_vec #

fn (b &VulkanBackend) mul_vec(a_data []f64, b_data []f64) ![]f64

mul_vec is the VulkanBackend element-wise multiplication method; its parameters match mul_vec_vulkan without the device argument.

fn (VulkanBackend) add_scalar #

fn (b &VulkanBackend) add_scalar(x_data []f64, s f64) ![]f64

add_scalar is the VulkanBackend method for adding the scalar s to each element of x_data; its parameters match add_scalar_vulkan without the device argument.

fn (VulkanBackend) mul_scalar #

fn (b &VulkanBackend) mul_scalar(x_data []f64, s f64) ![]f64

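mul_scalar is the VulkanBackend method for multiplying each element of x_data by the scalar s; its parameters match mul_scalar_vulkan without the device argument.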

fn (VulkanBackend) softmax #

fn (b &VulkanBackend) softmax(x_data []f64) ![]f64

softmax is the VulkanBackend softmax method; its parameters match softmax_vulkan without the device argument.

fn (VulkanBackend) layernorm #

fn (b &VulkanBackend) layernorm(x_data []f64, gamma []f64, beta []f64) ![]f64

layernorm is the VulkanBackend layer-normalization method; its parameters match layernorm_vulkan without the device argument.

fn (VulkanBackend) conv2d #

fn (b &VulkanBackend) conv2d(input []f64, kernel []f64, batch int, in_h int, in_w int, in_ch int, out_ch int, k_h int, k_w int, stride_h int, stride_w int) ![]f64

conv2d is the VulkanBackend 2D-convolution method. Unlike conv2d_vulkan, its signature has no pad_h/pad_w parameters.

fn (VulkanBackend) to_internal #

fn (b &VulkanBackend) to_internal(data []f64, rows int, cols int) ![]f64

to_internal converts row-major to Vulkan's column-major layout.

fn (VulkanBackend) from_internal #

fn (b &VulkanBackend) from_internal(data []f64, rows int, cols int) ![]f64

from_internal converts Vulkan's column-major layout back to row-major.