compute
fn adam_step_vulkan_f32 #
fn adam_step_vulkan_f32(dev &vulkan.Device, grad []f32, mut theta []f32, mut m []f32, mut v []f32,
p AdamStepParams) !
adam_step_vulkan_f32 performs one fused Adam update on host f32 arrays via GPU.
fn add_scalar_vulkan #
fn add_scalar_vulkan(dev &vulkan.Device, x_data []f64, s f64) ![]f64
add_scalar_vulkan adds the scalar s to each element of x_data using the scale pipeline. Because the scale pipeline computes dst = alpha * src (not dst = src + scalar), the addition is emulated as dst = src * 1.0 + scalar in two passes: first copy the input, then add the scalar constant.
fn add_vec_vulkan #
fn add_vec_vulkan(dev &vulkan.Device, a_data []f64, b_data []f64) ![]f64
add_vec_vulkan computes element-wise addition via vector_add pipeline.
fn conv2d_backward_cpu_nchw #
fn conv2d_backward_cpu_nchw(grad_out []f64, input []f64, kernel []f64, batch int, in_h int, in_w int, in_ch int, out_ch int, k_h int, k_w int, stride_h int, stride_w int, pad_h int, pad_w int) !Conv2DBwdFlat
conv2d_backward_cpu_nchw computes Conv2D gradients on CPU (groups=1, dilation=1).
fn conv2d_backward_vulkan #
fn conv2d_backward_vulkan(dev &vulkan.Device, grad_out []f64, input []f64, kernel []f64, batch int, in_h int, in_w int, in_ch int, out_ch int, k_h int, k_w int, stride_h int, stride_w int, pad_h int, pad_w int) !Conv2DBwdFlat
conv2d_backward_vulkan computes Conv2D gradients: d_weight via a GPU GEMM, and d_input on the CPU (col2im via the reference backward).
fn conv2d_cpu_nchw #
fn conv2d_cpu_nchw(input []f64, kernel []f64, batch int, in_h int, in_w int, in_ch int, out_ch int, k_h int, k_w int, stride_h int, stride_w int) ![]f64
conv2d_cpu_nchw is the CPU reference (no padding).
fn conv2d_vulkan #
fn conv2d_vulkan(dev &vulkan.Device, input []f64, kernel []f64, batch int, in_h int, in_w int, in_ch int, out_ch int, k_h int, k_w int, stride_h int, stride_w int, pad_h int, pad_w int) ![]f64
conv2d_vulkan: NCHW forward via im2col + GEMM (stride >= 1, dilation=1). input: [batch, in_ch, in_h, in_w], kernel: [out_ch, in_ch, k_h, k_w] row-major flat.
fn gemm_vulkan #
fn gemm_vulkan(dev &vulkan.Device, a_data []f64, b_data []f64, m int, n int, k int) ![]f64
gemm_vulkan computes C = A * B with row-major inputs/outputs in f64 API. Internally uses f32 Vulkan kernels and converts back to f64.
fn gemm_vulkan_f32 #
fn gemm_vulkan_f32(dev &vulkan.Device, a_data []f32, b_data []f32, m int, n int, k int) ![]f32
gemm_vulkan_f32 computes C = A * B with row-major inputs/outputs. A is [m x k], B is [k x n], result is [m x n].
fn gemv_vulkan #
fn gemv_vulkan(dev &vulkan.Device, a_data []f64, x_data []f64, m int, n int) ![]f64
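gemv_vulkan computes a matrix-vector product in the f64 API. a_data is the flat matrix whose dimensions are given by m and n, and x_data is the vector; the result is returned as a flat f64 array.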
fn gemv_vulkan_f32 #
fn gemv_vulkan_f32(dev &vulkan.Device, a_data []f32, x_data []f32, m int, n int) ![]f32
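gemv_vulkan_f32 is the f32 variant of gemv_vulkan: it computes a matrix-vector product from the flat matrix a_data (dimensions given by m and n) and the vector x_data, returning a flat f32 array.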
fn im2col_cpu_nchw #
fn im2col_cpu_nchw(input []f64, batch int, in_ch int, in_h int, in_w int, k_h int, k_w int, oh int, ow int, pad_h int, pad_w int, stride_h int, stride_w int) []f32
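im2col_cpu_nchw returns [out_total x k_total] row-major f32 (spatial major; column index t = ic*k_h*k_w + kh*k_w + kw, where ic is the input channel and kh, kw are the kernel row and column indices).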
fn layernorm_vulkan #
fn layernorm_vulkan(dev &vulkan.Device, x_data []f64, gamma []f64, beta []f64) ![]f64
layernorm_vulkan applies row-wise layer normalization via the layernorm pipeline. Gamma/beta are applied on CPU since the Vulkan layernorm op does not include affine transform.
fn mul_scalar_vulkan #
fn mul_scalar_vulkan(dev &vulkan.Device, x_data []f64, s f64) ![]f64
mul_scalar_vulkan multiplies each element by scalar via scale pipeline.
fn mul_vec_vulkan #
fn mul_vec_vulkan(dev &vulkan.Device, a_data []f64, b_data []f64) ![]f64
mul_vec_vulkan computes element-wise a * b via vector_mul pipeline.
fn new_vulkan_backend #
fn new_vulkan_backend(dev &vulkan.Device) VulkanBackend
new_vulkan_backend creates a VulkanBackend for the given Vulkan device.
fn relu_vulkan #
fn relu_vulkan(dev &vulkan.Device, x_data []f64) ![]f64
relu_vulkan applies the ReLU activation to each element of x_data on the Vulkan device and returns the result as f64.
fn relu_vulkan_f32 #
fn relu_vulkan_f32(dev &vulkan.Device, x_data []f32) ![]f32
relu_vulkan_f32 is the f32 variant of relu_vulkan: it applies the ReLU activation to each element of x_data and returns the result as f32.
fn sigmoid_vulkan #
fn sigmoid_vulkan(dev &vulkan.Device, x_data []f64) ![]f64
sigmoid_vulkan applies the sigmoid activation to each element of x_data on the Vulkan device and returns the result as f64.
fn sigmoid_vulkan_f32 #
fn sigmoid_vulkan_f32(dev &vulkan.Device, x_data []f32) ![]f32
sigmoid_vulkan_f32 is the f32 variant of sigmoid_vulkan: it applies the sigmoid activation to each element of x_data and returns the result as f32.
fn softmax_vulkan #
fn softmax_vulkan(dev &vulkan.Device, x_data []f64) ![]f64
softmax_vulkan applies row-wise softmax via the softmax pipeline.
fn tanh_vulkan #
fn tanh_vulkan(dev &vulkan.Device, x_data []f64) ![]f64
tanh_vulkan runs the GELU shader (which contains a tanh term) on x_data. WARNING: the result is the GELU activation, not pure tanh. For accurate tanh, use the CPU fallback.
struct AdamStepParams #
struct AdamStepParams {
pub:
beta1 f64
beta2 f64
lr_t f64
epsilon f64
}
AdamStepParams mirrors VTL optimizers.AdamStepParams (f64 scalars).
struct Conv2DBwdFlat #
struct Conv2DBwdFlat {
pub:
d_input []f64
d_weight []f64
}
Conv2DBwdFlat holds flat NCHW gradients for input and filter.
struct VulkanBackend #
struct VulkanBackend {
mut:
dev &vulkan.Device
}
VulkanBackend implements ComputeBackend using the Vulkan compute API.
fn (VulkanBackend) name #
fn (b &VulkanBackend) name() string
name returns the backend's name as a string.
fn (VulkanBackend) supports #
fn (b &VulkanBackend) supports(op string) bool
supports reports whether this backend supports the operation named by op.
fn (VulkanBackend) gemm #
fn (b &VulkanBackend) gemm(a_data []f64, b_data []f64, m int, n int, k int) ![]f64
gemm is the VulkanBackend counterpart of gemm_vulkan (matrix multiplication in the f64 API); it takes the same arguments without the explicit device.
fn (VulkanBackend) gemv #
fn (b &VulkanBackend) gemv(a_data []f64, x_data []f64, m int, n int) ![]f64
gemv is the VulkanBackend counterpart of gemv_vulkan (matrix-vector product in the f64 API); it takes the same arguments without the explicit device.
fn (VulkanBackend) relu #
fn (b &VulkanBackend) relu(x_data []f64) ![]f64
relu is the VulkanBackend counterpart of relu_vulkan; it takes x_data without the explicit device and returns an f64 array.
fn (VulkanBackend) sigmoid #
fn (b &VulkanBackend) sigmoid(x_data []f64) ![]f64
sigmoid is the VulkanBackend counterpart of sigmoid_vulkan; it takes x_data without the explicit device and returns an f64 array.
fn (VulkanBackend) tanh #
fn (b &VulkanBackend) tanh(x_data []f64) ![]f64
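tanh is the VulkanBackend counterpart of tanh_vulkan; it takes x_data without the explicit device. NOTE: if it delegates to tanh_vulkan, the GELU caveat documented there applies (the result is GELU, not pure tanh).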
fn (VulkanBackend) add_vec #
fn (b &VulkanBackend) add_vec(a_data []f64, b_data []f64) ![]f64
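add_vec is the VulkanBackend counterpart of add_vec_vulkan (element-wise addition of a_data and b_data); it takes the same arguments without the explicit device.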
fn (VulkanBackend) mul_vec #
fn (b &VulkanBackend) mul_vec(a_data []f64, b_data []f64) ![]f64
mul_vec is the VulkanBackend counterpart of mul_vec_vulkan (element-wise a * b); it takes the same arguments without the explicit device.
fn (VulkanBackend) add_scalar #
fn (b &VulkanBackend) add_scalar(x_data []f64, s f64) ![]f64
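add_scalar is the VulkanBackend counterpart of add_scalar_vulkan (adds the scalar s to each element of x_data); it takes the same arguments without the explicit device.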
fn (VulkanBackend) mul_scalar #
fn (b &VulkanBackend) mul_scalar(x_data []f64, s f64) ![]f64
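mul_scalar is the VulkanBackend counterpart of mul_scalar_vulkan (multiplies each element of x_data by the scalar s); it takes the same arguments without the explicit device.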
fn (VulkanBackend) softmax #
fn (b &VulkanBackend) softmax(x_data []f64) ![]f64
softmax is the VulkanBackend counterpart of softmax_vulkan (row-wise softmax); it takes x_data without the explicit device.
fn (VulkanBackend) layernorm #
fn (b &VulkanBackend) layernorm(x_data []f64, gamma []f64, beta []f64) ![]f64
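layernorm is the VulkanBackend counterpart of layernorm_vulkan (row-wise layer normalization with gamma/beta); it takes the same arguments without the explicit device.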
fn (VulkanBackend) conv2d #
fn (b &VulkanBackend) conv2d(input []f64, kernel []f64, batch int, in_h int, in_w int, in_ch int, out_ch int, k_h int, k_w int, stride_h int, stride_w int) ![]f64
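conv2d is the VulkanBackend 2D convolution method. Unlike conv2d_vulkan, it takes no pad_h/pad_w arguments; its parameter list matches conv2d_cpu_nchw (the no-padding CPU reference).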
fn (VulkanBackend) to_internal #
fn (b &VulkanBackend) to_internal(data []f64, rows int, cols int) ![]f64
to_internal converts row-major to Vulkan's column-major layout.
fn (VulkanBackend) from_internal #
fn (b &VulkanBackend) from_internal(data []f64, rows int, cols int) ![]f64
from_internal converts Vulkan's column-major layout back to row-major.
- fn adam_step_vulkan_f32
- fn add_scalar_vulkan
- fn add_vec_vulkan
- fn conv2d_backward_cpu_nchw
- fn conv2d_backward_vulkan
- fn conv2d_cpu_nchw
- fn conv2d_vulkan
- fn gemm_vulkan
- fn gemm_vulkan_f32
- fn gemv_vulkan
- fn gemv_vulkan_f32
- fn im2col_cpu_nchw
- fn layernorm_vulkan
- fn mul_scalar_vulkan
- fn mul_vec_vulkan
- fn new_vulkan_backend
- fn relu_vulkan
- fn relu_vulkan_f32
- fn sigmoid_vulkan
- fn sigmoid_vulkan_f32
- fn softmax_vulkan
- fn tanh_vulkan
- struct AdamStepParams
- struct Conv2DBwdFlat
- struct VulkanBackend