let prog =
"""//Kernel code:
extern "C" {
#pragma pack(1)
typedef struct {
int length;
float *pointer;
} global_array_float;
__global__ void kernel_main(global_array_float x){
printf("(on device) x.length=%d\n",x.length); // prints: (on device) x.length=10
printf("(on device) x.pointer=%lld\n",x.pointer); // prints: (on device) x.pointer=0
printf("sizeof(global_array_float)=%d", sizeof(global_array_float)); // 12 bytes just as expected
}
;}"""
printfn "%s" prog
let cuda_kernel = compile_kernel prog "kernel_main"
let test_launcher(str: CudaStream, kernel: CudaKernel, x: CudaGlobalArray<float32>, o: CudaGlobalArray<float32>) =
let block_size = 1
kernel.GridDimensions <- dim3(1)
kernel.BlockDimensions <- dim3(block_size)
printfn "(on host) x.length=%i" x.length // prints: (on host) x.length=10
printfn "(on host) x.pointer=%i" x.pointer // prints: (on host) x.pointer=21535919104
let args: obj [] = [|x.length;x.pointer|]
kernel.RunAsync(str.Stream, args)
let cols, rows = 10, 1
let a = d2M.create((rows,cols))
|> fun x -> fillRandomUniformMatrix ctx.Str x 1.0f 0.0f; x
let a' = d2MtoCudaArray a
//printfn "%A" (getd2M a)
let o = d2M.create((rows,cols)) // o does nothing here as this is a minimalist example.
let o' = d2MtoCudaArray o
test_launcher(ctx.Str,cuda_kernel,a',o')
cuda_context.Synchronize()
//printfn "%A" (getd2M o)
Here is an excerpt from the main repo that I am working on currently. I am very close to having a working F# quotations to Cuda C compiler, but I can't figure out how to pass the arguments into the function properly from the host side.
Despite the pack pragma, the NVRTC 7.5 Cuda compiler is doing some other optimization and I have no idea what it is.
Because I am working off F# quotations, I need to pass the arguments as a single struct for this to work. If I change the function from kernel_main(global_array_float x)
to something like kernel_main(int x_length, float *x_pointer)
then it works, but I that is not the form which the quotations system gives me upfront and I would like to avoid doing extra work to make F# more like C.
Any idea what I could try?