0

I have a CUDA kernel that I would like to update (i.e., change anything between the bounds checks and surf2Dwrite) without having to stop the process. Is there a way to achieve some sort of JIT compilation, or an equivalent of a microservice architecture? If so, how can I achieve this?

Ideally, I would like to edit a text file containing the equations and have it compiled and uploaded as a new kernel, without exiting the process. More explicitly, I would like to update the pixel update logic of UpdateSurface on the fly:

// Divides a by b and bumps the truncated quotient by one whenever the
// division leaves a remainder (i.e. a ceiling division for non-negative a
// and positive b). b must be non-zero.
int iDivUp(int a, int b)
{
    const int quotient = a / b;
    if (a % b == 0)
        return quotient;  // exact division: nothing to round up
    return quotient + 1;
}

// Writes one animated float4 colour per pixel into a 2D surface.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid must
// cover at least width x height threads; surplus threads exit at the bounds
// check, so any block shape is acceptable. No shared memory is used.
//
// surf   - destination surface object; float4 values are written to it
//          (presumably an RGBA float surface - confirm against the code that
//          creates the surface).
// width  - number of pixels per row; threads with x >= width do nothing.
// height - number of rows; threads with y >= height do nothing.
// time   - animation parameter; it only enters the result through __cosf.
__global__ void UpdateSurface(cudaSurfaceObject_t surf, unsigned int width, unsigned int height, float time)
{
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
    // Logical || (not bitwise |) states the intent of the guard.
    if (x >= width || y >= height) return;

    // Pixel coordinates normalised by the surface dimensions.
    const float xVar = (float)x / (float)width;
    const float yVar = (float)y / (float)height;

    // Cosine-modulated intensities. All literals carry the f suffix so the
    // arithmetic stays in single precision instead of being promoted to double.
    const float costx = __cosf(time) * 0.5f + xVar;
    const float costxMany = __cosf((float)y * time) * 0.5f + yVar;
    const float costyMany = __cosf((float)x / 100.0f * time) * 0.5f + xVar;
    // Width, in pixels, of the band tested for the left/right edge columns.
    const unsigned int margin = 1u;

    float4 pixel{};
    if (y == 0)
        // First row.
        pixel = make_float4(costyMany * 0.3f, costyMany, costyMany * 0.4f, 1.0f);
    else if (y == height - 1)
        // Last row.
        pixel = make_float4(costyMany * 0.6f, costyMany * 0.7f, costyMany, 1.0f);
    else if (x % 2 == 0)
    {
        // Even columns: colour depends on which half of the surface x is in.
        if (x > width / 2)
            pixel = make_float4(0.1f, 0.5f, costx, 1.0f);
        else
            pixel = make_float4(costx, 0.1f, 0.2f, 1.0f);
    }
    else if (x > width - margin - 1 || x <= margin)
        // Odd columns inside the left/right margin band.
        pixel = make_float4(costxMany, costxMany * 0.9f, costxMany * 0.6f, 1.0f);
    else
        // Remaining odd columns.
        pixel = make_float4(costx * 0.3f, costx * 0.4f, costx * 0.6f, 1.0f);

    // surf2Dwrite addresses x in bytes, hence the scaling by the texel size.
    surf2Dwrite(pixel, surf, x * static_cast<unsigned int>(sizeof(float4)), y);
}

// Launches UpdateSurface over a textureW x textureH surface on streamToRun.
//
// textureW / textureH - surface dimensions in pixels; they are narrowed to
//                       int / unsigned int for the launch, so they must fit
//                       in those types.
// surfaceObject       - surface handle forwarded to the kernel.
// streamToRun         - stream the kernel is enqueued on; the launch is
//                       asynchronous and this function does not synchronise.
// animTime            - animation parameter forwarded to the kernel.
//
// Launch errors are reported through getLastCudaError.
void RunKernel(size_t textureW, size_t textureH, cudaSurfaceObject_t surfaceObject, cudaStream_t streamToRun, float animTime)
{
    // 16 x 16 = 256 threads per block: a whole number of 32-thread warps, so
    // no lanes are left idle inside a block (a 10 x 10 block wastes 28 of 128).
    // The kernel bounds-checks every thread, so the tile size does not affect
    // the rendered result.
    constexpr int kBlockEdge = 16;
    const dim3 threads(kBlockEdge, kBlockEdge);
    const dim3 grid(iDivUp(static_cast<int>(textureW), kBlockEdge),
                    iDivUp(static_cast<int>(textureH), kBlockEdge));
    UpdateSurface<<<grid, threads, 0, streamToRun>>>(surfaceObject,
                                                     static_cast<unsigned int>(textureW),
                                                     static_cast<unsigned int>(textureH),
                                                     animTime);
    getLastCudaError("UpdateSurface execution failed.\n");
}
Soleil
  • 6,404
  • 5
  • 41
  • 61
  • 2
    Does this answer your question? [Is just-in-time (jit) compilation of a CUDA kernel possible?](https://stackoverflow.com/questions/13567123/is-just-in-time-jit-compilation-of-a-cuda-kernel-possible) – paleonix Jan 02 '23 at 12:44
  • 2
    See [NVRTC](https://docs.nvidia.com/cuda/nvrtc/index.html). – paleonix Jan 02 '23 at 12:50
  • @paleonix An answer with code would be a good answer. – Soleil Jan 02 '23 at 14:46
  • 1
    Nvidia supply a 67 page manual for nvrtc, and there are at least 6 different official CUDA samples available which demonstrate how to use nvrtc. A good answer is at the flagged duplicate and in information you already have if you have installed a CUDA toolkit – talonmies Jan 02 '23 at 15:11

0 Answers0