I want to run some calculations on the GPU in parallel and display the results after every kernel call. The code looks roughly like this:
void execute(){
    runthread = true;

    // Host buffer for the results; must hold 2 * N floats (here N == 128).
    float erg[128 * 2] = {};
    float *d_a, *d_b, *d_c, *d_erg;
    size_t sizeErg = sizeof(float) * 2 * N;
    size_t sizeAB  = sizeof(float) * N;
    float c[2] = { 1, 2 };

    gpuErrchk(cudaMalloc((void**)&d_a, sizeAB));
    gpuErrchk(cudaMalloc((void**)&d_b, sizeAB));
    gpuErrchk(cudaMalloc((void**)&d_c, sizeof(float) * 2));
    gpuErrchk(cudaMalloc((void**)&d_erg, sizeErg));

    gpuErrchk(cudaMemcpy(d_a, anode,   sizeAB, cudaMemcpyHostToDevice));
    gpuErrchk(cudaMemcpy(d_b, kathode, sizeAB, cudaMemcpyHostToDevice));
    gpuErrchk(cudaMemcpy(d_c, c, 2 * sizeof(float), cudaMemcpyHostToDevice));

    // Events to measure how long each kernel call takes.
    cudaEvent_t start, stop;
    gpuErrchk(cudaEventCreate(&start));
    gpuErrchk(cudaEventCreate(&stop));

    float time = 0;
    int i = 0;
    while (runthread){   // set to false from another thread to stop the loop
        gpuErrchk(cudaEventRecord(start));
        kernel<<<(N * 2) / 64, 64>>>(d_a, d_b, d_c, d_erg, N);
        gpuErrchk(cudaEventRecord(stop));
        gpuErrchk(cudaEventSynchronize(stop));   // wait until the kernel has finished
        gpuErrchk(cudaEventElapsedTime(&time, start, stop));

        gpuErrchk(cudaMemcpy(erg, d_erg, sizeErg, cudaMemcpyDeviceToHost));

        // Sum up this iteration's results and display them.
        float acc = 0;
        for (int j = 0; j < N * 2; j++){
            acc += erg[j];
        }
        std::cout << "Erg" << i << " = " << acc << std::endl;
        std::cout << "Kernel execution took " << time << " ms" << std::endl;
        i++;
    }

    gpuErrchk(cudaEventDestroy(start));
    gpuErrchk(cudaEventDestroy(stop));
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);
    cudaFree(d_erg);
}
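For context, the definitions around this function look roughly like this (the value of N, the error-check macro, and the kernel body are simplified placeholders here; anode and kathode are float[N] members of the same class):

    #include <cstdlib>
    #include <iostream>
    #include <cuda_runtime.h>

    #define N 128

    // Simplified error-check macro; the real one also prints file and line.
    #define gpuErrchk(call)                                          \
        do {                                                         \
            cudaError_t e = (call);                                  \
            if (e != cudaSuccess) {                                  \
                std::cerr << cudaGetErrorString(e) << std::endl;     \
                std::exit(1);                                        \
            }                                                        \
        } while (0)

    // Placeholder kernel: one thread per output element (2 * N in total).
    __global__ void kernel(const float* a, const float* b, const float* c,
                           float* erg, int n)
    {
        int idx = blockIdx.x * blockDim.x + threadIdx.x;
        if (idx < 2 * n)
            erg[idx] = c[idx / n] * a[idx % n] + b[idx % n];
    }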
This function is part of a class that has a bool member runthread. My idea was to call another member function that starts execute() on a new CPU thread, wait in the main function for user input, and then call a third member function that sets runthread = false, so the thread finishes after the current kernel completes. However, I always get error messages from Visual Studio. Now I am wondering whether this is even possible at all, or is the CPU busy controlling the GPU execution? Does anyone have experience with multithreading where the GPU and CPU run in parallel? Or should I just poll for user input inside the while loop?
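To make the question concrete, the start/stop scheme I have in mind looks roughly like this (class and member names are placeholders; I made runthread a std::atomic<bool> because I understand a plain bool shared between two threads is a data race, and execute() is stubbed out here so the sketch runs on its own):

    #include <atomic>
    #include <chrono>
    #include <iostream>
    #include <thread>

    class GpuWorker {
    public:
        void start(){
            runthread = true;
            // Run execute() on its own CPU thread so main() stays free for input.
            worker = std::thread(&GpuWorker::execute, this);
        }

        void stop(){
            runthread = false;      // execute() exits after its current iteration
            if (worker.joinable())
                worker.join();      // wait for the last kernel/copy to finish
        }

    private:
        void execute(){
            int i = 0;
            while (runthread){
                // ... launch kernel, copy erg back, print it (as in the loop above) ...
                std::cout << "iteration " << i++ << std::endl;
                std::this_thread::sleep_for(std::chrono::milliseconds(100)); // stand-in for GPU work
            }
        }

        std::atomic<bool> runthread{false};
        std::thread worker;
    };

    int main(){
        GpuWorker w;
        w.start();
        std::cin.get();   // block the main thread until the user presses Enter
        w.stop();
    }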