#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
// Kernel with no parameters: each thread that executes it emits one
// greeting line through device-side printf.
__global__ void funct()
{
    printf("Hello from GPU!\n");
}
// Host entry point: launches funct with 2 blocks of 4 threads, waits for the
// kernel to finish, then prints the CPU greeting ten times.
// Returns 0 on success, or 1 if a CUDA runtime call reports an error.
int main(void){
    funct<<<2, 4>>>();

    // A kernel launch has no return value; launch failures (e.g. an invalid
    // configuration) are only reported through cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess){
        fprintf(stderr, "Kernel launch failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    // Block the host exactly once until the kernel has completed. This is the
    // call meant for waiting on device work; it is also a point at which the
    // device-side printf output is delivered to the host.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess){
        fprintf(stderr, "Kernel execution failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    for (int i = 0; i < 10; i++){
        printf("Hello, World from CPU!\n");
    }

    // Tear down the device context once at exit instead of on every loop
    // iteration; resetting is cleanup, not a synchronisation primitive.
    err = cudaDeviceReset();
    if (err != cudaSuccess){
        fprintf(stderr, "Device reset failed: %s\n", cudaGetErrorString(err));
        return 1;
    }
    return 0;
}
I thought the role of cudaDeviceReset() was similar to that of cudaMemcpy(). In this case the kernel does not produce a numeric result, so we were not able to use cudaMemcpy(). Instead, we used cudaDeviceReset() to get the four "Hello from GPU!" results back from the kernel.
Is that right?
I also replaced cudaDeviceReset() with cudaDeviceSynchronize() and saw the same result, but I could not tell what the difference between them is.