In my CUDA runtime program, the CPU and GPU can compute asynchronously, but not cooperatively (i.e., not at the same time). Why?
I measured the running time of the program: the total time is the sum of the CPU compute time and the GPU compute time. In the Visual Profiler, I can see that the GPU does not start computing until the CPU has finished. My goal is for the CPU to compute at the same time as the GPU.
Platform:
Windows 10
CUDA 7.5
Visual Studio 2013
Code compiled in debug mode (no optimization)
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include<time.h>
// Busy-work kernel used only to keep the GPU occupied for a measurable time.
// Takes no arguments and produces no output: each thread walks a
// 10000 x 10000 nested loop and keeps overwriting a local with the outer
// counter.
__global__ void addKernel()
{
    int sink;
    for (int outer = 0; outer < 10000; ++outer) {
        for (int inner = 0; inner < 10000; ++inner) {
            sink = outer;
        }
    }
}
// Host-side busy-work used only to keep the CPU occupied for a measurable
// time. Runs 10000 x 10000 x 100 iterations that repeatedly overwrite a local
// with the middle loop counter; nothing is returned.
void comput()
{
    const int outerTrips = 10000;
    const int middleTrips = 10000;
    const int innerTrips = 100;

    int sink = 1;
    for (int outer = 0; outer < outerTrips; ++outer) {
        for (int middle = 0; middle < middleTrips; ++middle) {
            for (int inner = 0; inner < innerTrips; ++inner) {
                sink = middle;
            }
        }
    }
}
// Prints a diagnostic for a failed CUDA runtime call; returns true only when
// `status` is cudaSuccess.
static bool cudaSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Leaves main() with exit code 1 when a CUDA runtime call fails.
#define CUDA_TRY(call)                      \
    do {                                    \
        if (!cudaSucceeded((call), #call))  \
            return 1;                       \
    } while (0)

// Launches addKernel on the GPU, runs comput() on the CPU while the kernel is
// in flight, then prints the GPU time (from CUDA events), the CPU time and the
// total wall time (both from clock()).
// Returns 0 on success and 1 if any CUDA runtime call reports an error.
int main()
{
    CUDA_TRY(cudaSetDevice(0));

    cudaEvent_t start, stop1;
    CUDA_TRY(cudaEventCreate(&start));
    CUDA_TRY(cudaEventCreate(&stop1));

    clock_t ss = clock();
    CUDA_TRY(cudaEventRecord(start, 0));
    addKernel<<<1, 64>>>();
    // A kernel launch returns nothing; launch-configuration errors are only
    // visible through cudaGetLastError().
    CUDA_TRY(cudaGetLastError());
    CUDA_TRY(cudaEventRecord(stop1, 0));

    // Nudge the driver to submit the queued work now. On Windows with the WDDM
    // driver model the driver may batch queued commands and only hand them to
    // the GPU on a later runtime call, which would make the kernel start after
    // comput() instead of alongside it. cudaStreamQuery does not block;
    // cudaErrorNotReady just means the work is still running.
    cudaError_t flushStatus = cudaStreamQuery(0);
    if (flushStatus != cudaSuccess && flushStatus != cudaErrorNotReady) {
        cudaSucceeded(flushStatus, "cudaStreamQuery(0)");
        return 1;
    }

    // CPU work that is meant to overlap with the kernel.
    clock_t ct = clock();
    comput();
    clock_t ctt = clock();

    // Wait for the GPU before reading the events and the final wall clock.
    CUDA_TRY(cudaEventSynchronize(stop1));
    CUDA_TRY(cudaDeviceSynchronize());
    clock_t sss = clock();

    float t1;
    CUDA_TRY(cudaEventElapsedTime(&t1, start, stop1)); // milliseconds
    printf("clock GPU :%.4f s\n", t1/1000);
    printf("clock cpu:%f s\n",(float) (ctt - ct)/CLOCKS_PER_SEC);
    printf("clock total time: %f s\n", (float)(sss - ss) / CLOCKS_PER_SEC);

    CUDA_TRY(cudaEventDestroy(start));
    CUDA_TRY(cudaEventDestroy(stop1));
    CUDA_TRY(cudaDeviceReset());
    return 0;
}

#undef CUDA_TRY