Properties : Win 10, VS 2013, CUDA 7.5, GeForce 920M.
There isn't any error or warning for both case. Output is SAME for both case. Only difference is:
In second case Stream2 and Stream3 does not exist.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "../../common/common.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <time.h>
__global__ void kernel1(char *value){
for (int i = 0; i < 100; i++){
printf("%s\n", value);
}
}
__global__ void kernel2(){
cudaStream_t s1, s2;
cudaStreamCreateWithFlags(&s1, cudaStreamNonBlocking);
cudaStreamCreateWithFlags(&s2, cudaStreamNonBlocking);
kernel1 << < 1, 1, 0, s1 >> >("up stream");
kernel1 << < 1, 1, 0, s2 >> >("bottom stream");
}
int main(int argc, char **argv){
printf("%s Starting...\n", argv[0]); // set up device
int dev = 0;
cudaDeviceProp deviceProp;
CHECK(cudaGetDeviceProperties(&deviceProp, dev));
printf("Using Device %d: %s\n", dev, deviceProp.name);
CHECK(cudaSetDevice(dev));
// FIRST CASE
//cudaStream_t s1, s2;
//cudaStreamCreateWithFlags(&s1, cudaStreamNonBlocking);
//cudaStreamCreateWithFlags(&s2, cudaStreamNonBlocking);
//kernel1 << <1, 1, 0, s1 >> >();
//kernel1 << <1, 1, 0, s2 >> >();
//SECOND CASE
kernel2 << < 1, 1>> >();
CHECK(cudaDeviceSynchronize());
CHECK(cudaGetLastError()); // check kernel error
CHECK(cudaDeviceReset()); // reset device
printf("\nEnd\n");
getchar();
return (0);
}
i also add common.h
#include <time.h>
#include <stdio.h>
#ifndef _COMMON_H
#define _COMMON_H
#define CHECK(call) \
{ \
const cudaError_t error = call; \
if (error != cudaSuccess) \
{ \
fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__); \
fprintf(stderr, "code: %d, reason: %s\n", error, \
cudaGetErrorString(error)); \
getchar();exit(1); \
} \
}
#define CHECK_CUBLAS(call) \
{ \
cublasStatus_t err; \
if ((err = (call)) != CUBLAS_STATUS_SUCCESS) \
{ \
fprintf(stderr, "Got CUBLAS error %d at %s:%d\n", err, __FILE__, \
__LINE__); \
getchar();exit(1); \
} \
}
#define CHECK_CURAND(call) \
{ \
curandStatus_t err; \
if ((err = (call)) != CURAND_STATUS_SUCCESS) \
{ \
fprintf(stderr, "Got CURAND error %d at %s:%d\n", err, __FILE__, \
__LINE__); \
getchar();exit(1); \
} \
}
#define CHECK_CUFFT(call) \
{ \
cufftResult err; \
if ( (err = (call)) != CUFFT_SUCCESS) \
{ \
fprintf(stderr, "Got CUFFT error %d at %s:%d\n", err, __FILE__, \
__LINE__); \
getchar();exit(1); \
} \
}
#define CHECK_CUSPARSE(call) \
{ \
cusparseStatus_t err; \
if ((err = (call)) != CUSPARSE_STATUS_SUCCESS) \
{ \
fprintf(stderr, "Got error %d at %s:%d\n", err, __FILE__, __LINE__); \
cudaError_t cuda_err = cudaGetLastError(); \
if (cuda_err != cudaSuccess) \
{ \
fprintf(stderr, " CUDA error \"%s\" also detected\n", \
cudaGetErrorString(cuda_err)); \
} \
getchar();exit(1); \
} \
}
clock_t seconds()
{
return clock();
}
#endif // _COMMON_H