In ordinary C++, the following is safe because `||` short-circuits: as soon as an earlier clause is true, the third clause is never evaluated. I'm wondering whether CUDA kernel code guarantees the same behavior, or whether that guarantee is dropped in order to maximize parallelism.
// i is deliberately out of range, so x[i] must not be evaluated.
int x[100] = {...}, i = -1;
// The range checks come first; x[i] is read only when both of them are false.
if (i < 0 || i >= 100 || x[i] == 0) {
// do something.
}
EDIT:
Adapted from Jack's program, the following program runs fine and outputs "10". Running it under cuda-memcheck reports no errors.
#include <stdio.h>
#include <stdlib.h>
// Probes whether `||` short-circuits in device code. When i lies outside
// [0, N), one of the first two clauses is true, so the third clause -- which
// reads input[-1], one element before the pointer passed in -- should be
// skipped and c should keep its initial value of 10.
//   input:  device buffer; only input[-1] is referenced, and only by the third clause
//   output: device buffer; output[0] receives c when the condition holds
//   i, N:   index and bound used by the two range checks
__global__ void test(float *input, float *output, int i, int N) {
float c = 10;
// NOTE: uncommenting the next line causes cuda-memcheck to report an error.
// c = input[-1];
// The third clause assigns input[-1] to c and tests the result for non-zero;
// it is reached only if both range checks are false.
if (i < 0 || i >= N || (c = input[-1])) {
output[0] = c;
}
}
// Exits with a readable message when a CUDA runtime call did not succeed.
static void checkCuda(cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Host driver for the short-circuit experiment: fills a 10-element input with
// 2.0f, seeds the single output slot with 3.0f, runs `test` with one thread
// and an out-of-range index (i = -1), then prints output[0].
// Every CUDA call is checked so that a device-side fault is reported instead
// of silently printing stale data. Returns 0 on success; exits with
// EXIT_FAILURE on any allocation or CUDA error.
int main(void) {
    int i = -1;  // deliberately outside [0, N)
    int N = 10;
    float* input;
    float* output;
    float* dev_input;
    float* dev_output;

    input = (float*)malloc(sizeof(float) * N);
    output = (float*)malloc(sizeof(float));
    if (input == NULL || output == NULL) {
        fprintf(stderr, "host allocation failed\n");
        free(input);
        free(output);
        return EXIT_FAILURE;
    }

    for (int j = 0; j < N; j++) {
        input[j] = 2.0f;
    }
    output[0] = 3.0f;

    checkCuda(cudaMalloc((void**)&dev_input, sizeof(float) * N), "cudaMalloc(dev_input)");
    checkCuda(cudaMalloc((void**)&dev_output, sizeof(float)), "cudaMalloc(dev_output)");
    checkCuda(cudaMemcpy(dev_input, input, sizeof(float) * N, cudaMemcpyHostToDevice),
              "copy input to device");
    checkCuda(cudaMemcpy(dev_output, output, sizeof(float), cudaMemcpyHostToDevice),
              "copy output to device");

    test<<<1,1>>>(dev_input, dev_output, i, N);
    // A launch returns no status itself: configuration errors show up through
    // cudaGetLastError(), faults during execution at the next synchronizing call.
    checkCuda(cudaGetLastError(), "kernel launch");
    checkCuda(cudaDeviceSynchronize(), "kernel execution");

    checkCuda(cudaMemcpy(output, dev_output, sizeof(float), cudaMemcpyDeviceToHost),
              "copy output to host");
    printf("%f\n", output[0]);

    // Release device and host buffers.
    checkCuda(cudaFree(dev_input), "cudaFree(dev_input)");
    checkCuda(cudaFree(dev_output), "cudaFree(dev_output)");
    free(input);
    free(output);
    return 0;
}