I know a similar question has been asked before, but I'm having trouble with this. Here is the code I have written:
// Forward 2D complex-to-complex FFT in double precision using cuFFT.
//
// Parameters:
//   indata_real, indata_imag   - input real/imaginary parts, x*y elements each
//   outdata_real, outdata_imag - output real/imaginary parts, x*y elements each
//   x, y                       - transform dimensions, passed to cufftPlan2d in
//                                that order (per the cuFFT docs the second
//                                dimension is the contiguous one - confirm this
//                                matches the caller's layout)
//
// The transform is executed in place on a temporary device buffer.
//
// Error handling: every CUDA runtime / cuFFT call is checked. On any failure a
// diagnostic is written to stderr, all resources are released, and the output
// arrays are left untouched. Previously failures were ignored and the
// untransformed input was silently copied to the outputs.
// NOTE(review): if the Z2Z path fails while the C2C path works, check that the
// device and the build target support double precision - TODO confirm.
//
// Requires <stdio.h>, <stdlib.h>, <cuda_runtime.h> and <cufft.h>.
void fft(const double *indata_real, const double *indata_imag, double *outdata_real, double *outdata_imag, int x, int y)
{
    // All locals are declared before the first goto so that no jump to
    // `cleanup` bypasses an initialization.
    cufftDoubleComplex *host_data = NULL;
    cufftDoubleComplex *device_data = NULL;
    cufftHandle plan;
    int plan_created = 0;
    cudaError_t cuda_status;
    cufftResult fft_status;
    size_t count;
    size_t size;
    size_t i;

    if (x <= 0 || y <= 0) {
        fprintf(stderr, "fft: invalid dimensions %d x %d\n", x, y);
        return;
    }

    // Use size_t so the element count and byte size cannot overflow int.
    count = (size_t)x * (size_t)y;
    size = count * sizeof(cufftDoubleComplex);

    // allocate staging buffer on host and interleave real/imag parts
    host_data = (cufftDoubleComplex *)malloc(size);
    if (host_data == NULL) {
        fprintf(stderr, "fft: host allocation of %lu bytes failed\n", (unsigned long)size);
        goto cleanup;
    }
    for (i = 0; i < count; ++i) {
        host_data[i].x = indata_real[i];
        host_data[i].y = indata_imag[i];
    }

    // allocate data on device
    cuda_status = cudaMalloc((void **)&device_data, size);
    if (cuda_status != cudaSuccess) {
        fprintf(stderr, "fft: cudaMalloc failed: %s\n", cudaGetErrorString(cuda_status));
        device_data = NULL;
        goto cleanup;
    }

    // copy data from host to device
    cuda_status = cudaMemcpy(device_data, host_data, size, cudaMemcpyHostToDevice);
    if (cuda_status != cudaSuccess) {
        fprintf(stderr, "fft: host-to-device copy failed: %s\n", cudaGetErrorString(cuda_status));
        goto cleanup;
    }

    // create plan
    fft_status = cufftPlan2d(&plan, x, y, CUFFT_Z2Z);
    if (fft_status != CUFFT_SUCCESS) {
        fprintf(stderr, "fft: cufftPlan2d failed with cufftResult %d\n", (int)fft_status);
        goto cleanup;
    }
    plan_created = 1;

    // perform transform (in place: input and output are the same buffer)
    fft_status = cufftExecZ2Z(plan, device_data, device_data, CUFFT_FORWARD);
    if (fft_status != CUFFT_SUCCESS) {
        fprintf(stderr, "fft: cufftExecZ2Z failed with cufftResult %d\n", (int)fft_status);
        goto cleanup;
    }

    // copy data back from device to host; this also reports errors raised
    // asynchronously while the transform was executing
    cuda_status = cudaMemcpy(host_data, device_data, size, cudaMemcpyDeviceToHost);
    if (cuda_status != cudaSuccess) {
        fprintf(stderr, "fft: device-to-host copy failed: %s\n", cudaGetErrorString(cuda_status));
        goto cleanup;
    }

    // copy transform to outdata (only reached when every step succeeded)
    for (i = 0; i < count; ++i) {
        outdata_real[i] = host_data[i].x;
        outdata_imag[i] = host_data[i].y;
    }

cleanup:
    // release whatever was acquired, on success and failure alike
    if (plan_created) {
        cufftDestroy(plan);
    }
    free(host_data);
    if (device_data != NULL) {
        cudaFree(device_data);
    }
}
The above works fine for single precision, i.e. when I replace all 'cufftDoubleComplex' with 'cufftComplex', replace 'CUFFT_Z2Z' with 'CUFFT_C2C', and replace 'cufftExecZ2Z' with 'cufftExecC2C'.
Based on what I found on that other page, I thought this would run fine with double precision. But at the moment the outdata arrays are the same as the indata arrays - it's not doing anything.
So if anyone can spot what I've done wrong that would be great!
S