I am trying to interpolate a 3D array with cuda using texture memory with the code below. I have plotted the input f[x][y][z] to a fixed z value, then I interpolate my array for x and y and plot i again and they look totally different. I also tried this in 1 dimension (with a different code) and there it works so i assume that there must be an error in my code. Can you help me finding it?
#include <cuda_runtime.h>
#include <cuda.h>
#include <iostream>
#include <fstream>
typedef float myType;
texture<myType, 3> tex;
cudaArray *d_volumeArray = 0;
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) { getchar(); exit(code); }
}
}
__global__ void getInterpolatedFunctionValue(double x, double y, double z){
//http://stackoverflow.com/questions/10643790/texture-memory-tex2d-basics
printf("%f \n", tex3D(tex, x+0.5f, y+0.5f, z+0.5f));
}
using namespace std;
int main(){
int nx=100, ny=100, nz=10;
myType f[nx][ny][nz];
for(int i=0; i<nx; i++)
for(int j=0; j<ny; j++)
for(int k=0; k<nz; k++){
f[i][j][k] = sin(i/10.0)*cos(j/10.0)+k;
}
const cudaExtent extend = make_cudaExtent(nx, ny, nz);
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<myType>();
gpuErrchk(cudaMalloc3DArray(&d_volumeArray, &channelDesc, extend));
cudaMemcpy3DParms copyParams = {0};
copyParams.srcPtr = make_cudaPitchedPtr((void*)f, extend.width*sizeof(myType), extend.width, extend.height);
copyParams.dstArray = d_volumeArray;
copyParams.extent = extend;
copyParams.kind = cudaMemcpyHostToDevice;
gpuErrchk(cudaMemcpy3D(©Params));
tex.normalized = false;
tex.filterMode = cudaFilterModeLinear;
tex.addressMode[0] = cudaAddressModeClamp;
tex.addressMode[1] = cudaAddressModeClamp;
tex.addressMode[2] = cudaAddressModeClamp;
gpuErrchk(cudaBindTextureToArray(tex, d_volumeArray, channelDesc));
for(int i=0; i<nx*2; i++){
for(int j=0; j<ny*2; j++){
getInterpolatedFunctionValue <<<1, 1>>> (float(i)/2, float(j)/2, 3.0);
gpuErrchk(cudaPeekAtLastError());
gpuErrchk(cudaDeviceSynchronize());
}
}
gpuErrchk(cudaUnbindTexture(tex));
gpuErrchk(cudaFreeArray(d_volumeArray));
return 0;
}
Update: @Robert Crovella: In my opinion you can see my problem better if one does plot the output and compare the interpolation with the original. I will add them below. The integer division was not planed and i fixed it, but that was not the reason for my problem
@JackOLantern: i know this post and your code there was the template for my version. But it seems to me that it does not work as i would have expected.
Since i have not enough reputation to upload images here i will link the two images. Number 1 shows a plot of my input values for a fix z value and figure 2 the interpolation done by my code. The original data are in a range of [2,4] while the interpolated are in [-2,10] and the structure are totally different. I hope this helps understanding my problem better.
1.
2.