In the following code I am simply calling a function foo twice serially from main. The function simply does device memory allocation , and then increments this pointer. Then it exits and goes back to main.
First time foo is called memory is correctly allocated. But now as you can see in output when I call foo again, cuda memory allocation is failing with an error invalid device pointer
I tried using cudaThreadSynchronize() between two foo calls, but no gain. Why memory allocation failing ?
Actually the error is casued due to
matrixd += 3;
Because if I don't do this increment the error disappeared.
But why , even though I am using cudaFree() ?
Kindly help me understand this.
My Output is here
Calling foo for the first time
Allocation of matrixd passed:
I came back to main safely :-)
I am going back to foo again :-)
Allocation of matrixd failed, the reason is: invalid device pointer
My main() is here
#include<stdio.h>
#include <cstdlib> // malloc(), free()
#include <iostream> // cout, stream
#include <math.h>
#include <ctime> // time(), clock()
#include <bitset>
bool foo( );
/***************************************
Main method.
****************************************/
int main()
{
// Perform one warm-up pass and validate
std::cout << "Calling foo for the first time"<<std::endl;
foo();
std::cout << "I came back to main safely :-) "<<std::endl;
std::cout << "I am going back to foo again :-) "<<std::endl;
foo( );
getchar();
return 0;
}
Definition of foo() is in this file :
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <device_launch_parameters.h>
#include <iostream>
bool foo( )
{
// Error return value
cudaError_t status;
// Number of bytes in the matrix.
int bytes = 9 *sizeof(float);
// Pointers to the device arrays
float *matrixd=NULL;
// Allocate memory on the device to store matrix
cudaMalloc((void**) &matrixd, bytes);
status = cudaGetLastError(); //To check the error
if (status != cudaSuccess) {
std::cout << "Allocation of matrixd failed, the reason is: " << cudaGetErrorString(status) <<
std::endl;
cudaFree(matrixd); //Free call for memory
return false;
}
std::cout << "Allocation of matrixd passed: "<<std::endl;
////// Increment address
for (int i=0; i<3; i++){
matrixd += 3;
}
// Free device memory
cudaFree(matrixd);
return true;
}
Update
With better error checking. Also I am incrementalism the device pointer only once. This time I get following output:
Calling foo for the first time
Allocation of matrixd passed:
Increamented the pointer and going to free cuda memory:
GPUassert: invalid device pointer C:/Users/user/Desktop/Gauss/Gauss/GaussianElem
inationGPU.cu 44
Line number 44 is cudaFree(). Why it still failing?
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
// GPU function for direct method Gross Jorden method.
bool foo( )
{
// Error return value
cudaError_t status;
// Number of bytes in the matrix.
int bytes = 9 *sizeof(float);
// Pointers to the device arrays
float *matrixd=NULL;
// Allocate memory on the device to store each matrix
gpuErrchk( cudaMalloc((void**) &matrixd, bytes));
//cudaMemset(outputMatrixd, 0, bytes);
std::cout << "Allocation of matrixd passed: "<<std::endl;
////// Incerament address
matrixd += 1;
std::cout << "Increamented the pointer and going to free cuda memory: "<<std::endl;
// Free device memory
gpuErrchk( cudaFree(matrixd));
return true;
}