There is no way to do this with the CUDA APIs. If you did want to do this, you would need to make your own instrumentation system which wraps the CUDA memory allocation/deallocation APIs which your code calls. The simplest implementation could look something like this:
#include <iostream>
#include <vector>
#include <algorithm>
typedef std::pair<void*, size_t> mrecord;
struct mymemory
{
std::vector<mrecord> mstack;
mymemory() {};
cudaError_t cudaMalloc(void** p, size_t sz);
cudaError_t cudaFree(void* p);
void print_stack();
};
cudaError_t mymemory::cudaMalloc(void** p, size_t sz)
{
cudaError_t ret = ::cudaMalloc(p, sz);
if (ret == cudaSuccess) {
mstack.push_back(mrecord(*p,sz));
}
return ret;
};
cudaError_t mymemory::cudaFree(void* p)
{
cudaError_t ret = ::cudaFree(p);
if (ret == cudaSuccess) {
auto rit = std::find_if( mstack.begin(), mstack.end(),
[&](const mrecord& r){ return r.first == p; } );
if (rit != mstack.end()) {
mstack.erase(rit);
}
}
return ret;
};
void mymemory::print_stack()
{
auto it = mstack.begin();
for(; it != mstack.end(); ++it) {
mrecord rec = *it;
std::cout << rec.first << " : " << rec.second << std::endl;
}
}
int main(void)
{
const int nallocs = 10;
void* pointers[nallocs];
mymemory mdebug;
for(int i=0; i<nallocs; ++i) {
mdebug.cudaMalloc(&pointers[i], 4<<i);
}
std::cout << "After Allocation" << std::endl;
mdebug.print_stack();
mdebug.cudaFree(pointers[1]);
mdebug.cudaFree(pointers[7]);
mdebug.cudaFree(pointers[8]);
mdebug.cudaFree(0);
std::cout << "After Deallocation" << std::endl;
mdebug.print_stack();
return 0;
}
[Warning: only very lightly tested and required C++11 compiler support]
which would do this:
~/SO$ nvcc -std=c++11 -g -arch=sm_52 instrumentation.cu
~/SO$ ./a.out
After Allocation
0x705e40000 : 4
0x705e40200 : 8
0x705e40400 : 16
0x705e40600 : 32
0x705e40800 : 64
0x705e40a00 : 128
0x705e40c00 : 256
0x705e40e00 : 512
0x705e41000 : 1024
0x705f40000 : 2048
After Deallocation
0x705e40000 : 4
0x705e40400 : 16
0x705e40600 : 32
0x705e40800 : 64
0x705e40a00 : 128
0x705e40c00 : 256
0x705f40000 : 2048
This might be enough to understand which memory allocations are leaking. But be aware that memory management on the GPU isn't as predictable as you might believe it to be, and you need to be careful when diagnosing a memory leak just on the basis of the amount of free memory which the device reports at any given instant. See this question for some more details.