I learned that std::vector is a nice wrapper around raw arrays in C++, so I started using it to manage host data in my CUDA app [1]. Because allocating and copying everything by hand makes the code more complex and less readable, I thought about extending std::vector. As I'm not very experienced, I would like to know what you think about it: in particular, whether it's correctly done (e.g., the destructor of std::vector is called implicitly, right?) and whether you consider it a good idea.
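As far as I understand, the base-class destructor runs implicitly right after the derived one, so my cudaFree would happen before the vector's own cleanup. What worries me is that std::vector has a non-virtual destructor; a tiny standalone sketch of the case I mean (the names are just placeholders for illustration):

    struct Derived : public std::vector<int>
    {
        ~Derived() { /* runs first; ~vector() runs right after */ }
    };

    int main()
    {
        Derived d;                 // fine: both destructors run at scope exit
        // std::vector<int>* p = new Derived;
        // delete p;               // undefined behavior: ~vector() is not virtual
        return 0;
    }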
I wrote a small example illustrating my approach:
#include <vector>
#include <cstdio>
#include <cstdlib>        // for exit()
#include <cuda_runtime.h> // runtime API: cudaMalloc, cudaMemcpy, cudaFree, ...
void checkCUDAError(const char *msg)
{
    cudaError_t err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}
// Wrapper around CUDA memory
template<class T>
class UniversalVector : public std::vector<T>
{
    T* devicePtr_;
    bool allocated;
public:
    // Constructor
    UniversalVector(unsigned int length)
        : std::vector<T>(length),
          devicePtr_(NULL),
          allocated(false)
    {}
    // Destructor: release the device copy if we allocated one
    ~UniversalVector()
    {
        if (allocated)
            cudaFree(devicePtr_);
    }
    // Allocate a device buffer matching the current host size
    cudaError_t allocateDevice()
    {
        if (allocated) cudaFree(devicePtr_);
        cudaError_t err =
            cudaMalloc((void**)&devicePtr_, sizeof(T) * this->size());
        allocated = true;
        return err;
    }
    // Copy the host contents to the device buffer
    cudaError_t loadToDevice()
    {
        return cudaMemcpy(devicePtr_, &(*this)[0], sizeof(T) * this->size(),
                          cudaMemcpyHostToDevice);
    }
    // Copy the device buffer back into the host vector
    cudaError_t loadFromDevice()
    {
        return cudaMemcpy(&(*this)[0], devicePtr_, sizeof(T) * this->size(),
                          cudaMemcpyDeviceToHost);
    }
    // Accessors
    inline T* devicePtr() {
        return devicePtr_;
    }
};
__global__ void kernel(int* a)
{
    int i = threadIdx.x;
    printf("%i\n", a[i]);
}
int main()
{
    UniversalVector<int> vec(3);
    vec.at(0) = 1;
    vec.at(1) = 2;
    vec.at(2) = 3;
    vec.allocateDevice();
    vec.loadToDevice();
    kernel<<<1, 3>>>(vec.devicePtr());
    checkCUDAError("Error when doing something");
    cudaDeviceSynchronize(); // wait for the kernel so its printf output is flushed
    return 0;
}
[1] CUDA distinguishes between host and device memory: host memory is the memory accessible by the CPU, and device memory is the memory on the GPU. The programmer has to move data from the host to the device and back.
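For comparison, this is roughly the manual pattern the wrapper is meant to hide (a minimal sketch using the plain runtime API; the variable names are just for illustration):

    int host[3] = {1, 2, 3};
    int* device = NULL;
    cudaMalloc((void**)&device, sizeof(host));                      // allocate on the GPU
    cudaMemcpy(device, host, sizeof(host), cudaMemcpyHostToDevice); // host -> device
    // ... launch a kernel on `device` ...
    cudaMemcpy(host, device, sizeof(host), cudaMemcpyDeviceToHost); // device -> host
    cudaFree(device);                                               // release the GPU memory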