As other answers point out, the use of malloc
(or even new
) should be avoided in c++. Anyway, as you requested:
I require a logic for using malloc
in this situation instead of new
/ delete
because I have to do this in cuda...
In this case you have to allocate memory for the Hessian
instances first, then iterate throug them and allocate memory for each Dxx
, Dxy
and Dyy
. I would create a function for this like follows:
Hessian* create(size_t length) {
Hessian* obj = (Hessian*)malloc(length * sizeof(Hessian));
for(size_t i = 0; i < length; ++i) {
obj[i].Dxx = (float*)malloc(sizeof(float));
obj[i].Dxy = (float*)malloc(sizeof(float));
obj[i].Dyy = (float*)malloc(sizeof(float));
}
return obj;
}
To deallocate the memory you allocated with create
function above, you have to iterate through Hessian
instances and deallocate each Dxx
, Dxy
and Dyy
first, then deallocate the block which stores the Hessian
instances:
void destroy(Hessian* obj, size_t length) {
for(size_t i = 0; i < length; ++i) {
free(obj[i].Dxx);
free(obj[i].Dxy);
free(obj[i].Dyy);
}
free(obj);
}
Note: using the presented method will pass the responsibility of preventing memory leaks to you.
If you wish to use the std::vector
instead of manual allocation and deallocation (which is highly recommended), you can write a custom allocator for it to use cudaMalloc
and cudaFree
like follows:
template<typename T> struct cuda_allocator {
using value_type = T;
cuda_allocator() = default;
template<typename U> cuda_allocator(const cuda_allocator<U>&) {
}
T* allocate(std::size_t count) {
if(count <= max_size()) {
void* raw_ptr = nullptr;
if(cudaMalloc(&raw_ptr, count * sizeof(T)) == cudaSuccess)
return static_cast<T*>(raw_ptr);
}
throw std::bad_alloc();
}
void deallocate(T* raw_ptr, std::size_t) {
cudaFree(raw_ptr);
}
static std::size_t max_size() {
return std::numeric_limits<std::size_t>::max() / sizeof(T);
}
};
template<typename T, typename U>
inline bool operator==(const cuda_allocator<T>&, const cuda_allocator<U>&) {
return true;
}
template<typename T, typename U>
inline bool operator!=(const cuda_allocator<T>& a, const cuda_allocator<U>& b) {
return !(a == b);
}
The usage of an custom allocator is very simple, you just have to specify it as second template parameter of std::vector
:
struct Hessian {
std::vector<float, cuda_allocator<float>> Dxx;
std::vector<float, cuda_allocator<float>> Dxy;
std::vector<float, cuda_allocator<float>> Dyy;
};
/* ... */
std::vector<Hessian, cuda_allocator<Hessian>> hessian;