1

Consider the following code:

#include <H5Cpp.h>
#include <vector>
#include <eigen3/Eigen/Dense>
#include <iostream>

double* matrix_to_array(Eigen::MatrixXd const &input){
    int const NX = input.rows();
    int const NY = input.cols();
    double *data = new double[NX*NY];
    for(std::size_t i=0; i<NX; i++){
        for(std::size_t j=0; j<NY; j++){
            data[j+i*NX] = input(i,j);
        }
    }
    return data;
}

int main() {

    Eigen::MatrixXd data = Eigen::MatrixXd::Random(124, 4654);
    data.fill(3);
    
    H5::H5File file("data.hdf5", H5F_ACC_TRUNC);
    hsize_t dimsf[2] = {data.rows(), data.cols()};
    H5::DataSpace dataspace(2, dimsf);
    H5::DataSet dataset = file.createDataSet("test_data_set", 
                                            H5::PredType::NATIVE_DOUBLE,
                                            dataspace);

    auto data_arr = matrix_to_array(data);

    dataset.write(data_arr, H5::PredType::NATIVE_DOUBLE);

    delete[] data_arr;

}

It compiles just fine using the following CMakeLists.txt

cmake_minimum_required(VERSION 2.8)

project(test)

find_package(HDF5 REQUIRED COMPONENTS C CXX)
include_directories(${HDF5_INCLUDE_DIRS})

add_executable(hdf5 hdf5.cpp)
target_link_libraries(hdf5 ${HDF5_HL_LIBRARIES} ${HDF5_CXX_LIBRARIES} ${HDF5_LIBRARIES})

After executing I thought everything was fine, but upon running the following python code (which bscly. just prints the data row by row)

import h5py
import numpy as np

hf = h5py.File("build/data.hdf5", "r")

keys = list(hf.keys()) 
data_set = hf.get(keys[0]) 
data_set_np = np.array(data_set)

for row in data_set_np:
    print(row)

I realized that the first 18000 or so entries of the matrix were properly written to the hdf5-file, while the rest was set to zero for some reason. I checked data and data_arr in the above C++ code, and all the entries of both matrices are set to 0, so the error must happen somewhere in the writing process to the hdf5-file... The issue is, I don't see where. What exactly am I missing?

Sito
  • 494
  • 10
  • 29
  • I am not that familiar with HDF5, but I noticed that your data buffer is 2d matrix represented in 1 dimension. The dataspace you pass to the dataset is also 2 dimensions. Does HDF5 assume that it's always going to receive a "flattened" matrix, or do you need to change your dataspace? – AndyG Mar 26 '21 at 20:44
  • @AndyG That's a good point and I'm honestly not sure, since this is pretty much my first time working with HDF5.. I based the above code on [this answer](https://stackoverflow.com/questions/7412042/hdf5-c-interface-writing-dynamic-2d-arrays). – Sito Mar 26 '21 at 20:53
  • I am not an expert in HDF5 library, but I used it recently and I am pretty sure it indeed expects a flatten matrix. – prapin Mar 26 '21 at 21:06
  • @prpin But how are you supposed to restore the original shape of your data then? – Sito Mar 26 '21 at 21:10

1 Answers1

3

After some trying out and consulting the examples of the H5 group, I got it to work.

#include <iostream>
#include <string>
#include "H5Cpp.h"
#include <eigen3/Eigen/Dense>
using namespace H5;
int main (void){

   const H5std_string  FILE_NAME( "data.h5" );
   const H5std_string  DATASET_NAME( "DOUBLEArray" );
   const int   NX = 123;                    // dataset dimensions
   const int   NY = 4563;
   const int   RANK = 2;

   Eigen::MatrixXd data = Eigen::MatrixXd::Random(NX, NY);

   int i, j;
   double data_arr[NX][NY];          // buffer for data to write
   for (j = 0; j < NX; j++)
   {
      for (i = 0; i < NY; i++)
        data_arr[j][i] = data(j,i);
   }

    H5File file( FILE_NAME, H5F_ACC_TRUNC );
    hsize_t     dimsf[2];              // dataset dimensions
    dimsf[0] = NX;
    dimsf[1] = NY;
    DataSpace dataspace( RANK, dimsf );
    /*
    * Define datatype for the data in the file.
    * We will store little endian DOUBLE numbers.
    */
    FloatType datatype( PredType::NATIVE_DOUBLE );
    datatype.setOrder( H5T_ORDER_LE );

    DataSet dataset = file.createDataSet( DATASET_NAME, datatype, dataspace );

    dataset.write( data_arr, PredType::NATIVE_DOUBLE );

}

As far as I can tell the only thing that changes is that we specify the order of elements here explicitly, i.e.

FloatType datatype( PredType::NATIVE_DOUBLE );
        datatype.setOrder( H5T_ORDER_LE );

while in the question we just pass PredType::NATIVE_DOUBLE as argument. I can't really comment on why or if this solves the problem...

Sito
  • 494
  • 10
  • 29