0

I need good performances to read binary files in my actual program. So I try to use memory mapping to increase the reading speed. In first attempt, I try to use boost::iostream so I write a little program to test the performance :

#include <string>
#include <vector>
#include <iostream>
#include <filesystem>
#include <fstream>
#include <cassert>
#include <chrono>
#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/iostreams/stream.hpp>

namespace fs = std::filesystem;

template<typename TYPE>
inline void read_binary_file_ifstream(const fs::path filename, std::vector<TYPE>& result)
{

  std::ifstream file(filename, std::ios::in | std::ios::binary);

  size_t filesize = fs::file_size(filename);

  assert(filesize%sizeof(TYPE) == 0);
  result.resize(filesize/sizeof(TYPE));

  file.read(reinterpret_cast<char *>(result.data()), filesize);

  file.close();
}

template<typename TYPE>
inline void read_binary_file_boost(const fs::path filename, std::vector<TYPE>& result)
{

  using boost::iostreams::mapped_file_source;
  using boost::iostreams::stream;

  size_t filesize = fs::file_size(filename);

  assert(filesize%sizeof(TYPE) == 0);
  result.resize(filesize/sizeof(TYPE));

  mapped_file_source mmap(filename.string().c_str());
  stream<mapped_file_source> file(mmap, std::ios::binary);

  file.read(reinterpret_cast<char *>(result.data()), filesize);
}

int main()
{
  fs::path path = "idx-position-Deces_Agit_FrHex_aPartir1979_Dom_aPartir2000_enCours-liens_age_tranche_age#age_quinquenal_0_100.dat";
  std::cout << "file size : " << fs::file_size(path) << std::endl;

  std::chrono::time_point<std::chrono::system_clock> start, end;

  start = std::chrono::system_clock::now();
  for(int i = 0; i<10; ++i)
  {
    std::vector<uint32_t> result;

    read_binary_file_ifstream<uint32_t>(path, result);
  }
  end = std::chrono::system_clock::now();
  std::chrono::duration<double> elapsed_seconds = end-start;
  std::cout << "elapsed time ifstream : " << elapsed_seconds.count() << "s\n";

  start = std::chrono::system_clock::now();
  for(int i = 0; i<10; ++i)
  {
    std::vector<uint32_t> result;

    read_binary_file_boost<uint32_t>(path, result);
  }
  end = std::chrono::system_clock::now();
  elapsed_seconds = end-start;
  std::cout << "elapsed time boost iostream : " << elapsed_seconds.count() << "s\n";


  return 0;
}

I use cmake for the compilation :

cmake_minimum_required(VERSION 3.1)


project (boost_io)

set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_MULTITHREADED ON)
set(Boost_USE_STATIC_RUNTIME OFF)
find_package(Boost COMPONENTS iostreams REQUIRED)

add_executable(boost_io main.cpp)

target_compile_features(boost_io PRIVATE cxx_std_17)

target_include_directories(boost_io
  PUBLIC
  $<$<PLATFORM_ID:Windows>: ${PARENT_DIR_INSTALL_PREFIX}/Boost/include >
  )
  
target_link_libraries(boost_io
  PUBLIC
  Boost::iostreams 
  -lstdc++fs)

I tried on two system : Linux and Windows (visual studio 2019 generator). On linux, my results are quite good :

file size : 3202777528
elapsed time ifstream : 10.1622s
elapsed time boost iostream : 8.10151s

But on windows, I have the surprising results :

file size : 3202777528
elapsed time ifstream : 30.6484s
elapsed time boost iostream : 77.9328s

My computer is on dual boot so it's the same ssd disk. The file read is exactly the same. My boost version is 1.75 on windows (and older I think on linux, I have installed boost with apt). So Why I have those differences, and why memory mapping of boost is so slow on windows with visual studio 19 generator. Is their a better way to read binary files quickly ?

Kafka
  • 720
  • 6
  • 21
  • This is bound to be debug iterator support or optimization flags, as always – sehe Dec 24 '20 at 22:22
  • ["The file read is exactly the same" - is it also on the same filesystem/volume?] – sehe Dec 25 '20 at 14:04
  • ["Is their a better way to read binary files quickly?" - the best way is to avoid reading at all. Mapping is a technique. Next up: avoid parsing. Inspirational examples: https://stackoverflow.com/questions/17925051/fast-textfile-reading-in-c/17925143#17925143, or some more complex logic (binary search on line-wise text input: https://stackoverflow.com/questions/28217301/using-boostiostreamsmapped-file-source-with-stdmultimap/28220864#28220864)] – sehe Dec 25 '20 at 14:11

0 Answers0