I need good performances to read binary files in my actual program. So I try to use memory mapping to increase the reading speed. In first attempt, I try to use boost::iostream so I write a little program to test the performance :
#include <string>
#include <vector>
#include <iostream>
#include <filesystem>
#include <fstream>
#include <cassert>
#include <chrono>
#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/iostreams/stream.hpp>
namespace fs = std::filesystem;
template<typename TYPE>
inline void read_binary_file_ifstream(const fs::path filename, std::vector<TYPE>& result)
{
std::ifstream file(filename, std::ios::in | std::ios::binary);
size_t filesize = fs::file_size(filename);
assert(filesize%sizeof(TYPE) == 0);
result.resize(filesize/sizeof(TYPE));
file.read(reinterpret_cast<char *>(result.data()), filesize);
file.close();
}
template<typename TYPE>
inline void read_binary_file_boost(const fs::path filename, std::vector<TYPE>& result)
{
using boost::iostreams::mapped_file_source;
using boost::iostreams::stream;
size_t filesize = fs::file_size(filename);
assert(filesize%sizeof(TYPE) == 0);
result.resize(filesize/sizeof(TYPE));
mapped_file_source mmap(filename.string().c_str());
stream<mapped_file_source> file(mmap, std::ios::binary);
file.read(reinterpret_cast<char *>(result.data()), filesize);
}
int main()
{
fs::path path = "idx-position-Deces_Agit_FrHex_aPartir1979_Dom_aPartir2000_enCours-liens_age_tranche_age#age_quinquenal_0_100.dat";
std::cout << "file size : " << fs::file_size(path) << std::endl;
std::chrono::time_point<std::chrono::system_clock> start, end;
start = std::chrono::system_clock::now();
for(int i = 0; i<10; ++i)
{
std::vector<uint32_t> result;
read_binary_file_ifstream<uint32_t>(path, result);
}
end = std::chrono::system_clock::now();
std::chrono::duration<double> elapsed_seconds = end-start;
std::cout << "elapsed time ifstream : " << elapsed_seconds.count() << "s\n";
start = std::chrono::system_clock::now();
for(int i = 0; i<10; ++i)
{
std::vector<uint32_t> result;
read_binary_file_boost<uint32_t>(path, result);
}
end = std::chrono::system_clock::now();
elapsed_seconds = end-start;
std::cout << "elapsed time boost iostream : " << elapsed_seconds.count() << "s\n";
return 0;
}
I use cmake for the compilation :
cmake_minimum_required(VERSION 3.1)
project (boost_io)
set(Boost_USE_STATIC_LIBS OFF)
set(Boost_USE_MULTITHREADED ON)
set(Boost_USE_STATIC_RUNTIME OFF)
find_package(Boost COMPONENTS iostreams REQUIRED)
add_executable(boost_io main.cpp)
target_compile_features(boost_io PRIVATE cxx_std_17)
target_include_directories(boost_io
PUBLIC
$<$<PLATFORM_ID:Windows>: ${PARENT_DIR_INSTALL_PREFIX}/Boost/include >
)
target_link_libraries(boost_io
PUBLIC
Boost::iostreams
-lstdc++fs)
I tried on two system : Linux and Windows (visual studio 2019 generator). On linux, my results are quite good :
file size : 3202777528
elapsed time ifstream : 10.1622s
elapsed time boost iostream : 8.10151s
But on windows, I have the surprising results :
file size : 3202777528
elapsed time ifstream : 30.6484s
elapsed time boost iostream : 77.9328s
My computer is on dual boot so it's the same ssd disk. The file read is exactly the same. My boost version is 1.75 on windows (and older I think on linux, I have installed boost with apt). So Why I have those differences, and why memory mapping of boost is so slow on windows with visual studio 19 generator. Is their a better way to read binary files quickly ?