PROBLEM
I have this old piece of pre-STL C++ code that I want to translate into standard C++11 without losing efficiency.
// Legacy approach: read big_n fixed-size records into a raw heap array with a
// single fread() call.
using T = unsigned; // but can be any POD
// NOTE(review): the stream is opened in text mode ("r") and fp is not checked for null.
FILE* fp = fopen( outfile.c_str(), "r" );
// Raw owning array; released manually with delete[] below.
T* x = new T[big_n];
// One bulk read of big_n records; the return value (records actually read) is ignored.
fread( x, sizeof(T), big_n, fp );
delete[] x;
fclose( fp );
Note that big_n is really big - like millions of records big, so any inefficiencies are pronounced.
PREVIOUS SOLUTION
In this answer from my previous question, I accepted this solution:
// The sized constructor creates big_n elements up front, so size() == big_n
// and data() points at storage that fread() may legitimately overwrite.
std::vector<T> x(big_n);
fread(x.data(), sizeof(T), big_n, fp);
ISSUE AND ATTEMPTED SOLUTION
That previous solution works, but the constructor actually calls T's default constructor big_n times. This is very slow when big_n is really big (and totally unnecessary as I am about to fread() the entire chunk from disk). FWIW, in my test case for one file, it was taking 3 seconds instead of 200ms.
So I tried to use this instead:
std::vector<T> x;
// reserve() only grows capacity; no elements are created and size() stays 0.
x.reserve( big_n );
// NOTE(review): this writes into storage beyond size(). The vector never learns
// about these records, and reading them back through it is not valid.
fread(x.data(), sizeof(T), big_n, fp);
This seems to work, but then I run into the issue that size() returns 0 and not big_n.
How do I correct this without losing too much efficiency?
ADDENDUM
I just noticed that std::vector<> can take a custom allocator. Could using that form of the constructor solve my problem? I'm looking into this approach now.
WHAT WORKS FOR ME
I've looked into Ali's custom allocator solution below, as well as jrok's simple array solution. I have decided to adapt jrok's solution for its ease of understanding and lower maintenance.
The working code I came up with is below:
#include <cassert>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <memory>
#include <set>
#include <type_traits>
#include <utility>
#include <vector>
/// Minimal record type used to exercise vector_chunk: a single int with
/// equality comparison and text output.
struct Foo
{
    int m_i;

    // Deliberately leaves m_i uninitialized so that allocating a large array
    // of Foo does no per-element work before the bulk read fills it in.
    Foo() { }
    Foo( int i ) : m_i( i ) { }

    bool operator==( Foo const& rhs ) const { return m_i==rhs.m_i; }
    bool operator!=( Foo const& rhs ) const { return !( *this==rhs ); }

    /// Writes the wrapped integer and returns the stream so that calls chain.
    friend std::ostream& operator<<( std::ostream& os, Foo const& rhs )
    { return os << rhs.m_i; }
};
// DESIGN NOTES /*{{{*/
//
// LIMITATION T must be a POD so we can fread/fwrite quickly
//
// WHY DO WE NEED THIS CLASS?
//
// We want to write a large number of small PODs to disk and read them back without
// 1. spurious calls to default constructors by std::vector
// 2. writing to disk a gazillion times
//
// SOLUTION
// A hybrid class containing a std::vector<> for adding new items and a
// std::unique_ptr<T[]> for fast persistence. From the user's POV, it looks
// like a std::vector<>.
//
// Algorithm
// 1. add new items into:
// std::vector<T> m_v;
// 2. when writing to disk, write out m_chunk and then m_v, each as a single chunk
// 3. when reading from disk, read into m_chunk (m_v will start empty again)
// 4. m_chunk and m_v combined will represent all the data
/*}}}*/
/// Append-only sequence of trivially-copyable records that can be written to
/// and reloaded from a binary stream in bulk.
///
/// Storage is split in two parts:
///   - m_chunk : records loaded by the most recent read_as_binary(), held in a
///               raw array so nothing is constructed per element before the read
///   - m_v     : records appended since then
/// Logical index i refers to m_chunk[i] when i < m_n_in_chunk and to
/// m_v[i - m_n_in_chunk] otherwise.
///
/// The stream format is the element count (a raw std::size_t) followed by the
/// raw bytes of every element, exactly as they sit in memory.
template<typename T>
class vector_chunk
{
    static_assert( std::is_trivially_copyable<T>::value,
                   "vector_chunk<T> reads and writes raw bytes; T must be trivially copyable" );

    // STATE /*{{{*/
    std::size_t m_n_in_chunk;
    std::unique_ptr<T[]> m_chunk;
    std::vector<T> m_v;
    /*}}}*/

    // CONSTRUCTOR, INITIALIZATION /*{{{*/
public:
    vector_chunk() : m_n_in_chunk( 0 ) { }

    // Moves are spelled out so the source ends up empty. The implicit move
    // would copy m_n_in_chunk while nulling m_chunk, leaving size() lying.
    vector_chunk( vector_chunk&& other ) noexcept
        : m_n_in_chunk( other.m_n_in_chunk )
        , m_chunk( std::move( other.m_chunk ))
        , m_v( std::move( other.m_v ))
    {
        other.m_n_in_chunk = 0;
        other.m_v.clear();
    }

    vector_chunk& operator=( vector_chunk&& other ) noexcept
    {
        if ( this != &other )
        {
            m_n_in_chunk = other.m_n_in_chunk;
            m_chunk = std::move( other.m_chunk );
            m_v = std::move( other.m_v );
            other.m_n_in_chunk = 0;
            other.m_v.clear();
        }
        return *this;
    }
    /*}}}*/

    // EQUALITY /*{{{*/
public:
    /// Element-wise comparison over the logical sequence; how the elements are
    /// split between m_chunk and m_v does not matter.
    bool operator==( vector_chunk const& rhs ) const
    {
        std::size_t const n = size();
        if ( rhs.size() != n )
            return false;
        for( std::size_t i=0; i<n; ++i )
            if ( (*this)[i] != rhs[i] )
                return false;
        return true;
    }

    bool operator!=( vector_chunk const& rhs ) const { return !( *this==rhs ); }
    /*}}}*/

    // OSTREAM /*{{{*/
public:
    /// Prints every element on its own line, chunk elements first.
    friend std::ostream& operator<<( std::ostream& os, vector_chunk const& rhs )
    {
        for( std::size_t i=0; i<rhs.m_n_in_chunk; ++i )
            os << rhs.m_chunk[i] << "\n";
        for( T const& t : rhs.m_v )
            os << t << "\n";
        return os;
    }
    /*}}}*/

    // BINARY I/O /*{{{*/
public:
    /// Writes the total element count followed by all elements as raw bytes.
    /// Errors are reported through the state of `os`; the caller must check it.
    void write_as_binary( std::ostream& os ) const
    {
        std::size_t const n_total = size();
        os.write( reinterpret_cast<const char*>( &n_total ), sizeof( n_total ));

        // m_chunk is null until the first successful read, so use get() and
        // skip empty ranges instead of indexing m_chunk[0].
        if ( m_n_in_chunk > 0 )
            os.write( reinterpret_cast<const char*>( m_chunk.get() ),
                      static_cast<std::streamsize>( m_n_in_chunk * sizeof( T )));
        if ( !m_v.empty() )
            os.write( reinterpret_cast<const char*>( m_v.data() ),
                      static_cast<std::streamsize>( m_v.size() * sizeof( T )));
    }

    /// Replaces the whole contents with the sequence stored in `is`.
    ///
    /// On success everything lands in m_chunk and m_v is emptied. If either
    /// read fails, the object is left untouched and the failure is visible
    /// through the state of `is`.
    void read_as_binary( std::istream& is )
    {
        std::size_t n_incoming = 0;
        if ( !is.read( reinterpret_cast<char*>( &n_incoming ), sizeof( n_incoming )))
            return;

        // Plain new[] on purpose: it default-initializes, so trivial types are
        // not zeroed before the bulk read overwrites them. std::make_unique
        // would value-initialize every element.
        std::unique_ptr<T[]> incoming( new T[ n_incoming ] );
        if ( n_incoming > 0 &&
             !is.read( reinterpret_cast<char*>( incoming.get() ),
                       static_cast<std::streamsize>( n_incoming * sizeof( T ))))
            return;

        // Commit only after both reads succeeded.
        m_n_in_chunk = n_incoming;
        m_chunk = std::move( incoming );
        m_v.clear();
    }
    /*}}}*/

    // DELEGATION to std::vector<T> /*{{{*/
public:
    std::size_t size() const { return m_n_in_chunk + m_v.size(); }

    void push_back( T const& value ) { m_v.push_back( value ); }
    void push_back( T&& value ) { m_v.push_back( std::move( value )); }

    template< class... Args >
    void emplace_back( Args&&... args ) { m_v.emplace_back( std::forward<Args>( args )... ); }

    /// Unchecked access by logical index; pos must be less than size().
    T const& operator[]( std::size_t pos ) const
    { return ( pos < m_n_in_chunk ) ? m_chunk[ pos ] : m_v[ pos - m_n_in_chunk ]; }

    T& operator[]( std::size_t pos )
    { return ( pos < m_n_in_chunk ) ? m_chunk[ pos ] : m_v[ pos - m_n_in_chunk ]; }
    /*}}}*/
};
/// Round-trip smoke test: fills a vector_chunk<Foo>, writes it to a scratch
/// file, reads it back into a second instance and checks that both compare
/// equal. Returns 0 on success and 1 on any I/O failure or mismatch.
int main()
{
    int const n = 10;
    char const* const path = "/tmp/junk.bin";

    vector_chunk<Foo> v, w;
    for( int i=0; i<n; ++i )
        v.emplace_back( Foo{ i } );

    {
        std::ofstream ofs( path, std::ios::out | std::ios::binary );
        if ( !ofs )
        {
            std::cerr << "cannot open " << path << " for writing\n";
            return 1;
        }
        v.write_as_binary( ofs );
        // close() flushes; a failed flush shows up in the stream state.
        ofs.close();
        if ( !ofs )
        {
            std::cerr << "write to " << path << " failed\n";
            return 1;
        }
    }

    {
        std::ifstream ifs( path, std::ios::in | std::ios::binary );
        if ( !ifs )
        {
            std::cerr << "cannot open " << path << " for reading\n";
            return 1;
        }
        w.read_as_binary( ifs );
        if ( !ifs )
        {
            std::cerr << "read from " << path << " failed\n";
            return 1;
        }
    }

    // Explicit check rather than assert() so it still runs under NDEBUG.
    if ( !( v==w ))
    {
        std::cerr << "round-trip mismatch\n";
        return 1;
    }
    return 0;
}