8

I recently found boost::multi_index_container and I'm curious about his performance compared to my own implementation of a similar container based on multi-level mapping and defined as:

typedef int      Data;
typedef uint64_t MainKey;
typedef uint64_t SecondaryKey;

typedef std::unordered_map<SecondaryKey, Data>       SecondaryMap;
typedef std::unordered_map<PrimaryKey, SecondaryMap> PrimaryMap;

The key ordering is not important. The fast lookup is important and for this I'm using something like:

// find primaryKey=10 and secondaryKey=30
PrimaryMap m;
....
auto i1 = m.find( 10);
if ( i1 != m.end())
{
    auto& secondary = i1->second;
    auto i2 = secondary.find( 30);
    if ( i2 != secondary.end())
    {
        found = true;
        ....
    }
}

I would like to know what would be

  • the most closest configuration of boost::multi_index_container to match my implementation
  • the fastest way to search by primary key and secondary key.

I have tried to configure the template but I'm not sure if this is the best solution:

struct RecordKey
{
    MainKey         mainKey;
    SecondaryKey    secondaryKey;

    RecordKey( const MainKey mainKey, SecondaryKey secondaryKey):
        mainKey( mainKey),
        secondaryKey( secondaryKey)
    {}
};

struct Record: public RecordKey
{
    Data data;

    Record( const MainKey mainKey = 0, const SecondaryKey secondaryKey = 0, const Data& data = 0):
        RecordKey( mainKey, secondaryKey),
        data( data)
    {}
};


struct MainKeyTag {};
struct SecondaryKeyTag {};
struct CompositeKeyTag {};

using boost::multi_index_container;
using namespace boost::multi_index;

typedef boost::multi_index_container<Record,
    indexed_by  <   /*random_access<>,*/
                    hashed_non_unique<tag<MainKeyTag>,      BOOST_MULTI_INDEX_MEMBER( RecordKey, MainKey, mainKey) >,
                    hashed_non_unique<tag<SecondaryKeyTag>, BOOST_MULTI_INDEX_MEMBER( RecordKey, SecondaryKey, secondaryKey) >,
                    hashed_unique<tag<CompositeKeyTag>,     composite_key<Record,   member<RecordKey, MainKey, &RecordKey::mainKey>,
                                                                                    member<RecordKey, SecondaryKey, &RecordKey::secondaryKey> > > > > RecordContainer;
Flaviu
  • 931
  • 11
  • 16

3 Answers3

9

You can have your cake and eat it too, by choosing ordered (instead of hashed) indexes in BMI.

There is a nice property of ordered composite indexes that allows you to query by partial keys, so you need only define the composite index to be able to query by main index too:

typedef boost::multi_index_container<
    Record,
    indexed_by<
        ordered_non_unique< tag<CompositeKeyTag>, 
        composite_key<Record, 
                    member<RecordKey, MainKey, &RecordKey::mainKey>,
                    member<RecordKey, SecondaryKey, &RecordKey::secondaryKey>
    > > > > RecordContainer;

Now you can either query by mainKey:

range = index.equal_range(10);

Or by composite:

range = index.equal_range(boost::make_tuple(10,30));

Background:

Here's a full demo Live On Coliru:

#include <boost/multi_index_container.hpp>
#include <boost/multi_index/composite_key.hpp>
#include <boost/multi_index/hashed_index.hpp>
#include <boost/multi_index/ordered_index.hpp>
#include <boost/multi_index/member.hpp>
#include <boost/range/iterator_range.hpp>
#include <iostream>

using MainKey      = uint64_t;
using SecondaryKey = uint64_t;
using Data         = std::string;

struct RecordKey
{
    MainKey         mainKey;
    SecondaryKey    secondaryKey;

    RecordKey( const MainKey mainKey, SecondaryKey secondaryKey):
        mainKey( mainKey),
        secondaryKey( secondaryKey)
    {}
};

struct Record: public RecordKey
{
    Data data;

    Record( const MainKey mainKey = 0, const SecondaryKey secondaryKey = 0, const Data& data = 0):
        RecordKey( mainKey, secondaryKey),
        data( data)
    {}

    friend std::ostream& operator<<(std::ostream& os, Record const& r) {
        return os << " Record[" << r.mainKey << ", " << r.secondaryKey << ", " << r.data << "]";
    }
};

struct MainKeyTag {};
struct SecondaryKeyTag {};
struct CompositeKeyTag {};

using boost::multi_index_container;
using namespace boost::multi_index;

typedef boost::multi_index_container<
    Record,
    indexed_by<
        ordered_non_unique< tag<CompositeKeyTag>, 
        composite_key<Record, 
                    member<RecordKey, MainKey, &RecordKey::mainKey>,
                    member<RecordKey, SecondaryKey, &RecordKey::secondaryKey>
    > > > > RecordContainer;


int main()
{
    RecordContainer records;
    records.insert(Record(10, 20, "12"));
    records.insert(Record(10, 30, "13"));
    records.insert(Record(10, 30, "13 - need not be unique!"));
    records.insert(Record(30, 40, "34"));
    records.insert(Record(30, 50, "35"));
    records.insert(Record(50, 60, "56"));
    records.insert(Record(50, 70, "57"));
    records.insert(Record(70, 80, "78"));

    std::cout << "\nAll records:\n----------------------------------------------------------------------\n";
    for (auto const& r : records)
        std::cout << r << "\n";

    {
        std::cout << "\nAll records with (main) == (10):\n----------------------------------------------------------------------\n";
        auto& index = records.get<0>();
        auto range = index.equal_range(10);
        for (auto const& r : boost::make_iterator_range(range.first, range.second))
            std::cout << r << "\n";
    }

    {
        std::cout << "\nAll records with (main,secondary) == (10,30):\n----------------------------------------------------------------------\n";
        auto& index = records.get<0>();
        auto range = index.equal_range(boost::make_tuple(10,30));
        for (auto const& r : boost::make_iterator_range(range.first, range.second))
            std::cout << r << "\n";
    }
}

Output:

All records:
----------------------------------------------------------------------
 Record[10, 20, 12]
 Record[10, 30, 13]
 Record[10, 30, 13 - need not be unique!]
 Record[30, 40, 34]
 Record[30, 50, 35]
 Record[50, 60, 56]
 Record[50, 70, 57]
 Record[70, 80, 78]

All records with (main) == (10):
----------------------------------------------------------------------
 Record[10, 20, 12]
 Record[10, 30, 13]
 Record[10, 30, 13 - need not be unique!]

All records with (main,secondary) == (10,30):
----------------------------------------------------------------------
 Record[10, 30, 13]
 Record[10, 30, 13 - need not be unique!]
Community
  • 1
  • 1
sehe
  • 374,641
  • 47
  • 450
  • 633
  • Thanks but are you suggesting the ordered variant has no performance penalties for searching compared to the hashed variant? I want to do performance tests to find out which is faster, boost multi-index container or my own construction. I would need an index for primary key, and index for secondary key and a combined index for both keys. IMHO hashed should be faster that ordered. – Flaviu Oct 20 '14 at 18:15
  • @Flaviu ordered vs. hashed always depends on access patterns. But seeing how you seem to think that a map-of-maps would suit your access patterns, I'm pretty confident that ordered-composites are a good match (it's what you want, using two cascaded binary searches only). If you /also/ require direct lookup by complete composite, you may consider adding a hashed composite index ***as well***. However, keep in mind this will have a nontrivial overhead on each (key)modifying operation on the container. – sehe Oct 20 '14 at 21:39
  • I was able myself to insert and search using an ordered boost::multi_index_container but I was looking for a hashed or a random_access solution. Is the hashed composite key configuration the closest one to my own construction regarding the lookup? – Flaviu Oct 21 '14 at 05:41
  • Depends on what key. Also aging more indices isn't free. Consider external helpers and always be profiling. – sehe Oct 21 '14 at 06:58
  • I have also addressed similar questions here, where i had a little more context information to make additional suggestions: http://stackoverflow.com/questions/26474577/using-boost-multi-index-like-relational-db/26476131#26476131 In particular it might appeal to you for the random access aspect of your requirements (that you haven't clarified) – sehe Oct 21 '14 at 07:04
  • I will accept an solution that uses hashed_non_unique composite index instead of ordered_non_unique composite index. Perhaps only few changes are need in your example. – Flaviu Oct 21 '14 at 09:51
  • OK, so the hashed indexes doesn't support that kind of lookup? – Flaviu Oct 21 '14 at 09:52
  • Yay. You got the point :) Do look at the additional suggestions I linked 2 hours ago. I have a feeling you could benefit from a strategy like those – sehe Oct 21 '14 at 09:53
  • Thanks a lot :) I just had the impression there could be a more straight forward way to lookup instead of using equal_range. In conclusion a multi-index container with ordered indexes is the closest possible configuration to match my construction. Perhaps in the future a call like `m.get().find( make_tuple( 10, 30))` could do that kind of lookup. – Flaviu Oct 21 '14 at 10:02
  • @Flaviu erm, `find()` is already working. It's just something that's not very useful with non-unique indices. (And certainly not with partial keys) – sehe Oct 21 '14 at 10:56
  • I need only unique indices. Considering this, could I use find() on the composite index having primaryKey=10 and secondaryKey=30? – Flaviu Oct 21 '14 at 11:31
  • Hahaha. Yes, in that case, just a **[`hashed_unique>` would do](http://coliru.stacked-crooked.com/a/a3ab2604b2179f1c)** (note that the duplicate (10,30) insert was ignored as per the docs). I'm a bit surprised that the question was [posed in the way it was](https://www.google.nl/search?q=xy+problem) then :) – sehe Oct 21 '14 at 11:44
  • :)) The unique composite key was needed from the beginning because std::unordered_map doesn't accept duplicates. Indeed it was partially the XY problem. Meantime I found the issue of my solution: on `find()` call `std::make_tuple` was used instead of `boost::make_tuple`... but this doesn't necessary mean no other configurations of BMI could be closer to my own construction (I'm still not sure about the random_access<> usage). Regards – Flaviu Oct 21 '14 at 14:50
1

The multi-index container must have 3 indexes:

  • index for main key: hashed - because std::unordered_map is hashed, non_unique - because multiple records could have same key;
  • index for secondary key: same as for the main key;
  • index for composite key: hashed - because std::unordered_map is hashed, unique - because the tuples (main key, secondary key) are unique in the map of maps structure.

The container is defined as:

typedef boost::multi_index_container<Record, indexed_by <
    hashed_non_unique<tag<MainKeyTag>,      BOOST_MULTI_INDEX_MEMBER( RecordKey, MainKey, mainKey) >,
    hashed_non_unique<tag<SecondaryKeyTag>, BOOST_MULTI_INDEX_MEMBER( RecordKey, SecondaryKey, secondaryKey) >,
    hashed_unique<tag<CompositeKeyTag>,     
    composite_key<Record,
        member<RecordKey, MainKey, &RecordKey::mainKey>,
        member<RecordKey, SecondaryKey, &RecordKey::secondaryKey> > > > > RecordContainer;

The insertion is:

RecordContainer c;
c.insert( Record( 10, 20, 0));
c.insert( Record( 10, 30, 1));
c.insert( Record( 10, 40, 2));

The lookup is:

auto& index = c.get<CompositeKeyTag>();
auto pos = index.find( boost::make_tuple( 10, 30)); // don't use std::make_tuple!
if ( pos != index.end())
{
    found = true;
    data = pos->data;
}
Flaviu
  • 931
  • 11
  • 16
1

Similar situation, but boost was not allowed in my group so implemented in following way, in case of lookup by secondary key, only one lookup is required not two:

    #include<map>
    using namespace std;

    template<class PrimaryKey, class SecondaryKey, class Data>
    class MyMultiIndexedMap
    {
        private:
        typedef map<PrimaryKey, Data*> PT;
        typedef map<SecondaryKey, Data*> ST;

        PT pri_data_;
        ST sec_data_;

        public:
        bool insert(PrimaryKey pk, SecondaryKey sk, Data *data);
        Data* findBySecondaryKey(SecondaryKey sk);       
        Data* findByPrimaryKey(PrimaryKey pk);    
    };
Yogesh
  • 11
  • 2