C# HashSet VS C++ std::unordered_set with custom class keys. C++ slower... it can't be. How to achieve C#'s speed?

Question

I porting my program to C++ to achieve better speed, but met something terrible! I can't find a fast way to get unique by value custom class instances from array.

I made two minimal example projects for comparison.

C++ program RELEASE result (VC++ 2010 express): Unique vectors count is 666791. Took 5 seconds.

C# program DEBUG!!! result: Unique vectors count is 666533. Took 0,9060004 seconds.

I need a way to get only unique elements from array.

C++ code:

#include <conio.h>
#include <time.h>
#include <vector>
#include <unordered_set>

struct XVFields
{
    unsigned char JointPromosCountSinceBeginning;
    unsigned char PromoWeeksCountSinceCurrPromoBeginning;
    unsigned char NoPromoWeeksCountSinceLastJointPromo;
    unsigned char IsPromo;
};

class XVector
{
public:
        XVFields XVFs;
        unsigned char *DiscountUsagesCounts;

    XVector()
    {
        this->DiscountUsagesCounts = (unsigned char*)malloc(5);
    }
};

struct XVectorHasher
{
    size_t operator()(const XVector *k) const
    {
        size_t result = 0;
        const size_t prime = 31;

        int unibytes_count = sizeof(XVFields) + 5;
        unsigned char *unibytes = (unsigned char*)malloc(unibytes_count);
        memcpy(unibytes, &k->XVFs, sizeof(XVFields));
        memcpy(&unibytes[sizeof(XVFields)], k->DiscountUsagesCounts, 5);

        for (size_t i = 0; i < unibytes_count; i++)
            result = unibytes[i] + (result * prime);

        free(unibytes);

        return result;
    }
};

struct XVectorComparator
{
    bool operator()(const XVector *xv1, const XVector *xv2) const
    {
        if (memcmp(&xv1->XVFs, &xv2->XVFs, sizeof(XVFields)) != 0)
            return false;

        if (memcmp(xv1->DiscountUsagesCounts, xv2->DiscountUsagesCounts, 5) != 0)
            return false;

        return true;
    }
};

void main()
{
    srand(time(NULL));
    std::vector<XVector*> xvectors;

    for (int i = 0; i < 1500000; i++)
    {
        XVector *temp_xv = new XVector();
        temp_xv->XVFs.IsPromo = rand() % 2 > 0;
        temp_xv->XVFs.JointPromosCountSinceBeginning = rand() % 5;
        temp_xv->XVFs.NoPromoWeeksCountSinceLastJointPromo = rand() % 5;
        temp_xv->XVFs.PromoWeeksCountSinceCurrPromoBeginning = rand() % 5;

        for (int j = 0; j < 5; j++)
            temp_xv->DiscountUsagesCounts[j] = rand() % 5;

        xvectors.push_back(temp_xv);
    }

    time_t start_dt = time(NULL);
    std::unordered_set<XVector*, XVectorHasher, XVectorComparator> *unique_xvs = new std::unordered_set<XVector*, XVectorHasher, XVectorComparator>();

    for (int i = 0; i < xvectors.size(); i++)
        if (unique_xvs->find(xvectors[i]) == unique_xvs->end())
            unique_xvs->insert(xvectors[i]);

    printf("Unique vectors count is %i. Took %i seconds.", unique_xvs->size(), time(NULL) - start_dt);
    getch();
}

C# code:

using System;
using System.Text;
using System.Linq;
using System.Collections.Generic;

namespace DictSpeedTest
{
    class Program
    {
        static void Main(string[] args)
        {
            Random rnd = new Random((int)(DateTime.Now - new DateTime(1970, 1, 1)).TotalSeconds);
            List<XVector> xvectors = new List<XVector>();

            for (int i = 0; i < 1500000; i++)
            {
                XVector temp_xv = new XVector();
                temp_xv.XVFs.IsPromo = rnd.Next(2) > 0;
                temp_xv.XVFs.JointPromosCountSinceBeginning = (byte)rnd.Next(0, 5);
                temp_xv.XVFs.NoPromoWeeksCountSinceLastJointPromo = (byte)rnd.Next(0, 5);
                temp_xv.XVFs.PromoWeeksCountSinceCurrPromoBeginning = (byte)rnd.Next(0, 5);

                for (int j = 0; j < temp_xv.DiscountUsagesCounts.Length; j++)
                    temp_xv.DiscountUsagesCounts[j] = (byte)rnd.Next(0, 5);

                xvectors.Add(temp_xv);
            }

            DateTime start_dt = DateTime.Now;
            HashSet<XVector> unique_xvs = new HashSet<XVector>(new XVectorEqualityComparer());

            for (int i = 0; i < xvectors.Count; i++)
                if (!unique_xvs.Contains(xvectors[i]))
                    unique_xvs.Add(xvectors[i]);

            Console.WriteLine("Unique vectors count is " + unique_xvs.Count + ". Took " + (DateTime.Now - start_dt).TotalSeconds + " seconds.");
            Console.ReadKey();
        }
    }

    struct XVFields
    {
        public byte JointPromosCountSinceBeginning;
        public byte PromoWeeksCountSinceCurrPromoBeginning;
        public byte NoPromoWeeksCountSinceLastJointPromo;
        public bool IsPromo;
    }

    class XVector
    {
        public XVFields XVFs;
        public byte[] DiscountUsagesCounts;

        public XVector()
        {
            this.DiscountUsagesCounts = new byte[5];
        }

        public override bool Equals(object obj)
        {
            byte[] my_low_lvl_dump = new byte[4 + 5];

            my_low_lvl_dump[0] = this.XVFs.IsPromo ? (byte)1 : (byte)0;
            my_low_lvl_dump[1] = this.XVFs.JointPromosCountSinceBeginning;
            my_low_lvl_dump[2] = this.XVFs.PromoWeeksCountSinceCurrPromoBeginning;
            my_low_lvl_dump[3] = this.XVFs.NoPromoWeeksCountSinceLastJointPromo;
            my_low_lvl_dump[4] = this.DiscountUsagesCounts[0];
            my_low_lvl_dump[5] = this.DiscountUsagesCounts[1];
            my_low_lvl_dump[6] = this.DiscountUsagesCounts[2];
            my_low_lvl_dump[7] = this.DiscountUsagesCounts[3];
            my_low_lvl_dump[8] = this.DiscountUsagesCounts[4];

            XVector xv = (XVector)obj;
            byte[] obj_low_lvl_dump = new byte[4 + 5];

            obj_low_lvl_dump[0] = xv.XVFs.IsPromo ? (byte)1 : (byte)0;
            obj_low_lvl_dump[1] = xv.XVFs.JointPromosCountSinceBeginning;
            obj_low_lvl_dump[2] = xv.XVFs.PromoWeeksCountSinceCurrPromoBeginning;
            obj_low_lvl_dump[3] = xv.XVFs.NoPromoWeeksCountSinceLastJointPromo;
            obj_low_lvl_dump[4] = xv.DiscountUsagesCounts[0];
            obj_low_lvl_dump[5] = xv.DiscountUsagesCounts[1];
            obj_low_lvl_dump[6] = xv.DiscountUsagesCounts[2];
            obj_low_lvl_dump[7] = xv.DiscountUsagesCounts[3];
            obj_low_lvl_dump[8] = xv.DiscountUsagesCounts[4];

            return my_low_lvl_dump.SequenceEqual<byte>(obj_low_lvl_dump);
        }

        public override int GetHashCode()
        {
            byte[] low_lvl_dump = new byte[4 + 5];

            low_lvl_dump[0] = this.XVFs.IsPromo ? (byte)1 : (byte)0;
            low_lvl_dump[1] = this.XVFs.JointPromosCountSinceBeginning;
            low_lvl_dump[2] = this.XVFs.PromoWeeksCountSinceCurrPromoBeginning;
            low_lvl_dump[3] = this.XVFs.NoPromoWeeksCountSinceLastJointPromo;
            low_lvl_dump[4] = this.DiscountUsagesCounts[0];
            low_lvl_dump[5] = this.DiscountUsagesCounts[1];
            low_lvl_dump[6] = this.DiscountUsagesCounts[2];
            low_lvl_dump[7] = this.DiscountUsagesCounts[3];
            low_lvl_dump[8] = this.DiscountUsagesCounts[4];

            int result = 0;
            int prime = 31;

            for (int i = 0; i < low_lvl_dump.Length; i++)
                result = low_lvl_dump[i] + (result * prime);

            return result;
        }
    }

    class XVectorEqualityComparer : IEqualityComparer<XVector>
    {
        public bool Equals(XVector xv1, XVector xv2)
        {
            return xv1.Equals(xv2);
        }

        public int GetHashCode(XVector xv)
        {
            return xv.GetHashCode();
        }
    }
}

C# does way expensive operations for producing arrays for hash functions and yet final result is faster.

[Tangent] Why are you using `malloc` and `free` in your C++ implementation? Technically you have UB because of `unsigned char *unibytes = (unsigned char*)malloc(unibytes_count);` doesn't actually create an array for `char`'s. — NathanOliver, Dec 18 '19 at 16:19
Also, `std::unordered_set` is about the worst hashed container you can get. It is required to be implemented in buckets of linked lists and linked lists are performance-agnostic. Finding a better hash-set for your C++ code should improve the speed. — NathanOliver, Dec 18 '19 at 16:22
@NathanOliver-ReinstateMonica I like `malloc` since I can do `realloc` if needed. Do you know any better hash-sets for C++? — Kosmo零, Dec 18 '19 at 16:23
Recommendation: use less, looks like you can get away with almost zero, manual dynamic allocation in the C++ code. Let the containers handle the storage. Take advantage of `vector`s contiguous storage to provide cache friendliness. — user4581301, Dec 18 '19 at 16:23
If you need reallocation, use a `std::vector`. Then you have value semantics and well defined behavior. — NathanOliver, Dec 18 '19 at 16:24
@NathanOliver-ReinstateMonica - Will it help me to get C#'s speed? — Kosmo零, Dec 18 '19 at 16:27
I ran your C++ code … and it took 0.8 seconds. Are you **sure** you ran it in release? In debug it took 4.5 seconds … — ChrisMM, Dec 18 '19 at 16:28
@ChrisMM - Yes, I am sure it's release. I use VC++ 2010 Express. What you use? — Kosmo零, Dec 18 '19 at 16:28
Can't say. What I can say is UB is not good, even if it "does the right thing". As far as what set you can use in `unordered_set`'s place, I would suggest looking at google/FB/boost. They probably have a faster implementation. — NathanOliver, Dec 18 '19 at 16:29
@ChrisMM Asker could be using a 286 or a [Dorito](https://www.youtube.com/watch?v=qpMvS1Q1sos). — user4581301, Dec 18 '19 at 16:29
@Kosmos VS 2019 Community … Also, don't use `time` for measuring time. Upgrade to at least c++11, and use `high_resolution_clock` — ChrisMM, Dec 18 '19 at 16:29
Kind of impressed that VS2010 supported `unordered_set`. It wasn't formally added to C++ for another year. Since this would have been the first kick at the can, it's possible the implementation's a bit weak. — user4581301, Dec 18 '19 at 16:34
@MaxLanghof - Yes, but C# has same collisions count. What would you use in C++ to get hash from unsigned char array? — Kosmo零, Dec 18 '19 at 16:38
@Kosmos I misread the algorithm. The prime is still bad though :D — Max Langhof, Dec 18 '19 at 16:39

score 1 · Answer 1 · answered Dec 19 '19 at 02:58

I am using Visual Studio 2019, and my RELEASE build of your code ran in ~700ms.

You most certainly can beat this time with a couple of improvements.

While you insert 1,500,000 elements into your hash set, it is being re-hashed about 10 times. You can avoid that by specifying number of buckets when you construct that set. Setting it to 800,000 saves about 20%.

In your hash function, you create and destroy an array of 9 bytes. 1,500,000 times! You could have a static array and simply fill it with the required data. However, you could simply do the math on your fields directly, which will save you about 50%:

struct XVectorHasher
{
size_t operator()(const XVector* k) const
{
    size_t result = 0;
    const size_t prime = 31;

    result = k->XVFs.IsPromo;
    result = k->XVFs.JointPromosCountSinceBeginning + (result * prime);
    result = k->XVFs.PromoWeeksCountSinceCurrPromoBeginning + (result * prime);
    result = k->XVFs.NoPromoWeeksCountSinceLastJointPromo + (result * prime);
    for (size_t i = 0; i < 5; i++)
        result = k->DiscountUsagesCounts[i] + (result * prime);
    return result;
}
};

Is the data structure in your example real or simplified? What are the valid ranges for those unsigned char fields? The reason I am asking is that you only have 8 bytes + 1 bool. If you can spare a single bit in any of those 8 bytes, you can simply compose a long long key and won’t have to provide your own custom hash and compare. That will save you another 50%

Thank you. Sadly, 1 and 3 will not help me. That was just minimal example to demonstrate difference between set lookups between C++ and C#. Real program fills it in another way and another count of elements. I thinks the slow part was `find()`. And I can't spare single bit, because real structure has dynamic size for `DiscountUsagesCount`. I set it to 5 in example, but it could be more or less. I will make changes in hash function. I already switched to VC++2019 and my program now run like 10 times faster. I can't believe it. I always though than older soft, than faster and less memory it use — Kosmo零, Dec 20 '19 at 06:45

C# HashSet VS C++ std::unordered_set with custom class keys. C++ slower... it can't be. How to achieve C#'s speed?

1 Answers1