I porting my program to C++ to achieve better speed, but met something terrible! I can't find a fast
way to get unique by value custom class instances from array.
I made two minimal example projects for comparison.
C++ program RELEASE result (VC++ 2010 express):
Unique vectors count is 666791. Took 5 seconds.
C# program DEBUG!!! result:
Unique vectors count is 666533. Took 0,9060004 seconds.
I need a way to get only unique elements from array.
C++ code:
#include <conio.h>
#include <time.h>
#include <vector>
#include <unordered_set>
struct XVFields
{
unsigned char JointPromosCountSinceBeginning;
unsigned char PromoWeeksCountSinceCurrPromoBeginning;
unsigned char NoPromoWeeksCountSinceLastJointPromo;
unsigned char IsPromo;
};
class XVector
{
public:
XVFields XVFs;
unsigned char *DiscountUsagesCounts;
XVector()
{
this->DiscountUsagesCounts = (unsigned char*)malloc(5);
}
};
struct XVectorHasher
{
size_t operator()(const XVector *k) const
{
size_t result = 0;
const size_t prime = 31;
int unibytes_count = sizeof(XVFields) + 5;
unsigned char *unibytes = (unsigned char*)malloc(unibytes_count);
memcpy(unibytes, &k->XVFs, sizeof(XVFields));
memcpy(&unibytes[sizeof(XVFields)], k->DiscountUsagesCounts, 5);
for (size_t i = 0; i < unibytes_count; i++)
result = unibytes[i] + (result * prime);
free(unibytes);
return result;
}
};
struct XVectorComparator
{
bool operator()(const XVector *xv1, const XVector *xv2) const
{
if (memcmp(&xv1->XVFs, &xv2->XVFs, sizeof(XVFields)) != 0)
return false;
if (memcmp(xv1->DiscountUsagesCounts, xv2->DiscountUsagesCounts, 5) != 0)
return false;
return true;
}
};
void main()
{
srand(time(NULL));
std::vector<XVector*> xvectors;
for (int i = 0; i < 1500000; i++)
{
XVector *temp_xv = new XVector();
temp_xv->XVFs.IsPromo = rand() % 2 > 0;
temp_xv->XVFs.JointPromosCountSinceBeginning = rand() % 5;
temp_xv->XVFs.NoPromoWeeksCountSinceLastJointPromo = rand() % 5;
temp_xv->XVFs.PromoWeeksCountSinceCurrPromoBeginning = rand() % 5;
for (int j = 0; j < 5; j++)
temp_xv->DiscountUsagesCounts[j] = rand() % 5;
xvectors.push_back(temp_xv);
}
time_t start_dt = time(NULL);
std::unordered_set<XVector*, XVectorHasher, XVectorComparator> *unique_xvs = new std::unordered_set<XVector*, XVectorHasher, XVectorComparator>();
for (int i = 0; i < xvectors.size(); i++)
if (unique_xvs->find(xvectors[i]) == unique_xvs->end())
unique_xvs->insert(xvectors[i]);
printf("Unique vectors count is %i. Took %i seconds.", unique_xvs->size(), time(NULL) - start_dt);
getch();
}
C# code:
using System;
using System.Text;
using System.Linq;
using System.Collections.Generic;
namespace DictSpeedTest
{
class Program
{
static void Main(string[] args)
{
Random rnd = new Random((int)(DateTime.Now - new DateTime(1970, 1, 1)).TotalSeconds);
List<XVector> xvectors = new List<XVector>();
for (int i = 0; i < 1500000; i++)
{
XVector temp_xv = new XVector();
temp_xv.XVFs.IsPromo = rnd.Next(2) > 0;
temp_xv.XVFs.JointPromosCountSinceBeginning = (byte)rnd.Next(0, 5);
temp_xv.XVFs.NoPromoWeeksCountSinceLastJointPromo = (byte)rnd.Next(0, 5);
temp_xv.XVFs.PromoWeeksCountSinceCurrPromoBeginning = (byte)rnd.Next(0, 5);
for (int j = 0; j < temp_xv.DiscountUsagesCounts.Length; j++)
temp_xv.DiscountUsagesCounts[j] = (byte)rnd.Next(0, 5);
xvectors.Add(temp_xv);
}
DateTime start_dt = DateTime.Now;
HashSet<XVector> unique_xvs = new HashSet<XVector>(new XVectorEqualityComparer());
for (int i = 0; i < xvectors.Count; i++)
if (!unique_xvs.Contains(xvectors[i]))
unique_xvs.Add(xvectors[i]);
Console.WriteLine("Unique vectors count is " + unique_xvs.Count + ". Took " + (DateTime.Now - start_dt).TotalSeconds + " seconds.");
Console.ReadKey();
}
}
struct XVFields
{
public byte JointPromosCountSinceBeginning;
public byte PromoWeeksCountSinceCurrPromoBeginning;
public byte NoPromoWeeksCountSinceLastJointPromo;
public bool IsPromo;
}
class XVector
{
public XVFields XVFs;
public byte[] DiscountUsagesCounts;
public XVector()
{
this.DiscountUsagesCounts = new byte[5];
}
public override bool Equals(object obj)
{
byte[] my_low_lvl_dump = new byte[4 + 5];
my_low_lvl_dump[0] = this.XVFs.IsPromo ? (byte)1 : (byte)0;
my_low_lvl_dump[1] = this.XVFs.JointPromosCountSinceBeginning;
my_low_lvl_dump[2] = this.XVFs.PromoWeeksCountSinceCurrPromoBeginning;
my_low_lvl_dump[3] = this.XVFs.NoPromoWeeksCountSinceLastJointPromo;
my_low_lvl_dump[4] = this.DiscountUsagesCounts[0];
my_low_lvl_dump[5] = this.DiscountUsagesCounts[1];
my_low_lvl_dump[6] = this.DiscountUsagesCounts[2];
my_low_lvl_dump[7] = this.DiscountUsagesCounts[3];
my_low_lvl_dump[8] = this.DiscountUsagesCounts[4];
XVector xv = (XVector)obj;
byte[] obj_low_lvl_dump = new byte[4 + 5];
obj_low_lvl_dump[0] = xv.XVFs.IsPromo ? (byte)1 : (byte)0;
obj_low_lvl_dump[1] = xv.XVFs.JointPromosCountSinceBeginning;
obj_low_lvl_dump[2] = xv.XVFs.PromoWeeksCountSinceCurrPromoBeginning;
obj_low_lvl_dump[3] = xv.XVFs.NoPromoWeeksCountSinceLastJointPromo;
obj_low_lvl_dump[4] = xv.DiscountUsagesCounts[0];
obj_low_lvl_dump[5] = xv.DiscountUsagesCounts[1];
obj_low_lvl_dump[6] = xv.DiscountUsagesCounts[2];
obj_low_lvl_dump[7] = xv.DiscountUsagesCounts[3];
obj_low_lvl_dump[8] = xv.DiscountUsagesCounts[4];
return my_low_lvl_dump.SequenceEqual<byte>(obj_low_lvl_dump);
}
public override int GetHashCode()
{
byte[] low_lvl_dump = new byte[4 + 5];
low_lvl_dump[0] = this.XVFs.IsPromo ? (byte)1 : (byte)0;
low_lvl_dump[1] = this.XVFs.JointPromosCountSinceBeginning;
low_lvl_dump[2] = this.XVFs.PromoWeeksCountSinceCurrPromoBeginning;
low_lvl_dump[3] = this.XVFs.NoPromoWeeksCountSinceLastJointPromo;
low_lvl_dump[4] = this.DiscountUsagesCounts[0];
low_lvl_dump[5] = this.DiscountUsagesCounts[1];
low_lvl_dump[6] = this.DiscountUsagesCounts[2];
low_lvl_dump[7] = this.DiscountUsagesCounts[3];
low_lvl_dump[8] = this.DiscountUsagesCounts[4];
int result = 0;
int prime = 31;
for (int i = 0; i < low_lvl_dump.Length; i++)
result = low_lvl_dump[i] + (result * prime);
return result;
}
}
class XVectorEqualityComparer : IEqualityComparer<XVector>
{
public bool Equals(XVector xv1, XVector xv2)
{
return xv1.Equals(xv2);
}
public int GetHashCode(XVector xv)
{
return xv.GetHashCode();
}
}
}
C# does way expensive operations for producing arrays for hash functions and yet final result is faster.