Sorting a vector of chars and then iterating through that looking for the maximum run lengths seems to be 5 times faster than using the map approach (using the fairly unscientific test code below acting on 16M chars). On the surface both functions should perform close to each other because they execute with O(N log N). However the sorting method probably benefits from branch prediction and move semantics of the in-place vector sort.
The resultant output is:
Most freq char is '\334', appears 66288 times.
usingSort() took 938 milliseconds
Most freq char is '\334', appears 66288 times.
usingMap() took 5124 milliseconds
And the code is:
#include <iostream>
#include <map>
#include <vector>
#include <chrono>
void usingMap(std::vector<char> v)
{
std::map<char, int> m;
for ( auto c : v )
{
auto it= m.find(c);
if( it != m.end() )
m[c]++;
else
m[c] = 1;
}
char mostFreq;
int count = 0;
for ( auto mi : m )
if ( mi.second > count )
{
mostFreq = mi.first;
count = mi.second;
}
std::cout << "Most freq char is '" << mostFreq << "', appears " << count << " times.\n";
}
void usingSort(std::vector<char> v)
{
std::sort( v.begin(), v.end() );
char currentChar = v[0];
char mostChar = v[0];
int currentCount = 0;
int mostCount = 0;
for ( auto c : v )
{
if ( c == currentChar )
currentCount++;
else
{
if ( currentCount > mostCount)
{
mostChar = currentChar;
mostCount = currentCount;
}
currentChar = c;
currentCount = 1;
}
}
std::cout << "Most freq char is '" << mostChar << "', appears " << mostCount << " times.\n";
}
int main(int argc, const char * argv[])
{
size_t size = 1024*1024*16;
std::vector<char> v(size);
for ( int i = 0; i < size; i++)
{
v[i] = random() % 256;
}
auto t1 = std::chrono::high_resolution_clock::now();
usingSort(v);
auto t2 = std::chrono::high_resolution_clock::now();
std::cout
<< "usingSort() took "
<< std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count()
<< " milliseconds\n";
auto t3 = std::chrono::high_resolution_clock::now();
usingMap(v);
auto t4 = std::chrono::high_resolution_clock::now();
std::cout
<< "usingMap() took "
<< std::chrono::duration_cast<std::chrono::milliseconds>(t4-t3).count()
<< " milliseconds\n";
return 0;
}