I wrote the following two functions. In the second function, I used reserve()
so that there is no memory reallocation, but unfortunately the second function is slower than the first.
I used release mode and this CPU profiler in Visual Studio to count time. In the second function, reallocation takes place 33 times. So my question is: Really? Going one length string to count length takes longer time, than moving this string 33 times?
string commpres2(string str)
{
string strOut;
int count = 0;
for (int i = 0; i < str.length(); ++i)
{
++count;
if (i < str.length() - 1)
{
if (str[i + 1] != str[i])
{
strOut += str[i];
strOut += to_string(count);
count = 0;
}
}
else
{
strOut += str[i] + to_string(count);
}
}
return strOut.length() < str.length() ? strOut : str;
}
string commpres3(string str)
{
int compressedLength = 0;
int countConsecutive = 0;
for (int i = 0; i < str.length(); ++i)
{
++countConsecutive;
if (i + 1 >= str.length() || str[i] != str[i + 1])
{
compressedLength += 1 +
to_string(countConsecutive).length();
countConsecutive = 0;
}
}
if (compressedLength >= str.length())
return str;
string strOut;
strOut.reserve(compressedLength);
int count = 0;
for (int i = 0; i < str.length(); ++i)
{
++count;
if (i < str.length() - 1)
{
if (str[i + 1] != str[i])
{
strOut += str[i];
strOut += to_string(count);
count = 0;
}
}
else
{
strOut += str[i] + to_string(count);
}
}
return strOut;
}
int main()
{
string str = "aabcccccaaa";
//str.size ~ 11000000;
for (int i = 0; i < 20; ++i)
str += str;
commpres2(str); //107ms //30,32% CPU
commpres3(str); //147ms //42,58% CPU
}