Could it be that Python's C regex implementation is 6 times faster than C++11's std::regex, or am I missing something?
Python version:
# Micro-benchmark of a single regex search, meant to be run in an IPython session.
import re
# Case-insensitive (re.I) pattern: capture "HELLO", lazily skip at least one
# character, then capture a run of digits.
r=re.compile(r'(HELLO).+?(\d+)', re.I)
# Subject string; the only "hello" followed later by digits is near the end.
s=r"prefixdfadfadf adf adf adf adf he asdf dHello Regex 123"
# %timeit is an IPython magic (not plain Python) that times the statement repeatedly.
%timeit r.search(s)
1000000 loops, best of 3: 1.3 µs per loop (769,000 per sec)
C++11 version:
#include <regex>
#include <stdexcept>
// Benchmark driver: repeatedly runs one case-insensitive std::regex_search
// over a fixed subject string for 10 seconds via bench_utils::run.
int main(int argc, char * argv[])
{
    std::string s = "prefixdfadfadf adf adf adf adf he asdf dHello Regex 123";
    // icase lives in std::regex_constants; it must be qualified because no
    // using-directive for namespace std is in effect here.
    std::regex my(R"((HELLO).+?(\d+))", std::regex_constants::icase);
    bench_utils::run(std::chrono::seconds(10),
        [&]{
            // A fresh match object per iteration, so each search pays for
            // filling in the capture groups.
            std::smatch match;
            bool found = std::regex_search(s, match, my);
            // The result is intentionally discarded; only the search cost matters.
            static_cast<void>(found);
        });
    return 0;
}
This results in about 125,000 searches per second.
Edit: Here is the code for bench_utils:
// Minimal helpers for ad-hoc throughput benchmarks.
namespace bench_utils
{

// Formats `value` by streaming it into a string stream imbued with the
// user's preferred locale, so numbers pick up that locale's formatting.
//
// Returns the formatted text.
template<typename T>
inline std::string formatNum(const T& value)
{
    // std::locale("") throws std::runtime_error when the environment names a
    // locale the system cannot provide. Fall back to the classic "C" locale so
    // a misconfigured environment cannot abort the benchmark on its first report.
    static const std::locale loc = []() -> std::locale {
        try
        {
            return std::locale("");
        }
        catch (const std::runtime_error&)
        {
            return std::locale::classic();
        }
    }();
    std::stringstream ss;
    ss.imbue(loc);
    ss << value;
    return ss.str();
}

// Calls `fn` in a tight loop until at least `duration` has elapsed on the
// steady clock. `fn` is always invoked at least once.
//
// Whenever one second or more has passed since the last report, the number
// of calls completed in that interval is printed to std::cout as
// "<count> ops per second" and the count is reset. Calls made during the
// final, partial interval are not reported.
inline void run(const std::chrono::milliseconds &duration,
                const std::function<void() >& fn)
{
    using namespace std::chrono;
    using the_clock = steady_clock;

    const seconds printInterval(1);
    size_t counter = 0;
    const auto startTime = the_clock::now();
    auto lastPrintTime = startTime;
    while (true)
    {
        fn();
        counter++;
        const auto now = the_clock::now();
        if (now - startTime >= duration)
            break;
        if (now - lastPrintTime >= printInterval)
        {
            std::cout << formatNum<size_t>(counter) << " ops per second" << std::endl;
            counter = 0;
            // Re-read the clock so the time spent printing is not charged to
            // the next measurement interval.
            lastPrintTime = the_clock::now();
        }
    }
}

}