I looked around and combined a basic temporary-variable swap with a function template restricted to arithmetic types. Why is this faster than std::swap?
/// Swaps two values of an arithmetic type through a temporary copy.
///
/// Participates in overload resolution only when
/// std::is_arithmetic<T>::value is true.
/// Declared noexcept: copying and assigning arithmetic values cannot throw.
///
/// @param x first value; holds the original value of y afterwards
/// @param y second value; holds the original value of x afterwards
template <typename T, typename std::enable_if<std::is_arithmetic<T>::value>::type* = nullptr>
void swp(T& x, T& y) noexcept {
    const T saved = x;  // keep x's value before it is overwritten
    x = y;
    y = saved;
}
Here is the specific code I am using for testing (it first tries to clear the cache so that the timings are consistent; see this post for info):
// Micro-benchmark: times 100,000,000 calls of std::swap against the same
// number of calls of swp on two ints and prints both durations in
// std::clock() ticks.
int main() {
    // Fill a large buffer first; the intent is to evict earlier data from the
    // CPU cache so that both timed loops start from a comparable state.
    const size_t bigger_than_cachesize = 10 * 1024 * 1024;
    long* p = new long[bigger_than_cachesize];
    // size_t index: matches the type of the bound, no signed/unsigned comparison.
    for (size_t i = 0; i < bigger_than_cachesize; i++) p[i] = rand();
    std::cout << "Cache is flushed..." << std::endl;
    // The buffer is not used again; release it instead of leaking it.
    delete[] p;
    /// IGNORE ABOVE (ATTEMPTING TO CLEAR CACHE FOR CONSISTENCY)

    double duration;
    int x = 2560, y = 435;
    std::clock_t start;

    // NOTE(review): x and y are never read after the loops, so an optimizing
    // build may remove the swaps entirely, while an unoptimized build mostly
    // measures call overhead — confirm the build flags before comparing.
    start = std::clock();
    for (int i = 0; i < 100000000; i++) std::swap(x, y);
    duration = (std::clock() - start);
    std::cout << "std::swap: " << duration << '\n';

    start = std::clock();
    for (int i = 0; i < 100000000; i++) swp(x, y);
    duration = (std::clock() - start);
    std::cout << "swapTMP: " << duration << '\n';
}
Results, in clock ticks (roughly a 3.6:1 ratio for this run):
std::swap -> 5086
<T> swp -> 1397