In the case of Visual Studio, std::sort() is a mix of quick sort, heap sort (only to prevent worst case O(n^2) time complexity), and insertion sort, while std::stable_sort(), is a mix of merge sort and insertion sort. Both are reasonably fast, but it's possible to write faster code. The example code in the question is copying data before every merge, which consumes time. This can be avoided by doing a one time allocation of a working buffer, and switching merge direction based on level of recursion, using a pair of mutually recursive functions (shown below), or a boolean parameter to control merge direction (not used in example below).
Example C++ code for top down merge sort that is reasonably optimized (bottom up merge sort would be slightly faster, as it skips the recursion used to generate indices, using iteration instead).
// prototypes
void TopDownSplitMergeAtoA(int a[], int b[], size_t ll, size_t ee);
void TopDownSplitMergeAtoB(int a[], int b[], size_t ll, size_t ee);
void TopDownMerge(int a[], int b[], size_t ll, size_t rr, size_t ee);
void MergeSort(int a[], size_t n) // entry function
{
if(n < 2) // if size < 2 return
return;
int *b = new int[n];
TopDownSplitMergeAtoA(a, b, 0, n);
delete[] b;
}
void TopDownSplitMergeAtoA(int a[], int b[], size_t ll, size_t ee)
{
if((ee - ll) == 1) // if size == 1 return
return;
size_t rr = (ll + ee)>>1; // midpoint, start of right half
TopDownSplitMergeAtoB(a, b, ll, rr);
TopDownSplitMergeAtoB(a, b, rr, ee);
TopDownMerge(b, a, ll, rr, ee); // merge b to a
}
void TopDownSplitMergeAtoB(int a[], int b[], size_t ll, size_t ee)
{
if((ee - ll) == 1){ // if size == 1 copy a to b
b[ll] = a[ll];
return;
}
size_t rr = (ll + ee)>>1; // midpoint, start of right half
TopDownSplitMergeAtoA(a, b, ll, rr);
TopDownSplitMergeAtoA(a, b, rr, ee);
TopDownMerge(a, b, ll, rr, ee); // merge a to b
}
void TopDownMerge(int a[], int b[], size_t ll, size_t rr, size_t ee)
{
size_t o = ll; // b[] index
size_t l = ll; // a[] left index
size_t r = rr; // a[] right index
while(1){ // merge data
if(a[l] <= a[r]){ // if a[l] <= a[r]
b[o++] = a[l++]; // copy a[l]
if(l < rr) // if not end of left run
continue; // continue (back to while)
while(r < ee) // else copy rest of right run
b[o++] = a[r++];
break; // and return
} else { // else a[l] > a[r]
b[o++] = a[r++]; // copy a[r]
if(r < ee) // if not end of right run
continue; // continue (back to while)
while(l < rr) // else copy rest of left run
b[o++] = a[l++];
break; // and return
}
}
}