Introduction:
Using two identical mergesort algorithms, I tested the execution speed of C++ (using Visual Studios C++ 2010 express) vs Java (using NetBeans 7.0). I conjectured that the C++ execution would be at least slightly faster, but testing revealed that the C++ execution was 4 - 10 times slower than the Java execution. I believe that I have set all the speed optimisations for C++, and I am publishing as a release rather than as a debug. Why is this speed discrepancy occurring?
Code:
Java:
public class PerformanceTest1
{
/**
* Sorts the array using a merge sort algorithm
* @param array The array to be sorted
* @return The sorted array
*/
public static void sort(double[] array)
{
if(array.length > 1)
{
int centre;
double[] left;
double[] right;
int arrayPointer = 0;
int leftPointer = 0;
int rightPointer = 0;
centre = (int)Math.floor((array.length) / 2.0);
left = new double[centre];
right = new double[array.length - centre];
System.arraycopy(array,0,left,0,left.length);
System.arraycopy(array,centre,right,0,right.length);
sort(left);
sort(right);
while((leftPointer < left.length) && (rightPointer < right.length))
{
if(left[leftPointer] <= right[rightPointer])
{
array[arrayPointer] = left[leftPointer];
leftPointer += 1;
}
else
{
array[arrayPointer] = right[rightPointer];
rightPointer += 1;
}
arrayPointer += 1;
}
if(leftPointer < left.length)
{
System.arraycopy(left,leftPointer,array,arrayPointer,array.length - arrayPointer);
}
else if(rightPointer < right.length)
{
System.arraycopy(right,rightPointer,array,arrayPointer,array.length - arrayPointer);
}
}
}
public static void main(String args[])
{
//Number of elements to sort
int arraySize = 1000000;
//Create the variables for timing
double start;
double end;
double duration;
//Build array
double[] data = new double[arraySize];
for(int i = 0;i < data.length;i += 1)
{
data[i] = Math.round(Math.random() * 10000);
}
//Run performance test
start = System.nanoTime();
sort(data);
end = System.nanoTime();
//Output performance results
duration = (end - start) / 1E9;
System.out.println("Duration: " + duration);
}
}
C++:
#include <iostream>
#include <windows.h>
using namespace std;
//Mergesort
void sort1(double *data,int size)
{
if(size > 1)
{
int centre;
double *left;
int leftSize;
double *right;
int rightSize;
int dataPointer = 0;
int leftPointer = 0;
int rightPointer = 0;
centre = (int)floor((size) / 2.0);
leftSize = centre;
left = new double[leftSize];
for(int i = 0;i < leftSize;i += 1)
{
left[i] = data[i];
}
rightSize = size - leftSize;
right = new double[rightSize];
for(int i = leftSize;i < size;i += 1)
{
right[i - leftSize] = data[i];
}
sort1(left,leftSize);
sort1(right,rightSize);
while((leftPointer < leftSize) && (rightPointer < rightSize))
{
if(left[leftPointer] <= right[rightPointer])
{
data[dataPointer] = left[leftPointer];
leftPointer += 1;
}
else
{
data[dataPointer] = right[rightPointer];
rightPointer += 1;
}
dataPointer += 1;
}
if(leftPointer < leftSize)
{
for(int i = dataPointer;i < size;i += 1)
{
data[i] = left[leftPointer++];
}
}
else if(rightPointer < rightSize)
{
for(int i = dataPointer;i < size;i += 1)
{
data[i] = right[rightPointer++];
}
}
delete left;
delete right;
}
}
void main()
{
//Number of elements to sort
int arraySize = 1000000;
//Create the variables for timing
LARGE_INTEGER start; //Starting time
LARGE_INTEGER end; //Ending time
LARGE_INTEGER freq; //Rate of time update
double duration; //end - start
QueryPerformanceFrequency(&freq); //Determinine the frequency of the performance counter (high precision system timer)
//Build array
double *temp2 = new double[arraySize];
QueryPerformanceCounter(&start);
srand((int)start.QuadPart);
for(int i = 0;i < arraySize;i += 1)
{
double randVal = rand() % 10000;
temp2[i] = randVal;
}
//Run performance test
QueryPerformanceCounter(&start);
sort1(temp2,arraySize);
QueryPerformanceCounter(&end);
delete temp2;
//Output performance test results
duration = (double)(end.QuadPart - start.QuadPart) / (double)(freq.QuadPart);
cout << "Duration: " << duration << endl;
//Dramatic pause
system("pause");
}
Observations:
For 10000 elements, the C++ execution takes roughly 4 times the amount of time as the Java execution. For 100000 elements, the ratio is about 7:1. For 10000000 elements, the ratio is about 10:1. For over 10000000, the Java execution completes, but the C++ execution stalls, and I have to manually kill the process.