I want to investigate how much faster a task can be done with multithreading compared to single threading depending on the task size
I plot a graph showing:
- x_axis: how fast a task is done on a single thread.
- y-axis: how much faster the same task is done on two threads.
What I would expect to happen:
- If the task gets longer, the overhead of creating the threads gets less important. so the ratio (t_single / t_multi) increases
- as I use two threads I would expact the ratio (t_single / t_multi) to converge to 2 (two threads => twice as fast as one thread)
What I get:
- a peak at a single-thread-task time of 10e-2 sec
- the peak is at 2.5 (multiprocessing 2.5 times faster than single threading)
How can this be explained?
The plot is created with a mean over 10 meassurements. I run it on a 24-core-linux-machine.
CODE:
#include <string>
#include <iostream>
#include <thread>
#include <vector>
#include <ctime>
#include <math.h>
#include <chrono>
using namespace std;
using namespace std::chrono;
// function searches through vector and adds 1
// to the first element that equals 0
void task(int number)
{
int s = 0;
for(int i=0; i<number; i++){
s = s + i;
}
// cout << "the sum is " << s << endl;
}
double get_time_single(int m){
// init
int n_threads = 2;
int n = pow(10, m);
high_resolution_clock::time_point start = high_resolution_clock::now();
for(int jobs = 0; jobs < n_threads; jobs++){
task(n);
}
high_resolution_clock::time_point end = high_resolution_clock::now();
double time_single = duration<double, std::milli>(end - start).count();
return time_single;
}
double get_time_multi(int m){
// init
int n_threads = 2;
int n = pow(10, m);
vector<thread> threads;
high_resolution_clock::time_point start = high_resolution_clock::now();
// execute threads
for( int i = 1; i < n_threads + 1; i++ ){
threads.push_back(thread(task, n));
}
// joint threads
for( int i = 0; i < n_threads; i++ ){
threads.at(i).join();
}
high_resolution_clock::time_point end = high_resolution_clock::now();
double time_multi = duration<double, std::milli>(end - start).count();
return time_multi;
}
int main()
{
// print header of magnitude - multi-proc-time - single-proc-time table
cout << "mag" << "\t" << "time multi" << " \t" << "time single" << endl;
cout << "-------------------------------------" << endl;
// iterate through different task magnitudes
for(int m = 3; m<10; m++){
double t_single = 0;
double t_multi = 0;
// get the mean over 10 runs
for(int i = 0; i < 10; i++){
t_multi = t_multi + get_time_multi(m);
t_single = t_single + get_time_single(m);
}
t_multi = t_multi / 10;
t_single = t_single / 10;
cout << m << "\t" << t_multi << " \t" << t_single << endl;
}
}
OUTPUT:
mag time multi time single
-------------------------------------
3 0.133946 0.0082684
4 0.0666891 0.0393378
5 0.30651 0.681517
6 1.92084 5.19607
7 18.8701 41.1431
8 195.002 381.745
9 1866.32 3606.08