I am trying to write a multi-threaded program to produce a vector of `N*NumPerThread` uniform random integers,
where `N` is the return value of `std::thread::hardware_concurrency()` and `NumPerThread` is the number of
random numbers I want each thread to generate.
I created a multi-threaded version:
#include <chrono>
#include <iostream>
#include <mutex>
#include <random>
#include <thread>
#include <vector>
// Clock used for the benchmark. steady_clock is monotonic, so the measured
// interval cannot be distorted by wall-clock adjustments; high_resolution_clock
// carries no such guarantee.
using Clock = std::chrono::steady_clock;

// Program-wide configuration and shared state for the random-number benchmark.
namespace Vars
{
    // hardware_concurrency() is only a hint and is allowed to return 0 when the
    // value cannot be determined; fall back to a single worker in that case so
    // the output vector is never empty.
    const unsigned int DetectedThreads = std::thread::hardware_concurrency();
    const unsigned int N = DetectedThreads != 0 ? DetectedThreads : 1; // number of worker threads
    const unsigned int NumPerThread = 500000; // number of random numbers to generate per thread
    std::vector<int> RandNums(NumPerThread*N); // output buffer, one slice of NumPerThread per thread
    std::random_device rd;                         // entropy source used for seeding
    std::mt19937 gen(rd());                        // shared engine (not safe for concurrent use)
    std::uniform_int_distribution<> dis(1, 1000);  // target range [1, 1000]
    int sz = 0;                                    // count of numbers generated so far
}
using namespace Vars;
void AddN(int start)
{
static std::mutex mtx;
std::lock_guard<std::mutex> lock(mtx);
for (unsigned int i=start; i<start+NumPerThread; i++)
{
RandNums[i] = dis(gen);
++sz;
}
}
// Spawns N worker threads, each filling its own NumPerThread-sized slice of
// RandNums via AddN, waits for all of them, and prints the elapsed time and
// the total count of generated numbers.
int main()
{
    const auto start_time = Clock::now();

    std::vector<std::thread> threads;
    threads.reserve(N);
    for (unsigned int i = 0; i < N; ++i)
    {
        // Construct the thread in place; wrapping a temporary std::thread in
        // std::move is redundant. The offset is converted explicitly because
        // AddN takes an int.
        threads.emplace_back(AddN, static_cast<int>(i * NumPerThread));
    }
    for (auto& worker : threads)
    {
        worker.join();
    }

    const auto end_time = Clock::now();
    std::cout << "\nTime difference = "
              << std::chrono::duration<double, std::nano>(end_time - start_time).count() << " nanoseconds\n";
    std::cout << "size = " << sz << '\n';
}
and a single-threaded version:
#include <iostream>
#include <thread>
#include <vector>
#include <random>
#include <chrono>
// Clock used for the benchmark. steady_clock is monotonic, which is what an
// interval measurement needs; high_resolution_clock is not guaranteed to be.
using Clock = std::chrono::steady_clock;

// Configuration and shared state for the single-threaded benchmark.
namespace Vars
{
    // hardware_concurrency() may legitimately return 0 when the thread count
    // cannot be determined; treat that as 1 so some numbers are always
    // generated.
    const unsigned int DetectedThreads = std::thread::hardware_concurrency();
    const unsigned int N = DetectedThreads != 0 ? DetectedThreads : 1; // number of threads on device
    const unsigned int NumPerThread = 500000; // number of random numbers to generate per thread
    std::vector<int> RandNums(NumPerThread*N); // output buffer of N * NumPerThread numbers
    std::random_device rd;                         // entropy source used for seeding
    std::mt19937 gen(rd());                        // Mersenne Twister engine seeded from rd
    std::uniform_int_distribution<> dis(1, 1000);  // target range [1, 1000]
    int sz = 0;                                    // count of numbers generated so far
}
using namespace Vars;
// Sequentially fills the first NumPerThread * N elements of RandNums with
// draws from `dis` using the shared engine `gen`, incrementing `sz` once per
// generated number.
void AddN()
{
    const unsigned int total = NumPerThread * N;
    unsigned int idx = 0;
    while (idx != total)
    {
        RandNums[idx] = dis(gen);
        sz += 1;
        ++idx;
    }
}
// Times a single call to AddN and prints the elapsed time in nanoseconds
// followed by the number of values generated.
int main()
{
    const auto t_begin = Clock::now();
    AddN();
    const auto t_end = Clock::now();

    // Express the interval as a floating-point nanosecond count.
    const std::chrono::duration<double, std::nano> elapsed(t_end - t_begin);

    std::cout << "\nTime difference = " << elapsed.count() << " nanoseconds\n";
    std::cout << "size = " << sz << '\n';
}
The execution times are more or less the same. I am assuming there is a problem with the multi-threaded version?
P.S. I looked at all of the other similar questions here, but I don't see how they directly apply to this task...