You can use threads to speed up the calculation, using either POSIX pthreads or C++ std::thread, which is also cross-platform. This yields a performance gain only if the number is large; otherwise the time saved is not enough to offset the cost of creating the threads.
Edit: This program uses four threads to calculate the factorial.
After running the program 8 times, the average threaded factorial time was 14 seconds and the average non-threaded factorial time was 18 seconds.
Sample program:
#include <algorithm>
#include <chrono>
#include <functional>
#include <iostream>
#include <thread>
#include "BigInt.h"
/**
 * Multiplies val in place by every integer in the closed range [lower, upper],
 * walking from upper down to lower.
 *
 * @param upper  largest factor to multiply in
 * @param lower  smallest factor to multiply in
 * @param val    running product; read and overwritten on every step.
 *               Left untouched when upper < lower.
 */
void fact(int upper, int lower, BigInt& val)
{
    int factor = upper;
    while (factor >= lower)
    {
        val = val * factor;
        --factor;
    }
}
/**
 * Times the computation of n! done two ways and prints both durations:
 *   1. four threads, each multiplying one contiguous slice of [1, n], followed
 *      by a multiplication of the four partial products;
 *   2. a single call to fact() over the whole range [1, n].
 *
 * Durations are printed in (fractional) seconds.
 *
 * @return 0 on completion.
 */
int main()
{
    // steady_clock is monotonic, so the measured intervals cannot be distorted
    // by adjustments to the system wall clock.
    using Clock = std::chrono::steady_clock;
    // Floating-point seconds: keeps sub-second runs from being truncated to 0.
    using Seconds = std::chrono::duration<double>;

    const auto threadedStart = Clock::now();
    const int n = 1000;

    // Slice boundaries. Each is clamped to at least 1: for n < 4 the plain
    // quotients can be 0, which would make a worker multiply its partial
    // product by 0 and zero the final result. For n >= 4 the clamps are no-ops.
    const int q3 = std::max((3 * n) / 4, 1);
    const int q2 = std::max(n / 2, 1);
    const int q1 = std::max(n / 4, 1);

    // One partial product per worker; each thread writes only its own BigInt.
    BigInt val1("1"), val2("1"), val3("1"), val4("1");
    std::thread thr1(&fact, n, q3, std::ref(val1));
    std::thread thr2(&fact, q3 - 1, q2, std::ref(val2));
    std::thread thr3(&fact, q2 - 1, q1, std::ref(val3));
    std::thread thr4(&fact, q1 - 1, 1, std::ref(val4));
    thr1.join();
    thr2.join();
    thr3.join();
    thr4.join();

    // Combining the partial products is part of the threaded cost, so it is
    // done before the end timestamp is taken.
    const auto ans = val1 * val2 * val3 * val4;
    const auto threadedEnd = Clock::now();
    std::cout << "threaded factorial time: "
              << Seconds(threadedEnd - threadedStart).count() << "\n";

    const auto serialStart = Clock::now();
    BigInt ans2("1");
    // Direct call: a plain lvalue binds to BigInt&, no std::ref needed.
    fact(n, 1, ans2);
    const auto serialEnd = Clock::now();
    std::cout << "non threaded factorial time: "
              << Seconds(serialEnd - serialStart).count() << "\n";
    return 0;
}