i have successfully multithreaded my code but the result is very slow , i there are no thread conflict and yet i am getting very slow code.
the program is to use gaussian elimination to solve system of equations , and i have used async to parallelize the matrix row operations.
asynchronous:
void ge(Mat &mat)
{
using li = long int;
for (li p = 0; p < mat[0].size() - 1; p++)
{
std::vector<std::future<void>> ts;
for (li c = p + 1; c < mat.size(); c++)
{
auto x = mat[c][p] / mat[p][p];
auto temp = vecMul(x, mat[p]);
// vecSub(mat[c], temp);
ts.push_back(std::async(vecSub, std::ref(mat[c]), (temp)));
}
for (auto &t : ts)
{
t.get();
}
}
}
for sequencial execution:
void ge(Mat &mat)
{
using li = long int;
for (li p = 0; p < mat[0].size() - 1; p++)
{
for (li c = p + 1; c < mat.size(); c++)
{
auto x = mat[c][p] / mat[p][p];
auto temp = vecMul(x, mat[p]);
vecSub(mat[c], temp);
}
}
}
full code:
#include <bits/stdc++.h>
using Vec = std::vector<double>;
using Mat = std::vector<Vec>;
using eqn = Mat;
class solver
{
Mat mat;
public:
//give eqn in the form ax1+ax2+ax3..axN = k (coeffiants only)
Vec solve(Mat &in)
{
mat = in;
ge(mat);
return (bs(mat));
}
Vec solve(Mat &&in)
{
mat = std::move(in);
ge(mat);
return (bs(mat));
}
private:
void ge(Mat &mat)
{
using li = long int;
for (li p = 0; p < mat[0].size() - 1; p++)
{
std::vector<std::future<void>> ts;
for (li c = p + 1; c < mat.size(); c++)
{
auto x = mat[c][p] / mat[p][p];
auto temp = vecMul(x, mat[p]);
// single thread vecSub(mat[c], temp);
ts.push_back(std::async(vecSub, std::ref(mat[c]), (temp)));
}
for (auto &t : ts)
{
t.get();
}
}
}
Vec bs(Mat &mat)
{
using li = long int;
Vec x(mat.size());
for (li i = mat.size() - 1; i >= 0; i--)
{
double s = 0;
for (li j = i; j < mat[0].size() - 1; j++)
{
s += mat[i][j] * x[j];
x[i] = ((mat[i][mat[0].size() - 1] - s) / (mat[i][i]));
}
}
return x;
}
static Vec vecMul(double a, Vec b)
{
using li = size_t;
for (li i = 0; i < b.size(); i++)
b[i] *= a;
return b;
}
//static
static void vecAdd(Vec &a, Vec &b)
{
using li = size_t;
assert(a.size() == b.size());
for (li i = 0; i < a.size(); i++)
a[i] = a[i] + b[i];
}
static void vecSub(Vec &a, Vec b)
{
using li = size_t;
// assert(a.size() == b.size());
for (li i = 0; i < a.size(); i++)
a[i] = a[i] - b[i];
}
};
edit 1:
i tried using std::launch::async
edit 2:
single thread:
time taken for size 3 is 3.3929e-05
time taken for size 53 is 0.00372395
time taken for size 103 is 0.0146523
time taken for size 153 is 0.0320243
time taken for size 203 is 0.0702842
time taken for size 253 is 0.129702
time taken for size 303 is 0.219656
time taken for size 353 is 0.33951
time taken for size 403 is 0.496915
time taken for size 453 is 0.697524
multi thread:
time taken for size 3 is 0.000349948
time taken for size 53 is 0.0560127
time taken for size 103 is 0.160197
time taken for size 153 is 0.375889
time taken for size 203 is 0.663671
time taken for size 253 is 1.04643
time taken for size 303 is 1.52449
time taken for size 353 is 2.10555
time taken for size 403 is 2.7029
time taken for size 453 is 3.50366