I have a program having a function that has a for loop, at each iteration the loop solves a Cplex problem. every problem has some similar constraints with the others, and has also its particular constraints.
I used the command #pragma omp parallel for
so that we solve each one of them in parallel, and we did set each Cplex environment to use only one thread. And to avoid false sharing I created a copy of the values needed on each thread, for the read and write I used #pragma omp critical
.
Even with all that when I use 1 thread it gives the best time result, and when I increase it becomes slower.
void solving(vector<struct block> &blocks,
vector<vector<double>> A,
vector<double> b,
vector<vector<double>> C,
vector<double> V,
vector<vector<double>> ZZ,
vector<double> Mm,
double l,
bool &change,
vector<bool> &sign) {
int tt = th;
omp_set_num_threads(tt);
#pragma omp parallel for
for (int ii = 0; ii < tt; ii++) {
int TT;
vector<struct block> blocks1;
vector<vector<double>> A1;
vector<double> b1;
vector<vector<double>> C1;
vector<double> V1;
vector<vector<double>> ZZ1;
vector<double> Mm1;
double l1;
vector<bool> sign1;
double nvar1;
int p1;
int m1;
#pragma omp critical
{
TT = th;
blocks1 = blocks;
A1 = A;
b1 = b;
C1 = C;
V1 = V;
ZZ1 = ZZ;
Mm1 = Mm;
l1 = l;
sign1 = sign;
nvar1 = nvar;
p1 = p;
m1 = m;
}
IloEnv myenv1; // environment object
IloModel mymodel(myenv1); // model object
IloNumVarArray X(myenv1, nvar1);
for (int i = 0; i < nvar1; i++) {
// X[i] = IloNumVar(myenv1, 0, IloInfinity, ILOINT);
X[i] = IloNumVar(myenv1, 0, 1, ILOBOOL);
}
IloArray<IloNumVarArray> Y(myenv1, l1);
for (int i = 0; i < l1; i++) {
Y[i] = IloNumVarArray(myenv1, p1);
for (int j = 0; j < p; j++) {
Y[i][j] = IloNumVar(myenv1, 0, 1, ILOBOOL);
}
}
/* constraints diffrent in each problem*/
IloExprArray exp1(myenv1, m1);
for (int i = 0; i < m1; i++) {
exp1[i] = IloExpr(myenv1);
}
for (int i = 0; i < m1; i++) {
for (int k = 0; k < nvar1; k++) {
exp1[i] += A1[i][k] * X[k];
}
mymodel.add(exp1[i] <= b1[i]);
}
// Y
/* constraints diffrent in each problem*/
IloExprArray exp3(myenv1, l1 * (p1 + 1.0));
for (int i = 0; i < l1 * (p1 + 1.0); i++) {
exp3[i] = IloExpr(myenv1);
}
for (int i = 0; i < l1; i++) {
for (int j = 0; j < p1; j++) {
int k = j + (i * p1);
exp3[k] = (ZZ1[i][j] + 1) * Y[i][j] + (1 - Y[i][j]) * (-M);
}
}
IloExprArray exp4(myenv1, p1 * l1);
for (int i = 0; i < p1 * l1; i++) {
exp4[i] = IloExpr(myenv1);
}
for (int k = 0; k < l1; k++) {
for (int i = 0; i < p1; i++) {
for (int j = 0; j < nvar1; j++) {
int s = i + (k * p1);
exp4[s] += C1[i][j] * X[j];
}
}
}
for (int i = 0; i < l1 * p1; i++) {
mymodel.add(exp4[i] >= exp3[i]);
}
// Objedctive function
IloExpr exp(myenv1);
for (int i = 0; i < nvar1; i++) {
exp += V1[i] * X[i];
}
mymodel.add(IloMaximize(myenv1, exp));
// solve
IloCplex mycplex(myenv1);
mycplex.extract(mymodel);
mycplex.setParam(IloCplex::Param::Threads, 1); //////////////////////
// to delete the writing
mycplex.setOut(myenv1.getNullStream());
IloBool feasible =
mycplex.solve(); // solves model and stores
// whether or not it is feasible in an IloBool
// variable called "feasible"
if (feasible == IloTrue) {
for (int i = 0; i < p1; i++) {
blocks1[ii].Z[i] = 0;
}
// value of x
for (int i = 0; i < nvar1; i++) {
blocks1[ii].X[i] = round(mycplex.getValue(X[i]));
}
} else {
blocks1[ii].test1 = true;
for (int i = 0; i < nvar1; i++) {
blocks1[ii].X[i] = -M;
}
for (int i = 0; i < p1; i++) {
blocks1[ii].Z[i] = -M;
}
}
// Closing the Model
mycplex.clear();
myenv1.end();
#pragma omp critical
{
blocks[ii].test1 = blocks1[ii].test1;
for (int i = 0; i < nvar; i++) {
blocks[ii].X[i] = blocks1[ii].X[i];
}
for (int i = 0; i < p; i++) {
blocks[ii].Z[i] = blocks1[ii].Z[i];
}
}
}
}
I tried to make a copy of each variable on local memory and paste the values on it, and I used #pragma omp critical
to this procedure to avoid false sharing and each tread pauses other threads and copy the variables. It made the computational time a little better but still diverges when increasing number of threads.