I can't seem to get this algorithm to work and I believe that it may be due to 'race condition' but I could be wrong:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <mpi.h>
#define BILLION 1000000000L
double f(double, double);
double g(double, double);
int main(int argc, char** argv){
FILE *myA, *myB;
int rank, size;
MPI_Init(NULL, NULL);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if (rank == 0){
myA = fopen("myA.py", "w");
myB = fopen("myB.py", "w");
}
int m = 255; // Max number of x values
int n = 255; // Max number of y values
int Tmax = 5;//10000; // Max number of time steps
double a = 0, b = 2.5; // starting and ending points along x-axis
double c = 0, d = 2.5; // starting and ending points along y-axis
double dx = (b - a)/m; // x partition width
double dy = (d - c)/n; // y partition width
double dt = 1.; // t partition width
double D_u = 0.00002; // diffusion coefficient
double alpha_u = D_u*dt/(dx*dx), gamma_u = D_u*dt/(dy*dy), beta_u = 1 - 2*alpha_u - 2*gamma_u; // coeffs for fwd Euler method
double D_v = 0.00001; // diffusion coefficient
double alpha_v = D_v*dt/(dx*dx), gamma_v = D_v*dt/(dy*dy), beta_v = 1 - 2*alpha_v - 2*gamma_v; // coeffs for fwd Euler method
// Parameters:
double F = 0.040;
double K = 0.063;
// Domain:
double u[m+1][n+1]; // soln to the diffusion equation
double utmp[m+1][n+1]; // temp storage
double v[m+1][n+1]; // soln to the diffusion equation
double vtmp[m+1][n+1]; // temp storage
int i, j, k;
// initialize time variables
struct timespec begin, end;
double time_lapsed;
// seed rand
srand(time(NULL));
double noise;
double lowest = -1./100.;
double highest = 1./100.;
double range = (highest - lowest);
// divide up the domain evenly among groups
int Np = floor((double)m/size); // Number of rows per processor
//int res = m % size/2; // in case extra row in subgroup
//int bigres = n % 2; // in case extra row overall
int istart = rank*Np;
int iend;
if (rank == 0){
istart = 1;
iend = (rank + 1)*Np;
}
else if (rank == size-1){
iend = m;
}
else {
iend = (rank + 1)*Np;
}
if (rank == 0){
fprintf(myA,"from numpy import array\n");
fprintf(myA,"\ndef myAi():\n");
fprintf(myA,"\treturn array([ ");
clock_gettime(CLOCK_MONOTONIC, &begin); // start timing u
}
// Initialization for u:
for (i = 0; i <= m; i += 1){
if (rank == 0){
fprintf(myA,"[ ");
}
for (j = 0; j <= n; j += 1){
// create square
if ((i >= 117 && i <= 137) && (j >= 117 && j <= 137)){
noise = (lowest + range*rand()/(RAND_MAX + 1.0));
if (abs(noise) > 0.01){
printf("noise: %f\n",noise);
}
utmp[i][j] = 1./2 + noise*(1./2.);//f(a + i*dx,c + j*dy);
u[i][j] = utmp[i][j];
}
else{
utmp[i][j] = 1.;//f(a + i*dx,c + j*dy);
u[i][j] = utmp[i][j];
}
if (rank == 0){
// print matrix entries
if (j != n){
fprintf(myA,"%f, ",utmp[i][j]);
}
else{
fprintf(myA,"%f ",utmp[i][j]);
}
}
}
if (rank == 0){
if (i != m){
fprintf(myA,"],\n");
}
else{
fprintf(myA,"]");
}
}
MPI_Bcast(&u[i][0],(n+1),MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Bcast(&utmp[i][0],(n+1),MPI_DOUBLE,0,MPI_COMM_WORLD);
}
if (rank == 0){
fprintf(myA,"])\n");
clock_gettime(CLOCK_MONOTONIC, &end);
time_lapsed = (end.tv_sec - begin.tv_sec) + (double)(end.tv_nsec - begin.tv_nsec)/BILLION;
printf("\nprint 'Time to initialize u:',%f,'seconds.'\n",time_lapsed);
clock_gettime(CLOCK_MONOTONIC, &begin); // start timing v
fprintf(myB,"from numpy import array\n");
fprintf(myB,"\ndef myBi():\n");
fprintf(myB,"\treturn array([ ");
}
// Initialization for v:
for (i = 0; i <= m; i += 1){
if (rank == 0){
fprintf(myB,"[ ");
}
for (j = 0; j <= n; j += 1){
// create square
if ((i >= 117 && i <= 137) && (j >= 117 && j <= 137)){
noise = (lowest + range*rand()/(RAND_MAX + 1.0));
vtmp[i][j] = 1./4 + noise*(1./4.);//g(a + i*dx,c + j*dy);
if (abs(noise) > 0.01){
printf("noise: %f\n",noise);
}
v[i][j] = vtmp[i][j];
}
else{
vtmp[i][j] = 0.;//g(a + i*dx,c + j*dy);
v[i][j] = vtmp[i][j];
}
if (rank == 0){
// print matrix entries
if (j != n){
fprintf(myB,"%f, ",vtmp[i][j]);
}
else{
fprintf(myB,"%f ",vtmp[i][j]);
}
}
}
if (rank == 0){
if (i != m){
fprintf(myB,"],\n");
}
else{
fprintf(myB,"]");
}
}
MPI_Bcast(&v[i][0],(n+1),MPI_DOUBLE,0,MPI_COMM_WORLD);
MPI_Bcast(&vtmp[i][0],(n+1),MPI_DOUBLE,0,MPI_COMM_WORLD);
}
if (rank == 0){
fprintf(myB,"])\n");
clock_gettime(CLOCK_MONOTONIC, &end);
time_lapsed = (end.tv_sec - begin.tv_sec) + (double)(end.tv_nsec - begin.tv_nsec)/BILLION;
printf("\nprint 'Time to initialize v:',%f,'seconds.'\n",time_lapsed);
}
MPI_Barrier(MPI_COMM_WORLD);
// All together now...
if (iend > m/2){
if (rank == size-1){
for (k = 1; k <= Tmax; k++){
i = istart;
for (i = istart; i < iend-1; i++){
for (j = 1; j < n-1; j++){
// Do usual computation with u_i,j = alpha * (u_i-1,j + u_i+1,j) +
u[i][j] = alpha_u*(utmp[i-1][j] + utmp[i+1][j]) + beta_u*utmp[i][j] + gamma_u*(utmp[i][j-1] + utmp[i][j+1]) - u[i][j]*v[i][j]*v[i][j] + F*(1. - u[i][j]);
v[i][j] = alpha_v*(vtmp[i-1][j] + vtmp[i+1][j]) + beta_v*vtmp[i][j] + gamma_v*(vtmp[i][j-1] + vtmp[i][j+1]) + u[i][j]*v[i][j]*v[i][j] - (F+K)*v[i][j];
}
// left-right Periodic boundary conditions:
u[i][n-1] = alpha_u*(utmp[i-1][n-1] + utmp[i+1][n-1]) + beta_u*utmp[i][n-1] + gamma_u*(utmp[i][n-2] + utmp[i][0]) - u[i][n-1]*v[i][n-1]*v[i][n-1] + F*(1. - u[i][n-1]);
v[i][n-1] = alpha_v*(vtmp[i-1][n-1] + vtmp[i+1][n-1]) + beta_v*vtmp[i][n-1] + gamma_v*(vtmp[i][n-2] + vtmp[i][0]) + u[i][j]*v[i][n-1]*v[i][n-1] - (F+K)*v[i][n-1];
}
// top-bottom Periodic Boundary conditions:
for (j = 1; j < n-1; j++){
u[iend-1][j] = alpha_u*(utmp[iend-2][j] + utmp[0][j]) + beta_u*utmp[iend-1][j] + gamma_u*(utmp[iend-1][j-1] + utmp[iend-1][j+1]) - u[iend-1][j]*v[iend-1][j]*v[iend-1][j] + F*(1. - u[iend-1][j]);
v[iend-1][j] = alpha_v*(vtmp[iend-2][j] + vtmp[0][j]) + beta_v*vtmp[iend-1][j] + gamma_v*(vtmp[iend-1][j-1] + vtmp[iend-1][j+1]) + u[iend-1][j]*v[iend-1][j]*v[iend-1][j] - (F+K)*v[iend-1][j];
}
// top-bottom & left-right Periodic Boundary Conditions
u[iend-1][n-1] = alpha_u*(utmp[iend-2][n-1] + utmp[0][n-1]) + beta_u*utmp[iend-1][n-1] + gamma_u*(utmp[iend-1][n-2] + utmp[iend-1][0]) - u[iend-1][n-1]*v[iend-1][n-1]*v[iend-1][n-1] + F*(1. - u[iend-1][n-1]);
v[iend-1][n-1] = alpha_v*(vtmp[iend-2][n-1] + vtmp[0][n-1]) + beta_v*vtmp[iend-1][n-1] + gamma_v*(vtmp[iend-1][n-2] + vtmp[iend-1][0]) + u[iend-1][n-1]*v[iend-1][n-1]*v[iend-1][n-1] - (F+K)*v[iend-1][n-1];
i = istart;
for (i = istart; i <= iend; i++){ //istart; i <= iend; i++){
for (j = 0; j <= n; j++){
utmp[i][j] = u[i][j];
vtmp[i][j] = v[i][j];
}
}
}
}
else{
for (k = 1; k <= Tmax; k++){
i = istart;
for (i = istart; i <= iend-1; i++){
for (j = 1; j < n-1; j++){
// Do usual computation with u_i,j = alpha * (u_i-1,j + u_i+1,j) +
u[i][j] = alpha_u*(utmp[i-1][j] + utmp[i+1][j]) + beta_u*utmp[i][j] + gamma_u*(utmp[i][j-1] + utmp[i][j+1]) - u[i][j]*v[i][j]*v[i][j] + F*(1. - u[i][j]);
v[i][j] = alpha_v*(vtmp[i-1][j] + vtmp[i+1][j]) + beta_v*vtmp[i][j] + gamma_v*(vtmp[i][j-1] + vtmp[i][j+1]) + u[i][j]*v[i][j]*v[i][j] - (F+K)*v[i][j];
}
// left-right Periodic boundary conditions:
u[i][n-1] = alpha_u*(utmp[i-1][n-1] + utmp[i+1][n-1]) + beta_u*utmp[i][n-1] + gamma_u*(utmp[i][n-2] + utmp[i][0]) - u[i][n-1]*v[i][n-1]*v[i][n-1] + F*(1. - u[i][n-1]);
v[i][n-1] = alpha_v*(vtmp[i-1][n-1] + vtmp[i+1][n-1]) + beta_v*vtmp[i][n-1] + gamma_v*(vtmp[i][n-2] + vtmp[i][0]) + u[i][j]*v[i][n-1]*v[i][n-1] - (F+K)*v[i][n-1];
}
i = istart;
for (i = istart; i <= iend; i++){
for (j = 0; j <= n; j++){
utmp[i][j] = u[i][j];
vtmp[i][j] = v[i][j];
}
}
}
}
}
else {
int count;
for (k = 1; k <= Tmax; k++){
count = iend-1;
while (count >= istart){
//printf("i = %d\n",i);
for (j = 1; j < n-1; j++){
// Do usual computation with u_i,j = alpha * (u_i-1,j + u_i+1,j) +
u[count][j] = alpha_u*(utmp[count-1][j] + utmp[count+1][j]) + beta_u*utmp[count][j] + gamma_u*(utmp[count][j-1] + utmp[count][j+1]) - u[count][j]*v[count][j]*v[count][j] + F*(1. - u[count][j]);
v[count][j] = alpha_v*(vtmp[count-1][j] + vtmp[count+1][j]) + beta_v*vtmp[count][j] + gamma_v*(vtmp[count][j-1] + vtmp[count][j+1]) + u[count][j]*v[count][j]*v[count][j] - (F+K)*v[count][j];
}
// left-right Periodic boundary conditions:
u[count][n-1] = alpha_u*(utmp[count-1][n-1] + utmp[count+1][n-1]) + beta_u*utmp[count][n-1] + gamma_u*(utmp[count][n-2] + utmp[count][0]) - u[count][n-1]*v[count][n-1]*v[count][n-1] + F*(1. - u[count][n-1]);
v[count][n-1] = alpha_v*(vtmp[count-1][n-1] + vtmp[count+1][n-1]) + beta_v*vtmp[count][n-1] + gamma_v*(vtmp[count][n-2] + vtmp[count][0]) + u[count][j]*v[count][n-1]*v[count][n-1] - (F+K)*v[count][n-1];
count = count-1;
}
i = istart;
for (i = istart; i <= iend; i++){
for (j = 0; j <= n; j++){
utmp[i][j] = u[i][j];
vtmp[i][j] = v[i][j];
}
}
}
}
if (rank == 0){
clock_gettime(CLOCK_MONOTONIC, &end);
time_lapsed = (end.tv_sec - begin.tv_sec) + (double)(end.tv_nsec - begin.tv_nsec)/BILLION;
printf("\nprint 'Time for algorithm to complete:',%f,'seconds.'\n",time_lapsed);
fprintf(myA,"\n");
fprintf(myA,"\ndef myAf():\n");
fprintf(myA,"\treturn array([ ");
for (i = 0; i <= m; i++){
fprintf(myA,"[ ");
for (j = 0; j <= n; j++){
if (j != n){
fprintf(myA,"%f, ",utmp[i][j]);
}
else{
fprintf(myA,"%f ",utmp[i][j]);
}
}
if (i != m){
fprintf(myA,"],\n");
}
else{
fprintf(myA,"]");
}
}
fprintf(myA,"])\n");
fprintf(myB,"\ndef myBf():\n");
fprintf(myB,"\treturn array([");
for (i = 0; i <= m; i++){
fprintf(myB,"[ ");
for (j = 0; j <= n; j++){
if (j != n){
fprintf(myB,"%f, ",vtmp[i][j]);
}
else{
fprintf(myB,"%f ",vtmp[i][j]);
}
}
if (i != m){
fprintf(myB,"],\n");
}
else{
fprintf(myB,"]");
}
}
fprintf(myB,"])\n");
fclose(myA);
fclose(myB);
}
MPI_Finalize();
return 0;
}
// For experimentation with different initial conditions
double f(double x, double y){
return x - x*x + y - y*y; //sin(x*x + y*y);
//exp(20*(x-1./2)*(x-1./2) - 20*(y-1./2)*(y-1./2));//x - x*x + y - y*y;
}
double g(double x, double y){
return sin(x*x + y*y); //sin(x*x + y*y);
//exp(20*(x-1./2)*(x-1./2) - 20*(y-1./2)*(y-1./2));//x - x*x + y - y*y;
}
The algorithm is forward Euler method on a periodic 2D domain (the 2D arrays) and for clarity I left out a lot of the parts, unless more is needed. The initial and final results will be output to a file by the master processor (rank 0 as in code) ready to be plotted.
The idea that I have in mind here is to divide the domain among processors into (# of rows)/(# of processors) chunk sizes with the first half of all processors doing the top half of the domain (starting at the center to the top). Then, the other half of the processors doing the bottom half of the domain (starting at the center to the bottom).
However, only the bottom half of the domain is being updated which leads me to believe that some sort of 'race condition' is going on.
--EDIT--
Original code is being used instead.
I think I know what the problem is. Each processor has it's own 'local' copy of the domain that it's updating. Hence when rank 0 is printing to file, it's printing it's own 'local' version of the domain, which on two processors I would see half of the 'entire picture'.
However what I want is for each processor to update it's piece of the domain then have processor 0 print the entire updated domain to file. How might I go about doing this if this is the issue?