Efficient way to find norm of distributed vector in SCALAPACK

Question

consider the following piece of code using scalapack:

        ! if (norm2(h-x0) < tol) then
        tmp_vec = h - x0
        call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
        if (norm < tol) then
            x=h
            converged = .true.
            exit
        endif
        s = r0 - alpha*v
        call pdgemv('N', N, N, 1.0, A, 1, 1, descmat, s, 1, 1, descvec, 1, 0.0,t, 1, 1, descvec, 1)

its part of an iterative solver that i was trying, problem is that if my processor grid is two dimensional, my vectors do not have any elements on those procs, hence dnrm2 yields zero or the norm variable. hence resulting in early exit of some procs from the loop, hanging the entire loop.

What is the correct method to ensure norm value is properly assigned, other than manual broadcast etc.?

Note: this works fine with 1-d procs distribution, please see: Inconsistent rows allocation in scalapack

Given below is a simple Bi-CGSTAB solver i wrote from wikipedia article, which reads a vector and matrix from file b.dat and A.dat respectively. and proceeds to solve it using bicgstab_self_sclpk routine. Printed below is the value of norm

For ranks=4 run:

...
 current norm2   0.0000000000000000
 Bi-CG STAB solver converged in iteration #           1   0.0000000000000000
 current norm2   0.0000000000000000
 Bi-CG STAB solver converged in iteration #           1   0.0000000000000000
...

And everything just hangs on here.

for ranks = 7 run

 . . .
 current norm2   1.2377699991821143E-008
 current norm2   1.2377699991821143E-008
 Bi-CG STAB solver converged in iteration #         369   1.2377699991821143E-008
 current norm2   1.2377699991821143E-008
 Bi-CG STAB solver converged in iteration #         369   1.2377699991821143E-008
 current norm2   1.2377699991821143E-008
 Bi-CG STAB solver converged in iteration #         369   1.2377699991821143E-008
 current norm2   1.2377699991821143E-008
 Bi-CG STAB solver converged in iteration #         369   1.2377699991821143E-008
 current norm2   1.2377699991821143E-008
 Bi-CG STAB solver converged in iteration #         369   1.2377699991821143E-008
 current norm2   1.2377699991821143E-008
 Bi-CG STAB solver converged in iteration #         369   1.2377699991821143E-008
 Bi-CG STAB solver converged in iteration #         369   1.2377699991821143E-008
. . .

module bicgstab

contains
subroutine bicgstab_self_sclpk(A,b,N,descvec,descmat,mloc_vec,nloc_vec)
    use mpi
    implicit none
    real                :: A(:,:), b(:,:), tol
    integer(kind=8)     :: N
    integer             :: maxiter, descvec(:),descmat(:), info, mloc_vec ,nloc_vec

    integer             :: i, ierr, rank, maxit
    real                :: rho0, alpha, omega0, rho, omega, beta, norm, tmp_real
    real, allocatable   :: r0(:,:), r(:,:), x0(:,:), x(:,:),h(:,:),t(:,:), tmp_vec(:,:)
    real, allocatable   :: rhat0(:,:),v(:,:), p(:,:), v0(:,:),p0(:,:),s(:,:)
    logical             :: converged

    ! ================== Initialize ======================
    allocate(r0(mloc_vec,nloc_vec))
    allocate(r(mloc_vec,nloc_vec))
    allocate(rhat0(mloc_vec,nloc_vec))
    allocate(x0(mloc_vec,nloc_vec))
    allocate(x(mloc_vec,nloc_vec))
    allocate(v0(mloc_vec,nloc_vec))
    allocate(v(mloc_vec,nloc_vec))
    allocate(p0(mloc_vec,nloc_vec))
    allocate(p(mloc_vec,nloc_vec))
    allocate(h(mloc_vec,nloc_vec))
    allocate(s(mloc_vec,nloc_vec))
    allocate(t(mloc_vec,nloc_vec))
    allocate(tmp_vec(mloc_vec,nloc_vec))
    x0  = 0
    r0  = 0
    r   = 0
    x   = 0
    v0  = 0
    v   = 0
    p0  = 0
    p   = 0
    h   = 0
    s   = 0
    t   = 0
    norm= 0
    rhat0 = 0
    rho0 = 1
    rho = 0
    alpha = 1
    omega0 = 1
    omega = 0
    beta = 0
    converged = .false.

    r0(1:mloc_vec,1:nloc_vec) = b(1:mloc_vec,1:nloc_vec)
    rhat0 = r0
    tol = 1E-6
    maxiter = 1000
    call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
    print *, rank , mloc_vec, nloc_vec
    ! print *, "rank",rank,"descmat",descmat
    ! print *, "rank",rank,"descvec",descvec
    ! ======================================================

    ! ================Loop==================================
    do i = 1, maxiter
        ! rho = dot_product(rhat0(:,1),r0(:,1))
        call pddot(N,rho, rhat0, 1,1,descvec,1,r0,1,1,descvec,1)
        beta = (rho/rho0)*(alpha/omega0)
        p = r0 + beta*(p0 - omega0*v0)

        ! v = matmul(A,p)
        call pdgemv('N', N, N, 1.0, A, 1, 1, descmat, p, 1, 1, descvec, 1, 0.0,v, 1, 1, descvec, 1)

        ! alpha = rho/dot_product(rhat0(:,1),v(:,1))
        call pddot(N,alpha,rhat0, 1,1,descvec,1,v,1,1,descvec,1)
        alpha = rho/alpha
        h = x0 + alpha*p

        ! if (norm2(h-x0) < tol) then
        tmp_vec = h - x0
        norm = 999.0
        call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
        ! print *, "current norm1", norm, rank
        if (norm < tol) then
            x=h
            converged = .true.
            exit
        endif
        if (i==1) print *,"rank",rank,"was here"
        s = r0 - alpha*v
        ! t = matmul(A,s)
        call pdgemv('N', N, N, 1.0, A, 1, 1, descmat, s, 1, 1, descvec, 1, 0.0,t, 1, 1, descvec, 1)
        ! call pdgemm('N', 'N', N, 1, N, 1.0, A, 1, 1, descmat, s, 1, 1, descvec, 0.0, tmp_vec, 1, 1, descvec)
        ! t = tmp_vec
        ! omega = dot_product(t(:,1),s(:,1))/dot_product(t(:,1),t(:,1))
        call pddot(N,omega,t, 1,1,descvec,1,s,1,1,descvec,1)
        call pddot(N,tmp_real,t, 1,1,descvec,1,t,1,1,descvec,1)
        omega = omega/tmp_real

        x = h + omega*s

        ! if (norm2(x-x0)<tol) then
        tmp_vec = x - x0
        norm = 1000000
        call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
        if (norm < tol) then
                print *, "current norm2", norm
                converged = .true.
                exit
        endif
        r = s - omega*t
        x0 =x
        rho0 = rho
        p0 = p
        r0 = r
        v0 = v
        omega0 = omega
    enddo
    ! =========================================================
    if (converged) then
        print *, "Bi-CG STAB solver converged in iteration #", i, norm
    else
        print *, "Maximum iteration cycles reached"
    endif
    call MPI_Barrier(MPI_COMM_WORLD,ierr)
    b = x
    ! print *,"rank ",rank
    ! =================clean up!===============================
    deallocate(r0)
    deallocate(r)
    deallocate(rhat0)
    deallocate(x0)
    deallocate(x)
    deallocate(v0)
    deallocate(v)
    deallocate(p0)
    deallocate(p)
    deallocate(h)
    deallocate(s)
    deallocate(t)
    print *,"End of bicgstab"
end subroutine bicgstab_self_sclpk
end module bicgstab

program test_bicgstab
    use bicgstab
    use mpi
    implicit none
    character, parameter        :: UPLO="U"
    character(len=7)            :: char_size
    integer                     :: info
    integer(kind=8)             :: N, i, j
    real(kind=8), allocatable   :: A_global(:,:), b_global(:,:)
    integer(kind=8)             :: count_start, count_end,count_rate, dummy_int
    real(kind=8)                :: time
    ! =========================BLACS and MPI=======================
    integer                     :: ierr, size, rank,dims(2)
    ! -------------------------------------------------------------
    integer, parameter          :: block_size = 100
    integer                     :: context, nprow, npcol, local_nprow, local_npcol
    integer                     :: numroc, indxl2g, descmat(9),descvec(9)
    integer                     :: mloc_mat ,nloc_mat ,mloc_vec ,nloc_vec
    real(kind=8), allocatable   :: A(:,:), x(:,:), b(:,:)

    call blacs_pinfo(rank,size)
    dims=0
    call MPI_Dims_create(size, 2, dims, ierr)
    nprow = dims(1);npcol = dims(2)
    call blacs_get(0,0,context)
    call blacs_gridinit(context, 'R', nprow, npcol)
    call blacs_gridinfo(context, nprow, npcol, local_nprow,local_npcol)

    N = 700

    allocate(A_global(N,N))

    if (rank==0) open(101,file='A.dat')
    do i = 1, N
        if (rank==0) read(101,*) A_global(i,1:N)
        call MPI_Bcast(A_global(i,1:N), N,MPI_DOUBLE,0,MPI_COMM_WORLD,  ierr)
    enddo
    if (rank==0) close(101)

    mloc_mat = numroc(N,block_size,local_nprow,0,nprow)
    nloc_mat = numroc(N,block_size,local_npcol,0, npcol)
    allocate(A(mloc_mat,nloc_mat))

    do i = 1, mloc_mat
        do j = 1,nloc_mat
            A(i,j) = A_global(indxl2g(i,block_size,local_nprow,0, nprow),&
                             &indxl2g(j,block_size,local_npcol,0, npcol))
        enddo
    enddo

    if (rank==0) print *, "Read matrix"

    allocate(b_global(N,1))

    if (rank==0) then
        open(103,file='b.dat')
        do i = 1, N
            read(103,*) b_global(i,1)
        enddo
        close(103)
    endif
    call MPI_Bcast(b_global(:,1), N,MPI_DOUBLE,0,MPI_COMM_WORLD,  ierr)

    ! set up scalapack shared matrices
    if (rank==0) print *, "Matrix broadcasted"
    mloc_vec = numroc(N,block_size,local_nprow,0, nprow)
    nloc_vec = numroc(1,block_size,local_npcol,0, npcol)
    print *,"Rank", rank, mloc_vec, nloc_vec
    allocate(b(mloc_vec,nloc_vec))
    allocate(x(mloc_vec,nloc_vec))

    do i = 1, mloc_vec
        do j = 1,nloc_vec 
            b(i,j) = b_global(indxl2g(i,block_size,local_nprow,0, nprow),&
                             &indxl2g(j,block_size,local_npcol,0, npcol))
            x(i,j)   = b_global(indxl2g(i,block_size,local_nprow,0, nprow),&
                               &indxl2g(j,block_size,local_npcol,0, npcol))
        enddo
    enddo

    call descinit(descmat    , N, N, block_size, block_size, 0,0,context,max(1,mloc_mat),info)
    call descinit(descvec    , N, 1, block_size, block_size, 0,0,context,max(1,mloc_vec),info)

    if (rank==0) print *, "Set up done,solving"

    ! setup done, call in the cavalary
    call MPI_Barrier(MPI_COMM_WORLD, ierr)
    call bicgstab_self_sclpk(A,x,N, descvec, descmat,mloc_vec,nloc_vec)
    ! print *,x
    call MPI_Barrier(MPI_COMM_WORLD,ierr)
    call blacs_gridexit(context)
    call blacs_exit(0)

end program test_bicgstab

If needed, matrix files can be downloaded here: https://github.com/ipcamit/temp_so

A short complete code showing the problem would be really useful here. It is my understanding that pdnrm2 should broadcast the resut amongst all processes in the grid's context, not just those with "values". So how you have set up descvec is vital - to be honest you probably want it to be the same as descmat, but without seeing the code I can't tell. But further honesty, I probably wouldn't use pdnrm2 at all as it is so easy to write for yourself. — Ian Bush, May 12 '20 at 11:07
Main reason to stick to pdnrm2 is, consistency and performance. I thought to use maximum library functions, in case they provide better performance for huge matrices — ipcamit, May 12 '20 at 11:29
Could you post the code that when run demonstrated the problem? The above won't compile due to MPI_DOUBLE not being correct in the Bcasts, and the argument mismatch due to the kinds of A as declared in main program and the subprogram being different. It would also be appreciated if you fix the errors with MPI_INIT and Integer kinds noted in the answer to the previous question — Ian Bush, May 12 '20 at 13:04
It also won't compile because MPI expects a default ind integer for N, and you are using integer( Kind = 8 ) — Ian Bush, May 12 '20 at 13:16
Use of `MPI_DOUBLE` in Fortran programs is erroneous and may lead to weird errors. Use the Fortran-specific `MPI_REAL8` instead. — Hristo Iliev, May 12 '20 at 23:52
OK. will keep it in mind. I wasnt aware of it as most mpi tutorials are in c — ipcamit, May 13 '20 at 12:24

score 0 · Accepted Answer · answered May 12 '20 at 13:45

OK, firstly there are quite a few problems with your code as posted, see below for what I believe is a corrected version using Random numbers in place of the files. In particular you really need to be more careful with kinds of reals, and I recommend using default integers throughout, you don't need long integers for this, and trying to use them will just cause unnecessary pain.

That said I think you have run into a bug in pdnrm2. Looking at the source in

http://www.netlib.org/scalapack/explore-html/d3/d5f/pdnrm2___8c_source.html

there is

 /*
 *  Process column 0 broadcasts the combined values of SCALE and SSQ within their
 *  process column
 */
             top = *PB_Ctop( &ctxt, BCAST, COLUMN, TOP_GET );
             if( myrow == 0 )
             {
                Cdgebs2d( ctxt, COLUMN, &top, 2, 1, ((char*)work), 2 );
             }
             else
             {
                Cdgebr2d( ctxt, COLUMN, &top, 2, 1, ((char*)work), 2,
                          0, mycol );
             }
 /*

Now I don't claim to understand this in full, but the comment strongly suggests the result is only being broadcast along the column of processes with rank 0 in it, rather than to all processes. This works for a 1D decompostion, but not for a 2D one, precisely what you are seeing. So I am afraid IMO to work around this you will have to manually broadcast the value yourself from process zero.

Below is what I think is the corrected version of your code, along with it showing on 3 procs (a 1D decomposition) but not on 4 (which uses 2x2 so a 2D decomposition)

Module numbers

  Use, Intrinsic :: iso_fortran_env, Only :  wp => real64

  Public :: wp

  Private

End Module numbers

Module bicgstab

Contains
  Subroutine bicgstab_self_sclpk(A,b,N,descvec,descmat,mloc_vec,nloc_vec)
    Use numbers
    Use mpi
    Implicit None
    Real ( wp )               :: A(:,:), b(:,:), tol
    Integer :: N
    Integer             :: maxiter, descvec(:),descmat(:), info, mloc_vec ,nloc_vec

    Integer             :: i, ierr, rank, maxit
    Real( wp )                :: rho0, alpha, omega0, rho, omega, beta, norm, tmp_real
    Real( wp ), Allocatable   :: r0(:,:), r(:,:), x0(:,:), x(:,:),h(:,:),t(:,:), tmp_vec(:,:)
    Real( wp ), Allocatable   :: rhat0(:,:),v(:,:), p(:,:), v0(:,:),p0(:,:),s(:,:)
    Logical             :: converged

    ! ================== Initialize ======================
    Allocate(r0(mloc_vec,nloc_vec))
    Allocate(r(mloc_vec,nloc_vec))
    Allocate(rhat0(mloc_vec,nloc_vec))
    Allocate(x0(mloc_vec,nloc_vec))
    Allocate(x(mloc_vec,nloc_vec))
    Allocate(v0(mloc_vec,nloc_vec))
    Allocate(v(mloc_vec,nloc_vec))
    Allocate(p0(mloc_vec,nloc_vec))
    Allocate(p(mloc_vec,nloc_vec))
    Allocate(h(mloc_vec,nloc_vec))
    Allocate(s(mloc_vec,nloc_vec))
    Allocate(t(mloc_vec,nloc_vec))
    Allocate(tmp_vec(mloc_vec,nloc_vec))
    x0  = 0.0_wp
    r0  = 0.0_wp
    r   = 0.0_wp
    x   = 0.0_wp
    v0  = 0.0_wp
    v   = 0.0_wp
    p0  = 0.0_wp
    p   = 0.0_wp
    h   = 0.0_wp
    s   = 0.0_wp
    t   = 0.0_wp
    norm= 0.0_wp
    rhat0 = 0.0_wp
    rho0 = 1.0_wp
    rho = 0.0_wp
    alpha = 1.0_wp
    omega0 = 1.0_wp
    omega = 0.0_wp
    beta = 0.0_wp
    converged = .False.

    r0(1:mloc_vec,1:nloc_vec) = b(1:mloc_vec,1:nloc_vec)
    rhat0 = r0
    tol = 1E-6_wp
    maxiter = 1000
    Call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
    Print *, rank , mloc_vec, nloc_vec
    ! print *, "rank",rank,"descmat",descmat
    ! print *, "rank",rank,"descvec",descvec
    ! ======================================================

    ! ================Loop==================================
    Do i = 1, maxiter
       ! rho = dot_product(rhat0(:,1),r0(:,1))
       Call pddot(N,rho, rhat0, 1,1,descvec,1,r0,1,1,descvec,1)
       beta = (rho/rho0)*(alpha/omega0)
       p = r0 + beta*(p0 - omega0*v0)

       ! v = matmul(A,p)
       Call pdgemv('N', N, N, 1.0_wp, A, 1, 1, descmat, p, 1, 1, descvec, 1, 0.0_wp,v, 1, 1, descvec, 1)

       ! alpha = rho/dot_product(rhat0(:,1),v(:,1))
       Call pddot(N,alpha,rhat0, 1,1,descvec,1,v,1,1,descvec,1)
       alpha = rho/alpha
       h = x0 + alpha*p

       ! if (norm2(h-x0) < tol) then
       tmp_vec = h - x0
       Call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
       ! print *, "current norm1", norm, rank
       If (norm < tol) Then
          x=h
          converged = .True.
          Exit
       Endif
       If (i==1) Print *,"rank",rank,"was here"
       s = r0 - alpha*v
       ! t = matmul(A,s)
       Call pdgemv('N', N, N, 1.0_wp, A, 1, 1, descmat, s, 1, 1, descvec, 1, 0.0_wp,t, 1, 1, descvec, 1)
       ! call pdgemm('N', 'N', N, 1, N, 1.0, A, 1, 1, descmat, s, 1, 1, descvec, 0.0, tmp_vec, 1, 1, descvec)
       ! t = tmp_vec
       ! omega = dot_product(t(:,1),s(:,1))/dot_product(t(:,1),t(:,1))
       Call pddot(N,omega,t, 1,1,descvec,1,s,1,1,descvec,1)
       Call pddot(N,tmp_real,t, 1,1,descvec,1,t,1,1,descvec,1)
       omega = omega/tmp_real

       x = h + omega*s

       ! if (norm2(x-x0)<tol) then
       tmp_vec = x - x0
       norm = 1000000
       Call pdnrm2(N,norm,tmp_vec,1,1,descvec,1)
       If (norm < tol) Then
          Print *, "current norm2", norm
          converged = .True.
          Exit
       Endif
       r = s - omega*t
       x0 =x
       rho0 = rho
       p0 = p
       r0 = r
       v0 = v
       omega0 = omega
    Enddo
    ! =========================================================
    If (converged) Then
       Print *, "Bi-CG STAB solver converged in iteration #", i, norm
    Else
       Print *, "Maximum iteration cycles reached"
    Endif
    Call MPI_Barrier(MPI_COMM_WORLD,ierr)
    b = x
    ! print *,"rank ",rank
    ! =================clean up!===============================
    Deallocate(r0)
    Deallocate(r)
    Deallocate(rhat0)
    Deallocate(x0)
    Deallocate(x)
    Deallocate(v0)
    Deallocate(v)
    Deallocate(p0)
    Deallocate(p)
    Deallocate(h)
    Deallocate(s)
    Deallocate(t)
    Print *,"End of bicgstab"
  End Subroutine bicgstab_self_sclpk
End Module bicgstab

Program test_bicgstab
  Use numbers
  Use bicgstab
  Use mpi
  Implicit None
  Character, Parameter        :: UPLO="U"
  Character(len=7)            :: char_size
  Integer                     :: info
  Integer             :: N, i, j
  Real(wp), Allocatable   :: A_global(:,:), b_global(:,:)
  Integer             :: count_start, count_end,count_rate, dummy_int
  Real(wp)                :: time
  ! =========================BLACS and MPI=======================
  ! Size is a really bad name for a variable as it clashes with the very useful intrinsic
!!$  Integer                     :: ierr, size, rank,dims(2)
  Integer                     :: ierr, nprocs, rank,dims(2)
  ! -------------------------------------------------------------
  Integer, Parameter          :: block_size = 100
  Integer                     :: context, nprow, npcol, local_nprow, local_npcol
  Integer                     :: numroc, indxl2g, descmat(9),descvec(9)
  Integer                     :: mloc_mat ,nloc_mat ,mloc_vec ,nloc_vec
  Real(wp), Allocatable   :: A(:,:), x(:,:), b(:,:)

  ! CAN NOT DO ANY MPI UNTILL CALLED MPI_INIT
  Call MPI_Init( ierr )

  Call blacs_pinfo(rank,nprocs)
  dims=0
  Call MPI_Dims_create(nprocs, 2, dims, ierr)
  nprow = dims(1);npcol = dims(2)
  Call blacs_get(0,0,context)
  Call blacs_gridinit(context, 'R', nprow, npcol)
  Call blacs_gridinfo(context, nprow, npcol, local_nprow,local_npcol)

  N = 700

  Allocate(A_global(N,N))

  ! No file, fill with random numbers
!!$  If (rank==0) Open(101,file='A.dat')
!!$  Do i = 1, N
!!$     If (rank==0) Read(101,*) A_global(i,1:N)
!!$     Call MPI_Bcast(A_global(i,1:N), N,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,  ierr)
!!$  Enddo
!!$  If (rank==0) Close(101)
  If( rank == 0 ) Then
     Call Random_number( a_global )
     ! Add something onto the diagonal to hopefully avid horrible condition number
     Do i = 1, n
        a_global( i, i ) = a_global( i, i ) + n
     End Do
  End If
  Call MPI_Bcast(A_global, Size( a_global ),MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,  ierr)

  mloc_mat = numroc(N,block_size,local_nprow,0,nprow)
  nloc_mat = numroc(N,block_size,local_npcol,0, npcol)
  Allocate(A(mloc_mat,nloc_mat))

  Do i = 1, mloc_mat
     Do j = 1,nloc_mat
        A(i,j) = A_global(indxl2g(i,block_size,local_nprow,0, nprow),&
             &indxl2g(j,block_size,local_npcol,0, npcol))
     Enddo
  Enddo

  If (rank==0) Print *, "Read matrix"

  Allocate(b_global(N,1))
  ! No file, fill with random numbers
!!$
!!$  If (rank==0) Then
!!$     Open(103,file='b.dat')
!!$     Do i = 1, N
!!$        Read(103,*) b_global(i,1)
!!$     Enddo
!!$     Close(103)
!!$  Endif
  If( Rank == 0 ) Then
     Call Random_number( b_global )
  End If
  Call MPI_Bcast(b_global, Size( b_global ) ,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,  ierr)

  ! set up scalapack shared matrices
  If (rank==0) Print *, "Matrix broadcasted"
  mloc_vec = numroc(N,block_size,local_nprow,0, nprow)
  nloc_vec = numroc(1,block_size,local_npcol,0, npcol)
  Print *,"Rank", rank, mloc_vec, nloc_vec
  Allocate(b(mloc_vec,nloc_vec))
  Allocate(x(mloc_vec,nloc_vec))

  Do i = 1, mloc_vec
     Do j = 1,nloc_vec 
        b(i,j) = b_global(indxl2g(i,block_size,local_nprow,0, nprow),&
             &indxl2g(j,block_size,local_npcol,0, npcol))
        x(i,j)   = b_global(indxl2g(i,block_size,local_nprow,0, nprow),&
             &indxl2g(j,block_size,local_npcol,0, npcol))
     Enddo
  Enddo

  Call descinit(descmat    , N, N, block_size, block_size, 0,0,context,Max(1,mloc_mat),info)
  Call descinit(descvec    , N, 1, block_size, block_size, 0,0,context,Max(1,mloc_vec),info)

  If (rank==0) Print *, "Set up done,solving"

  ! setup done, call in the cavalary
  Call MPI_Barrier(MPI_COMM_WORLD, ierr)
  Call bicgstab_self_sclpk(A,x,N, descvec, descmat,mloc_vec,nloc_vec)
  ! print *,x
  Call MPI_Barrier(MPI_COMM_WORLD,ierr)
  Call blacs_gridexit(context)
!!$  Call blacs_exit(0)
  Call MPI_Finalize( ierr )

End Program test_bicgstab
ian@eris:~/work/stack$ mpif90 -g -Wall -Wextra -O -pedantic -fbacktrace -fcheck=all bicg.f90 -lblacsF77init-openmpi  -lblacs-openmpi -lscalapack-openmpi
bicg.f90:21:63:

     Integer             :: maxiter, descvec(:),descmat(:), info, mloc_vec ,nloc_vec
                                                               1
Warning: Unused variable ‘info’ declared at (1) [-Wunused-variable]
bicg.f90:23:47:

     Integer             :: i, ierr, rank, maxit
                                               1
Warning: Unused variable ‘maxit’ declared at (1) [-Wunused-variable]
bicg.f90:160:42:

   Character(len=7)            :: char_size
                                          1
Warning: Unused variable ‘char_size’ declared at (1) [-Wunused-variable]
bicg.f90:164:47:

   Integer             :: count_start, count_end,count_rate, dummy_int
                                               1
Warning: Unused variable ‘count_end’ declared at (1) [-Wunused-variable]
bicg.f90:164:58:

   Integer             :: count_start, count_end,count_rate, dummy_int
                                                          1
Warning: Unused variable ‘count_rate’ declared at (1) [-Wunused-variable]
bicg.f90:164:36:

   Integer             :: count_start, count_end,count_rate, dummy_int
                                    1
Warning: Unused variable ‘count_start’ declared at (1) [-Wunused-variable]
bicg.f90:164:69:

   Integer             :: count_start, count_end,count_rate, dummy_int
                                                                     1
Warning: Unused variable ‘dummy_int’ declared at (1) [-Wunused-variable]
bicg.f90:165:33:

   Real(wp)                :: time
                                 1
Warning: Unused variable ‘time’ declared at (1) [-Wunused-variable]
bicg.f90:159:37:

   Character, Parameter        :: UPLO="U"
                                     1
Warning: Unused parameter ‘uplo’ declared at (1) [-Wunused-parameter]
ian@eris:~/work/stack$ mpirun -np 3 ./a.out
 Read matrix
 Matrix broadcasted
 Rank           0         300           1
 Rank           2         200           1
 Set up done,solving
 Rank           1         200           1
           0         300           1
           2         200           1
           1         200           1
 rank           2 was here
 rank           0 was here
 rank           1 was here
 Bi-CG STAB solver converged in iteration #           3   6.1140242983184383E-008
 Bi-CG STAB solver converged in iteration #           3   6.1140242983184383E-008
 End of bicgstab
 Bi-CG STAB solver converged in iteration #           3   6.1140242983184383E-008
 End of bicgstab
 End of bicgstab
ian@eris:~/work/stack$ mpirun -np 4 ./a.out
 Read matrix
 Matrix broadcasted
 Rank           3         300           0
 Rank           0         400           1
 Rank           1         400           0
 Rank           2         300           1
 Set up done,solving
           1         400           0
           3         300           0
           2         300           1
           0         400           1
 Bi-CG STAB solver converged in iteration #           1   0.0000000000000000     
 Bi-CG STAB solver converged in iteration #           1   0.0000000000000000     
 rank           0 was here
 rank           2 was here
^Cian@eris:~/work/stack$

Efficient way to find norm of distributed vector in SCALAPACK

1 Answers1

Linked