I am experiencing odd behaviour when running a Fortran application built with `gfortran` under MPI. Consider the following code:
! Print a marker, dump the current call stack, then print a second marker.
write(*,*) "Start"
! backtrace is a GNU Fortran extension, not a standard intrinsic.
call backtrace ()
write(*,*) "End"
If I compile and run the code with `gfortran`, everything goes as expected, that is, the code prints "Start", followed by the backtrace, and finally "End". However, if I compile it with `mpifort` and run it with `mpiexec`, the output is unpredictable, even though I ensure that only one process runs this sequence. Sometimes the output is as expected; sometimes the code prints "Start", "End" and only then the backtrace; and sometimes it even prints something (e.g. "End") in the middle of the backtrace.
Below you can find a minimal working example (`test.f90`) that reproduces this behaviour. The code can be compiled with
mpifort -g -cpp -DMPI test.f90 -o a.out
and executed with
mpirun -n 2 ./a.out
test.f90
!> Call-chain fixture: sub1 forwards through sub2 ... sub6 to sub7, which
!> prints "Start", a backtrace and "End". The nesting exists only so that the
!> backtrace has several frames to display.
module mod
  implicit none
  private
  public :: sub1

contains

  !> Public entry point of the chain; forwards to sub2.
  subroutine sub1
    call sub2
  end subroutine sub1

  !> Forwards to sub3.
  subroutine sub2
    call sub3
  end subroutine sub2

  !> Forwards to sub4.
  subroutine sub3
    call sub4
  end subroutine sub3

  !> Forwards to sub5.
  subroutine sub4
    call sub5
  end subroutine sub4

  !> Forwards to sub6.
  subroutine sub5
    call sub6
  end subroutine sub5

  !> Forwards to sub7.
  subroutine sub6
    call sub7
  end subroutine sub6

  !> Innermost frame: writes "Start", requests a backtrace, writes "End",
  !> flushing output_unit and error_unit around each step.
  !> When compiled with -DMPI it first reports the calling rank and the
  !> communicator size on output_unit.
  subroutine sub7
    use, intrinsic :: iso_fortran_env, only : error_unit, output_unit
#ifdef MPI
    use mpi
    integer :: rank, nprocs, ierr

    ! ierr is not inspected after either call.
    call mpi_comm_size(mpi_comm_world, nprocs, ierr)
    call mpi_comm_rank(mpi_comm_world, rank, ierr)
    write(output_unit,*) "From ", rank , " / ", nprocs
#endif
    ! NOTE(review): these flushes act on the Fortran units only; they may not
    ! control how an MPI launcher merges the two streams afterwards -- confirm.
    flush (output_unit)
    flush (error_unit)

    write(*,*) "Start"
    flush (output_unit)

    ! backtrace is a GNU Fortran extension. The flush of error_unit that
    ! follows presumes its output goes to the error unit -- TODO confirm.
    call backtrace ()
    flush (error_unit)

    write(*,*) "End"
    flush (output_unit)
  end subroutine sub7

end module mod
!> Driver for the backtrace example. Built with -DMPI it initialises MPI,
!> runs the sub1 call chain on rank 0 only and finalises MPI; built without
!> it, it simply calls sub1 once.
program test
  use mod
#ifdef MPI
  use mpi
#endif
  implicit none
  ! rank / nprocs / ierr are only referenced in the MPI build.
  integer :: rank, nprocs, ierr
#ifdef MPI
  call mpi_init(ierr)
  call mpi_comm_size(mpi_comm_world, nprocs, ierr)
  call mpi_comm_rank(mpi_comm_world, rank, ierr)
  ! Only rank 0 walks the call chain; every other rank goes straight to
  ! mpi_finalize.
  if (rank == 0) then
    call sub1
  end if
  call mpi_finalize(ierr)
#else
  call sub1
#endif
end program test
One output:
From 0 / 2
Start
#0 0x7f0b9db05d21 in ???
#1 0x55b1dc9cc3f7 in sub7
at /home/amcc/mwe/backtrace/test.f90:45
#2 0x55b1dc9cc4de in sub6
at /home/amcc/mwe/backtrace/test.f90:29
#3 0x55b1dc9cc4ea in sub5
at /home/amcc/mwe/backtrace/test.f90:25
#4 0x55b1dc9cc4f6 in sub4
at /home/amcc/mwe/backtrace/test.f90:21
#5 0x55b1dc9cc502 in sub3
at /home/amcc/mwe/backtrace/test.f90:17
#6 0x55b1dc9cc50e in sub2
at /home/amcc/mwe/backtrace/test.f90:13
#7 0x55b1dc9cc51a in __mod_MOD_sub1
at /home/amcc/mwe/backtrace/test.f90:9
End
#8 0x55b1dc9cc56b in test
at /home/amcc/mwe/backtrace/test.f90:70
#9 0x55b1dc9cc5b0 in main
at /home/amcc/mwe/backtrace/test.f90:57
Another output:
From 0 / 2
Start
End
#0 0x7f23ae8b8d21 in ???
#1 0x557937a503f7 in sub7
at /home/amcc/mwe/backtrace/test.f90:45
#2 0x557937a504de in sub6
at /home/amcc/mwe/backtrace/test.f90:29
#3 0x557937a504ea in sub5
at /home/amcc/mwe/backtrace/test.f90:25
#4 0x557937a504f6 in sub4
at /home/amcc/mwe/backtrace/test.f90:21
#5 0x557937a50502 in sub3
at /home/amcc/mwe/backtrace/test.f90:17
#6 0x557937a5050e in sub2
at /home/amcc/mwe/backtrace/test.f90:13
#7 0x557937a5051a in __mod_MOD_sub1
at /home/amcc/mwe/backtrace/test.f90:9
#8 0x557937a5056b in test
at /home/amcc/mwe/backtrace/test.f90:70
#9 0x557937a505b0 in main
at /home/amcc/mwe/backtrace/test.f90:57
Any hints on how to ensure that the output is written in the correct order?
Versions:
Output of mpifort --show
f95 -Wl,-Bsymbolic-functions -Wl,-z,relro -I/usr/include/x86_64-linux-gnu/mpich -I/usr/include/x86_64-linux-gnu/mpich -L/usr/lib/x86_64-linux-gnu -lmpichfort -lmpich
Output of mpifort --version
GNU Fortran (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0