I have some Fortran code that performs a matrix row exchange. In the legacy code, it is written as
do J = 1,N
save_val = B(IROW,J)
B(IROW,J) = B(JCOL,J)
B(JCOL,J) = save_val
end do
This exchanges row IROW
with row JCOL
(IROW
and JCOL
are integers). However, the function of that block of code is not intuitive. In my opinion, it would be more intuitive, or at least aid readability to write it as:
save_row = B(IROW,:)
B(IROW,:) = B(JCOL,:)
B(JCOL,:) = save_row
(it is more clear that rows are being moved).
From the image attached, it is clear that the looping methods provide better performance relative to the array operations. Why is this? Is it because this becomes a memory limited process when the number of elements in the array gets large? (i.e. the array would get "chunked") Or is it something else?
Compiling as gfortran -O3 test.f95
. Adding the flag fstack-arrays
did not make significant difference.
program test
implicit none
integer :: N
integer :: M
integer :: loop_max = 1e7
integer :: i ! loop index
real :: t1, t2
real :: time_loop, time_array, time_sub_loop, time_sub_array
real, dimension(:, :), allocatable :: B
real, dimension(:) , allocatable :: save_row
real :: save_val
integer :: IROW, J, JCOL
character(*), parameter :: format_header = '(A5, 1X, 4(A12,1X))'
character(*), parameter :: format_data = '(I5, 1X, 4(ES12.5, 1X))'
open(1, file = 'TimingRowExchange.txt', status = 'unknown')
write(1, format_header) 'N', 't_loop', 't_array', 't_sub_loop', 't_sub_array'
do N = 1, 100
M = N + 1
allocate(B(N,N), save_row(M))
call random_number(B)
JCOL = 1
IROW = 3
call CPU_time(t1)
do i = 1, loop_max
do J = 1,N
save_val = B(IROW,J)
B(IROW,J) = B(JCOL,J)
B(JCOL,J) = save_val
end do
end do
call CPU_time(t2)
time_loop = t2 - t1
! write ( *, * ) 'Using Loop =', t2 - t1
call CPU_time(t1)
do i = 1, loop_max
save_row(1:N) = B(IROW,:)
B(IROW,:) = B(JCOL,:)
B(JCOL,:) = save_row(1:N)
end do
call CPU_time(t2)
time_array = t2 - t1
! write ( *, * ) 'Using Array =', t2 - t1
call CPU_time(t1)
do i = 1, loop_max
call exchange_rows_loop(B, JCOL, IROW)
end do
call CPU_time(t2)
time_sub_loop = t2 - t1
! write ( *, * ) 'Loop Subrout =', t2 - t1
call CPU_time(t1)
do i = 1, loop_max
call exchange_rows_array(B, JCOL, IROW)
end do
call CPU_time(t2)
time_sub_array = t2 - t1
! write ( *, * ) 'Array Subrout =', t2 - t1
deallocate(B, save_row)
write(1, format_data) N, time_loop, time_array, time_sub_loop, time_sub_array
end do
contains
subroutine print_mat(A)
implicit none
real, dimension(:,:), intent(in) :: A
integer :: n
n = size(A,1) ! # of rows
do i = 1,n
print*, A(i,:)
end do
print*,
end subroutine print_mat
subroutine exchange_rows_loop(A, row1, row2)
implicit none
real, dimension(:,:), intent(in out) :: A
integer, intent(in) :: row1, row2
integer :: J
real :: save_val
do J = 1, size(A,1)
save_val = A(row1,J)
A(row1,J) = A(row2,J)
A(row2,J) = save_val
end do
end subroutine exchange_rows_loop
subroutine exchange_rows_array(A, row1, row2)
implicit none
real, dimension(:,:), intent(in out) :: A
integer, intent(in) :: row1, row2
real, dimension(size(A,1)) :: save_row
save_row = A(row1,:)
A(row1,:) = A(row2,:)
A(row2,:) = save_row
end subroutine exchange_rows_array
end program test