-1

I new to OpenMP and wondering what is wrong with this code. It works in serial. I am using Ubuntu Linux and gfortran.

 !
    program test_rand
    
    use omp_lib
    implicit none

    integer, parameter :: num_threads =36
    integer*8,parameter :: nc = 1000000000
    integer*8 ncirc,ncircs(0:35)
    integer i,thread_num,istart,iend,ppt
    real*8 x,y,dist,pi
    integer,parameter :: seed = 864

        call srand(seed)
        do i=1,4
            ncircs(i)=0
            end do
            ncirc=0

            ppt=(nc+num_threads-1)/num_threads

            istart=1
            iend=nc
            thread_num=1
    !$ call omp_set_num_threads(num_threads)
    !$omp parallel default(none) private(istart,iend,thread_num,i, &
    !$omp dist,x,y,ncircs) shared(ppt,ncirc)
        !$ thread_num = omp_get_thread_num()
        !$ istart=thread_num*ppt+1
        !$ iend = min(nc,thread_num*ppt+ppt)
            print*,thread_num

            do i=istart,iend
            x=rand()
            y=rand()
            dist=sqrt((x-0.5)**2+(y-0.5)**2)
            if (dist.le.0.5) ncircs(thread_num)=ncircs(thread_num)+1
            end do

        !$omp critical
            !$ print*, "thread_num=",thread_num, "istart=",istart," iend=",iend,ncircs(thread_num+1)
            ncirc=ncirc+ncircs(thread_num)
        !$omp end critical
    !$omp end parallel
            print*,ncircs
            pi=4*dble(ncirc)/dble(nc)
            print *,pi,ncirc
    end program test_rand

Output:

lakshmi@lakshmiVM:~/Documents/fortran$ gfortran -o pi pi.f95
lakshmi@lakshmiVM:~/Documents/fortran$ time ./pi
           1
                    2            785398441                    0                    0                    0             16777216                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0            268435456                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0
   3.1415937640000000                 785398441

real    0m25.211s
user    0m25.152s
sys 0m0.000s

lakshmi@lakshmiVM:~/Documents/fortran$ gfortran -o pi_omp pi.f95 -fopenmp
lakshmi@lakshmiVM:~/Documents/fortran$ time ./pi_omp
           3
          21
          25
          30
          35
          18
           4
          17
          11
          34
          12
          28
           7
          33
          29
          24
          31
          27
           8
          23
          10
          19
           0
           6
          26
          32
          16
          14
           1
          13
           5
          20
          15
           2
           9
          22
 thread_num=           6 istart=   166666669  iend=   194444446                    0
 thread_num=          11 istart=   305555559  iend=   333333336                    0
 thread_num=           5 istart=   138888891  iend=   166666668                    0
 thread_num=          27 istart=   750000007  iend=   777777784                    0
 thread_num=          19 istart=   527777783  iend=   555555560                    0
 thread_num=          25 istart=   694444451  iend=   722222228                    0
 thread_num=           2 istart=    55555557  iend=    83333334                    0
 thread_num=           1 istart=    27777779  iend=    55555556                    0
 thread_num=          21 istart=   583333339  iend=   611111116                    0
 thread_num=          18 istart=   500000005  iend=   527777782                    0
 thread_num=          20 istart=   555555561  iend=   583333338                    0
 thread_num=          24 istart=   666666673  iend=   694444450                    0
 thread_num=          29 istart=   805555563  iend=   833333340                    0
 thread_num=          28 istart=   777777785  iend=   805555562                    0
 thread_num=          26 istart=   722222229  iend=   750000006                    0
 thread_num=          12 istart=   333333337  iend=   361111114                    0
 thread_num=          22 istart=   611111117  iend=   638888894                    0
 thread_num=          23 istart=   638888895  iend=   666666672                    0
 thread_num=          10 istart=   277777781  iend=   305555558                    0
 thread_num=           3 istart=    83333335  iend=   111111112                    0
 thread_num=          34 istart=   944444453  iend=   972222230                    0
 thread_num=          17 istart=   472222227  iend=   500000004                    0
 thread_num=          15 istart=   416666671  iend=   444444448                    0
 thread_num=          14 istart=   388888893  iend=   416666670                    0
 thread_num=           8 istart=   222222225  iend=   250000002                    0
 thread_num=           7 istart=   194444447  iend=   222222224                    0
 thread_num=          30 istart=   833333341  iend=   861111118                    0
 thread_num=          32 istart=   888888897  iend=   916666674                    0
 thread_num=           9 istart=   250000003  iend=   277777780                    0
 thread_num=           4 istart=   111111113  iend=   138888890                    0
 thread_num=          16 istart=   444444449  iend=   472222226                    0
 thread_num=           0 istart=           1  iend=    27777778       93851473435152
 thread_num=          13 istart=   361111115  iend=   388888892                    0
 thread_num=          35 istart=   972222231  iend=  1000000000  4600331172021844405
 thread_num=          33 istart=   916666675  iend=   944444452                    0
 thread_num=          31 istart=   861111119  iend=   888888896                    0
      139938624438272                    0                    0                    0                    0      139942555560952                   10      140720670311236           3933000496                    0      139942559675136            466005475      139942562579304      140720670311528      139942559716466      140720670311360      140720670311376      139942562705897                    3      139942555561536                    1                    0                    1      139942562578432            361825296      139942555561536      139942562578432      139942562579304                    0      140720670311656      139942559737213      140720308486145           4294967295      139942562705897      139942557392112      139942562581024
   375409.03530958400            93852258827396

real    6m57.907s
user    8m35.083s
sys 219m5.670s
lakshmi@lakshmiVM:~/Documents/fortran$
  • Welcome, please take the [tour] and read [ask]. Always make sure that your post is properly formatted. Use proper tags, [tag:fortran] for Fortran and [tag:openmp] for OpenMP. You have to explain your problem. Do not write "it does not work", it does not say anything useful, tell us what is wrong. Are there any error messages? Or are the results wrong? How wrong? I assume here you mean the value of pi is wrong? – Vladimir F Героям слава Jun 10 '21 at 20:51
  • 1
    1) Assuming gfortran have you compiled with -Wall -Wextra -fcheck=all and where appropriate correct all the warnings and runtime errors? 2) Even better add -std=f2008 and correct all the compilation errors 3) Why do you only initialise 4 members of the ncircs array? 4) If ncircs is an array why is it private, and what does that do to the attempted initialisation? 5) If you ignore -std=f2008 Is rand() threadsafe? 6) I really hope this is not how you are being taught to write openmp, it is horrible - learn about worksharing constructs and reductions. Hope to write a proper answer tomorrow. – Ian Bush Jun 10 '21 at 21:34
  • Why do you have `ncircs(thread_num+1)` in the critical region? (Even worse is that there isn't an `ncircs(36)`.) And define only four elements out of 36 before using them? – francescalus Jun 10 '21 at 22:27
  • Note there are more errors than just an array out of bounds (and non-standard Fortran). The most egregious is uninitialised variables due to an array being scoped as private, when it should probably be shared - see my answer. – Ian Bush Jun 11 '21 at 05:54

1 Answers1

8

I'm afraid there's quite a lot wrong with your program, both in terms of errors and of style. Let's first see how you can use the compiler to pick up some of the problems, then discuss the problems this compiler can't help you with, and then talk about style. Finally I'll show how I would solve this.

So first off I would recommend when developing turning on all the compiler warning and error detecting flags. There's quite a lot of these. Let's see what -Wall -Wextra tells you about your code:

ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 --version
GNU Fortran (GCC) 11.1.0
Copyright © 2021 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -fopenmp -O -g pi_orig.f90
pi_orig.f90:20:7:

   20 |   ppt=(nc+num_threads-1)/num_threads
      |       1
Warning: Integer division truncated to constant ‘27777778’ at (1) [-Winteger-division]
pi_orig.f90:30:12:

   30 |   !$ iend = min(nc,thread_num*ppt+ppt)
      |            1
Warning: Possible change of value in conversion from INTEGER(8) to INTEGER(4) at (1) [-Wconversion]

I'm not worried about the first warning, but the second is telling me that iend might not be a long enough integer to store the iteration numbers you require - and by implication neither is i and a number of other variables. Thus if you increase nc after a certain point you just won't get increased accuracy.

However your program is not written in standard Fortran. Following the standard will in the long run make your life much easier. So let's add -std=2008 to use the compiler to detect where you are not using standard Fortran:

ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -std=f2008 -fopenmp -O -g pi_orig.f90
pi_orig.f90:8:12:

    8 |   integer*8,parameter :: nc = 1000000000
      |            1
Error: GNU Extension: Nonstandard type declaration INTEGER*8 at (1)
pi_orig.f90:9:12:

    9 |   integer*8 ncirc,ncircs(0:35)
      |            1
Error: GNU Extension: Nonstandard type declaration INTEGER*8 at (1)
pi_orig.f90:11:9:

   11 |   real*8 x,y,dist,pi
      |         1
Error: GNU Extension: Nonstandard type declaration REAL*8 at (1)
pi_orig.f90:37:23:

   37 |      if (dist.le.0.5) ncircs(thread_num)=ncircs(thread_num)+1
      |                       1
Error: Syntax error in IF-clause after (1)
pi_orig.f90:45:15:

   45 |   print*,ncircs
      |               1
Error: Function ‘ncircs’ requires an argument list at (1)
pi_orig.f90:27:12:

   27 |   !$omp dist,x,y,ncircs) shared(ppt,ncirc)
      |            1
Error: Symbol ‘dist’ at (1) has no IMPLICIT type
pi_orig.f90:20:9:

   20 |   ppt=(nc+num_threads-1)/num_threads
      |         1
Error: Symbol ‘nc’ at (1) has no IMPLICIT type
pi_orig.f90:18:7:

   18 |   ncirc=0
      |       1
Error: Symbol ‘ncirc’ at (1) has no IMPLICIT type
pi_orig.f90:46:4:

   46 |   pi=4*dble(ncirc)/dble(nc)
      |    1
Error: Symbol ‘pi’ at (1) has no IMPLICIT type; did you mean ‘i’?
pi_orig.f90:27:14:

   27 |   !$omp dist,x,y,ncircs) shared(ppt,ncirc)
      |              1
Error: Symbol ‘x’ at (1) has no IMPLICIT type
pi_orig.f90:27:16:

   27 |   !$omp dist,x,y,ncircs) shared(ppt,ncirc)
      |                1
Error: Symbol ‘y’ at (1) has no IMPLICIT type
pi_orig.f90:14:12:

   14 |   call srand(seed)
      |            1
Warning: The intrinsic ‘srand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘srand’ will be treated as if declared EXTERNAL.  Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:14:12: Warning: The intrinsic ‘srand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘srand’ will be treated as if declared EXTERNAL.  Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:14:18:

   14 |   call srand(seed)
      |                  1
Warning: The intrinsic ‘srand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘srand’ will be treated as if declared EXTERNAL.  Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:16:5:

   16 |      ncircs(i)=0
      |     1
Error: Function ‘ncircs’ at (1) has no IMPLICIT type
pi_orig.f90:34:11:

   34 |      x=rand()
      |           1
Warning: The intrinsic ‘rand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘rand’ will be treated as if declared EXTERNAL.  Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:34:11: Warning: The intrinsic ‘rand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘rand’ will be treated as if declared EXTERNAL.  Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:34:7:

   34 |      x=rand()
      |       1
Warning: The intrinsic ‘rand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘rand’ will be treated as if declared EXTERNAL.  Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:34:7:

   34 |      x=rand()
      |       1
Error: Function ‘rand’ at (1) has no IMPLICIT type
pi_orig.f90:35:7:

   35 |      y=rand()
      |       1
Error: Function ‘rand’ at (1) has no IMPLICIT type
pi_orig.f90:41:70:

   41 |   !$ print*, "thread_num=",thread_num, "istart=",istart," iend=",iend,ncircs(thread_num+1)
      |                                                                      1
Error: Function ‘ncircs’ at (1) has no IMPLICIT type
pi_orig.f90:42:20:

   42 |   ncirc=ncirc+ncircs(thread_num)
      |                    1
Error: Function ‘ncircs’ at (1) has no IMPLICIT type
pi_orig.f90:27:17:

   27 |   !$omp dist,x,y,ncircs) shared(ppt,ncirc)
      |                 1
Error: Object ‘ncircs’ is not a variable at (1)
ijb@ijb-Latitude-5410:~/work/stack$ 

Oh dear! In fact there's only really 3 errors:

  1. Real*8 is not, and has never been, part of standard Fortran. It is supported by gfortran, but might not be by other compilers. Don't use it!
  2. Integer*8 is not, and has never been, part of standard Fortran. It is supported by gfortran, but might not be by other compilers. Don't use it!
  3. srand and rand are not a standard Fortran intrinsic function. It is supported by gfortran, but might not be by other compilers. Don't use it!

Number 3 can be addressed by using the standard intrinsic Random_number (but see below). Numbers 1 and 2 are solved by using kinds - see Fortran 90 kind parameter . And because 1 and 2 affect variable declarations all lines that use variables declared by these erroneous lines are now flagged as errors themselves, hence the cascade of errors.

Fixing these solves some things, but I'm afraid there are more errors. In particular there are accesses of arrays outside their bounds. You can automatically detect these by use of -fcheck=all which I very strongly recommend while debugging, but note it will slow down your program a lot, so when running production runs omit it. If I fix the errors noted above, and then use -fcheck=all when I run the program I get:

ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -std=f2008 -fcheck=all -fopenmp -O -g pi2.f90 
pi2.f90:22:7:

   22 |   ppt=(nc+num_threads-1)/num_threads
      |       1
Warning: Integer division truncated to constant ‘2778’ at (1) [-Winteger-division]
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
                    1
                   28
                   14
                   26
                   32
                    9
                    5
                    8
                   10
                   35
                   21
                   29
                    0
                   22
                   17
                    4
                   19
 thread_num=                   14 istart=                38893  iend=                41670                    0
 thread_num=                   21 istart=                58339  iend=                61116                    0
 thread_num=                   28 istart=                77785  iend=                80562                    0
                   20
                   12
                    7
                   24
                   23
                   15
                   34
                   25
 thread_num=                   26 istart=                72229  iend=                75006                    0
 thread_num=                   23 istart=                63895  iend=                66672                    0
 thread_num=                   10 istart=                27781  iend=                30558                    0
 thread_num=                   15 istart=                41671  iend=                44448                    0
 thread_num=                    1 istart=                 2779  iend=                 5556                    0
 thread_num=                   25 istart=                69451  iend=                72228                    0
 thread_num=                    9 istart=                25003  iend=                27780                    0
                   16
                   33
                   30
 thread_num=                    0 istart=                    1  iend=                 2778                    0
                   27
 thread_num=                    7 istart=                19447  iend=                22224                    0
                   31
                    2
                    6
 thread_num=                   17 istart=                47227  iend=                50004                    0
                    3
                   13
                   11
 thread_num=                   27 istart=                75007  iend=                77784                    0
 thread_num=                   30 istart=                83341  iend=                86118                    0
 thread_num=                   32 istart=                88897  iend=                91674                    0
 thread_num=                    4 istart=                11113  iend=                13890                    0
 thread_num=                   19 istart=                52783  iend=                55560                    0
 thread_num=                    2 istart=                 5557  iend=                 8334                    0
                   18
 thread_num=                    6 istart=                16669  iend=                19446                    0
 thread_num=                    5 istart=                13891  iend=                16668                    0
 thread_num=                    3 istart=                 8335  iend=                11112                    0
 thread_num=                   24 istart=                66673  iend=                69450                    0
 thread_num=                    8 istart=                22225  iend=                25002                    0
 thread_num=                   34 istart=                94453  iend=                97230                    0
 thread_num=                   11 istart=                30559  iend=                33336                    0
 thread_num=                   29 istart=                80563  iend=                83340                    0
 thread_num=                   22 istart=                61117  iend=                63894                    0
 thread_num=                   16 istart=                44449  iend=                47226                    0
 thread_num=                   20 istart=                55561  iend=                58338                    0
 thread_num=                   12 istart=                33337  iend=                36114                    0
At line 45 of file pi2.f90
Fortran runtime error: Index '36' of dimension 1 of array 'ncircs' above upper bound of 35

Error termination. Backtrace:
#0  0x7f580fce1d01 in ???
#1  0x7f580fce2849 in ???
#2  0x7f580fce2ec6 in ???
#3  0x4015ab in MAIN__._omp_fn.0
    at /home/ijb/work/stack/pi2.f90:45
#4  0x7f580fb4a77d in ???
#5  0x7f580fab1608 in start_thread
    at /build/glibc-YbNSs7/glibc-2.31/nptl/pthread_create.c:477
#6  0x7f580f9d6292 in ???
#7  0xffffffffffffffff in ???

So you can use the compiler to find a lot of problems. However there are more that it can't catch. Most importantly you scope the array ncircs as private, but you don't initialise it within the parallel region - When you enter the parallel region each thread will generate their own new version of a private variable, and by default any initialisation outside the parallel region will be forgotten. So you are using an uninitialized variable. Even without that also note in the code as presented you only initialise part of ncircs, not all that you use, and you are off by one in the indexing.

Over and above the errors noted above, however, this is not really how you should write openmp code. OpenMP provides methods to automatically share out the work in loops, you really shouldn't be sharing out the iterations manually as you have done in your code. Further it provides reductions to perform exactly this sort of calculation, critical should only be used where it is needed, not for the specialist case shown here for which reduce excels. There is also no need for the ncircs array - each thread only needs one value for it, so why declare an array of them? As a rule of thumb if you declare memory that increases in size with the number of threads you are probably doing something wrong. Finally I also wouldn't burn the number of threads into the code, I would use the environment variable to set the number of threads so it can be changed without a recompile.

So here is how I would solve your problem, and showing that it compiles and runs correctly:

ijb@ijb-Latitude-5410:~/work/stack$ cat pi_ijb.f90
Program pi_calc

  Use, Intrinsic :: iso_fortran_env, Only : wp => real64, li => int64

  Use omp_lib
  
  Implicit None

  Real( wp ), Parameter :: pi_exact = 4.0_wp * Atan( 1.0_wp )

  Integer( li ), Parameter :: n_sample = 1000000000_li
  Integer      , Parameter :: seed = 864

  Real( wp ), Dimension( 1:2 ) :: rand
  
  Real( wp ) :: dist
  Real( wp ) :: pi_approx
  
  Integer( li ) :: ncirc
  Integer( li ) :: i_sample
  Integer( li ) :: start, finish, rate
  
  Integer :: thread_num
  Integer :: n_seed
  Integer :: i

  ! Assume using the maximum number of threads made available
  Write( *, * ) 'Running on ', omp_get_max_threads(), ' threads'

  ncirc = 0

  Call system_clock( start, rate )
  ! Create threads
  !$omp parallel default( none ) private( thread_num, n_seed, i, rand, dist ) reduction( +:ncirc )

  ! Set up the random number generator. Assume it is thread safe
  thread_num = omp_get_thread_num()
  Call Random_seed( size = n_seed )
  Call Random_seed( put = [ ( i * seed * ( thread_num + 1 ), i = 1, n_seed ) ] )

  ! Workshare the loop
  !$omp do
  Sampling_loop: Do i_sample = 1, n_sample
     Call Random_number( rand )
     rand = rand - 0.5_wp
     dist = rand( 1 ) * rand( 1 ) + rand( 2 ) * rand( 2 )
     If( dist <= 0.5_wp * 0.5_wp ) Then
        ncirc = ncirc + 1
     End If
  End Do Sampling_loop
  !$omp end do

  ! Close down the parallel region
  !$omp end parallel
  Call system_clock( finish, rate )

  pi_approx = 4.0_wp * Real( ncirc, wp ) / Real( n_sample, wp )
  Write( *, * ) 'Using ', n_sample, ' sampling points: '
  Write( *, * ) 'Approx pi ', pi_approx
  Write( *, * ) 'Exact  pi ', pi_exact
  Write( *, * ) 'Error     ', Abs( pi_approx - pi_exact )
  Write( *, * ) 'This took ', Real( finish - start ) / Real( rate ), ' seconds'

End Program pi_calc
ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -fcheck=all -O -g -fopenmp pi_ijb.f90 
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=4
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
 Running on            4  threads
 Using            1000000000  sampling points: 
 Approx pi    3.1415474360000002     
 Exact  pi    3.1415926535897931     
 Error        4.5217589792923008E-005
 This took    5.21012735      seconds
ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -O -g -fopenmp pi_ijb.f90 
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=1
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
 Running on            1  threads
 Using            1000000000  sampling points: 
 Approx pi    3.1415804920000001     
 Exact  pi    3.1415926535897931     
 Error        1.2161589793002747E-005
 This took    18.5533371      seconds
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=2
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
 Running on            2  threads
 Using            1000000000  sampling points: 
 Approx pi    3.1416029320000001     
 Exact  pi    3.1415926535897931     
 Error        1.0278410206954192E-005
 This took    8.87815666      seconds
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=4
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
 Running on            4  threads
 Using            1000000000  sampling points: 
 Approx pi    3.1415474360000002     
 Exact  pi    3.1415926535897931     
 Error        4.5217589792923008E-005
 This took    4.49851656      seconds

Finally note I have assumed the standard Random number generator is thread safe, for simplicitly. However it is my understanding that this is not gauranteed, and if you want to do this kind of thing seriously you should look into providing your own,

Ian Bush
  • 6,996
  • 1
  • 21
  • 27
  • 1
    A very through answer!! The advice to use all the tools provided by the compiler is excellent. The lore is that subscript checking (part of -fcheck=all) has a high run-time cost, but I've been frequently surprised by how small the cost is. I completely agree that after a program has been throughly debugged, if run-time speed is important, subscript checking should be omitted. – M. S. B. Jun 11 '21 at 09:24