Issue 129812
Summary Early exit optimization of Fortran array expressions
Labels new issue
Assignees
Reporter ivan-pi
    Consider a function for checking if an array is sorted:
```fortran
!
! Check an array of integers is sorted in ascending order
!
logical function is_sorted_scalar(n,a) result(is_sorted)
 integer, intent(in) :: n
    integer, intent(in) :: a(n)
    integer :: i
 !$omp simd simdlen(8) early_exit
    do i = 2, n
        if (a(i) < a(i-1)) then
            is_sorted = .false.
            return
 end if
    end do
    is_sorted = .true.
end function

logical function is_sorted_all(n,a) result(is_sorted)
    integer, intent(in) :: n
 integer, intent(in) :: a(n)
    is_sorted = all(a(2:n) >= a(1:n-1))
end function

program benchmark

    implicit none
    integer, allocatable :: a(:)

    integer :: i, n

    external :: is_sorted_scalar
 external :: is_sorted_all

    logical :: is_sorted_scalar
    logical :: is_sorted_all

    character(len=32) :: str
    integer :: tmp

    tmp = 0
    n = 20000

    if (command_argument_count() > 0) then
 call get_command_argument(1,str)
        read(str,*) tmp
        if (tmp > 0) n = tmp
    end if
    print *, "n = ",  n

    allocate(a(n))

 ! Fill ascending numbers
    do i = 1, n
        a(i) = i
    end do

 ! Introduce an unsorted value
    a(100) = 1001
    !a(101) = 1000

 call measure(100000,a,is_sorted_scalar,"scalar")
    call measure(100000,a,is_sorted_all,   "all")

contains

    impure subroutine measure(nreps,a,func,name)
        integer, intent(in) :: nreps
 integer, intent(in) :: a(:)
        logical :: func
 character(len=*), intent(in) :: name
        integer(8) :: t1, t2, rate
 real(kind(1.0d0)) :: elapsed
        logical :: res

 character(len=12) :: str

        integer :: k
        call system_clock(t1)
        do k = 1, nreps
            res = func(size(a),a)
        end do
        call system_clock(t2,rate)

 elapsed = (t2 - t1)/real(rate,kind(elapsed))

        str = adjustl(name)
        print '(A12,F12.4,L2)', str, elapsed/nreps*1.e6, res

        ! Time is in microseconds

    end subroutine

end program
```

It appears to me that in `is_sorted_all` flang generates a temporary array for the `a(2:n) >= a(1:n-1)` _expression_, and then performs the `all` reduction. This is fast due to vectorization, but it missed the chance of early exit. 

The effect is noticeable in the runtime:
```
~/fortran/is_sorted$ make FC=flang-new FFLAGS="-O2 -march=native" standalone
flang-new -O2 -march=native -o standalone standalone.f90
~/fortran/is_sorted$ ./standalone 
 n =  20000
scalar 0.0673 F
all               1.7358 F
~/fortran/is_sorted$ make clean
rm -rf *.o benchmark standalone
~/fortran/is_sorted$ make FC=gfortran FFLAGS="-O2 -march=native" standalone
gfortran -O2 -march=native -o standalone standalone.f90
~/fortran/is_sorted$ ./standalone 
 n = 20000
scalar            0.0389 F
all               0.0390 F
```

It would be nice if early exit vectorization were also supported (https://discourse.llvm.org/t/rfc-supporting-more-early-exit-loops/84690). With x86 SIMD extensions this still has to be done manually it seems: http://0x80.pl/notesen/2018-04-11-simd-is-sorted.html
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to