https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87166

Martin Liška <marxin at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
   Last reconfirmed|                            |2018-08-31
     Ever confirmed|0                           |1

--- Comment #1 from Martin Liška <marxin at gcc dot gnu.org> ---
I hope I found the root cause:

perf report
-Ofast:

# Overhead  Command          Shared Object                 Symbol               
# ........  ...............  ............................  ........................................
#
    44.48%  calculix_peak.a  calculix_peak.amd64-m64-mine  [.] e_c3d_
    18.77%  calculix_peak.a  calculix_peak.amd64-m64-mine  [.] DVdot33
     6.82%  calculix_peak.a  calculix_peak.amd64-m64-mine  [.] Network_findAugmentingPath

-Ofast with PGO:

# Overhead  Command          Shared Object                 Symbol               
# ........  ...............  ............................  ........................................
#
    75.30%  calculix_peak.a  calculix_peak.amd64-m64-mine  [.] e_c3d_.cold.0
     7.53%  calculix_peak.a  calculix_peak.amd64-m64-mine  [.] DVdot33
     2.58%  calculix_peak.a  calculix_peak.amd64-m64-mine  [.] Network_findAugmentingPath
     1.63%  calculix_peak.a  calculix_peak.amd64-m64-mine  [.] nident_

So part of the function is put into the cold text section. The reason is that
the train and reference runs have totally different coverage. The hottest code
in the ref run is never executed in the train run:

e_c3d.f.gcov:

TRAIN run:

    #####:  591:                           sm(ii1+2,jj1+2)=sm(ii1,jj1)
        -:  592:                        endif
        -:  593:!
        -:  594:                     else
        -:  595:!
        -:  596:!                     buckling matrix  
        -:  597:!
        -:  598:                        senergyb=
        -:  599:     &                       (s11b*w(1,1)+s12b*(w(1,2)+w(2,1))
        -:  600:     &                       +s13b*(w(1,3)+w(3,1))+s22b*w(2,2)
    #####:  601:     &                      
+s23b*(w(2,3)+w(3,2))+s33b*w(3,3))*weight
    #####:  602:                        sm(ii1,jj1)=sm(ii1,jj1)-senergyb
    #####:  603:                       
sm(ii1+1,jj1+1)=sm(ii1+1,jj1+1)-senergyb
    #####:  604:                       
sm(ii1+2,jj1+2)=sm(ii1+2,jj1+2)-senergyb
        -:  605:!
        -:  606:                     endif
        -:  607:!
       5M:  608:                     ii1=ii1+3
        -:  609:                  enddo
     449k:  610:                  jj1=jj1+3
        -:  611:               enddo
        -:  612:            else
        -:  613:!
        -:  614:!               stiffness matrix for static and modal
        -:  615:!               2nd order calculations
        -:  616:!
        -:  617:!               large displacement stiffness
        -:  618:!               
    #####:  619:               do i1=1,3
    #####:  620:                  do j1=1,3
    #####:  621:                     vo(i1,j1)=0.d0
    #####:  622:                     do k1=1,nope
    #####:  623:                       
vo(i1,j1)=vo(i1,j1)+shp(j1,k1)*voldl(i1,k1)
        -:  624:                     enddo
        -:  625:                  enddo
        -:  626:               enddo
        -:  627:!
    #####:  628:               if(mattyp.eq.1) then
    #####:  629:                  call wcoef(v,vo,al,um)
        -:  630:               endif
        -:  631:!
        -:  632:!               calculating the total mass of the element for
        -:  633:!               lumping purposes: only for explicit nonlinear
        -:  634:!               dynamic calculations
        -:  635:!
    #####:  636:               if(mass.and.(iexpl.eq.1)) then
    #####:  637:                  summass=summass+rho*xsj
        -:  638:               endif
        -:  639:!
    #####:  640:               jj1=1
    #####:  641:               do jj=1,nope
        -:  642:!
    #####:  643:                  ii1=1
    #####:  644:                  do ii=1,jj
        -:  645:!
        -:  646:!                   all products of the shape functions for a
given ii
        -:  647:!                   and jj
        -:  648:!
    #####:  649:                     do i1=1,3
    #####:  650:                        do j1=1,3
    #####:  651:                           w(i1,j1)=shpj(i1,ii)*shpj(j1,jj)
        -:  652:                        enddo
        -:  653:                     enddo
        -:  654:!
    #####:  655:                     if(mattyp.eq.1) then
        -:  656:!
    #####:  657:                        do m1=1,3
    #####:  658:                           do m2=1,3
    #####:  659:                              do m3=1,3
    #####:  660:                                 do m4=1,3
        -:  661:                                    s(ii1+m2-1,jj1+m1-1)=
        -:  662:     &                                   s(ii1+m2-1,jj1+m1-1)
    #####:  663:     &                                  
+v(m4,m3,m2,m1)*w(m4,m3)*weight
        -:  664:                                 enddo
        -:  665:                              enddo
        -:  666:                           enddo
        -:  667:                        enddo
        -:  668:!                      
    #####:  669:                     elseif(mattyp.eq.2) then
        -:  670:!
    #####:  671:                        call
orthonl(w,vo,elas,s,ii1,jj1,weight)
        -:  672:!
        -:  673:                     else
        -:  674:!
    #####:  675:                      do i1=1,3
    #####:  676:                        iii1=ii1+i1-1
    #####:  677:                        do j1=1,3
    #####:  678:                          jjj1=jj1+j1-1
    #####:  679:                          do k1=1,3
    #####:  680:                            do l1=1,3
        -:  681:                              s(iii1,jjj1)=s(iii1,jjj1)
    #####:  682:     &                        
+anisox(i1,k1,j1,l1)*w(k1,l1)*weight
    #####:  683:                              do m1=1,3
        -:  684:                                s(iii1,jjj1)=s(iii1,jjj1)
        -:  685:     &                             
+anisox(i1,k1,m1,l1)*w(k1,l1)
        -:  686:     &                                 *vo(j1,m1)*weight
        -:  687:     &                             
+anisox(m1,k1,j1,l1)*w(k1,l1)
    #####:  688:     &                                 *vo(i1,m1)*weight
    #####:  689:                                do n1=1,3
        -:  690:                                  s(iii1,jjj1)=s(iii1,jjj1)
        -:  691:     &                                  +anisox(m1,k1,n1,l1)
        -:  692:     &                                 
*w(k1,l1)*vo(i1,m1)*vo(j1,n1)
    #####:  693:     &                                  *weight
        -:  694:                                enddo
        -:  695:                              enddo
        -:  696:                            enddo
        -:  697:                          enddo
        -:  698:                        enddo
        -:  699:                      enddo
        -:  700:!SPEC: The immediately preceding loop nest is also available in 
        -:  701:!SPEC: program-generated (much longer) form from the author's 
        -:  702:!SPEC: website (see 454.calculix/Docs) in file anisonl.f
        -:  703:!SPEC:
        -:  704:!SPEC:                   call
anisonl(w,vo,elas,s,ii1,jj1,weight)
        -:  705:!SPEC:
        -:  706:                     endif
        -:  707:!
        -:  708:!                   stress stiffness
        -:  709:!
        -:  710:                     senergy=

REF run:

    #####:  591:                           sm(ii1+2,jj1+2)=sm(ii1,jj1)
        -:  592:                        endif
        -:  593:!
        -:  594:                     else
        -:  595:!
        -:  596:!                     buckling matrix  
        -:  597:!
        -:  598:                        senergyb=
        -:  599:     &                       (s11b*w(1,1)+s12b*(w(1,2)+w(2,1))
        -:  600:     &                       +s13b*(w(1,3)+w(3,1))+s22b*w(2,2)
    #####:  601:     &                      
+s23b*(w(2,3)+w(3,2))+s33b*w(3,3))*weight
    #####:  602:                        sm(ii1,jj1)=sm(ii1,jj1)-senergyb
    #####:  603:                       
sm(ii1+1,jj1+1)=sm(ii1+1,jj1+1)-senergyb
    #####:  604:                       
sm(ii1+2,jj1+2)=sm(ii1+2,jj1+2)-senergyb
        -:  605:!
        -:  606:                     endif
        -:  607:!
    #####:  608:                     ii1=ii1+3
        -:  609:                  enddo
    #####:  610:                  jj1=jj1+3
        -:  611:               enddo
        -:  612:            else
        -:  613:!
        -:  614:!               stiffness matrix for static and modal
        -:  615:!               2nd order calculations
        -:  616:!
        -:  617:!               large displacement stiffness
        -:  618:!               
      11M:  619:               do i1=1,3
      36M:  620:                  do j1=1,3
      25M:  621:                     vo(i1,j1)=0.d0
     532M:  622:                     do k1=1,nope
     523M:  623:                       
vo(i1,j1)=vo(i1,j1)+shp(j1,k1)*voldl(i1,k1)
        -:  624:                     enddo
        -:  625:                  enddo
        -:  626:               enddo
        -:  627:!
       3M:  628:               if(mattyp.eq.1) then
       1M:  629:                  call wcoef(v,vo,al,um)
        -:  630:               endif
        -:  631:!
        -:  632:!               calculating the total mass of the element for
        -:  633:!               lumping purposes: only for explicit nonlinear
        -:  634:!               dynamic calculations
        -:  635:!
      3M*:  636:               if(mass.and.(iexpl.eq.1)) then
    #####:  637:                  summass=summass+rho*xsj
        -:  638:               endif
        -:  639:!
       3M:  640:               jj1=1
      58M:  641:               do jj=1,nope
        -:  642:!
      55M:  643:                  ii1=1
     637M:  644:                  do ii=1,jj
        -:  645:!
        -:  646:!                   all products of the shape functions for a
given ii
        -:  647:!                   and jj
        -:  648:!
       2G:  649:                     do i1=1,3
       8G:  650:                        do j1=1,3
       7G:  651:                           w(i1,j1)=shpj(i1,ii)*shpj(j1,jj)
        -:  652:                        enddo
        -:  653:                     enddo
        -:  654:!
     582M:  655:                     if(mattyp.eq.1) then
        -:  656:!
       1G:  657:                        do m1=1,3
       4G:  658:                           do m2=1,3
      11G:  659:                              do m3=1,3
      33G:  660:                                 do m4=1,3
        -:  661:                                    s(ii1+m2-1,jj1+m1-1)=
        -:  662:     &                                   s(ii1+m2-1,jj1+m1-1)
      31G:  663:     &                                  
+v(m4,m3,m2,m1)*w(m4,m3)*weight
        -:  664:                                 enddo
        -:  665:                              enddo
        -:  666:                           enddo
        -:  667:                        enddo
        -:  668:!                      
     299M:  669:                     elseif(mattyp.eq.2) then
        -:  670:!
       3M:  671:                        call
orthonl(w,vo,elas,s,ii1,jj1,weight)
        -:  672:!
        -:  673:                     else
        -:  674:!
       1G:  675:                      do i1=1,3
     889M:  676:                        iii1=ii1+i1-1
       4G:  677:                        do j1=1,3
       3G:  678:                          jjj1=jj1+j1-1
      12G:  679:                          do k1=1,3
      35G:  680:                            do l1=1,3
        -:  681:                              s(iii1,jjj1)=s(iii1,jjj1)
      24G:  682:     &                        
+anisox(i1,k1,j1,l1)*w(k1,l1)*weight
     104G:  683:                              do m1=1,3
        -:  684:                                s(iii1,jjj1)=s(iii1,jjj1)
        -:  685:     &                             
+anisox(i1,k1,m1,l1)*w(k1,l1)
        -:  686:     &                                 *vo(j1,m1)*weight
        -:  687:     &                             
+anisox(m1,k1,j1,l1)*w(k1,l1)
      72G:  688:     &                                 *vo(i1,m1)*weight
     312G:  689:                                do n1=1,3
        -:  690:                                  s(iii1,jjj1)=s(iii1,jjj1)
        -:  691:     &                                  +anisox(m1,k1,n1,l1)
        -:  692:     &                                 
*w(k1,l1)*vo(i1,m1)*vo(j1,n1)
     288G:  693:     &                                  *weight
        -:  694:                                enddo
        -:  695:                              enddo
        -:  696:                            enddo
        -:  697:                          enddo
        -:  698:                        enddo
        -:  699:                      enddo
        -:  700:!SPEC: The immediately preceding loop nest is also available in 
        -:  701:!SPEC: program-generated (much longer) form from the author's 
        -:  702:!SPEC: website (see 454.calculix/Docs) in file anisonl.f
        -:  703:!SPEC:
        -:  704:!SPEC:                   call
anisonl(w,vo,elas,s,ii1,jj1,weight)
        -:  705:!SPEC:
        -:  706:                     endif
        -:  707:!
        -:  708:!                   stress stiffness
        -:  709:!
        -:  710:                     senergy=

The hottest part is lines 689-693, which execute roughly 300G times in the
reference run. Because that code is never executed in the train run, we
optimize it for size.

Reply via email to