https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90491

--- Comment #2 from g.peterh...@t-online.de ---
example:

#include <array>
#include <iostream>

// Minimal reproducer for the code-generation report: fills a 16-element
// float array with (argc + index) and prints every element to std::cout.
// argv is never read; argc is presumably used only so that the stored values
// are not compile-time constants.
// Returns EXIT_SUCCESS.
int main(const int argc, const char** argv)
{
        using value_type = float;
        using array_type = std::array<value_type, 16>;

        // Left uninitialized on purpose: every element is written by the loop below.
        array_type      a;

        // `argc + i` mixes int with size_t, so the sum is evaluated as size_t
        // (unsigned) and only then converted to float by the assignment.
        for (size_t i=0; i<a.size(); ++i)
                a[i] = argc + i;

        // Print each element followed by a single space (no trailing newline).
        for (size_t i=0; i<a.size(); ++i)
                std::cout<<(a[i])<<' ';

        // NOTE(review): size_t and EXIT_SUCCESS are used although only <array>
        // and <iostream> are included; this presumably relies on transitive
        // includes (<cstddef>/<cstdlib> would make it explicit).
        return EXIT_SUCCESS;
}

Compiling this with gcc-9 and -O3 -march=native (AVX/AVX2 are available on
this Ryzen 2) -mtune=intel -mno-vzeroupper
generates the following code:

0000000000000000 <main>:
    0:  55                      push   %rbp
    1:  48 63 ff                movslq %edi,%rdi
    4:  53                      push   %rbx
    5:  48 8d 64 24 a8          lea    -0x58(%rsp),%rsp
    a:  48 85 ff                test   %rdi,%rdi
    d:  0f 88 b9 01 00 00       js     1cc <main+0x1cc>
   13:  c4 e1 fa 2a c7          vcvtsi2ss %rdi,%xmm0,%xmm0
   18:  c5 fa 11 44 24 10       vmovss %xmm0,0x10(%rsp)
   1e:  48 89 f8                mov    %rdi,%rax
   21:  48 83 c0 01             add    $0x1,%rax
   25:  0f 88 2a 03 00 00       js     355 <main+0x355>
   2b:  c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
   30:  c5 fa 11 44 24 14       vmovss %xmm0,0x14(%rsp)
   36:  48 89 f8                mov    %rdi,%rax
   39:  48 83 c0 02             add    $0x2,%rax
   3d:  0f 88 f8 02 00 00       js     33b <main+0x33b>
   43:  c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
   48:  c5 fa 11 44 24 18       vmovss %xmm0,0x18(%rsp)
   4e:  48 89 f8                mov    %rdi,%rax
   51:  48 83 c0 03             add    $0x3,%rax
   55:  0f 88 c6 02 00 00       js     321 <main+0x321>
   5b:  c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
   60:  c5 fa 11 44 24 1c       vmovss %xmm0,0x1c(%rsp)
   66:  48 89 f8                mov    %rdi,%rax
   69:  48 83 c0 04             add    $0x4,%rax
   6d:  0f 88 94 02 00 00       js     307 <main+0x307>
   73:  c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
   78:  c5 fa 11 44 24 20       vmovss %xmm0,0x20(%rsp)
   7e:  48 89 f8                mov    %rdi,%rax
   81:  48 83 c0 05             add    $0x5,%rax
   85:  0f 88 62 02 00 00       js     2ed <main+0x2ed>
   8b:  c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
   90:  c5 fa 11 44 24 24       vmovss %xmm0,0x24(%rsp)
   96:  48 89 f8                mov    %rdi,%rax
   99:  48 83 c0 06             add    $0x6,%rax
   9d:  0f 88 30 02 00 00       js     2d3 <main+0x2d3>
   a3:  c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
   a8:  c5 fa 11 44 24 28       vmovss %xmm0,0x28(%rsp)
   ae:  48 89 f8                mov    %rdi,%rax
   b1:  48 83 c0 07             add    $0x7,%rax
   b5:  0f 88 fe 01 00 00       js     2b9 <main+0x2b9>
   bb:  c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
   c0:  c5 fa 11 44 24 2c       vmovss %xmm0,0x2c(%rsp)
   c6:  48 89 f8                mov    %rdi,%rax
   c9:  48 83 c0 08             add    $0x8,%rax
   cd:  0f 88 cc 01 00 00       js     29f <main+0x29f>
   d3:  c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
   d8:  c5 fa 11 44 24 30       vmovss %xmm0,0x30(%rsp)
   de:  48 89 f8                mov    %rdi,%rax
   e1:  48 83 c0 09             add    $0x9,%rax
   e5:  0f 88 9a 01 00 00       js     285 <main+0x285>
   eb:  c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
   f0:  c5 fa 11 44 24 34       vmovss %xmm0,0x34(%rsp)
   f6:  48 89 f8                mov    %rdi,%rax
   f9:  48 83 c0 0a             add    $0xa,%rax
   fd:  0f 88 68 01 00 00       js     26b <main+0x26b>
  103:  c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
  108:  c5 fa 11 44 24 38       vmovss %xmm0,0x38(%rsp)
  10e:  48 89 f8                mov    %rdi,%rax
  111:  48 83 c0 0b             add    $0xb,%rax
  115:  0f 88 36 01 00 00       js     251 <main+0x251>
  11b:  c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
  120:  c5 fa 11 44 24 3c       vmovss %xmm0,0x3c(%rsp)
  126:  48 89 f8                mov    %rdi,%rax
  129:  48 83 c0 0c             add    $0xc,%rax
  12d:  0f 88 04 01 00 00       js     237 <main+0x237>
  133:  c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
  138:  c5 fa 11 44 24 40       vmovss %xmm0,0x40(%rsp)
  13e:  48 89 f8                mov    %rdi,%rax
  141:  48 83 c0 0d             add    $0xd,%rax
  145:  0f 88 d2 00 00 00       js     21d <main+0x21d>
  14b:  c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
  150:  c5 fa 11 44 24 44       vmovss %xmm0,0x44(%rsp)
  156:  48 89 f8                mov    %rdi,%rax
  159:  48 83 c0 0e             add    $0xe,%rax
  15d:  0f 88 a0 00 00 00       js     203 <main+0x203>
  163:  c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
  168:  c5 fa 11 44 24 48       vmovss %xmm0,0x48(%rsp)
  16e:  48 83 c7 0f             add    $0xf,%rdi
  172:  78 75                   js     1e9 <main+0x1e9>
  174:  c4 e1 fa 2a c7          vcvtsi2ss %rdi,%xmm0,%xmm0
  179:  c5 fa 11 44 24 4c       vmovss %xmm0,0x4c(%rsp)
  17f:  48 8d 5c 24 10          lea    0x10(%rsp),%rbx
  184:  48 8d 6c 24 50          lea    0x50(%rsp),%rbp
  189:  0f 1f 80 00 00 00 00    nopl   0x0(%rax)
  190:  c5 fa 10 03             vmovss (%rbx),%xmm0
  194:  bf 00 00 00 00          mov    $0x0,%edi
                        195: R_X86_64_32        std::cout
  199:  c5 fa 5a c0             vcvtss2sd %xmm0,%xmm0,%xmm0
  19d:  48 83 c3 04             add    $0x4,%rbx
  1a1:  e8 00 00 00 00          callq  1a6 <main+0x1a6>
                        1a2: R_X86_64_PLT32     std::ostream& std::ostream::_M_insert<double>(double)-0x4
  1a6:  48 89 c7                mov    %rax,%rdi
  1a9:  ba 01 00 00 00          mov    $0x1,%edx
  1ae:  c6 44 24 0f 20          movb   $0x20,0xf(%rsp)
  1b3:  48 8d 74 24 0f          lea    0xf(%rsp),%rsi
  1b8:  e8 00 00 00 00          callq  1bd <main+0x1bd>
                        1b9: R_X86_64_PLT32     std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)-0x4
  1bd:  48 39 eb                cmp    %rbp,%rbx
  1c0:  75 ce                   jne    190 <main+0x190>
  1c2:  48 8d 64 24 58          lea    0x58(%rsp),%rsp
  1c7:  31 c0                   xor    %eax,%eax
  1c9:  5b                      pop    %rbx
  1ca:  5d                      pop    %rbp
  1cb:  c3                      retq
  1cc:  48 89 f8                mov    %rdi,%rax
  1cf:  48 89 fa                mov    %rdi,%rdx
  1d2:  48 d1 e8                shr    %rax
  1d5:  83 e2 01                and    $0x1,%edx
  1d8:  48 09 d0                or     %rdx,%rax
  1db:  c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
  1e0:  c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
  1e4:  e9 2f fe ff ff          jmpq   18 <main+0x18>
  1e9:  48 89 f8                mov    %rdi,%rax
  1ec:  83 e7 01                and    $0x1,%edi
  1ef:  48 d1 e8                shr    %rax
  1f2:  48 09 f8                or     %rdi,%rax
  1f5:  c4 e1 fa 2a c0          vcvtsi2ss %rax,%xmm0,%xmm0
  1fa:  c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
  1fe:  e9 76 ff ff ff          jmpq   179 <main+0x179>
  203:  48 89 c2                mov    %rax,%rdx
  206:  83 e0 01                and    $0x1,%eax
  209:  48 d1 ea                shr    %rdx
  20c:  48 09 c2                or     %rax,%rdx
  20f:  c4 e1 fa 2a c2          vcvtsi2ss %rdx,%xmm0,%xmm0
  214:  c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
  218:  e9 4b ff ff ff          jmpq   168 <main+0x168>
  21d:  48 89 c2                mov    %rax,%rdx
  220:  83 e0 01                and    $0x1,%eax
  223:  48 d1 ea                shr    %rdx
  226:  48 09 c2                or     %rax,%rdx
  229:  c4 e1 fa 2a c2          vcvtsi2ss %rdx,%xmm0,%xmm0
  22e:  c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
  232:  e9 19 ff ff ff          jmpq   150 <main+0x150>
  237:  48 89 c2                mov    %rax,%rdx
  23a:  83 e0 01                and    $0x1,%eax
  23d:  48 d1 ea                shr    %rdx
  240:  48 09 c2                or     %rax,%rdx
  243:  c4 e1 fa 2a c2          vcvtsi2ss %rdx,%xmm0,%xmm0
  248:  c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
  24c:  e9 e7 fe ff ff          jmpq   138 <main+0x138>
  251:  48 89 c2                mov    %rax,%rdx
  254:  83 e0 01                and    $0x1,%eax
  257:  48 d1 ea                shr    %rdx
  25a:  48 09 c2                or     %rax,%rdx
  25d:  c4 e1 fa 2a c2          vcvtsi2ss %rdx,%xmm0,%xmm0
  262:  c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
  266:  e9 b5 fe ff ff          jmpq   120 <main+0x120>
  26b:  48 89 c2                mov    %rax,%rdx
  26e:  83 e0 01                and    $0x1,%eax
  271:  48 d1 ea                shr    %rdx
  274:  48 09 c2                or     %rax,%rdx
  277:  c4 e1 fa 2a c2          vcvtsi2ss %rdx,%xmm0,%xmm0
  27c:  c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
  280:  e9 83 fe ff ff          jmpq   108 <main+0x108>
  285:  48 89 c2                mov    %rax,%rdx
  288:  83 e0 01                and    $0x1,%eax
  28b:  48 d1 ea                shr    %rdx
  28e:  48 09 c2                or     %rax,%rdx
  291:  c4 e1 fa 2a c2          vcvtsi2ss %rdx,%xmm0,%xmm0
  296:  c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
  29a:  e9 51 fe ff ff          jmpq   f0 <main+0xf0>
  29f:  48 89 c2                mov    %rax,%rdx
  2a2:  83 e0 01                and    $0x1,%eax
  2a5:  48 d1 ea                shr    %rdx
  2a8:  48 09 c2                or     %rax,%rdx
  2ab:  c4 e1 fa 2a c2          vcvtsi2ss %rdx,%xmm0,%xmm0
  2b0:  c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
  2b4:  e9 1f fe ff ff          jmpq   d8 <main+0xd8>
  2b9:  48 89 c2                mov    %rax,%rdx
  2bc:  83 e0 01                and    $0x1,%eax
  2bf:  48 d1 ea                shr    %rdx
  2c2:  48 09 c2                or     %rax,%rdx
  2c5:  c4 e1 fa 2a c2          vcvtsi2ss %rdx,%xmm0,%xmm0
  2ca:  c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
  2ce:  e9 ed fd ff ff          jmpq   c0 <main+0xc0>
  2d3:  48 89 c2                mov    %rax,%rdx
  2d6:  83 e0 01                and    $0x1,%eax
  2d9:  48 d1 ea                shr    %rdx
  2dc:  48 09 c2                or     %rax,%rdx
  2df:  c4 e1 fa 2a c2          vcvtsi2ss %rdx,%xmm0,%xmm0
  2e4:  c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
  2e8:  e9 bb fd ff ff          jmpq   a8 <main+0xa8>
  2ed:  48 89 c2                mov    %rax,%rdx
  2f0:  83 e0 01                and    $0x1,%eax
  2f3:  48 d1 ea                shr    %rdx
  2f6:  48 09 c2                or     %rax,%rdx
  2f9:  c4 e1 fa 2a c2          vcvtsi2ss %rdx,%xmm0,%xmm0
  2fe:  c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
  302:  e9 89 fd ff ff          jmpq   90 <main+0x90>
  307:  48 89 c2                mov    %rax,%rdx
  30a:  83 e0 01                and    $0x1,%eax
  30d:  48 d1 ea                shr    %rdx
  310:  48 09 c2                or     %rax,%rdx
  313:  c4 e1 fa 2a c2          vcvtsi2ss %rdx,%xmm0,%xmm0
  318:  c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
  31c:  e9 57 fd ff ff          jmpq   78 <main+0x78>
  321:  48 89 c2                mov    %rax,%rdx
  324:  83 e0 01                and    $0x1,%eax
  327:  48 d1 ea                shr    %rdx
  32a:  48 09 c2                or     %rax,%rdx
  32d:  c4 e1 fa 2a c2          vcvtsi2ss %rdx,%xmm0,%xmm0
  332:  c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
  336:  e9 25 fd ff ff          jmpq   60 <main+0x60>
  33b:  48 89 c2                mov    %rax,%rdx
  33e:  83 e0 01                and    $0x1,%eax
  341:  48 d1 ea                shr    %rdx
  344:  48 09 c2                or     %rax,%rdx
  347:  c4 e1 fa 2a c2          vcvtsi2ss %rdx,%xmm0,%xmm0
  34c:  c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
  350:  e9 f3 fc ff ff          jmpq   48 <main+0x48>
  355:  48 89 c2                mov    %rax,%rdx
  358:  83 e0 01                and    $0x1,%eax
  35b:  48 d1 ea                shr    %rdx
  35e:  48 09 c2                or     %rax,%rdx
  361:  c4 e1 fa 2a c2          vcvtsi2ss %rdx,%xmm0,%xmm0
  366:  c5 fa 58 c0             vaddss %xmm0,%xmm0,%xmm0
  36a:  e9 c1 fc ff ff          jmpq   30 <main+0x30>
  36f:  90                      nop

Reply via email to