https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90491
--- Comment #2 from g.peterh...@t-online.de ---
Example:

#include <array>
#include <iostream>

int main(const int argc, const char** argv)
{
  using value_type = float;
  using array_type = std::array<value_type, 16>;
  array_type a;
  for (size_t i=0; i<a.size(); ++i)
    a[i] = argc + i;
  for (size_t i=0; i<a.size(); ++i)
    std::cout<<(a[i])<<' ';
  return EXIT_SUCCESS;
}

Compiling this with gcc-9 and the options -O3 -march=native -mtune=intel -mno-vzeroupper (AVX/AVX2 are available on this Ryzen 2 machine, so -march=native enables them) generates this code:

0000000000000000 <main>: 0: 55 push %rbp 1: 48 63 ff movslq %edi,%rdi 4: 53 push %rbx 5: 48 8d 64 24 a8 lea -0x58(%rsp),%rsp a: 48 85 ff test %rdi,%rdi d: 0f 88 b9 01 00 00 js 1cc <main+0x1cc> 13: c4 e1 fa 2a c7 vcvtsi2ss %rdi,%xmm0,%xmm0 18: c5 fa 11 44 24 10 vmovss %xmm0,0x10(%rsp) 1e: 48 89 f8 mov %rdi,%rax 21: 48 83 c0 01 add $0x1,%rax 25: 0f 88 2a 03 00 00 js 355 <main+0x355> 2b: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0 30: c5 fa 11 44 24 14 vmovss %xmm0,0x14(%rsp) 36: 48 89 f8 mov %rdi,%rax 39: 48 83 c0 02 add $0x2,%rax 3d: 0f 88 f8 02 00 00 js 33b <main+0x33b> 43: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0 48: c5 fa 11 44 24 18 vmovss %xmm0,0x18(%rsp) 4e: 48 89 f8 mov %rdi,%rax 51: 48 83 c0 03 add $0x3,%rax 55: 0f 88 c6 02 00 00 js 321 <main+0x321> 5b: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0 60: c5 fa 11 44 24 1c vmovss %xmm0,0x1c(%rsp) 66: 48 89 f8 mov %rdi,%rax 69: 48 83 c0 04 add $0x4,%rax 6d: 0f 88 94 02 00 00 js 307 <main+0x307> 73: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0 78: c5 fa 11 44 24 20 vmovss %xmm0,0x20(%rsp) 7e: 48 89 f8 mov %rdi,%rax 81: 48 83 c0 05 add $0x5,%rax 85: 0f 88 62 02 00 00 js 2ed <main+0x2ed> 8b: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0 90: c5 fa 11 44 24 24 vmovss %xmm0,0x24(%rsp) 96: 48 89 f8 mov %rdi,%rax 99: 48 83 c0 06 add $0x6,%rax 9d: 0f 88 30 02 00 00 js 2d3 <main+0x2d3> a3: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0 a8: c5 fa 11 44 24 28 vmovss %xmm0,0x28(%rsp) ae: 48 89 f8 mov %rdi,%rax b1: 48 83 c0 07 add $0x7,%rax b5: 0f 88 fe 01 00 00 js 2b9 
<main+0x2b9> bb: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0 c0: c5 fa 11 44 24 2c vmovss %xmm0,0x2c(%rsp) c6: 48 89 f8 mov %rdi,%rax c9: 48 83 c0 08 add $0x8,%rax cd: 0f 88 cc 01 00 00 js 29f <main+0x29f> d3: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0 d8: c5 fa 11 44 24 30 vmovss %xmm0,0x30(%rsp) de: 48 89 f8 mov %rdi,%rax e1: 48 83 c0 09 add $0x9,%rax e5: 0f 88 9a 01 00 00 js 285 <main+0x285> eb: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0 f0: c5 fa 11 44 24 34 vmovss %xmm0,0x34(%rsp) f6: 48 89 f8 mov %rdi,%rax f9: 48 83 c0 0a add $0xa,%rax fd: 0f 88 68 01 00 00 js 26b <main+0x26b> 103: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0 108: c5 fa 11 44 24 38 vmovss %xmm0,0x38(%rsp) 10e: 48 89 f8 mov %rdi,%rax 111: 48 83 c0 0b add $0xb,%rax 115: 0f 88 36 01 00 00 js 251 <main+0x251> 11b: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0 120: c5 fa 11 44 24 3c vmovss %xmm0,0x3c(%rsp) 126: 48 89 f8 mov %rdi,%rax 129: 48 83 c0 0c add $0xc,%rax 12d: 0f 88 04 01 00 00 js 237 <main+0x237> 133: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0 138: c5 fa 11 44 24 40 vmovss %xmm0,0x40(%rsp) 13e: 48 89 f8 mov %rdi,%rax 141: 48 83 c0 0d add $0xd,%rax 145: 0f 88 d2 00 00 00 js 21d <main+0x21d> 14b: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0 150: c5 fa 11 44 24 44 vmovss %xmm0,0x44(%rsp) 156: 48 89 f8 mov %rdi,%rax 159: 48 83 c0 0e add $0xe,%rax 15d: 0f 88 a0 00 00 00 js 203 <main+0x203> 163: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0 168: c5 fa 11 44 24 48 vmovss %xmm0,0x48(%rsp) 16e: 48 83 c7 0f add $0xf,%rdi 172: 78 75 js 1e9 <main+0x1e9> 174: c4 e1 fa 2a c7 vcvtsi2ss %rdi,%xmm0,%xmm0 179: c5 fa 11 44 24 4c vmovss %xmm0,0x4c(%rsp) 17f: 48 8d 5c 24 10 lea 0x10(%rsp),%rbx 184: 48 8d 6c 24 50 lea 0x50(%rsp),%rbp 189: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) 190: c5 fa 10 03 vmovss (%rbx),%xmm0 194: bf 00 00 00 00 mov $0x0,%edi 195: R_X86_64_32 std::cout 199: c5 fa 5a c0 vcvtss2sd %xmm0,%xmm0,%xmm0 19d: 48 83 c3 04 add $0x4,%rbx 1a1: e8 00 00 00 00 callq 1a6 <main+0x1a6> 1a2: R_X86_64_PLT32 std::ostream& 
std::ostream::_M_insert<double>(double)-0x4 1a6: 48 89 c7 mov %rax,%rdi 1a9: ba 01 00 00 00 mov $0x1,%edx 1ae: c6 44 24 0f 20 movb $0x20,0xf(%rsp) 1b3: 48 8d 74 24 0f lea 0xf(%rsp),%rsi 1b8: e8 00 00 00 00 callq 1bd <main+0x1bd> 1b9: R_X86_64_PLT32 std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)-0x4 1bd: 48 39 eb cmp %rbp,%rbx 1c0: 75 ce jne 190 <main+0x190> 1c2: 48 8d 64 24 58 lea 0x58(%rsp),%rsp 1c7: 31 c0 xor %eax,%eax 1c9: 5b pop %rbx 1ca: 5d pop %rbp 1cb: c3 retq 1cc: 48 89 f8 mov %rdi,%rax 1cf: 48 89 fa mov %rdi,%rdx 1d2: 48 d1 e8 shr %rax 1d5: 83 e2 01 and $0x1,%edx 1d8: 48 09 d0 or %rdx,%rax 1db: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0 1e0: c5 fa 58 c0 vaddss %xmm0,%xmm0,%xmm0 1e4: e9 2f fe ff ff jmpq 18 <main+0x18> 1e9: 48 89 f8 mov %rdi,%rax 1ec: 83 e7 01 and $0x1,%edi 1ef: 48 d1 e8 shr %rax 1f2: 48 09 f8 or %rdi,%rax 1f5: c4 e1 fa 2a c0 vcvtsi2ss %rax,%xmm0,%xmm0 1fa: c5 fa 58 c0 vaddss %xmm0,%xmm0,%xmm0 1fe: e9 76 ff ff ff jmpq 179 <main+0x179> 203: 48 89 c2 mov %rax,%rdx 206: 83 e0 01 and $0x1,%eax 209: 48 d1 ea shr %rdx 20c: 48 09 c2 or %rax,%rdx 20f: c4 e1 fa 2a c2 vcvtsi2ss %rdx,%xmm0,%xmm0 214: c5 fa 58 c0 vaddss %xmm0,%xmm0,%xmm0 218: e9 4b ff ff ff jmpq 168 <main+0x168> 21d: 48 89 c2 mov %rax,%rdx 220: 83 e0 01 and $0x1,%eax 223: 48 d1 ea shr %rdx 226: 48 09 c2 or %rax,%rdx 229: c4 e1 fa 2a c2 vcvtsi2ss %rdx,%xmm0,%xmm0 22e: c5 fa 58 c0 vaddss %xmm0,%xmm0,%xmm0 232: e9 19 ff ff ff jmpq 150 <main+0x150> 237: 48 89 c2 mov %rax,%rdx 23a: 83 e0 01 and $0x1,%eax 23d: 48 d1 ea shr %rdx 240: 48 09 c2 or %rax,%rdx 243: c4 e1 fa 2a c2 vcvtsi2ss %rdx,%xmm0,%xmm0 248: c5 fa 58 c0 vaddss %xmm0,%xmm0,%xmm0 24c: e9 e7 fe ff ff jmpq 138 <main+0x138> 251: 48 89 c2 mov %rax,%rdx 254: 83 e0 01 and $0x1,%eax 257: 48 d1 ea shr %rdx 25a: 48 09 c2 or %rax,%rdx 25d: c4 e1 fa 2a c2 vcvtsi2ss %rdx,%xmm0,%xmm0 262: c5 fa 58 c0 vaddss 
%xmm0,%xmm0,%xmm0 266: e9 b5 fe ff ff jmpq 120 <main+0x120> 26b: 48 89 c2 mov %rax,%rdx 26e: 83 e0 01 and $0x1,%eax 271: 48 d1 ea shr %rdx 274: 48 09 c2 or %rax,%rdx 277: c4 e1 fa 2a c2 vcvtsi2ss %rdx,%xmm0,%xmm0 27c: c5 fa 58 c0 vaddss %xmm0,%xmm0,%xmm0 280: e9 83 fe ff ff jmpq 108 <main+0x108> 285: 48 89 c2 mov %rax,%rdx 288: 83 e0 01 and $0x1,%eax 28b: 48 d1 ea shr %rdx 28e: 48 09 c2 or %rax,%rdx 291: c4 e1 fa 2a c2 vcvtsi2ss %rdx,%xmm0,%xmm0 296: c5 fa 58 c0 vaddss %xmm0,%xmm0,%xmm0 29a: e9 51 fe ff ff jmpq f0 <main+0xf0> 29f: 48 89 c2 mov %rax,%rdx 2a2: 83 e0 01 and $0x1,%eax 2a5: 48 d1 ea shr %rdx 2a8: 48 09 c2 or %rax,%rdx 2ab: c4 e1 fa 2a c2 vcvtsi2ss %rdx,%xmm0,%xmm0 2b0: c5 fa 58 c0 vaddss %xmm0,%xmm0,%xmm0 2b4: e9 1f fe ff ff jmpq d8 <main+0xd8> 2b9: 48 89 c2 mov %rax,%rdx 2bc: 83 e0 01 and $0x1,%eax 2bf: 48 d1 ea shr %rdx 2c2: 48 09 c2 or %rax,%rdx 2c5: c4 e1 fa 2a c2 vcvtsi2ss %rdx,%xmm0,%xmm0 2ca: c5 fa 58 c0 vaddss %xmm0,%xmm0,%xmm0 2ce: e9 ed fd ff ff jmpq c0 <main+0xc0> 2d3: 48 89 c2 mov %rax,%rdx 2d6: 83 e0 01 and $0x1,%eax 2d9: 48 d1 ea shr %rdx 2dc: 48 09 c2 or %rax,%rdx 2df: c4 e1 fa 2a c2 vcvtsi2ss %rdx,%xmm0,%xmm0 2e4: c5 fa 58 c0 vaddss %xmm0,%xmm0,%xmm0 2e8: e9 bb fd ff ff jmpq a8 <main+0xa8> 2ed: 48 89 c2 mov %rax,%rdx 2f0: 83 e0 01 and $0x1,%eax 2f3: 48 d1 ea shr %rdx 2f6: 48 09 c2 or %rax,%rdx 2f9: c4 e1 fa 2a c2 vcvtsi2ss %rdx,%xmm0,%xmm0 2fe: c5 fa 58 c0 vaddss %xmm0,%xmm0,%xmm0 302: e9 89 fd ff ff jmpq 90 <main+0x90> 307: 48 89 c2 mov %rax,%rdx 30a: 83 e0 01 and $0x1,%eax 30d: 48 d1 ea shr %rdx 310: 48 09 c2 or %rax,%rdx 313: c4 e1 fa 2a c2 vcvtsi2ss %rdx,%xmm0,%xmm0 318: c5 fa 58 c0 vaddss %xmm0,%xmm0,%xmm0 31c: e9 57 fd ff ff jmpq 78 <main+0x78> 321: 48 89 c2 mov %rax,%rdx 324: 83 e0 01 and $0x1,%eax 327: 48 d1 ea shr %rdx 32a: 48 09 c2 or %rax,%rdx 32d: c4 e1 fa 2a c2 vcvtsi2ss %rdx,%xmm0,%xmm0 332: c5 fa 58 c0 vaddss %xmm0,%xmm0,%xmm0 336: e9 25 fd ff ff jmpq 60 <main+0x60> 33b: 48 89 c2 mov %rax,%rdx 33e: 83 e0 01 and $0x1,%eax 
341: 48 d1 ea shr %rdx 344: 48 09 c2 or %rax,%rdx 347: c4 e1 fa 2a c2 vcvtsi2ss %rdx,%xmm0,%xmm0 34c: c5 fa 58 c0 vaddss %xmm0,%xmm0,%xmm0 350: e9 f3 fc ff ff jmpq 48 <main+0x48> 355: 48 89 c2 mov %rax,%rdx 358: 83 e0 01 and $0x1,%eax 35b: 48 d1 ea shr %rdx 35e: 48 09 c2 or %rax,%rdx 361: c4 e1 fa 2a c2 vcvtsi2ss %rdx,%xmm0,%xmm0 366: c5 fa 58 c0 vaddss %xmm0,%xmm0,%xmm0 36a: e9 c1 fc ff ff jmpq 30 <main+0x30> 36f: 90 nop