https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57952
mmokrejs at gmail dot com changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |mmokrejs at gmail dot com
--- Comment #2 from mmokrejs at gmail dot com ---
I see a similar problem with gcc-4.9.6: it does not use the 256-bit YMM
registers at all and emits only instructions that operate on 128-bit XMM registers.
Try https://people.eecs.berkeley.edu/~samw/research/code/stream/stream.c
$ gcc -march=native -o stream stream.c
$ objdump -d stream | grep ymm
$
icc does the job properly:
$ icc -qopenmp -O3 -xhost stream.c
$ objdump -d a.out | grep ymm
401323: c5 fd 10 00 vmovupd (%rax),%ymm0
40132d: c5 fd 10 48 20 vmovupd 0x20(%rax),%ymm1
401332: c5 7d 10 40 40 vmovupd 0x40(%rax),%ymm8
401337: c5 7d 10 48 60 vmovupd 0x60(%rax),%ymm9
40133c: c5 7d 10 1d 3c 65 00 vmovupd 0x653c(%rip),%ymm11 # 407880 <_IO_stdin_used+0x340>
401344: c5 fd 5c 50 f8 vsubpd -0x8(%rax),%ymm0,%ymm2
401349: c5 f5 5c 58 18 vsubpd 0x18(%rax),%ymm1,%ymm3
40134e: c5 3d 5c 50 38 vsubpd 0x38(%rax),%ymm8,%ymm10
401353: c5 35 5c 60 58 vsubpd 0x58(%rax),%ymm9,%ymm12
401358: c5 a5 59 e2 vmulpd %ymm2,%ymm11,%ymm4
401361: c5 a5 59 f3 vmulpd %ymm3,%ymm11,%ymm6
401365: c4 41 25 59 ea vmulpd %ymm10,%ymm11,%ymm13
40136a: c4 41 25 59 fc vmulpd %ymm12,%ymm11,%ymm15
40136f: c5 fd e6 ec vcvttpd2dq %ymm4,%xmm5
401373: c5 fd e6 fe vcvttpd2dq %ymm6,%xmm7
401377: c4 41 7d e6 f5 vcvttpd2dq %ymm13,%xmm14
40137c: c5 f5 ef c9 vpxor %ymm1,%ymm1,%ymm1
401380: c4 41 7d e6 ef vcvttpd2dq %ymm15,%xmm13
401385: c4 e2 7d 58 e2 vpbroadcastd %xmm2,%ymm4
40139c: c4 e3 55 38 c7 01 vinserti128 $0x1,%xmm7,%ymm5,%ymm0
4013a2: c4 e2 7d 3d d9 vpmaxsd %ymm1,%ymm0,%ymm3
4013a7: c4 e2 65 39 f4 vpminsd %ymm4,%ymm3,%ymm6
4013ac: c4 43 0d 38 fd 01 vinserti128 $0x1,%xmm13,%ymm14,%ymm15
4013b2: c4 e2 05 3d e9 vpmaxsd %ymm1,%ymm15,%ymm5
4013b7: c4 e2 55 39 fe vpminsd %ymm6,%ymm5,%ymm7
4013bc: c4 c3 7d 39 f8 01 vextracti128 $0x1,%ymm7,%xmm8
401caa: c5 fd 57 c0 vxorpd %ymm0,%ymm0,%ymm0
401cc8: c5 fd 10 16 vmovupd (%rsi),%ymm2
401cd0: c5 fd 10 5e 20 vmovupd 0x20(%rsi),%ymm3
401cd5: c5 fd 10 66 40 vmovupd 0x40(%rsi),%ymm4
401cda: c5 fd 10 76 60 vmovupd 0x60(%rsi),%ymm6
401ce7: c4 e2 ed b8 0f vfmadd231pd (%rdi),%ymm2,%ymm1
401cec: c4 e2 e5 b8 47 20 vfmadd231pd 0x20(%rdi),%ymm3,%ymm0
401cf2: c5 dd 59 6f 40 vmulpd 0x40(%rdi),%ymm4,%ymm5
401cf7: c5 cd 59 7f 60 vmulpd 0x60(%rdi),%ymm6,%ymm7
401cfc: c5 d5 58 c9 vaddpd %ymm1,%ymm5,%ymm1
401d00: c5 c5 58 c0 vaddpd %ymm0,%ymm7,%ymm0
401d0d: c5 f5 58 c0 vaddpd %ymm0,%ymm1,%ymm0
401d11: c4 e3 7d 19 c1 01 vextractf128 $0x1,%ymm0,%xmm1
401d6a: c5 fd 10 8c c3 c0 be vmovupd 0x60bec0(%rbx,%rax,8),%ymm1
401d73: c5 f5 59 94 c3 c0 60 vmulpd 0x268660c0(%rbx,%rax,8),%ymm1,%ymm2
401d7c: c5 ed 58 c0 vaddpd %ymm0,%ymm2,%ymm0
401d85: c4 e3 7d 19 c1 01 vextractf128 $0x1,%ymm0,%xmm1
401f9b: c4 e2 7d 19 44 24 10 vbroadcastsd 0x10(%rsp),%ymm0
401fa2: c5 fd 10 8c d1 c0 be vmovupd 0x60bec0(%rcx,%rdx,8),%ymm1
401fab: c4 e2 fd a8 8c d1 c0 vfmadd213pd 0x268660c0(%rcx,%rdx,8),%ymm0,%ymm1
401fb5: c5 fd 2b 8c d1 c0 02 vmovntpd %ymm1,0x4cac02c0(%rcx,%rdx,8)
40213b: c5 fd 10 84 d1 c0 60 vmovupd 0x268660c0(%rcx,%rdx,8),%ymm0
402144: c5 fd 58 8c d1 c0 be vaddpd 0x60bec0(%rcx,%rdx,8),%ymm0,%ymm1
40214d: c5 fd 2b 8c d1 c0 02 vmovntpd %ymm1,0x4cac02c0(%rcx,%rdx,8)
4022dd: c4 e2 7d 19 44 24 10 vbroadcastsd 0x10(%rsp),%ymm0
4022e4: c5 fd 2b 84 d1 c0 02 vmovntpd %ymm0,0x4cac02c0(%rcx,%rdx,8)
4025c2: c5 fd 10 05 76 52 00 vmovupd 0x5276(%rip),%ymm0 # 407840 <_IO_stdin_used+0x300>
4025de: c5 fd 59 0e vmulpd (%rsi),%ymm0,%ymm1
4025e2: c5 fd 11 0e vmovupd %ymm1,(%rsi)
402759: c5 fd 10 15 ff 50 00 vmovupd 0x50ff(%rip),%ymm2 # 407860 <_IO_stdin_used+0x320>
402761: c5 fd 10 0d d7 50 00 vmovupd 0x50d7(%rip),%ymm1 # 407840 <_IO_stdin_used+0x300>
402769: c5 fd 57 c0 vxorpd %ymm0,%ymm0,%ymm0
40276d: c5 fd 2b 94 f8 c0 60 vmovntpd %ymm2,0x268660c0(%rax,%rdi,8)
402776: c5 fd 2b 8c f8 c0 be vmovntpd %ymm1,0x60bec0(%rax,%rdi,8)
40277f: c5 fd 2b 84 f8 c0 02 vmovntpd %ymm0,0x4cac02c0(%rax,%rdi,8)
4030b0: c5 fd 10 84 c8 c0 60 vmovupd 0x268660c0(%rax,%rcx,8),%ymm0
4030b9: c5 fd 2b 84 c8 c0 02 vmovntpd %ymm0,0x4cac02c0(%rax,%rcx,8)
4032f0: c5 fd 10 44 24 08 vmovupd 0x8(%rsp),%ymm0
4032fb: c5 fd 10 4c 24 28 vmovupd 0x28(%rsp),%ymm1
403301: c5 7d 10 44 24 48 vmovupd 0x48(%rsp),%ymm8
403307: c5 7d 10 4c 24 68 vmovupd 0x68(%rsp),%ymm9
40330d: c5 7d 10 1d 6b 45 00 vmovupd 0x456b(%rip),%ymm11 # 407880 <_IO_stdin_used+0x340>
403315: c5 fd 5c 14 24 vsubpd (%rsp),%ymm0,%ymm2
40331a: c5 f5 5c 5c 24 20 vsubpd 0x20(%rsp),%ymm1,%ymm3
403320: c5 3d 5c 54 24 40 vsubpd 0x40(%rsp),%ymm8,%ymm10
403326: c5 35 5c 64 24 60 vsubpd 0x60(%rsp),%ymm9,%ymm12
40332c: c5 a5 59 e2 vmulpd %ymm2,%ymm11,%ymm4
403334: c5 a5 59 f3 vmulpd %ymm3,%ymm11,%ymm6
403338: c4 41 25 59 ea vmulpd %ymm10,%ymm11,%ymm13
40333d: c4 41 25 59 fc vmulpd %ymm12,%ymm11,%ymm15
403342: c5 fd e6 ec vcvttpd2dq %ymm4,%xmm5
403346: c5 fd e6 fe vcvttpd2dq %ymm6,%xmm7
40334a: c4 41 7d e6 f5 vcvttpd2dq %ymm13,%xmm14
40334f: c5 f5 ef c9 vpxor %ymm1,%ymm1,%ymm1
403353: c4 41 7d e6 ef vcvttpd2dq %ymm15,%xmm13
403358: c4 e2 7d 58 e2 vpbroadcastd %xmm2,%ymm4
403368: c4 e3 55 38 c7 01 vinserti128 $0x1,%xmm7,%ymm5,%ymm0
40336e: c4 e2 7d 3d d9 vpmaxsd %ymm1,%ymm0,%ymm3
403373: c4 e2 65 39 f4 vpminsd %ymm4,%ymm3,%ymm6
403378: c4 43 0d 38 fd 01 vinserti128 $0x1,%xmm13,%ymm14,%ymm15
40337e: c4 e2 05 3d e9 vpmaxsd %ymm1,%ymm15,%ymm5
403383: c4 e2 55 39 fe vpminsd %ymm6,%ymm5,%ymm7
403388: c4 c3 7d 39 f8 01 vextracti128 $0x1,%ymm7,%xmm8
403694: c4 c1 7d 10 84 c0 c0 vmovupd 0x268660c0(%r8,%rax,8),%ymm0
40369e: c4 c1 7d 2b 84 c0 c0 vmovntpd %ymm0,0x4cac02c0(%r8,%rax,8)
4038ba: c5 fd 10 84 d1 c0 60 vmovupd 0x268660c0(%rcx,%rdx,8),%ymm0
4038c3: c5 fd 58 8c d1 c0 be vaddpd 0x60bec0(%rcx,%rdx,8),%ymm0,%ymm1
4038cc: c5 fd 2b 8c d1 c0 02 vmovntpd %ymm1,0x4cac02c0(%rcx,%rdx,8)
403b4a: c4 e2 7d 19 c1 vbroadcastsd %xmm1,%ymm0
403b4f: c5 fd 10 94 d1 c0 be vmovupd 0x60bec0(%rcx,%rdx,8),%ymm2
403b58: c4 e2 fd a8 94 d1 c0 vfmadd213pd 0x268660c0(%rcx,%rdx,8),%ymm0,%ymm2
403b62: c5 fd 2b 94 d1 c0 02 vmovntpd %ymm2,0x4cac02c0(%rcx,%rdx,8)
403e2e: c5 fd 57 c0 vxorpd %ymm0,%ymm0,%ymm0
403e40: c5 fd 10 14 dd c0 be vmovupd 0x60bec0(,%rbx,8),%ymm2
403e4d: c5 fd 10 1c dd e0 be vmovupd 0x60bee0(,%rbx,8),%ymm3
403e56: c5 fd 10 24 dd 00 bf vmovupd 0x60bf00(,%rbx,8),%ymm4
403e5f: c5 fd 10 34 dd 20 bf vmovupd 0x60bf20(,%rbx,8),%ymm6
403e68: c4 e2 ed b8 0c dd c0 vfmadd231pd 0x268660c0(,%rbx,8),%ymm2,%ymm1
403e72: c4 e2 e5 b8 04 dd e0 vfmadd231pd 0x268660e0(,%rbx,8),%ymm3,%ymm0
403e7c: c5 dd 59 2c dd 00 61 vmulpd 0x26866100(,%rbx,8),%ymm4,%ymm5
403e85: c5 cd 59 3c dd 20 61 vmulpd 0x26866120(,%rbx,8),%ymm6,%ymm7
403e8e: c5 d5 58 c9 vaddpd %ymm1,%ymm5,%ymm1
403e92: c5 c5 58 c0 vaddpd %ymm0,%ymm7,%ymm0
403e9f: c5 f5 58 c0 vaddpd %ymm0,%ymm1,%ymm0
403ea3: c4 e3 7d 19 c1 01 vextractf128 $0x1,%ymm0,%xmm1
403efb: c4 c1 7d 10 8c c1 c0 vmovupd 0x60bec0(%r9,%rax,8),%ymm1
403f05: c4 c1 75 59 94 c1 c0 vmulpd 0x268660c0(%r9,%rax,8),%ymm1,%ymm2
403f0f: c5 ed 58 c0 vaddpd %ymm0,%ymm2,%ymm0
403f18: c4 e3 7d 19 c1 01 vextractf128 $0x1,%ymm0,%xmm1
404216: c4 e2 7d 19 c1 vbroadcastsd %xmm1,%ymm0
40421b: c5 fd 2b 84 d1 c0 02 vmovntpd %ymm0,0x4cac02c0(%rcx,%rdx,8)
404690: c5 fe 6f 0e vmovdqu (%rsi),%ymm1
404694: c5 fe 6f 56 20 vmovdqu 0x20(%rsi),%ymm2
404699: c5 fe 6f 5e 40 vmovdqu 0x40(%rsi),%ymm3
40469e: c5 fe 6f 66 60 vmovdqu 0x60(%rsi),%ymm4
4046a3: c5 fe 6f ae 80 00 00 vmovdqu 0x80(%rsi),%ymm5
4046ab: c5 fe 6f b6 a0 00 00 vmovdqu 0xa0(%rsi),%ymm6
4046b3: c5 fe 6f be c0 00 00 vmovdqu 0xc0(%rsi),%ymm7
4046bb: c5 7e 6f 86 e0 00 00 vmovdqu 0xe0(%rsi),%ymm8
4046c3: c5 fd 7f 0f vmovdqa %ymm1,(%rdi)
4046c7: c5 fd 7f 57 20 vmovdqa %ymm2,0x20(%rdi)
4046cc: c5 fd 7f 5f 40 vmovdqa %ymm3,0x40(%rdi)
4046d1: c5 fd 7f 67 60 vmovdqa %ymm4,0x60(%rdi)
4046d6: c5 fd 7f af 80 00 00 vmovdqa %ymm5,0x80(%rdi)
4046de: c5 fd 7f b7 a0 00 00 vmovdqa %ymm6,0xa0(%rdi)
4046e6: c5 fd 7f bf c0 00 00 vmovdqa %ymm7,0xc0(%rdi)
4046ee: c5 7d 7f 87 e0 00 00 vmovdqa %ymm8,0xe0(%rdi)
40475c: c5 fe 6f 0e vmovdqu (%rsi),%ymm1
404760: c5 fe 6f 56 20 vmovdqu 0x20(%rsi),%ymm2
404765: c5 fe 6f 5e 40 vmovdqu 0x40(%rsi),%ymm3
40476a: c5 fe 6f 66 60 vmovdqu 0x60(%rsi),%ymm4
40476f: c5 fe 6f ae 80 00 00 vmovdqu 0x80(%rsi),%ymm5
404777: c5 fe 6f b6 a0 00 00 vmovdqu 0xa0(%rsi),%ymm6
40477f: c5 fe 6f be c0 00 00 vmovdqu 0xc0(%rsi),%ymm7
404787: c5 7e 6f 86 e0 00 00 vmovdqu 0xe0(%rsi),%ymm8
404796: c5 fd e7 0f vmovntdq %ymm1,(%rdi)
40479a: c5 fd e7 57 20 vmovntdq %ymm2,0x20(%rdi)
40479f: c5 fd e7 5f 40 vmovntdq %ymm3,0x40(%rdi)
4047a4: c5 fd e7 67 60 vmovntdq %ymm4,0x60(%rdi)
4047a9: c5 fd e7 af 80 00 00 vmovntdq %ymm5,0x80(%rdi)
4047b1: c5 fd e7 b7 a0 00 00 vmovntdq %ymm6,0xa0(%rdi)
4047b9: c5 fd e7 bf c0 00 00 vmovntdq %ymm7,0xc0(%rdi)
4047c1: c5 7d e7 87 e0 00 00 vmovntdq %ymm8,0xe0(%rdi)
4048f0: c5 fc 10 86 20 ff ff vmovups -0xe0(%rsi),%ymm0
4048f8: c5 fc 29 87 20 ff ff vmovaps %ymm0,-0xe0(%rdi)
404900: c5 fc 10 86 40 ff ff vmovups -0xc0(%rsi),%ymm0
404908: c5 fc 29 87 40 ff ff vmovaps %ymm0,-0xc0(%rdi)
404910: c5 fc 10 86 60 ff ff vmovups -0xa0(%rsi),%ymm0
404918: c5 fc 29 87 60 ff ff vmovaps %ymm0,-0xa0(%rdi)
404920: c5 fc 10 46 80 vmovups -0x80(%rsi),%ymm0
404925: c5 fc 29 47 80 vmovaps %ymm0,-0x80(%rdi)
40492a: c5 fc 10 46 a0 vmovups -0x60(%rsi),%ymm0
40492f: c5 fc 29 47 a0 vmovaps %ymm0,-0x60(%rdi)
404934: c5 fc 10 46 c0 vmovups -0x40(%rsi),%ymm0
404939: c5 fc 29 47 c0 vmovaps %ymm0,-0x40(%rdi)
40493e: c5 fc 10 46 e0 vmovups -0x20(%rsi),%ymm0
404943: c5 fc 29 47 e0 vmovaps %ymm0,-0x20(%rdi)
404a40: c5 fc 10 06 vmovups (%rsi),%ymm0
404a44: c5 fc 11 07 vmovups %ymm0,(%rdi)
404a48: c5 fc 10 44 0e e0 vmovups -0x20(%rsi,%rcx,1),%ymm0
404a4e: c5 fc 11 44 0f e0 vmovups %ymm0,-0x20(%rdi,%rcx,1)
404a60: c5 fc 10 06 vmovups (%rsi),%ymm0
404a64: c5 fc 11 07 vmovups %ymm0,(%rdi)
404a68: c5 fc 10 46 20 vmovups 0x20(%rsi),%ymm0
404a6d: c5 fc 11 47 20 vmovups %ymm0,0x20(%rdi)
404a72: c5 fc 10 44 0e e0 vmovups -0x20(%rsi,%rcx,1),%ymm0
404a78: c5 fc 11 44 0f e0 vmovups %ymm0,-0x20(%rdi,%rcx,1)
404a90: c5 fc 10 06 vmovups (%rsi),%ymm0
404a94: c5 fc 11 07 vmovups %ymm0,(%rdi)
404a98: c5 fc 10 46 20 vmovups 0x20(%rsi),%ymm0
404a9d: c5 fc 11 47 20 vmovups %ymm0,0x20(%rdi)
404aa2: c5 fc 10 46 40 vmovups 0x40(%rsi),%ymm0
404aa7: c5 fc 11 47 40 vmovups %ymm0,0x40(%rdi)
404aac: c5 fc 10 44 0e e0 vmovups -0x20(%rsi,%rcx,1),%ymm0
404ab2: c5 fc 11 44 0f e0 vmovups %ymm0,-0x20(%rdi,%rcx,1)
404ac0: c5 fc 10 06 vmovups (%rsi),%ymm0
404ac4: c5 fc 11 07 vmovups %ymm0,(%rdi)
404ac8: c5 fc 10 46 20 vmovups 0x20(%rsi),%ymm0
404acd: c5 fc 11 47 20 vmovups %ymm0,0x20(%rdi)
404ad2: c5 fc 10 46 40 vmovups 0x40(%rsi),%ymm0
404ad7: c5 fc 11 47 40 vmovups %ymm0,0x40(%rdi)
404adc: c5 fc 10 46 60 vmovups 0x60(%rsi),%ymm0
404ae1: c5 fc 11 47 60 vmovups %ymm0,0x60(%rdi)
404ae6: c5 fc 10 44 0e e0 vmovups -0x20(%rsi,%rcx,1),%ymm0
404aec: c5 fc 11 44 0f e0 vmovups %ymm0,-0x20(%rdi,%rcx,1)
404b00: c5 fc 10 06 vmovups (%rsi),%ymm0
404b04: c5 fc 11 07 vmovups %ymm0,(%rdi)
404b08: c5 fc 10 46 20 vmovups 0x20(%rsi),%ymm0
404b0d: c5 fc 11 47 20 vmovups %ymm0,0x20(%rdi)
404b12: c5 fc 10 46 40 vmovups 0x40(%rsi),%ymm0
404b17: c5 fc 11 47 40 vmovups %ymm0,0x40(%rdi)
404b1c: c5 fc 10 46 60 vmovups 0x60(%rsi),%ymm0
404b21: c5 fc 11 47 60 vmovups %ymm0,0x60(%rdi)
404b26: c5 fc 10 86 80 00 00 vmovups 0x80(%rsi),%ymm0
404b2e: c5 fc 11 87 80 00 00 vmovups %ymm0,0x80(%rdi)
404b36: c5 fc 10 44 0e e0 vmovups -0x20(%rsi,%rcx,1),%ymm0
404b3c: c5 fc 11 44 0f e0 vmovups %ymm0,-0x20(%rdi,%rcx,1)
404b50: c5 fc 10 06 vmovups (%rsi),%ymm0
404b54: c5 fc 11 07 vmovups %ymm0,(%rdi)
404b58: c5 fc 10 46 20 vmovups 0x20(%rsi),%ymm0
404b5d: c5 fc 11 47 20 vmovups %ymm0,0x20(%rdi)
404b62: c5 fc 10 46 40 vmovups 0x40(%rsi),%ymm0
404b67: c5 fc 11 47 40 vmovups %ymm0,0x40(%rdi)
404b6c: c5 fc 10 46 60 vmovups 0x60(%rsi),%ymm0
404b71: c5 fc 11 47 60 vmovups %ymm0,0x60(%rdi)
404b76: c5 fc 10 86 80 00 00 vmovups 0x80(%rsi),%ymm0
404b7e: c5 fc 11 87 80 00 00 vmovups %ymm0,0x80(%rdi)
404b86: c5 fc 10 86 a0 00 00 vmovups 0xa0(%rsi),%ymm0
404b8e: c5 fc 11 87 a0 00 00 vmovups %ymm0,0xa0(%rdi)
404b96: c5 fc 10 44 0e e0 vmovups -0x20(%rsi,%rcx,1),%ymm0
404b9c: c5 fc 11 44 0f e0 vmovups %ymm0,-0x20(%rdi,%rcx,1)
404bb0: c5 fc 10 06 vmovups (%rsi),%ymm0
404bb4: c5 fc 11 07 vmovups %ymm0,(%rdi)
404bb8: c5 fc 10 46 20 vmovups 0x20(%rsi),%ymm0
404bbd: c5 fc 11 47 20 vmovups %ymm0,0x20(%rdi)
404bc2: c5 fc 10 46 40 vmovups 0x40(%rsi),%ymm0
404bc7: c5 fc 11 47 40 vmovups %ymm0,0x40(%rdi)
404bcc: c5 fc 10 46 60 vmovups 0x60(%rsi),%ymm0
404bd1: c5 fc 11 47 60 vmovups %ymm0,0x60(%rdi)
404bd6: c5 fc 10 86 80 00 00 vmovups 0x80(%rsi),%ymm0
404bde: c5 fc 11 87 80 00 00 vmovups %ymm0,0x80(%rdi)
404be6: c5 fc 10 86 a0 00 00 vmovups 0xa0(%rsi),%ymm0
404bee: c5 fc 11 87 a0 00 00 vmovups %ymm0,0xa0(%rdi)
404bf6: c5 fc 10 86 c0 00 00 vmovups 0xc0(%rsi),%ymm0
404bfe: c5 fc 11 87 c0 00 00 vmovups %ymm0,0xc0(%rdi)
404c06: c5 fc 10 44 0e e0 vmovups -0x20(%rsi,%rcx,1),%ymm0
404c0c: c5 fc 11 44 0f e0 vmovups %ymm0,-0x20(%rdi,%rcx,1)
404c20: c5 fc 10 84 0e 00 ff vmovups -0x100(%rsi,%rcx,1),%ymm0
404c29: c5 fc 11 84 0f 00 ff vmovups %ymm0,-0x100(%rdi,%rcx,1)
404c32: c5 fc 10 84 0e 20 ff vmovups -0xe0(%rsi,%rcx,1),%ymm0
404c3b: c5 fc 11 84 0f 20 ff vmovups %ymm0,-0xe0(%rdi,%rcx,1)
404c44: c5 fc 10 84 0e 40 ff vmovups -0xc0(%rsi,%rcx,1),%ymm0
404c4d: c5 fc 11 84 0f 40 ff vmovups %ymm0,-0xc0(%rdi,%rcx,1)
404c56: c5 fc 10 84 0e 60 ff vmovups -0xa0(%rsi,%rcx,1),%ymm0
404c5f: c5 fc 11 84 0f 60 ff vmovups %ymm0,-0xa0(%rdi,%rcx,1)
404c68: c5 fc 10 44 0e 80 vmovups -0x80(%rsi,%rcx,1),%ymm0
404c6e: c5 fc 11 44 0f 80 vmovups %ymm0,-0x80(%rdi,%rcx,1)
404c74: c5 fc 10 44 0e a0 vmovups -0x60(%rsi,%rcx,1),%ymm0
404c7a: c5 fc 11 44 0f a0 vmovups %ymm0,-0x60(%rdi,%rcx,1)
404c80: c5 fc 10 44 0e c0 vmovups -0x40(%rsi,%rcx,1),%ymm0
404c86: c5 fc 11 44 0f c0 vmovups %ymm0,-0x40(%rdi,%rcx,1)
404c8c: c5 fc 10 44 0e e0 vmovups -0x20(%rsi,%rcx,1),%ymm0
404c92: c5 fc 11 44 0f e0 vmovups %ymm0,-0x20(%rdi,%rcx,1)
$