https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67124

--- Comment #4 from Uroš Bizjak <ubizjak at gmail dot com> ---
(In reply to Richard Biener from comment #2)
> Hmm, looks like a target issue.  We expand from

Not this time. It looks to me like the generic code is at fault: it performs a
DImode move from %rdx to %xmm0 instead of inserting %rdx into %xmm0.

This can be seen with -O2 -msse2 -mtune=corei7 (please note that the arrow
points to the next instruction to run, i.e. *before* it is executed):

   0x0000000000400440 <+0>:     movzbl 0x200c4b(%rip),%eax
   0x0000000000400447 <+7>:     movdqa 0x200c41(%rip),%xmm0
   0x000000000040044f <+15>:    movq   %xmm0,%rdx
=> 0x0000000000400454 <+20>:    mov    %rax,%rcx
   0x0000000000400457 <+23>:    and    $0xffffffffff00ffff,%rdx
   0x000000000040045e <+30>:    shl    $0x10,%rax
   0x0000000000400462 <+34>:    or     %rax,%rdx
   0x0000000000400465 <+37>:    xor    %eax,%eax
   0x0000000000400467 <+39>:    movq   %rdx,%xmm0
   0x000000000040046c <+44>:    pinsrw $0x2,%eax,%xmm0
   0x0000000000400471 <+49>:    mov    0x200be8(%rip),%rax
   0x0000000000400478 <+56>:    movups %xmm0,(%rax)

(gdb) i r rax rdx xmm0
rax            0x1
rdx            0x10000
xmm0           {uint128 = 0x00000000000000010000000000010000}

   0x0000000000400465 <+37>:    xor    %eax,%eax
=> 0x0000000000400467 <+39>:    movq   %rdx,%xmm0
   0x000000000040046c <+44>:    pinsrw $0x2,%eax,%xmm0

(gdb) i r rdx xmm0
rdx            0x10000
xmm0           {uint128 = 0x00000000000000010000000000010000}

   0x0000000000400465 <+37>:    xor    %eax,%eax
   0x0000000000400467 <+39>:    movq   %rdx,%xmm0
=> 0x000000000040046c <+44>:    pinsrw $0x2,%eax,%xmm0

(gdb) i r rdx xmm0
rdx            0x10000
xmm0           {uint128 = 0x00000000000000000000000000010000}

The same code can be compiled with -msse4 instead of -msse2, so that the
pinsrb insn is available:

   0x0000000000400440 <+0>:     movzbl 0x200c4b(%rip),%eax
   0x0000000000400447 <+7>:     xor    %edx,%edx
   0x0000000000400449 <+9>:     movdqa 0x200c3f(%rip),%xmm0
=> 0x0000000000400451 <+17>:    pinsrb $0x2,%eax,%xmm0
   0x0000000000400457 <+23>:    pinsrw $0x2,%edx,%xmm0
   0x000000000040045c <+28>:    mov    0x200bfd(%rip),%rdx        # 0x601060 <b>
   0x0000000000400463 <+35>:    movups %xmm0,(%rdx)

(gdb) i r rax xmm0
rax            0x1
xmm0           {uint128 = 0x00000000000000010000000000010000}

   0x0000000000400449 <+9>:     movdqa 0x200c3f(%rip),%xmm0
   0x0000000000400451 <+17>:    pinsrb $0x2,%eax,%xmm0
=> 0x0000000000400457 <+23>:    pinsrw $0x2,%edx,%xmm0

(gdb) i r xmm0
xmm0           {uint128 = 0x00000000000000010000000000010000}

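As shown in the first case, the value in %xmm0 is already wrong before pinsrw
is executed: the movq from %rdx clears the upper 64 bits of %xmm0, so the 0x1
that was in the high quadword is lost.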

Reply via email to