On Wed, Jan 23, 2019 at 12:27 PM Uros Bizjak <ubiz...@gmail.com> wrote:
>
> On Wed, Jan 23, 2019 at 8:52 PM H.J. Lu <hjl.to...@gmail.com> wrote:
> >
> > On Wed, Jan 23, 2019 at 11:22 AM Uros Bizjak <ubiz...@gmail.com> wrote:
> > >
> > > The attached patch adds SSE alternatives to sse2_cvtpi2pd, sse2_cvtpd2pi
> > > and sse2_cvttpd2pi to avoid MMX registers when e.g. the _mm_cvtepi32_pd
> > > intrinsic is used. Without the patch, the testcase compiles to (-O2
> > > -mavx):
> > >
> > > _Z7prepareii:
> > >         vmovd   %edi, %xmm1
> > >         vpinsrd $1, %esi, %xmm1, %xmm0
> > >         movdq2q %xmm0, %mm0
> > >         cvtpi2pd        %mm0, %xmm0
> > >         vhaddpd %xmm0, %xmm0, %xmm0
> > >         ret
> > >
> > > while patched gcc generates:
> > >
> > >         vmovd   %edi, %xmm1
> > >         vpinsrd $1, %esi, %xmm1, %xmm0
> > >         vcvtdq2pd       %xmm0, %xmm0
> > >         vhaddpd %xmm0, %xmm0, %xmm0
> > >         ret
> > >
> > > The latter avoids the transition of the FPU to MMX mode.
> > >
> >
> > Is it possible to support 64-bit vectors, like V2SI, with SSE
> > instead of MMX for x86-64 under a command-line switch?
>
> SSE registers are preferred for 64-bit vectors (see the number of
> exclamation marks in *mov<mode>_internal in mmx.md), so the value will
> be passed in SSE regs unless there is a pure MMX instruction, where, due
> to missing SSE alternatives, the register allocator (RA) will need to
> allocate an MMX register.
>

[hjl@gnu-cfl-1 v64-1]$ cat x.i
typedef float __v2sf __attribute__ ((__vector_size__ (8)));

extern __v2sf z;

void
add (__v2sf x, __v2sf y)
{
  z = x + y;
}
[hjl@gnu-cfl-1 v64-1]$ make x.s
/export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/xgcc
-B/export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/  -O2
-S x.i
[hjl@gnu-cfl-1 v64-1]$ cat x.s
.file "x.i"
.text
.p2align 4
.globl add
.type add, @function
add:
.LFB0:
.cfi_startproc
movlps %xmm0, -32(%rsp)
movss -32(%rsp), %xmm0
movlps %xmm1, -40(%rsp)
addss -40(%rsp), %xmm0
movss %xmm0, -56(%rsp)
movss -28(%rsp), %xmm0
addss -36(%rsp), %xmm0
movss %xmm0, -52(%rsp)
movq -56(%rsp), %rax
movq %rax, z(%rip)
ret
.cfi_endproc
.LFE0:
.size add, .-add
.ident "GCC: (GNU) 9.0.0 20190122 (experimental)"
.section .note.GNU-stack,"",@progbits
[hjl@gnu-cfl-1 v64-1]$

I am expecting:

addps %xmm1, %xmm0
movlps %xmm0, z(%rip)
retq

-- 
H.J.

Reply via email to