On Wed, Jan 23, 2019 at 12:27 PM Uros Bizjak <ubiz...@gmail.com> wrote: > > On Wed, Jan 23, 2019 at 8:52 PM H.J. Lu <hjl.to...@gmail.com> wrote: > > > > On Wed, Jan 23, 2019 at 11:22 AM Uros Bizjak <ubiz...@gmail.com> wrote: > > > > > > Attached patch adds SSE alternatives to sse2_cvtpi2pd, sse2_cvtpd2pi > > > and sse2_cvttpd2pi to avoid MMX registers when e.g. _mm_cvtepi32_pd > > > intrinsic is used. Without the patch, the testcase compiles to (-O2 > > > -mavx): > > > > > > _Z7prepareii: > > > vmovd %edi, %xmm1 > > > vpinsrd $1, %esi, %xmm1, %xmm0 > > > movdq2q %xmm0, %mm0 > > > cvtpi2pd %mm0, %xmm0 > > > vhaddpd %xmm0, %xmm0, %xmm0 > > > ret > > > > > > while patched gcc generates: > > > > > > vmovd %edi, %xmm1 > > > vpinsrd $1, %esi, %xmm1, %xmm0 > > > vcvtdq2pd %xmm0, %xmm0 > > > vhaddpd %xmm0, %xmm0, %xmm0 > > > ret > > > > > > The latter avoids transition of FPU to MMX mode. > > > > > > > Is it possible to support 64-bit vectors, like V2SI, with SSE > > instead of MMX for x86-64 under a command-line switch? > > SSE registers are preferred for 64bit vectors (see number of > exclamation marks in *mov<mode>_internal in mmx.md), so the value will > be passed in SSE regs unless there is a pure MMX instruction, where due > to missing SSE alternatives, RA will need to allocate an MMX register. >
[hjl@gnu-cfl-1 v64-1]$ cat x.i typedef float __v2sf __attribute__ ((__vector_size__ (8))); extern __v2sf z; void add (__v2sf x, __v2sf y) { z = x + y; } [hjl@gnu-cfl-1 v64-1]$ make x.s /export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/xgcc -B/export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/ -O2 -S x.i [hjl@gnu-cfl-1 v64-1]$ cat x.s .file "x.i" .text .p2align 4 .globl add .type add, @function add: .LFB0: .cfi_startproc movlps %xmm0, -32(%rsp) movss -32(%rsp), %xmm0 movlps %xmm1, -40(%rsp) addss -40(%rsp), %xmm0 movss %xmm0, -56(%rsp) movss -28(%rsp), %xmm0 addss -36(%rsp), %xmm0 movss %xmm0, -52(%rsp) movq -56(%rsp), %rax movq %rax, z(%rip) ret .cfi_endproc .LFE0: .size add, .-add .ident "GCC: (GNU) 9.0.0 20190122 (experimental)" .section .note.GNU-stack,"",@progbits [hjl@gnu-cfl-1 v64-1]$ I am expecting: addps %xmm1, %xmm0 movlps %xmm0, z(%rip) retq -- H.J.