Hi! This patch fixes a regression: since the specialized builtin was removed from _mm_storel_epi64, we force the extraction of the low DImode (or SImode) value out of 16/32/64-byte vector registers into memory. Because the extraction is from a vector register with a different element mode, the expander doesn't know that it might be beneficial to take a subreg of it in a vector mode of the same size but with a different element mode, and do the vector extraction out of that. This patch adds a pre-reload splitter that turns such a move into that kind of vector extraction. At least for the -m32 DImode extraction directly into memory, I think teaching the register allocator (RA) to do that would be much harder.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2015-03-17 Jakub Jelinek <ja...@redhat.com> PR target/65078 * config/i386/sse.md (movsi/movdi -> vec_extract_*_0 splitter): New. * gcc.target/i386/pr65078-1.c: New test. * gcc.target/i386/pr65078-2.c: New test. * gcc.target/i386/pr65078-3.c: New test. * gcc.target/i386/pr65078-4.c: New test. * gcc.target/i386/pr65078-5.c: New test. * gcc.target/i386/pr65078-6.c: New test. --- gcc/config/i386/sse.md.jj 2015-01-23 20:52:13.000000000 +0100 +++ gcc/config/i386/sse.md 2015-03-17 15:57:31.274655235 +0100 @@ -12805,6 +12805,65 @@ (define_split operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs); }) +;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F +;; vector modes into vec_extract*. +(define_split + [(set (match_operand:SWI48x 0 "nonimmediate_operand") + (match_operand:SWI48x 1 "register_operand"))] + "can_create_pseudo_p () + && GET_CODE (operands[1]) == SUBREG + && REG_P (SUBREG_REG (operands[1])) + && (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) == MODE_VECTOR_INT + || (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) + == MODE_VECTOR_FLOAT)) + && SUBREG_BYTE (operands[1]) == 0 + && TARGET_SSE + && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 16 + || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 32 + && TARGET_AVX) + || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 64 + && TARGET_AVX512F)) + && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))" + [(set (match_dup 0) (vec_select:SWI48x (match_dup 1) + (parallel [(const_int 0)])))] +{ + rtx tmp; + operands[1] = SUBREG_REG (operands[1]); + switch (GET_MODE_SIZE (GET_MODE (operands[1]))) + { + case 64: + if (<MODE>mode == SImode) + { + tmp = gen_reg_rtx (V8SImode); + emit_insn (gen_vec_extract_lo_v16si (tmp, + gen_lowpart (V16SImode, + operands[1]))); + } + else + { + tmp = gen_reg_rtx (V4DImode); + emit_insn (gen_vec_extract_lo_v8di (tmp, + gen_lowpart (V8DImode, 
+ operands[1]))); + } + operands[1] = tmp; + /* FALLTHRU */ + case 32: + tmp = gen_reg_rtx (<ssevecmode>mode); + if (<MODE>mode == SImode) + emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode, + operands[1]))); + else + emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode, + operands[1]))); + operands[1] = tmp; + break; + case 16: + operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]); + break; + } +}) + (define_insn "*vec_concatv2si_sse4_1" [(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,x, Yr,*x,x, x, *y,*y") (vec_concat:V2SI --- gcc/testsuite/gcc.target/i386/pr65078-1.c.jj 2015-03-17 15:43:43.735200197 +0100 +++ gcc/testsuite/gcc.target/i386/pr65078-1.c 2015-03-17 16:08:17.022117378 +0100 @@ -0,0 +1,61 @@ +/* PR target/65078 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2" } */ +/* { dg-additional-options "-mregparm=2" { target ia32 } } */ +/* { dg-final { scan-assembler-not "\\(%\[er\]sp\\)" } } */ + +typedef unsigned char V __attribute__((vector_size (16))); +typedef unsigned long long W __attribute__((vector_size (16))); +typedef unsigned int T __attribute__((vector_size (16))); + +void +f1 (unsigned long long *x, V y) +{ + *x = ((W)y)[0]; +} + +#if defined(__x86_64__) || defined(ALL) +unsigned long long +f2 (V y) +{ + return ((W)y)[0]; +} +#endif + +void +f3 (unsigned int *x, V y) +{ + *x = ((T)y)[0]; +} + +unsigned int +f4 (V y) +{ + return ((T)y)[0]; +} + +void +f5 (unsigned long long *x, W y) +{ + *x = ((W)y)[0]; +} + +#if defined(__x86_64__) || defined(ALL) +unsigned long long +f6 (W y) +{ + return ((W)y)[0]; +} +#endif + +void +f7 (unsigned int *x, T y) +{ + *x = ((T)y)[0]; +} + +unsigned int +f8 (T y) +{ + return ((T)y)[0]; +} --- gcc/testsuite/gcc.target/i386/pr65078-2.c.jj 2015-03-17 15:44:19.097620771 +0100 +++ gcc/testsuite/gcc.target/i386/pr65078-2.c 2015-03-17 16:08:09.440240908 +0100 @@ -0,0 +1,61 @@ +/* PR target/65078 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx" } */ +/* { 
dg-additional-options "-mregparm=2" { target ia32 } } */ +/* { dg-final { scan-assembler-not "\\(%\[er\]sp\\)" } } */ + +typedef unsigned char V __attribute__((vector_size (32))); +typedef unsigned long long W __attribute__((vector_size (32))); +typedef unsigned int T __attribute__((vector_size (32))); + +void +f1 (unsigned long long *x, V y) +{ + *x = ((W)y)[0]; +} + +#if defined(__x86_64__) || defined(ALL) +unsigned long long +f2 (V y) +{ + return ((W)y)[0]; +} +#endif + +void +f3 (unsigned int *x, V y) +{ + *x = ((T)y)[0]; +} + +unsigned int +f4 (V y) +{ + return ((T)y)[0]; +} + +void +f5 (unsigned long long *x, W y) +{ + *x = ((W)y)[0]; +} + +#if defined(__x86_64__) || defined(ALL) +unsigned long long +f6 (W y) +{ + return ((W)y)[0]; +} +#endif + +void +f7 (unsigned int *x, T y) +{ + *x = ((T)y)[0]; +} + +unsigned int +f8 (T y) +{ + return ((T)y)[0]; +} --- gcc/testsuite/gcc.target/i386/pr65078-3.c.jj 2015-03-17 15:44:21.943574191 +0100 +++ gcc/testsuite/gcc.target/i386/pr65078-3.c 2015-03-17 16:08:24.930988521 +0100 @@ -0,0 +1,61 @@ +/* PR target/65078 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f" } */ +/* { dg-additional-options "-mregparm=2" { target ia32 } } */ +/* { dg-final { scan-assembler-not "\\(%\[er\]sp\\)" } } */ + +typedef unsigned char V __attribute__((vector_size (64))); +typedef unsigned long long W __attribute__((vector_size (64))); +typedef unsigned int T __attribute__((vector_size (64))); + +void +f1 (unsigned long long *x, V y) +{ + *x = ((W)y)[0]; +} + +#if defined(__x86_64__) || defined(ALL) +unsigned long long +f2 (V y) +{ + return ((W)y)[0]; +} +#endif + +void +f3 (unsigned int *x, V y) +{ + *x = ((T)y)[0]; +} + +unsigned int +f4 (V y) +{ + return ((T)y)[0]; +} + +void +f5 (unsigned long long *x, W y) +{ + *x = ((W)y)[0]; +} + +#if defined(__x86_64__) || defined(ALL) +unsigned long long +f6 (W y) +{ + return ((W)y)[0]; +} +#endif + +void +f7 (unsigned int *x, T y) +{ + *x = ((T)y)[0]; +} + +unsigned int +f8 (T y) +{ + 
return ((T)y)[0]; +} --- gcc/testsuite/gcc.target/i386/pr65078-4.c.jj 2015-03-17 16:05:28.777858535 +0100 +++ gcc/testsuite/gcc.target/i386/pr65078-4.c 2015-03-17 16:06:41.911666986 +0100 @@ -0,0 +1,5 @@ +/* PR target/65078 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse -DALL" } */ + +#include "pr65078-1.c" --- gcc/testsuite/gcc.target/i386/pr65078-5.c.jj 2015-03-17 16:06:49.899536842 +0100 +++ gcc/testsuite/gcc.target/i386/pr65078-5.c 2015-03-17 16:06:58.916389933 +0100 @@ -0,0 +1,5 @@ +/* PR target/65078 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx -DALL" } */ + +#include "pr65078-2.c" --- gcc/testsuite/gcc.target/i386/pr65078-6.c.jj 2015-03-17 16:07:05.977274892 +0100 +++ gcc/testsuite/gcc.target/i386/pr65078-6.c 2015-03-17 16:07:13.856146524 +0100 @@ -0,0 +1,5 @@ +/* PR target/65078 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512f -DALL" } */ + +#include "pr65078-3.c" Jakub