On Fri, Dec 21, 2018 at 9:08 AM Jan Beulich <jbeul...@suse.com> wrote: > > For 64-bit these should not be emitted without suffix in AT&T mode (as > being ambiguous that way); the suffixes are benign for 32-bit. For > consistency also omit the suffix in Intel mode for {,V}CVTSI2SxQ. > > The omission has originally (prior to rev 260691) lead to wrong code > being generated for the 64-bit unsigned-to-float/double conversions (as > gas guesses an L suffix instead of the required Q one when the operand > is in memory). In all remaining cases (being changed here) the omission > would "just" lead to warnings with future gas versions. > > Since rex64suffix so far has been used also on {,V}CVTSx2SI (but > not on VCVTSx2USI, as gas doesn't permit suffixes there), testsuite > adjustments are also necessary for their test cases. Rather than > making thinks check for the L suffixes in 32-bit cases, make things > symmetric with VCVTSx2USI and drop the redundant suffixes instead, > dropping the Q suffix expectations at the same time from the 64-bit > cases.
This diverges from established practice, where all instructions have suffixes in ATT dialect. I think that we should to continue to follow established convention (that found a couple of bugs in the past), so I think that "l" should be emitted where appropriate. I wonder if gas should be fixed to accept suffixes for VCVTSx2USI. For now, let's leave all suffixes, but skip problematic VCVTSx2USI. > In order for related test cases to actually test what they're supposed > to test, add (seemingly unrelated) a few empty "asm volatile()". > Presumably there are more where constant propagation voids the intended > effect of the tests, but these are ones helping make sure the assembler > actually still assembles correctly the output after the changes here. Please just make relevant variable volatile. There are plenty of examples in the i386 target testsuite. Uros. > gcc/ > 2018-12-21 Jan Beulich <jbeul...@suse.com> > > * config/i386/i386.md (rex64suffix): Add L suffix for SI. > * config/i386/sse.md (sse_cvtss2si<rex64namesuffix><round_name>, > sse_cvtss2si<rex64namesuffix>_2, > sse_cvttss2si<rex64namesuffix><round_saeonly_name>, > sse2_cvtsd2si<rex64namesuffix><round_name>, > sse2_cvtsd2si<rex64namesuffix>_2, > sse2_cvttsd2si<rex64namesuffix><round_saeonly_name>): Drop > <rex64suffix>. > (cvtusi2<ssescalarmodesuffix>32<round_name>, sse2_cvtsi2sd): Add > {l}. > (sse2_cvtsi2sdq<round_name>): Make q conditional upon AT&T > syntax. > > gcc/testsuite/ > 2018-12-21 Jan Beulich <jbeul...@suse.com> > > * gcc.target/i386/avx512f-vcvtsd2si64-1.c, > gcc.target/i386/avx512f-vcvtss2si64-1.c > gcc.target/i386/avx512f-vcvttsd2si64-1.c > gcc.target/i386/avx512f-vcvttss2si64-1.c: Drop q suffix > expectation. > * gcc.target/i386/avx512f-vcvtsi2ss-1.c, > gcc.target/i386/avx512f-vcvtusi2sd-1.c, > gcc.target/i386/avx512f-vcvtusi2ss-1.c: Expect l suffix. > * gcc.target/i386/avx512f-vcvtusi2sd-2.c, > gcc.target/i386/avx512f-vcvtusi2sd64-2.c, > gcc.target/i386/avx512f-vcvtusi2ss-2.c, > gcc.target/i386/avx512f-vcvtusi2ss64-2.c: Add asm volatile(). > > --- a/gcc/config/i386/i386.md > +++ b/gcc/config/i386/i386.md > @@ -1162,7 +1162,7 @@ > [(QI "V64QI") (HI "V32HI") (SI "V16SI") (DI "V8DI") (SF "V16SF") (DF > "V8DF")]) > > ;; Instruction suffix for REX 64bit operators. > -(define_mode_attr rex64suffix [(SI "") (DI "{q}")]) > +(define_mode_attr rex64suffix [(SI "{l}") (DI "{q}")]) > (define_mode_attr rex64namesuffix [(SI "") (DI "q")]) > > ;; This mode iterator allows :P to be used for patterns that operate on > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -4720,7 +4720,7 @@ > (parallel [(const_int 0)]))] > UNSPEC_FIX_NOTRUNC))] > "TARGET_SSE" > - "%vcvtss2si<rex64suffix>\t{<round_op2>%1, %0|%0, %k1<round_op2>}" > + "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}" > [(set_attr "type" "sseicvt") > (set_attr "athlon_decode" "double,vector") > (set_attr "bdver1_decode" "double,double") > @@ -4733,7 +4733,7 @@ > (unspec:SWI48 [(match_operand:SF 1 "nonimmediate_operand" "v,m")] > UNSPEC_FIX_NOTRUNC))] > "TARGET_SSE" > - "%vcvtss2si<rex64suffix>\t{%1, %0|%0, %k1}" > + "%vcvtss2si\t{%1, %0|%0, %k1}" > [(set_attr "type" "sseicvt") > (set_attr "athlon_decode" "double,vector") > (set_attr "amdfam10_decode" "double,double") > @@ -4749,7 +4749,7 @@ > (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" > "v,<round_saeonly_constraint>") > (parallel [(const_int 0)]))))] > "TARGET_SSE" > - "%vcvttss2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, > %k1<round_saeonly_op2>}" > + "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}" > [(set_attr "type" "sseicvt") > (set_attr "athlon_decode" "double,vector") > (set_attr "amdfam10_decode" "double,double") > @@ -4767,7 +4767,7 @@ > (match_operand:VF_128 1 "register_operand" "v") > (const_int 1)))] > "TARGET_AVX512F && <round_modev4sf_condition>" > - "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, > %2}" > + "vcvtusi2<ssescalarmodesuffix>{l}\t{%2, <round_op3>%1, %0|%0, > %1<round_op3>, %2}" > [(set_attr "type" "sseicvt") > (set_attr "prefix" "evex") > (set_attr "mode" "<ssescalarmode>")]) > @@ -5026,9 +5026,9 @@ > (const_int 1)))] > "TARGET_SSE2" > "@ > - cvtsi2sd\t{%2, %0|%0, %2} > - cvtsi2sd\t{%2, %0|%0, %2} > - vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}" > + cvtsi2sd{l}\t{%2, %0|%0, %2} > + cvtsi2sd{l}\t{%2, %0|%0, %2} > + vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}" > [(set_attr "isa" "noavx,noavx,avx") > (set_attr "type" "sseicvt") > (set_attr "athlon_decode" "double,direct,*") > @@ -5048,9 +5048,9 @@ > (const_int 1)))] > "TARGET_SSE2 && TARGET_64BIT" > "@ > - cvtsi2sdq\t{%2, %0|%0, %2} > - cvtsi2sdq\t{%2, %0|%0, %2} > - vcvtsi2sdq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}" > + cvtsi2sd{q}\t{%2, %0|%0, %2} > + cvtsi2sd{q}\t{%2, %0|%0, %2} > + vcvtsi2sd{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}" > [(set_attr "isa" "noavx,noavx,avx") > (set_attr "type" "sseicvt") > (set_attr "athlon_decode" "double,direct,*") > @@ -5119,7 +5119,7 @@ > (parallel [(const_int 0)]))] > UNSPEC_FIX_NOTRUNC))] > "TARGET_SSE2" > - "%vcvtsd2si<rex64suffix>\t{<round_op2>%1, %0|%0, %q1<round_op2>}" > + "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}" > [(set_attr "type" "sseicvt") > (set_attr "athlon_decode" "double,vector") > (set_attr "bdver1_decode" "double,double") > @@ -5133,7 +5133,7 @@ > (unspec:SWI48 [(match_operand:DF 1 "nonimmediate_operand" "v,m")] > UNSPEC_FIX_NOTRUNC))] > "TARGET_SSE2" > - "%vcvtsd2si<rex64suffix>\t{%1, %0|%0, %q1}" > + "%vcvtsd2si\t{%1, %0|%0, %q1}" > [(set_attr "type" "sseicvt") > (set_attr "athlon_decode" "double,vector") > (set_attr "amdfam10_decode" "double,double") > @@ -5149,7 +5149,7 @@ > (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" > "v,<round_saeonly_constraint2>") > (parallel [(const_int 0)]))))] > "TARGET_SSE2" > - "%vcvttsd2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, > %q1<round_saeonly_op2>}" > + "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}" > [(set_attr "type" "sseicvt") > (set_attr "athlon_decode" "double,vector") > (set_attr "amdfam10_decode" "double,double") > --- a/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2si64-1.c > +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsd2si64-1.c > @@ -1,6 +1,6 @@ > /* { dg-do compile { target { ! ia32 } } } */ > /* { dg-options "-O2 -mavx512f" } */ > -/* { dg-final { scan-assembler-times "vcvtsd2siq\[ > \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+.{6}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vcvtsd2si\[ > \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+.{6}(?:\n|\[ \\t\]+#)" 1 } } */ > > #include <immintrin.h> > > --- a/gcc/testsuite/gcc.target/i386/avx512f-vcvtsi2ss-1.c > +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtsi2ss-1.c > @@ -1,6 +1,6 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512f -O2" } */ > -/* { dg-final { scan-assembler-times "vcvtsi2ss\[ > \\t\]+\[^%\n\]*%e\[^\{\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vcvtsi2ssl\[ > \\t\]+\[^%\n\]*%e\[^\{\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" > 1 } } */ > > #include <immintrin.h> > > --- a/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2si64-1.c > +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtss2si64-1.c > @@ -1,6 +1,6 @@ > /* { dg-do compile { target { ! ia32 } } } */ > /* { dg-options "-O2 -mavx512f" } */ > -/* { dg-final { scan-assembler-times "vcvtss2siq\[ > \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+.{6}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vcvtss2si\[ > \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+.{6}(?:\n|\[ \\t\]+#)" 1 } } */ > > #include <immintrin.h> > > --- a/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si64-1.c > +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttsd2si64-1.c > @@ -1,7 +1,7 @@ > /* { dg-do compile { target { ! ia32 } } } */ > /* { dg-options "-O2 -mavx512f" } */ > -/* { dg-final { scan-assembler-times "vcvttsd2siq\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+.{6}(?:\n|\[ \\t\]+#)" 1 } } */ > -/* { dg-final { scan-assembler-times "vcvttsd2siq\[ > \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+.{6}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vcvttsd2si\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+.{6}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vcvttsd2si\[ > \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+.{6}(?:\n|\[ \\t\]+#)" 1 } } */ > > #include <immintrin.h> > > --- a/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si64-1.c > +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvttss2si64-1.c > @@ -1,7 +1,7 @@ > /* { dg-do compile { target { ! ia32 } } } */ > /* { dg-options "-O2 -mavx512f" } */ > -/* { dg-final { scan-assembler-times "vcvttss2siq\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+.{6}(?:\n|\[ \\t\]+#)" 1 } } */ > -/* { dg-final { scan-assembler-times "vcvttss2siq\[ > \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+.{6}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vcvttss2si\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+.{6}(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vcvttss2si\[ > \\t\]+\[^\{\n\]*\{sae\}\[^\n\]*%xmm\[0-9\]+.{6}(?:\n|\[ \\t\]+#)" 1 } } */ > > #include <immintrin.h> > > --- a/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd-1.c > +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd-1.c > @@ -1,6 +1,6 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512f -O2" } */ > -/* { dg-final { scan-assembler-times "vcvtusi2sd\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vcvtusi2sdl\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ > > #include <immintrin.h> > > --- a/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd-2.c > +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd-2.c > @@ -22,7 +22,9 @@ avx512f_test (void) > s1.x = _mm_set_pd (-24.43, -43.35); > s2 = 0xFEDCA987; > > + asm volatile ("" : "+m" (s2)); > res.x = _mm_cvtu32_sd (s1.x, s2); > + asm volatile ("" : "+m" (s2)); > > compute_vcvtusi2sd (s1.a, s2, res_ref); > > --- a/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd64-2.c > +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2sd64-2.c > @@ -22,7 +22,9 @@ avx512f_test (void) > s1.x = _mm_set_pd (-24.43, -43.35); > s2 = 0xFEDCBA9876543210; > > + asm volatile ("" : "+m" (s2)); > res.x = _mm_cvtu64_sd (s1.x, s2); > + asm volatile ("" : "+m" (s2)); > > compute_vcvtusi2sd (s1.a, s2, res_ref); > > --- a/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss-1.c > +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss-1.c > @@ -1,7 +1,7 @@ > /* { dg-do compile } */ > /* { dg-options "-mavx512f -O2" } */ > -/* { dg-final { scan-assembler-times "vcvtusi2ss\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ > -/* { dg-final { scan-assembler-times "vcvtusi2ss\[ > \\t\]+\[^%\n\]*%e\[^\{\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" > 1 } } */ > +/* { dg-final { scan-assembler-times "vcvtusi2ssl\[ > \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ > +/* { dg-final { scan-assembler-times "vcvtusi2ssl\[ > \\t\]+\[^%\n\]*%e\[^\{\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" > 1 } } */ > > #include <immintrin.h> > > --- a/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss-2.c > +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss-2.c > @@ -24,7 +24,9 @@ avx512f_test (void) > s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46); > s2 = 0xFEDCA987; > > + asm volatile ("" : "+m" (s2)); > res.x = _mm_cvtu32_ss (s1.x, s2); > + asm volatile ("" : "+m" (s2)); > > compute_vcvtusi2ss (s1.a, s2, res_ref); > > --- a/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss64-2.c > +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtusi2ss64-2.c > @@ -24,7 +24,9 @@ avx512f_test (void) > s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46); > s2 = 0xFEDCBA9876543210; > > + asm volatile ("" : "+m" (s2)); > res.x = _mm_cvtu64_ss (s1.x, s2); > + asm volatile ("" : "+m" (s2)); > > compute_vcvtusi2ss (s1.a, s2, res_ref); > > > >