On Wed, Mar 26, 2025 at 9:50 AM Hu, Lin1 <lin1...@intel.com> wrote: > > Hi, all > > This patch ensures that each alternative with constraint "jm" > sets the attribute "addr" to "gpr16"; otherwise an ICE may be raised in the reload pass. > > Bootstrapped and Regtested for x86_64-pc-linux-gnu{-m32,-m64}, ok for trunk? Ok. > > BRs, > Lin > > gcc/ChangeLog: > > PR target/119425 > * config/i386/sse.md: > (vec_set<mode>_0): Set the alternative with constraint "jm"'s > attribute "addr" to "gpr16". > (<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>): > Ditto. > (avx512vl_shuf_<shuffletype>32x4_1<mask_name>): Ditto. > (avx2_pblendd<mode>): Ditto. > (aesenc): Ditto. > (aesenclast): Ditto. > (aesdec): Ditto. > (aesdeclast): Ditto. > (vaesdec_<mode>): Ditto. > (vaesdeclast_<mode>): Ditto. > (vaesenc_<mode>): Ditto. > (vaesenclast_<mode>): Ditto. > (aes<aesklvariant>u8): Ditto. > (*aes<aeswideklvariant>u8): Ditto. > > gcc/testsuite/ChangeLog: > > PR target/119425 > * gcc.target/i386/pr119425.c: New test. > > Co-authored-by: Hongyu Wang <hongyu.w...@intel.com> > --- > gcc/config/i386/sse.md | 31 +++++++++++++------- > gcc/testsuite/gcc.target/i386/pr119425.c | 37 ++++++++++++++++++++++++ > 2 files changed, 57 insertions(+), 11 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr119425.c > > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md > index 70c2cf3f60d..1a9214fdedc 100644 > --- a/gcc/config/i386/sse.md > +++ b/gcc/config/i386/sse.md > @@ -11935,7 +11935,7 @@ (define_insn "vec_set<mode>_0" > ] > (const_string "ssemov"))) > (set (attr "addr") > - (if_then_else (eq_attr "alternative" "8,9") > + (if_then_else (eq_attr "alternative" "9,10") > (const_string "gpr16") > (const_string "*"))) > (set (attr "prefix_extra") > @@ -20204,6 +20204,7 @@ (define_insn > "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>" > return "vshuf<shuffletype>64x2\t{%3, %2, %1, > %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}"; > } > [(set_attr "type" "sselog") > + 
(set_attr "addr" "gpr16,*") > (set_attr "length_immediate" "1") > (set_attr "prefix" "evex") > (set_attr "mode" "XI")]) > @@ -20365,6 +20366,7 @@ (define_insn > "avx512vl_shuf_<shuffletype>32x4_1<mask_name>" > return "vshuf<shuffletype>32x4\t{%3, %2, %1, > %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}"; > } > [(set_attr "type" "sselog") > + (set_attr "addr" "gpr16,*") > (set_attr "length_immediate" "1") > (set_attr "prefix" "evex") > (set_attr "mode" "<sseinsnmode>")]) > @@ -24107,6 +24109,7 @@ (define_insn "avx2_pblendd<mode>" > "TARGET_AVX2" > "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}" > [(set_attr "type" "ssemov") > + (set_attr "addr" "gpr16") > (set_attr "prefix_extra" "1") > (set_attr "length_immediate" "1") > (set_attr "prefix" "vex") > @@ -27116,7 +27119,7 @@ (define_insn "aesenc" > vaesenc\t{%2, %1, %0|%0, %1, %2}" > [(set_attr "isa" "noavx,avx,vaes_avx512vl") > (set_attr "type" "sselog1") > - (set_attr "addr" "gpr16,*,*") > + (set_attr "addr" "gpr16,gpr16,*") > (set_attr "prefix_extra" "1") > (set_attr "prefix" "orig,maybe_evex,evex") > (set_attr "btver2_decode" "double,double,double") > @@ -27134,7 +27137,7 @@ (define_insn "aesenclast" > vaesenclast\t{%2, %1, %0|%0, %1, %2}" > [(set_attr "isa" "noavx,avx,vaes_avx512vl") > (set_attr "type" "sselog1") > - (set_attr "addr" "gpr16,*,*") > + (set_attr "addr" "gpr16,gpr16,*") > (set_attr "prefix_extra" "1") > (set_attr "prefix" "orig,maybe_evex,evex") > (set_attr "btver2_decode" "double,double,double") > @@ -27152,7 +27155,7 @@ (define_insn "aesdec" > vaesdec\t{%2, %1, %0|%0, %1, %2}" > [(set_attr "isa" "noavx,avx,vaes_avx512vl") > (set_attr "type" "sselog1") > - (set_attr "addr" "gpr16,*,*") > + (set_attr "addr" "gpr16,gpr16,*") > (set_attr "prefix_extra" "1") > (set_attr "prefix" "orig,maybe_evex,evex") > (set_attr "btver2_decode" "double,double,double") > @@ -27169,7 +27172,7 @@ (define_insn "aesdeclast" > * return TARGET_AES ? 
\"vaesdeclast\t{%2, %1, %0|%0, %1, %2}\" : > \"%{evex%} vaesdeclast\t{%2, %1, %0|%0, %1, %2}\"; > vaesdeclast\t{%2, %1, %0|%0, %1, %2}" > [(set_attr "isa" "noavx,avx,vaes_avx512vl") > - (set_attr "addr" "gpr16,*,*") > + (set_attr "addr" "gpr16,gpr16,*") > (set_attr "type" "sselog1") > (set_attr "prefix_extra" "1") > (set_attr "prefix" "orig,maybe_evex,evex") > @@ -30873,7 +30876,8 @@ (define_insn "vaesdec_<mode>" > return "%{evex%} vaesdec\t{%2, %1, %0|%0, %1, %2}"; > else > return "vaesdec\t{%2, %1, %0|%0, %1, %2}"; > -}) > +} > +[(set_attr "addr" "gpr16,*")]) > > (define_insn "vaesdeclast_<mode>" > [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v") > @@ -30887,7 +30891,8 @@ (define_insn "vaesdeclast_<mode>" > return "%{evex%} vaesdeclast\t{%2, %1, %0|%0, %1, %2}"; > else > return "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"; > -}) > +} > +[(set_attr "addr" "gpr16,*")]) > > (define_insn "vaesenc_<mode>" > [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v") > @@ -30901,7 +30906,8 @@ (define_insn "vaesenc_<mode>" > return "%{evex%} vaesenc\t{%2, %1, %0|%0, %1, %2}"; > else > return "vaesenc\t{%2, %1, %0|%0, %1, %2}"; > -}) > +} > +[(set_attr "addr" "gpr16,*")]) > > (define_insn "vaesenclast_<mode>" > [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v") > @@ -30915,7 +30921,8 @@ (define_insn "vaesenclast_<mode>" > return "%{evex%} vaesenclast\t{%2, %1, %0|%0, %1, %2}"; > else > return "vaesenclast\t{%2, %1, %0|%0, %1, %2}"; > -}) > +} > +[(set_attr "addr" "gpr16,*")]) > > (define_insn "vpclmulqdq_<mode>" > [(set (match_operand:VI8_FVL 0 "register_operand" "=v") > @@ -31362,7 +31369,8 @@ (define_insn "aes<aesklvariant>u8" > (unspec_volatile:CCZ [(match_dup 1) (match_dup 2)] AESDECENCKL))] > "TARGET_KL" > "aes<aesklvariant>\t{%2, %0|%0, %2}" > - [(set_attr "type" "other")]) > + [(set_attr "type" "other") > + (set_attr "addr" "gpr16")]) > > (define_int_iterator AESDECENCWIDEKL > [UNSPECV_AESDECWIDE128KLU8 UNSPECV_AESDECWIDE256KLU8 > @@ 
-31424,7 +31432,8 @@ (define_insn "*aes<aeswideklvariant>u8" > AESDECENCWIDEKL))])] > "TARGET_WIDEKL" > "aes<aeswideklvariant>\t%0" > - [(set_attr "type" "other")]) > + [(set_attr "type" "other") > + (set_attr "addr" "gpr16")]) > > ;; Modes handled by broadcast patterns. NB: Allow V64QI and V32HI with > ;; TARGET_AVX512F since ix86_expand_vector_init_duplicate can expand > diff --git a/gcc/testsuite/gcc.target/i386/pr119425.c > b/gcc/testsuite/gcc.target/i386/pr119425.c > new file mode 100644 > index 00000000000..b926979dcac > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr119425.c > @@ -0,0 +1,37 @@ > +/* PR target/119425 */ > +/* { dg-do compile { target { ! ia32 } } } */ > +/* { dg-options "-Os -fno-vect-cost-model -ftree-slp-vectorize > -mavxneconvert -mapxf" } */ > +extern long K512[]; > +extern long sha512_block_data_order_ctx[]; > + > +#define Ch(x, y, z) ~x &z > +#define ROUND_00_15(i, a, b, c, d, e, f, g, h) > \ > + T1 += ~e & g + K512[i]; \ > +h = 0; \ > +d += h += T1 > +#define ROUND_16_80(i, j, a, b, c, d, e, f, g, h, X) > \ > + ROUND_00_15(i + j, , , , d, e, , g, h) > + > +unsigned sha512_block_data_order_f, sha512_block_data_order_g; > + > +void > +sha512_block_data_order() > +{ > + unsigned a, b, c, d, e, h, T1; > + int i = 6; > + for (; i < 80; i += 6) { > + ROUND_16_80(i, 0, , , , d, e, , , h, ); > + ROUND_16_80(i, 11, , , , a, b, , d, e, ); > + ROUND_16_80(i, 12, , , , h, a, , c, d, ); > + ROUND_16_80(i, 13, , , , sha512_block_data_order_g, h, , b, c, ); > + ROUND_16_80(i, 14, , , , sha512_block_data_order_f, > + sha512_block_data_order_g, , a, b, ); > + ROUND_16_80(i, 15, , , , e, sha512_block_data_order_f, , , a, ); > + > + } > + sha512_block_data_order_ctx[0] += a; > + sha512_block_data_order_ctx[1] += b; > + sha512_block_data_order_ctx[2] += c; > + sha512_block_data_order_ctx[3] += d; > + > +} > -- > 2.31.1 >
-- BR, Hongtao