ping. On Sun, Apr 25, 2021 at 2:57 PM Hongtao Liu <crazy...@gmail.com> wrote: > > On Fri, Apr 23, 2021 at 5:13 PM Jakub Jelinek <ja...@redhat.com> wrote: > > > > On Fri, Apr 23, 2021 at 12:53:58PM +0800, Hongtao Liu via Gcc-patches wrote: > > > + if (!CONST_INT_P (er)) > > > + return 0; > > > + ei = INTVAL (er); > > > + if (i < nelt2 && ei != i) > > > + return 0; > > > + if (i >= nelt2 > > > + && (ei < nelt || ei >= nelt<<1)) > > > > Formatting: > > 1) you have spaces followed by tab, remove the spaces; but, > > if (i >= nelt2 && (ei < nelt || ei >= nelt<<1)) > > fits on one line, so keep it on one line. > > 2) nelt<<1 should be nelt << 1 with spaces around the << > > > > Done. > > > > -(define_insn "*vec_concatv4si_0" > > > - [(set (match_operand:V4SI 0 "register_operand" "=v,x") > > > - (vec_concat:V4SI > > > - (match_operand:V2SI 1 "nonimmediate_operand" "vm,?!*y") > > > - (match_operand:V2SI 2 "const0_operand" " C,C")))] > > > +(define_insn "*vec_concat<mode>_0" > > > + [(set (match_operand:VI124_128 0 "register_operand" "=v,x") > > > + (vec_concat:VI124_128 > > > + (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" > > > "vm,?!*y") > > > + (match_operand:<ssehalfvecmode> 2 "const0_operand" " > > > C,C")))] > > > "TARGET_SSE2" > > > "@ > > > %vmovq\t{%1, %0|%0, %1} > > > @@ -22154,6 +22157,24 @@ (define_insn "avx_vec_concat<mode>" > > > (set_attr "prefix" "maybe_evex") > > > (set_attr "mode" "<sseinsnmode>")]) > > > > > > +(define_insn_and_split "*vec_concat<mode>_0" > > > > Would be better to use a different pattern name, *vec_concat<mode>_0 > > is already used in the above define_insn. > > Use some additional suffix after _0? > > > > Changed to "*vec_concat<mode>_0_1" > > > > + return __builtin_shuffle (x, (v32qi) { 0, 0, 0, 0, 0, 0, 0, 0, > > > + 0, 0, 0, 0, 0, 0, 0, 0, > > > + 0, 0, 0, 0, 0, 0, 0, 0, > > > + 0, 0, 0, 0, 0, 0, 0, 0 }, > > > + (v32qi) { 0, 1, 2, 3, 4, 5, 6, 7, > > > + 8, 9, 10, 11, 12, 13, 14, 15, > > > + 32, 49, 34, 58, 36, 53, 38, 39, > > > + 40, 60, 42, 43, 63, 45, 46, 47 }); > > > > In this testcase the shuffles in the part taking indexes from the zero > > vector are nicely randomized. > > > > > --- /dev/null > > > +++ b/gcc/testsuite/gcc.target/i386/avx512f-pr94680.c > > > @@ -0,0 +1,78 @@ > > > +/* { dg-do compile } */ > > > +/* { dg-options "-mavx512bw -mavx512vbmi -O2" } */ > > > +/* { dg-final { scan-assembler-times {(?n)vmov[a-z0-9]*[ \t]*%ymm[0-9]} > > > 6} } */ > > > +/* { dg-final { scan-assembler-not "pxor" } } */ > > > + > > > + > > > +typedef float v16sf __attribute__((vector_size(64))); > > > +typedef double v8df __attribute__ ((vector_size (64))); > > > +typedef long long v8di __attribute__((vector_size(64))); > > > +typedef int v16si __attribute__((vector_size(64))); > > > +typedef short v32hi __attribute__ ((vector_size (64))); > > > +typedef char v64qi __attribute__ ((vector_size (64))); > > > + > > > +v8df > > > +foo_v8df (v8df x) > > > +{ > > > + return __builtin_shuffle (x, (v8df) { 0, 0, 0, 0, 0, 0, 0, 0 }, > > > + (v8di) { 0, 1, 2, 3, 8, 9, 10, 11 }); > > > +} > > > + > > > +v8di > > > +foo_v8di (v8di x) > > > +{ > > > + return __builtin_shuffle (x, (v8di) { 0, 0, 0, 0, 0, 0, 0, 0 }, > > > + (v8di) { 0, 1, 2, 3, 8, 9, 10, 11 }); > > > +} > > > + > > > +v16sf > > > +foo_v16sf (v16sf x) > > > +{ > > > + return __builtin_shuffle (x, (v16sf) { 0, 0, 0, 0, 0, 0, 0, 0, > > > + 0, 0, 0, 0, 0, 0, 0, 0 }, > > > + (v16si) { 0, 1, 2, 3, 4, 5, 6, 7, > > > + 16, 17, 18, 19, 20, 21, 22, 23 }); > > > +} > > > + > > > +v16si > > > +foo_v16si (v16si x) > > > +{ > > > + return __builtin_shuffle (x, (v16si) { 0, 0, 0, 0, 0, 0, 0, 0, > > > + 0, 0, 0, 0, 0, 0, 0, 0 }, > > > + (v16si) { 0, 1, 2, 3, 4, 5, 6, 7, > > > + 16, 17, 18, 19, 20, 21, 22, 23 }); > > > +} > > > + > > > +v32hi > > > +foo_v32hi (v32hi x) > > > +{ > > > + return __builtin_shuffle (x, (v32hi) { 0, 0, 0, 0, 0, 0, 0, 0, > > > + 0, 0, 0, 0, 0, 0, 0, 0, > > > + 0, 0, 0, 0, 0, 0, 0, 0, > > > + 0, 0, 0, 0, 0, 0, 0, 0 }, > > > + (v32hi) { 0, 1, 2, 3, 4, 5, 6, 7, > > > + 8, 9, 10, 11, 12, 13, 14, 15, > > > + 32, 33, 34, 35, 36, 37, 38, 39, > > > + 40,41, 42, 43, 44, 45, 46, 47 }); > > > +} > > > + > > > +v64qi > > > +foo_v64qi (v64qi x) > > > +{ > > > + return __builtin_shuffle (x, (v64qi) { 0, 0, 0, 0, 0, 0, 0, 0, > > > + 0, 0, 0, 0, 0, 0, 0, 0, > > > + 0, 0, 0, 0, 0, 0, 0, 0, > > > + 0, 0, 0, 0, 0, 0, 0, 0, > > > + 0, 0, 0, 0, 0, 0, 0, 0, > > > + 0, 0, 0, 0, 0, 0, 0, 0, > > > + 0, 0, 0, 0, 0, 0, 0, 0, > > > + 0, 0, 0, 0, 0, 0, 0, 0 }, > > > + (v64qi) {0, 1, 2, 3, 4, 5, 6, 7, > > > + 8, 9, 10, 11, 12, 13, 14, 15, > > > + 16, 17, 18, 19, 20, 21, 22, 23, > > > + 24, 25, 26, 27, 28, 29, 30, 31, > > > + 64, 65, 66, 67, 68, 69, 70, 71, > > > + 72, 73, 74, 75, 76, 77, 78, 79, > > > + 80, 81, 82, 83, 84, 85, 86, 87, > > > + 88, 89, 90, 91, 92, 93, 94, 95 }); > > > > Can't you randomize a little bit at least some of these too? > > > > Done. > > > Also, what happens with __builtin_shuffle (zero_vector, x, ...) (i.e. when > > you swap the two vectors and adjust correspondingly the permutation)? > > Will it be also recognized or do we just punt on those? > > when building gimple, vec_perm(0, x, sel) is simplified to vec_perm(x, > 0, sel*)(, with adjustment of selector), since arg0 is a constant. > Not sure if rtl phase would do same simplification, anyway i add > testcases for __builtin_shuffle (zero_vector, x, ...), but not extend > pre-reload splitters to handle (vec_select: (vec_concat: cosnt0_rtx > op1) selector). > > > > > Jakub > > > > > -- > BR, > Hongtao
-- BR, Hongtao