Hi Hao Chen, On 9/15/21 2:35 AM, HAO CHEN GUI wrote: > Bill, > > Yes, I built the gcc with p10 binutils. Then power10_ok tests can pass. > Thanks again for your kindly explanation. > > I finally realized that the line wrap settings on my thunderbird didn't > take any effect. I have to set a very large line size, just for a workaround. > > ChangeLog > > 2021-09-15 Haochen Gui <guih...@linux.ibm.com> > > gcc/ > * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin): > Modify the expansion for sign extension. All extentions are done > within VSX resgisters.
Two typos here: extentions => extensions, resgisters => registers. The patch itself looks good to me. I recommend the maintainers approve with the ChangeLog fixed. Thanks! Bill > * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define. > > gcc/testsuite/ > * gcc.target/powerpc/p10_vec_xl_sext.c: New test. > > patch.diff > > diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c > index b4e13af4dc6..587e9fa2a2a 100644 > --- a/gcc/config/rs6000/rs6000-call.c > +++ b/gcc/config/rs6000/rs6000-call.c > @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree > exp, rtx target, bool bl > > if (sign_extend) > { > - rtx discratch = gen_reg_rtx (DImode); > + rtx discratch = gen_reg_rtx (V2DImode); > rtx tiscratch = gen_reg_rtx (TImode); > > /* Emit the lxvr*x insn. */ > @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, > tree exp, rtx target, bool bl > return 0; > emit_insn (pat); > > - /* Emit a sign extension from QI,HI,WI to double (DI). */ > - rtx scratch = gen_lowpart (smode, tiscratch); > + /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. */ > + rtx temp1, temp2; > if (icode == CODE_FOR_vsx_lxvrbx) > - emit_insn (gen_extendqidi2 (discratch, scratch)); > + { > + temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0); > + emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1)); > + } > else if (icode == CODE_FOR_vsx_lxvrhx) > - emit_insn (gen_extendhidi2 (discratch, scratch)); > + { > + temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0); > + emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1)); > + } > else if (icode == CODE_FOR_vsx_lxvrwx) > - emit_insn (gen_extendsidi2 (discratch, scratch)); > - /* Assign discratch directly if scratch is already DI. */ > - if (icode == CODE_FOR_vsx_lxvrdx) > - discratch = scratch; > + { > + temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0); > + emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1)); > + } > + else if (icode == CODE_FOR_vsx_lxvrdx) > + discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0); > + else > + gcc_unreachable (); > > - /* Emit the sign extension from DI (double) to TI (quad). */ > - emit_insn (gen_extendditi2 (target, discratch)); > + /* Emit the sign extension from V2DI (double) to TI (quad). */ > + temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0); > + emit_insn (gen_extendditi2_vector (target, temp2)); > > return target; > } > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md > index bcb92be2f5c..987f21bbc22 100644 > --- a/gcc/config/rs6000/vsx.md > +++ b/gcc/config/rs6000/vsx.md > @@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_<mode>" > "vextsh2<wd> %0,%1" > [(set_attr "type" "vecexts")]) > > -(define_insn "*vsx_sign_extend_si_v2di" > +(define_insn "vsx_sign_extend_si_v2di" > [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") > (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")] > UNSPEC_VSX_SIGN_EXTEND))] > diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c > b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c > new file mode 100644 > index 00000000000..78e72ac5425 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c > @@ -0,0 +1,35 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target int128 } */ > +/* { dg-require-effective-target power10_ok } */ > +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ > + > +#include <altivec.h> > + > +vector signed __int128 > +foo1 (signed long a, signed char *b) > +{ > + return vec_xl_sext (a, b); > +} > + > +vector signed __int128 > +foo2 (signed long a, signed short *b) > +{ > + return vec_xl_sext (a, b); > +} > + > +vector signed __int128 > +foo3 (signed long a, signed int *b) > +{ > + return vec_xl_sext (a, b); > +} > + > +vector signed __int128 > +foo4 (signed long a, signed long *b) > +{ > + return vec_xl_sext (a, b); > +} > + > +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */ > +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */ > +/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */ > > > On 10/9/2021 下午 8:18, Bill Schmidt wrote: >> >> >> On 9/10/21 12:45 AM, HAO CHEN GUI wrote: >>> Bill, >>> >>> Thanks so much for your advice. >>> >>> I refined the patch and passed the bootstrap and regression test. >>> Just one thing, the test case becomes unsupported on P9 if I set "{ >>> dg-require-effective-target power10_ok }". I just want the test case to >>> be compiled and check its assembly. Do we need set "power10_ok"? >> >> Yes. power10_ok tests whether your toolchain supports *assembly* of p10 >> instructions. It isn't unsupported on P9 per se; it's unsupported if the >> toolchain you're using on P9 doesn't have binutils support for P10. >> >> You can look at gcc/testsuite/lib/target-supports.exp to see how this stuff >> works. In this case we have: >> >> proc check_effective_target_power10_ok { } { >> if { ([istarget powerpc64*-*-linux*]) } { >> return [check_no_compiler_messages power10_ok object { >> int main (void) { >> long e; >> asm ("pli %0,%1" : "=r" (e) : "n" (0x12345)); >> return e; >> } >> } "-mcpu=power10"] >> } else { >> return 0 >> } >> } >> >>> I tried to disable line wrap in my email editor. Please let me know >>> if you still see line wrap. Thanks. >> >> Thanks, but unfortunately it's still broken as I still see: >> >> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, >> tree exp, rtx target, bool bl >> >> This should all be on one line. The headers show: >> >> Content-Type: text/plain; charset=utf-8; format=flowed >> >> Not sure what mailer you're using, but often you can avoid problems by >> marking the patch portion as untouchable in some way. In Thunderbird, you >> can set it to Preformat, as an example. Then you can get line wrap on the >> non-patch portions if you prefer that. >> >>> ChangeLog >>> >>> 2021-09-10 Haochen Gui<guih...@linux.ibm.com> >>> >>> gcc/ >>> * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin): >>> Modify the expansion for sign extension. All extentions are done >>> within VSX resgisters. >>> * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define. >>> >>> gcc/testsuite/ >>> * gcc.target/powerpc/p10_vec_xl_sext.c: New test. >>> >>> patch.diff >>> >>> diff --git a/gcc/config/rs6000/rs6000-call.c >>> b/gcc/config/rs6000/rs6000-call.c >>> index b4e13af4dc6..587e9fa2a2a 100644 >>> --- a/gcc/config/rs6000/rs6000-call.c >>> +++ b/gcc/config/rs6000/rs6000-call.c >>> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, >>> tree exp, rtx target, bool bl >>> >>> if (sign_extend) >>> { >>> - rtx discratch = gen_reg_rtx (DImode); >>> + rtx discratch = gen_reg_rtx (V2DImode); >>> rtx tiscratch = gen_reg_rtx (TImode); >>> >>> /* Emit the lxvr*x insn. */ >>> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code >>> icode, tree exp, rtx target, bool bl >>> return 0; >>> emit_insn (pat); >>> >>> - /* Emit a sign extension from QI,HI,WI to double (DI). */ >>> - rtx scratch = gen_lowpart (smode, tiscratch); >>> + /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. */ >>> + rtx temp1, temp2; >>> if (icode == CODE_FOR_vsx_lxvrbx) >>> - emit_insn (gen_extendqidi2 (discratch, scratch)); >>> + { >>> + temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0); >>> + emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1)); >>> + } >>> else if (icode == CODE_FOR_vsx_lxvrhx) >>> - emit_insn (gen_extendhidi2 (discratch, scratch)); >>> + { >>> + temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0); >>> + emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1)); >>> + } >>> else if (icode == CODE_FOR_vsx_lxvrwx) >>> - emit_insn (gen_extendsidi2 (discratch, scratch)); >>> - /* Assign discratch directly if scratch is already DI. */ >>> - if (icode == CODE_FOR_vsx_lxvrdx) >>> - discratch = scratch; >>> + { >>> + temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0); >>> + emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1)); >>> + } >>> + else if (icode == CODE_FOR_vsx_lxvrdx) >>> + discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0); >>> + else >>> + gcc_unreachable (); >>> >>> - /* Emit the sign extension from DI (double) to TI (quad). */ >>> - emit_insn (gen_extendditi2 (target, discratch)); >>> + /* Emit the sign extension from V2DI (double) to TI (quad). */ >>> + temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0); >>> + emit_insn (gen_extendditi2_vector (target, temp2)); >>> >>> return target; >>> } >>> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md >>> index bcb92be2f5c..987f21bbc22 100644 >>> --- a/gcc/config/rs6000/vsx.md >>> +++ b/gcc/config/rs6000/vsx.md >>> @@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_<mode>" >>> "vextsh2<wd> %0,%1" >>> [(set_attr "type" "vecexts")]) >>> >>> -(define_insn "*vsx_sign_extend_si_v2di" >>> +(define_insn "vsx_sign_extend_si_v2di" >>> [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") >>> (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")] >>> UNSPEC_VSX_SIGN_EXTEND))] >>> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c >>> b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c >>> new file mode 100644 >>> index 00000000000..fcecc542d07 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c >>> @@ -0,0 +1,34 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-require-effective-target int128 } */ >> >> As discussed, add the power10_ok directive. >> >> Otherwise looks fine to me, will let Segher have a look. :) >> >> Thanks! >> Bill >> >>> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ >>> + >>> +#include <altivec.h> >>> + >>> +vector signed __int128 >>> +foo1 (signed long a, signed char *b) >>> +{ >>> + return vec_xl_sext (a, b); >>> +} >>> + >>> +vector signed __int128 >>> +foo2 (signed long a, signed short *b) >>> +{ >>> + return vec_xl_sext (a, b); >>> +} >>> + >>> +vector signed __int128 >>> +foo3 (signed long a, signed int *b) >>> +{ >>> + return vec_xl_sext (a, b); >>> +} >>> + >>> +vector signed __int128 >>> +foo4 (signed long a, signed long *b) >>> +{ >>> + return vec_xl_sext (a, b); >>> +} >>> + >>> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */ >>> +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */ >>> +/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */ >>> +/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */ >>> >>> >>> On 10/9/2021 上午 4:49, Bill Schmidt wrote: >>>> Hi Haochen, >>>> >>>> This patch was sent with "format=flowed", so it doesn't apply. That >>>> makes it harder to review. Can you please make sure you disable line >>>> wrap from your patch submissions, at least in the patch part? >>>> >>>> On 9/6/21 1:18 AM, HAO CHEN GUI wrote: >>>>> Hi, >>>>> >>>>> The patch optimized the code generation for vec_xl_sext builtin. >>>>> Now >>>>> all the sign extensions are done on VSX registers directly. >>>>> >>>>> Bootstrapped and tested on powerpc64le-linux with no >>>>> regressions. Is >>>>> this okay for trunk? Any recommendations? Thanks a lot. >>>>> >>>>> ChangeLog >>>>> >>>>> 2021-09-06 Haochen Gui<guih...@linux.ibm.com> >>>>> >>>>> gcc/ >>>>> * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin): >>>>> Modify the expansion for sign extension. >>>>> * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define. >>>>> >>>>> gcc/testsuite/ >>>>> * gcc.target/powerpc/p10_vec_xl_sext.c: New test. >>>>> >>>>> >>>>> patch.diff >>>>> >>>>> diff --git a/gcc/config/rs6000/rs6000-call.c >>>>> b/gcc/config/rs6000/rs6000-call.c >>>>> index b4e13af4dc6..54fdaf47be3 100644 >>>>> --- a/gcc/config/rs6000/rs6000-call.c >>>>> +++ b/gcc/config/rs6000/rs6000-call.c >>>>> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, >>>>> tree exp, rtx target, bool bl >>>> Above is the first wrapped line that causes the patch to not apply. >>>>> if (sign_extend) >>>>> { >>>>> - rtx discratch = gen_reg_rtx (DImode); >>>>> + rtx discratch = gen_reg_rtx (V2DImode); >>>>> rtx tiscratch = gen_reg_rtx (TImode); >>>>> >>>>> /* Emit the lxvr*x insn. */ >>>>> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code >>>>> icode, tree exp, rtx target, bool bl >>>>> return 0; >>>>> emit_insn (pat); >>>>> >>>>> - /* Emit a sign extension from QI,HI,WI to double (DI). */ >>>>> - rtx scratch = gen_lowpart (smode, tiscratch); >>>>> + /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. */ >>>>> + rtx temp1, temp2; >>>>> if (icode == CODE_FOR_vsx_lxvrbx) >>>>> - emit_insn (gen_extendqidi2 (discratch, scratch)); >>>>> + { >>>>> + temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0); >>>>> + emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1)); >>>>> + } >>>>> else if (icode == CODE_FOR_vsx_lxvrhx) >>>>> - emit_insn (gen_extendhidi2 (discratch, scratch)); >>>>> + { >>>>> + temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0); >>>>> + emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1)); >>>>> + } >>>>> else if (icode == CODE_FOR_vsx_lxvrwx) >>>>> - emit_insn (gen_extendsidi2 (discratch, scratch)); >>>>> - /* Assign discratch directly if scratch is already DI. */ >>>>> - if (icode == CODE_FOR_vsx_lxvrdx) >>>>> - discratch = scratch; >>>>> + { >>>>> + temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0); >>>>> + emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1)); >>>>> + } >>>>> + else if (icode == CODE_FOR_vsx_lxvrdx) >>>>> + discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0); >>>>> + else >>>>> + return 0; >>>> Please call gcc_unreachable () here, instead of returning 0. If we >>>> call this function with an unexpected insn_code, we want to ICE right >>>> away to make it easier to debug the problem. >>>> >>>>> - /* Emit the sign extension from DI (double) to TI (quad). */ >>>>> - emit_insn (gen_extendditi2 (target, discratch)); >>>>> + /* Emit the sign extension from V2DI (double) to TI (quad). */ >>>>> + temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0); >>>>> + emit_insn (gen_extendditi2_vector (target, temp2)); >>>>> >>>>> return target; >>>>> } >>>>> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md >>>>> index bcb92be2f5c..987f21bbc22 100644 >>>>> --- a/gcc/config/rs6000/vsx.md >>>>> +++ b/gcc/config/rs6000/vsx.md >>>>> @@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_<mode>" >>>>> "vextsh2<wd> %0,%1" >>>>> [(set_attr "type" "vecexts")]) >>>>> >>>>> -(define_insn "*vsx_sign_extend_si_v2di" >>>>> +(define_insn "vsx_sign_extend_si_v2di" >>>>> [(set (match_operand:V2DI 0 "vsx_register_operand" "=v") >>>>> (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")] >>>>> UNSPEC_VSX_SIGN_EXTEND))] >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c >>>>> b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c >>>>> new file mode 100644 >>>>> index 00000000000..e5b84e6167a >>>>> --- /dev/null >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c >>>>> @@ -0,0 +1,32 @@ >>>>> +/* { dg-do compile } */ >>>>> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ >>>> You need a { dg-require-effective-target int128 } directive. Also { >>>> dg-require-effective-target power10_ok }. >>>>> + >>>>> +#include <altivec.h> >>>>> + >>>>> +vector signed __int128 >>>>> +foo1 (signed long a, signed char *b) >>>>> +{ >>>>> + return vec_xl_sext (a, b); >>>>> +} >>>>> + >>>>> +vector signed __int128 >>>>> +foo2 (signed long a, signed short *b) >>>>> +{ >>>>> + return vec_xl_sext (a, b); >>>>> +} >>>>> + >>>>> +vector signed __int128 >>>>> +foo3 (signed long a, signed int *b) >>>>> +{ >>>>> + return vec_xl_sext (a, b); >>>>> +} >>>>> + >>>>> +vector signed __int128 >>>>> +foo4 (signed long a, signed long *b) >>>>> +{ >>>>> + return vec_xl_sext (a, b); >>>>> +} >>>>> + >>>>> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */ >>>>> +/* { dg-final { scan-assembler-times >>>>> {\mvextsh2d\M|\mvextsw2d\M|\mvextsb2d\M} 3 } } */ >>>> I'd rather see the exact number of times each instruction is expected, >>>> since this would match if all of foo1, foo2, and foo3 emitted >>>> vextsb2d, for example. >>>> >>>> I don't see any additional problems...over to Segher. >>>> >>>> Thanks for the patch! >>>> Bill >>>> >>>>> +/* { dg-final { scan-assembler-not "mtvsrdd" } } */ >>>>>