Hi Hao Chen,

On 9/15/21 2:35 AM, HAO CHEN GUI wrote:
> Bill,
>
>     Yes, I built the gcc with p10 binutils. Then power10_ok tests can pass. 
> Thanks again for your kindly explanation.
>
>     I finally realized that the line wrap settings on my thunderbird didn't 
> take any effect. I have to set a very large line size,  just for a workaround.
>
> ChangeLog
>
> 2021-09-15 Haochen Gui <guih...@linux.ibm.com>
>
> gcc/
>         * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
>         Modify the expansion for sign extension. All extentions are done
>         within VSX resgisters.

Two typos here:  extentions => extensions, resgisters => registers.

The patch itself looks good to me.  I recommend the maintainers approve with 
the ChangeLog fixed.

Thanks!
Bill
>         * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.
>
> gcc/testsuite/
>         * gcc.target/powerpc/p10_vec_xl_sext.c: New test.
>
> patch.diff
>
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index b4e13af4dc6..587e9fa2a2a 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree 
> exp, rtx target, bool bl
>
>    if (sign_extend)
>      {
> -      rtx discratch = gen_reg_rtx (DImode);
> +      rtx discratch = gen_reg_rtx (V2DImode);
>        rtx tiscratch = gen_reg_rtx (TImode);
>
>        /* Emit the lxvr*x insn.  */
> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, 
> tree exp, rtx target, bool bl
>         return 0;
>        emit_insn (pat);
>
> -      /* Emit a sign extension from QI,HI,WI to double (DI).  */
> -      rtx scratch = gen_lowpart (smode, tiscratch);
> +      /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
> +      rtx temp1, temp2;
>        if (icode == CODE_FOR_vsx_lxvrbx)
> -       emit_insn (gen_extendqidi2 (discratch, scratch));
> +       {
> +         temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
> +         emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
> +       }
>        else if (icode == CODE_FOR_vsx_lxvrhx)
> -       emit_insn (gen_extendhidi2 (discratch, scratch));
> +       {
> +         temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
> +         emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
> +       }
>        else if (icode == CODE_FOR_vsx_lxvrwx)
> -       emit_insn (gen_extendsidi2 (discratch, scratch));
> -      /*  Assign discratch directly if scratch is already DI.  */
> -      if (icode == CODE_FOR_vsx_lxvrdx)
> -       discratch = scratch;
> +       {
> +         temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
> +         emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
> +       }
> +      else if (icode == CODE_FOR_vsx_lxvrdx)
> +       discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
> +      else
> +       gcc_unreachable ();
>
> -      /* Emit the sign extension from DI (double) to TI (quad). */
> -      emit_insn (gen_extendditi2 (target, discratch));
> +      /* Emit the sign extension from V2DI (double) to TI (quad).  */
> +      temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
> +      emit_insn (gen_extendditi2_vector (target, temp2));
>
>        return target;
>      }
> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
> index bcb92be2f5c..987f21bbc22 100644
> --- a/gcc/config/rs6000/vsx.md
> +++ b/gcc/config/rs6000/vsx.md
> @@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_<mode>"
>    "vextsh2<wd> %0,%1"
>    [(set_attr "type" "vecexts")])
>
> -(define_insn "*vsx_sign_extend_si_v2di"
> +(define_insn "vsx_sign_extend_si_v2di"
>    [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
>         (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
>                      UNSPEC_VSX_SIGN_EXTEND))]
> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c 
> b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
> new file mode 100644
> index 00000000000..78e72ac5425
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
> +
> +#include <altivec.h>
> +
> +vector signed __int128
> +foo1 (signed long a, signed char *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo2 (signed long a, signed short *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo3 (signed long a, signed int *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +vector signed __int128
> +foo4 (signed long a, signed long *b)
> +{
> +  return vec_xl_sext (a, b);
> +}
> +
> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
> +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */
>
>
> On 10/9/2021 下午 8:18, Bill Schmidt wrote:
>>
>>
>> On 9/10/21 12:45 AM, HAO CHEN GUI wrote:
>>> Bill,
>>>
>>>     Thanks so much for your advice.
>>>
>>>     I refined the patch and passed the bootstrap and regression test.
>>> Just one thing, the test case becomes unsupported on P9 if I set "{
>>> dg-require-effective-target power10_ok }". I just want the test case to
>>> be compiled and check its assembly. Do we need set "power10_ok"?
>>
>> Yes.  power10_ok tests whether your toolchain supports *assembly* of p10 
>> instructions.  It isn't unsupported on P9 per se; it's unsupported if the 
>> toolchain you're using on P9 doesn't have binutils support for P10.
>>
>> You can look at gcc/testsuite/lib/target-supports.exp to see how this stuff 
>> works.  In this case we have:
>>
>> proc check_effective_target_power10_ok { } {
>>      if { ([istarget powerpc64*-*-linux*]) } {
>>     return [check_no_compiler_messages power10_ok object {
>>              int main (void) {
>>                  long e;
>>                  asm ("pli %0,%1" : "=r" (e) : "n" (0x12345));
>>                  return e;
>>         }
>>          } "-mcpu=power10"]
>>      } else {
>>          return 0
>>      }
>> }
>>
>>>     I tried to disable line wrap in my email editor. Please let me know
>>> if you still see line wrap. Thanks.
>>
>> Thanks, but unfortunately it's still broken as I still see:
>>
>> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode,
>> tree exp, rtx target, bool bl
>>
>> This should all be on one line.  The headers show:
>>
>> Content-Type: text/plain; charset=utf-8; format=flowed
>>
>> Not sure what mailer you're using, but often you can avoid problems by 
>> marking the patch portion as untouchable in some way.  In Thunderbird, you 
>> can set it to Preformat, as an example.  Then you can get line wrap on the 
>> non-patch portions if you prefer that.
>>
>>> ChangeLog
>>>
>>> 2021-09-10 Haochen Gui<guih...@linux.ibm.com>
>>>
>>> gcc/
>>>           * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
>>>           Modify the expansion for sign extension. All extentions are done
>>>           within VSX resgisters.
>>>           * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.
>>>
>>> gcc/testsuite/
>>>           * gcc.target/powerpc/p10_vec_xl_sext.c: New test.
>>>
>>> patch.diff
>>>
>>> diff --git a/gcc/config/rs6000/rs6000-call.c
>>> b/gcc/config/rs6000/rs6000-call.c
>>> index b4e13af4dc6..587e9fa2a2a 100644
>>> --- a/gcc/config/rs6000/rs6000-call.c
>>> +++ b/gcc/config/rs6000/rs6000-call.c
>>> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode,
>>> tree exp, rtx target, bool bl
>>>
>>>      if (sign_extend)
>>>        {
>>> -      rtx discratch = gen_reg_rtx (DImode);
>>> +      rtx discratch = gen_reg_rtx (V2DImode);
>>>          rtx tiscratch = gen_reg_rtx (TImode);
>>>
>>>          /* Emit the lxvr*x insn.  */
>>> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code
>>> icode, tree exp, rtx target, bool bl
>>>           return 0;
>>>          emit_insn (pat);
>>>
>>> -      /* Emit a sign extension from QI,HI,WI to double (DI).  */
>>> -      rtx scratch = gen_lowpart (smode, tiscratch);
>>> +      /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
>>> +      rtx temp1, temp2;
>>>          if (icode == CODE_FOR_vsx_lxvrbx)
>>> -       emit_insn (gen_extendqidi2 (discratch, scratch));
>>> +       {
>>> +         temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
>>> +         emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
>>> +       }
>>>          else if (icode == CODE_FOR_vsx_lxvrhx)
>>> -       emit_insn (gen_extendhidi2 (discratch, scratch));
>>> +       {
>>> +         temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
>>> +         emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
>>> +       }
>>>          else if (icode == CODE_FOR_vsx_lxvrwx)
>>> -       emit_insn (gen_extendsidi2 (discratch, scratch));
>>> -      /*  Assign discratch directly if scratch is already DI.  */
>>> -      if (icode == CODE_FOR_vsx_lxvrdx)
>>> -       discratch = scratch;
>>> +       {
>>> +         temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
>>> +         emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
>>> +       }
>>> +      else if (icode == CODE_FOR_vsx_lxvrdx)
>>> +       discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
>>> +      else
>>> +       gcc_unreachable ();
>>>
>>> -      /* Emit the sign extension from DI (double) to TI (quad). */
>>> -      emit_insn (gen_extendditi2 (target, discratch));
>>> +      /* Emit the sign extension from V2DI (double) to TI (quad).  */
>>> +      temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
>>> +      emit_insn (gen_extendditi2_vector (target, temp2));
>>>
>>>          return target;
>>>        }
>>> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
>>> index bcb92be2f5c..987f21bbc22 100644
>>> --- a/gcc/config/rs6000/vsx.md
>>> +++ b/gcc/config/rs6000/vsx.md
>>> @@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_<mode>"
>>>      "vextsh2<wd> %0,%1"
>>>      [(set_attr "type" "vecexts")])
>>>
>>> -(define_insn "*vsx_sign_extend_si_v2di"
>>> +(define_insn "vsx_sign_extend_si_v2di"
>>>      [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
>>>           (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
>>>                        UNSPEC_VSX_SIGN_EXTEND))]
>>> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>>> b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>>> new file mode 100644
>>> index 00000000000..fcecc542d07
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>>> @@ -0,0 +1,34 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-require-effective-target int128 } */
>>
>> As discussed, add the power10_ok directive.
>>
>> Otherwise looks fine to me, will let Segher have a look. :)
>>
>> Thanks!
>> Bill
>>
>>> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
>>> +
>>> +#include <altivec.h>
>>> +
>>> +vector signed __int128
>>> +foo1 (signed long a, signed char *b)
>>> +{
>>> +  return vec_xl_sext (a, b);
>>> +}
>>> +
>>> +vector signed __int128
>>> +foo2 (signed long a, signed short *b)
>>> +{
>>> +  return vec_xl_sext (a, b);
>>> +}
>>> +
>>> +vector signed __int128
>>> +foo3 (signed long a, signed int *b)
>>> +{
>>> +  return vec_xl_sext (a, b);
>>> +}
>>> +
>>> +vector signed __int128
>>> +foo4 (signed long a, signed long *b)
>>> +{
>>> +  return vec_xl_sext (a, b);
>>> +}
>>> +
>>> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
>>> +/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
>>> +/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
>>> +/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */
>>>
>>>
>>> On 10/9/2021 上午 4:49, Bill Schmidt wrote:
>>>> Hi Haochen,
>>>>
>>>> This patch was sent with "format=flowed", so it doesn't apply. That
>>>> makes it harder to review.  Can you please make sure you disable line
>>>> wrap from your patch submissions, at least in the patch part?
>>>>
>>>> On 9/6/21 1:18 AM, HAO CHEN GUI wrote:
>>>>> Hi,
>>>>>
>>>>>       The patch optimized the code generation for vec_xl_sext builtin.
>>>>> Now
>>>>> all the sign extensions are done on VSX registers directly.
>>>>>
>>>>>       Bootstrapped and tested on powerpc64le-linux with no
>>>>> regressions. Is
>>>>> this okay for trunk? Any recommendations? Thanks a lot.
>>>>>
>>>>> ChangeLog
>>>>>
>>>>> 2021-09-06 Haochen Gui<guih...@linux.ibm.com>
>>>>>
>>>>> gcc/
>>>>>        * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
>>>>>        Modify the expansion for sign extension.
>>>>>        * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.
>>>>>
>>>>> gcc/testsuite/
>>>>>        * gcc.target/powerpc/p10_vec_xl_sext.c: New test.
>>>>>
>>>>>
>>>>> patch.diff
>>>>>
>>>>> diff --git a/gcc/config/rs6000/rs6000-call.c
>>>>> b/gcc/config/rs6000/rs6000-call.c
>>>>> index b4e13af4dc6..54fdaf47be3 100644
>>>>> --- a/gcc/config/rs6000/rs6000-call.c
>>>>> +++ b/gcc/config/rs6000/rs6000-call.c
>>>>> @@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode,
>>>>> tree exp, rtx target, bool bl
>>>> Above is the first wrapped line that causes the patch to not apply.
>>>>>       if (sign_extend)
>>>>>         {
>>>>> -      rtx discratch = gen_reg_rtx (DImode);
>>>>> +      rtx discratch = gen_reg_rtx (V2DImode);
>>>>>           rtx tiscratch = gen_reg_rtx (TImode);
>>>>>
>>>>>           /* Emit the lxvr*x insn.  */
>>>>> @@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code
>>>>> icode, tree exp, rtx target, bool bl
>>>>>         return 0;
>>>>>           emit_insn (pat);
>>>>>
>>>>> -      /* Emit a sign extension from QI,HI,WI to double (DI). */
>>>>> -      rtx scratch = gen_lowpart (smode, tiscratch);
>>>>> +      /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. */
>>>>> +      rtx temp1, temp2;
>>>>>           if (icode == CODE_FOR_vsx_lxvrbx)
>>>>> -    emit_insn (gen_extendqidi2 (discratch, scratch));
>>>>> +    {
>>>>> +      temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
>>>>> +      emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
>>>>> +    }
>>>>>           else if (icode == CODE_FOR_vsx_lxvrhx)
>>>>> -    emit_insn (gen_extendhidi2 (discratch, scratch));
>>>>> +    {
>>>>> +      temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
>>>>> +      emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
>>>>> +    }
>>>>>           else if (icode == CODE_FOR_vsx_lxvrwx)
>>>>> -    emit_insn (gen_extendsidi2 (discratch, scratch));
>>>>> -      /*  Assign discratch directly if scratch is already DI. */
>>>>> -      if (icode == CODE_FOR_vsx_lxvrdx)
>>>>> -    discratch = scratch;
>>>>> +    {
>>>>> +      temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
>>>>> +      emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
>>>>> +    }
>>>>> +      else if (icode == CODE_FOR_vsx_lxvrdx)
>>>>> +    discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
>>>>> +      else
>>>>> +    return 0;
>>>> Please call gcc_unreachable () here, instead of returning 0.  If we
>>>> call this function with an unexpected insn_code, we want to ICE right
>>>> away to make it easier to debug the problem.
>>>>
>>>>> -      /* Emit the sign extension from DI (double) to TI (quad). */
>>>>> -      emit_insn (gen_extendditi2 (target, discratch));
>>>>> +      /* Emit the sign extension from V2DI (double) to TI (quad).  */
>>>>> +      temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
>>>>> +      emit_insn (gen_extendditi2_vector (target, temp2));
>>>>>
>>>>>           return target;
>>>>>         }
>>>>> diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
>>>>> index bcb92be2f5c..987f21bbc22 100644
>>>>> --- a/gcc/config/rs6000/vsx.md
>>>>> +++ b/gcc/config/rs6000/vsx.md
>>>>> @@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_<mode>"
>>>>>       "vextsh2<wd> %0,%1"
>>>>>       [(set_attr "type" "vecexts")])
>>>>>
>>>>> -(define_insn "*vsx_sign_extend_si_v2di"
>>>>> +(define_insn "vsx_sign_extend_si_v2di"
>>>>>       [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
>>>>>         (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
>>>>>                  UNSPEC_VSX_SIGN_EXTEND))]
>>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>>>>> b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>>>>> new file mode 100644
>>>>> index 00000000000..e5b84e6167a
>>>>> --- /dev/null
>>>>> +++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
>>>>> @@ -0,0 +1,32 @@
>>>>> +/* { dg-do compile } */
>>>>> +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
>>>> You need a { dg-require-effective-target int128 } directive.  Also {
>>>> dg-require-effective-target power10_ok }.
>>>>> +
>>>>> +#include <altivec.h>
>>>>> +
>>>>> +vector signed __int128
>>>>> +foo1 (signed long a, signed char *b)
>>>>> +{
>>>>> +  return vec_xl_sext (a, b);
>>>>> +}
>>>>> +
>>>>> +vector signed __int128
>>>>> +foo2 (signed long a, signed short *b)
>>>>> +{
>>>>> +  return vec_xl_sext (a, b);
>>>>> +}
>>>>> +
>>>>> +vector signed __int128
>>>>> +foo3 (signed long a, signed int *b)
>>>>> +{
>>>>> +  return vec_xl_sext (a, b);
>>>>> +}
>>>>> +
>>>>> +vector signed __int128
>>>>> +foo4 (signed long a, signed long *b)
>>>>> +{
>>>>> +  return vec_xl_sext (a, b);
>>>>> +}
>>>>> +
>>>>> +/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
>>>>> +/* { dg-final { scan-assembler-times
>>>>> {\mvextsh2d\M|\mvextsw2d\M|\mvextsb2d\M} 3 } } */
>>>> I'd rather see the exact number of times each instruction is expected,
>>>> since this would match if all of foo1, foo2, and foo3 emitted
>>>> vextsb2d, for example.
>>>>
>>>> I don't see any additional problems...over to Segher.
>>>>
>>>> Thanks for the patch!
>>>> Bill
>>>>
>>>>> +/* { dg-final { scan-assembler-not "mtvsrdd"   } } */
>>>>>

Reply via email to