Bill,

  Thanks so much for your advice.

  I refined the patch and passed the bootstrap and regression test. Just one thing, the test case becomes unsupported on P9 if I set "{ dg-require-effective-target power10_ok }". I just want the test case to be compiled and check its assembly. Do we need set "power10_ok"?

  I tried to disable line wrap in my email editor. Please let me know if you still see line wrap. Thanks.

ChangeLog

2021-09-10 Haochen Gui <guih...@linux.ibm.com>

gcc/
        * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
        Modify the expansion for sign extension. All extentions are done
        within VSX resgisters.
        * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.

gcc/testsuite/
        * gcc.target/powerpc/p10_vec_xl_sext.c: New test.

patch.diff

diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl

   if (sign_extend)
     {
-      rtx discratch = gen_reg_rtx (DImode);
+      rtx discratch = gen_reg_rtx (V2DImode);
       rtx tiscratch = gen_reg_rtx (TImode);

       /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code icode, tree exp, rtx target, bool bl
        return 0;
       emit_insn (pat);

-      /* Emit a sign extension from QI,HI,WI to double (DI).  */
-      rtx scratch = gen_lowpart (smode, tiscratch);
+      /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI.  */
+      rtx temp1, temp2;
       if (icode == CODE_FOR_vsx_lxvrbx)
-       emit_insn (gen_extendqidi2 (discratch, scratch));
+       {
+         temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+         emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+       }
       else if (icode == CODE_FOR_vsx_lxvrhx)
-       emit_insn (gen_extendhidi2 (discratch, scratch));
+       {
+         temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+         emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+       }
       else if (icode == CODE_FOR_vsx_lxvrwx)
-       emit_insn (gen_extendsidi2 (discratch, scratch));
-      /*  Assign discratch directly if scratch is already DI.  */
-      if (icode == CODE_FOR_vsx_lxvrdx)
-       discratch = scratch;
+       {
+         temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+         emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+       }
+      else if (icode == CODE_FOR_vsx_lxvrdx)
+       discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+      else
+       gcc_unreachable ();

-      /* Emit the sign extension from DI (double) to TI (quad). */
-      emit_insn (gen_extendditi2 (target, discratch));
+      /* Emit the sign extension from V2DI (double) to TI (quad).  */
+      temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+      emit_insn (gen_extendditi2_vector (target, temp2));

       return target;
     }
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bcb92be2f5c..987f21bbc22 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_<mode>"
   "vextsh2<wd> %0,%1"
   [(set_attr "type" "vecexts")])

-(define_insn "*vsx_sign_extend_si_v2di"
+(define_insn "vsx_sign_extend_si_v2di"
   [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
        (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
                     UNSPEC_VSX_SIGN_EXTEND))]
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
new file mode 100644
index 00000000000..fcecc542d07
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */


On 10/9/2021 上午 4:49, Bill Schmidt wrote:
Hi Haochen,

This patch was sent with "format=flowed", so it doesn't apply. That makes it harder to review.  Can you please make sure you disable line wrap from your patch submissions, at least in the patch part?

On 9/6/21 1:18 AM, HAO CHEN GUI wrote:
Hi,

     The patch optimized the code generation for vec_xl_sext builtin. Now
all the sign extensions are done on VSX registers directly.

     Bootstrapped and tested on powerpc64le-linux with no regressions. Is
this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog

2021-09-06 Haochen Gui <guih...@linux.ibm.com>

gcc/
      * config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
      Modify the expansion for sign extension.
      * gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.

gcc/testsuite/
      * gcc.target/powerpc/p10_vec_xl_sext.c: New test.


patch.diff

diff --git a/gcc/config/rs6000/rs6000-call.c
b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..54fdaf47be3 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode,
tree exp, rtx target, bool bl

Above is the first wrapped line that causes the patch to not apply.

     if (sign_extend)
       {
-      rtx discratch = gen_reg_rtx (DImode);
+      rtx discratch = gen_reg_rtx (V2DImode);
         rtx tiscratch = gen_reg_rtx (TImode);

         /* Emit the lxvr*x insn.  */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code
icode, tree exp, rtx target, bool bl
       return 0;
         emit_insn (pat);

-      /* Emit a sign extension from QI,HI,WI to double (DI). */
-      rtx scratch = gen_lowpart (smode, tiscratch);
+      /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. */
+      rtx temp1, temp2;
         if (icode == CODE_FOR_vsx_lxvrbx)
-    emit_insn (gen_extendqidi2 (discratch, scratch));
+    {
+      temp1  = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+      emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+    }
         else if (icode == CODE_FOR_vsx_lxvrhx)
-    emit_insn (gen_extendhidi2 (discratch, scratch));
+    {
+      temp1  = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+      emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+    }
         else if (icode == CODE_FOR_vsx_lxvrwx)
-    emit_insn (gen_extendsidi2 (discratch, scratch));
-      /*  Assign discratch directly if scratch is already DI. */
-      if (icode == CODE_FOR_vsx_lxvrdx)
-    discratch = scratch;
+    {
+      temp1  = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+      emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+    }
+      else if (icode == CODE_FOR_vsx_lxvrdx)
+    discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+      else
+    return 0;


Please call gcc_unreachable () here, instead of returning 0.  If we call this function with an unexpected insn_code, we want to ICE right away to make it easier to debug the problem.


-      /* Emit the sign extension from DI (double) to TI (quad). */
-      emit_insn (gen_extendditi2 (target, discratch));
+      /* Emit the sign extension from V2DI (double) to TI (quad).  */
+      temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+      emit_insn (gen_extendditi2_vector (target, temp2));

         return target;
       }
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bcb92be2f5c..987f21bbc22 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_<mode>"
     "vextsh2<wd> %0,%1"
     [(set_attr "type" "vecexts")])

-(define_insn "*vsx_sign_extend_si_v2di"
+(define_insn "vsx_sign_extend_si_v2di"
     [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
       (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
                UNSPEC_VSX_SIGN_EXTEND))]
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
new file mode 100644
index 00000000000..e5b84e6167a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */

You need a { dg-require-effective-target int128 } directive.  Also { dg-require-effective-target power10_ok }.
+
+#include <altivec.h>
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+  return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times
{\mvextsh2d\M|\mvextsw2d\M|\mvextsb2d\M} 3 } } */

I'd rather see the exact number of times each instruction is expected, since this would match if all of foo1, foo2, and foo3 emitted vextsb2d, for example.

I don't see any additional problems...over to Segher.

Thanks for the patch!
Bill

+/* { dg-final { scan-assembler-not "mtvsrdd"   } } */

Reply via email to