Bill,
Thanks so much for your advice.
I refined the patch and passed the bootstrap and regression test.
Just one thing, the test case becomes unsupported on P9 if I set "{
dg-require-effective-target power10_ok }". I just want the test case to
be compiled and check its assembly. Do we need set "power10_ok"?
I tried to disable line wrap in my email editor. Please let me know
if you still see line wrap. Thanks.
ChangeLog
2021-09-10 Haochen Gui <guih...@linux.ibm.com>
gcc/
* config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
Modify the expansion for sign extension. All extentions are done
within VSX resgisters.
* gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.
gcc/testsuite/
* gcc.target/powerpc/p10_vec_xl_sext.c: New test.
patch.diff
diff --git a/gcc/config/rs6000/rs6000-call.c
b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..587e9fa2a2a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode,
tree exp, rtx target, bool bl
if (sign_extend)
{
- rtx discratch = gen_reg_rtx (DImode);
+ rtx discratch = gen_reg_rtx (V2DImode);
rtx tiscratch = gen_reg_rtx (TImode);
/* Emit the lxvr*x insn. */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code
icode, tree exp, rtx target, bool bl
return 0;
emit_insn (pat);
- /* Emit a sign extension from QI,HI,WI to double (DI). */
- rtx scratch = gen_lowpart (smode, tiscratch);
+ /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. */
+ rtx temp1, temp2;
if (icode == CODE_FOR_vsx_lxvrbx)
- emit_insn (gen_extendqidi2 (discratch, scratch));
+ {
+ temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+ }
else if (icode == CODE_FOR_vsx_lxvrhx)
- emit_insn (gen_extendhidi2 (discratch, scratch));
+ {
+ temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+ }
else if (icode == CODE_FOR_vsx_lxvrwx)
- emit_insn (gen_extendsidi2 (discratch, scratch));
- /* Assign discratch directly if scratch is already DI. */
- if (icode == CODE_FOR_vsx_lxvrdx)
- discratch = scratch;
+ {
+ temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+ }
+ else if (icode == CODE_FOR_vsx_lxvrdx)
+ discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+ else
+ gcc_unreachable ();
- /* Emit the sign extension from DI (double) to TI (quad). */
- emit_insn (gen_extendditi2 (target, discratch));
+ /* Emit the sign extension from V2DI (double) to TI (quad). */
+ temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+ emit_insn (gen_extendditi2_vector (target, temp2));
return target;
}
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bcb92be2f5c..987f21bbc22 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_<mode>"
"vextsh2<wd> %0,%1"
[(set_attr "type" "vecexts")])
-(define_insn "*vsx_sign_extend_si_v2di"
+(define_insn "vsx_sign_extend_si_v2di"
[(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
(unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
UNSPEC_VSX_SIGN_EXTEND))]
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
new file mode 100644
index 00000000000..fcecc542d07
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target int128 } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+ return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+ return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+ return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+ return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mvextsb2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsh2d\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvextsw2d\M} 1 } } */
On 10/9/2021 上午 4:49, Bill Schmidt wrote:
Hi Haochen,
This patch was sent with "format=flowed", so it doesn't apply. That
makes it harder to review. Can you please make sure you disable line
wrap from your patch submissions, at least in the patch part?
On 9/6/21 1:18 AM, HAO CHEN GUI wrote:
Hi,
The patch optimized the code generation for vec_xl_sext builtin.
Now
all the sign extensions are done on VSX registers directly.
Bootstrapped and tested on powerpc64le-linux with no
regressions. Is
this okay for trunk? Any recommendations? Thanks a lot.
ChangeLog
2021-09-06 Haochen Gui <guih...@linux.ibm.com>
gcc/
* config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
Modify the expansion for sign extension.
* gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.
gcc/testsuite/
* gcc.target/powerpc/p10_vec_xl_sext.c: New test.
patch.diff
diff --git a/gcc/config/rs6000/rs6000-call.c
b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..54fdaf47be3 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode,
tree exp, rtx target, bool bl
Above is the first wrapped line that causes the patch to not apply.
if (sign_extend)
{
- rtx discratch = gen_reg_rtx (DImode);
+ rtx discratch = gen_reg_rtx (V2DImode);
rtx tiscratch = gen_reg_rtx (TImode);
/* Emit the lxvr*x insn. */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code
icode, tree exp, rtx target, bool bl
return 0;
emit_insn (pat);
- /* Emit a sign extension from QI,HI,WI to double (DI). */
- rtx scratch = gen_lowpart (smode, tiscratch);
+ /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. */
+ rtx temp1, temp2;
if (icode == CODE_FOR_vsx_lxvrbx)
- emit_insn (gen_extendqidi2 (discratch, scratch));
+ {
+ temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+ }
else if (icode == CODE_FOR_vsx_lxvrhx)
- emit_insn (gen_extendhidi2 (discratch, scratch));
+ {
+ temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+ }
else if (icode == CODE_FOR_vsx_lxvrwx)
- emit_insn (gen_extendsidi2 (discratch, scratch));
- /* Assign discratch directly if scratch is already DI. */
- if (icode == CODE_FOR_vsx_lxvrdx)
- discratch = scratch;
+ {
+ temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+ }
+ else if (icode == CODE_FOR_vsx_lxvrdx)
+ discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+ else
+ return 0;
Please call gcc_unreachable () here, instead of returning 0. If we
call this function with an unexpected insn_code, we want to ICE right
away to make it easier to debug the problem.
- /* Emit the sign extension from DI (double) to TI (quad). */
- emit_insn (gen_extendditi2 (target, discratch));
+ /* Emit the sign extension from V2DI (double) to TI (quad). */
+ temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+ emit_insn (gen_extendditi2_vector (target, temp2));
return target;
}
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bcb92be2f5c..987f21bbc22 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_<mode>"
"vextsh2<wd> %0,%1"
[(set_attr "type" "vecexts")])
-(define_insn "*vsx_sign_extend_si_v2di"
+(define_insn "vsx_sign_extend_si_v2di"
[(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
(unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
UNSPEC_VSX_SIGN_EXTEND))]
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
new file mode 100644
index 00000000000..e5b84e6167a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
You need a { dg-require-effective-target int128 } directive. Also {
dg-require-effective-target power10_ok }.
+
+#include <altivec.h>
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+ return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+ return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+ return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+ return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times
{\mvextsh2d\M|\mvextsw2d\M|\mvextsb2d\M} 3 } } */
I'd rather see the exact number of times each instruction is expected,
since this would match if all of foo1, foo2, and foo3 emitted
vextsb2d, for example.
I don't see any additional problems...over to Segher.
Thanks for the patch!
Bill
+/* { dg-final { scan-assembler-not "mtvsrdd" } } */