Hi,
The patch optimized the code generation for vec_xl_sext builtin. Now
all the sign extensions are done on VSX registers directly.
Bootstrapped and tested on powerpc64le-linux with no regressions. Is
this okay for trunk? Any recommendations? Thanks a lot.
ChangeLog
2021-09-06 Haochen Gui <guih...@linux.ibm.com>
gcc/
* config/rs6000/rs6000-call.c (altivec_expand_lxvr_builtin):
Modify the expansion for sign extension.
* gcc/config/rs6000/vsx.md (vsx_sign_extend_si_v2di): Define.
gcc/testsuite/
* gcc.target/powerpc/p10_vec_xl_sext.c: New test.
patch.diff
diff --git a/gcc/config/rs6000/rs6000-call.c
b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..54fdaf47be3 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9779,7 +9779,7 @@ altivec_expand_lxvr_builtin (enum insn_code icode,
tree exp, rtx target, bool bl
if (sign_extend)
{
- rtx discratch = gen_reg_rtx (DImode);
+ rtx discratch = gen_reg_rtx (V2DImode);
rtx tiscratch = gen_reg_rtx (TImode);
/* Emit the lxvr*x insn. */
@@ -9788,20 +9788,31 @@ altivec_expand_lxvr_builtin (enum insn_code
icode, tree exp, rtx target, bool bl
return 0;
emit_insn (pat);
- /* Emit a sign extension from QI,HI,WI to double (DI). */
- rtx scratch = gen_lowpart (smode, tiscratch);
+ /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. */
+ rtx temp1, temp2;
if (icode == CODE_FOR_vsx_lxvrbx)
- emit_insn (gen_extendqidi2 (discratch, scratch));
+ {
+ temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1));
+ }
else if (icode == CODE_FOR_vsx_lxvrhx)
- emit_insn (gen_extendhidi2 (discratch, scratch));
+ {
+ temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1));
+ }
else if (icode == CODE_FOR_vsx_lxvrwx)
- emit_insn (gen_extendsidi2 (discratch, scratch));
- /* Assign discratch directly if scratch is already DI. */
- if (icode == CODE_FOR_vsx_lxvrdx)
- discratch = scratch;
+ {
+ temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0);
+ emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1));
+ }
+ else if (icode == CODE_FOR_vsx_lxvrdx)
+ discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0);
+ else
+ return 0;
- /* Emit the sign extension from DI (double) to TI (quad). */
- emit_insn (gen_extendditi2 (target, discratch));
+ /* Emit the sign extension from V2DI (double) to TI (quad). */
+ temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0);
+ emit_insn (gen_extendditi2_vector (target, temp2));
return target;
}
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bcb92be2f5c..987f21bbc22 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4830,7 +4830,7 @@ (define_insn "vsx_sign_extend_hi_<mode>"
"vextsh2<wd> %0,%1"
[(set_attr "type" "vecexts")])
-(define_insn "*vsx_sign_extend_si_v2di"
+(define_insn "vsx_sign_extend_si_v2di"
[(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
(unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
UNSPEC_VSX_SIGN_EXTEND))]
diff --git a/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
new file mode 100644
index 00000000000..e5b84e6167a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10_vec_xl_sext.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+#include <altivec.h>
+
+vector signed __int128
+foo1 (signed long a, signed char *b)
+{
+ return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo2 (signed long a, signed short *b)
+{
+ return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo3 (signed long a, signed int *b)
+{
+ return vec_xl_sext (a, b);
+}
+
+vector signed __int128
+foo4 (signed long a, signed long *b)
+{
+ return vec_xl_sext (a, b);
+}
+
+/* { dg-final { scan-assembler-times {\mvextsd2q\M} 4 } } */
+/* { dg-final { scan-assembler-times
{\mvextsh2d\M|\mvextsw2d\M|\mvextsb2d\M} 3 } } */
+/* { dg-final { scan-assembler-not "mtvsrdd" } } */