Optimize multiply/add of DImode extended to TImode, PR target/103109.

On power9 and power10 systems, we have instructions that support doing
64-bit integers converted to 128-bit integers and producing 128-bit
results.  This patch adds support to generate these instructions.

Previously GCC had define_expands to handle conversion of the 64-bit
extend to 128-bit and multiply.  This patch changes these define_expands
to define_insn_and_split and then it provides combiner patterns to
generate thes multiply/add instructions.

To support using this optimization on power9, this patch extends the sign
extend DImode to TImode to also run on power9 (added for PR
target/104698).

This patch needs the previous patch to add unsigned DImode to TImode
conversion so that the combiner can combine the extend, multiply, and add
instructions.

I have built this patch on little endian power10, little endian power9, and big
endian power8 systems.  There were no regressions when I ran it.  Can I install
this patch into the GCC 13 master branch?

2022-05-13   Michael Meissner  <meiss...@linux.ibm.com>

gcc/
        PR target/103109
        * config/rs6000/rs6000.md (su_int32): New code attribute.
        (<u>mul<mode><dmode>3): Convert from define_expand to
        define_insn_and_split.
        (maddld<mode>4): Add generator function.
        (<u>mulditi3_<u>adddi3): New insn.
        (<u>mulditi3_add_const): New insn.
        (<u>mulditi3_<u>adddi3_upper): New insn.

gcc/testsuite/
        PR target/103109
        * gcc.target/powerpc/pr103109.c: New test.
---
 gcc/config/rs6000/rs6000.md                 | 128 +++++++++++++++++++-
 gcc/testsuite/gcc.target/powerpc/pr103109.c |  62 ++++++++++
 2 files changed, 184 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr103109.c

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 2aba70393d8..83eacec57ba 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -667,6 +667,9 @@ (define_code_attr uns [(fix         "")
                       (float           "")
                       (unsigned_float  "uns")])
 
+(define_code_attr su_int32 [(sign_extend "s32bit_cint_operand")
+                           (zero_extend "c32bit_cint_operand")])
+
 ; Various instructions that come in SI and DI forms.
 ; A generic w/d attribute, for things like cmpw/cmpd.
 (define_mode_attr wd [(QI    "b")
@@ -3190,13 +3193,16 @@ (define_insn "<su>mulsi3_highpart_64"
   "mulhw<u> %0,%1,%2"
   [(set_attr "type" "mul")])
 
-(define_expand "<u>mul<mode><dmode>3"
-  [(set (match_operand:<DMODE> 0 "gpc_reg_operand")
+(define_insn_and_split "<u>mul<mode><dmode>3"
+  [(set (match_operand:<DMODE> 0 "gpc_reg_operand" "=&r")
        (mult:<DMODE> (any_extend:<DMODE>
-                       (match_operand:GPR 1 "gpc_reg_operand"))
+                      (match_operand:GPR 1 "gpc_reg_operand" "r"))
                      (any_extend:<DMODE>
-                       (match_operand:GPR 2 "gpc_reg_operand"))))]
+                      (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
   "!(<MODE>mode == SImode && TARGET_POWERPC64)"
+  "#"
+  "&& 1"
+  [(pc)]
 {
   rtx l = gen_reg_rtx (<MODE>mode);
   rtx h = gen_reg_rtx (<MODE>mode);
@@ -3205,9 +3211,10 @@ (define_expand "<u>mul<mode><dmode>3"
   emit_move_insn (gen_lowpart (<MODE>mode, operands[0]), l);
   emit_move_insn (gen_highpart (<MODE>mode, operands[0]), h);
   DONE;
-})
+}
+  [(set_attr "length" "8")])
 
-(define_insn "*maddld<mode>4"
+(define_insn "maddld<mode>4"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
        (plus:GPR (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
                            (match_operand:GPR 2 "gpc_reg_operand" "r"))
@@ -3216,6 +3223,115 @@ (define_insn "*maddld<mode>4"
   "maddld %0,%1,%2,%3"
   [(set_attr "type" "mul")])
 
+(define_insn_and_split "*<u>mulditi3_<u>adddi3"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+       (plus:TI
+        (mult:TI
+         (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+         (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+        (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r"))))]
+  "TARGET_MADDLD && TARGET_POWERPC64"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  rtx dest = operands[0];
+  rtx dest_hi = gen_highpart (DImode, dest);
+  rtx dest_lo = gen_lowpart (DImode, dest);
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  rtx op3 = operands[3];
+  rtx tmp_hi, tmp_lo;
+
+  if (can_create_pseudo_p ())
+    {
+      tmp_hi = gen_reg_rtx (DImode);
+      tmp_lo = gen_reg_rtx (DImode);
+    }
+  else
+    {
+      tmp_hi = dest_hi;
+      tmp_lo = dest_lo;
+    }
+
+  emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+  emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+  if (can_create_pseudo_p ())
+    {
+      emit_move_insn (dest_hi, tmp_hi);
+      emit_move_insn (dest_lo, tmp_lo);
+    }
+  DONE;
+}
+  [(set_attr "length" "8")])
+
+;; Optimize 128-bit multiply with zero/sign extend and adding a constant.  We
+;; force the constant into a register to generate li, maddhd, and maddld,
+;; instead of mulld, mulhd, addic, and addze.  We can't combine this pattern
+;; with the pattern that handles registers, since constants don't have a sign
+;; or zero extend around them.
+(define_insn_and_split "*<u>mulditi3_add_const"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=&r")
+       (plus:TI
+        (mult:TI
+         (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+         (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+        (match_operand 3 "<su_int32>" "r")))]
+  "TARGET_MADDLD && TARGET_POWERPC64
+"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  rtx dest = operands[0];
+  rtx dest_hi = gen_highpart (DImode, dest);
+  rtx dest_lo = gen_lowpart (DImode, dest);
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  rtx op3 = force_reg (DImode, operands[3]);
+  rtx tmp_hi, tmp_lo;
+
+  if (can_create_pseudo_p ())
+    {
+      tmp_hi = gen_reg_rtx (DImode);
+      tmp_lo = gen_reg_rtx (DImode);
+    }
+  else
+    {
+      tmp_hi = dest_hi;
+      tmp_lo = dest_lo;
+    }
+
+  emit_insn (gen_<u>mulditi3_<u>adddi3_upper (tmp_hi, op1, op2, op3));
+  emit_insn (gen_maddlddi4 (tmp_lo, op1, op2, op3));
+
+  if (can_create_pseudo_p ())
+    {
+      emit_move_insn (dest_hi, tmp_hi);
+      emit_move_insn (dest_lo, tmp_lo);
+    }
+  DONE;
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "mul")
+   (set_attr "size" "64")])
+
+(define_insn "<u>mulditi3_<u>adddi3_upper"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+       (truncate:DI
+        (lshiftrt:TI
+         (plus:TI
+          (mult:TI
+           (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+           (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+          (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r")))
+         (const_int 64))))]
+  "TARGET_MADDLD && TARGET_POWERPC64"
+  "maddhd<u> %0,%1,%2,%3"
+  [(set_attr "type" "mul")
+   (set_attr "size" "64")])
+
 (define_insn "udiv<mode>3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
         (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103109.c 
b/gcc/testsuite/gcc.target/powerpc/pr103109.c
new file mode 100644
index 00000000000..ae2cfb9eda7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103109.c
@@ -0,0 +1,62 @@
+/* { dg-require-effective-target int128     } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+
+/* This test makes sure that GCC generates the maddhd, maddhdu, and maddld
+   power9 instructions when doing some forms of 64-bit integers converted to
+   128-bit integers and used with multiply/add operations.  */
+
+__int128_t
+s_mult_add (long long a,
+           long long b,
+           long long c)
+{
+  /* maddhd, maddld.  */
+  return ((__int128_t)a * (__int128_t)b) + (__int128_t)c;
+}
+
+/* Test 32-bit constants that are loaded into GPRs instead of doing the
+   mulld/mulhd and then addic/addime or addc/addze.  */
+__int128_t
+s_mult_add_m10 (long long a,
+               long long b)
+{
+  /* maddhd, maddld.  */
+  return ((__int128_t)a * (__int128_t)b) - 10;
+}
+
+__int128_t
+s_mult_add_70000 (long long a,
+                 long long b)
+{
+  /* maddhd, maddld.  */
+  return ((__int128_t)a * (__int128_t)b) + 70000;
+}
+
+__uint128_t
+u_mult_add (unsigned long long a,
+           unsigned long long b,
+           unsigned long long c)
+{
+  /* maddhd, maddld.  */
+  return ((__uint128_t)a * (__uint128_t)b) + (__uint128_t)c;
+}
+
+__uint128_t
+u_mult_add_0x80000000 (unsigned long long a,
+                      unsigned long long b)
+{
+  /* maddhd, maddld.  */
+  return ((__uint128_t)a * (__uint128_t)b) + 0x80000000UL;
+}
+
+/* { dg-final { scan-assembler-not   {\maddc\M}      } } */
+/* { dg-final { scan-assembler-not   {\madde\M}      } } */
+/* { dg-final { scan-assembler-not   {\maddid\M}     } } */
+/* { dg-final { scan-assembler-not   {\maddme\M}     } } */
+/* { dg-final { scan-assembler-not   {\maddze\M}     } } */
+/* { dg-final { scan-assembler-not   {\mmulhd\M}     } } */
+/* { dg-final { scan-assembler-not   {\mmulld\M}     } } */
+/* { dg-final { scan-assembler-times {\mmaddhd\M}  3 } } */
+/* { dg-final { scan-assembler-times {\mmaddhdu\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mmaddld\M}  5 } } */
-- 
2.35.3


-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com

Reply via email to