Doh! ENOPATCH.  This time with attachments...
https://gcc.gnu.org/pipermail/gcc-patches/2021-August/576922.html

Roger
--

diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 108de1c..2b18f6a 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -614,6 +614,18 @@
   ""
   "%.\\tmul.hi.s32\\t%0, %1, %2;")
 
+(define_insn "smuldi3_highpart"
+  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
+       (truncate:DI
+        (lshiftrt:TI
+         (mult:TI (sign_extend:TI
+                   (match_operand:DI 1 "nvptx_register_operand" "R"))
+                  (sign_extend:TI
+                   (match_operand:DI 2 "nvptx_register_operand" "R")))
+         (const_int 64))))]
+  ""
+  "%.\\tmul.hi.s64\\t%0, %1, %2;")
+
 (define_insn "umulhi3_highpart"
   [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
        (truncate:HI
@@ -638,6 +650,18 @@
   ""
   "%.\\tmul.hi.u32\\t%0, %1, %2;")
 
+(define_insn "umuldi3_highpart"
+  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
+       (truncate:DI
+        (lshiftrt:TI
+         (mult:TI (zero_extend:TI
+                   (match_operand:DI 1 "nvptx_register_operand" "R"))
+                  (zero_extend:TI
+                   (match_operand:DI 2 "nvptx_register_operand" "R")))
+         (const_int 64))))]
+  ""
+  "%.\\tmul.hi.u64\\t%0, %1, %2;")
+
 ;; Shifts
 
 (define_insn "ashl<mode>3"
diff --git a/gcc/expr.c b/gcc/expr.c
index b65cfcf..c032d54 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -8904,6 +8904,91 @@ expand_expr_divmod (tree_code code, machine_mode mode, tree treeop0,
   return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp);
 }
 
+/* Helper function of expand_expr_real_2, to recognize and expand a
+   (rshift (mult (convert X) (convert Y)) INTEGER_CST) as a highpart
+   multiplication.  This also handles multiplication by a constant.  */
+
+static rtx
+try_expand_mult_highpart (sepops ops, rtx target, machine_mode tmode)
+{
+  tree stype = ops->type;
+  tree sarg0 = ops->op0;
+  tree sarg1 = ops->op1;
+
+  if (!INTEGRAL_TYPE_P (stype)
+      || TREE_CODE (sarg1) != INTEGER_CST
+      || TREE_CODE (sarg0) != SSA_NAME
+      || !tree_fits_uhwi_p (sarg1))
+    return NULL_RTX;
+
+  gimple *def = get_def_for_expr (sarg0, MULT_EXPR);
+  if (!def)
+    {
+      /* Allow NOP_EXPR between the multiplication and the rshift.  */
+      def = get_def_for_expr (sarg0, NOP_EXPR);
+      if (def)
+       def = get_def_for_expr (gimple_assign_rhs1 (def), MULT_EXPR);
+      if (!def)
+       return NULL_RTX;
+    }
+
+  tree marg0 = gimple_assign_rhs1 (def);
+  tree mtype = TREE_TYPE (marg0);
+  if (!INTEGRAL_TYPE_P (mtype)
+      || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
+    return NULL_RTX;
+
+  gimple *ldef = get_def_for_expr (marg0, NOP_EXPR);
+  if (!ldef)
+    return NULL_RTX;
+
+  tree lhs = gimple_assign_rhs1 (ldef);
+  tree ltype = TREE_TYPE (lhs);
+  bool unsignedp = TYPE_UNSIGNED (mtype);
+  
+  if (TYPE_UNSIGNED (ltype) != unsignedp
+      || TYPE_PRECISION (ltype) != tree_to_uhwi (sarg1)
+      || TYPE_PRECISION (mtype) < 2 * TYPE_PRECISION (ltype))
+    return NULL_RTX;
+
+  tree marg1 = gimple_assign_rhs2 (def);
+  tree rhs;
+  if (TREE_CODE (marg1) == INTEGER_CST)
+    {
+      if (!int_fits_type_p (marg1, ltype))
+       return NULL_RTX;
+      rhs = fold_convert (ltype, marg1);
+    }
+  else
+    {
+      gimple *rdef = get_def_for_expr (marg1, NOP_EXPR);
+      if (!rdef)
+       return NULL_RTX;
+
+      rhs = gimple_assign_rhs1 (rdef);
+      if (TYPE_MAIN_VARIANT (ltype) != TYPE_MAIN_VARIANT (TREE_TYPE (rhs)))
+       return NULL_RTX;
+    }
+
+  machine_mode mode = TYPE_MODE (ltype);
+  optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
+  if (optab_handler (tab, mode) == CODE_FOR_nothing)
+    return NULL_RTX;
+  
+  /* Commit to RTL expansion.  */
+  rtx op0, op1, result;
+  expand_operands (lhs, rhs, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
+  if (tmode != mode)
+    target = NULL_RTX;
+  result = expand_binop (mode, tab, op0, op1, target, unsignedp,
+                        OPTAB_LIB_WIDEN);
+  if (!result)
+    return NULL_RTX;
+  if (tmode != mode)
+    result = convert_to_mode (tmode, result, TYPE_UNSIGNED (stype));
+  return result;
+}
+
 rtx
 expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
                    enum expand_modifier modifier)
@@ -9771,6 +9856,14 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
        if (ALL_FIXED_POINT_MODE_P (mode))
          goto binop;
 
+       /* Try to use mul_highpart optab if available.  */
+       if (code == RSHIFT_EXPR)
+         {
+           temp = try_expand_mult_highpart (ops, target, tmode);
+           if (temp)
+             return REDUCE_BIT_FIELD (temp);
+         }
+
        if (! safe_from_p (subtarget, treeop1, 1))
          subtarget = 0;
        if (modifier == EXPAND_STACK_PARM)
/* { dg-do compile } */
/* { dg-options "-O2 -Wno-long-long" } */

/* Unsigned and signed integer types in TImode, selected with GCC's mode
   attribute.  The tests below use them as the wide type in which the
   multiplication and the shift are performed.  */
typedef unsigned int __attribute ((mode(TI))) uti_t;
typedef int __attribute ((mode(TI))) ti_t;

/* Widen X and Y to ti_t, multiply, shift the signed product right by 64
   and return the result converted to long.  Presumably meant to match the
   mul.hi.s64 scan at the end of this test -- confirm against dg-final.  */
long test1(long x, long y)
{
  return ((ti_t)x * (ti_t)y) >> 64;
}

/* Multiply X, widened to ti_t, by the constant 19065, shift the signed
   product right by 64 and return the result converted to long.  Exercises
   the multiply-by-constant form.  */
long test2(long x)
{
  return ((ti_t)x * 19065) >> 64;
}

/* Signed ti_t multiplication of X and Y whose product is cast to uti_t
   before the shift.  The cast binds tighter than >>, so the shift by 64 is
   performed on the unsigned type; the result is returned as long.  */
long test3(long x, long y)
{
  return (uti_t)((ti_t)x * (ti_t)y) >> 64;
}

/* Signed ti_t multiplication of X by the constant 19065, with the product
   cast to uti_t before the shift by 64 (the cast binds tighter than >>).
   The result is returned as long.  */
long test4(long x)
{
  return (uti_t)((ti_t)x * 19065) >> 64;
}

/* Signed ti_t multiplication of X and Y shifted right by 64, with the
   result returned in the wide signed type ti_t rather than narrowed.  */
ti_t test5(long x, long y)
{
  return ((ti_t)x * (ti_t)y) >> 64;
}

/* Signed ti_t multiplication of X by the constant 19065 shifted right by
   64, with the result returned in the wide signed type ti_t.  */
ti_t test6(long x)
{
  return ((ti_t)x * 19065) >> 64;
}

/* Signed ti_t multiplication of X and Y, product cast to uti_t and then
   shifted right by 64 as an unsigned value; the result is returned in the
   wide unsigned type uti_t.  */
uti_t test7(long x, long y)
{
  return (uti_t)((ti_t)x * (ti_t)y) >> 64;
}

/* Signed ti_t multiplication of X by the constant 19065, product cast to
   uti_t and then shifted right by 64 as an unsigned value; the result is
   returned in the wide unsigned type uti_t.  */
uti_t test8(long x)
{
  return (uti_t)((ti_t)x * 19065) >> 64;
}

/* { dg-final { scan-assembler-times "mul.hi.s64" 8 } } */
/* { dg-do compile } */
/* { dg-options "-O2 -Wno-long-long" } */

/* Unsigned and signed integer types in TImode, selected with GCC's mode
   attribute.  The tests below use them as the wide type in which the
   multiplication and the shift are performed.  */
typedef unsigned int __attribute ((mode(TI))) uti_t;
typedef int __attribute ((mode(TI))) ti_t;

/* Widen X and Y to uti_t, multiply, shift the unsigned product right by 64
   and return the result converted to unsigned long.  Presumably meant to
   match the mul.hi.u64 scan at the end of this test -- confirm against
   dg-final.  */
unsigned long test1(unsigned long x, unsigned long y)
{
  return ((uti_t)x * (uti_t)y) >> 64;
}

/* Multiply X, widened to uti_t, by the constant 19065, shift the unsigned
   product right by 64 and return the result converted to unsigned long.
   Exercises the multiply-by-constant form.  */
unsigned long test2(unsigned long x)
{
  return ((uti_t)x * 19065) >> 64;
}

/* Unsigned uti_t multiplication of X and Y whose product is cast to ti_t
   before the shift.  The cast binds tighter than >>, so the shift by 64 is
   performed on the signed type; the result is returned as unsigned long.  */
unsigned long test3(unsigned long x, unsigned long y)
{
  return (ti_t)((uti_t)x * (uti_t)y) >> 64;
}

/* Unsigned uti_t multiplication of X by the constant 19065, with the
   product cast to ti_t before the shift by 64 (the cast binds tighter than
   >>).  The result is returned as unsigned long.  */
unsigned long test4(unsigned long x)
{
  return (ti_t)((uti_t)x * 19065) >> 64;
}

/* Unsigned uti_t multiplication of X and Y shifted right by 64, with the
   result returned in the wide unsigned type uti_t rather than narrowed.  */
uti_t test5(unsigned long x, unsigned long y)
{
  return ((uti_t)x * (uti_t)y) >> 64;
}

/* Unsigned uti_t multiplication of X by the constant 19065 shifted right
   by 64, with the result returned in the wide unsigned type uti_t.  */
uti_t test6(unsigned long x)
{
  return ((uti_t)x * 19065) >> 64;
}

/* Unsigned uti_t multiplication of X and Y, product cast to ti_t and then
   shifted right by 64 as a signed value; the result is returned in the
   wide signed type ti_t.  */
ti_t test7(unsigned long x, unsigned long y)
{
  return (ti_t)((uti_t)x * (uti_t)y) >> 64;
}

/* Unsigned uti_t multiplication of X by the constant 19065, product cast
   to ti_t and then shifted right by 64 as a signed value; the result is
   returned in the wide signed type ti_t.  */
ti_t test8(unsigned long x)
{
  return (ti_t)((uti_t)x * 19065) >> 64;
}

/* { dg-final { scan-assembler-times "mul.hi.u64" 8 } } */

Reply via email to