Doh! ENOPATCH. This time with attachments... https://gcc.gnu.org/pipermail/gcc-patches/2021-August/576922.html
Roger --
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 108de1c..2b18f6a 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -614,6 +614,18 @@ "" "%.\\tmul.hi.s32\\t%0, %1, %2;") +(define_insn "smuldi3_highpart" + [(set (match_operand:DI 0 "nvptx_register_operand" "=R") + (truncate:DI + (lshiftrt:TI + (mult:TI (sign_extend:TI + (match_operand:DI 1 "nvptx_register_operand" "R")) + (sign_extend:TI + (match_operand:DI 2 "nvptx_register_operand" "R"))) + (const_int 64))))] + "" + "%.\\tmul.hi.s64\\t%0, %1, %2;") + (define_insn "umulhi3_highpart" [(set (match_operand:HI 0 "nvptx_register_operand" "=R") (truncate:HI @@ -638,6 +650,18 @@ "" "%.\\tmul.hi.u32\\t%0, %1, %2;") +(define_insn "umuldi3_highpart" + [(set (match_operand:DI 0 "nvptx_register_operand" "=R") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "nvptx_register_operand" "R")) + (zero_extend:TI + (match_operand:DI 2 "nvptx_register_operand" "R"))) + (const_int 64))))] + "" + "%.\\tmul.hi.u64\\t%0, %1, %2;") + ;; Shifts (define_insn "ashl<mode>3" diff --git a/gcc/expr.c b/gcc/expr.c index b65cfcf..c032d54 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -8904,6 +8904,91 @@ expand_expr_divmod (tree_code code, machine_mode mode, tree treeop0, return expand_divmod (mod_p, code, mode, op0, op1, target, unsignedp); } +/* Helper function of expand_expr_real_2, to recognized and expand a + (rshift (mult (convert X) (convert Y)) INTEGER_CST) as a highpart + multiplication. This also handles multiplication by a constant. */ + +static rtx +try_expand_mult_highpart (sepops ops, rtx target, machine_mode tmode) +{ + tree stype = ops->type; + tree sarg0 = ops->op0; + tree sarg1 = ops->op1; + + if (!INTEGRAL_TYPE_P (stype) + || TREE_CODE (sarg1) != INTEGER_CST + || TREE_CODE (sarg0) != SSA_NAME + || !tree_fits_uhwi_p (sarg1)) + return NULL_RTX; + + gimple *def = get_def_for_expr (sarg0, MULT_EXPR); + if (!def) + { + /* Allow NOP_EXPR between the multiplication and the rshift. */ + def = get_def_for_expr (sarg0, NOP_EXPR); + if (def) + def = get_def_for_expr (gimple_assign_rhs1 (def), MULT_EXPR); + if (!def) + return NULL_RTX; + } + + tree marg0 = gimple_assign_rhs1 (def); + tree mtype = TREE_TYPE (marg0); + if (!INTEGRAL_TYPE_P (mtype) + || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype)) + return NULL_RTX; + + gimple *ldef = get_def_for_expr (marg0, NOP_EXPR); + if (!ldef) + return NULL_RTX; + + tree lhs = gimple_assign_rhs1 (ldef); + tree ltype = TREE_TYPE (lhs); + bool unsignedp = TYPE_UNSIGNED (mtype); + + if (TYPE_UNSIGNED (ltype) != unsignedp + || TYPE_PRECISION (ltype) != tree_to_uhwi (sarg1) + || TYPE_PRECISION (mtype) < 2 * TYPE_PRECISION (ltype)) + return NULL_RTX; + + tree marg1 = gimple_assign_rhs2 (def); + tree rhs; + if (TREE_CODE (marg1) == INTEGER_CST) + { + if (!int_fits_type_p (marg1, ltype)) + return NULL_RTX; + rhs = fold_convert (ltype, marg1); + } + else + { + gimple *rdef = get_def_for_expr (marg1, NOP_EXPR); + if (!rdef) + return NULL_RTX; + + rhs = gimple_assign_rhs1 (rdef); + if (TYPE_MAIN_VARIANT (ltype) != TYPE_MAIN_VARIANT (TREE_TYPE (rhs))) + return NULL_RTX; + } + + machine_mode mode = TYPE_MODE (ltype); + optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab; + if (optab_handler (tab, mode) == CODE_FOR_nothing) + return NULL_RTX; + + /* Commit to RTL expansion. */ + rtx op0, op1, result; + expand_operands (lhs, rhs, NULL_RTX, &op0, &op1, EXPAND_NORMAL); + if (tmode != mode) + target = NULL_RTX; + result = expand_binop (mode, tab, op0, op1, target, unsignedp, + OPTAB_LIB_WIDEN); + if (!result) + return NULL_RTX; + if (tmode != mode) + result = convert_to_mode (tmode, result, TYPE_UNSIGNED (stype)); + return result; +} + rtx expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, enum expand_modifier modifier) @@ -9771,6 +9856,14 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode, if (ALL_FIXED_POINT_MODE_P (mode)) goto binop; + /* Try to use mul_highpart optab if available. */ + if (code == RSHIFT_EXPR) + { + temp = try_expand_mult_highpart (ops, target, tmode); + if (temp) + return REDUCE_BIT_FIELD (temp); + } + if (! safe_from_p (subtarget, treeop1, 1)) subtarget = 0; if (modifier == EXPAND_STACK_PARM)
/* { dg-do compile } */ /* { dg-options "-O2 -Wno-long-long" } */ typedef unsigned int __attribute ((mode(TI))) uti_t; typedef int __attribute ((mode(TI))) ti_t; long test1(long x, long y) { return ((ti_t)x * (ti_t)y) >> 64; } long test2(long x) { return ((ti_t)x * 19065) >> 64; } long test3(long x, long y) { return (uti_t)((ti_t)x * (ti_t)y) >> 64; } long test4(long x) { return (uti_t)((ti_t)x * 19065) >> 64; } ti_t test5(long x, long y) { return ((ti_t)x * (ti_t)y) >> 64; } ti_t test6(long x) { return ((ti_t)x * 19065) >> 64; } uti_t test7(long x, long y) { return (uti_t)((ti_t)x * (ti_t)y) >> 64; } uti_t test8(long x) { return (uti_t)((ti_t)x * 19065) >> 64; } /* { dg-final { scan-assembler-times "mul.hi.s64" 8 } } */
/* { dg-do compile } */ /* { dg-options "-O2 -Wno-long-long" } */ typedef unsigned int __attribute ((mode(TI))) uti_t; typedef int __attribute ((mode(TI))) ti_t; unsigned long test1(unsigned long x, unsigned long y) { return ((uti_t)x * (uti_t)y) >> 64; } unsigned long test2(unsigned long x) { return ((uti_t)x * 19065) >> 64; } unsigned long test3(unsigned long x, unsigned long y) { return (ti_t)((uti_t)x * (uti_t)y) >> 64; } unsigned long test4(unsigned long x) { return (ti_t)((uti_t)x * 19065) >> 64; } uti_t test5(unsigned long x, unsigned long y) { return ((uti_t)x * (uti_t)y) >> 64; } uti_t test6(unsigned long x) { return ((uti_t)x * 19065) >> 64; } ti_t test7(unsigned long x, unsigned long y) { return (ti_t)((uti_t)x * (uti_t)y) >> 64; } ti_t test8(unsigned long x) { return (ti_t)((uti_t)x * 19065) >> 64; } /* { dg-final { scan-assembler-times "mul.hi.u64" 8 } } */