The truly_noop_truncation target hook is documented, in target.def, as "true if it is safe to convert a value of inprec bits to one of outprec bits (where outprec is smaller than inprec) by merely operating on it as if it had only outprec bits", i.e. the middle-end can use a SUBREG instead of a TRUNCATE.
What's perhaps potentially a little ambiguous in the above description is whether it is the caller or the callee that's responsible for ensuring or checking whether "outprec < inprec". The name TRULY_NOOP_TRUNCATION_P, like SUBREG_PROMOTED_P, may be prone to being understood as a predicate that confirms that something is a no-op truncation or a promoted subreg, when in fact the caller must first confirm this is a truncation/subreg and only then call the "classification" macro. Alas making the following minor tweak (for testing) to the i386 backend: static bool ix86_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec) { gcc_assert (outprec < inprec); return true; } #undef TARGET_TRULY_NOOP_TRUNCATION #define TARGET_TRULY_NOOP_TRUNCATION ix86_truly_noop_truncation reveals that there are numerous callers in middle-end that rely on the default behaviour of silently returning true for any (invalid) input. These are fixed below. This patch has been tested on x86_64-pc-linux-gnu with make bootstrap and make -k check, both with and without --target_board=unix{-m32} with no new failures. Ok for mainline? 2023-12-28 Roger Sayle <ro...@nextmovesoftware.com> gcc/ChangeLog * combine.cc (make_extraction): Confirm that OUTPREC is less than INPREC before calling TRULY_NOOP_TRUNCATION_MODES_P. * expmed.cc (store_bit_field_using_insv): Likewise. (extract_bit_field_using_extv): Likewise. (extract_bit_field_as_subreg): Likewise. * optabs-query.cc (get_best_extraction_insn): Likewise. * optabs.cc (expand_parity): Likewise. * rtlhooks.cc (gen_lowpart_general): Likewise. * simplify-rtx.cc (simplify_truncation): Disallow truncations to the same precision. (simplify_unary_operation_1) <case TRUNCATE>: Move optimization of truncations to the same mode earlier. Thanks in advance, Roger --
diff --git a/gcc/combine.cc b/gcc/combine.cc index f2c64a9..5aa2f57 100644 --- a/gcc/combine.cc +++ b/gcc/combine.cc @@ -7613,7 +7613,8 @@ make_extraction (machine_mode mode, rtx inner, HOST_WIDE_INT pos, && (pos == 0 || REG_P (inner)) && (inner_mode == tmode || !REG_P (inner) - || TRULY_NOOP_TRUNCATION_MODES_P (tmode, inner_mode) + || (known_lt (GET_MODE_SIZE (tmode), GET_MODE_SIZE (inner_mode)) + && TRULY_NOOP_TRUNCATION_MODES_P (tmode, inner_mode)) || reg_truncated_to_mode (tmode, inner)) && (! in_dest || (REG_P (inner) @@ -7856,6 +7857,8 @@ make_extraction (machine_mode mode, rtx inner, HOST_WIDE_INT pos, /* On the LHS, don't create paradoxical subregs implicitely truncating the register unless TARGET_TRULY_NOOP_TRUNCATION. */ if (in_dest + && known_lt (GET_MODE_SIZE (GET_MODE (inner)), + GET_MODE_SIZE (wanted_inner_mode)) && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (inner), wanted_inner_mode)) return NULL_RTX; diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 0bba93f..8940d47 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -26707,6 +26707,16 @@ ix86_libm_function_max_error (unsigned cfn, machine_mode mode, #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests #endif /* #if CHECKING_P */ +static bool +ix86_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec) +{ + gcc_assert (outprec < inprec); + return true; +} + +#undef TARGET_TRULY_NOOP_TRUNCATION +#define TARGET_TRULY_NOOP_TRUNCATION ix86_truly_noop_truncation + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-i386.h" diff --git a/gcc/expmed.cc b/gcc/expmed.cc index 05331dd..6398bf9 100644 --- a/gcc/expmed.cc +++ b/gcc/expmed.cc @@ -651,6 +651,7 @@ store_bit_field_using_insv (const extraction_insn *insv, rtx op0, X) 0)) is (reg:N X). */ if (GET_CODE (xop0) == SUBREG && REG_P (SUBREG_REG (xop0)) + && paradoxical_subreg_p (xop0) && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)), op_mode)) { @@ -1585,7 +1586,11 @@ extract_bit_field_using_extv (const extraction_insn *extv, rtx op0, mode. Instead, create a temporary and use convert_move to set the target. */ if (REG_P (target) - && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode) + && (known_lt (GET_MODE_SIZE (GET_MODE (target)), + GET_MODE_SIZE (ext_mode)) + ? TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode) + : known_eq (GET_MODE_SIZE (GET_MODE (target)), + GET_MODE_SIZE (ext_mode))) && (temp = gen_lowpart_if_possible (ext_mode, target))) { target = temp; @@ -1626,7 +1631,9 @@ extract_bit_field_as_subreg (machine_mode mode, rtx op0, if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum) && known_eq (bitsize, GET_MODE_BITSIZE (mode)) && lowpart_bit_field_p (bitnum, bitsize, op0_mode) - && TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode)) + && (known_lt (GET_MODE_SIZE (mode), GET_MODE_SIZE (op0_mode)) + ? TRULY_NOOP_TRUNCATION_MODES_P (mode, op0_mode) + : known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (op0_mode)))) return simplify_gen_subreg (mode, op0, op0_mode, bytenum); return NULL_RTX; } diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc index 947ccef..f33253f 100644 --- a/gcc/optabs-query.cc +++ b/gcc/optabs-query.cc @@ -213,7 +213,7 @@ get_best_extraction_insn (extraction_insn *insn, FOR_EACH_MODE_FROM (mode_iter, mode) { mode = mode_iter.require (); - if (maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (field_mode)) + if (maybe_ge (GET_MODE_SIZE (mode), GET_MODE_SIZE (field_mode)) || TRULY_NOOP_TRUNCATION_MODES_P (insn->field_mode, field_mode)) break; diff --git a/gcc/optabs.cc b/gcc/optabs.cc index 6a34276..fad0d59 100644 --- a/gcc/optabs.cc +++ b/gcc/optabs.cc @@ -2954,7 +2954,11 @@ expand_parity (scalar_int_mode mode, rtx op0, rtx target) if (temp) { if (mclass != MODE_INT - || !TRULY_NOOP_TRUNCATION_MODES_P (mode, wider_mode)) + || (known_lt (GET_MODE_SIZE (mode), + GET_MODE_SIZE (wider_mode)) + ? !TRULY_NOOP_TRUNCATION_MODES_P (mode, wider_mode) + : maybe_ne (GET_MODE_SIZE (mode), + GET_MODE_SIZE (wider_mode)))) return convert_to_mode (mode, temp, 0); else return gen_lowpart (mode, temp); diff --git a/gcc/rtlhooks.cc b/gcc/rtlhooks.cc index 989d3c9..9a53ab0 100644 --- a/gcc/rtlhooks.cc +++ b/gcc/rtlhooks.cc @@ -66,7 +66,9 @@ gen_lowpart_general (machine_mode mode, rtx x) scalar_int_mode xmode; if (is_a <scalar_int_mode> (GET_MODE (x), &xmode) && GET_MODE_SIZE (xmode) <= UNITS_PER_WORD - && TRULY_NOOP_TRUNCATION_MODES_P (mode, xmode) + && (known_lt (GET_MODE_SIZE (mode), GET_MODE_SIZE (xmode)) + ? TRULY_NOOP_TRUNCATION_MODES_P (mode, xmode) + : known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (xmode))) && !reload_completed) return gen_lowpart_general (mode, force_reg (xmode, x)); diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc index f3745d8..27518f5 100644 --- a/gcc/simplify-rtx.cc +++ b/gcc/simplify-rtx.cc @@ -617,7 +617,7 @@ simplify_context::simplify_truncation (machine_mode mode, rtx op, unsigned int op_precision = GET_MODE_UNIT_PRECISION (op_mode); scalar_int_mode int_mode, int_op_mode, subreg_mode; - gcc_assert (precision <= op_precision); + gcc_assert (precision < op_precision); /* Optimize truncations of zero and sign extended values. */ if (GET_CODE (op) == ZERO_EXTEND @@ -1207,6 +1207,10 @@ simplify_context::simplify_unary_operation_1 (rtx_code code, machine_mode mode, break; case TRUNCATE: + /* Check for useless truncation. */ + if (GET_MODE (op) == mode) + return op; + /* Don't optimize (lshiftrt (mult ...)) as it would interfere with the umulXi3_highpart patterns. */ if (GET_CODE (op) == LSHIFTRT @@ -1271,9 +1275,6 @@ simplify_context::simplify_unary_operation_1 (rtx_code code, machine_mode mode, return temp; } - /* Check for useless truncation. */ - if (GET_MODE (op) == mode) - return op; break; case FLOAT_TRUNCATE: