This patch completely replaces the existing "constantsynth" implementation,
which has become unsightly as it was extended over time, with a new one.
This new version offers the following benefits:
- Independence from the insn splitting mechanism. No define_split
descriptions are required
- Resource savings, since the internally required bookkeeping no longer
persists across passes
- The replacement of insns is based on the actual costs (for both size
and speed) of the insns before and after the conversion, rather than
on some arbitrary pre-determined ones
- Easy-to-understand/-add interface for constant synthesis methods
The built-in synthesis methods are (supposedly) very effective, with 2
instructions for certain values and up to 5 instructions to cover all
32-bit values.
/* example */
_Complex double test(int a[], float b[]) {
a[0] = 2045 * 2045;
a[1] = 0xDEADBEEF;
a[2] = 0xDEADBEEF - 15;
a[3] = 4182000;
a[4] = 131071;
a[5] = 293805;
a[6] = 700972933;
a[7] = -372738139;
b[0] = 3.14159265359f;
b[1] *= 0.12005615234375f;
return 1-1i;
}
;; result (-O2 -mextra-l32r-costs=5)
test:
entry sp, 32
movi a8, 0x7af
float.s f1, a8, 14
movi a8, 0x7fd
mull a8, a8, a8
lsi f0, a3, 4
s32i.n a8, a2, 0
movi.n a8, 0x57
addmi a8, a8, -0x1100
slli a8, a8, 17
addmi a8, a8, -0x4100
addi a8, a8, -17
s32i.n a8, a2, 4
addi a8, a8, -15
s32i.n a8, a2, 8
movi a8, 0x3fd
slli a8, a8, 12
addi a8, a8, -16
s32i.n a8, a2, 12
movi.n a8, -1
srli a8, a8, 15
s32i.n a8, a2, 16
movi a8, 0x85
addmi a8, a8, 0x7f00
addx8 a8, a8, a8
s32i.n a8, a2, 20
movi a8, 0x539
slli a8, a8, 19
addi a8, a8, -123
s32i.n a8, a2, 24
movi a8, -0x2c7
slli a8, a8, 19
addmi a8, a8, 0x7800
addi a8, a8, -91
s32i.n a8, a2, 28
movi.n a8, 0x49
mul.s f0, f0, f1
addmi a8, a8, 0x4000
slli a8, a8, 16
addmi a8, a8, 0x1000
addi a8, a8, -37
s32i.n a8, a3, 0
ssi f0, a3, 4
movi a5, -0x401
movi a3, 0x3ff
movi.n a2, 0
slli a3, a3, 20
movi.n a4, 0
slli a5, a5, 20
retw.n
gcc/ChangeLog:
* config/xtensa/xtensa-protos.h (xtensa_constantsynth): Remove.
* config/xtensa/xtensa.cc
(#include): Remove "context.h" and "pass_manager.h".
(machine_function): Remove "litpool_usage" member.
(xtensa_constantsynth_2insn, xtensa_constantsynth_rtx_SLLI,
xtensa_constantsynth_rtx_ADDSUBX, xtensa_constantsynth): Remove.
(constantsynth_method_lshr_m1, split_hwi_to_MOVI_ADDMI,
constantsynth_method_16bits, constantsynth_method_32bits,
constantsynth_method_square): New worker functions for the
constant synthesis methods.
(constantsynth_method_info, constantsynth_methods):
New structure representing the list of all constant synthesis
methods.
(constantsynth_info): New structure that stores internal
information for "constantsynth".
(constantsynth_pass1, constantsynth_pass2):
New functions that are the core of "constantsynth".
(do_largeconst): Add a call to constantsynth_pass1() to the insn
enumeration loop, and add a call to constantsynth_pass2() to the
end of this function.
* config/xtensa/xtensa.md (SHI): Remove.
(The two auxiliary define_splits for mov[sh]i_internal): Remove.
(The two auxiliary define_splits for movsf_internal): Remove.
gcc/testsuite/ChangeLog:
* gcc.target/xtensa/constsynth_2insns.c,
gcc.target/xtensa/constsynth_3insns.c,
gcc.target/xtensa/constsynth_double.c: Remove as outdated.
* gcc.target/xtensa/constsynthV2_O2_costs0.c,
gcc.target/xtensa/constsynthV2_O2_costs5.c,
gcc.target/xtensa/constsynthV2_Os.c: New.
---
gcc/config/xtensa/xtensa-protos.h | 1 -
gcc/config/xtensa/xtensa.cc | 633 ++++++++++++------
gcc/config/xtensa/xtensa.md | 51 --
.../xtensa/constsynthV2_O2_costs0.c | 19 +
.../xtensa/constsynthV2_O2_costs5.c | 19 +
.../gcc.target/xtensa/constsynthV2_Os.c | 23 +
.../gcc.target/xtensa/constsynth_2insns.c | 44 --
.../gcc.target/xtensa/constsynth_3insns.c | 35 -
.../gcc.target/xtensa/constsynth_double.c | 11 -
9 files changed, 478 insertions(+), 358 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynthV2_O2_costs0.c
create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynthV2_O2_costs5.c
create mode 100644 gcc/testsuite/gcc.target/xtensa/constsynthV2_Os.c
delete mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c
delete mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c
delete mode 100644 gcc/testsuite/gcc.target/xtensa/constsynth_double.c
diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h
index 562e4b9283b..404180723e4 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -44,7 +44,6 @@ extern int xtensa_expand_scc (rtx *, machine_mode);
extern int xtensa_expand_block_move (rtx *);
extern int xtensa_expand_block_set (rtx *);
extern void xtensa_split_operand_pair (rtx *, machine_mode);
-extern int xtensa_constantsynth (rtx, rtx);
extern int xtensa_emit_move_sequence (rtx *, machine_mode);
extern rtx xtensa_copy_incoming_a7 (rtx);
extern void xtensa_expand_nonlocal_goto (rtx *);
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 0a6768f3957..7e156347f59 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -58,8 +58,6 @@ along with GCC; see the file COPYING3. If not see
#include "insn-attr.h"
#include "tree-pass.h"
#include "print-rtl.h"
-#include "context.h"
-#include "pass_manager.h"
#include <math.h>
#include "opts.h"
@@ -110,7 +108,6 @@ struct GTY(()) machine_function
bool inhibit_logues_a1_adjusts;
rtx last_logues_a9_content;
HARD_REG_SET eliminated_callee_saved;
- hash_map<rtx, int> *litpool_usage;
bool postreload_completed;
};
@@ -1119,219 +1116,6 @@ xtensa_split_operand_pair (rtx operands[4], machine_mode mode)
}
-/* Try to emit insns to load src (either naked or pooled SI/SF constant)
- into dst with synthesizing a such constant value from a sequence of
- load-immediate / arithmetic ones, instead of a L32R instruction
- (plus a constant in litpool). */
-
-static int
-xtensa_constantsynth_2insn (rtx dst, HOST_WIDE_INT srcval,
- rtx (*gen_op)(rtx, HOST_WIDE_INT),
- HOST_WIDE_INT op_imm)
-{
- HOST_WIDE_INT imm = INT_MAX;
- rtx x = NULL_RTX;
- int shift, sqr;
-
- gcc_assert (REG_P (dst));
-
- shift = exact_log2 (srcval + 1);
- if (IN_RANGE (shift, 1, 31))
- {
- imm = -1;
- x = gen_lshrsi3 (dst, dst, GEN_INT (32 - shift));
- }
-
- shift = ctz_hwi (srcval);
- if ((!x || (TARGET_DENSITY && ! IN_RANGE (imm, -32, 95)))
- && xtensa_simm12b (srcval >> shift))
- {
- imm = srcval >> shift;
- x = gen_ashlsi3 (dst, dst, GEN_INT (shift));
- }
-
- if ((!x || (TARGET_DENSITY && ! IN_RANGE (imm, -32, 95)))
- && IN_RANGE (srcval, (-2048 - 32768), (2047 + 32512)))
- {
- HOST_WIDE_INT imm0, imm1;
-
- if (srcval < -32768)
- imm1 = -32768;
- else if (srcval > 32512)
- imm1 = 32512;
- else
- imm1 = srcval & ~255;
- imm0 = srcval - imm1;
- if (TARGET_DENSITY && imm1 < 32512 && IN_RANGE (imm0, 224, 255))
- imm0 -= 256, imm1 += 256;
- imm = imm0;
- x = gen_addsi3 (dst, dst, GEN_INT (imm1));
- }
-
- sqr = (int) floorf (sqrtf (srcval));
- if (TARGET_MUL32 && optimize_size
- && !x && IN_RANGE (srcval, 0, (2047 * 2047)) && sqr * sqr == srcval)
- {
- imm = sqr;
- x = gen_mulsi3 (dst, dst, dst);
- }
-
- if (!x)
- return 0;
-
- emit_move_insn (dst, GEN_INT (imm));
- emit_insn (x);
- if (gen_op)
- emit_move_insn (dst, gen_op (dst, op_imm));
-
- return 1;
-}
-
-static rtx
-xtensa_constantsynth_rtx_SLLI (rtx reg, HOST_WIDE_INT imm)
-{
- return gen_rtx_ASHIFT (SImode, reg, GEN_INT (imm));
-}
-
-static rtx
-xtensa_constantsynth_rtx_ADDSUBX (rtx reg, HOST_WIDE_INT imm)
-{
- return imm == 7
- ? gen_rtx_MINUS (SImode, gen_rtx_ASHIFT (SImode, reg, GEN_INT (3)),
- reg)
- : gen_rtx_PLUS (SImode, gen_rtx_ASHIFT (SImode, reg,
- GEN_INT (floor_log2 (imm - 1))),
- reg);
-}
-
-int
-xtensa_constantsynth (rtx dst, rtx src)
-{
- HOST_WIDE_INT srcval;
- static opt_pass *pass_rtl_split2;
- int *pv;
-
- /* Derefer if src is litpool entry, and get integer constant value. */
- src = avoid_constant_pool_reference (src);
- if (CONST_INT_P (src))
- srcval = INTVAL (src);
- else if (CONST_DOUBLE_P (src) && GET_MODE (src) == SFmode)
- {
- long l;
-
- REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (src), l);
- srcval = (int32_t)l, src = GEN_INT (srcval);
- }
- else
- return 0;
-
- /* Force dst as SImode. */
- gcc_assert (REG_P (dst));
- if (GET_MODE (dst) != SImode)
- dst = gen_rtx_REG (SImode, REGNO (dst));
-
- if (optimize_size)
- {
- /* During the first split pass after register allocation (rtl-split2),
- record the occurrence of integer src value and do nothing. */
- if (!pass_rtl_split2)
- pass_rtl_split2 = g->get_passes ()->get_pass_by_name ("rtl-split2");
- if (current_pass == pass_rtl_split2)
- {
- if (!cfun->machine->litpool_usage)
- cfun->machine->litpool_usage = hash_map<rtx, int>::create_ggc ();
- if ((pv = cfun->machine->litpool_usage->get (src)))
- ++*pv;
- else
- cfun->machine->litpool_usage->put (src, 1);
- return 0;
- }
-
- /* If two or more identical integer constants appear in the function,
- the code size can be reduced by re-emitting a "move" (load from an
- either litpool entry or relaxed immediate) instruction in SImode
- to increase the chances that the litpool entry will be shared. */
- if (cfun->machine->litpool_usage
- && (pv = cfun->machine->litpool_usage->get (src))
- && *pv > 1)
- {
- emit_move_insn (dst, src);
- return 1;
- }
- }
-
- /* No need for synthesizing for what fits into MOVI instruction. */
- if (xtensa_simm12b (srcval))
- {
- emit_move_insn (dst, src);
- return 1;
- }
-
- /* 2-insns substitution. */
- if ((optimize_size || (optimize && xtensa_extra_l32r_costs >= 1))
- && xtensa_constantsynth_2insn (dst, srcval, NULL, 0))
- return 1;
-
- /* 3-insns substitution. */
- if (optimize > 1 && !optimize_size && xtensa_extra_l32r_costs >= 2)
- {
- int shift, divisor;
-
- /* 2-insns substitution followed by SLLI. */
- shift = ctz_hwi (srcval);
- if (IN_RANGE (shift, 1, 31) &&
- xtensa_constantsynth_2insn (dst, srcval >> shift,
- xtensa_constantsynth_rtx_SLLI,
- shift))
- return 1;
-
- /* 2-insns substitution followed by ADDX[248] or SUBX8. */
- if (TARGET_ADDX)
- for (divisor = 3; divisor <= 9; divisor += 2)
- if (srcval % divisor == 0 &&
- xtensa_constantsynth_2insn (dst, srcval / divisor,
- xtensa_constantsynth_rtx_ADDSUBX,
- divisor))
- return 1;
-
- /* loading simm12 followed by left/right bitwise rotation:
- MOVI + SSAI + SRC. */
- if ((srcval & 0x001FF800) == 0
- || (srcval & 0x001FF800) == 0x001FF800)
- {
- int32_t v;
-
- for (shift = 1; shift < 12; ++shift)
- {
- v = (int32_t)(((uint32_t)srcval >> shift)
- | ((uint32_t)srcval << (32 - shift)));
- if (xtensa_simm12b(v))
- {
- emit_move_insn (dst, GEN_INT (v));
- emit_insn (gen_rotlsi3 (dst, dst, GEN_INT (shift)));
- return 1;
- }
- v = (int32_t)(((uint32_t)srcval << shift)
- | ((uint32_t)srcval >> (32 - shift)));
- if (xtensa_simm12b(v))
- {
- emit_move_insn (dst, GEN_INT (v));
- emit_insn (gen_rotrsi3 (dst, dst, GEN_INT (shift)));
- return 1;
- }
- }
- }
- }
-
- /* If cannot synthesize the value and also cannot fit into MOVI instruc-
- tion, re-emit a "move" (load from an either litpool entry or relaxed
- immediate) instruction in SImode in order to increase the chances that
- the litpool entry will be shared. */
- emit_move_insn (dst, src);
- return 1;
-}
-
-
/* Emit insns to move operands[1] into operands[0].
Return 1 if we have written out everything that needs to be done to
do the move. Otherwise, return 0 and the caller will emit the move
@@ -5926,6 +5710,410 @@ split_DI_DF_const (rtx_insn *insn)
return false;
}
+/* The constant-synthesis optimization (constantsynth for short).
+
+ This is an optimization that attempts to replace the assignment of a
+ large integer (and some single-precision floating-point) constant value
+ that won't fit in the immediate field of a single machine instruction
+ with a smaller integer value that does fit, and a group of subsequent
+ instructions that derive the equivalent value through some arithmetic/
+ bitwise operations.
+
+ In Xtensa ISA, when TARGET_CONST16 is not enabled, such large immediate
+ assignments are typically treated as references to literal pool entries
+ using the L32R machine instruction, which has a one-clock delay to load
+ from memory, plus possible further implementation-dependent exclusive
+ clock penalties (a.k.a. pipeline stall).
+
+ To mitigate this, when optimization is enabled, we use several synthesis
+ methods to find alternative instruction sequences that are cheaper than
+ the expected insn cost of a single L32R instruction, measured in either
+ clock cycles or # of bytes depending on whether optimizing for speed or
+ size.
+
+ However, using L32R instructions has the advantage of sharing literal
+ pool entries when two or more identical immediate values are needed
+ within a function. This also needs to be considered, especially when
+ optimizing for size.
+
+ Below this are the definitions of each synthesis method. Each method
+ takes a destination register (which can be assumed to be SImode) and
+ an integer value, and returns an insn sequence that sets the register
+ to that value, if applicable. The framework takes care of the rest of
+ the heavy lifting, making it easy to test and add new methods. */
+
+/* Synthesis method "lshr_m1": build a power of two minus one, 2**N - 1
+   with 1 <= N <= 31 (eg., 65535), in two machine instructions by loading
+   minus one into DEST and logically shifting it right by 32 - N bits.
+   Returns the insn sequence, or NULL if V does not have that form.  */
+
+static rtx_insn *
+constantsynth_method_lshr_m1 (rtx dest, HOST_WIDE_INT v)
+{
+  /* N such that V + 1 == 2**N, if there is one.  */
+  const int nbits = exact_log2 (v + 1);
+
+  if (nbits < 1 || nbits > 31)
+    return NULL;
+
+  start_sequence ();
+  emit_insn (gen_rtx_SET (dest, constm1_rtx));
+  emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (32 - nbits)));
+  return end_sequence ();
+}
+
+/* Decompose V into V_MOVI + V_ADDMI, the two immediates for a MOVI
+   followed by an ADDMI instruction.  V_MOVI always satisfies
+   xtensa_simm12b; V_ADDMI is a multiple of 256 between -32768 and 32512,
+   or zero when V itself already fits MOVI.  Values that can be split lie
+   between -34816 and 34559.  Returns true on success; on failure returns
+   false and leaves V_MOVI and V_ADDMI untouched.  */
+
+static bool
+split_hwi_to_MOVI_ADDMI (HOST_WIDE_INT v,
+                         HOST_WIDE_INT &v_movi, HOST_WIDE_INT &v_addmi)
+{
+  HOST_WIDE_INT hi_part, lo_part;
+
+  /* No ADDMI needed at all.  */
+  if (xtensa_simm12b (v))
+    {
+      v_movi = v;
+      v_addmi = 0;
+      return true;
+    }
+
+  /* Choose the ADDMI immediate: the multiple of 256 at or below V, clamped
+     to the range the code accepts for ADDMI.  */
+  if (v < -32768)
+    hi_part = -32768;
+  else if (v > 32512)
+    hi_part = 32512;
+  else
+    hi_part = v & ~255;
+
+  /* Whatever is left over has to be loadable by MOVI.  */
+  lo_part = v - hi_part;
+  if (! xtensa_simm12b (lo_part))
+    return false;
+
+  /* With TARGET_DENSITY, turn a remainder of 224 or more into a small
+     negative one by borrowing 256 from the ADDMI part -- presumably so
+     that the MOVI can use its narrow encoding.  */
+  if (TARGET_DENSITY && lo_part >= 224 && hi_part < 32512)
+    {
+      lo_part -= 256;
+      hi_part += 256;
+    }
+
+  v_movi = lo_part;
+  v_addmi = hi_part;
+  return true;
+}
+
+/* Synthesis method "16bits": build a value between -34816 and 34559 in two
+   machine instructions, by adding 256 times a signed 8-bit value to a
+   signed 12-bit value (see split_hwi_to_MOVI_ADDMI).  If V itself is out
+   of that range but V with its trailing zero bits shifted out, or V
+   divided exactly by 9, 7, 5 or 3 (tried in that order, and only with
+   TARGET_ADDX), is in range, the quotient is built the same way and one
+   more instruction (a left shift, or a shift-and-add/subtract) restores V.
+   Returns the insn sequence, or NULL if none of these apply.  */
+
+static rtx_insn *
+constantsynth_method_16bits (rtx dest, HOST_WIDE_INT v)
+{
+  HOST_WIDE_INT v_movi, v_addmi;
+  rtx postfix = NULL_RTX;
+
+  if (! split_hwi_to_MOVI_ADDMI (v, v_movi, v_addmi))
+    {
+      const int shift = ctz_hwi (v);
+
+      if (split_hwi_to_MOVI_ADDMI (v >> shift, v_movi, v_addmi))
+        postfix = gen_ashlsi3 (dest, dest, GEN_INT (shift));
+      else
+        {
+          int divisor;
+          rtx scaled;
+
+          if (!TARGET_ADDX)
+            return NULL;
+
+          /* Look for an odd divisor whose quotient is in range.  */
+          for (divisor = 9; divisor >= 3; divisor -= 2)
+            if (v % divisor == 0
+                && split_hwi_to_MOVI_ADDMI (v / divisor, v_movi, v_addmi))
+              break;
+          if (divisor < 3)
+            return NULL;
+
+          /* x * 7 is (x << 3) - x; x * 9, 5 and 3 are (x << 3, 2, 1) + x.  */
+          if (divisor == 7)
+            {
+              scaled = gen_rtx_ASHIFT (SImode, dest, GEN_INT (3));
+              postfix = gen_subsi3 (dest, scaled, dest);
+            }
+          else
+            {
+              scaled = gen_rtx_ASHIFT (SImode, dest,
+                                       GEN_INT (floor_log2 (divisor)));
+              postfix = gen_addsi3 (dest, scaled, dest);
+            }
+        }
+    }
+
+  start_sequence ();
+  emit_insn (gen_rtx_SET (dest, GEN_INT (v_movi)));
+  if (v_addmi)
+    emit_insn (gen_addsi3 (dest, dest, GEN_INT (v_addmi)));
+  /* POSTFIX is still NULL_RTX here when V needed no post-operation.  */
+  emit_insn (postfix);
+  return end_sequence ();
+}
+
+/* Synthesis method "32bits": build V in up to five machine instructions:
+     1. a signed 12-bit immediate assignment,
+     2. an addition of a signed 8-bit immediate multiplied by 256,
+     3. a logical left bit shift,
+     4. another addition of a signed 8-bit immediate multiplied by 256,
+     5. an addition of a signed 8-bit immediate.
+   Steps 2, 4 and 5 are omitted when they would add zero, and step 3 when
+   the shift count is zero.  Returns the insn sequence, or NULL if V cannot
+   be decomposed this way.  */
+
+static rtx_insn *
+constantsynth_method_32bits (rtx dest, HOST_WIDE_INT v)
+{
+  HOST_WIDE_INT low_byte, mid_byte = 0, v_movi = 0, v_addmi = 0;
+  int shift = 0;
+
+  /* Peel off the lowest byte as a signed value (step 5); adding 128 first
+     rounds the remaining upper part to match.  */
+  v += 128;
+  low_byte = (v & 255) - 128;
+  v >>= 8;
+
+  if (v != 0)
+    {
+      /* First try to cover everything above the low byte with steps 1-3.  */
+      shift = ctz_hwi (v);
+      if (split_hwi_to_MOVI_ADDMI (v >> shift, v_movi, v_addmi))
+        shift += 8;
+      else
+        {
+          /* Otherwise peel off one more signed byte (step 4) and retry
+             with what is left.  */
+          v += 128;
+          mid_byte = (v & 255) - 128;
+          v >>= 8;
+
+          if (v == 0)
+            {
+              shift = 0;
+              v_movi = 0;
+              v_addmi = 0;
+            }
+          else
+            {
+              shift = ctz_hwi (v);
+              if (! split_hwi_to_MOVI_ADDMI (v >> shift, v_movi, v_addmi))
+                return NULL;
+              shift += 16;
+            }
+        }
+    }
+
+  start_sequence ();
+  emit_insn (gen_rtx_SET (dest, GEN_INT (v_movi)));
+  if (v_addmi)
+    emit_insn (gen_addsi3 (dest, dest, GEN_INT (v_addmi)));
+  if (shift)
+    emit_insn (gen_ashlsi3 (dest, dest, GEN_INT (shift)));
+  if (mid_byte)
+    emit_insn (gen_addsi3 (dest, dest, GEN_INT (mid_byte * 256)));
+  if (low_byte)
+    emit_insn (gen_addsi3 (dest, dest, GEN_INT (low_byte)));
+  return end_sequence ();
+}
+
+/* Synthesis method "square": build a perfect square between 0 and
+   2047*2047 in two machine instructions, by assigning its square root to
+   DEST and multiplying DEST by itself.  Only available when TARGET_MUL32
+   is enabled.  Returns the insn sequence, or NULL if the method does not
+   apply to V.  */
+
+static rtx_insn *
+constantsynth_method_square (rtx dest, HOST_WIDE_INT v)
+{
+  int root;
+
+  if (!TARGET_MUL32 || ! IN_RANGE (v, 0, 2047 * 2047))
+    return NULL;
+
+  /* Reject anything that is not an exact square.  */
+  root = (int)sqrtf (v);
+  if (root * root != v)
+    return NULL;
+
+  start_sequence ();
+  emit_insn (gen_rtx_SET (dest, GEN_INT (root)));
+  emit_insn (gen_mulsi3 (dest, dest, dest));
+  return end_sequence ();
+}
+
+/* List of all available synthesis methods.
+
+ Each entry pairs a worker function with a name. The worker takes the
+ destination register and the integer value, and returns an insn sequence
+ that sets the register to that value, or NULL if the method does not
+ apply. constantsynth_pass2 tries the entries in table order and only
+ switches to a later one when it is strictly cheaper, so on equal costs
+ the earlier entry is kept. */
+
+struct constantsynth_method_info
+{
+ rtx_insn *(* const func) (rtx, HOST_WIDE_INT); /* The worker. */
+ const char *name; /* Printed in the dump file. */
+};
+
+static const struct constantsynth_method_info constantsynth_methods[] =
+{
+ { constantsynth_method_lshr_m1, "lshr_m1" }, /* Minus one, shifted right. */
+ { constantsynth_method_16bits, "16bits" }, /* MOVI + ADDMI [+ one insn]. */
+ { constantsynth_method_32bits, "32bits" }, /* Up to five insns. */
+ { constantsynth_method_square, "square" }, /* Square root times itself. */
+};
+
+/* Information that mediates between synthesis pass 1 and 2.
+
+ Pass 1 fills in INSNS (and USAGE when optimizing for size); pass 2 reads
+ them back and compares every candidate sequence against COSTS. */
+
+struct constantsynth_info
+{
+ xt_full_rtx_costs costs; /* Estimated costs of one L32R insn. */
+ hash_map<rtx_insn *, rtx> insns; /* Recorded insn -> its CONST_INT. */
+ hash_map<rtx, int> usage; /* CONST_INT -> number of occurrences. */
+ constantsynth_info ()
+ {
+ /* To avoid wasting literal pool entries, we use fake references to
+ estimate the costs of an L32R instruction: a local SYMBOL_REF named
+ "*.LC-1" is marked as a constant pool address and wrapped in an
+ SImode const MEM, and the costs of a raw insn loading that MEM into
+ a9 are accumulated into COSTS. */
+ rtx x = gen_rtx_SYMBOL_REF (Pmode, "*.LC-1");
+ SYMBOL_REF_FLAGS (x) |= SYMBOL_FLAG_LOCAL;
+ CONSTANT_POOL_ADDRESS_P (x) = 1;
+ x = gen_const_mem (SImode, x);
+ gcc_assert (constantpool_mem_p (x)); /* Must look like a litpool load. */
+ costs += make_insn_raw (gen_rtx_SET (gen_rtx_REG (SImode, A9_REG),
+ x));
+ }
+};
+
+/* constantsynth pass 1.
+ Detect and record large constant assignments within the function.
+
+ INSN is the insn to examine; INFO collects what pass 2 needs. Returns
+ true if TARGET_CONST16 is disabled and INSN is a SET of a general-purpose
+ register from a CONST_INT or an SFmode CONST_DOUBLE (after looking
+ through a constant pool reference); returns false otherwise, leaving
+ INSN and INFO untouched. */
+
+static bool
+constantsynth_pass1 (rtx_insn *insn, constantsynth_info &info)
+{
+ rtx pat, dest, src;
+ HOST_WIDE_INT v;
+ int *pcount;
+
+ /* Check whether the insn is an assignment of a constant that is eligible
+ for constantsynth. If a large constant, record the insn and also the
+ number of occurrences of the constant if optimizing for size. If the
+ constant fits in the immediate field, rewrite the insn as an SImode
+ assignment of that constant.
+ Note that all constant values and assignments are treated as SImode
+ because:
+ - Synthesis methods rely on SImode operations
+ - SImode assignments may be shorter
+ - More opportunity for sharing literal pool entries
+ This behavior would be acceptable if TARGET_CAN_CHANGE_MODE_CLASS
+ always returned true (the current and default configuration).
+ NOTE(review): the mode of the destination register is not tested here;
+ this presumably relies on wider-than-word constant assignments having
+ been dealt with already (do_largeconst calls split_DI_DF_const just
+ before this function) -- confirm. */
+ if (!TARGET_CONST16
+ && GET_CODE (pat = PATTERN (insn)) == SET
+ && REG_P (dest = SET_DEST (pat)) && GP_REG_P (REGNO (dest)))
+ {
+ if (CONST_INT_P (src = avoid_constant_pool_reference (SET_SRC (pat))))
+ v = INTVAL (src);
+ else if (CONST_DOUBLE_P (src) && GET_MODE (src) == SFmode)
+ {
+ long l;
+ REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (src), l);
+ v = (int32_t)l; /* Sign-extend the 32-bit image. */
+ if (dump_file)
+ {
+ fputs ("constantsynth_pass1: ", dump_file);
+ dump_value_slim (dump_file, src, 0);
+ fprintf (dump_file,
+ "f -> " HOST_WIDE_INT_PRINT_DEC " ("
+ HOST_WIDE_INT_PRINT_HEX ")\n",
+ v, v);
+ }
+ src = GEN_INT (v); /* From here on, treat it as an integer. */
+ }
+ else
+ return false;
+
+ if (xtensa_simm12b (v))
+ {
+ if (GET_MODE (dest) != SImode)
+ dest = gen_rtx_REG (SImode, REGNO (dest));
+ remove_reg_equal_equiv_notes (insn);
+ validate_change (insn, &PATTERN (insn),
+ gen_rtx_SET (dest, src), 0);
+ if (dump_file)
+ fprintf (dump_file,
+ "constantsynth_pass1: immediate, " HOST_WIDE_INT_PRINT_DEC
+ " (" HOST_WIDE_INT_PRINT_HEX ")\n",
+ v, v);
+ }
+ else
+ {
+ info.insns.put (insn, src); /* Left for pass 2 to decide. */
+ if (optimize_size)
+ {
+ if ((pcount = info.usage.get (src)))
+ ++*pcount;
+ else
+ info.usage.put (src, 1);
+ }
+ }
+ return true;
+ }
+ return false;
+}
+
+/* constantsynth pass 2.
+ For each large constant value assignment collected in pass 1, try to
+ find a more efficient way to derive the value than referencing a literal
+ pool entry, and if found, replace the assignment with it.
+
+ INFO is the information gathered by pass 1. An insn is replaced only by
+ a sequence whose costs are strictly lower than INFO.costs, the estimate
+ for one L32R instruction. An insn that is not replaced is left alone if
+ its destination is SImode, and is otherwise rewritten as an SImode load
+ of the constant from the literal pool.
+
+ NOTE(review): INFO.insns is a hash_map keyed by insn pointer, so the
+ order in which insns are visited below presumably follows pointer
+ hashing rather than insn order. Since force_const_mem may be called
+ from this loop, confirm that this cannot make the literal pool layout
+ differ from one run to the next. */
+
+static void
+constantsynth_pass2 (constantsynth_info &info)
+{
+ rtx_insn *insn, *min_seq, *seq, *last;
+ rtx pat, dest, src;
+ enum machine_mode mode;
+ int *pcount, processed = 0;
+ HOST_WIDE_INT v;
+ const char *name;
+
+ /* For each insn recorded in pass 1... */
+ for (const auto &iter : info.insns)
+ {
+ dest = SET_DEST (pat = PATTERN (insn = iter.first));
+ if ((mode = GET_MODE (dest)) != SImode)
+ dest = gen_rtx_REG (SImode, REGNO (dest)); /* MODE keeps the old one. */
+ v = INTVAL (src = iter.second);
+
+ /* Only attempt to synthesize large constants if they occur at most
+ once in a function, since it is more space-efficient to reference
+ a shared literal pool entry multiple times. Occurrences are only
+ counted by pass 1 when optimizing for size; without a count the
+ attempt is always made. */
+ if (! (pcount = info.usage.get (src))
+ || *pcount == 1)
+ {
+ /* Try multiple synthesis methods and choose the least expensive
+ one, starting from the costs of an L32R as the bar to beat. */
+ xt_full_rtx_costs min_costs = info.costs;
+
+ v = INTVAL (src), min_seq = NULL, name = NULL;
+ for (const auto &method : constantsynth_methods)
+ if ((seq = method.func (dest, v)))
+ {
+ xt_full_rtx_costs costs (seq);
+
+ if (costs < min_costs)
+ min_costs = costs, min_seq = seq, name = method.name;
+ }
+
+ /* If there is a most efficient synthesis method, replace the
+ insn with the result. The last insn of the sequence gets a
+ REG_EQUIV note carrying the constant. */
+ if (min_seq)
+ {
+ for (last = min_seq; NEXT_INSN (last);
+ last = NEXT_INSN (last))
+ ;
+ add_reg_note (last, REG_EQUIV, copy_rtx (src));
+ if (dump_file)
+ {
+ fprintf (dump_file,
+ "constantsynth_pass2: method \"%s\", "
+ HOST_WIDE_INT_PRINT_DEC " (",
+ name, v);
+ dump_value_slim (dump_file, src, 0);
+ fprintf (dump_file, ")\n");
+ dump_insn_slim (dump_file, insn);
+ fprintf (dump_file,
+ "constantsynth_pass2: costs (%d,%d) -> (%d,%d)\n",
+ info.costs.major (), info.costs.minor (),
+ min_costs.major (), min_costs.minor ());
+ dump_rtl_slim (dump_file, min_seq, NULL, -1, 0);
+ }
+ emit_insn_before (min_seq, insn);
+ set_insn_deleted (insn);
+ ++processed;
+ continue;
+ }
+ }
+
+ /* Large constants that are not synthesized are sent back to the
+ literal pool, as an SImode entry if the insn was not already in
+ SImode. */
+ if (mode != SImode)
+ {
+ remove_reg_equal_equiv_notes (insn);
+ validate_change (insn, &PATTERN (insn),
+ gen_rtx_SET (dest, force_const_mem (SImode, src)),
+ 0);
+ add_reg_note (insn, REG_EQUIV, copy_rtx (src));
+ if (dump_file)
+ fprintf (dump_file,
+ "constantsynth_pass2: litpool, " HOST_WIDE_INT_PRINT_DEC
+ " (" HOST_WIDE_INT_PRINT_HEX ")\n",
+ v, v);
+ }
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "constantsynth_pass2: %u insns",
+ (unsigned)info.insns.elements ());
+ if (optimize_size)
+ fprintf (dump_file, ", %u large CONST_INTs",
+ (unsigned int)info.usage.elements ());
+ fprintf (dump_file, ", %d processed\n", processed);
+ }
+}
+
/* Replace the source of [SH]Imode allocation whose value does not fit
into signed 12 bits with a reference to litpool entry. */
@@ -5989,6 +6177,7 @@ do_largeconst (void)
bool replacing_required = !TARGET_CONST16 && !TARGET_AUTO_LITPOOLS;
bool optimize_enabled = optimize && !optimize_debug;
rtx_insn *insn;
+ constantsynth_info cs_info;
for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
if (NONJUMP_INSN_P (insn))
@@ -6010,7 +6199,19 @@ do_largeconst (void)
that follows immediately after. */
if (optimize_enabled)
split_DI_DF_const (insn);
+
+ /* constantsynth pass 1.
+ Detect and record large constant assignments within a function. */
+ if (optimize_enabled)
+ constantsynth_pass1 (insn, cs_info);
}
+
+ /* constantsynth pass 2.
+ For each large constant value assignment collected in pass 1, try to
+ find a more efficient way to derive the value than referencing a literal
+ pool entry, and if found, replace the assignment with it. */
+ if (optimize_enabled)
+ constantsynth_pass2 (cs_info);
}
/* Convert assignments for large constants. */
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index a5c8a66aafb..cb047b0f655 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -90,10 +90,6 @@
(define_mode_iterator HQI [HI QI])
(define_mode_attr mode_bits [(HI "16") (QI "8")])
-;; This mode iterator allows the SI and HI patterns to be defined from
-;; the same template.
-(define_mode_iterator SHI [SI HI])
-
;; This iterator and attribute allow signed/unsigned FP truncations to be
;; generated from one template.
(define_code_iterator any_fix [fix unsigned_fix])
@@ -1285,30 +1281,6 @@
}
[(set_attr "mode" "SI")])
-(define_split
- [(set (match_operand:SHI 0 "register_operand")
- (match_operand:SHI 1 "constantpool_operand"))]
- "!optimize_debug && reload_completed"
- [(const_int 0)]
-{
- if (xtensa_constantsynth (operands[0], operands[1]))
- DONE;
- FAIL;
-})
-
-(define_split
- [(set (match_operand:SHI 0 "register_operand")
- (match_operand:SHI 1 "const_int_operand"))]
- "!optimize_debug && reload_completed
- && !TARGET_CONST16 && TARGET_AUTO_LITPOOLS
- && ! xtensa_simm12b (INTVAL (operands[1]))"
- [(const_int 0)]
-{
- if (xtensa_constantsynth (operands[0], operands[1]))
- DONE;
- FAIL;
-})
-
;; 16-bit Integer moves
(define_expand "movhi"
@@ -1509,29 +1481,6 @@
(set_attr "mode" "SF")
(set_attr "length" "3")])
-(define_split
- [(set (match_operand:SF 0 "register_operand")
- (match_operand 1 "constantpool_operand"))]
- "!optimize_debug && reload_completed"
- [(const_int 0)]
-{
- if (xtensa_constantsynth (operands[0], operands[1]))
- DONE;
- FAIL;
-})
-
-(define_split
- [(set (match_operand:SF 0 "register_operand")
- (match_operand 1 "const_double_operand"))]
- "!optimize_debug && reload_completed
- && !TARGET_CONST16 && TARGET_AUTO_LITPOOLS"
- [(const_int 0)]
-{
- if (xtensa_constantsynth (operands[0], operands[1]))
- DONE;
- FAIL;
-})
-
;; 64-bit floating point moves
(define_expand "movdf"
diff --git a/gcc/testsuite/gcc.target/xtensa/constsynthV2_O2_costs0.c b/gcc/testsuite/gcc.target/xtensa/constsynthV2_O2_costs0.c
new file mode 100644
index 00000000000..a32d75c360a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/xtensa/constsynthV2_O2_costs0.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mextra-l32r-costs=0" } */
+
+_Complex double test(int a[], float b[])
+{
+ a[0] = 2045 * 2045;
+ a[1] = 4182000; /* postreload const-anchored */
+ a[2] = 0xDEADBEEF;
+ a[3] = 0xDEADBEEF - 15; /* postreload const-anchored */
+ a[4] = 131071;
+ a[5] = 293805;
+ a[6] = 700972933;
+ a[7] = -372738139;
+ b[0] = 3.14159265359f;
+ b[1] = 0.12005615234375f;
+ return 1-1i;
+}
+
+/* { dg-final { scan-assembler-times "l32r" 10 } } */
diff --git a/gcc/testsuite/gcc.target/xtensa/constsynthV2_O2_costs5.c b/gcc/testsuite/gcc.target/xtensa/constsynthV2_O2_costs5.c
new file mode 100644
index 00000000000..5d29ddb8dae
--- /dev/null
+++ b/gcc/testsuite/gcc.target/xtensa/constsynthV2_O2_costs5.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mextra-l32r-costs=5" } */
+
+_Complex double test(int a[], float b[])
+{
+ a[0] = 2045 * 2045; /* method "square" */
+ a[1] = 4182000; /* postreload const-anchored */
+ a[2] = 0xDEADBEEF; /* method "32bits" */
+ a[3] = 0xDEADBEEF - 15; /* postreload const-anchored */
+ a[4] = 131071; /* method "lshr_m1" */
+ a[5] = 293805; /* method "16bits" */
+ a[6] = 700972933; /* method "32bits" */
+ a[7] = -372738139; /* method "32bits" */
+ b[0] = 3.14159265359f; /* method "32bits" */
+ b[1] = 0.12005615234375f; /* method "32bits" */
+ return 1-1i; /* method "16bits", method "16bits" */
+}
+
+/* { dg-final { scan-assembler-not "l32r" } } */
diff --git a/gcc/testsuite/gcc.target/xtensa/constsynthV2_Os.c b/gcc/testsuite/gcc.target/xtensa/constsynthV2_Os.c
new file mode 100644
index 00000000000..f56c68837b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/xtensa/constsynthV2_Os.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -mabi=windowed" } */
+
+_Complex double test(int a[], float b[])
+{
+ a[0] = 2045 * 2045; /* method "square", but not unique */
+ a[1] = 4182000; /* postreload const-anchored */
+ a[2] = 0xDEADBEEF;
+ a[3] = 0xDEADBEEF - 15; /* postreload const-anchored */
+ a[4] = 131071; /* method "lshr_m1", but not unique */
+ a[5] = 293805;
+ a[6] = 700972933;
+ a[7] = -372738139;
+ asm volatile ("# clobbers":::"a2","a3","a4","a5","a6","a7","a8","a9","a10","a11","a12","a13","a14","a15");
+ a[8] = 2045 * 2045; /* method "square", but not unique */
+ a[9] = 131071; /* method "lshr_m1", but not unique */
+ b[0] = 3.14159265359f;
+ b[1] = 0.12005615234375f;
+ return 1-1i; /* method "16bits", method "16bits" */
+}
+
+/* { dg-final { scan-assembler-times "l32r" 10 } } */
+/* { dg-final { scan-assembler-times ".literal " 8 } } */
diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c
deleted file mode 100644
index 43c85a25086..00000000000
--- a/gcc/testsuite/gcc.target/xtensa/constsynth_2insns.c
+++ /dev/null
@@ -1,44 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-Os" } */
-
-int test_0(void)
-{
- return 4095;
-}
-
-int test_1(void)
-{
- return 2147483647;
-}
-
-int test_2(void)
-{
- return -34816;
-}
-
-int test_3(void)
-{
- return -2049;
-}
-
-int test_4(void)
-{
- return 2048;
-}
-
-int test_5(void)
-{
- return 34559;
-}
-
-int test_6(void)
-{
- return 43680;
-}
-
-void test_7(int *p)
-{
- *p = -1432354816;
-}
-
-/* { dg-final { scan-assembler-not "l32r" } } */
diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c b/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c
deleted file mode 100644
index 831288c7ddd..00000000000
--- a/gcc/testsuite/gcc.target/xtensa/constsynth_3insns.c
+++ /dev/null
@@ -1,35 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2 -mextra-l32r-costs=3" } */
-
-int test_0(void)
-{
- return 134217216;
-}
-
-int test_1(void)
-{
- return -27604992;
-}
-
-int test_2(void)
-{
- return -162279;
-}
-
-void test_3(int *p)
-{
- *p = 192437;
-}
-
-struct foo
-{
- unsigned int b : 10;
- unsigned int g : 11;
- unsigned int r : 11;
-};
-void test_4(struct foo *p, unsigned int v)
-{
- p->g = v;
-}
-
-/* { dg-final { scan-assembler-not "l32r" } } */
diff --git a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c b/gcc/testsuite/gcc.target/xtensa/constsynth_double.c
deleted file mode 100644
index 5fba6a98650..00000000000
--- a/gcc/testsuite/gcc.target/xtensa/constsynth_double.c
+++ /dev/null
@@ -1,11 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-Os" } */
-
-void test(unsigned int count, double array[])
-{
- unsigned int i;
- for (i = 0; i < count; ++i)
- array[i] = 8.988474246316506e+307;
-}
-
-/* { dg-final { scan-assembler-not "l32r" } } */
--
2.39.5