Fix an issue where the compiler failed to hoist the four instructions that load a 64-bit immediate out of a loop.
gcc/ChangeLog: * config/loongarch/constraints.md (x): New constraint. * config/loongarch/loongarch.cc (struct loongarch_integer_op): Define a new member curr_value that records the value held in the destination register immediately after the current instruction has run. (loongarch_build_integer): Add a method to load bits 32-63 of the immediate. (loongarch_move_integer): Same as above. * config/loongarch/loongarch.h (HWIT_UC_0xFFFFFFFF): New macro. (HI32_OPERAND): New macro. * config/loongarch/loongarch.md (load_hi32): New template. * config/loongarch/predicates.md (const_hi32_operand): New predicate that matches an immediate whose low 32 bits are all zero. (hi32_mask_operand): New predicate that matches the mask 0xffffffff, which keeps the low 32 bits. gcc/testsuite/ChangeLog: * gcc.target/loongarch/imm-load.c: New test. --- gcc/config/loongarch/constraints.md | 7 +- gcc/config/loongarch/loongarch.cc | 95 ++++++++++++------- gcc/config/loongarch/loongarch.h | 6 ++ gcc/config/loongarch/loongarch.md | 26 +++++ gcc/config/loongarch/predicates.md | 8 ++ gcc/testsuite/gcc.target/loongarch/imm-load.c | 25 +++++ 6 files changed, 133 insertions(+), 34 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/imm-load.c diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md index 43cb7b5f0f5..1dcf09ce5eb 100644 --- a/gcc/config/loongarch/constraints.md +++ b/gcc/config/loongarch/constraints.md @@ -46,7 +46,7 @@ ;; "u" "A signed 52bit constant and low 32-bit is zero (for logic instructions)" ;; "v" "A signed 64-bit constant and low 44-bit is zero (for logic instructions)." ;; "w" "Matches any valid memory." -;; "x" <-----unused +;; "x" "A signed 64-bit constant and low 32-bit is zero (for logic instructions)." 
;; "y" <-----unused ;; "z" FCC_REGS ;; "A" <-----unused @@ -139,6 +139,11 @@ (define_constraint "v" (and (match_code "const_int") (match_test "LU52I_OPERAND (ival)"))) +(define_constraint "x" + "A signed 64-bit constant and low 32-bit is zero (for logic instructions)." + (and (match_code "const_int") + (match_test "HI32_OPERAND (ival)"))) + (define_register_constraint "z" "FCC_REGS" "A floating-point condition code register.") diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index f54c233f90c..5e8cd293645 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -139,6 +139,9 @@ struct loongarch_address_info METHOD_LU52I: Load 52-63 bit of the immediate number. + METHOD_LD_HI32: + Load 32-63 bit of the immediate number. + METHOD_INSV: immediate like 0xfff00000fffffxxx */ @@ -147,13 +150,18 @@ enum loongarch_load_imm_method METHOD_NORMAL, METHOD_LU32I, METHOD_LU52I, + METHOD_LD_HI32, METHOD_INSV }; struct loongarch_integer_op { enum rtx_code code; + /* Current Immediate Count The immediate count of the load instruction. */ HOST_WIDE_INT value; + /* Represent the result of the immediate count of the load instruction at + each step. */ + HOST_WIDE_INT curr_value; enum loongarch_load_imm_method method; }; @@ -1474,24 +1482,27 @@ loongarch_build_integer (struct loongarch_integer_op *codes, { /* The value of the lower 32 bit be loaded with one instruction. lu12i.w. */ - codes[0].code = UNKNOWN; - codes[0].method = METHOD_NORMAL; - codes[0].value = low_part; + codes[cost].code = UNKNOWN; + codes[cost].method = METHOD_NORMAL; + codes[cost].value = low_part; + codes[cost].curr_value = low_part; cost++; } else { /* lu12i.w + ior. 
*/ - codes[0].code = UNKNOWN; - codes[0].method = METHOD_NORMAL; - codes[0].value = low_part & ~(IMM_REACH - 1); + codes[cost].code = UNKNOWN; + codes[cost].method = METHOD_NORMAL; + codes[cost].value = low_part & ~(IMM_REACH - 1); + codes[cost].curr_value = codes[cost].value; cost++; HOST_WIDE_INT iorv = low_part & (IMM_REACH - 1); if (iorv != 0) { - codes[1].code = IOR; - codes[1].method = METHOD_NORMAL; - codes[1].value = iorv; + codes[cost].code = IOR; + codes[cost].method = METHOD_NORMAL; + codes[cost].value = iorv; + codes[cost].curr_value = low_part; cost++; } } @@ -1514,23 +1525,34 @@ loongarch_build_integer (struct loongarch_integer_op *codes, { codes[cost].method = METHOD_LU52I; codes[cost].value = value & LU52I_B; - return cost + 1; + codes[cost].curr_value = codes[cost].value | (codes[cost-1].curr_value & + 0xfffffffffffff); + return cost++; } - codes[cost].method = METHOD_LU32I; - codes[cost].value = (value & LU32I_B) | (sign51 ? LU52I_B : 0); - cost++; - - /* Determine whether the 52-61 bits are sign-extended from the low order, - and if not, load the 52-61 bits. */ - if (!lu52i[(value & (HOST_WIDE_INT_1U << 51)) >> 51]) + if (lu52i[sign51]) { - codes[cost].method = METHOD_LU52I; - codes[cost].value = value & LU52I_B; + /* Determine whether the 52-61 bits are sign-extended from the low order. + If so, the 52-61 bits of the immediate number do not need to be loaded. + */ + codes[cost].method = METHOD_LU32I; + codes[cost].value = (value & LU32I_B) | (sign51 ? LU52I_B : 0); + codes[cost].curr_value = codes[cost].value | (codes[cost-1].curr_value & + 0xffffffff); + cost++; + } + else + { + /* If the higher 32 bits of the 64bit immediate need to be loaded + separately by two instructions, a false immediate load instruction + load_hi32 is used to load them. 
*/ + codes[cost].method = METHOD_LD_HI32; + codes[cost].value = value & 0xffffffff00000000; + codes[cost].curr_value = codes[cost].value | (codes[cost-1].curr_value & + 0xffffffff); cost++; } } - gcc_assert (cost <= LARCH_MAX_INTEGER_OPS); return cost; @@ -2910,30 +2932,37 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value) else x = force_reg (mode, x); + set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (codes[i-1].curr_value)); + switch (codes[i].method) { case METHOD_NORMAL: + /* mov or ior. */ x = gen_rtx_fmt_ee (codes[i].code, mode, x, GEN_INT (codes[i].value)); break; case METHOD_LU32I: - emit_insn ( - gen_rtx_SET (x, - gen_rtx_IOR (DImode, - gen_rtx_ZERO_EXTEND ( - DImode, gen_rtx_SUBREG (SImode, x, 0)), - GEN_INT (codes[i].value)))); + gcc_assert (mode == DImode); + /* lu32i_d */ + x = gen_rtx_IOR (mode, gen_rtx_ZERO_EXTEND (mode, + gen_rtx_SUBREG (SImode, x, 0)), + GEN_INT (codes[i].value)); break; case METHOD_LU52I: - emit_insn (gen_lu52i_d (x, x, GEN_INT (0xfffffffffffff), - GEN_INT (codes[i].value))); + gcc_assert (mode == DImode); + /* lu52i_d */ + x = gen_rtx_IOR (mode, gen_rtx_AND (mode, x, GEN_INT (0xfffffffffffff)), + GEN_INT (codes[i].value)); break; - case METHOD_INSV: - emit_insn ( - gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, x, GEN_INT (20), - GEN_INT (32)), - gen_rtx_REG (DImode, 0))); + case METHOD_LD_HI32: + /* Load the high 32 bits of the immediate number. */ + gcc_assert (mode == DImode); + /* load_hi32 */ + x = gen_rtx_IOR (mode, gen_rtx_AND (mode, x, GEN_INT (0xffffffff)), + GEN_INT (codes[i].value)); break; + case METHOD_INSV: + /* It is not currently implemented. 
*/ default: gcc_unreachable (); } diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h index f4a9c329fef..cfc046f546e 100644 --- a/gcc/config/loongarch/loongarch.h +++ b/gcc/config/loongarch/loongarch.h @@ -605,6 +605,12 @@ enum reg_class #define LU52I_OPERAND(VALUE) \ (((VALUE) | (HWIT_UC_0xFFF << 52)) == (HWIT_UC_0xFFF << 52)) +/* True if VALUE can be loaded into a register using load_hi32. */ + +#define HWIT_UC_0xFFFFFFFF HOST_WIDE_INT_UC(0xffffffff) +#define HI32_OPERAND(VALUE) \ + (((VALUE) | (HWIT_UC_0xFFFFFFFF << 32)) == (HWIT_UC_0xFFFFFFFF << 32)) + /* Return a value X with the low 12 bits clear, and such that VALUE - X is a signed 12-bit value. */ diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index 214b14bddd3..7eaa9ab66e3 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -1882,6 +1882,32 @@ (define_expand "mov<mode>cc" DONE; }) +(define_insn_and_split "load_hi32" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI + (and:DI (match_operand:DI 1 "register_operand" "0") + (match_operand 2 "hi32_mask_operand")) + (match_operand 3 "const_hi32_operand" "x")))] + "TARGET_64BIT" + "#" + "" + [(set (match_dup 0) + (ior:DI + (zero_extend:DI + (subreg:SI (match_dup 1) 0)) + (match_dup 4))) + (set (match_dup 0) + (ior:DI + (and:DI (match_dup 0) + (match_dup 6)) + (match_dup 5)))] +{ + operands[4] = GEN_INT (INTVAL (operands[3]) << 12 >> 12); + operands[5] = GEN_INT (INTVAL (operands[3]) & 0xfff0000000000000); + operands[6] = GEN_INT (0xfffffffffffff); +} + [(set_attr "insn_count" "2")]) + (define_insn "lu32i_d" [(set (match_operand:DI 0 "register_operand" "=r") (ior:DI diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md index 8bd0c1376c9..29d81ff0250 100644 --- a/gcc/config/loongarch/predicates.md +++ b/gcc/config/loongarch/predicates.md @@ -35,6 +35,10 @@ (define_predicate "const_lu52i_operand" (and (match_code 
"const_int") (match_test "LU52I_OPERAND (INTVAL (op))"))) +(define_predicate "const_hi32_operand" + (and (match_code "const_int") + (match_test "HI32_OPERAND (INTVAL (op))"))) + (define_predicate "const_arith_operand" (and (match_code "const_int") (match_test "IMM12_OPERAND (INTVAL (op))"))) @@ -103,6 +107,10 @@ (define_predicate "lu52i_mask_operand" (and (match_code "const_int") (match_test "UINTVAL (op) == 0xfffffffffffff"))) +(define_predicate "hi32_mask_operand" + (and (match_code "const_int") + (match_test "UINTVAL (op) == 0xffffffff"))) + (define_predicate "low_bitmask_operand" (and (match_code "const_int") (match_test "low_bitmask_len (mode, INTVAL (op)) > 12"))) diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load.c b/gcc/testsuite/gcc.target/loongarch/imm-load.c new file mode 100644 index 00000000000..91ceb33d058 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/imm-load.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-options "-mabi=lp64d -O2 -fdump-rtl-loop2_invariant" } */ + +extern long long b[10]; +static inline long long +repeat_bytes (void) +{ + long long r = 0x0101010101010101; + + return r; +} + +static inline long long +highbit_mask (long long m) +{ + return m & repeat_bytes (); +} + +void test(long long *a) +{ + for (int i = 0; i < 10; i++) + b[i] = highbit_mask (a[i]); + +} +/* { dg-final { scan-rtl-dump-times "moved without introducing a new temporary register" 4 "loop2_invariant" } } */ -- 2.31.1