On Tue, 2022-11-22 at 22:03 +0800, Xi Ruoyao via Gcc-patches wrote: > While I still can't fully understand the immediate load issue and how > this patch fixes it, I've tested this patch (alongside the prefetch > instruction patch) with bootstrap-ubsan. And the compiled result of > imm-load1.c seems OK.
And it's doing correct thing for Glibc "improved generic string functions" patch, producing some really tight loop now. > > On Thu, 2022-11-17 at 17:59 +0800, Lulu Cheng wrote: > > v1 -> v2: > > 1. Change the code format. > > 2. Fix bugs in the code. > > > > v2 -> v3: > > Modifying a code implementation of an undefined behavior. > > > > v3 -> v4: > > Move the part of the immediate number decomposition from expand pass > > to split > > pass. > > > > Both regression tests and spec2006 passed. > > > > The problem mentioned in the link does not move the four immediate > > load > > instructions out of the loop. It has been optimized. Now, as in the > > test case, > > four immediate load instructions are generated outside the loop. > > ( > > https://sourceware.org/pipermail/libc-alpha/2022-September/142202.html > > ) > > > > -------------------------------------------------------------------- > > Because loop2_invariant pass will extract the instructions that do > > not > > change > > in the loop out of the loop, some instructions will not meet the > > extraction > > conditions if the machine performs immediate decomposition while > > expand pass, > > so the immediate decomposition will be transferred to the split > > process. > > > > gcc/ChangeLog: > > > > * config/loongarch/loongarch.cc (enum > > loongarch_load_imm_method): > > Remove the member METHOD_INSV that is not currently used. > > (struct loongarch_integer_op): Define a new member > > curr_value, > > that records the value of the number stored in the > > destination > > register immediately after the current instruction has run. > > (loongarch_build_integer): Assign a value to the curr_value > > member variable. > > (loongarch_move_integer): Adds information for the immediate > > load instruction. > > * config/loongarch/loongarch.md (*movdi_32bit): Redefine as > > define_insn_and_split. > > (*movdi_64bit): Likewise. > > (*movsi_internal): Likewise. > > (*movhi_internal): Likewise. 
> > * config/loongarch/predicates.md: Return true as long as it > > is > > CONST_INT, ensure > > that the immediate number is not optimized by decomposition > > during expand > > optimization loop. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/loongarch/imm-load.c: New test. > > * gcc.target/loongarch/imm-load1.c: New test. > > --- > > gcc/config/loongarch/loongarch.cc | 62 ++++++++++------ > > -- > > - > > gcc/config/loongarch/loongarch.md | 44 +++++++++++-- > > gcc/config/loongarch/predicates.md | 2 +- > > gcc/testsuite/gcc.target/loongarch/imm-load.c | 10 +++ > > .../gcc.target/loongarch/imm-load1.c | 26 ++++++++ > > 5 files changed, 110 insertions(+), 34 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/loongarch/imm-load.c > > create mode 100644 gcc/testsuite/gcc.target/loongarch/imm-load1.c > > > > diff --git a/gcc/config/loongarch/loongarch.cc > > b/gcc/config/loongarch/loongarch.cc > > index 8ee32c90573..9e0d6c7c3ea 100644 > > --- a/gcc/config/loongarch/loongarch.cc > > +++ b/gcc/config/loongarch/loongarch.cc > > @@ -139,22 +139,21 @@ struct loongarch_address_info > > > > METHOD_LU52I: > > Load 52-63 bit of the immediate number. > > - > > - METHOD_INSV: > > - immediate like 0xfff00000fffffxxx > > - */ > > +*/ > > enum loongarch_load_imm_method > > { > > METHOD_NORMAL, > > METHOD_LU32I, > > - METHOD_LU52I, > > - METHOD_INSV > > + METHOD_LU52I > > }; > > > > struct loongarch_integer_op > > { > > enum rtx_code code; > > HOST_WIDE_INT value; > > + /* Represent the result of the immediate count of the load > > instruction at > > + each step. */ > > + HOST_WIDE_INT curr_value; > > enum loongarch_load_imm_method method; > > }; > > > > @@ -1475,24 +1474,27 @@ loongarch_build_integer (struct > > loongarch_integer_op *codes, > > { > > /* The value of the lower 32 bit be loaded with one > > instruction. > > lu12i.w. 
*/ > > - codes[0].code = UNKNOWN; > > - codes[0].method = METHOD_NORMAL; > > - codes[0].value = low_part; > > + codes[cost].code = UNKNOWN; > > + codes[cost].method = METHOD_NORMAL; > > + codes[cost].value = low_part; > > + codes[cost].curr_value = low_part; > > cost++; > > } > > else > > { > > /* lu12i.w + ior. */ > > - codes[0].code = UNKNOWN; > > - codes[0].method = METHOD_NORMAL; > > - codes[0].value = low_part & ~(IMM_REACH - 1); > > + codes[cost].code = UNKNOWN; > > + codes[cost].method = METHOD_NORMAL; > > + codes[cost].value = low_part & ~(IMM_REACH - 1); > > + codes[cost].curr_value = codes[cost].value; > > cost++; > > HOST_WIDE_INT iorv = low_part & (IMM_REACH - 1); > > if (iorv != 0) > > { > > - codes[1].code = IOR; > > - codes[1].method = METHOD_NORMAL; > > - codes[1].value = iorv; > > + codes[cost].code = IOR; > > + codes[cost].method = METHOD_NORMAL; > > + codes[cost].value = iorv; > > + codes[cost].curr_value = low_part; > > cost++; > > } > > } > > @@ -1515,11 +1517,14 @@ loongarch_build_integer (struct > > loongarch_integer_op *codes, > > { > > codes[cost].method = METHOD_LU52I; > > codes[cost].value = value & LU52I_B; > > + codes[cost].curr_value = value; > > return cost + 1; > > } > > > > codes[cost].method = METHOD_LU32I; > > codes[cost].value = (value & LU32I_B) | (sign51 ? LU52I_B : > > 0); > > + codes[cost].curr_value = (value & 0xfffffffffffff) > > + | (sign51 ? 
LU52I_B : 0); > > cost++; > > > > /* Determine whether the 52-61 bits are sign-extended from > > the > > low order, > > @@ -1528,6 +1533,7 @@ loongarch_build_integer (struct > > loongarch_integer_op *codes, > > { > > codes[cost].method = METHOD_LU52I; > > codes[cost].value = value & LU52I_B; > > + codes[cost].curr_value = value; > > cost++; > > } > > } > > @@ -2911,6 +2917,9 @@ loongarch_move_integer (rtx temp, rtx dest, > > unsigned HOST_WIDE_INT value) > > else > > x = force_reg (mode, x); > > > > + set_unique_reg_note (get_last_insn (), REG_EQUAL, > > + GEN_INT (codes[i-1].curr_value)); > > + > > switch (codes[i].method) > > { > > case METHOD_NORMAL: > > @@ -2918,22 +2927,17 @@ loongarch_move_integer (rtx temp, rtx dest, > > unsigned HOST_WIDE_INT value) > > GEN_INT (codes[i].value)); > > break; > > case METHOD_LU32I: > > - emit_insn ( > > - gen_rtx_SET (x, > > - gen_rtx_IOR (DImode, > > - gen_rtx_ZERO_EXTEND ( > > - DImode, gen_rtx_SUBREG > > (SImode, x, 0)), > > - GEN_INT (codes[i].value)))); > > + gcc_assert (mode == DImode); > > + x = gen_rtx_IOR (DImode, > > + gen_rtx_ZERO_EXTEND (DImode, > > + gen_rtx_SUBREG > > (SImode, x, 0)), > > + GEN_INT (codes[i].value)); > > break; > > case METHOD_LU52I: > > - emit_insn (gen_lu52i_d (x, x, GEN_INT (0xfffffffffffff), > > - GEN_INT (codes[i].value))); > > - break; > > - case METHOD_INSV: > > - emit_insn ( > > - gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, x, GEN_INT > > (20), > > - GEN_INT (32)), > > - gen_rtx_REG (DImode, 0))); > > + gcc_assert (mode == DImode); > > + x = gen_rtx_IOR (DImode, > > + gen_rtx_AND (DImode, x, GEN_INT > > (0xfffffffffffff)), > > + GEN_INT (codes[i].value)); > > break; > > default: > > gcc_unreachable (); > > diff --git a/gcc/config/loongarch/loongarch.md > > b/gcc/config/loongarch/loongarch.md > > index 2fda5381904..f61db66d535 100644 > > --- a/gcc/config/loongarch/loongarch.md > > +++ b/gcc/config/loongarch/loongarch.md > > @@ -1718,23 +1718,41 @@ (define_expand "movdi" > > DONE; > > }) > 
> > > -(define_insn "*movdi_32bit" > > +(define_insn_and_split "*movdi_32bit" > > [(set (match_operand:DI 0 "nonimmediate_operand" > > "=r,r,r,w,*f,*f,*r,*m") > > (match_operand:DI 1 "move_operand" > > "r,i,w,r,*J*r,*m,*f,*f"))] > > "!TARGET_64BIT > > && (register_operand (operands[0], DImode) > > || reg_or_0_operand (operands[1], DImode))" > > { return loongarch_output_move (operands[0], operands[1]); } > > + "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P > > (REGNO > > + (operands[0]))" > > + [(const_int 0)] > > + " > > +{ > > + loongarch_move_integer (operands[0], operands[0], INTVAL > > (operands[1])); > > + DONE; > > +} > > + " > > [(set_attr "move_type" > > "move,const,load,store,mgtf,fpload,mftg,fpstore") > > (set_attr "mode" "DI")]) > > > > -(define_insn "*movdi_64bit" > > +(define_insn_and_split "*movdi_64bit" > > [(set (match_operand:DI 0 "nonimmediate_operand" > > "=r,r,r,w,*f,*f,*r,*m") > > (match_operand:DI 1 "move_operand" > > "r,Yd,w,rJ,*r*J,*m,*f,*f"))] > > "TARGET_64BIT > > && (register_operand (operands[0], DImode) > > || reg_or_0_operand (operands[1], DImode))" > > { return loongarch_output_move (operands[0], operands[1]); } > > + "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P > > (REGNO > > + (operands[0]))" > > + [(const_int 0)] > > + " > > +{ > > + loongarch_move_integer (operands[0], operands[0], INTVAL > > (operands[1])); > > + DONE; > > +} > > + " > > [(set_attr "move_type" > > "move,const,load,store,mgtf,fpload,mftg,fpstore") > > (set_attr "mode" "DI")]) > > > > @@ -1749,12 +1767,21 @@ (define_expand "movsi" > > DONE; > > }) > > > > -(define_insn "*movsi_internal" > > +(define_insn_and_split "*movsi_internal" > > [(set (match_operand:SI 0 "nonimmediate_operand" > > "=r,r,r,w,*f,*f,*r,*m,*r,*z") > > (match_operand:SI 1 "move_operand" > > "r,Yd,w,rJ,*r*J,*m,*f,*f,*z,*r"))] > > "(register_operand (operands[0], SImode) > > || reg_or_0_operand (operands[1], SImode))" > > { return loongarch_output_move (operands[0], 
operands[1]); } > > + "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P > > (REGNO > > + (operands[0]))" > > + [(const_int 0)] > > + " > > +{ > > + loongarch_move_integer (operands[0], operands[0], INTVAL > > (operands[1])); > > + DONE; > > +} > > + " > > [(set_attr "move_type" > > "move,const,load,store,mgtf,fpload,mftg,fpstore,mftg,mgtf") > > (set_attr "mode" "SI")]) > > > > @@ -1774,12 +1801,21 @@ (define_expand "movhi" > > DONE; > > }) > > > > -(define_insn "*movhi_internal" > > +(define_insn_and_split "*movhi_internal" > > [(set (match_operand:HI 0 "nonimmediate_operand" > > "=r,r,r,r,m,r,k") > > (match_operand:HI 1 "move_operand" "r,Yd,I,m,rJ,k,rJ"))] > > "(register_operand (operands[0], HImode) > > || reg_or_0_operand (operands[1], HImode))" > > { return loongarch_output_move (operands[0], operands[1]); } > > + "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P > > (REGNO > > + (operands[0]))" > > + [(const_int 0)] > > + " > > +{ > > + loongarch_move_integer (operands[0], operands[0], INTVAL > > (operands[1])); > > + DONE; > > +} > > + " > > [(set_attr "move_type" "move,const,const,load,store,load,store") > > (set_attr "mode" "HI")]) > > > > diff --git a/gcc/config/loongarch/predicates.md > > b/gcc/config/loongarch/predicates.md > > index 8bd0c1376c9..58c3dc2261c 100644 > > --- a/gcc/config/loongarch/predicates.md > > +++ b/gcc/config/loongarch/predicates.md > > @@ -226,7 +226,7 @@ (define_predicate "move_operand" > > switch (GET_CODE (op)) > > { > > case CONST_INT: > > - return !splittable_const_int_operand (op, mode); > > + return true; > > > > case CONST: > > case SYMBOL_REF: > > diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load.c > > b/gcc/testsuite/gcc.target/loongarch/imm-load.c > > new file mode 100644 > > index 00000000000..c04ca33996f > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/loongarch/imm-load.c > > @@ -0,0 +1,10 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-mabi=lp64d -O2 -fdump-rtl-split1" } */ > 
> + > > +long int > > +test (void) > > +{ > > + return 0x1234567890abcdef; > > +} > > +/* { dg-final { scan-rtl-dump-times "scanning new insn with uid" 6 > > "split1" } } */ > > + > > diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load1.c > > b/gcc/testsuite/gcc.target/loongarch/imm-load1.c > > new file mode 100644 > > index 00000000000..2ff02971239 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/loongarch/imm-load1.c > > @@ -0,0 +1,26 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-mabi=lp64d -O2" } */ > > +/* { dg-final { scan-assembler > > "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */ > > + > > + > > +extern long long b[10]; > > +static inline long long > > +repeat_bytes (void) > > +{ > > + long long r = 0x0101010101010101; > > + > > + return r; > > +} > > + > > +static inline long long > > +highbit_mask (long long m) > > +{ > > + return m & repeat_bytes (); > > +} > > + > > +void test(long long *a) > > +{ > > + for (int i = 0; i < 10; i++) > > + b[i] = highbit_mask (a[i]); > > + > > +} > -- Xi Ruoyao <xry...@xry111.site> School of Aerospace Science and Technology, Xidian University