Hi, on 2023/11/15 11:02, Jiufu Guo wrote: > Hi, > > Trunk gcc supports more constants to be built via two instructions: e.g. > "li/lis; xori/xoris/rldicl/rldicr/rldic". > And then num_insns_constant should also be updated. > > Function "rs6000_emit_set_long_const" is used to build complicated > constants; and "num_insns_constant_gpr" is used to compute "how > many instructions are needed" to build the constant. So, these > two functions should be aligned. > > The idea is: reusing "rs6000_emit_set_long_const" to compute/record > the instruction number (when computing the insn_num, do not emit > instructions). > > Compare with previous versions: > https://gcc.gnu.org/pipermail/gcc-patches/2023-October/634195.html > This version adds an argument to "rs6000_emit_set_long_const" to > indicate computing instruction number instead of emitting instructions. > > Bootstrap & regtest pass ppc64{,le}. > Is this ok for trunk? > > BR, > Jeff (Jiufu Guo) > > gcc/ChangeLog: > > * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add new > parameter to record number of instructions to build the constant. > (num_insns_constant_gpr): Call rs6000_emit_set_long_const to compute > num_insn. > (ADJUST_INSN_NUM_AND_RET): New macro. > (rs6000_emit_set_const): Call rs6000_emit_set_long_const with NULL > argument. 
> > --- > gcc/config/rs6000/rs6000.cc | 245 +++++++++++++++++++----------------- > 1 file changed, 133 insertions(+), 112 deletions(-) > > diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc > index cc24dd5301e..ba40dd6eee4 100644 > --- a/gcc/config/rs6000/rs6000.cc > +++ b/gcc/config/rs6000/rs6000.cc > @@ -1115,7 +1115,7 @@ static tree rs6000_handle_longcall_attribute (tree *, > tree, tree, int, bool *); > static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool > *); > static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *); > static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree); > -static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT); > +static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int *);
Make the new argument default to nullptr... > static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool); > static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, > bool); > static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t, > @@ -6054,21 +6054,9 @@ num_insns_constant_gpr (HOST_WIDE_INT value) > > else if (TARGET_POWERPC64) > { > - HOST_WIDE_INT low = sext_hwi (value, 32); > - HOST_WIDE_INT high = value >> 31; > - > - if (high == 0 || high == -1) > - return 2; > - > - high >>= 1; > - > - if (low == 0 || low == high) > - return num_insns_constant_gpr (high) + 1; > - else if (high == 0) > - return num_insns_constant_gpr (low) + 1; > - else > - return (num_insns_constant_gpr (high) > - + num_insns_constant_gpr (low) + 1); > + int num_insns = 0; > + rs6000_emit_set_long_const (NULL, value, &num_insns); > + return num_insns; > } > > else > @@ -10284,7 +10272,7 @@ rs6000_emit_set_const (rtx dest, rtx source) > emit_move_insn (lo, GEN_INT (c)); > } > else > - rs6000_emit_set_long_const (dest, c); > + rs6000_emit_set_long_const (dest, c, NULL); ... then we don't need to change this line. > break; > > default: > @@ -10494,14 +10482,13 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int > *shift, HOST_WIDE_INT *mask) > > /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. > Output insns to set DEST equal to the constant C as a series of > - lis, ori and shl instructions. */ > + lis, ori and shl instructions. If NUM_INSNS is not NULL, then > + only increase *NUM_INSNS as the number of insns, and do not output > + real insns. 
*/ > > static void > -rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) > +rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) > { > - rtx temp; > - int shift; > - HOST_WIDE_INT mask; > HOST_WIDE_INT ud1, ud2, ud3, ud4; > > ud1 = c & 0xffff; > @@ -10509,41 +10496,71 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT > c) > ud3 = (c >> 32) & 0xffff; > ud4 = (c >> 48) & 0xffff; > > + /* This macro RETURNs this function after increasing *NUM_INSNS!!! */ > +#define ADJUST_INSN_NUM_AND_RET(N) > \ > + if (num_insns) > \ > + { > \ > + *num_insns += (N); > \ > + return; > \ > + } This macro and its uses below can still produce inconsistent counts, since in some arms the count is pre-computed separately from the code that emits the insns. Can we introduce a lambda function named count_or_emit_insn and use it to replace all the current uses of emit_move_insn in this function? Then each place that emits an insn would also do the counting, which avoids a pre-computed count getting out of sync with the emitted insns (i.e. going out of date some day). BR, Kewen > + > if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) > - || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000))) > - emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16))); > + || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000))) > + { > + /* li */ > + ADJUST_INSN_NUM_AND_RET (1); > + emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16))); > + return; > + } > + > + rtx temp = num_insns > + ? NULL > + : (!can_create_pseudo_p () ? dest : gen_reg_rtx (DImode)); > > - else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) > - || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000))) > + if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) > + || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000))) > { > - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > + /* lis[; ori] */ > + ADJUST_INSN_NUM_AND_RET (ud1 != 0 ? 2 : 1); > > emit_move_insn (ud1 != 0 ? 
temp : dest, > GEN_INT (sext_hwi (ud2 << 16, 32))); > if (ud1 != 0) > emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); > + return; > } > - else if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0) > + > + if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0) > { > /* lis; xoris */ > - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > + ADJUST_INSN_NUM_AND_RET (2); > + > emit_move_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32))); > emit_move_insn (dest, gen_rtx_XOR (DImode, temp, GEN_INT > (0x80000000))); > + return; > } > - else if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000)) > + > + if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000)) > { > /* li; xoris */ > - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > + ADJUST_INSN_NUM_AND_RET (2); > + > emit_move_insn (temp, GEN_INT (sext_hwi (ud1, 16))); > emit_move_insn (dest, gen_rtx_XOR (DImode, temp, > GEN_INT ((ud2 ^ 0xffff) << 16))); > + return; > } > - else if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask) > - || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask) > - || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask) > - || can_be_built_by_li_and_rldic (c, &shift, &mask)) > + > + int shift; > + HOST_WIDE_INT mask; > + if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask) > + || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask) > + || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask) > + || can_be_built_by_li_and_rldic (c, &shift, &mask)) > { > - temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (DImode); > + /* li/lis; rldicX */ > + ADJUST_INSN_NUM_AND_RET (2); > + > unsigned HOST_WIDE_INT imm = (c | ~mask); > imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift)); > > @@ -10553,49 +10570,48 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT > c) > if (mask != HOST_WIDE_INT_M1) > temp = gen_rtx_AND (DImode, temp, GEN_INT (mask)); > emit_move_insn (dest, temp); > + > + return; > } > - else if (ud3 == 0 && ud4 == 0) > - { > - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > > - gcc_assert (ud2 & 0x8000); > + if (ud3 == 0 && ud4 == 0) > + { > + gcc_assert ((ud2 & 0x8000) && ud1 != 0); > + ADJUST_INSN_NUM_AND_RET (!(ud1 & 0x8000) ? 2 : 3); > > - if (ud1 == 0) > - { > - /* lis; rldicl */ > - emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); > - emit_move_insn (dest, > - gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff))); > - } > - else if (!(ud1 & 0x8000)) > + if (!(ud1 & 0x8000)) > { > /* li; oris */ > emit_move_insn (temp, GEN_INT (ud1)); > emit_move_insn (dest, > gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16))); > + return; > } > - else > - { > - /* lis; ori; rldicl */ > - emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); > - emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); > - emit_move_insn (dest, > - gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff))); > - } > + > + /* lis; ori; rldicl */ > + emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); > + emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); > + emit_move_insn (dest, gen_rtx_AND (DImode, temp, GEN_INT > (0xffffffff))); > + return; > } > - else if (ud1 == ud3 && ud2 == ud4) > + > + if (ud1 == ud3 && ud2 == ud4) > { > - temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (DImode); > HOST_WIDE_INT num = (ud2 << 16) | ud1; > - rs6000_emit_set_long_const (temp, sext_hwi (num, 32)); > + rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns); > + > + /* Adjust(+1) insn number after half part is adjusted. */ > + ADJUST_INSN_NUM_AND_RET (1); > + > rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)); > rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32)); > emit_move_insn (dest, gen_rtx_IOR (DImode, one, two)); > + return; > } > - else if ((ud4 == 0xffff && (ud3 & 0x8000)) > - || (ud4 == 0 && ! (ud3 & 0x8000))) > + > + if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000))) > { > - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); > + ADJUST_INSN_NUM_AND_RET (2 + (ud2 != 0 ? 1 : 0) + (ud1 != 0 ? 1 : 0)); > > emit_move_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32))); > if (ud2 != 0) > @@ -10604,73 +10620,78 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT > c) > gen_rtx_ASHIFT (DImode, temp, GEN_INT (16))); > if (ud1 != 0) > emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); > + return; > } > - else if (TARGET_PREFIXED) > + > + if (TARGET_PREFIXED) > { > if (can_create_pseudo_p ()) > { > - /* pli A,L + pli B,H + rldimi A,B,32,0. */ > - temp = gen_reg_rtx (DImode); > + /* pli A,L; pli B,H; rldimi A,B,32,0. */ > + ADJUST_INSN_NUM_AND_RET (3); > + > rtx temp1 = gen_reg_rtx (DImode); > emit_move_insn (temp, GEN_INT ((ud4 << 16) | ud3)); > emit_move_insn (temp1, GEN_INT ((ud2 << 16) | ud1)); > - > emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1, > GEN_INT (0xffffffff))); > + return; > } > - else > - { > - /* pli A,H + sldi A,32 + paddi A,A,L. */ > - emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3)); > > - emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); > + /* There may be 1 insn inaccurate because of no info about dest. */ > + ADJUST_INSN_NUM_AND_RET (2 + (ud2 != 0 && ud1 != 0 ? 
2 : 1)); > > - bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO; > + bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO; > > - /* Use paddi for the low 32 bits. */ > - if (ud2 != 0 && ud1 != 0 && can_use_paddi) > - emit_move_insn (dest, gen_rtx_PLUS (DImode, dest, > - GEN_INT ((ud2 << 16) | ud1))); > + /* pli A,H; sldi A,32; paddi A,A,L. */ > + emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3)); > + emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); > > - /* Use oris, ori for low 32 bits. */ > - if (ud2 != 0 && (ud1 == 0 || !can_use_paddi)) > - emit_move_insn (dest, > - gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); > - if (ud1 != 0 && (ud2 == 0 || !can_use_paddi)) > - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); > - } > + /* Use paddi for the low 32 bits. */ > + if (ud2 != 0 && ud1 != 0 && can_use_paddi) > + emit_move_insn (dest, gen_rtx_PLUS (DImode, dest, > + GEN_INT ((ud2 << 16) | ud1))); > + /* Use oris, ori for low 32 bits. */ > + if (ud2 != 0 && (ud1 == 0 || !can_use_paddi)) > + emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); > + if (ud1 != 0 && (ud2 == 0 || !can_use_paddi)) > + emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); > + return; > } > - else > - { > - if (can_create_pseudo_p ()) > - { > - /* lis HIGH,UD4 ; ori HIGH,UD3 ; > - lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */ > - rtx high = gen_reg_rtx (DImode); > - rtx low = gen_reg_rtx (DImode); > - HOST_WIDE_INT num = (ud2 << 16) | ud1; > - rs6000_emit_set_long_const (low, sext_hwi (num, 32)); > - num = (ud4 << 16) | ud3; > - rs6000_emit_set_long_const (high, sext_hwi (num, 32)); > - emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low, > - GEN_INT (0xffffffff))); > - } > - else > - { > - /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ; > - oris DEST,UD2 ; ori DEST,UD1. 
*/ > - emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32))); > - if (ud3 != 0) > - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3))); > > - emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); > - if (ud2 != 0) > - emit_move_insn (dest, > - gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); > - if (ud1 != 0) > - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); > - } > + if (can_create_pseudo_p ()) > + { > + /* lis HIGH,UD4 ; ori HIGH,UD3 ; > + lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */ > + rtx high = num_insns ? nullptr : gen_reg_rtx (DImode); > + rtx low = num_insns ? nullptr : gen_reg_rtx (DImode); > + HOST_WIDE_INT num = (ud2 << 16) | ud1; > + rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns); > + num = (ud4 << 16) | ud3; > + rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns); > + > + /* Adjust(+1) insn number after 'high' and 'low' parts are adjusted. > */ > + ADJUST_INSN_NUM_AND_RET (1); > + emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low, > + GEN_INT (0xffffffff))); > + return; > } > + > + /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ; > + oris DEST,UD2 ; ori DEST,UD1. */ > + ADJUST_INSN_NUM_AND_RET (2 + (ud3 != 0 ? 1 : 0) + (ud2 != 0 ? 1 : 0) > + + (ud1 != 0 ? 1 : 0)); > + emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32))); > + if (ud3 != 0) > + emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3))); > + > + emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); > + if (ud2 != 0) > + emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); > + if (ud1 != 0) > + emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); > + > + return; > } > > /* Helper for the following. Get rid of [r+r] memory refs