Hi,
Thanks for your always kind and helpful review!! "Kewen.Lin" <li...@linux.ibm.com> writes: > Hi Jeff, > > on 2023/12/6 13:24, Jiufu Guo wrote: >> Hi, >> >> Trunk gcc supports more constants to be built via two instructions: >> e.g. "li/lis; xori/xoris/rldicl/rldicr/rldic". >> And then num_insns_constant should also be updated. >> >> Function "rs6000_emit_set_long_const" is used to build complicated >> constants; and "num_insns_constant_gpr" is used to compute 'how >> many instructions are needed" to build the constant. So, these >> two functions should be aligned. >> >> The idea of this patch is: to reuse "rs6000_emit_set_long_const" to >> compute/record the instruction number(when computing the insn_num, >> then do not emit instructions). >> >> Compare with the previous version: >> https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636565.html >> This version updates "rs6000_emit_set_long_const" to use a condition >> if to select either "computing insn number" or "emitting the insn". >> And put them together to avoid misalign in the future. >> >> Bootstrap & regtest pass ppc64{,le}. >> Is this ok for trunk? >> >> BR, >> Jeff (Jiufu Guo) >> >> gcc/ChangeLog: >> >> * config/rs6000/rs6000.cc (rs6000_emit_set_long_const): Add new >> parameter to record number of instructions to build the constant. >> (num_insns_constant_gpr): Call rs6000_emit_set_long_const to compute >> num_insn. 
>> >> --- >> gcc/config/rs6000/rs6000.cc | 272 ++++++++++++++++++------------------ >> 1 file changed, 137 insertions(+), 135 deletions(-) >> >> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc >> index 3dfd79c4c43..dbdc72dce5d 100644 >> --- a/gcc/config/rs6000/rs6000.cc >> +++ b/gcc/config/rs6000/rs6000.cc >> @@ -1115,7 +1115,7 @@ static tree rs6000_handle_longcall_attribute (tree *, >> tree, tree, int, bool *); >> static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool >> *); >> static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool >> *); >> static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree); >> -static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT); >> +static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int * = >> nullptr); >> static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool); >> static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, >> bool); >> static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t, >> @@ -6054,21 +6054,9 @@ num_insns_constant_gpr (HOST_WIDE_INT value) >> >> else if (TARGET_POWERPC64) >> { >> - HOST_WIDE_INT low = sext_hwi (value, 32); >> - HOST_WIDE_INT high = value >> 31; >> - >> - if (high == 0 || high == -1) >> - return 2; >> - >> - high >>= 1; >> - >> - if (low == 0 || low == high) >> - return num_insns_constant_gpr (high) + 1; >> - else if (high == 0) >> - return num_insns_constant_gpr (low) + 1; >> - else >> - return (num_insns_constant_gpr (high) >> - + num_insns_constant_gpr (low) + 1); >> + int num_insns = 0; >> + rs6000_emit_set_long_const (NULL, value, &num_insns); > > Nit: Maybe nullptr to align with the others in this patch? ok. > >> + return num_insns; >> } >> >> else >> @@ -10494,14 +10482,13 @@ can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int >> *shift, HOST_WIDE_INT *mask) >> >> /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode. 
>> Output insns to set DEST equal to the constant C as a series of >> - lis, ori and shl instructions. */ >> + lis, ori and shl instructions. If NUM_INSNS is not NULL, then >> + only increase *NUM_INSNS as the number of insns, and do not output >> + real insns. */ > > Nit: Maybe s/output real/emit any/. Thanks. > >> >> static void >> -rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c) >> +rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) >> { >> - rtx temp; >> - int shift; >> - HOST_WIDE_INT mask; >> HOST_WIDE_INT ud1, ud2, ud3, ud4; >> >> ud1 = c & 0xffff; >> @@ -10509,168 +10496,183 @@ rs6000_emit_set_long_const (rtx dest, >> HOST_WIDE_INT c) >> ud3 = (c >> 32) & 0xffff; >> ud4 = (c >> 48) & 0xffff; >> >> - if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) >> - || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000))) >> - emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16))); >> + /* This lambda is used to emit one insn or just increase the insn count. >> + When counting the insn number, no need to emit the insn. Here, two >> + kinds of insns are needed: move and rldimi. */ > > Can we make the latter a bit more generic? Like something below? Great suggestion! Thanks. > >> + auto count_or_emit_insn = [&num_insns] (rtx dest, rtx op1, rtx op2 = >> NULL) { >> + if (num_insns) >> + (*num_insns)++; > > Nit: Make it early return. ok. > >> + else if (!op2) >> + emit_move_insn (dest, op1); >> + else >> + emit_insn (gen_rotldi3_insert_3 (dest, op1, GEN_INT (32), op2, >> + GEN_INT (0xffffffff))); > > > [&num_insns] (rtx dest_or_insn, rtx src) > > if (src) > emit_move_insn (dest_or_insn, src); > else > emit_insn (dest_or_insn); > This could support other gen_X in future. Thanks! > >> + }; >> >> - else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) >> - || (ud4 == 0 && ud3 == 0 && ! 
(ud2 & 0x8000))) >> + if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000)) >> + || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000))) >> { >> - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); >> + /* li */ >> + count_or_emit_insn (dest, GEN_INT (sext_hwi (ud1, 16))); >> + return; >> + } >> + >> + rtx temp = num_insns ? nullptr >> + : can_create_pseudo_p () ? gen_reg_rtx (DImode) : dest; > > Nit: Maybe > > temp = (num_insns || !can_create_pseudo_p ()) ? dest: gen_reg_rtx (DImode); > > since NULL passed as dest for num_insns. ok. > >> >> - emit_move_insn (ud1 != 0 ? temp : dest, >> - GEN_INT (sext_hwi (ud2 << 16, 32))); >> + if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000)) >> + || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000))) >> + { >> + /* lis[; ori] */ >> + count_or_emit_insn (ud1 != 0 ? temp : dest, >> + GEN_INT (sext_hwi (ud2 << 16, 32))); >> if (ud1 != 0) >> - emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >> + count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >> + return; >> } >> - else if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0) >> + >> + if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0) >> { >> /* lis; xoris */ >> - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); >> - emit_move_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32))); >> - emit_move_insn (dest, gen_rtx_XOR (DImode, temp, GEN_INT >> (0x80000000))); >> + count_or_emit_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, >> 32))); >> + count_or_emit_insn (dest, >> + gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000))); >> + return; >> } >> - else if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000)) >> + >> + if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000)) >> { >> /* li; xoris */ >> - temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (DImode); >> - emit_move_insn (temp, GEN_INT (sext_hwi (ud1, 16))); >> - emit_move_insn (dest, gen_rtx_XOR (DImode, temp, >> - GEN_INT ((ud2 ^ 0xffff) << 16))); >> + count_or_emit_insn (temp, GEN_INT (sext_hwi (ud1, 16))); >> + count_or_emit_insn (dest, gen_rtx_XOR (DImode, temp, >> + GEN_INT ((ud2 ^ 0xffff) << 16))); >> + return; >> } >> - else if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask) >> - || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask) >> - || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask) >> - || can_be_built_by_li_and_rldic (c, &shift, &mask)) >> + >> + int shift; >> + HOST_WIDE_INT mask; >> + if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask) >> + || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask) >> + || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask) >> + || can_be_built_by_li_and_rldic (c, &shift, &mask)) >> { >> - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); >> + /* li/lis; rldicX */ >> unsigned HOST_WIDE_INT imm = (c | ~mask); >> imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift)); >> >> - emit_move_insn (temp, GEN_INT (imm)); >> + count_or_emit_insn (temp, GEN_INT (imm)); >> if (shift != 0) >> temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift)); >> if (mask != HOST_WIDE_INT_M1) >> temp = gen_rtx_AND (DImode, temp, GEN_INT (mask)); >> - emit_move_insn (dest, temp); >> - } >> - else if (ud3 == 0 && ud4 == 0) >> - { >> - temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (DImode); >> + count_or_emit_insn (dest, temp); >> >> - gcc_assert (ud2 & 0x8000); >> + return; >> + } >> >> - if (ud1 == 0) >> - { >> - /* lis; rldicl */ >> - emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); >> - emit_move_insn (dest, >> - gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff))); >> - } >> - else if (!(ud1 & 0x8000)) >> + if (ud3 == 0 && ud4 == 0) >> + { >> + gcc_assert ((ud2 & 0x8000) && ud1 != 0); >> + if (!(ud1 & 0x8000)) >> { >> /* li; oris */ >> - emit_move_insn (temp, GEN_INT (ud1)); >> - emit_move_insn (dest, >> - gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16))); >> + count_or_emit_insn (temp, GEN_INT (ud1)); >> + count_or_emit_insn (dest, >> + gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16))); >> + return; >> } >> - else >> - { >> - /* lis; ori; rldicl */ >> - emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); >> - emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >> - emit_move_insn (dest, >> + >> + /* lis; ori; rldicl */ >> + count_or_emit_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32))); >> + count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >> + count_or_emit_insn (dest, >> gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff))); >> - } >> + return; >> } >> - else if (ud1 == ud3 && ud2 == ud4) >> + >> + if (ud1 == ud3 && ud2 == ud4) >> { > > Nit: Like the others, it's still preferred to have a comment indicating > what's insn sequence for this hunk, ... I understand your point. Since each 32-bit half may be built with various insns, it may be hard to list the exact insn sequence. Still, I also feel we need a comment here. > >> - temp = !can_create_pseudo_p () ? 
dest : gen_reg_rtx (DImode); >> HOST_WIDE_INT num = (ud2 << 16) | ud1; >> - rs6000_emit_set_long_const (temp, sext_hwi (num, 32)); >> + rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns); >> + >> rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)); >> rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32)); >> - emit_move_insn (dest, gen_rtx_IOR (DImode, one, two)); >> + count_or_emit_insn (dest, gen_rtx_IOR (DImode, one, two)); >> + return; >> } >> - else if ((ud4 == 0xffff && (ud3 & 0x8000)) >> - || (ud4 == 0 && ! (ud3 & 0x8000))) >> - { >> - temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode); >> >> - emit_move_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32))); >> + if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000))) >> + { > > ... and this. ok. > >> + count_or_emit_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32))); >> if (ud2 != 0) >> - emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2))); >> - emit_move_insn (ud1 != 0 ? temp : dest, >> - gen_rtx_ASHIFT (DImode, temp, GEN_INT (16))); >> + count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2))); >> + count_or_emit_insn (ud1 != 0 ? temp : dest, >> + gen_rtx_ASHIFT (DImode, temp, GEN_INT (16))); >> if (ud1 != 0) >> - emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >> + count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1))); >> + return; >> } >> - else if (TARGET_PREFIXED) >> + >> + if (TARGET_PREFIXED) >> { >> if (can_create_pseudo_p ()) >> { >> - /* pli A,L + pli B,H + rldimi A,B,32,0. */ >> - temp = gen_reg_rtx (DImode); >> - rtx temp1 = gen_reg_rtx (DImode); >> - emit_move_insn (temp, GEN_INT ((ud4 << 16) | ud3)); >> - emit_move_insn (temp1, GEN_INT ((ud2 << 16) | ud1)); >> - >> - emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1, >> - GEN_INT (0xffffffff))); >> + /* pli A,L; pli B,H; rldimi A,B,32,0. */ >> + rtx temp1 = num_insns ? 
nullptr : gen_reg_rtx (DImode); >> + count_or_emit_insn (temp, GEN_INT ((ud4 << 16) | ud3)); >> + count_or_emit_insn (temp1, GEN_INT ((ud2 << 16) | ud1)); >> + count_or_emit_insn (dest, temp, temp1); >> + return; >> } >> - else >> - { >> - /* pli A,H + sldi A,32 + paddi A,A,L. */ >> - emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3)); >> >> - emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); >> + /* There may be 1 insn inaccurate because of no info about dest. */ >> + bool can_use_paddi = dest ? REGNO (dest) != FIRST_GPR_REGNO : false; > > Nit: Move this line ... > >> >> - bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO; >> + /* pli A,H; sldi A,32; paddi A,A,L. */ >> + count_or_emit_insn (dest, GEN_INT ((ud4 << 16) | ud3)); >> + count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT >> (32))); >> > > ... here, just before its use. ok. > > The others look good to me, thanks! Thanks again for your great comments. BR, Jeff (Jiufu Guo) > > BR, > Kewen > >> - /* Use paddi for the low 32 bits. */ >> - if (ud2 != 0 && ud1 != 0 && can_use_paddi) >> - emit_move_insn (dest, gen_rtx_PLUS (DImode, dest, >> + /* Use paddi for the low 32 bits. */ >> + if (ud2 != 0 && ud1 != 0 && can_use_paddi) >> + count_or_emit_insn (dest, gen_rtx_PLUS (DImode, dest, >> GEN_INT ((ud2 << 16) | ud1))); >> - >> - /* Use oris, ori for low 32 bits. */ >> - if (ud2 != 0 && (ud1 == 0 || !can_use_paddi)) >> - emit_move_insn (dest, >> + /* Use oris, ori for low 32 bits. 
*/ >> + if (ud2 != 0 && (ud1 == 0 || !can_use_paddi)) >> + count_or_emit_insn (dest, >> gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); >> - if (ud1 != 0 && (ud2 == 0 || !can_use_paddi)) >> - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); >> - } >> + if (ud1 != 0 && (ud2 == 0 || !can_use_paddi)) >> + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); >> + return; >> } >> - else >> + >> + if (can_create_pseudo_p ()) >> { >> - if (can_create_pseudo_p ()) >> - { >> - /* lis HIGH,UD4 ; ori HIGH,UD3 ; >> - lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */ >> - rtx high = gen_reg_rtx (DImode); >> - rtx low = gen_reg_rtx (DImode); >> - HOST_WIDE_INT num = (ud2 << 16) | ud1; >> - rs6000_emit_set_long_const (low, sext_hwi (num, 32)); >> - num = (ud4 << 16) | ud3; >> - rs6000_emit_set_long_const (high, sext_hwi (num, 32)); >> - emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low, >> - GEN_INT (0xffffffff))); >> - } >> - else >> - { >> - /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ; >> - oris DEST,UD2 ; ori DEST,UD1. */ >> - emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32))); >> - if (ud3 != 0) >> - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3))); >> + /* lis HIGH,UD4 ; ori HIGH,UD3 ; >> + lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */ >> + rtx high = num_insns ? nullptr : gen_reg_rtx (DImode); >> + rtx low = num_insns ? 
nullptr : gen_reg_rtx (DImode); >> + HOST_WIDE_INT num = (ud2 << 16) | ud1; >> + rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns); >> + num = (ud4 << 16) | ud3; >> + rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns); >> >> - emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); >> - if (ud2 != 0) >> - emit_move_insn (dest, >> - gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16))); >> - if (ud1 != 0) >> - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); >> - } >> + count_or_emit_insn (dest, high, low); >> + return; >> } >> + >> + /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ; >> + oris DEST,UD2 ; ori DEST,UD1. */ >> + count_or_emit_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32))); >> + if (ud3 != 0) >> + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3))); >> + >> + count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); >> + if (ud2 != 0) >> + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << >> 16))); >> + if (ud1 != 0) >> + count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); >> + >> + return; >> } >> >> /* Helper for the following. Get rid of [r+r] memory refs