On Thu, Dec 01, 2022 at 09:39:53PM -0800, Richard Henderson wrote: > Pre-compute the function call layout for each helper at startup. > Drop TCG_CALL_DUMMY_ARG, as we no longer need to leave gaps > in the op->args[] array. This allows several places to stop > checking for NULL TCGTemp, to which TCG_CALL_DUMMY_ARG mapped. > > For tcg_gen_callN, loop over the arguments once. Allocate the TCGOp > for the call early but delay emitting it, collecting arguments first. > This allows the argument processing loop to emit code for extensions > and have them sequenced before the call. > > For tcg_reg_alloc_call, loop over the arguments in reverse order, > which allows stack slots to be filled first naturally. > > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- > include/exec/helper-head.h | 2 + > include/tcg/tcg.h | 5 +- > tcg/tcg-internal.h | 22 +- > tcg/optimize.c | 6 +- > tcg/tcg.c | 609 ++++++++++++++++++++++--------------- > 5 files changed, 394 insertions(+), 250 deletions(-)
... > diff --git a/tcg/tcg.c b/tcg/tcg.c > index d08323db49..74f7491d73 100644 > --- a/tcg/tcg.c > +++ b/tcg/tcg.c > @@ -547,7 +547,7 @@ void tcg_pool_reset(TCGContext *s) > > #include "exec/helper-proto.h" > > -static const TCGHelperInfo all_helpers[] = { > +static TCGHelperInfo all_helpers[] = { > #include "exec/helper-tcg.h" > }; > static GHashTable *helper_table; > @@ -565,6 +565,154 @@ static ffi_type * const typecode_to_ffi[8] = { > }; > #endif > > +typedef struct TCGCumulativeArgs { > + int arg_idx; /* tcg_gen_callN args[] */ > + int info_in_idx; /* TCGHelperInfo in[] */ > + int arg_slot; /* regs+stack slot */ > + int ref_slot; /* stack slots for references */ > +} TCGCumulativeArgs; > + > +static void layout_arg_even(TCGCumulativeArgs *cum) > +{ > + cum->arg_slot += cum->arg_slot & 1; > +} > + > +static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info, > + TCGCallArgumentKind kind) > +{ > + TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; > + > + *loc = (TCGCallArgumentLoc){ > + .kind = kind, > + .arg_idx = cum->arg_idx, > + .arg_slot = cum->arg_slot, > + }; > + cum->info_in_idx++; > + cum->arg_slot++; > +} > + > +static void layout_arg_normal_n(TCGCumulativeArgs *cum, > + TCGHelperInfo *info, int n) > +{ > + TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx]; > + > + for (int i = 0; i < n; ++i) { > + /* Layout all using the same arg_idx, adjusting the subindex. */ > + loc[i] = (TCGCallArgumentLoc){ > + .kind = TCG_CALL_ARG_NORMAL, > + .arg_idx = cum->arg_idx, > + .tmp_subindex = i, > + .arg_slot = cum->arg_slot + i, > + }; > + } > + cum->info_in_idx += n; > + cum->arg_slot += n; > +} > + > +static void init_call_layout(TCGHelperInfo *info) > +{ > + int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs); > + int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long); > + unsigned typemask = info->typemask; > + unsigned typecode; > + TCGCumulativeArgs cum = { }; > + > + /* > + * Parse and place any function return value. > + */ > + typecode = typemask & 7; > + switch (typecode) { > + case dh_typecode_void: > + info->nr_out = 0; > + break; > + case dh_typecode_i32: > + case dh_typecode_s32: > + case dh_typecode_ptr: > + info->nr_out = 1; > + info->out_kind = TCG_CALL_RET_NORMAL; > + break; > + case dh_typecode_i64: > + case dh_typecode_s64: > + info->nr_out = 64 / TCG_TARGET_REG_BITS; > + info->out_kind = TCG_CALL_RET_NORMAL; > + break; > + default: > + g_assert_not_reached(); > + } > + assert(info->nr_out <= ARRAY_SIZE(tcg_target_call_oarg_regs)); > + > + /* > + * Parse and place function arguments. > + */ > + for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) { > + TCGCallArgumentKind kind; > + TCGType type; > + > + typecode = typemask & 7; > + switch (typecode) { > + case dh_typecode_i32: > + case dh_typecode_s32: > + type = TCG_TYPE_I32; > + break; > + case dh_typecode_i64: > + case dh_typecode_s64: > + type = TCG_TYPE_I64; > + break; > + case dh_typecode_ptr: > + type = TCG_TYPE_PTR; > + break; > + default: > + g_assert_not_reached(); > + } > + > + switch (type) { > + case TCG_TYPE_I32: > + switch (TCG_TARGET_CALL_ARG_I32) { > + case TCG_CALL_ARG_EVEN: > + layout_arg_even(&cum); > + /* fall through */ > + case TCG_CALL_ARG_NORMAL: > + layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL); > + break; > + case TCG_CALL_ARG_EXTEND: > + kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1); > + layout_arg_1(&cum, info, kind); > + break; > + default: > + qemu_build_not_reached(); > + } > + break; > + > + case TCG_TYPE_I64: > + switch (TCG_TARGET_CALL_ARG_I64) { > + case TCG_CALL_ARG_EVEN: On a s390x host with gcc-11.0.1-0.3.1.ibm.fc34.s390x I get: FAILED: libqemu-aarch64-softmmu.fa.p/tcg_tcg.c.o ../tcg/tcg.c: In function ‘init_call_layout’: ../tcg/tcg.c:739:13: error: case value ‘1’ not in enumerated type [-Werror=switch] 739 | case TCG_CALL_ARG_EVEN: | ^~~~ The following helps: --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -735,7 +735,7 @@ static void init_call_layout(TCGHelperInfo *info) break; case TCG_TYPE_I64: - switch (TCG_TARGET_CALL_ARG_I64) { + switch ((TCGCallArgumentKind)TCG_TARGET_CALL_ARG_I64) { case TCG_CALL_ARG_EVEN: layout_arg_even(&cum); /* fall through */ This looks like a gcc bug to me. I still thought it would be worth mentioning here. ... Best regards, Ilya