On 5/3/21 1:57 AM, Richard Henderson wrote:
> This requires adjusting where arguments are stored.
> Place them on the stack at left-aligned positions.
> Adjust the stack frame to be at entirely positive offsets.
>
> Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
> ---
> include/tcg/tcg.h | 1 +
> tcg/tci/tcg-target.h | 2 +-
> tcg/tcg.c | 64 +++++++++++++------
> tcg/tci.c | 135 ++++++++++++++++++++++-----------------
> tcg/tci/tcg-target.c.inc | 50 +++++++--------
> 5 files changed, 148 insertions(+), 104 deletions(-)
>
> diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
> index 0f0695e90d..e5573a9877 100644
> --- a/include/tcg/tcg.h
> +++ b/include/tcg/tcg.h
> @@ -53,6 +53,7 @@
> #define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS))
>
> #define CPU_TEMP_BUF_NLONGS 128
> +#define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long))
>
> /* Default target word size to pointer size. */
> #ifndef TCG_TARGET_REG_BITS
> diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
> index 52af6d8bc5..4df10e2e83 100644
> --- a/tcg/tci/tcg-target.h
> +++ b/tcg/tci/tcg-target.h
> @@ -161,7 +161,7 @@ typedef enum {
>
> /* Used for function call generation. */
> #define TCG_TARGET_CALL_STACK_OFFSET 0
> -#define TCG_TARGET_STACK_ALIGN 16
> +#define TCG_TARGET_STACK_ALIGN 8

Is this FFI_SIZEOF_ARG?

>
> #define HAVE_TCG_QEMU_TB_EXEC

...

> static void tci_args_rr(const uint8_t **tb_ptr,
> TCGReg *r0, TCGReg *r1)
> {
> @@ -487,11 +479,13 @@ uintptr_t QEMU_DISABLE_CFI
> tcg_qemu_tb_exec(CPUArchState *env,
> {
> const uint8_t *tb_ptr = v_tb_ptr;
> tcg_target_ulong regs[TCG_TARGET_NB_REGS];
> - long tcg_temps[CPU_TEMP_BUF_NLONGS];
> - uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS);
> + uint64_t stack[(TCG_STATIC_CALL_ARGS_SIZE + TCG_STATIC_FRAME_SIZE)
> + / sizeof(uint64_t)];

Why not simply use a char* array? Ah, I see later
"call_slots[i] = &stack[i];", OK.

> + void *call_slots[TCG_STATIC_CALL_ARGS_SIZE / sizeof(uint64_t)];
>
> regs[TCG_AREG0] = (tcg_target_ulong)env;
> - regs[TCG_REG_CALL_STACK] = sp_value;
> + regs[TCG_REG_CALL_STACK] = (uintptr_t)stack;
> + call_slots[0] = NULL;

Maybe add a comment: "Other slots initialization delayed (see below)"?

> tci_assert(tb_ptr);
>
> for (;;) {
> @@ -509,40 +503,58 @@ uintptr_t QEMU_DISABLE_CFI
> tcg_qemu_tb_exec(CPUArchState *env,
> #endif
> TCGMemOpIdx oi;
> int32_t ofs;
> - void *ptr;
> + void *ptr, *cif;
>
> /* Skip opcode and size entry. */
> tb_ptr += 2;
>
> switch (opc) {
> case INDEX_op_call:
> - tci_args_l(&tb_ptr, &ptr);
> + /*
> + * Set up the ffi_avalue array once, delayed until now
> + * because many TB's do not make any calls. In tcg_gen_callN,
> + * we arranged for every real argument to be "left-aligned"
> + * in each 64-bit slot.
> + */
> + if (unlikely(call_slots[0] == NULL)) {
> + for (int i = 0; i < ARRAY_SIZE(call_slots); ++i) {
> + call_slots[i] = &stack[i];
> + }
> + }
> +
> + tci_args_nll(&tb_ptr, &len, &ptr, &cif);

Reviewed-by: Philippe Mathieu-Daudé <f4...@amsat.org>