Richard Henderson <richard.hender...@linaro.org> writes:
> This case is similar to INDEX_op_mov_* in that we need to do > different things depending on the current location of the source. > > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- <snip> > > +static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op) > +{ > + const TCGLifeData arg_life = op->life; > + TCGRegSet dup_out_regs, dup_in_regs; > + TCGTemp *its, *ots; > + TCGType itype, vtype; > + unsigned vece; > + bool ok; > + > + ots = arg_temp(op->args[0]); > + its = arg_temp(op->args[1]); > + > + /* There should be no fixed vector registers. */ > + tcg_debug_assert(!ots->fixed_reg); This threw me slightly. I guess you only really duplicate vectors, so I'm wondering if this should be called tcg_vec_reg_alloc_dup? Or maybe just a bit of verbiage in a block comment above the helper? > + > + itype = its->type; > + vece = TCGOP_VECE(op); > + vtype = TCGOP_VECL(op) + TCG_TYPE_V64; > + > + if (its->val_type == TEMP_VAL_CONST) { > + /* Propagate constant via movi -> dupi. */ > + tcg_target_ulong val = its->val; > + if (IS_DEAD_ARG(1)) { > + temp_dead(s, its); > + } > + tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]); > + return; > + } > + > + dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs; > + dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs; > + > + /* Allocate the output register now. */ > + if (ots->val_type != TEMP_VAL_REG) { > + TCGRegSet allocated_regs = s->reserved_regs; > + > + if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) { > + /* Make sure to not spill the input register. */ > + tcg_regset_set_reg(allocated_regs, its->reg); > + } > + ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs, > + op->output_pref[0], ots->indirect_base); > + ots->val_type = TEMP_VAL_REG; > + ots->mem_coherent = 0; > + s->reg_to_temp[ots->reg] = ots; > + } > + > + switch (its->val_type) { > + case TEMP_VAL_REG: > + /* > + * The dup constraints must be broad, covering all possible VECE. 
> + * However, tcg_out_dup_vec() gets to see the VECE and we allow it > + * to fail, indicating that extra moves are required for that case. > + */ > + if (tcg_regset_test_reg(dup_in_regs, its->reg)) { > + if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) { > + goto done; > + } > + /* Try again from memory or a vector input register. */ > + } > + if (!its->mem_coherent) { > + /* > + * The input register is not synced, and so an extra store > + * would be required to use memory. Attempt an integer-vector > + * register move first. We do not have a TCGRegSet for this. > + */ > + if (tcg_out_mov(s, itype, ots->reg, its->reg)) { > + break; > + } > + /* Sync the temp back to its slot and load from there. */ > + temp_sync(s, its, s->reserved_regs, 0, 0); > + } > + /* fall through */ > + > + case TEMP_VAL_MEM: > + /* TODO: dup from memory */ > + tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, > its->mem_offset); Should we be aborting here? That said, it looks like you are loading something directly from the register memory address here... > + break; > + > + default: > + g_assert_not_reached(); > + } > + > + /* We now have a vector input register, so dup must succeed. */ > + ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg); > + tcg_debug_assert(ok); > + > + done: > + if (IS_DEAD_ARG(1)) { > + temp_dead(s, its); > + } > + if (NEED_SYNC_ARG(0)) { > + temp_sync(s, ots, s->reserved_regs, 0, 0); > + } > + if (IS_DEAD_ARG(0)) { > + temp_dead(s, ots); > + } > +} > + > static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) > { > const TCGLifeData arg_life = op->life; > @@ -3981,6 +4080,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb) > case INDEX_op_dupi_vec: > tcg_reg_alloc_movi(s, op); > break; > + case INDEX_op_dup_vec: > + tcg_reg_alloc_dup(s, op); > + break; > case INDEX_op_insn_start: > if (num_insns >= 0) { > size_t off = tcg_current_code_size(s); -- Alex Bennée