The input that overlaps the sign bit of the output can have its s_mask propagated to the output s_mask.
Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
---
 tcg/optimize.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index de32cc0323..e4f68241ce 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1646,8 +1646,9 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
     TempOptInfo *t2 = arg_info(op->args[2]);
     int ofs = op->args[3];
     int len = op->args[4];
+    int width;
     TCGOpcode and_opc;
-    uint64_t z_mask;
+    uint64_t z_mask, s_mask;
 
     if (t1->is_const && t2->is_const) {
         return tcg_opt_gen_movi(ctx, op, op->args[0],
@@ -1657,9 +1658,11 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
     switch (ctx->type) {
     case TCG_TYPE_I32:
         and_opc = INDEX_op_and_i32;
+        width = 32;
         break;
     case TCG_TYPE_I64:
         and_opc = INDEX_op_and_i64;
+        width = 64;
         break;
     default:
         g_assert_not_reached();
@@ -1684,8 +1687,15 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
         return fold_and(ctx, op);
     }
 
+    /* The s_mask from the top portion of the deposit is still valid. */
+    if (ofs + len == width) {
+        s_mask = t2->s_mask << ofs;
+    } else {
+        s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
+    }
+
     z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
-    return fold_masks_z(ctx, op, z_mask);
+    return fold_masks_zs(ctx, op, z_mask, s_mask);
 }
 
 static bool fold_divide(OptContext *ctx, TCGOp *op)
-- 
2.43.0
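
To illustrate the computation above: the following is a minimal standalone
sketch, not part of the patch and not QEMU code. deposit_s_mask() is a
hypothetical helper, and this deposit64() is a simplified local stand-in
for QEMU's bitops helper. It walks through the two cases in the patched
fold_deposit(): when the deposited field reaches the sign bit, t2's s_mask
propagates shifted by ofs; otherwise only the part of t1's s_mask strictly
above the field survives.

/*
 * Standalone sketch of the s_mask propagation through a deposit.
 * Convention assumed here: s_mask is a left-aligned mask of high bits
 * known to repeat the value's sign bit (z_mask is handled separately).
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Deposit the low `len` bits of t2 into t1 at bit offset `ofs`. */
static uint64_t deposit64(uint64_t t1, int ofs, int len, uint64_t t2)
{
    uint64_t mask = (len == 64 ? ~0ull : (1ull << len) - 1) << ofs;
    return (t1 & ~mask) | ((t2 << ofs) & mask);
}

/* Output s_mask for a deposit of the given operation width (32 or 64). */
static uint64_t deposit_s_mask(uint64_t t1_s_mask, uint64_t t2_s_mask,
                               int ofs, int len, int width)
{
    if (ofs + len == width) {
        /* The field reaches the sign bit: t2 supplies the top of the
           result, so its sign-repetition knowledge survives, shifted. */
        return t2_s_mask << ofs;
    }
    /* The bits above the field still come from t1, so only the part of
       t1's s_mask strictly above the field remains valid.
       (ofs + len < 64 in this branch, so the shift is well defined.) */
    return t1_s_mask & ~((1ull << (ofs + len)) - 1);
}

int main(void)
{
    /* A 64-bit deposit of an 8-bit field at offset 56 (ofs+len == 64).
       If t2 is known sign-extended from a 4-bit value, its s_mask covers
       bits 63..3; after the deposit the result is known sign-extended
       from a 60-bit value (s_mask covers bits 63..59). */
    uint64_t t2_s_mask = ~0ull << 3;
    uint64_t out = deposit_s_mask(0, t2_s_mask, 56, 8, 64);
    printf("output s_mask  = 0x%016" PRIx64 "\n", out);  /* 0xf800000000000000 */

    /* Cross-check against an actual deposit of a sign-extended value. */
    int64_t t2 = -5;                      /* sign-extended 4-bit value */
    printf("deposit result = 0x%016" PRIx64 "\n",
           deposit64(0, 56, 8, (uint64_t)t2));
    return 0;
}

The real fold_deposit() expresses the second case with
MAKE_64BIT_MASK(0, ofs + len) and merges the result via fold_masks_zs()
together with the deposit64() of the two z_masks.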