From: Ilya Leoshkevich <i...@linux.ibm.com>

The i386 and s390x implementations of op_add2 require an earlyclobber, which is currently missing. This breaks VCKSM in s390x guests. E.g., on x86_64 the following op:
add2_i32 tmp2,tmp3,tmp2,tmp3,tmp3,tmp2 dead: 0 2 3 4 5 pref=none,0xffff is translated to: addl %ebx, %r12d adcl %r12d, %ebx Introduce a new C_N1_O1_I4 constraint, and make sure that earlyclobber of aliased outputs is honored. Cc: qemu-sta...@nongnu.org Fixes: 82790a870992 ("tcg: Add markup for output requires new register") Signed-off-by: Ilya Leoshkevich <i...@linux.ibm.com> Reviewed-by: Richard Henderson <richard.hender...@linaro.org> Message-Id: <20230719221310.1968845-7-...@linux.ibm.com> Signed-off-by: Richard Henderson <richard.hender...@linaro.org> (cherry picked from commit 22d2e5351a18aff5a9c7e3984b50ecce61ff8975) Signed-off-by: Michael Tokarev <m...@tls.msk.ru> diff --git a/tcg/i386/tcg-target-con-set.h b/tcg/i386/tcg-target-con-set.h index 91ceb0e1da..5ea3a292f0 100644 --- a/tcg/i386/tcg-target-con-set.h +++ b/tcg/i386/tcg-target-con-set.h @@ -11,6 +11,9 @@ * * C_N1_Im(...) defines a constraint set with 1 output and <m> inputs, * except that the output must use a new register. + * + * C_Nn_Om_Ik(...) defines a constraint set with <n + m> outputs and <k> + * inputs, except that the first <n> outputs must use new registers. 
*/ C_O0_I1(r) C_O0_I2(L, L) @@ -53,4 +56,4 @@ C_O2_I1(r, r, L) C_O2_I2(a, d, a, r) C_O2_I2(r, r, L, L) C_O2_I3(a, d, 0, 1, r) -C_O2_I4(r, r, 0, 1, re, re) +C_N1_O1_I4(r, r, 0, 1, re, re) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 5c7c180799..d00800d18a 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -3356,7 +3356,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_add2_i64: case INDEX_op_sub2_i32: case INDEX_op_sub2_i64: - return C_O2_I4(r, r, 0, 1, re, re); + return C_N1_O1_I4(r, r, 0, 1, re, re); case INDEX_op_ctz_i32: case INDEX_op_ctz_i64: diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h index 15f1c55103..31daa5daca 100644 --- a/tcg/s390x/tcg-target-con-set.h +++ b/tcg/s390x/tcg-target-con-set.h @@ -8,6 +8,9 @@ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs. * Each operand should be a sequence of constraint letters as defined by * tcg-target-con-str.h; the constraint combination is inclusive or. + * + * C_Nn_Om_Ik(...) defines a constraint set with <n + m> outputs and <k> + * inputs, except that the first <n> outputs must use new registers. 
*/ C_O0_I1(r) C_O0_I2(L, L) @@ -41,6 +44,5 @@ C_O1_I4(r, r, rA, rI, r) C_O2_I2(o, m, 0, r) C_O2_I2(o, m, r, r) C_O2_I3(o, m, 0, 1, r) -C_O2_I4(r, r, 0, 1, rA, r) -C_O2_I4(r, r, 0, 1, ri, r) -C_O2_I4(r, r, 0, 1, r, r) +C_N1_O1_I4(r, r, 0, 1, ri, r) +C_N1_O1_I4(r, r, 0, 1, rA, r) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 844532156b..2e5fd4968c 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -3229,11 +3229,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_add2_i32: case INDEX_op_sub2_i32: - return C_O2_I4(r, r, 0, 1, ri, r); + return C_N1_O1_I4(r, r, 0, 1, ri, r); case INDEX_op_add2_i64: case INDEX_op_sub2_i64: - return C_O2_I4(r, r, 0, 1, rA, r); + return C_N1_O1_I4(r, r, 0, 1, rA, r); case INDEX_op_st_vec: return C_O0_I2(v, r); diff --git a/tcg/tcg.c b/tcg/tcg.c index f3bf471274..09f345fa1b 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -368,6 +368,7 @@ void tcg_raise_tb_overflow(TCGContext *s) #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2), #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3), #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4), +#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4), typedef enum { #include "tcg-target-con-set.h" @@ -388,6 +389,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); #undef C_O2_I2 #undef C_O2_I3 #undef C_O2_I4 +#undef C_N1_O1_I4 /* Put all of the constraint sets into an array, indexed by the enum. 
*/ @@ -407,6 +409,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode); #define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } }, #define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } }, #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } }, +#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } }, static const TCGTargetOpDef constraint_sets[] = { #include "tcg-target-con-set.h" @@ -426,6 +429,7 @@ static const TCGTargetOpDef constraint_sets[] = { #undef C_O2_I2 #undef C_O2_I3 #undef C_O2_I4 +#undef C_N1_O1_I4 /* Expand the enumerator to be returned from tcg_target_op_def(). */ @@ -445,6 +449,7 @@ static const TCGTargetOpDef constraint_sets[] = { #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2) #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3) #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4) +#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4) #include "tcg-target.c.inc" @@ -4255,7 +4260,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op) * dead after the instruction, we must allocate a new * register and move it. */ - if (temp_readonly(ts) || !IS_DEAD_ARG(i)) { + if (temp_readonly(ts) || !IS_DEAD_ARG(i) + || def->args_ct[arg_ct->alias_index].newreg) { allocate_new_reg = true; } else if (ts->val_type == TEMP_VAL_REG) { /* -- 2.39.2