On 10/14/23 03:01, Paolo Bonzini wrote:
+static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode)
+{
+    TCGv z_tl = tcg_constant_tl(0);
+    TCGLabel *label_top = gen_new_label();
+    TCGLabel *label_bottom = gen_new_label();
+    TCGv oldv = tcg_temp_new();
+    TCGv memv = tcg_temp_new();
+    TCGv newv = tcg_temp_new();
+    TCGv cmpv = tcg_temp_new();
+    TCGv tmp_cc = tcg_temp_new();
+
+    TCGv cmp_lhs, cmp_rhs;
+    MemOp ot, ot_full;
+
+    int jcc_op = (decode->b >> 1) & 7;
+    static const uint8_t cond[16] = {

TCGCond.

+        TCG_COND_NE,  /* o, just test OF=1 */
+        TCG_COND_EQ,  /* no, just test OF=0 */
+        TCG_COND_LTU, /* b */
+        TCG_COND_GEU, /* ae (nb) */
+        TCG_COND_EQ,  /* z */
+        TCG_COND_NE,  /* nz */
+        TCG_COND_LEU, /* be */
+        TCG_COND_GTU, /* a (nbe) */
+        TCG_COND_LT,  /* s, compares result against 0 */
+        TCG_COND_GE,  /* ns, compares result against 0 */
+        TCG_COND_NE,  /* p, just test PF=1 */
+        TCG_COND_EQ,  /* np, just test PF=0 */
+        TCG_COND_LT,  /* l */
+        TCG_COND_GE,  /* ge (nl) */
+        TCG_COND_LE,  /* le */
+        TCG_COND_GT,  /* g (nle) */
+    };

You don't need the full table here:

    cond = cond_table[jcc_op];
    if (decode->b & 1)
        cond = tcg_invert_cond(cond)


+    /* Compute comparison result but do not clobber cc_* yet.  */
+    switch (jcc_op) {
+    case JCC_O:
+    case JCC_P:
+        tcg_gen_sub_tl(s->T0, memv, cmpv);
+        gen_helper_cc_compute_all(tmp_cc, s->T0, cmpv, z_tl,
+                                  tcg_constant_i32(CC_OP_SUBB + ot));
+        decode->cc_src = tmp_cc;
+        set_cc_op(s, CC_OP_EFLAGS);
+
+        tcg_gen_andi_tl(s->T0, tmp_cc, (jcc_op == JCC_O ? CC_O : CC_P));
+        cmp_lhs = s->T0, cmp_rhs = z_tl;

I'm not keen on the weight of the helper function within a cmpxchg loop.
I think you should compute these two cases explicitly:

    JCC_O:
        // Need operands sign-extended.
        // cond_table[JCC_O] = TCG_COND_LT -- sign bit set.
        tcg_gen_xor_tl(tmp, cmpv, memv);
        tcg_gen_xor_tl(cmp_lhs, cmpv, s->T0);
        tcg_gen_and_tl(cmp_lhs, cmp_lhs, tmp);
        cmp_rhs = z_tl;
        break;

    JCC_P:
        // cond_table[JCC_P] = TCG_COND_EQ -- even parity.
        tcg_gen_ext8u_tl(cmp_lhs, s->T0);
        tcg_gen_ctpop_tl(cmp_lhs, cmp_lhs);
        tcg_gen_andi_tl(cmp_lhs, cmp_lhs, 1);
        cmp_rhs = z_tl;
        break;

+    cc_sub:
+        decode->cc_dst = s->T0;
+        decode->cc_src = cmpv;
+        decode->cc_srcT = memv;
+        set_cc_op(s, CC_OP_SUBB + ot);
+        break;

At which point this is common to all cases.

+    }
+
+    /* Compute new value: if condition does not hold, just store back memv */
+    tcg_gen_add_tl(newv, memv, s->T1);
+    tcg_gen_movcond_tl(cond[decode->b & 15], newv, cmp_lhs, cmp_rhs, newv, 
memv);
+    tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, memv, newv, s->mem_index, ot_full);
+
+    /* Exit unconditionally if cmpxchg succeeded.  */
+    tcg_gen_brcond_tl(TCG_COND_EQ, oldv, memv, label_bottom);
+
+    /* Try again if there was actually a store to make.  */
+    tcg_gen_brcond_tl(cond[decode->b & 15], cmp_lhs, cmp_rhs, label_top);

I'm tempted to have this unlikely case sync the pc and exit the tb.
This would restart the current instruction after testing for exit request.

But I suppose we have plenty of other places with unbounded cmpxchg loops...


r~

Reply via email to