emulate: mostly rewrite flags handling

Paolo Bonzini Fri, 02 May 2025 14:49:40 -0700

While Bochs's algorithms are pretty solid, there are small opportunities
to improve them or to make their logic more similar to TCG's handling
of condition codes.


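- use a single bit (PD) for the difference between the parity of bits 0..7
  of result and PF, instead of a full parity delta byte.  The byte made it
  possible to set ZF on its own by zeroing result without disturbing PF,
  but "set only ZF" is not a common case.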

- place SD in the same place as SF

- move PO and CF to bits 62 and 63 when target_ulong is 64 bits wide,
  so that 64-bit ALU operations need fewer shifts

- use rotates to move CF and AF from auxbits to their eflags position

Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
---
 target/i386/emulate/x86_flags.h |  12 +-
 target/i386/emulate/x86_emu.c   |   4 +-
 target/i386/emulate/x86_flags.c | 197 ++++++++++++++------------------
 3 files changed, 86 insertions(+), 127 deletions(-)

diff --git a/target/i386/emulate/x86_flags.h b/target/i386/emulate/x86_flags.h
index 6c175007b57..28b008e5771 100644
--- a/target/i386/emulate/x86_flags.h
+++ b/target/i386/emulate/x86_flags.h
@@ -28,20 +28,10 @@
 void lflags_to_rflags(CPUX86State *env);
 void rflags_to_lflags(CPUX86State *env);
 
-bool get_PF(CPUX86State *env);
-void set_PF(CPUX86State *env, bool val);
 bool get_CF(CPUX86State *env);
 void set_CF(CPUX86State *env, bool val);
-bool get_AF(CPUX86State *env);
-void set_AF(CPUX86State *env, bool val);
-bool get_ZF(CPUX86State *env);
-void set_ZF(CPUX86State *env, bool val);
-bool get_SF(CPUX86State *env);
-void set_SF(CPUX86State *env, bool val);
-bool get_OF(CPUX86State *env);
-void set_OF(CPUX86State *env, bool val);
 
-void SET_FLAGS_OxxxxC(CPUX86State *env, uint32_t new_of, uint32_t new_cf);
+void SET_FLAGS_OxxxxC(CPUX86State *env, bool new_of, bool new_cf);
 
 void SET_FLAGS_OSZAPC_SUB32(CPUX86State *env, uint32_t v1, uint32_t v2,
                             uint32_t diff);
diff --git a/target/i386/emulate/x86_emu.c b/target/i386/emulate/x86_emu.c
index 4c07f08942e..61bd5af5bb1 100644
--- a/target/i386/emulate/x86_emu.c
+++ b/target/i386/emulate/x86_emu.c
@@ -474,10 +474,10 @@ static inline void string_rep(CPUX86State *env, struct x86_decode *decode,
     while (rcx--) {
         func(env, decode);
         write_reg(env, R_ECX, rcx, decode->addressing_size);
-        if ((PREFIX_REP == rep) && !get_ZF(env)) {
+        if ((PREFIX_REP == rep) && !env->lflags.result) {
             break;
         }
-        if ((PREFIX_REPN == rep) && get_ZF(env)) {
+        if ((PREFIX_REPN == rep) && env->lflags.result) {
             break;
         }
     }
diff --git a/target/i386/emulate/x86_flags.c b/target/i386/emulate/x86_flags.c
index 84e27364a03..c347a951889 100644
--- a/target/i386/emulate/x86_flags.c
+++ b/target/i386/emulate/x86_flags.c
@@ -29,41 +29,50 @@
 #include "x86.h"
 
 
-/* this is basically bocsh code */
+/*
+ * The algorithms here are similar to those in Bochs.  After an ALU
+ * operation, RESULT can be used to compute ZF, SF and PF, whereas
+ * AUXBITS is used to compute AF, CF and OF.  In reality, SF and PF are the
+ * XOR of the value computed from RESULT and the value found in bits 7 and 2
+ * of AUXBITS; this way the same logic can be used to compute the flags
+ * both before and after an ALU operation.
+ *
+ * Compared to the TCG CC_OP codes, this avoids conditionals when converting
+ * to and from the RFLAGS representation.
+ */
 
-#define LF_SIGN_BIT     31
+#define LF_SIGN_BIT    (TARGET_LONG_BITS - 1)
 
-#define LF_BIT_SD      (0)          /* lazy Sign Flag Delta            */
-#define LF_BIT_AF      (3)          /* lazy Adjust flag                */
-#define LF_BIT_PDB     (8)          /* lazy Parity Delta Byte (8 bits) */
-#define LF_BIT_CF      (31)         /* lazy Carry Flag                 */
-#define LF_BIT_PO      (30)         /* lazy Partial Overflow = CF ^ OF */
+#define LF_BIT_PD      (2)          /* lazy Parity Delta, same bit as PF */
+#define LF_BIT_AF      (3)          /* lazy Adjust flag */
+#define LF_BIT_SD      (7)          /* lazy Sign Flag Delta, same bit as SF */
+#define LF_BIT_CF      (TARGET_LONG_BITS - 1) /* lazy Carry Flag */
+#define LF_BIT_PO      (TARGET_LONG_BITS - 2) /* lazy Partial Overflow = CF ^ OF */
 
-#define LF_MASK_SD     (0x01 << LF_BIT_SD)
-#define LF_MASK_AF     (0x01 << LF_BIT_AF)
-#define LF_MASK_PDB    (0xFF << LF_BIT_PDB)
-#define LF_MASK_CF     (0x01 << LF_BIT_CF)
-#define LF_MASK_PO     (0x01 << LF_BIT_PO)
+#define LF_MASK_PD     ((target_ulong)0x01 << LF_BIT_PD)
+#define LF_MASK_AF     ((target_ulong)0x01 << LF_BIT_AF)
+#define LF_MASK_SD     ((target_ulong)0x01 << LF_BIT_SD)
+#define LF_MASK_CF     ((target_ulong)0x01 << LF_BIT_CF)
+#define LF_MASK_PO     ((target_ulong)0x01 << LF_BIT_PO)
 
 /* ******************* */
 /* OSZAPC */
 /* ******************* */
 
-/* size, carries, result */
+/* use carries to fill in AF, PO and CF, while ensuring PD and SD are clear.
+ * for full-word operations just clear PD and SD; for smaller operand
+ * sizes only keep AF in the low byte and shift the carries left to
+ * place PO and CF in the top two bits.
+ */
 #define SET_FLAGS_OSZAPC_SIZE(size, lf_carries, lf_result) { \
-    target_ulong temp = ((lf_carries) & (LF_MASK_AF)) | \
-    (((lf_carries) >> (size - 2)) << LF_BIT_PO); \
     env->lflags.result = (target_ulong)(int##size##_t)(lf_result); \
-    if ((size) == 32) { \
-        temp = ((lf_carries) & ~(LF_MASK_PDB | LF_MASK_SD)); \
-    } else if ((size) == 16) { \
-        temp = ((lf_carries) & (LF_MASK_AF)) | ((lf_carries) << 16); \
-    } else if ((size) == 8)  { \
-        temp = ((lf_carries) & (LF_MASK_AF)) | ((lf_carries) << 24); \
+    target_ulong temp = (lf_carries); \
+    if ((size) == TARGET_LONG_BITS) { \
+        temp = temp & ~(LF_MASK_PD | LF_MASK_SD); \
     } else { \
-        VM_PANIC("unimplemented");  \
+        temp = (temp & LF_MASK_AF) | (temp << (TARGET_LONG_BITS - (size))); \
     } \
-    env->lflags.auxbits = (target_ulong)(uint32_t)temp; \
+    env->lflags.auxbits = temp; \
 }
 
 /* carries, result */
@@ -77,23 +86,18 @@
 /* ******************* */
 /* OSZAP */
 /* ******************* */
-/* size, carries, result */
+/* same as setting OSZAPC, but preserve CF and flip PO if the old value of CF
+ * did not match the high bit of lf_carries. */
 #define SET_FLAGS_OSZAP_SIZE(size, lf_carries, lf_result) { \
-    target_ulong temp = ((lf_carries) & (LF_MASK_AF)) | \
-    (((lf_carries) >> (size - 2)) << LF_BIT_PO); \
-    if ((size) == 32) { \
-        temp = ((lf_carries) & ~(LF_MASK_PDB | LF_MASK_SD)); \
-    } else if ((size) == 16) { \
-        temp = ((lf_carries) & (LF_MASK_AF)) | ((lf_carries) << 16); \
-    } else if ((size) == 8) { \
-        temp = ((lf_carries) & (LF_MASK_AF)) | ((lf_carries) << 24); \
-    } else { \
-        VM_PANIC("unimplemented");      \
-    } \
     env->lflags.result = (target_ulong)(int##size##_t)(lf_result); \
-    target_ulong delta_c = (env->lflags.auxbits ^ temp) & LF_MASK_CF; \
-    delta_c ^= (delta_c >> 1); \
-    env->lflags.auxbits = (target_ulong)(uint32_t)(temp ^ delta_c); \
+    target_ulong temp = (lf_carries); \
+    if ((size) == TARGET_LONG_BITS) { \
+        temp = (temp & ~(LF_MASK_PD | LF_MASK_SD)); \
+    } else { \
+        temp = (temp & LF_MASK_AF) | (temp << (TARGET_LONG_BITS - (size))); \
+    } \
+    target_ulong cf_changed = ((target_long)(env->lflags.auxbits ^ temp)) < 0; \
+    env->lflags.auxbits = temp ^ (cf_changed * (LF_MASK_PO | LF_MASK_CF)); \
 }
 
 /* carries, result */
@@ -104,11 +108,11 @@
 #define SET_FLAGS_OSZAP_32(carries, result) \
     SET_FLAGS_OSZAP_SIZE(32, carries, result)
 
-void SET_FLAGS_OxxxxC(CPUX86State *env, uint32_t new_of, uint32_t new_cf)
+void SET_FLAGS_OxxxxC(CPUX86State *env, bool new_of, bool new_cf)
 {
-    uint32_t temp_po = new_of ^ new_cf;
     env->lflags.auxbits &= ~(LF_MASK_PO | LF_MASK_CF);
-    env->lflags.auxbits |= (temp_po << LF_BIT_PO) | (new_cf << LF_BIT_CF);
+    env->lflags.auxbits |= (-(target_ulong)new_cf << LF_BIT_PO);
+    env->lflags.auxbits ^= ((target_ulong)new_of << LF_BIT_PO);
 }
 
 void SET_FLAGS_OSZAPC_SUB32(CPUX86State *env, uint32_t v1, uint32_t v2,
@@ -202,104 +206,69 @@ void SET_FLAGS_OSZAPC_LOGIC8(CPUX86State *env, uint8_t v1, uint8_t v2,
     SET_FLAGS_OSZAPC_8(0, diff);
 }
 
-bool get_PF(CPUX86State *env)
+static inline uint32_t get_PF(CPUX86State *env)
 {
-    uint32_t temp = (255 & env->lflags.result);
-    temp = temp ^ (255 & (env->lflags.auxbits >> LF_BIT_PDB));
-    temp = (temp ^ (temp >> 4)) & 0x0F;
-    return (0x9669U >> temp) & 1;
+    uint8_t temp = env->lflags.result;
+    return ((parity8(temp) - 1) ^ env->lflags.auxbits) & CC_P;
 }
 
-void set_PF(CPUX86State *env, bool val)
+static inline uint32_t get_OF(CPUX86State *env)
 {
-    uint32_t temp = (255 & env->lflags.result) ^ (!val);
-    env->lflags.auxbits &= ~(LF_MASK_PDB);
-    env->lflags.auxbits |= (temp << LF_BIT_PDB);
-}
-
-bool get_OF(CPUX86State *env)
-{
-    return ((env->lflags.auxbits + (1U << LF_BIT_PO)) >> LF_BIT_CF) & 1;
+    return ((env->lflags.auxbits >> (LF_BIT_CF - 11)) + CC_O / 2) & CC_O;
 }
 
 bool get_CF(CPUX86State *env)
 {
-    return (env->lflags.auxbits >> LF_BIT_CF) & 1;
-}
-
-void set_OF(CPUX86State *env, bool val)
-{
-    bool old_cf = get_CF(env);
-    SET_FLAGS_OxxxxC(env, val, old_cf);
+    return ((target_long)env->lflags.auxbits) < 0;
 }
 
 void set_CF(CPUX86State *env, bool val)
 {
-    bool old_of = get_OF(env);
-    SET_FLAGS_OxxxxC(env, old_of, val);
+    /* If CF changes, flip PO and CF */
+    target_ulong temp = -(target_ulong)val;
+    target_ulong cf_changed = ((target_long)(env->lflags.auxbits ^ temp)) < 0;
+    env->lflags.auxbits ^= cf_changed * (LF_MASK_PO | LF_MASK_CF);
 }
 
-bool get_AF(CPUX86State *env)
+static inline uint32_t get_ZF(CPUX86State *env)
 {
-    return (env->lflags.auxbits >> LF_BIT_AF) & 1;
+    return env->lflags.result ? 0 : CC_Z;
 }
 
-void set_AF(CPUX86State *env, bool val)
+static inline uint32_t get_SF(CPUX86State *env)
 {
-    env->lflags.auxbits &= ~(LF_MASK_AF);
-    env->lflags.auxbits |= val << LF_BIT_AF;
-}
-
-bool get_ZF(CPUX86State *env)
-{
-    return !env->lflags.result;
-}
-
-void set_ZF(CPUX86State *env, bool val)
-{
-    if (val) {
-        env->lflags.auxbits ^=
-         (((env->lflags.result >> LF_SIGN_BIT) & 1) << LF_BIT_SD);
-        /* merge the parity bits into the Parity Delta Byte */
-        uint32_t temp_pdb = (255 & env->lflags.result);
-        env->lflags.auxbits ^= (temp_pdb << LF_BIT_PDB);
-        /* now zero the .result value */
-        env->lflags.result = 0;
-    } else {
-        env->lflags.result |= (1 << 8);
-    }
-}
-
-bool get_SF(CPUX86State *env)
-{
-    return ((env->lflags.result >> LF_SIGN_BIT) ^
-            (env->lflags.auxbits >> LF_BIT_SD)) & 1;
-}
-
-void set_SF(CPUX86State *env, bool val)
-{
-    bool temp_sf = get_SF(env);
-    env->lflags.auxbits ^= (temp_sf ^ val) << LF_BIT_SD;
+    return ((env->lflags.result >> (LF_SIGN_BIT - LF_BIT_SD)) ^
+            env->lflags.auxbits) & CC_S;
 }
 
 void lflags_to_rflags(CPUX86State *env)
 {
     env->eflags &= ~(CC_C|CC_P|CC_A|CC_Z|CC_S|CC_O);
-    env->eflags |= get_CF(env) ? CC_C : 0;
-    env->eflags |= get_PF(env) ? CC_P : 0;
-    env->eflags |= get_AF(env) ? CC_A : 0;
-    env->eflags |= get_ZF(env) ? CC_Z : 0;
-    env->eflags |= get_SF(env) ? CC_S : 0;
-    env->eflags |= get_OF(env) ? CC_O : 0;
+    /* rotate left by one to move carry-out bits into CF and AF */
+    env->eflags |= (
+        (env->lflags.auxbits << 1) |
+        (env->lflags.auxbits >> (TARGET_LONG_BITS - 1))) & (CC_C | CC_A);
+    env->eflags |= get_SF(env);
+    env->eflags |= get_PF(env);
+    env->eflags |= get_ZF(env);
+    env->eflags |= get_OF(env);
 }
 
 void rflags_to_lflags(CPUX86State *env)
 {
-    env->lflags.auxbits = env->lflags.result = 0;
-    set_OF(env, env->eflags & CC_O);
-    set_SF(env, env->eflags & CC_S);
-    set_ZF(env, env->eflags & CC_Z);
-    set_AF(env, env->eflags & CC_A);
-    set_PF(env, env->eflags & CC_P);
-    set_CF(env, env->eflags & CC_C);
+    target_ulong cf_xor_of;
+
+    env->lflags.auxbits = CC_P;
+    env->lflags.auxbits ^= env->eflags & (CC_S | CC_P);
+
+    /* rotate right by one to move CF and AF into the carry-out positions */
+    env->lflags.auxbits |= (
+        (env->eflags >> 1) |
+        (env->eflags << (TARGET_LONG_BITS - 1))) & (CC_C | CC_A);
+
+    cf_xor_of = (env->eflags & (CC_C | CC_O)) + (CC_O - CC_C);
+    env->lflags.auxbits |= -cf_xor_of & LF_MASK_PO;
+
+    /* Leave the low byte zero so that parity is not affected.  */
+    env->lflags.result = !(env->eflags & CC_Z) << 8;
 }
-- 
2.49.0

[PATCH 3/4] target/i386/emulate: mostly rewrite flags handling

Reply via email to