On 12/28/23 23:05, Paolo Bonzini wrote:
In the case where OR or XOR has an 8-bit immediate between 128 and 255, we can
operate on a low-byte register and shorten the output by two or three bytes
(two if a prefix byte is needed for REX.B).
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
---
tcg/i386/tcg-target.c.inc | 7 +++++++
1 file changed, 7 insertions(+)
Queued. I adjusted the function a bit to use a switch.
r~
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 1791b959738..a24a23f43b1 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -244,6 +244,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct, int vece)
#define P_VEXL 0x80000 /* Set VEX.L = 1 */
#define P_EVEX 0x100000 /* Requires EVEX encoding */
+#define OPC_ARITH_EbIb (0x80)
#define OPC_ARITH_EvIz (0x81)
#define OPC_ARITH_EvIb (0x83)
#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
@@ -1366,6 +1367,12 @@ static void tgen_arithi(TCGContext *s, int c, int r0,
tcg_out8(s, val);
return;
}
+ if (val == (uint8_t)val && (c == ARITH_OR || c == ARITH_XOR) &&
+ (r0 < 4 || TCG_TARGET_REG_BITS == 64)) {
+ tcg_out_modrm(s, OPC_ARITH_EbIb + P_REXB_RM, c, r0);
+ tcg_out8(s, val);
+ return;
+ }
if (rexw == 0 || val == (int32_t)val) {
tcg_out_modrm(s, OPC_ARITH_EvIz + rexw, c, r0);
tcg_out32(s, val);