Emulate MMX maskmovq with SSE2 maskmovdqu by zeroing out the upper 64
bits of the mask operand. A warning is issued since an invalid memory
access may happen when bits 64:127 of the memory location are unmapped:
xmmintrin.h:1168:3: note: Emulate MMX maskmovq with SSE2 maskmovdqu may result
in invalid memory access
1168 | __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Only an SSE register source operand is allowed.
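For illustration only, here is a rough C intrinsics sketch (not part of
this patch) of what the emulation does; the helper name
emulated_maskmovq is hypothetical:

    #include <emmintrin.h>  /* SSE2: _mm_move_epi64, _mm_maskmoveu_si128.  */

    /* Store the bytes of DATA whose corresponding mask byte has its
       top bit set to P, as MMX maskmovq would, but via SSE2
       maskmovdqu.  */
    static void
    emulated_maskmovq (__m128i data, __m128i mask, char *p)
    {
      /* Zero bits 64:127 of the mask, as the sse2_maskmovq_<mode>
         split below does with movq128, so no byte above the low 64
         bits is selected for the store.  */
      __m128i mask128 = _mm_move_epi64 (mask);

      /* maskmovdqu is still a 128-bit operation: even with the upper
         mask bytes zeroed it may access bytes p[8..15], hence the
         warning.  */
      _mm_maskmoveu_si128 (data, mask128, p);
    }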
PR target/89021
* config/i386/mmx.md (mmx_maskmovq): Emulate MMX maskmovq with
SSE2 maskmovdqu and a warning.
(sse2_maskmovq_<mode>): New.
(*mmx_maskmovq): Add "&& !TARGET_MMX_WITH_SSE".
* config/i386/sse.md (*sse2_maskmovdqu): Renamed to ...
(sse2_maskmovdqu_<mode>): This.
---
gcc/config/i386/mmx.md | 59 ++++++++++++++++++++++++++++++++++++++++--
gcc/config/i386/sse.md | 2 +-
2 files changed, 58 insertions(+), 3 deletions(-)
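A minimal sketch of a reproducer (not part of the patch) that takes the
new path; on x86-64, where TARGET_MMX_WITH_SSE is in effect, compiling
this should emit the note quoted above:

    #include <xmmintrin.h>

    void
    store_masked (__m64 data, __m64 mask, char *p)
    {
      /* _mm_maskmove_si64 expands to __builtin_ia32_maskmovq, which
         the changed mmx_maskmovq expander now routes through
         maskmovdqu.  */
      _mm_maskmove_si64 (data, mask, p);
    }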
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index f90574a7255..92252984482 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1748,7 +1748,62 @@
(match_operand:V8QI 2 "register_operand")
(match_dup 0)]
UNSPEC_MASKMOV))]
- "TARGET_SSE || TARGET_3DNOW_A")
+ "TARGET_SSE || TARGET_3DNOW_A"
+{
+ if (TARGET_MMX_WITH_SSE)
+ {
+ /* Emulate MMX maskmovq with SSE2 maskmovdqu and issue a warning
+ since they aren't equivalent. */
+ inform (input_location, "Emulate MMX maskmovq with SSE2 maskmovdqu "
+ "may result in invalid memory access");
+ rtx insn;
+ rtx op = gen_reg_rtx (V2DImode);
+ if (Pmode == SImode)
+ insn = gen_sse2_maskmovq_si (XEXP (operands[0], 0),
+ operands[1], operands[2], op, op);
+ else
+ insn = gen_sse2_maskmovq_di (XEXP (operands[0], 0),
+ operands[1], operands[2], op, op);
+ emit_insn (insn);
+ DONE;
+ }
+})
+
+(define_insn_and_split "sse2_maskmovq_<mode>"
+ [(set (mem:V8QI (match_operand:P 0 "register_operand" "D"))
+ (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "Yy")
+ (match_operand:V8QI 2 "register_operand" "Yy")
+ (mem:V8QI (match_dup 0))]
+ UNSPEC_MASKMOV))
+ (set (match_operand:V2DI 3 "register_operand" "=Yy")
+ (unspec:V2DI [(match_operand:V2DI 4 "register_operand" "3")]
+ UNSPEC_MASKMOV))]
+ "TARGET_MMX_WITH_SSE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+  /* Copy the lower 64 bits of operand 2 (the mask operand) to operand 3.
+ NB: Invalid memory access may happen when bits 64:127 at memory
+ location are unmapped. */
+ rtx op3 = operands[3];
+ rtx op2 = gen_rtx_REG (V2DImode, REGNO (operands[2]));
+ rtx insn = gen_sse2_movq128 (op3, op2);
+ emit_insn (insn);
+
+ /* Generate SSE2 maskmovdqu with operand 3. */
+ rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]));
+ op3 = gen_rtx_REG (V16QImode, REGNO (operands[3]));
+ if (Pmode == SImode)
+ insn = gen_sse2_maskmovdqu_si (operands[0], op1, op3);
+ else
+ insn = gen_sse2_maskmovdqu_di (operands[0], op1, op3);
+ emit_insn (insn);
+ DONE;
+}
+ [(set_attr "type" "ssemov")
+ (set_attr "znver1_decode" "vector")
+ (set_attr "mode" "TI")])
(define_insn "*mmx_maskmovq"
[(set (mem:V8QI (match_operand:P 0 "register_operand" "D"))
@@ -1756,7 +1811,7 @@
(match_operand:V8QI 2 "register_operand" "y")
(mem:V8QI (match_dup 0))]
UNSPEC_MASKMOV))]
- "TARGET_SSE || TARGET_3DNOW_A"
+ "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_MMX_WITH_SSE"
;; @@@ check ordering of operands in intel/nonintel syntax
"maskmovq\t{%2, %1|%1, %2}"
[(set_attr "type" "mmxcvt")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 9ecd9789c1e..7218c9cd646 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15142,7 +15142,7 @@
UNSPEC_MASKMOV))]
"TARGET_SSE2")
-(define_insn "*sse2_maskmovdqu"
+(define_insn "sse2_maskmovdqu_<mode>"
[(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
(match_operand:V16QI 2 "register_operand" "x")
--
2.20.1