Add some missing APX NF and NDD support for imul and mul.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?
gcc/ChangeLog:
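* config/i386/i386.md (nf_subst): Use SWIDWI iterator instead of
SWI.
(*imulhi<mode>zu): Rename to ...
(*imulhi<mode>zu<nf_name>): ... this.  Add APX NF support.
(*mulsi3_1_zext): Rename to ...
(*mulsi3_1_zext<nf_name>): ... this.  Add APX NF and NDD support.
(*mul<mode><dwi>3_1): Rename to ...
(*mul<mode><dwi>3_1<nf_name>): ... this.  Add APX NF support.
(*<u>mulqihi3_1): Rename to ...
(*<u>mulqihi3_1<nf_name>): ... this.  Add APX NF support.
(*mul<mode>3_1<nf_name>): Add APX NDD support.
(*mulv<mode>4): Ditto.
(*mulvhi4): Ditto.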
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd.c: Add tests for imul NDD.
---
gcc/config/i386/i386.md | 98 +++++++++++++------------
gcc/testsuite/gcc.target/i386/apx-ndd.c | 8 ++
2 files changed, 61 insertions(+), 45 deletions(-)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index fd48e764469..c1f29fee412 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6488,8 +6488,8 @@
(define_subst_attr "nf_nonf_x64_attr" "nf_subst" "noapx_nf" "x64")
(define_subst "nf_subst"
- [(set (match_operand:SWI 0)
- (match_operand:SWI 1))]
+ [(set (match_operand:SWIDWI 0)
+ (match_operand:SWIDWI 1))]
""
[(set (match_dup 0)
(match_dup 1))
@@ -10028,24 +10028,26 @@
;; On BDVER1, all HI MULs use DoublePath
(define_insn "*mul<mode>3_1<nf_name>"
- [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r")
+ [(set (match_operand:SWIM248 0 "register_operand" "=r,r,r,r")
(mult:SWIM248
- (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0")
- (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r")))]
+ (match_operand:SWIM248 1 "nonimmediate_operand" "%rm,rm,0,r")
+ (match_operand:SWIM248 2 "<general_operand>" "K,<i>,<m>r,<m>r")))]
"!(MEM_P (operands[1]) && MEM_P (operands[2]))
&& <nf_condition>"
"@
<nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
<nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
- <nf_prefix>imul{<imodesuffix>}\t{%2, %0|%0, %2}"
+ <nf_prefix>imul{<imodesuffix>}\t{%2, %0|%0, %2}
+ <nf_prefix>imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "imul")
- (set_attr "prefix_0f" "0,0,1")
+ (set_attr "prefix_0f" "0,0,1,1")
+ (set_attr "isa" "*,*,*,apx_ndd")
(set (attr "athlon_decode")
(cond [(eq_attr "cpu" "athlon")
(const_string "vector")
(eq_attr "alternative" "1")
(const_string "vector")
- (and (eq_attr "alternative" "2")
+ (and (eq_attr "alternative" "2,3")
(ior (match_test "<MODE>mode == HImode")
(match_operand 1 "memory_operand")))
(const_string "vector")]
@@ -10063,33 +10065,34 @@
(const_string "direct")))
(set_attr "mode" "<MODE>")])
-(define_insn "*imulhi<mode>zu"
+(define_insn "*imulhi<mode>zu<nf_name>"
[(set (match_operand:SWI48x 0 "register_operand" "=r,r")
(zero_extend:SWI48x
(mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm")
- (match_operand:HI 2 "immediate_operand" "K,n"))))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_APX_ZU"
+ (match_operand:HI 2 "immediate_operand" "K,n"))))]
+ "TARGET_APX_ZU && <nf_condition>"
"@
- imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}
- imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}"
+ <nf_prefix>imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}
+ <nf_prefix>imulzu{w}\t{%2, %1, %w0|%w0, %1, %2}"
[(set_attr "type" "imul")
(set_attr "mode" "HI")])
-(define_insn "*mulsi3_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+(define_insn "*mulsi3_1_zext<nf_name>"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
(zero_extend:DI
- (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
- (match_operand:SI 2 "x86_64_general_operand" "K,e,BMr"))))
- (clobber (reg:CC FLAGS_REG))]
+ (mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0,r")
+ (match_operand:SI 2 "x86_64_general_operand" "K,e,BMr,BMr"))))]
"TARGET_64BIT
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+ && <nf_condition>"
"@
- imul{l}\t{%2, %1, %k0|%k0, %1, %2}
- imul{l}\t{%2, %1, %k0|%k0, %1, %2}
- imul{l}\t{%2, %k0|%k0, %2}"
+ <nf_prefix>imul{l}\t{%2, %1, %k0|%k0, %1, %2}
+ <nf_prefix>imul{l}\t{%2, %1, %k0|%k0, %1, %2}
+ <nf_prefix>imul{l}\t{%2, %k0|%k0, %2}
+ <nf_prefix>imul{l}\t{%2, %1, %k0|%k0, %1, %2}"
[(set_attr "type" "imul")
- (set_attr "prefix_0f" "0,0,1")
+ (set_attr "prefix_0f" "0,0,1,1")
+ (set_attr "isa" "*,*,*,apx_ndd")
(set (attr "athlon_decode")
(cond [(eq_attr "cpu" "athlon")
(const_string "vector")
@@ -10158,30 +10161,32 @@
[(set (reg:CCO FLAGS_REG)
(eq:CCO (mult:<DWI>
(sign_extend:<DWI>
- (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0"))
+ (match_operand:SWI48 1 "nonimmediate_operand" "%rm,0,r"))
(sign_extend:<DWI>
- (match_operand:SWI48 2 "x86_64_sext_operand" "We,mr")))
+ (match_operand:SWI48 2 "x86_64_sext_operand" "We,mr,mr")))
(sign_extend:<DWI>
(mult:SWI48 (match_dup 1) (match_dup 2)))))
- (set (match_operand:SWI48 0 "register_operand" "=r,r")
+ (set (match_operand:SWI48 0 "register_operand" "=r,r,r")
(mult:SWI48 (match_dup 1) (match_dup 2)))]
"!(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}
- imul{<imodesuffix>}\t{%2, %0|%0, %2}"
+ imul{<imodesuffix>}\t{%2, %0|%0, %2}
+ imul{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "imul")
- (set_attr "prefix_0f" "0,1")
+ (set_attr "prefix_0f" "0,1,1")
+ (set_attr "isa" "*,*,apx_ndd")
(set (attr "athlon_decode")
(cond [(eq_attr "cpu" "athlon")
(const_string "vector")
(eq_attr "alternative" "0")
(const_string "vector")
- (and (eq_attr "alternative" "1")
+ (and (eq_attr "alternative" "1,2")
(match_operand 1 "memory_operand"))
(const_string "vector")]
(const_string "direct")))
(set (attr "amdfam10_decode")
- (cond [(and (eq_attr "alternative" "1")
+ (cond [(and (eq_attr "alternative" "1,2")
(match_operand 1 "memory_operand"))
(const_string "vector")]
(const_string "direct")))
@@ -10192,17 +10197,20 @@
[(set (reg:CCO FLAGS_REG)
(eq:CCO (mult:SI
(sign_extend:SI
- (match_operand:HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:HI 1 "nonimmediate_operand" "%0,r"))
(sign_extend:SI
- (match_operand:HI 2 "nonimmediate_operand" "mr")))
+ (match_operand:HI 2 "nonimmediate_operand" "mr,mr")))
(sign_extend:SI
(mult:HI (match_dup 1) (match_dup 2)))))
- (set (match_operand:HI 0 "register_operand" "=r")
+ (set (match_operand:HI 0 "register_operand" "=r,r")
(mult:HI (match_dup 1) (match_dup 2)))]
"!(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "imul{w}\t{%2, %0|%0, %2}"
+ "@
+ imul{w}\t{%2, %0|%0, %2}
+ imul{w}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "imul")
(set_attr "prefix_0f" "1")
+ (set_attr "isa" "*,apx_ndd")
(set_attr "athlon_decode" "vector")
(set_attr "amdfam10_decode" "direct")
(set_attr "bdver1_decode" "double")
@@ -10451,16 +10459,16 @@
operands[5] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
})
-(define_insn "*mul<mode><dwi>3_1"
+(define_insn "*mul<mode><dwi>3_1<nf_name>"
[(set (match_operand:<DWI> 0 "register_operand" "=A")
(mult:<DWI>
(sign_extend:<DWI>
(match_operand:DWIH 1 "register_operand" "%a"))
(sign_extend:<DWI>
- (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
- (clobber (reg:CC FLAGS_REG))]
- "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "imul{<imodesuffix>}\t%2"
+ (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))]
+ "!(MEM_P (operands[1]) && MEM_P (operands[2]))
+ && <nf_condition>"
+ "<nf_prefix>imul{<imodesuffix>}\t%2"
[(set_attr "type" "imul")
(set_attr "length_immediate" "0")
(set (attr "athlon_decode")
@@ -10471,17 +10479,17 @@
(set_attr "bdver1_decode" "direct")
(set_attr "mode" "<MODE>")])
-(define_insn "*<u>mulqihi3_1"
+(define_insn "*<u>mulqihi3_1<nf_name>"
[(set (match_operand:HI 0 "register_operand" "=a")
(mult:HI
(any_extend:HI
(match_operand:QI 1 "register_operand" "%0"))
(any_extend:HI
- (match_operand:QI 2 "nonimmediate_operand" "qm"))))
- (clobber (reg:CC FLAGS_REG))]
+ (match_operand:QI 2 "nonimmediate_operand" "qm"))))]
"TARGET_QIMODE_MATH
- && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
- "<sgnprefix>mul{b}\t%2"
+ && !(MEM_P (operands[1]) && MEM_P (operands[2]))
+ && <nf_condition>"
+ "<nf_prefix><sgnprefix>mul{b}\t%2"
[(set_attr "type" "imul")
(set_attr "length_immediate" "0")
(set (attr "athlon_decode")
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index 0ff4df0780c..6c88aff911a 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -170,6 +170,12 @@ FOO4 (uint16_t, rol, <<, >>, 1)
FOO4 (uint32_t, rol, <<, >>, 1)
FOO4 (uint64_t, rol, <<, >>, 1)
+FOO1 (short, imul, *)
+FOO1 (int, imul, *)
+FOO1 (int64_t, imul, *)
+FOO2 (short, imul, *)
+FOO2 (int, imul, *)
+FOO2 (int64_t, imul, *)
/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%(?:r|e)di\\), %(?:|r|e)a(?:x|l)" 4 } } */
/* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%(?:r|e)di\\), %(?:|r|e)a(?:x|l)" 4 } } */
@@ -200,3 +206,5 @@ FOO4 (uint64_t, rol, <<, >>, 1)
/* { dg-final { scan-assembler-times "shr(?:b|l|w|q)\[^\n\r]*7, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
/* { dg-final { scan-assembler-times "ror(?:b|l|w|q)\[^\n\r]*1, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
/* { dg-final { scan-assembler-times "rol(?:b|l|w|q)\[^\n\r]*1, %(?:|r|e)di(?:|l), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "imul(?:l|q)\[^\n\r]%(?:|r|e)(?:|s|d)i, %(?:r|e)(?:|s|d)i, %(?:|r|e)ax" 3 } } */
+/* { dg-final { scan-assembler-times "imul(?:l|w|q)\[^\n\r]\\(%rdi\\), %(?:|r|e)si, %(?:|r|e)ax" 3 } } */
--
2.31.1