Hello!
With all recent mode handling cleanups to move patterns and SSE
bitops, it is now possible to enable TFmode moves via XMM registers
for 32-bit SSE targets. The compiler emits packed single operations in
this case, so the following testcase:
--cut here--
__float128 test_abs (__float128 a)
{
return (__builtin_fabsq (a));
}
__float128 test_copysign (__float128 a, __float128 b)
{
return (__builtin_copysignq (a, b));
}
--cut here--
compiles with "-O2 -msse" to:
test_abs:
movl 4(%esp), %eax
movaps 20(%esp), %xmm0
andps .LC0, %xmm0
movaps %xmm0, (%eax)
ret $4
test_copysign:
movaps 20(%esp), %xmm0
movaps 36(%esp), %xmm1
movl 4(%esp), %eax
andps .LC1, %xmm1
andps .LC0, %xmm0
orps %xmm1, %xmm0
movaps %xmm0, (%eax)
ret $4
For comparison, with -msse2 the compiler generates:
test_abs:
movl 4(%esp), %eax
movdqa 20(%esp), %xmm0
pand .LC0, %xmm0
movdqa %xmm0, (%eax)
ret $4
test_copysign:
movl 4(%esp), %eax
movdqa 20(%esp), %xmm0
movdqa 36(%esp), %xmm1
pand .LC0, %xmm0
pand .LC1, %xmm1
por %xmm1, %xmm0
movdqa %xmm0, (%eax)
ret $4
With an unpatched 4.7 compiler, the same code compiles (-O2 -msse) to some
40 SImode moves, with calls to __fabstf2 and __copysigntf2.
2012-05-13 Uros Bizjak <[email protected]>
* config/i386/i386.md (*pushtf): Enable for TARGET_SSE.
(pushtf splitter): Ditto.
(movtf): Ditto.
(*movtf_internal): Ditto. Use V4SFmode for !TARGET_SSE2.
(<code>tf2): Enable for TARGET_SSE.
(*absnegtf2_sse): Ditto.
(copysign<mode>3): Enable TFmode for TARGET_SSE.
(copysign<mode>3_const): Ditto.
(copysign<mode>3_var): Ditto.
* config/i386/sse.md (<code>tf3): Enable for TARGET_SSE.
(*andnottf3): Ditto. Use V4SFmode for !TARGET_SSE2.
(*<code>tf3): Ditto.
* config/i386/i386.c (struct builtin_description bdesc_args)
<IX86_BUILTIN_FABSQ>: Enable for TARGET_SSE.
<IX86_BUILTIN_COPYSIGNQ>: Ditto.
(ix86_expand_builtin) <IX86_BUILTIN_FABSQ, IX86_BUILTIN_COPYSIGNQ>:
Emit a normal call if SSE isn't available.
Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
{,-m32} and committed to mainline SVN.
Uros.
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c (revision 187435)
+++ config/i386/i386.c (working copy)
@@ -26327,6 +26327,9 @@ static const struct builtin_description bdesc_args
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss",
IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss",
IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN,
(int) FLOAT128_FTYPE_FLOAT128 },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ,
UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
+
/* SSE MMX or 3Dnow!A */
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3,
"__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI
},
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3,
"__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI
},
@@ -26510,9 +26513,6 @@ static const struct builtin_description bdesc_args
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd",
IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN,
(int) FLOAT128_FTYPE_FLOAT128 },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ,
UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
-
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128",
IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
/* SSE2 MMX */
@@ -28081,7 +28081,7 @@ ix86_init_builtins (void)
def_builtin_const (0, "__builtin_huge_valq",
FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
- /* We will expand them to normal call if SSE2 isn't available since
+ /* We will expand them to normal call if SSE isn't available since
they are used by libgcc. */
t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
@@ -30215,8 +30215,8 @@ rdrand_step:
{
case IX86_BUILTIN_FABSQ:
case IX86_BUILTIN_COPYSIGNQ:
- if (!TARGET_SSE2)
- /* Emit a normal call if SSE2 isn't available. */
+ if (!TARGET_SSE)
+ /* Emit a normal call if SSE isn't available. */
return expand_call (exp, target, ignore);
default:
return ix86_expand_args_builtin (d, exp, target);
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md (revision 187435)
+++ config/i386/i386.md (working copy)
@@ -2708,7 +2708,7 @@
(define_insn "*pushtf"
[(set (match_operand:TF 0 "push_operand" "=<,<,<")
(match_operand:TF 1 "general_no_elim_operand" "x,Fo,*r"))]
- "TARGET_SSE2"
+ "TARGET_SSE"
{
/* This insn should be already split before reg-stack. */
gcc_unreachable ();
@@ -2721,7 +2721,7 @@
(define_split
[(set (match_operand:TF 0 "push_operand")
(match_operand:TF 1 "sse_reg_operand"))]
- "TARGET_SSE2 && reload_completed"
+ "TARGET_SSE && reload_completed"
[(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
(set (mem:TF (reg:P SP_REG)) (match_dup 1))])
@@ -2859,7 +2859,7 @@
(define_expand "movtf"
[(set (match_operand:TF 0 "nonimmediate_operand")
(match_operand:TF 1 "nonimmediate_operand"))]
- "TARGET_SSE2"
+ "TARGET_SSE"
{
ix86_expand_move (TFmode, operands);
DONE;
@@ -2874,7 +2874,7 @@
(define_insn "*movtf_internal"
[(set (match_operand:TF 0 "nonimmediate_operand" "=x,x ,m,?*r ,!o")
(match_operand:TF 1 "general_operand" "C ,xm,x,*roF,F*r"))]
- "TARGET_SSE2
+ "TARGET_SSE
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (!can_create_pseudo_p ()
|| (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
@@ -2929,7 +2929,8 @@
(const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "TI")
- (match_test "optimize_function_for_size_p (cfun)")
+ (ior (not (match_test "TARGET_SSE2"))
+ (match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
]
(const_string "TI")))])
@@ -8710,7 +8711,7 @@
(define_expand "<code>tf2"
[(set (match_operand:TF 0 "register_operand")
(absneg:TF (match_operand:TF 1 "register_operand")))]
- "TARGET_SSE2"
+ "TARGET_SSE"
"ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
(define_insn "*absnegtf2_sse"
@@ -8719,7 +8720,7 @@
[(match_operand:TF 1 "register_operand" "0,x")]))
(use (match_operand:TF 2 "nonimmediate_operand" "xm,0"))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_SSE2"
+ "TARGET_SSE"
"#")
;; Splitters for fp abs and neg.
@@ -8898,7 +8899,7 @@
(match_operand:CSGNMODE 1 "nonmemory_operand")
(match_operand:CSGNMODE 2 "register_operand")]
"(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
- || (TARGET_SSE2 && (<MODE>mode == TFmode))"
+ || (TARGET_SSE && (<MODE>mode == TFmode))"
"ix86_expand_copysign (operands); DONE;")
(define_insn_and_split "copysign<mode>3_const"
@@ -8909,7 +8910,7 @@
(match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm")]
UNSPEC_COPYSIGN))]
"(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
- || (TARGET_SSE2 && (<MODE>mode == TFmode))"
+ || (TARGET_SSE && (<MODE>mode == TFmode))"
"#"
"&& reload_completed"
[(const_int 0)]
@@ -8925,7 +8926,7 @@
UNSPEC_COPYSIGN))
(clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x"))]
"(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
- || (TARGET_SSE2 && (<MODE>mode == TFmode))"
+ || (TARGET_SSE && (<MODE>mode == TFmode))"
"#")
(define_split
@@ -8938,7 +8939,7 @@
UNSPEC_COPYSIGN))
(clobber (match_scratch:<CSGNVMODE> 1))]
"((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
- || (TARGET_SSE2 && (<MODE>mode == TFmode)))
+ || (TARGET_SSE && (<MODE>mode == TFmode)))
&& reload_completed"
[(const_int 0)]
"ix86_split_copysign_var (operands); DONE;")