> 2014-03-20 Eric Botcazou <ebotca...@adacore.com>
>
> * config/sparc/sparc.c (sparc_do_work_around_errata): Implement work
> around for store forwarding issue in the FPU on the UT699.
> * config/sparc/sparc.md (in_branch_delay): Return false for single FP
> loads and operations if -mfix-ut699 is specified.
> (divtf3_hq): Tweak attribute.
> (sqrttf2_hq): Likewise.
Daniel privately pointed out that the implementation still has a few holes,
caused by irregularities in how the back-end classifies instructions.
The attached patch addresses them: it changes the fptype of floatsisf2 and
fix_truncsfsi2 to "single" (this was an old bug), adds a new fptype_ut699
attribute that is set to "single" for truncdfsf2 and fix_truncdfsi2, and
rewrites the splitters of negtf2/negdf2 and abstf2/absdf2 so that they are
always split.
Tested on LEON and SPARC/Solaris; applied to mainline and to the 4.9 and 4.8 branches.
2014-05-14 Eric Botcazou <ebotca...@adacore.com>
* config/sparc/sparc-protos.h (sparc_absnegfloat_split_legitimate):
Delete.
* config/sparc/sparc.c (sparc_absnegfloat_split_legitimate): Likewise.
* config/sparc/sparc.md (fptype_ut699): New attribute.
(in_branch_delay): Return false if -mfix-ut699 is specified and
fptype_ut699 is set to single.
(truncdfsf2): Add fptype_ut699 attribute.
(fix_truncdfsi2): Likewise.
(floatsisf2): Change fptype attribute.
(fix_truncsfsi2): Likewise.
(negtf2_notv9): Delete.
(negtf2_v9): Likewise.
(negtf2_hq): New instruction.
(negtf2): New instruction and splitter.
(negdf2_notv9): Rewrite.
(abstf2_notv9): Delete.
(abstf2_hq_v9): Likewise.
(abstf2_v9): Likewise.
(abstf2_hq): New instruction.
(abstf2): New instruction and splitter.
(absdf2_notv9): Rewrite.
--
Eric Botcazou
Index: config/sparc/sparc.md
===================================================================
--- config/sparc/sparc.md (revision 210410)
+++ config/sparc/sparc.md (working copy)
@@ -424,6 +424,10 @@ (define_attr "length" ""
(define_attr "fptype" "single,double"
(const_string "single"))
+;; FP precision specific to the UT699.
+(define_attr "fptype_ut699" "none,single"
+ (const_string "none"))
+
;; UltraSPARC-III integer load type.
(define_attr "us3load_type" "2cycle,3cycle"
(const_string "2cycle"))
@@ -464,7 +468,8 @@ (define_attr "in_branch_delay" "false,tr
(const_string "false")
(and (eq_attr "fix_ut699" "true")
(and (eq_attr "type" "fpload,fp,fpmove,fpmul,fpdivs,fpsqrts")
- (eq_attr "fptype" "single")))
+ (ior (eq_attr "fptype" "single")
+ (eq_attr "fptype_ut699" "single"))))
(const_string "false")
(eq_attr "length" "1")
(const_string "true")
@@ -3455,7 +3460,8 @@ (define_insn "truncdfsf2"
"TARGET_FPU"
"fdtos\t%1, %0"
[(set_attr "type" "fp")
- (set_attr "fptype" "double")])
+ (set_attr "fptype" "double")
+ (set_attr "fptype_ut699" "single")])
(define_expand "trunctfsf2"
[(set (match_operand:SF 0 "register_operand" "")
@@ -3496,7 +3502,7 @@ (define_insn "floatsisf2"
"TARGET_FPU"
"fitos\t%1, %0"
[(set_attr "type" "fp")
- (set_attr "fptype" "double")])
+ (set_attr "fptype" "single")])
(define_insn "floatsidf2"
[(set (match_operand:DF 0 "register_operand" "=e")
@@ -3583,7 +3589,7 @@ (define_insn "fix_truncsfsi2"
"TARGET_FPU"
"fstoi\t%1, %0"
[(set_attr "type" "fp")
- (set_attr "fptype" "double")])
+ (set_attr "fptype" "single")])
(define_insn "fix_truncdfsi2"
[(set (match_operand:SI 0 "register_operand" "=f")
@@ -3591,7 +3597,8 @@ (define_insn "fix_truncdfsi2"
"TARGET_FPU"
"fdtoi\t%1, %0"
[(set_attr "type" "fp")
- (set_attr "fptype" "double")])
+ (set_attr "fptype" "double")
+ (set_attr "fptype_ut699" "single")])
(define_expand "fix_trunctfsi2"
[(set (match_operand:SI 0 "register_operand" "")
@@ -5554,53 +5561,52 @@ (define_insn "divsf3"
[(set_attr "type" "fpdivs")])
(define_expand "negtf2"
- [(set (match_operand:TF 0 "register_operand" "=e,e")
- (neg:TF (match_operand:TF 1 "register_operand" "0,e")))]
+ [(set (match_operand:TF 0 "register_operand" "")
+ (neg:TF (match_operand:TF 1 "register_operand" "")))]
"TARGET_FPU"
"")
-(define_insn_and_split "*negtf2_notv9"
- [(set (match_operand:TF 0 "register_operand" "=e,e")
- (neg:TF (match_operand:TF 1 "register_operand" "0,e")))]
- ; We don't use quad float insns here so we don't need TARGET_HARD_QUAD.
- "TARGET_FPU
- && ! TARGET_V9"
- "@
- fnegs\t%0, %0
- #"
- "&& reload_completed
- && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
- [(set (match_dup 2) (neg:SF (match_dup 3)))
- (set (match_dup 4) (match_dup 5))
- (set (match_dup 6) (match_dup 7))]
- "operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]));
- operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]));
- operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1);
- operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);
- operands[6] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2);
- operands[7] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);"
- [(set_attr "type" "fpmove,*")
- (set_attr "length" "*,2")])
-
-(define_insn_and_split "*negtf2_v9"
- [(set (match_operand:TF 0 "register_operand" "=e,e")
- (neg:TF (match_operand:TF 1 "register_operand" "0,e")))]
- ; We don't use quad float insns here so we don't need TARGET_HARD_QUAD.
- "TARGET_FPU && TARGET_V9"
- "@
- fnegd\t%0, %0
- #"
- "&& reload_completed
- && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
- [(set (match_dup 2) (neg:DF (match_dup 3)))
- (set (match_dup 4) (match_dup 5))]
- "operands[2] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]));
- operands[3] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]));
- operands[4] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2);
- operands[5] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);"
- [(set_attr "type" "fpmove,*")
- (set_attr "length" "*,2")
- (set_attr "fptype" "double")])
+(define_insn "*negtf2_hq"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (neg:TF (match_operand:TF 1 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+ "fnegq\t%1, %0"
+ [(set_attr "type" "fpmove")])
+
+(define_insn_and_split "*negtf2"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (neg:TF (match_operand:TF 1 "register_operand" "e")))]
+ "TARGET_FPU && !TARGET_HARD_QUAD"
+ "#"
+ "&& reload_completed"
+ [(clobber (const_int 0))]
+{
+ rtx set_dest = operands[0];
+ rtx set_src = operands[1];
+ rtx dest1, dest2;
+ rtx src1, src2;
+
+ dest1 = gen_df_reg (set_dest, 0);
+ dest2 = gen_df_reg (set_dest, 1);
+ src1 = gen_df_reg (set_src, 0);
+ src2 = gen_df_reg (set_src, 1);
+
+ /* Now emit using the real source and destination we found, swapping
+ the order if we detect overlap. */
+ if (reg_overlap_mentioned_p (dest1, src2))
+ {
+ emit_insn (gen_movdf (dest2, src2));
+ emit_insn (gen_negdf2 (dest1, src1));
+ }
+ else
+ {
+ emit_insn (gen_negdf2 (dest1, src1));
+ if (REGNO (dest2) != REGNO (src2))
+ emit_insn (gen_movdf (dest2, src2));
+ }
+ DONE;
+}
+ [(set_attr "length" "2")])
(define_expand "negdf2"
[(set (match_operand:DF 0 "register_operand" "")
@@ -5609,22 +5615,39 @@ (define_expand "negdf2"
"")
(define_insn_and_split "*negdf2_notv9"
- [(set (match_operand:DF 0 "register_operand" "=e,e")
- (neg:DF (match_operand:DF 1 "register_operand" "0,e")))]
- "TARGET_FPU && ! TARGET_V9"
- "@
- fnegs\t%0, %0
- #"
- "&& reload_completed
- && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
- [(set (match_dup 2) (neg:SF (match_dup 3)))
- (set (match_dup 4) (match_dup 5))]
- "operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]));
- operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]));
- operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1);
- operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);"
- [(set_attr "type" "fpmove,*")
- (set_attr "length" "*,2")])
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (neg:DF (match_operand:DF 1 "register_operand" "e")))]
+ "TARGET_FPU && !TARGET_V9"
+ "#"
+ "&& reload_completed"
+ [(clobber (const_int 0))]
+{
+ rtx set_dest = operands[0];
+ rtx set_src = operands[1];
+ rtx dest1, dest2;
+ rtx src1, src2;
+
+ dest1 = gen_highpart (SFmode, set_dest);
+ dest2 = gen_lowpart (SFmode, set_dest);
+ src1 = gen_highpart (SFmode, set_src);
+ src2 = gen_lowpart (SFmode, set_src);
+
+ /* Now emit using the real source and destination we found, swapping
+ the order if we detect overlap. */
+ if (reg_overlap_mentioned_p (dest1, src2))
+ {
+ emit_insn (gen_movsf (dest2, src2));
+ emit_insn (gen_negsf2 (dest1, src1));
+ }
+ else
+ {
+ emit_insn (gen_negsf2 (dest1, src1));
+ if (REGNO (dest2) != REGNO (src2))
+ emit_insn (gen_movsf (dest2, src2));
+ }
+ DONE;
+}
+ [(set_attr "length" "2")])
(define_insn "*negdf2_v9"
[(set (match_operand:DF 0 "register_operand" "=e")
@@ -5647,56 +5670,47 @@ (define_expand "abstf2"
"TARGET_FPU"
"")
-(define_insn_and_split "*abstf2_notv9"
- [(set (match_operand:TF 0 "register_operand" "=e,e")
- (abs:TF (match_operand:TF 1 "register_operand" "0,e")))]
- ; We don't use quad float insns here so we don't need TARGET_HARD_QUAD.
- "TARGET_FPU && ! TARGET_V9"
- "@
- fabss\t%0, %0
- #"
- "&& reload_completed
- && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
- [(set (match_dup 2) (abs:SF (match_dup 3)))
- (set (match_dup 4) (match_dup 5))
- (set (match_dup 6) (match_dup 7))]
- "operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]));
- operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]));
- operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1);
- operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);
- operands[6] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2);
- operands[7] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);"
- [(set_attr "type" "fpmove,*")
- (set_attr "length" "*,2")])
-
-(define_insn "*abstf2_hq_v9"
- [(set (match_operand:TF 0 "register_operand" "=e,e")
- (abs:TF (match_operand:TF 1 "register_operand" "0,e")))]
- "TARGET_FPU && TARGET_V9 && TARGET_HARD_QUAD"
- "@
- fabsd\t%0, %0
- fabsq\t%1, %0"
- [(set_attr "type" "fpmove")
- (set_attr "fptype" "double,*")])
+(define_insn "*abstf2_hq"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (abs:TF (match_operand:TF 1 "register_operand" "e")))]
+ "TARGET_FPU && TARGET_HARD_QUAD"
+ "fabsq\t%1, %0"
+ [(set_attr "type" "fpmove")])
-(define_insn_and_split "*abstf2_v9"
- [(set (match_operand:TF 0 "register_operand" "=e,e")
- (abs:TF (match_operand:TF 1 "register_operand" "0,e")))]
- "TARGET_FPU && TARGET_V9 && !TARGET_HARD_QUAD"
- "@
- fabsd\t%0, %0
- #"
- "&& reload_completed
- && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
- [(set (match_dup 2) (abs:DF (match_dup 3)))
- (set (match_dup 4) (match_dup 5))]
- "operands[2] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]));
- operands[3] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]));
- operands[4] = gen_rtx_raw_REG (DFmode, REGNO (operands[0]) + 2);
- operands[5] = gen_rtx_raw_REG (DFmode, REGNO (operands[1]) + 2);"
- [(set_attr "type" "fpmove,*")
- (set_attr "length" "*,2")
- (set_attr "fptype" "double,*")])
+(define_insn_and_split "*abstf2"
+ [(set (match_operand:TF 0 "register_operand" "=e")
+ (abs:TF (match_operand:TF 1 "register_operand" "e")))]
+ "TARGET_FPU && !TARGET_HARD_QUAD"
+ "#"
+ "&& reload_completed"
+ [(clobber (const_int 0))]
+{
+ rtx set_dest = operands[0];
+ rtx set_src = operands[1];
+ rtx dest1, dest2;
+ rtx src1, src2;
+
+ dest1 = gen_df_reg (set_dest, 0);
+ dest2 = gen_df_reg (set_dest, 1);
+ src1 = gen_df_reg (set_src, 0);
+ src2 = gen_df_reg (set_src, 1);
+
+ /* Now emit using the real source and destination we found, swapping
+ the order if we detect overlap. */
+ if (reg_overlap_mentioned_p (dest1, src2))
+ {
+ emit_insn (gen_movdf (dest2, src2));
+ emit_insn (gen_absdf2 (dest1, src1));
+ }
+ else
+ {
+ emit_insn (gen_absdf2 (dest1, src1));
+ if (REGNO (dest2) != REGNO (src2))
+ emit_insn (gen_movdf (dest2, src2));
+ }
+ DONE;
+}
+ [(set_attr "length" "2")])
(define_expand "absdf2"
[(set (match_operand:DF 0 "register_operand" "")
@@ -5705,22 +5719,39 @@ (define_expand "absdf2"
"")
(define_insn_and_split "*absdf2_notv9"
- [(set (match_operand:DF 0 "register_operand" "=e,e")
- (abs:DF (match_operand:DF 1 "register_operand" "0,e")))]
- "TARGET_FPU && ! TARGET_V9"
- "@
- fabss\t%0, %0
- #"
- "&& reload_completed
- && sparc_absnegfloat_split_legitimate (operands[0], operands[1])"
- [(set (match_dup 2) (abs:SF (match_dup 3)))
- (set (match_dup 4) (match_dup 5))]
- "operands[2] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]));
- operands[3] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]));
- operands[4] = gen_rtx_raw_REG (SFmode, REGNO (operands[0]) + 1);
- operands[5] = gen_rtx_raw_REG (SFmode, REGNO (operands[1]) + 1);"
- [(set_attr "type" "fpmove,*")
- (set_attr "length" "*,2")])
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (abs:DF (match_operand:DF 1 "register_operand" "e")))]
+ "TARGET_FPU && !TARGET_V9"
+ "#"
+ "&& reload_completed"
+ [(clobber (const_int 0))]
+{
+ rtx set_dest = operands[0];
+ rtx set_src = operands[1];
+ rtx dest1, dest2;
+ rtx src1, src2;
+
+ dest1 = gen_highpart (SFmode, set_dest);
+ dest2 = gen_lowpart (SFmode, set_dest);
+ src1 = gen_highpart (SFmode, set_src);
+ src2 = gen_lowpart (SFmode, set_src);
+
+ /* Now emit using the real source and destination we found, swapping
+ the order if we detect overlap. */
+ if (reg_overlap_mentioned_p (dest1, src2))
+ {
+ emit_insn (gen_movsf (dest2, src2));
+ emit_insn (gen_abssf2 (dest1, src1));
+ }
+ else
+ {
+ emit_insn (gen_abssf2 (dest1, src1));
+ if (REGNO (dest2) != REGNO (src2))
+ emit_insn (gen_movsf (dest2, src2));
+ }
+ DONE;
+}
+ [(set_attr "length" "2")])
(define_insn "*absdf2_v9"
[(set (match_operand:DF 0 "register_operand" "=e")
Index: config/sparc/sparc-protos.h
===================================================================
--- config/sparc/sparc-protos.h (revision 210410)
+++ config/sparc/sparc-protos.h (working copy)
@@ -69,7 +69,6 @@ extern bool sparc_expand_move (enum mach
extern void sparc_emit_set_symbolic_const64 (rtx, rtx, rtx);
extern int sparc_splitdi_legitimate (rtx, rtx);
extern int sparc_split_regreg_legitimate (rtx, rtx);
-extern int sparc_absnegfloat_split_legitimate (rtx, rtx);
extern const char *output_ubranch (rtx, rtx);
extern const char *output_cbranch (rtx, rtx, int, int, int, rtx);
extern const char *output_return (rtx);
Index: config/sparc/sparc.c
===================================================================
--- config/sparc/sparc.c (revision 210410)
+++ config/sparc/sparc.c (working copy)
@@ -8543,22 +8543,6 @@ sparc_split_regreg_legitimate (rtx reg1,
return 0;
}
-/* Return 1 if x and y are some kind of REG and they refer to
- different hard registers. This test is guaranteed to be
- run after reload. */
-
-int
-sparc_absnegfloat_split_legitimate (rtx x, rtx y)
-{
- if (GET_CODE (x) != REG)
- return 0;
- if (GET_CODE (y) != REG)
- return 0;
- if (REGNO (x) == REGNO (y))
- return 0;
- return 1;
-}
-
/* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
This makes them candidates for using ldd and std insns.