Re: [PATCH], Add power9 support to GCC, patch #6 (IEEE 128-bit hardware support)

Michael Meissner Sun, 08 Nov 2015 16:45:53 -0800

This patch adds support for the IEEE 128-bit hardware instructions that are
being added to the PowerPC ISA 3.0 (power9).  With this patch, users on power7
and power8 will continue to use the software emulation functions, which have
already been committed but still need some enhancement.  On ISA 3.0/power9,
they will be able to use the hardware instructions directly.


I have built this patch with a bootstrap build on a power8 little endian
system.  There were no regressions in the test suite.  Is this patch ok to
install in the trunk?

[gcc]
2015-11-08  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        * config/rs6000/rs6000-protos.h (convert_float128_to_int): Add
        declaration.
        (convert_int_to_float128): Likewise.

        * config/rs6000/rs6000.c (rs6000_generate_compare): Add support
        for ISA 3.0 (power9) hardware support for IEEE 128-bit floating
        point.
        (rs6000_expand_float128_convert): Likewise.
        (convert_float128_to_int): Likewise.
        (convert_int_to_float128): Likewise.

        * config/rs6000/rs6000.md (UNSPEC_ROUND_TO_ODD): New unspecs for
        ISA 3.0 hardware IEEE 128-bit floating point.
        (UNSPEC_IEEE128_MOVE): Likewise.
        (UNSPEC_IEEE128_CONVERT): Likewise.
        (FMA_F): Add support for IEEE 128-bit floating point hardware
        support.
        (Ff): Add support for DImode.
        (Fv): Likewise.
        (any_fix code iterator): New and updated iterators for IEEE
        128-bit floating point hardware support.
        (any_float code iterator): Likewise.
        (s code attribute): Likewise.
        (su code attribute): Likewise.
        (az code attribute): Likewise.
        (neg<mode>2, FLOAT128 iterator): Add support for IEEE 128-bit
        floating point hardware support.
        (abs<mode>2, FLOAT128 iterator): Likewise.
        (add<mode>3, IEEE128 iterator): New insns for IEEE 128-bit
        floating point hardware.
        (sub<mode>3, IEEE128 iterator): Likewise.
        (mul<mode>3, IEEE128 iterator): Likewise.
        (div<mode>3, IEEE128 iterator): Likewise.
        (copysign<mode>3, IEEE128 iterator): Likewise.
        (sqrt<mode>2, IEEE128 iterator): Likewise.
        (neg<mode>2, IEEE128 iterator): Likewise.
        (abs<mode>2, IEEE128 iterator): Likewise.
        (nabs<mode>2, IEEE128 iterator): Likewise.
        (fma<mode>4_hw, IEEE128 iterator): Likewise.
        (fms<mode>4_hw, IEEE128 iterator): Likewise.
        (nfma<mode>4_hw, IEEE128 iterator): Likewise.
        (nfms<mode>4_hw, IEEE128 iterator): Likewise.
        (extend<SFDF:mode><IEEE128:mode>2_hw): Likewise.
        (trunc<mode>df2_hw, IEEE128 iterator): Likewise.
        (trunc<mode>sf2_hw, IEEE128 iterator): Likewise.
        (fix_fixuns code attribute): Likewise.
        (float_floatuns code attribute): Likewise.
        (<fix_fixuns>_<mode>si2_hw): Likewise.
        (<fix_fixuns>_<mode>di2_hw): Likewise.
        (<float_floatuns>_<mode>si2_hw): Likewise.
        (<float_floatuns>_<mode>di2_hw): Likewise.
        (xscvqp<su>wz_<mode>): Likewise.
        (xscvqp<su>dz_<mode>): Likewise.
        (xscv<su>dqp_<mode>): Likewise.
        (ieee128_mfvsrd): Likewise.
        (ieee128_mfvsrwz): Likewise.
        (ieee128_mtvsrw): Likewise.
        (ieee128_mtvsrd): Likewise.
        (trunc<mode>df2_odd): Likewise.
        (cmp<mode>_hw): Likewise.

[gcc/testsuite]
2015-11-08  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        * gcc.target/powerpc/float128-hw.c: New test for IEEE 128-bit
        hardware floating point support.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797

Index: gcc/config/rs6000/rs6000-protos.h
===================================================================
--- gcc/config/rs6000/rs6000-protos.h   (revision 229976)
+++ gcc/config/rs6000/rs6000-protos.h   (working copy)
@@ -55,6 +55,8 @@ extern const char *rs6000_output_move_12
 extern bool rs6000_move_128bit_ok_p (rtx []);
 extern bool rs6000_split_128bit_ok_p (rtx []);
 extern void rs6000_expand_float128_convert (rtx, rtx, bool);
+extern void convert_float128_to_int (rtx *, enum rtx_code);
+extern void convert_int_to_float128 (rtx *, enum rtx_code);
 extern void rs6000_expand_vector_init (rtx, rtx);
 extern void paired_expand_vector_init (rtx, rtx);
 extern void rs6000_expand_vector_set (rtx, rtx, int);
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c  (revision 229976)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -20504,11 +20504,12 @@ rs6000_generate_compare (rtx cmp, machin
       emit_insn (cmp);
     }
 
-  /* IEEE 128-bit support in VSX registers.  The comparison functions
-     (__cmpokf2 and __cmpukf2) returns 0..15 that is laid out the same way as
-     the PowerPC CR register would for a normal floating point comparison from
-     the fcmpo and fcmpu instructions.  */
-  else if (FLOAT128_IEEE_P (mode))
+  /* IEEE 128-bit support in VSX registers.  If we do not have IEEE 128-bit
+     hardware, the comparison functions (__cmpokf2 and __cmpukf2) returns 0..15
+     that is laid out the same way as the PowerPC CR register would for a
+     normal floating point comparison from the fcmpo and fcmpu
+     instructions.  */
+  else if (!TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))
     {
       rtx and_reg = gen_reg_rtx (SImode);
       rtx dest = gen_reg_rtx (SImode);
@@ -20647,7 +20648,7 @@ rs6000_generate_compare (rtx cmp, machin
   /* Some kinds of FP comparisons need an OR operation;
      under flag_finite_math_only we don't bother.  */
   if (FLOAT_MODE_P (mode)
-      && !FLOAT128_IEEE_P (mode)
+      && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
       && !flag_finite_math_only
       && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
       && (code == LE || code == GE
@@ -20740,6 +20741,56 @@ rs6000_expand_float128_convert (rtx dest
   bool do_move = false;
   rtx libfunc = NULL_RTX;
   rtx dest2;
+  typedef rtx (*rtx_2func_t) (rtx, rtx);
+  rtx_2func_t hw_convert = (rtx_2func_t)0;
+  size_t kf_or_tf;
+
+  struct hw_conv_t {
+    rtx_2func_t from_df;               /* IEEE 128-bit <- DFmode.  */
+    rtx_2func_t from_sf;               /* IEEE 128-bit <- SFmode.  */
+    rtx_2func_t from_si_sign;          /* IEEE 128-bit <- SImode (signed).  */
+    rtx_2func_t from_si_uns;           /* IEEE 128-bit <- SImode (unsigned).  */
+    rtx_2func_t from_di_sign;          /* IEEE 128-bit <- DImode (signed).  */
+    rtx_2func_t from_di_uns;           /* IEEE 128-bit <- DImode (unsigned).  */
+    rtx_2func_t to_df;                 /* DFmode <- IEEE 128-bit.  */
+    rtx_2func_t to_sf;                 /* SFmode <- IEEE 128-bit.  */
+    rtx_2func_t to_si_sign;            /* SImode <- IEEE 128-bit (signed).  */
+    rtx_2func_t to_si_uns;             /* SImode <- IEEE 128-bit (unsigned).  */
+    rtx_2func_t to_di_sign;            /* DImode <- IEEE 128-bit (signed).  */
+    rtx_2func_t to_di_uns;             /* DImode <- IEEE 128-bit (unsigned).  */
+  } hw_conversions[2] = {
+    /* Conversions to/from KFmode (index 0).  */
+    {
+      gen_extenddfkf2_hw,              /* KFmode <- DFmode.  */
+      gen_extendsfkf2_hw,              /* KFmode <- SFmode.  */
+      gen_float_kfsi2_hw,              /* KFmode <- SImode (signed).  */
+      gen_floatuns_kfsi2_hw,           /* KFmode <- SImode (unsigned).  */
+      gen_float_kfdi2_hw,              /* KFmode <- DImode (signed).  */
+      gen_floatuns_kfdi2_hw,           /* KFmode <- DImode (unsigned).  */
+      gen_trunckfdf2_hw,               /* DFmode <- KFmode.  */
+      gen_trunckfsf2_hw,               /* SFmode <- KFmode.  */
+      gen_fix_kfsi2_hw,                /* SImode <- KFmode (signed).  */
+      gen_fixuns_kfsi2_hw,             /* SImode <- KFmode (unsigned).  */
+      gen_fix_kfdi2_hw,                /* DImode <- KFmode (signed).  */
+      gen_fixuns_kfdi2_hw,             /* DImode <- KFmode (unsigned).  */
+    },
+
+    /* Conversions to/from TFmode (index 1).  */
+    {
+      gen_extenddftf2_hw,              /* TFmode <- DFmode.  */
+      gen_extendsftf2_hw,              /* TFmode <- SFmode.  */
+      gen_float_tfsi2_hw,              /* TFmode <- SImode (signed).  */
+      gen_floatuns_tfsi2_hw,           /* TFmode <- SImode (unsigned).  */
+      gen_float_tfdi2_hw,              /* TFmode <- DImode (signed).  */
+      gen_floatuns_tfdi2_hw,           /* TFmode <- DImode (unsigned).  */
+      gen_trunctfdf2_hw,               /* DFmode <- TFmode.  */
+      gen_trunctfsf2_hw,               /* SFmode <- TFmode.  */
+      gen_fix_tfsi2_hw,                /* SImode <- TFmode (signed).  */
+      gen_fixuns_tfsi2_hw,             /* SImode <- TFmode (unsigned).  */
+      gen_fix_tfdi2_hw,                /* DImode <- TFmode (signed).  */
+      gen_fixuns_tfdi2_hw,             /* DImode <- TFmode (unsigned).  */
+    },
+  };
 
   if (dest_mode == src_mode)
     gcc_unreachable ();
@@ -20759,14 +20810,23 @@ rs6000_expand_float128_convert (rtx dest
   /* Convert to IEEE 128-bit floating point.  */
   if (FLOAT128_IEEE_P (dest_mode))
     {
+      if (dest_mode == KFmode)
+       kf_or_tf = 0;
+      else if (dest_mode == TFmode)
+       kf_or_tf = 1;
+      else
+       gcc_unreachable ();
+
       switch (src_mode)
        {
        case DFmode:
          cvt = sext_optab;
+         hw_convert = hw_conversions[kf_or_tf].from_df;
          break;
 
        case SFmode:
          cvt = sext_optab;
+         hw_convert = hw_conversions[kf_or_tf].from_sf;
          break;
 
        case KFmode:
@@ -20779,8 +20839,29 @@ rs6000_expand_float128_convert (rtx dest
          break;
 
        case SImode:
+         if (unsigned_p)
+           {
+             cvt = ufloat_optab;
+             hw_convert = hw_conversions[kf_or_tf].from_si_uns;
+           }
+         else
+           {
+             cvt = sfloat_optab;
+             hw_convert = hw_conversions[kf_or_tf].from_si_sign;
+           }
+         break;
+
        case DImode:
-         cvt = (unsigned_p) ? ufloat_optab : sfloat_optab;
+         if (unsigned_p)
+           {
+             cvt = ufloat_optab;
+             hw_convert = hw_conversions[kf_or_tf].from_di_uns;
+           }
+         else
+           {
+             cvt = sfloat_optab;
+             hw_convert = hw_conversions[kf_or_tf].from_di_sign;
+           }
          break;
 
        default:
@@ -20791,14 +20872,23 @@ rs6000_expand_float128_convert (rtx dest
   /* Convert from IEEE 128-bit floating point.  */
   else if (FLOAT128_IEEE_P (src_mode))
     {
+      if (src_mode == KFmode)
+       kf_or_tf = 0;
+      else if (src_mode == TFmode)
+       kf_or_tf = 1;
+      else
+       gcc_unreachable ();
+
       switch (dest_mode)
        {
        case DFmode:
          cvt = trunc_optab;
+         hw_convert = hw_conversions[kf_or_tf].to_df;
          break;
 
        case SFmode:
          cvt = trunc_optab;
+         hw_convert = hw_conversions[kf_or_tf].to_sf;
          break;
 
        case KFmode:
@@ -20811,8 +20901,29 @@ rs6000_expand_float128_convert (rtx dest
          break;
 
        case SImode:
+         if (unsigned_p)
+           {
+             cvt = ufix_optab;
+             hw_convert = hw_conversions[kf_or_tf].to_si_uns;
+           }
+         else
+           {
+             cvt = sfix_optab;
+             hw_convert = hw_conversions[kf_or_tf].to_si_sign;
+           }
+         break;
+
        case DImode:
-         cvt = (unsigned_p) ? ufix_optab : sfix_optab;
+         if (unsigned_p)
+           {
+             cvt = ufix_optab;
+             hw_convert = hw_conversions[kf_or_tf].to_di_uns;
+           }
+         else
+           {
+             cvt = sfix_optab;
+             hw_convert = hw_conversions[kf_or_tf].to_di_sign;
+           }
          break;
 
        default:
@@ -20831,6 +20942,10 @@ rs6000_expand_float128_convert (rtx dest
   if (do_move)
     emit_move_insn (dest, gen_lowpart (dest_mode, src));
 
+  /* Handle conversion if we have hardware support.  */
+  else if (TARGET_FLOAT128_HW && hw_convert)
+    emit_insn ((hw_convert) (dest, src));
+
   /* Call an external function to do the conversion.  */
   else if (cvt != unknown_optab)
     {
@@ -20851,6 +20966,92 @@ rs6000_expand_float128_convert (rtx dest
   return;
 }
 
+/* Split a conversion from __float128 to an integer type into separate insns.
+   OPERANDS points to the destination, source, and V2DI temporary register
+   (SCRATCH becomes a new pseudo).  CODE is either FIX or UNSIGNED_FIX.  */
+
+void
+convert_float128_to_int (rtx *operands, enum rtx_code code)
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx tmp = operands[2];
+  rtx cvt;
+  rtvec cvt_vec;
+  rtx cvt_unspec;
+  rtvec move_vec;
+  rtx move_unspec;
+
+  if (GET_CODE (tmp) == SCRATCH)
+    tmp = gen_reg_rtx (V2DImode);
+
+  if (MEM_P (dest))
+    dest = rs6000_address_for_fpconvert (dest);
+
+  /* Generate the actual convert insn, e.g. for an SImode destination:
+     (set (tmp) (unspec:V2DI [(fix:SI (reg:KF))] UNSPEC_IEEE128_CONVERT)).  */
+  cvt = gen_rtx_fmt_e (code, GET_MODE (dest), src);
+  cvt_vec = gen_rtvec (1, cvt);
+  cvt_unspec = gen_rtx_UNSPEC (V2DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
+  emit_insn (gen_rtx_SET (tmp, cvt_unspec));
+
+  /* Generate the move insn out of the V2DI temporary, of the form:
+     (set (dest:SI) (unspec:SI [(tmp:V2DI)] UNSPEC_IEEE128_MOVE)).  */
+  move_vec = gen_rtvec (1, tmp);
+  move_unspec = gen_rtx_UNSPEC (GET_MODE (dest), move_vec, UNSPEC_IEEE128_MOVE);
+  emit_insn (gen_rtx_SET (dest, move_unspec));
+}
+
+/* Split a conversion from an integer type to __float128 into separate insns.
+   OPERANDS points to the destination, source, and V2DI temporary register
+   (SCRATCH becomes a new pseudo).  CODE is either FLOAT or UNSIGNED_FLOAT.  */
+
+void
+convert_int_to_float128 (rtx *operands, enum rtx_code code)
+{
+  rtx dest = operands[0];
+  rtx src = operands[1];
+  rtx tmp = operands[2];
+  rtx cvt;
+  rtvec cvt_vec;
+  rtx cvt_unspec;
+  rtvec move_vec;
+  rtx move_unspec;
+  rtx unsigned_flag;
+
+  if (GET_CODE (tmp) == SCRATCH)
+    tmp = gen_reg_rtx (V2DImode);
+
+  if (MEM_P (src))
+    src = rs6000_address_for_fpconvert (src);
+
+  /* Generate the move of the integer into the Altivec register of the form:
+     (set (tmp:V2DI) (unspec:V2DI [(src:SI)
+                                  (const_int 0 or 1)] UNSPEC_IEEE128_MOVE)),
+     where the constant is 1 for UNSIGNED_FLOAT and 0 otherwise, or:
+
+     (set (tmp:V2DI) (unspec:V2DI [(src:DI)] UNSPEC_IEEE128_MOVE)).  */
+
+  if (GET_MODE (src) == SImode)
+    {
+      unsigned_flag = (code == UNSIGNED_FLOAT) ? const1_rtx : const0_rtx;
+      move_vec = gen_rtvec (2, src, unsigned_flag);
+    }
+  else
+    move_vec = gen_rtvec (1, src);
+
+  move_unspec = gen_rtx_UNSPEC (V2DImode, move_vec, UNSPEC_IEEE128_MOVE);
+  emit_insn (gen_rtx_SET (tmp, move_unspec));
+
+  /* Generate the actual convert insn, e.g. for a KFmode destination:
+     (set (dest:KF) (float:KF (unspec:DI [(tmp:V2DI)]
+                                        UNSPEC_IEEE128_CONVERT))).  */
+  cvt_vec = gen_rtvec (1, tmp);
+  cvt_unspec = gen_rtx_UNSPEC (DImode, cvt_vec, UNSPEC_IEEE128_CONVERT);
+  cvt = gen_rtx_fmt_e (code, GET_MODE (dest), cvt_unspec);
+  emit_insn (gen_rtx_SET (dest, cvt));
+}
+
 
 /* Emit the RTL for an sISEL pattern.  */
 
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md (revision 229976)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -143,6 +143,9 @@ (define_c_enum "unspec"
    UNSPEC_STACK_CHECK
    UNSPEC_FUSION_P9
    UNSPEC_FUSION_ADDIS
+   UNSPEC_ROUND_TO_ODD
+   UNSPEC_IEEE128_MOVE
+   UNSPEC_IEEE128_CONVERT
   ])
 
 ;;
@@ -381,6 +384,8 @@ (define_mode_iterator FMA_F [
   (V2SF "TARGET_PAIRED_FLOAT")
   (V4SF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)")
   (V2DF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V2DFmode)")
+  (KF "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (KFmode)")
+  (TF "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (TFmode)")
   ])
 
 ; Floating point move iterators to combine binary and decimal moves
@@ -485,10 +490,10 @@ (define_mode_attr Ftrad           [(SF "s") (DF "
 (define_mode_attr Fvsx         [(SF "sp") (DF  "dp")])
 
 ; SF/DF constraint for arithmetic on traditional floating point registers
-(define_mode_attr Ff           [(SF "f") (DF "d")])
+(define_mode_attr Ff           [(SF "f") (DF "d") (DI "d")])
 
 ; SF/DF constraint for arithmetic on VSX registers
-(define_mode_attr Fv           [(SF "wy") (DF "ws")])
+(define_mode_attr Fv           [(SF "wy") (DF "ws") (DI "wi")])
 
 ; SF/DF constraint for arithmetic on altivec registers
 (define_mode_attr Fa           [(SF "wu") (DF "wv")])
@@ -510,9 +515,26 @@ (define_code_attr return_str [(return ""
 (define_code_iterator iorxor [ior xor])
 
 ; Signed/unsigned variants of ops.
-(define_code_iterator any_extend [sign_extend zero_extend])
-(define_code_attr u [(sign_extend "") (zero_extend "u")])
-(define_code_attr su [(sign_extend "s") (zero_extend "u")])
+(define_code_iterator any_extend       [sign_extend zero_extend])
+(define_code_iterator any_fix          [fix unsigned_fix])
+(define_code_iterator any_float                [float unsigned_float])
+
+(define_code_attr u  [(sign_extend     "")
+                     (zero_extend      "u")])
+
+(define_code_attr su [(sign_extend     "s")
+                     (zero_extend      "u")
+                     (fix              "s")
+                     (unsigned_fix     "u")    ; xscvqp<su>wz/dz need "u" here
+                     (float            "s")
+                     (unsigned_float   "u")])
+
+(define_code_attr az [(sign_extend     "a")
+                     (zero_extend      "z")
+                     (fix              "a")
+                     (unsigned_fix     "z")
+                     (float            "a")
+                     (unsigned_float   "z")])
 
 ; Various instructions that come in SI and DI forms.
 ; A generic w/d attribute, for things like cmpw/cmpd.
@@ -7003,7 +7025,16 @@ (define_expand "neg<mode>2"
 {
   if (FLOAT128_IEEE_P (<MODE>mode))
     {
-      if (TARGET_FLOAT128)
+      if (TARGET_FLOAT128_HW)
+       {
+         if (<MODE>mode == TFmode)
+           emit_insn (gen_negtf2_hw (operands[0], operands[1]));
+         else if (<MODE>mode == KFmode)
+           emit_insn (gen_negkf2_hw (operands[0], operands[1]));
+         else
+           gcc_unreachable ();
+       }
+      else if (TARGET_FLOAT128)
        {
          if (<MODE>mode == TFmode)
            emit_insn (gen_ieee_128bit_vsx_negtf2 (operands[0], operands[1]));
@@ -7053,7 +7084,17 @@ (define_expand "abs<mode>2"
 
   if (FLOAT128_IEEE_P (<MODE>mode))
     {
-      if (TARGET_FLOAT128)
+      if (TARGET_FLOAT128_HW)
+       {
+         if (<MODE>mode == TFmode)
+           emit_insn (gen_abstf2_hw (operands[0], operands[1]));
+         else if (<MODE>mode == KFmode)
+           emit_insn (gen_abskf2_hw (operands[0], operands[1]));
+         else
+           FAIL;
+         DONE;
+       }
+      else if (TARGET_FLOAT128)
        {
          if (<MODE>mode == TFmode)
            emit_insn (gen_ieee_128bit_vsx_abstf2 (operands[0], operands[1]));
@@ -7140,7 +7181,7 @@ (define_insn_and_split "ieee_128bit_vsx_
   [(set (match_operand:IEEE128 0 "register_operand" "=wa")
        (neg:IEEE128 (match_operand:IEEE128 1 "register_operand" "wa")))
    (clobber (match_scratch:V16QI 2 "=v"))]
-  "TARGET_FLOAT128"
+  "TARGET_FLOAT128 && !TARGET_FLOAT128_HW"
   "#"
   "&& 1"
   [(parallel [(set (match_dup 0)
@@ -7160,7 +7201,7 @@ (define_insn "*ieee_128bit_vsx_neg<mode>
   [(set (match_operand:IEEE128 0 "register_operand" "=wa")
        (neg:IEEE128 (match_operand:IEEE128 1 "register_operand" "wa")))
    (use (match_operand:V16QI 2 "register_operand" "=v"))]
-  "TARGET_FLOAT128"
+  "TARGET_FLOAT128 && !TARGET_FLOAT128_HW"
   "xxlxor %x0,%x1,%x2"
   [(set_attr "type" "vecsimple")])
 
@@ -7169,7 +7210,7 @@ (define_insn_and_split "ieee_128bit_vsx_
   [(set (match_operand:IEEE128 0 "register_operand" "=wa")
        (abs:IEEE128 (match_operand:IEEE128 1 "register_operand" "wa")))
    (clobber (match_scratch:V16QI 2 "=v"))]
-  "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)"
+  "TARGET_FLOAT128 && !TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
   "#"
   "&& 1"
   [(parallel [(set (match_dup 0)
@@ -7189,7 +7230,7 @@ (define_insn "*ieee_128bit_vsx_abs<mode>
   [(set (match_operand:IEEE128 0 "register_operand" "=wa")
        (abs:IEEE128 (match_operand:IEEE128 1 "register_operand" "wa")))
    (use (match_operand:V16QI 2 "register_operand" "=v"))]
-  "TARGET_FLOAT128"
+  "TARGET_FLOAT128 && !TARGET_FLOAT128_HW"
   "xxlandc %x0,%x1,%x2"
   [(set_attr "type" "vecsimple")])
 
@@ -7200,7 +7241,7 @@ (define_insn_and_split "*ieee_128bit_vsx
         (abs:IEEE128
          (match_operand:IEEE128 1 "register_operand" "wa"))))
    (clobber (match_scratch:V16QI 2 "=v"))]
-  "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)"
+  "TARGET_FLOAT128 && !TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
   "#"
   "&& 1"
   [(parallel [(set (match_dup 0)
@@ -7222,7 +7263,7 @@ (define_insn "*ieee_128bit_vsx_nabs<mode
         (abs:IEEE128
          (match_operand:IEEE128 1 "register_operand" "wa"))))
    (use (match_operand:V16QI 2 "register_operand" "=v"))]
-  "TARGET_FLOAT128"
+  "TARGET_FLOAT128 && !TARGET_FLOAT128_HW"
   "xxlor %x0,%x1,%x2"
   [(set_attr "type" "vecsimple")])
 
@@ -12998,6 +13039,335 @@ (define_insn "pack<mode>"
 
 
 
+;; ISA 3.0 IEEE 128-bit floating point support.
+
+(define_insn "add<mode>3"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+       (plus:IEEE128
+        (match_operand:IEEE128 1 "altivec_register_operand" "v")
+        (match_operand:IEEE128 2 "altivec_register_operand" "v")))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xsaddqp %0,%1,%2"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "sub<mode>3"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+       (minus:IEEE128
+        (match_operand:IEEE128 1 "altivec_register_operand" "v")
+        (match_operand:IEEE128 2 "altivec_register_operand" "v")))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xssubqp %0,%1,%2"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "mul<mode>3"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+       (mult:IEEE128
+        (match_operand:IEEE128 1 "altivec_register_operand" "v")
+        (match_operand:IEEE128 2 "altivec_register_operand" "v")))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xsmulqp %0,%1,%2"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "div<mode>3"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+       (div:IEEE128
+        (match_operand:IEEE128 1 "altivec_register_operand" "v")
+        (match_operand:IEEE128 2 "altivec_register_operand" "v")))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xsdivqp %0,%1,%2"
+  [(set_attr "type" "vecdiv")])
+
+(define_insn "sqrt<mode>2"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+       (sqrt:IEEE128
+        (match_operand:IEEE128 1 "altivec_register_operand" "v")))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+   "xssqrtqp %0,%1"
+  [(set_attr "type" "vecdiv")])
+
+(define_insn "copysign<mode>3"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+       (unspec:IEEE128
+        [(match_operand:IEEE128 1 "altivec_register_operand" "v")
+         (match_operand:IEEE128 2 "altivec_register_operand" "v")]
+        UNSPEC_COPYSIGN))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+   "xscpsgnqp %0,%2,%1"
+  [(set_attr "type" "vecsimple")])
+
+(define_insn "neg<mode>2_hw"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+       (neg:IEEE128
+        (match_operand:IEEE128 1 "altivec_register_operand" "v")))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xsnegqp %0,%1"
+  [(set_attr "type" "vecfloat")])
+
+
+(define_insn "abs<mode>2_hw"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+       (abs:IEEE128
+        (match_operand:IEEE128 1 "altivec_register_operand" "v")))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xsabsqp %0,%1"
+  [(set_attr "type" "vecfloat")])
+
+
+(define_insn "*nabs<mode>2_hw"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+       (neg:IEEE128
+        (abs:IEEE128
+         (match_operand:IEEE128 1 "altivec_register_operand" "v"))))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xsnabsqp %0,%1"
+  [(set_attr "type" "vecfloat")])
+
+;; Initially don't worry about doing fusion
+(define_insn "*fma<mode>4_hw"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+       (fma:IEEE128
+        (match_operand:IEEE128 1 "altivec_register_operand" "%v")
+        (match_operand:IEEE128 2 "altivec_register_operand" "v")
+        (match_operand:IEEE128 3 "altivec_register_operand" "0")))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xsmaddqp %0,%1,%2"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "*fms<mode>4_hw"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+       (fma:IEEE128
+        (match_operand:IEEE128 1 "altivec_register_operand" "%v")
+        (match_operand:IEEE128 2 "altivec_register_operand" "v")
+        (neg:IEEE128
+         (match_operand:IEEE128 3 "altivec_register_operand" "0"))))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xsmsubqp %0,%1,%2"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "*nfma<mode>4_hw"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+       (neg:IEEE128
+        (fma:IEEE128
+         (match_operand:IEEE128 1 "altivec_register_operand" "%v")
+         (match_operand:IEEE128 2 "altivec_register_operand" "v")
+         (match_operand:IEEE128 3 "altivec_register_operand" "0"))))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xsnmaddqp %0,%1,%2"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "*nfms<mode>4_hw"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+       (neg:IEEE128
+        (fma:IEEE128
+         (match_operand:IEEE128 1 "altivec_register_operand" "%v")
+         (match_operand:IEEE128 2 "altivec_register_operand" "v")
+         (neg:IEEE128
+          (match_operand:IEEE128 3 "altivec_register_operand" "0")))))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xsnmsubqp %0,%1,%2"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "extend<SFDF:mode><IEEE128:mode>2_hw"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+       (float_extend:IEEE128
+        (match_operand:SFDF 1 "altivec_register_operand" "v")))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<IEEE128:MODE>mode)"
+  "xscvdpqp %0,%1"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "trunc<mode>df2_hw"
+  [(set (match_operand:DF 0 "altivec_register_operand" "=v")
+       (float_truncate:DF
+        (match_operand:IEEE128 1 "altivec_register_operand" "v")))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xscvqpdp %0,%1"
+  [(set_attr "type" "vecfloat")])
+
+;; There is no KFmode -> SFmode instruction. Preserve the accuracy by doing
+;; the KFmode -> DFmode conversion using round to odd rather than the normal
+;; conversion
+(define_insn_and_split "trunc<mode>sf2_hw"
+  [(set (match_operand:SF 0 "vsx_register_operand" "=wy")
+       (float_truncate:SF
+        (match_operand:IEEE128 1 "altivec_register_operand" "v")))
+   (clobber (match_scratch:DF 2 "=v"))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "#"
+  "&& 1"
+  [(set (match_dup 2)
+       (unspec:DF [(match_dup 1)] UNSPEC_ROUND_TO_ODD))
+   (set (match_dup 0)
+       (float_truncate:SF (match_dup 2)))]
+{
+  if (GET_CODE (operands[2]) == SCRATCH)
+    operands[2] = gen_reg_rtx (DFmode);
+}
+  [(set_attr "type" "vecfloat")
+   (set_attr "length" "8")])
+
+;; At present SImode is not allowed in VSX registers at all, and DImode is only
+;; allowed in the traditional floating point registers. Use V2DImode so that
+;; we can get a value in an Altivec register.
+
+(define_code_attr fix_fixuns    [(fix   "fix")   (unsigned_fix   "fixuns")])
+(define_code_attr float_floatuns [(float "float") (unsigned_float "floatuns")])
+
+(define_insn_and_split "<fix_fixuns>_<mode>si2_hw"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,Z")
+       (any_fix:SI (match_operand:IEEE128 1 "altivec_register_operand" "v,v")))
+   (clobber (match_scratch:V2DI 2 "=v,v"))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  convert_float128_to_int (operands, <CODE>);
+  DONE;
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "mftgpr,fpstore")])
+
+(define_insn_and_split "<fix_fixuns>_<mode>di2_hw"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=wr,wi,Z")
+       (any_fix:DI (match_operand:IEEE128 1 "altivec_register_operand" "v,v,v")))
+   (clobber (match_scratch:V2DI 2 "=v,v,v"))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  convert_float128_to_int (operands, <CODE>);
+  DONE;
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "mftgpr,vecsimple,fpstore")])
+
+(define_insn_and_split "<float_floatuns>_<mode>si2_hw"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v,v")
+       (any_float:IEEE128 (match_operand:SI 1 "nonimmediate_operand" "r,Z")))
+   (clobber (match_scratch:V2DI 2 "=v,v"))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  convert_int_to_float128 (operands, <CODE>);
+  DONE;
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "vecfloat")])
+
+(define_insn_and_split "<float_floatuns>_<mode>di2_hw"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v,v,v")
+       (any_float:IEEE128 (match_operand:DI 1 "nonimmediate_operand" "wi,wr,Z")))
+   (clobber (match_scratch:V2DI 2 "=v,v,v"))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "#"
+  "&& 1"
+  [(pc)]
+{
+  convert_int_to_float128 (operands, <CODE>);
+  DONE;
+}
+  [(set_attr "length" "8")
+   (set_attr "type" "vecfloat")])
+
+;; Integer conversion instructions, using V2DImode to get an Altivec register
+(define_insn "*xscvqp<su>wz_<mode>"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+       (unspec:V2DI
+        [(any_fix:SI
+          (match_operand:IEEE128 1 "altivec_register_operand" "v"))]
+        UNSPEC_IEEE128_CONVERT))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xscvqp<su>wz %0,%1"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "*xscvqp<su>dz_<mode>"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v")
+       (unspec:V2DI
+        [(any_fix:DI
+          (match_operand:IEEE128 1 "altivec_register_operand" "v"))]
+        UNSPEC_IEEE128_CONVERT))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xscvqp<su>dz %0,%1"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "*xscv<su>dqp_<mode>"
+  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
+       (any_float:IEEE128
+        (unspec:DI [(match_operand:V2DI 1 "altivec_register_operand" "v")]
+                   UNSPEC_IEEE128_CONVERT)))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xscv<su>dqp %0,%1"
+  [(set_attr "type" "vecfloat")])
+
+(define_insn "*ieee128_mfvsrd"
+  [(set (match_operand:DI 0 "reg_or_indexed_operand" "=wr,Z,wi")
+       (unspec:DI [(match_operand:V2DI 1 "altivec_register_operand" "v,v,v")]
+                  UNSPEC_IEEE128_MOVE))]
+  "TARGET_FLOAT128_HW && TARGET_POWERPC64"
+  "@
+   mfvsrd %0,%x1
+   stxsdx %x1,%y0
+   xxlor %x0,%x1,%x1"
+  [(set_attr "type" "mftgpr,fpstore,vecsimple")])
+
+(define_insn "*ieee128_mfvsrwz"
+  [(set (match_operand:SI 0 "reg_or_indexed_operand" "=r,Z")
+       (unspec:SI [(match_operand:V2DI 1 "altivec_register_operand" "v,v")]
+                  UNSPEC_IEEE128_MOVE))]
+  "TARGET_FLOAT128_HW"
+  "@
+   mfvsrwz %0,%x1
+   stxsiwx %x1,%y0"
+  [(set_attr "type" "mftgpr,fpstore")])
+
+;; 0 says do sign-extension, 1 says zero-extension
+(define_insn "*ieee128_mtvsrw"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v,v,v,v")
+       (unspec:V2DI [(match_operand:SI 1 "nonimmediate_operand" "r,Z,r,Z")
+                     (match_operand:SI 2 "const_0_to_1_operand" "O,O,n,n")]
+                    UNSPEC_IEEE128_MOVE))]
+  "TARGET_FLOAT128_HW"
+  "@
+   mtvsrwa %x0,%1
+   lxsiwax %x0,%y1
+   mtvsrwz %x0,%1
+   lxsiwzx %x0,%y1"
+  [(set_attr "type" "mffgpr,fpload,mffgpr,fpload")])
+
+
+(define_insn "*ieee128_mtvsrd"
+  [(set (match_operand:V2DI 0 "altivec_register_operand" "=v,v,v")
+       (unspec:V2DI [(match_operand:DI 1 "nonimmediate_operand" "wr,Z,wi")]
+                    UNSPEC_IEEE128_MOVE))]
+  "TARGET_FLOAT128_HW"
+  "@
+   mtvsrd %x0,%1
+   lxsdx %x0,%y1
+   xxlor %x0,%x1,%x1"
+  [(set_attr "type" "mffgpr,fpload,vecsimple")])
+
+;; IEEE 128-bit instructions with round to odd semantics
+(define_insn "*trunc<mode>df2_odd"
+  [(set (match_operand:DF 0 "vsx_register_operand" "=v")
+       (unspec:DF [(match_operand:IEEE128 1 "altivec_register_operand" "v")]
+                  UNSPEC_ROUND_TO_ODD))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+  "xscvqpdpo %0,%1"
+  [(set_attr "type" "vecfloat")])
+
+;; IEEE 128-bit comparisons
+(define_insn "*cmp<mode>_hw"
+  [(set (match_operand:CCFP 0 "cc_reg_operand" "=y")
+       (compare:CCFP (match_operand:IEEE128 1 "altivec_register_operand" "v")
+                     (match_operand:IEEE128 2 "altivec_register_operand" "v")))]
+  "TARGET_FLOAT128_HW && FLOAT128_IEEE_P (<MODE>mode)"
+   "xscmpuqp %0,%1,%2"
+  [(set_attr "type" "fpcompare")])
+
+
 
 (include "sync.md")
 (include "vector.md")
Index: gcc/testsuite/gcc.target/powerpc/float128-hw.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/float128-hw.c      (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/float128-hw.c      (revision 0)
@@ -0,0 +1,19 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O3" } */
+
+__float128 f128_add (__float128 a, __float128 b) { return a+b; }
+__float128 f128_sub (__float128 a, __float128 b) { return a-b; }
+__float128 f128_mul (__float128 a, __float128 b) { return a*b; }
+__float128 f128_div (__float128 a, __float128 b) { return a/b; }
+__float128 f128_fma (__float128 a, __float128 b, __float128 c) { return (a*b)+c; }
+long f128_cmove (__float128 a, __float128 b, long c, long d) { return (a == b) ? c : d; }
+
+/* { dg-final { scan-assembler "xsaddqp"  } } */
+/* { dg-final { scan-assembler "xssubqp"  } } */
+/* { dg-final { scan-assembler "xsmulqp"  } } */
+/* { dg-final { scan-assembler "xsdivqp"  } } */
+/* { dg-final { scan-assembler "xsmaddqp" } } */
+/* { dg-final { scan-assembler "xscmpuqp" } } */

Re: [PATCH], Add power9 support to GCC, patch #6 (IEEE 128-bit hardware support)

Reply via email to