Segher Boessenkool <seg...@kernel.crashing.org> writes:
> Hi Richard,
>
> On Wed, Jul 12, 2017 at 05:33:42PM +0100, Richard Sandiford wrote:
>> The little-endian VSX code uses rotates to swap the two 64-bit halves of
>> 128-bit scalar modes.  This is fine for TImode and V1TImode, but it
>> isn't really valid to use RTL rotates on floating-point modes like
>> KFmode and TFmode, and doing that triggered an assert added by the
>> SVE series.  This patch uses bit-casts to V1TImode instead.
>> 
>> Tested on powerpc64le-linux-gnu.  OK to install?
>
>
>> +void
>> +rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
>>  {
>>    /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
>>       128-bit integers if they are allowed in VSX registers.  */
>> -  if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
>> -    return gen_rtx_ROTATE (mode, source, GEN_INT (64));
>> +  if (FLOAT128_VECTOR_P (mode))
>> +    {
>> +      dest = gen_lowpart (V1TImode, dest);
>> +      source = gen_lowpart (V1TImode, source);
>> +      mode = V1TImode;
>> +    }
>
> Add an empty line here?  And maybe a comment.
>
>> +  if (mode == TImode || mode == V1TImode)
>> +    emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
>> +                                              GEN_INT (64))));
>>    else
>>      {
>>        rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
>> -      return gen_rtx_VEC_SELECT (mode, source, par);
>> +      emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
>>      }
>>  }
>
>> --- gcc/config/rs6000/vsx.md 2017-06-30 12:50:38.889632907 +0100
>> +++ gcc/config/rs6000/vsx.md 2017-07-12 16:30:38.734631598 +0100
>> @@ -37,6 +37,10 @@ (define_mode_iterator VSX_LE_128 [(KF
>>                                (TI   "TARGET_VSX_TIMODE")
>>                                V1TI])
>>  
>> +;; Same, but with just the integer modes.
>> +(define_mode_iterator VSX_LE_128I [(TI      "TARGET_VSX_TIMODE")
>> +                               V1TI])
>
> I don't like that name much.  The difference between VSX_LE_128 and
> VSX_LE_128I is easy to overlook (and what _is_ the difference?  "I"
> means "integer" I guess?).  The "LE" in the name has no real meaning
> (it is used for LE, sure, but that doesn't matter for the iterator).
> Maybe just VSX_TI?  Or is that too short.
>
> Other than that, looks fine.  Thank you for the patch!

OK, how does this look?  Tested in the same way as before.

> Does this need backports?

Not sure, but probably not.  I don't know of any specific code that
would complain at the moment (but there again I haven't looked that hard).

Thanks,
Richard


2017-07-25  Richard Sandiford  <richard.sandif...@linaro.org>

gcc/
        * config/rs6000/rs6000-protos.h (rs6000_emit_le_vsx_permute): Declare.
        * config/rs6000/rs6000.c (rs6000_gen_le_vsx_permute): Replace with...
        (rs6000_emit_le_vsx_permute): ...this.  Take the destination as input.
        Emit instructions rather than returning an expression.  Handle TFmode
        and KFmode by casting to V1TImode.
        (rs6000_emit_le_vsx_load): Update to use rs6000_emit_le_vsx_permute.
        (rs6000_emit_le_vsx_store): Likewise.
        * config/rs6000/vsx.md (VSX_TI): New iterator.
        (*vsx_le_permute_<mode>): Use it instead of VSX_LE_128.
        (*vsx_le_undo_permute_<mode>): Likewise.
        (*vsx_le_perm_load_<mode>): Use rs6000_emit_le_vsx_permute to
        emit the split sequence.
        (*vsx_le_perm_store_<mode>): Likewise.

Index: gcc/config/rs6000/rs6000-protos.h
===================================================================
--- gcc/config/rs6000/rs6000-protos.h   2017-07-13 09:25:13.909213921 +0100
+++ gcc/config/rs6000/rs6000-protos.h   2017-07-25 11:04:20.314991769 +0100
@@ -151,6 +151,7 @@ extern rtx rs6000_longcall_ref (rtx);
 extern void rs6000_fatal_bad_address (rtx);
 extern rtx create_TOC_reference (rtx, rtx);
 extern void rs6000_split_multireg_move (rtx, rtx);
+extern void rs6000_emit_le_vsx_permute (rtx, rtx, machine_mode);
 extern void rs6000_emit_le_vsx_move (rtx, rtx, machine_mode);
 extern bool valid_sf_si_move (rtx, rtx, machine_mode);
 extern void rs6000_emit_move (rtx, rtx, machine_mode);
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c  2017-07-13 09:25:13.909213921 +0100
+++ gcc/config/rs6000/rs6000.c  2017-07-25 11:14:27.692739547 +0100
@@ -10503,17 +10503,28 @@ rs6000_const_vec (machine_mode mode)
 
 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
    for a VSX load or store operation.  */
-rtx
-rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
+void
+rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
 {
-  /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
-     128-bit integers if they are allowed in VSX registers.  */
-  if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
-    return gen_rtx_ROTATE (mode, source, GEN_INT (64));
+  /* Scalar permutations are easier to express in integer modes rather than
+     floating-point modes, so cast them here.  We use V1TImode instead
+     of TImode to ensure that the values don't go through GPRs.  */
+  if (FLOAT128_VECTOR_P (mode))
+    {
+      dest = gen_lowpart (V1TImode, dest);
+      source = gen_lowpart (V1TImode, source);
+      mode = V1TImode;
+    }
+
+  /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
+     scalar.  */
+  if (mode == TImode || mode == V1TImode)
+    emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
+                                                 GEN_INT (64))));
   else
     {
       rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
-      return gen_rtx_VEC_SELECT (mode, source, par);
+      emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
     }
 }
 
@@ -10523,8 +10534,6 @@ rs6000_gen_le_vsx_permute (rtx source, m
 void
 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
 {
-  rtx tmp, permute_mem, permute_reg;
-
   /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
      V1TImode).  */
   if (mode == TImode || mode == V1TImode)
@@ -10534,11 +10543,9 @@ rs6000_emit_le_vsx_load (rtx dest, rtx s
       source = adjust_address (source, V2DImode, 0);
     }
 
-  tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
-  permute_mem = rs6000_gen_le_vsx_permute (source, mode);
-  permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
-  emit_insn (gen_rtx_SET (tmp, permute_mem));
-  emit_insn (gen_rtx_SET (dest, permute_reg));
+  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
+  rs6000_emit_le_vsx_permute (tmp, source, mode);
+  rs6000_emit_le_vsx_permute (dest, tmp, mode);
 }
 
 /* Emit a little-endian store to vector memory location DEST from VSX
@@ -10547,8 +10554,6 @@ rs6000_emit_le_vsx_load (rtx dest, rtx s
 void
 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
 {
-  rtx tmp, permute_src, permute_tmp;
-
   /* This should never be called during or after reload, because it does
      not re-permute the source register.  It is intended only for use
      during expand.  */
@@ -10563,11 +10568,9 @@ rs6000_emit_le_vsx_store (rtx dest, rtx
       source = gen_lowpart (V2DImode, source);
     }
 
-  tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
-  permute_src = rs6000_gen_le_vsx_permute (source, mode);
-  permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
-  emit_insn (gen_rtx_SET (tmp, permute_src));
-  emit_insn (gen_rtx_SET (dest, permute_tmp));
+  rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
+  rs6000_emit_le_vsx_permute (tmp, source, mode);
+  rs6000_emit_le_vsx_permute (dest, tmp, mode);
 }
 
 /* Emit a sequence representing a little-endian VSX load or store,
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md    2017-07-13 09:25:13.909213921 +0100
+++ gcc/config/rs6000/vsx.md    2017-07-25 11:08:54.160528532 +0100
@@ -37,6 +37,9 @@ (define_mode_iterator VSX_LE_128 [(KF
                                  (TI   "TARGET_VSX_TIMODE")
                                  V1TI])
 
+;; Iterator for 128-bit integer types that go in a single vector register.
+(define_mode_iterator VSX_TI [(TI "TARGET_VSX_TIMODE") V1TI])
+
 ;; Iterator for the 2 32-bit vector types
 (define_mode_iterator VSX_W [V4SF V4SI])
 
@@ -750,9 +753,9 @@ (define_split
 ;; special V1TI container class, which it is not appropriate to use vec_select
 ;; for the type.
 (define_insn "*vsx_le_permute_<mode>"
-  [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
-       (rotate:VSX_LE_128
-        (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
+  [(set (match_operand:VSX_TI 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
+       (rotate:VSX_TI
+        (match_operand:VSX_TI 1 "input_operand" "<VSa>,Z,<VSa>")
         (const_int 64)))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "@
@@ -763,10 +766,10 @@ (define_insn "*vsx_le_permute_<mode>"
    (set_attr "type" "vecperm,vecload,vecstore")])
 
 (define_insn_and_split "*vsx_le_undo_permute_<mode>"
-  [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>")
-       (rotate:VSX_LE_128
-        (rotate:VSX_LE_128
-         (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>")
+  [(set (match_operand:VSX_TI 0 "vsx_register_operand" "=<VSa>,<VSa>")
+       (rotate:VSX_TI
+        (rotate:VSX_TI
+         (match_operand:VSX_TI 1 "vsx_register_operand" "0,<VSa>")
          (const_int 64))
         (const_int 64)))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
@@ -791,16 +794,15 @@ (define_insn_and_split "*vsx_le_perm_loa
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
   "#"
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
-  [(set (match_dup 2)
-       (rotate:VSX_LE_128 (match_dup 1)
-                          (const_int 64)))
-   (set (match_dup 0)
-       (rotate:VSX_LE_128 (match_dup 2)
-                          (const_int 64)))]
+  [(const_int 0)]
   "
 {
-  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
-                                       : operands[0];
+  rtx tmp = (can_create_pseudo_p ()
+            ? gen_reg_rtx_and_attrs (operands[0])
+            : operands[0]);
+  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+  DONE;
 }
   "
   [(set_attr "type" "vecload")
@@ -818,15 +820,14 @@ (define_split
   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
-  [(set (match_dup 2)
-       (rotate:VSX_LE_128 (match_dup 1)
-                          (const_int 64)))
-   (set (match_dup 0)
-       (rotate:VSX_LE_128 (match_dup 2)
-                          (const_int 64)))]
+  [(const_int 0)]
 {
-  operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
-                                       : operands[0];
+  rtx tmp = (can_create_pseudo_p ()
+            ? gen_reg_rtx_and_attrs (operands[0])
+            : operands[0]);
+  rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+  DONE;
 })
 
 ;; Peephole to catch memory to memory transfers for TImode if TImode landed in
@@ -850,16 +851,13 @@ (define_split
   [(set (match_operand:VSX_LE_128 0 "memory_operand" "")
         (match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
-  [(set (match_dup 1)
-       (rotate:VSX_LE_128 (match_dup 1)
-                          (const_int 64)))
-   (set (match_dup 0)
-       (rotate:VSX_LE_128 (match_dup 1)
-                          (const_int 64)))
-   (set (match_dup 1)
-       (rotate:VSX_LE_128 (match_dup 1)
-                          (const_int 64)))]
-  "")
+  [(const_int 0)]
+{
+  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
+  rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+  DONE;
+})
 
 ;; Vector constants that can be generated with XXSPLTIB that was added in ISA
 ;; 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.

Reply via email to