Hi,

 This change removes an extraneous NOP instruction placed at the end of 
the code produced by each of the byte-swap patterns.  The NOP results 
from the expansion of the (const_int 0) RTL that the `define_split' 
definitions unnecessarily produce when their preparation statements do 
not end with `DONE'.  The updated patterns follow what other targets do 
in corresponding situations.

 The change in output produced can be illustrated with the following 
simple example:

$ cat bswap.c
long long
bswap (long long i)
{
  return __builtin_bswap64 (i);
}
$ powerpc-linux-gnu-gcc -S -dp -o bswap.s bswap.c

This currently produces the following code:

        .file   "bswap.c"
        .section        ".text"
        .align 2
        .globl bswap
        .type   bswap, @function
bswap:
        stwu 1,-32(1)    # 20   movsi_update/2  [length = 4]
        stw 31,28(1)     # 21   *movsi_internal1/4      [length = 4]
        mr 31,1  # 22   *movsi_internal1/1      [length = 4]
        stw 3,8(31)      # 31   *movsi_internal1/4      [length = 4]
        stw 4,12(31)     # 32   *movsi_internal1/4      [length = 4]
        lwz 9,8(31)      # 33   *movsi_internal1/3      [length = 4]
        lwz 10,12(31)    # 34   *movsi_internal1/3      [length = 4]
        rlwinm 7,10,8,0xffffffff         # 38   rotlsi3/2       [length = 4]
        rlwimi 7,10,24,0,7       # 39   insvsi_internal [length = 4]
        rlwimi 7,10,24,16,23     # 40   *insvsi_internal1       [length = 4]
        rlwinm 8,9,8,0xffffffff  # 41   rotlsi3/2       [length = 4]
        rlwimi 8,9,24,0,7        # 42   insvsi_internal [length = 4]
        rlwimi 8,9,24,16,23      # 43   *insvsi_internal1       [length = 4]
        nop      # 37   nop     [length = 4]
        mr 10,8  # 44   *movsi_internal1/1      [length = 4]
        mr 9,7   # 45   *movsi_internal1/1      [length = 4]
        mr 3,9   # 46   *movsi_internal1/1      [length = 4]
        mr 4,10  # 47   *movsi_internal1/1      [length = 4]
        addi 11,31,32    # 25   *addsi3_internal1/2     [length = 4]
        lwz 31,-4(11)    # 26   *movsi_internal1/3      [length = 4]
        mr 1,11  # 28   *movsi_internal1/1      [length = 4]
        blr      # 29   *return_internal_si     [length = 4]
        .size   bswap,.-bswap

Notice the NOP in the middle.  With this change applied this code is 
produced instead:

        .file   "bswap.c"
        .section        ".text"
        .align 2
        .globl bswap
        .type   bswap, @function
bswap:
        stwu 1,-32(1)    # 20   movsi_update/2  [length = 4]
        stw 31,28(1)     # 21   *movsi_internal1/4      [length = 4]
        mr 31,1  # 22   *movsi_internal1/1      [length = 4]
        stw 3,8(31)      # 31   *movsi_internal1/4      [length = 4]
        stw 4,12(31)     # 32   *movsi_internal1/4      [length = 4]
        lwz 9,8(31)      # 33   *movsi_internal1/3      [length = 4]
        lwz 10,12(31)    # 34   *movsi_internal1/3      [length = 4]
        rlwinm 7,10,8,0xffffffff         # 37   rotlsi3/2       [length = 4]
        rlwimi 7,10,24,0,7       # 38   insvsi_internal [length = 4]
        rlwimi 7,10,24,16,23     # 39   *insvsi_internal1       [length = 4]
        rlwinm 8,9,8,0xffffffff  # 40   rotlsi3/2       [length = 4]
        rlwimi 8,9,24,0,7        # 41   insvsi_internal [length = 4]
        rlwimi 8,9,24,16,23      # 42   *insvsi_internal1       [length = 4]
        mr 10,8  # 43   *movsi_internal1/1      [length = 4]
        mr 9,7   # 44   *movsi_internal1/1      [length = 4]
        mr 3,9   # 45   *movsi_internal1/1      [length = 4]
        mr 4,10  # 46   *movsi_internal1/1      [length = 4]
        addi 11,31,32    # 25   *addsi3_internal1/2     [length = 4]
        lwz 31,-4(11)    # 26   *movsi_internal1/3      [length = 4]
        mr 1,11  # 28   *movsi_internal1/1      [length = 4]
        blr      # 29   *return_internal_si     [length = 4]
        .size   bswap,.-bswap

 This has been regression-tested with the powerpc-eabi target and the 
following multilibs:

-mcpu=603e
-mcpu=603e -msoft-float
-mcpu=8540 -mfloat-gprs=single -mspe=yes -mabi=spe
-mcpu=8540 -mfloat-gprs=single -mspe=yes -mabi=spe -msoft-float
-mcpu=8548 -mfloat-gprs=double -mspe=yes -mabi=spe
-mcpu=8548 -mfloat-gprs=double -mspe=yes -mabi=spe -mlittle
-mcpu=8548 -mfloat-gprs=double -mspe=yes -mabi=spe -msoft-float
-mcpu=7400 -maltivec -mabi=altivec

as well as the powerpc-linux-gnu target and the following multilibs:

-mcpu=603e
-mcpu=603e -msoft-float
-mcpu=8540 -mfloat-gprs=single -mspe=yes -mabi=spe
-mcpu=8548 -mfloat-gprs=double -mspe=yes -mabi=spe
-mcpu=7400 -maltivec -mabi=altivec
-mcpu=e5500 -m64

 OK to apply?

2014-06-20  Maciej W. Rozycki  <ma...@codesourcery.com>

        gcc/
        * config/rs6000/rs6000.md: Append `DONE' to preparation
        statements of `bswap' pattern splitters.

  Maciej

gcc-ppc-bswap-done.diff
Index: gcc-fsf-trunk-quilt/gcc/config/rs6000/rs6000.md
===================================================================
--- gcc-fsf-trunk-quilt.orig/gcc/config/rs6000/rs6000.md        2014-06-10 21:46:36.000000000 +0100
+++ gcc-fsf-trunk-quilt/gcc/config/rs6000/rs6000.md     2014-06-11 02:40:42.028572744 +0100
@@ -2475,6 +2475,7 @@
   emit_insn (gen_bswapsi2 (op4_32, word_high));
   emit_insn (gen_ashldi3 (dest, op3, GEN_INT (32)));
   emit_insn (gen_iordi3 (dest, dest, op4));
+  DONE;
 }")
 
 (define_split
@@ -2536,6 +2537,7 @@
     }
   emit_insn (gen_bswapsi2 (word_high, src_si));
   emit_insn (gen_bswapsi2 (word_low, op3_si));
+  DONE;
 }")
 
 (define_split
@@ -2563,6 +2565,7 @@
   emit_insn (gen_bswapsi2 (op3_si, op2_si));
   emit_insn (gen_ashldi3 (dest, dest, GEN_INT (32)));
   emit_insn (gen_iordi3 (dest, dest, op3));
+  DONE;
 }")
 
 (define_insn "bswapdi2_32bit"
@@ -2619,6 +2622,7 @@
 
   emit_insn (gen_bswapsi2 (dest2, word1));
   emit_insn (gen_bswapsi2 (dest1, word2));
+  DONE;
 }")
 
 (define_split
@@ -2667,6 +2671,7 @@
 
   emit_insn (gen_bswapsi2 (word2, src1));
   emit_insn (gen_bswapsi2 (word1, src2));
+  DONE;
 }")
 
 (define_split
@@ -2686,6 +2691,7 @@
 
   emit_insn (gen_bswapsi2 (dest1, src2));
   emit_insn (gen_bswapsi2 (dest2, src1));
+  DONE;
 }")
 
 (define_insn "mulsi3"

Reply via email to