Hi, This change removes an extraneous NOP instruction placed at the end of the code produced by each of the byte-swap patterns. The NOP comes from the expansion of the (const_int 0) RTL that the `define_split' definitions produce unnecessarily. The updated patterns follow what other targets do in corresponding situations.
The change in output produced can be illustrated with the following simple example: $ cat bswap.c long long bswap (long long i) { return __builtin_bswap64 (i); } $ powerpc-linux-gnu-gcc -S -dp -o bswap.s bswap.c This currently produces the following code: .file "bswap.c" .section ".text" .align 2 .globl bswap .type bswap, @function bswap: stwu 1,-32(1) # 20 movsi_update/2 [length = 4] stw 31,28(1) # 21 *movsi_internal1/4 [length = 4] mr 31,1 # 22 *movsi_internal1/1 [length = 4] stw 3,8(31) # 31 *movsi_internal1/4 [length = 4] stw 4,12(31) # 32 *movsi_internal1/4 [length = 4] lwz 9,8(31) # 33 *movsi_internal1/3 [length = 4] lwz 10,12(31) # 34 *movsi_internal1/3 [length = 4] rlwinm 7,10,8,0xffffffff # 38 rotlsi3/2 [length = 4] rlwimi 7,10,24,0,7 # 39 insvsi_internal [length = 4] rlwimi 7,10,24,16,23 # 40 *insvsi_internal1 [length = 4] rlwinm 8,9,8,0xffffffff # 41 rotlsi3/2 [length = 4] rlwimi 8,9,24,0,7 # 42 insvsi_internal [length = 4] rlwimi 8,9,24,16,23 # 43 *insvsi_internal1 [length = 4] nop # 37 nop [length = 4] mr 10,8 # 44 *movsi_internal1/1 [length = 4] mr 9,7 # 45 *movsi_internal1/1 [length = 4] mr 3,9 # 46 *movsi_internal1/1 [length = 4] mr 4,10 # 47 *movsi_internal1/1 [length = 4] addi 11,31,32 # 25 *addsi3_internal1/2 [length = 4] lwz 31,-4(11) # 26 *movsi_internal1/3 [length = 4] mr 1,11 # 28 *movsi_internal1/1 [length = 4] blr # 29 *return_internal_si [length = 4] .size bswap,.-bswap Notice the NOP in the middle. 
With this change applied this code is produced instead: .file "bswap.c" .section ".text" .align 2 .globl bswap .type bswap, @function bswap: stwu 1,-32(1) # 20 movsi_update/2 [length = 4] stw 31,28(1) # 21 *movsi_internal1/4 [length = 4] mr 31,1 # 22 *movsi_internal1/1 [length = 4] stw 3,8(31) # 31 *movsi_internal1/4 [length = 4] stw 4,12(31) # 32 *movsi_internal1/4 [length = 4] lwz 9,8(31) # 33 *movsi_internal1/3 [length = 4] lwz 10,12(31) # 34 *movsi_internal1/3 [length = 4] rlwinm 7,10,8,0xffffffff # 37 rotlsi3/2 [length = 4] rlwimi 7,10,24,0,7 # 38 insvsi_internal [length = 4] rlwimi 7,10,24,16,23 # 39 *insvsi_internal1 [length = 4] rlwinm 8,9,8,0xffffffff # 40 rotlsi3/2 [length = 4] rlwimi 8,9,24,0,7 # 41 insvsi_internal [length = 4] rlwimi 8,9,24,16,23 # 42 *insvsi_internal1 [length = 4] mr 10,8 # 43 *movsi_internal1/1 [length = 4] mr 9,7 # 44 *movsi_internal1/1 [length = 4] mr 3,9 # 45 *movsi_internal1/1 [length = 4] mr 4,10 # 46 *movsi_internal1/1 [length = 4] addi 11,31,32 # 25 *addsi3_internal1/2 [length = 4] lwz 31,-4(11) # 26 *movsi_internal1/3 [length = 4] mr 1,11 # 28 *movsi_internal1/1 [length = 4] blr # 29 *return_internal_si [length = 4] .size bswap,.-bswap This has been regression tested with the powerpc-eabi target and the following multilibs: -mcpu=603e -mcpu=603e -msoft-float -mcpu=8540 -mfloat-gprs=single -mspe=yes -mabi=spe -mcpu=8540 -mfloat-gprs=single -mspe=yes -mabi=spe -msoft-float -mcpu=8548 -mfloat-gprs=double -mspe=yes -mabi=spe -mcpu=8548 -mfloat-gprs=double -mspe=yes -mabi=spe -mlittle -mcpu=8548 -mfloat-gprs=double -mspe=yes -mabi=spe -msoft-float -mcpu=7400 -maltivec -mabi=altivec as well as the powerpc-linux-gnu target and the following multilibs: -mcpu=603e -mcpu=603e -msoft-float -mcpu=8540 -mfloat-gprs=single -mspe=yes -mabi=spe -mcpu=8548 -mfloat-gprs=double -mspe=yes -mabi=spe -mcpu=7400 -maltivec -mabi=altivec -mcpu=e5500 -m64 OK to apply? 2014-06-20 Maciej W. 
Rozycki <ma...@codesourcery.com> gcc/ * config/rs6000/rs6000.md: Append `DONE' to preparation statements of `bswap' pattern splitters. Maciej gcc-ppc-bswap-done.diff Index: gcc-fsf-trunk-quilt/gcc/config/rs6000/rs6000.md =================================================================== --- gcc-fsf-trunk-quilt.orig/gcc/config/rs6000/rs6000.md 2014-06-10 21:46:36.000000000 +0100 +++ gcc-fsf-trunk-quilt/gcc/config/rs6000/rs6000.md 2014-06-11 02:40:42.028572744 +0100 @@ -2475,6 +2475,7 @@ emit_insn (gen_bswapsi2 (op4_32, word_high)); emit_insn (gen_ashldi3 (dest, op3, GEN_INT (32))); emit_insn (gen_iordi3 (dest, dest, op4)); + DONE; }") (define_split @@ -2536,6 +2537,7 @@ } emit_insn (gen_bswapsi2 (word_high, src_si)); emit_insn (gen_bswapsi2 (word_low, op3_si)); + DONE; }") (define_split @@ -2563,6 +2565,7 @@ emit_insn (gen_bswapsi2 (op3_si, op2_si)); emit_insn (gen_ashldi3 (dest, dest, GEN_INT (32))); emit_insn (gen_iordi3 (dest, dest, op3)); + DONE; }") (define_insn "bswapdi2_32bit" @@ -2619,6 +2622,7 @@ emit_insn (gen_bswapsi2 (dest2, word1)); emit_insn (gen_bswapsi2 (dest1, word2)); + DONE; }") (define_split @@ -2667,6 +2671,7 @@ emit_insn (gen_bswapsi2 (word2, src1)); emit_insn (gen_bswapsi2 (word1, src2)); + DONE; }") (define_split @@ -2686,6 +2691,7 @@ emit_insn (gen_bswapsi2 (dest1, src2)); emit_insn (gen_bswapsi2 (dest2, src1)); + DONE; }") (define_insn "mulsi3"