Hi, A previous patch of mine was misguided. It modified the altivec_vperm_* patterns to use the little endian conversion trick of reversing the input operands and complementing the permute control vector.
Looking at the Altivec manual, we really can't do this. These patterns need to be direct pass-throughs to the vperm instruction, as shown in Figure 4-95 on page 130 of http://www.freescale.com/files/32bit/doc/ref_manual/ALTIVECPIM.pdf. Section 4.2 on page 49 confirms that big-endian byte ordering is to be used with the Altivec instruction descriptions. This patch reverts that specific change, cleans up some associated commentary in another part, and modifies the one test case affected by the change. gcc.dg/vmx/3b-15.c performs the argument reversal and control vector complementing in the source code, as all users will need to do when porting code containing vec_perm calls to little endian. Bootstrapped and tested on powerpc64{,le}-unknown-linux-gnu with no regressions. Is this ok for trunk? Thanks, Bill gcc: 2013-11-15 Bill Schmidt <wschm...@linux.vnet.ibm.com> * config/rs6000/altivec.md (UNSPEC_VPERM_X, UNSPEC_VPERM_UNS_X): Remove. (altivec_vperm_<mode>): Revert earlier little endian change. (*altivec_vperm_<mode>_internal): Remove. (altivec_vperm_<mode>_uns): Revert earlier little endian change. (*altivec_vperm_<mode>_uns_internal): Remove. * config/rs6000/vector.md (vec_realign_load_<mode>): Revise commentary. gcc/testsuite: 2013-11-15 Bill Schmidt <wschm...@linux.vnet.ibm.com> * gcc.dg/vmx/3b-15.c: Revise for little endian. Index: gcc/testsuite/gcc.dg/vmx/3b-15.c =================================================================== --- gcc/testsuite/gcc.dg/vmx/3b-15.c (revision 204792) +++ gcc/testsuite/gcc.dg/vmx/3b-15.c (working copy) @@ -3,7 +3,11 @@ vector unsigned char f (vector unsigned char a, vector unsigned char b, vector unsigned char c) { +#ifdef __BIG_ENDIAN__ return vec_perm(a,b,c); +#else + return vec_perm(b,a,c); +#endif } static void test() @@ -12,8 +16,13 @@ static void test() 8,9,10,11,12,13,14,15}), ((vector unsigned char){70,71,72,73,74,75,76,77, 78,79,80,81,82,83,84,85}), +#ifdef __BIG_ENDIAN__ ((vector unsigned char){0x1,0x14,0x18,0x10,0x16,0x15,0x19,0x1a, 0x1c,0x1c,0x1c,0x12,0x8,0x1d,0x1b,0xe})), +#else + ((vector unsigned char){0x1e,0xb,0x7,0xf,0x9,0xa,0x6,0x5, + 0x3,0x3,0x3,0xd,0x17,0x2,0x4,0x11})), +#endif ((vector unsigned char){1,74,78,70,76,75,79,80,82,82,82,72,8,83,81,14})), "f"); } Index: gcc/config/rs6000/vector.md =================================================================== --- gcc/config/rs6000/vector.md (revision 204792) +++ gcc/config/rs6000/vector.md (working copy) @@ -966,8 +966,8 @@ operands[2], operands[3])); else { - /* Avoid the "subtract from splat31" workaround for vperm since - we have changed lvsr to lvsl instead. */ + /* We have changed lvsr to lvsl, so to complete the transformation + of vperm for LE, we must swap the inputs. */ rtx unspec = gen_rtx_UNSPEC (<MODE>mode, gen_rtvec (3, operands[2], operands[1], operands[3]), Index: gcc/config/rs6000/altivec.md =================================================================== --- gcc/config/rs6000/altivec.md (revision 204792) +++ gcc/config/rs6000/altivec.md (working copy) @@ -59,8 +59,6 @@ UNSPEC_VSUMSWS UNSPEC_VPERM UNSPEC_VPERM_UNS - UNSPEC_VPERM_X - UNSPEC_VPERM_UNS_X UNSPEC_VRFIN UNSPEC_VCFUX UNSPEC_VCFSX @@ -1393,91 +1391,21 @@ "vrfiz %0,%1" [(set_attr "type" "vecfloat")]) -(define_insn_and_split "altivec_vperm_<mode>" +(define_insn "altivec_vperm_<mode>" [(set (match_operand:VM 0 "register_operand" "=v") (unspec:VM [(match_operand:VM 1 "register_operand" "v") (match_operand:VM 2 "register_operand" "v") (match_operand:V16QI 3 "register_operand" "v")] - UNSPEC_VPERM_X))] - "TARGET_ALTIVEC" - "#" - "!reload_in_progress && !reload_completed" - [(set (match_dup 0) (match_dup 4))] -{ - if (BYTES_BIG_ENDIAN) - operands[4] = gen_rtx_UNSPEC (<MODE>mode, - gen_rtvec (3, operands[1], - operands[2], operands[3]), - UNSPEC_VPERM); - else - { - /* We want to subtract from 31, but we can't vspltisb 31 since - it's out of range. -1 works as well because only the low-order - five bits of the permute control vector elements are used. */ - rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode, - gen_rtx_CONST_INT (QImode, -1)); - rtx tmp = gen_reg_rtx (V16QImode); - emit_move_insn (tmp, splat); - rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]); - emit_move_insn (tmp, sel); - operands[4] = gen_rtx_UNSPEC (<MODE>mode, - gen_rtvec (3, operands[2], - operands[1], tmp), - UNSPEC_VPERM); - } -} - [(set_attr "type" "vecperm")]) - -(define_insn "*altivec_vperm_<mode>_internal" - [(set (match_operand:VM 0 "register_operand" "=v") - (unspec:VM [(match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v") - (match_operand:V16QI 3 "register_operand" "+v")] UNSPEC_VPERM))] "TARGET_ALTIVEC" "vperm %0,%1,%2,%3" [(set_attr "type" "vecperm")]) -(define_insn_and_split "altivec_vperm_<mode>_uns" +(define_insn "altivec_vperm_<mode>_uns" [(set (match_operand:VM 0 "register_operand" "=v") (unspec:VM [(match_operand:VM 1 "register_operand" "v") (match_operand:VM 2 "register_operand" "v") (match_operand:V16QI 3 "register_operand" "v")] - UNSPEC_VPERM_UNS_X))] - "TARGET_ALTIVEC" - "#" - "!reload_in_progress && !reload_completed" - [(set (match_dup 0) (match_dup 4))] -{ - if (BYTES_BIG_ENDIAN) - operands[4] = gen_rtx_UNSPEC (<MODE>mode, - gen_rtvec (3, operands[1], - operands[2], operands[3]), - UNSPEC_VPERM_UNS); - else - { - /* We want to subtract from 31, but we can't vspltisb 31 since - it's out of range. -1 works as well because only the low-order - five bits of the permute control vector elements are used. */ - rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode, - gen_rtx_CONST_INT (QImode, -1)); - rtx tmp = gen_reg_rtx (V16QImode); - emit_move_insn (tmp, splat); - rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]); - emit_move_insn (tmp, sel); - operands[4] = gen_rtx_UNSPEC (<MODE>mode, - gen_rtvec (3, operands[2], - operands[1], tmp), - UNSPEC_VPERM_UNS); - } -} - [(set_attr "type" "vecperm")]) - -(define_insn "*altivec_vperm_<mode>_uns_internal" - [(set (match_operand:VM 0 "register_operand" "=v") - (unspec:VM [(match_operand:VM 1 "register_operand" "v") - (match_operand:VM 2 "register_operand" "v") - (match_operand:V16QI 3 "register_operand" "+v")] UNSPEC_VPERM_UNS))] "TARGET_ALTIVEC" "vperm %0,%1,%2,%3"