This patch provides for interpreting parity of element numbers for the Altivec vec_mule and vec_mulo intrinsics as big-endian (left to right in a vector register) when targeting a little-endian machine and specifying -maltivec=be. New test cases are added to test this functionality on all supported vector types.
The main change is in the altivec.md define_insns for vec_widen_{su}mult_{even,odd}_{v8hi,v16qi}, where we now test for VECTOR_ELT_ORDER_BIG rather than BYTES_BIG_ENDIAN in order to treat the element order as big-endian. However, this necessitates changes to other places in altivec.md where we previously called gen_vec_widen_{su}mult_*. The semantics of these internal uses are not affected by -maltivec=be, so these are now replaced with direct generation of the underlying instructions that were previously generated. Bootstrapped and tested with no new regressions on powerpc64{,le}-unknown-linux-gnu. Ok for trunk? Thanks, Bill gcc: 2014-01-13 Bill Schmidt <wschm...@vnet.linux.ibm.com> * config/rs6000/altivec.md (mulv8hi3): Explicitly generate vmulesh and vmulosh rather than call gen_vec_widen_smult_*. (vec_widen_umult_even_v16qi): Test VECTOR_ELT_ORDER_BIG rather than BYTES_BIG_ENDIAN to determine use of even or odd instruction. (vec_widen_smult_even_v16qi): Likewise. (vec_widen_umult_even_v8hi): Likewise. (vec_widen_smult_even_v8hi): Likewise. (vec_widen_umult_odd_v16qi): Likewise. (vec_widen_smult_odd_v16qi): Likewise. (vec_widen_umult_odd_v8hi): Likewise. (vec_widen_smult_odd_v8hi): Likewise. (vec_widen_umult_hi_v16qi): Explicitly generate vmuleub and vmuloub rather than call gen_vec_widen_umult_*. (vec_widen_umult_lo_v16qi): Likewise. (vec_widen_smult_hi_v16qi): Explicitly generate vmulesb and vmulosb rather than call gen_vec_widen_smult_*. (vec_widen_smult_lo_v16qi): Likewise. (vec_widen_umult_hi_v8hi): Explicitly generate vmuleuh and vmulouh rather than call gen_vec_widen_umult_*. (vec_widen_umult_lo_v8hi): Likewise. (vec_widen_smult_hi_v8hi): Explicitly generate vmulesh and vmulosh rather than call gen_vec_widen_smult_*. (vec_widen_smult_lo_v8hi): Likewise. gcc/testsuite: 2014-01-13 Bill Schmidt <wschm...@vnet.linux.ibm.com> * gcc.dg/vmx/mult-even-odd.c: New. * gcc.dg/vmx/mult-even-odd-be-order.c: New. 
Index: gcc/testsuite/gcc.dg/vmx/mult-even-odd.c =================================================================== --- gcc/testsuite/gcc.dg/vmx/mult-even-odd.c (revision 0) +++ gcc/testsuite/gcc.dg/vmx/mult-even-odd.c (revision 0) @@ -0,0 +1,43 @@ +#include "harness.h" + +static void test() +{ + vector unsigned char vuca = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + vector unsigned char vucb = {2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3}; + vector signed char vsca = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7}; + vector signed char vscb = {2,-3,2,-3,2,-3,2,-3,2,-3,2,-3,2,-3,2,-3}; + vector unsigned short vusa = {0,1,2,3,4,5,6,7}; + vector unsigned short vusb = {2,3,2,3,2,3,2,3}; + vector signed short vssa = {-4,-3,-2,-1,0,1,2,3}; + vector signed short vssb = {2,-3,2,-3,2,-3,2,-3}; + vector unsigned short vuse, vuso; + vector signed short vsse, vsso; + vector unsigned int vuie, vuio; + vector signed int vsie, vsio; + + vuse = vec_mule (vuca, vucb); + vuso = vec_mulo (vuca, vucb); + vsse = vec_mule (vsca, vscb); + vsso = vec_mulo (vsca, vscb); + vuie = vec_mule (vusa, vusb); + vuio = vec_mulo (vusa, vusb); + vsie = vec_mule (vssa, vssb); + vsio = vec_mulo (vssa, vssb); + + check (vec_all_eq (vuse, + ((vector unsigned short){0,4,8,12,16,20,24,28})), + "vuse"); + check (vec_all_eq (vuso, + ((vector unsigned short){3,9,15,21,27,33,39,45})), + "vuso"); + check (vec_all_eq (vsse, + ((vector signed short){-16,-12,-8,-4,0,4,8,12})), + "vsse"); + check (vec_all_eq (vsso, + ((vector signed short){21,15,9,3,-3,-9,-15,-21})), + "vsso"); + check (vec_all_eq (vuie, ((vector unsigned int){0,4,8,12})), "vuie"); + check (vec_all_eq (vuio, ((vector unsigned int){3,9,15,21})), "vuio"); + check (vec_all_eq (vsie, ((vector signed int){-8,-4,0,4})), "vsie"); + check (vec_all_eq (vsio, ((vector signed int){9,3,-3,-9})), "vsio"); +} Index: gcc/testsuite/gcc.dg/vmx/mult-even-odd-be-order.c =================================================================== --- 
gcc/testsuite/gcc.dg/vmx/mult-even-odd-be-order.c (revision 0) +++ gcc/testsuite/gcc.dg/vmx/mult-even-odd-be-order.c (revision 0) @@ -0,0 +1,64 @@ +/* { dg-options "-maltivec=be -mabi=altivec -std=gnu99 -mno-vsx" } */ + +#include "harness.h" + +static void test() +{ + vector unsigned char vuca = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; + vector unsigned char vucb = {2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3}; + vector signed char vsca = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7}; + vector signed char vscb = {2,-3,2,-3,2,-3,2,-3,2,-3,2,-3,2,-3,2,-3}; + vector unsigned short vusa = {0,1,2,3,4,5,6,7}; + vector unsigned short vusb = {2,3,2,3,2,3,2,3}; + vector signed short vssa = {-4,-3,-2,-1,0,1,2,3}; + vector signed short vssb = {2,-3,2,-3,2,-3,2,-3}; + vector unsigned short vuse, vuso; + vector signed short vsse, vsso; + vector unsigned int vuie, vuio; + vector signed int vsie, vsio; + + vuse = vec_mule (vuca, vucb); + vuso = vec_mulo (vuca, vucb); + vsse = vec_mule (vsca, vscb); + vsso = vec_mulo (vsca, vscb); + vuie = vec_mule (vusa, vusb); + vuio = vec_mulo (vusa, vusb); + vsie = vec_mule (vssa, vssb); + vsio = vec_mulo (vssa, vssb); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + check (vec_all_eq (vuse, + ((vector unsigned short){3,9,15,21,27,33,39,45})), + "vuse"); + check (vec_all_eq (vuso, + ((vector unsigned short){0,4,8,12,16,20,24,28})), + "vuso"); + check (vec_all_eq (vsse, + ((vector signed short){21,15,9,3,-3,-9,-15,-21})), + "vsse"); + check (vec_all_eq (vsso, + ((vector signed short){-16,-12,-8,-4,0,4,8,12})), + "vsso"); + check (vec_all_eq (vuie, ((vector unsigned int){3,9,15,21})), "vuie"); + check (vec_all_eq (vuio, ((vector unsigned int){0,4,8,12})), "vuio"); + check (vec_all_eq (vsie, ((vector signed int){9,3,-3,-9})), "vsie"); + check (vec_all_eq (vsio, ((vector signed int){-8,-4,0,4})), "vsio"); +#else + check (vec_all_eq (vuse, + ((vector unsigned short){0,4,8,12,16,20,24,28})), + "vuse"); + check (vec_all_eq (vuso, + ((vector unsigned 
short){3,9,15,21,27,33,39,45})), + "vuso"); + check (vec_all_eq (vsse, + ((vector signed short){-16,-12,-8,-4,0,4,8,12})), + "vsse"); + check (vec_all_eq (vsso, + ((vector signed short){21,15,9,3,-3,-9,-15,-21})), + "vsso"); + check (vec_all_eq (vuie, ((vector unsigned int){0,4,8,12})), "vuie"); + check (vec_all_eq (vuio, ((vector unsigned int){3,9,15,21})), "vuio"); + check (vec_all_eq (vsie, ((vector signed int){-8,-4,0,4})), "vsie"); + check (vec_all_eq (vsio, ((vector signed int){9,3,-3,-9})), "vsio"); +#endif +} Index: gcc/config/rs6000/altivec.md =================================================================== --- gcc/config/rs6000/altivec.md (revision 206375) +++ gcc/config/rs6000/altivec.md (working copy) @@ -673,17 +673,18 @@ rtx high = gen_reg_rtx (V4SImode); rtx low = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_widen_smult_even_v8hi (even, operands[1], operands[2])); - emit_insn (gen_vec_widen_smult_odd_v8hi (odd, operands[1], operands[2])); - if (BYTES_BIG_ENDIAN) { + emit_insn (gen_altivec_vmulesh (even, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (odd, operands[1], operands[2])); emit_insn (gen_altivec_vmrghw (high, even, odd)); emit_insn (gen_altivec_vmrglw (low, even, odd)); emit_insn (gen_altivec_vpkuwum (operands[0], high, low)); } else { + emit_insn (gen_altivec_vmulosh (even, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (odd, operands[1], operands[2])); emit_insn (gen_altivec_vmrghw (high, odd, even)); emit_insn (gen_altivec_vmrglw (low, odd, even)); emit_insn (gen_altivec_vpkuwum (operands[0], low, high)); @@ -981,7 +982,7 @@ (use (match_operand:V16QI 2 "register_operand" ""))] "TARGET_ALTIVEC" { - if (BYTES_BIG_ENDIAN) + if (VECTOR_ELT_ORDER_BIG) emit_insn (gen_altivec_vmuleub (operands[0], operands[1], operands[2])); else emit_insn (gen_altivec_vmuloub (operands[0], operands[1], operands[2])); @@ -994,7 +995,7 @@ (use (match_operand:V16QI 2 "register_operand" ""))] "TARGET_ALTIVEC" { - if 
(BYTES_BIG_ENDIAN) + if (VECTOR_ELT_ORDER_BIG) emit_insn (gen_altivec_vmulesb (operands[0], operands[1], operands[2])); else emit_insn (gen_altivec_vmulosb (operands[0], operands[1], operands[2])); @@ -1007,7 +1008,7 @@ (use (match_operand:V8HI 2 "register_operand" ""))] "TARGET_ALTIVEC" { - if (BYTES_BIG_ENDIAN) + if (VECTOR_ELT_ORDER_BIG) emit_insn (gen_altivec_vmuleuh (operands[0], operands[1], operands[2])); else emit_insn (gen_altivec_vmulouh (operands[0], operands[1], operands[2])); @@ -1020,7 +1021,7 @@ (use (match_operand:V8HI 2 "register_operand" ""))] "TARGET_ALTIVEC" { - if (BYTES_BIG_ENDIAN) + if (VECTOR_ELT_ORDER_BIG) emit_insn (gen_altivec_vmulesh (operands[0], operands[1], operands[2])); else emit_insn (gen_altivec_vmulosh (operands[0], operands[1], operands[2])); @@ -1033,7 +1034,7 @@ (use (match_operand:V16QI 2 "register_operand" ""))] "TARGET_ALTIVEC" { - if (BYTES_BIG_ENDIAN) + if (VECTOR_ELT_ORDER_BIG) emit_insn (gen_altivec_vmuloub (operands[0], operands[1], operands[2])); else emit_insn (gen_altivec_vmuleub (operands[0], operands[1], operands[2])); @@ -1046,7 +1047,7 @@ (use (match_operand:V16QI 2 "register_operand" ""))] "TARGET_ALTIVEC" { - if (BYTES_BIG_ENDIAN) + if (VECTOR_ELT_ORDER_BIG) emit_insn (gen_altivec_vmulosb (operands[0], operands[1], operands[2])); else emit_insn (gen_altivec_vmulesb (operands[0], operands[1], operands[2])); @@ -1059,7 +1060,7 @@ (use (match_operand:V8HI 2 "register_operand" ""))] "TARGET_ALTIVEC" { - if (BYTES_BIG_ENDIAN) + if (VECTOR_ELT_ORDER_BIG) emit_insn (gen_altivec_vmulouh (operands[0], operands[1], operands[2])); else emit_insn (gen_altivec_vmuleuh (operands[0], operands[1], operands[2])); @@ -1072,7 +1073,7 @@ (use (match_operand:V8HI 2 "register_operand" ""))] "TARGET_ALTIVEC" { - if (BYTES_BIG_ENDIAN) + if (VECTOR_ELT_ORDER_BIG) emit_insn (gen_altivec_vmulosh (operands[0], operands[1], operands[2])); else emit_insn (gen_altivec_vmulesh (operands[0], operands[1], operands[2])); @@ -2220,12 +2221,18 @@ 
rtx ve = gen_reg_rtx (V8HImode); rtx vo = gen_reg_rtx (V8HImode); - emit_insn (gen_vec_widen_umult_even_v16qi (ve, operands[1], operands[2])); - emit_insn (gen_vec_widen_umult_odd_v16qi (vo, operands[1], operands[2])); if (BYTES_BIG_ENDIAN) - emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + { + emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } else - emit_insn (gen_altivec_vmrghh (operands[0], vo, ve)); + { + emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghh (operands[0], vo, ve)); + } DONE; }") @@ -2240,12 +2247,18 @@ rtx ve = gen_reg_rtx (V8HImode); rtx vo = gen_reg_rtx (V8HImode); - emit_insn (gen_vec_widen_umult_even_v16qi (ve, operands[1], operands[2])); - emit_insn (gen_vec_widen_umult_odd_v16qi (vo, operands[1], operands[2])); if (BYTES_BIG_ENDIAN) - emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + { + emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + } else - emit_insn (gen_altivec_vmrglh (operands[0], vo, ve)); + { + emit_insn (gen_altivec_vmuloub (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleub (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglh (operands[0], vo, ve)); + } DONE; }") @@ -2260,12 +2273,18 @@ rtx ve = gen_reg_rtx (V8HImode); rtx vo = gen_reg_rtx (V8HImode); - emit_insn (gen_vec_widen_smult_even_v16qi (ve, operands[1], operands[2])); - emit_insn (gen_vec_widen_smult_odd_v16qi (vo, operands[1], operands[2])); if (BYTES_BIG_ENDIAN) - emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + { + emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (vo, operands[1], 
operands[2])); + emit_insn (gen_altivec_vmrghh (operands[0], ve, vo)); + } else - emit_insn (gen_altivec_vmrghh (operands[0], vo, ve)); + { + emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghh (operands[0], vo, ve)); + } DONE; }") @@ -2280,12 +2299,18 @@ rtx ve = gen_reg_rtx (V8HImode); rtx vo = gen_reg_rtx (V8HImode); - emit_insn (gen_vec_widen_smult_even_v16qi (ve, operands[1], operands[2])); - emit_insn (gen_vec_widen_smult_odd_v16qi (vo, operands[1], operands[2])); if (BYTES_BIG_ENDIAN) - emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + { + emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglh (operands[0], ve, vo)); + } else - emit_insn (gen_altivec_vmrglh (operands[0], vo, ve)); + { + emit_insn (gen_altivec_vmulosb (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesb (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglh (operands[0], vo, ve)); + } DONE; }") @@ -2300,12 +2325,18 @@ rtx ve = gen_reg_rtx (V4SImode); rtx vo = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2])); - emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2])); if (BYTES_BIG_ENDIAN) - emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + { + emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } else - emit_insn (gen_altivec_vmrghw (operands[0], vo, ve)); + { + emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw (operands[0], vo, ve)); + } DONE; }") @@ -2320,12 +2351,18 @@ rtx ve = gen_reg_rtx (V4SImode); rtx vo = gen_reg_rtx (V4SImode); - 
emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2])); - emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2])); if (BYTES_BIG_ENDIAN) - emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + { + emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } else - emit_insn (gen_altivec_vmrglw (operands[0], vo, ve)); + { + emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglw (operands[0], vo, ve)); + } DONE; }") @@ -2340,12 +2377,18 @@ rtx ve = gen_reg_rtx (V4SImode); rtx vo = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2])); - emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2])); if (BYTES_BIG_ENDIAN) - emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + { + emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); + } else - emit_insn (gen_altivec_vmrghw (operands[0], vo, ve)); + { + emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrghw (operands[0], vo, ve)); + } DONE; }") @@ -2360,12 +2403,18 @@ rtx ve = gen_reg_rtx (V4SImode); rtx vo = gen_reg_rtx (V4SImode); - emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2])); - emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2])); if (BYTES_BIG_ENDIAN) - emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + { + emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); + } else - 
emit_insn (gen_altivec_vmrglw (operands[0], vo, ve)); + { + emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); + emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); + emit_insn (gen_altivec_vmrglw (operands[0], vo, ve)); + } DONE; }")