Hi, This patch adds a few more vector interfaces listed in the ELFv2 ABI v1.1: the missing vector short flavors of vec_madd, plus the new vec_pmsum_be and vec_shasigma_be interfaces. Existing tests have been updated to check for correct code generation. Tested on powerpc64le-unknown-linux-gnu with no regressions. Ok for trunk?
Thanks, Bill [gcc] 2015-08-20 Bill Schmidt <wschm...@linux.vnet.ibm.com> * config/rs6000/altivec.h (vec_pmsum_be): New #define. (vec_shasigma_be): New #define. * config/rs6000/rs6000-builtin.def (VPMSUMB): New BU_P8V_AV_2. (VPMSUMH): Likewise. (VPMSUMW): Likewise. (VPMSUMD): Likewise. (VPMSUM): New BU_P8V_OVERLOAD_2. * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): New entries for VEC_MADD and VEC_VPMSUM. [gcc/testsuite] 2015-08-20 Bill Schmidt <wschm...@linux.vnet.ibm.com> * gcc.target/powerpc/altivec-35.c (foo): Add tests for vec_madd. * gcc.target/powerpc/p8vector-builtin-8.c (foo): Add tests for vec_pmsum_be and vec_shasigma_be. Index: gcc/config/rs6000/altivec.h =================================================================== --- gcc/config/rs6000/altivec.h (revision 227035) +++ gcc/config/rs6000/altivec.h (working copy) @@ -208,6 +208,8 @@ #define vec_lvebx __builtin_vec_lvebx #define vec_lvehx __builtin_vec_lvehx #define vec_lvewx __builtin_vec_lvewx +#define vec_pmsum_be __builtin_vec_vpmsum +#define vec_shasigma_be __builtin_crypto_vshasigma /* Cell only intrinsics. 
*/ #ifdef __PPU__ #define vec_lvlx __builtin_vec_lvlx Index: gcc/config/rs6000/rs6000-builtin.def =================================================================== --- gcc/config/rs6000/rs6000-builtin.def (revision 227035) +++ gcc/config/rs6000/rs6000-builtin.def (working copy) @@ -1489,6 +1489,10 @@ BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_v BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss) BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus) BU_P8V_AV_2 (VPKSDUS, "vpksdus", CONST, altivec_vpksdus) +BU_P8V_AV_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb) +BU_P8V_AV_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh) +BU_P8V_AV_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw) +BU_P8V_AV_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd) BU_P8V_AV_2 (VRLD, "vrld", CONST, vrotlv2di3) BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3) BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3) @@ -1570,6 +1574,7 @@ BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss") BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus") BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum") BU_P8V_OVERLOAD_2 (VPKUDUS, "vpkudus") +BU_P8V_OVERLOAD_2 (VPMSUM, "vpmsum") BU_P8V_OVERLOAD_2 (VRLD, "vrld") BU_P8V_OVERLOAD_2 (VSLD, "vsld") BU_P8V_OVERLOAD_2 (VSRAD, "vsrad") Index: gcc/config/rs6000/rs6000-c.c =================================================================== --- gcc/config/rs6000/rs6000-c.c (revision 227035) +++ gcc/config/rs6000/rs6000-c.c (working copy) @@ -2937,6 +2937,14 @@ const struct altivec_builtin_types altivec_overloa RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_MADD, VSX_BUILTIN_XVMADDDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF }, + { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, + { ALTIVEC_BUILTIN_VEC_MADD, 
ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, + { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI }, { ALTIVEC_BUILTIN_VEC_MADDS, ALTIVEC_BUILTIN_VMHADDSHS, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI }, { ALTIVEC_BUILTIN_VEC_MLADD, ALTIVEC_BUILTIN_VMLADDUHM, @@ -4171,6 +4179,19 @@ const struct altivec_builtin_types altivec_overloa { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 }, + { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMB, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, 0 }, + { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMH, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, + RS6000_BTI_unsigned_V8HI, 0 }, + { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMW, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V4SI, 0 }, + { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMD, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V2DI, + RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB, Index: gcc/testsuite/gcc.target/powerpc/altivec-35.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/altivec-35.c (revision 227035) +++ gcc/testsuite/gcc.target/powerpc/altivec-35.c (working copy) @@ -7,10 +7,19 @@ /* Test Altivec built-ins added for version 1.1 of ELFv2 ABI. 
*/ vector signed int vsia, vsib; +vector signed short vssa, vssb, vssc; +vector unsigned short vusa, vusb, vusc; -void foo (vector signed int *vsir) +void foo (vector signed int *vsir, + vector signed short *vssr, + vector unsigned short *vusr) { *vsir++ = vec_addc (vsia, vsib); + *vssr++ = vec_madd (vssa, vssb, vssc); + *vssr++ = vec_madd (vssa, vusb, vusc); + *vssr++ = vec_madd (vusa, vssb, vssc); + *vusr++ = vec_madd (vusa, vusb, vusc); } /* { dg-final { scan-assembler-times "vaddcuw" 1 } } */ +/* { dg-final { scan-assembler-times "vmladduhm" 4 } } */ Index: gcc/testsuite/gcc.target/powerpc/p8vector-builtin-8.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/p8vector-builtin-8.c (revision 227035) +++ gcc/testsuite/gcc.target/powerpc/p8vector-builtin-8.c (working copy) @@ -8,7 +8,9 @@ vector unsigned char vuca, vucb, vucc; vector bool char vbca, vbcb; +vector unsigned short vusa, vusb; vector bool short vbsa, vbsb; +vector unsigned int vuia, vuib; vector bool int vbia, vbib; vector signed long long vsla, vslb; vector unsigned long long vula, vulb, vulc; @@ -19,7 +21,9 @@ vector double vda, vdb; void foo (vector unsigned char *vucr, vector bool char *vbcr, + vector unsigned short *vusr, vector bool short *vbsr, + vector unsigned int *vuir, vector bool int *vbir, vector unsigned long long *vulr, vector bool long long *vblr, @@ -48,6 +52,12 @@ void foo (vector unsigned char *vucr, *vblr++ = vec_orc (vbla, vblb); *vbsr++ = vec_orc (vbsa, vbsb); *vblr++ = vec_perm (vbla, vblb, vucc); + *vusr++ = vec_pmsum_be (vuca, vucb); + *vuir++ = vec_pmsum_be (vusa, vusb); + *vulr++ = vec_pmsum_be (vuia, vuib); + *vuxr++ = vec_pmsum_be (vula, vulb); + *vuir++ = vec_shasigma_be (vuia, 0, 1); + *vulr++ = vec_shasigma_be (vula, 0, 1); } /* { dg-final { scan-assembler-times "vaddcuq" 2 } } */ @@ -59,4 +69,10 @@ void foo (vector unsigned char *vucr, /* { dg-final { scan-assembler-times "xxlnand" 4 } } */ /* { dg-final { 
scan-assembler-times "xxlorc" 4 } } */ /* { dg-final { scan-assembler-times "vperm" 1 } } */ +/* { dg-final { scan-assembler-times "vpmsumb" 1 } } */ +/* { dg-final { scan-assembler-times "vpmsumh" 1 } } */ +/* { dg-final { scan-assembler-times "vpmsumw" 1 } } */ +/* { dg-final { scan-assembler-times "vpmsumd" 1 } } */ +/* { dg-final { scan-assembler-times "vshasigmaw" 1 } } */ +/* { dg-final { scan-assembler-times "vshasigmad" 1 } } */