Segher: I have addressed the following comments in the patch > >
<snip> > > 2018-03-12 Carl Love <c...@us.ibm.com> > > > > * config/rs6000/rs6000-c.c: Add macro definitions for > > ALTIVEC_BUILTIN_VEC_PERM. > > ALTIVEC_BUILTIN_VEC_PERMXOR. Fixed typo. > > > * config/rs6000/altivec.md: Add define_insn for altivec- > > vpermxor. > > altivec_vpermxor. > > Please write this like: > (altivec_vpermxor): New define_expand. Fixed. > > > Add UNSPEC_VNOR, define_insn altivec_vnor_v16qi3. > > There already is nor<mode>3 for BOOL_128 (so norv16qi3 for this). > > But you probably want to use one_cmpl<mode>2 instead (which ends up > as > *one_cmpl<mode>3_internal for BOOL_128, *one_cmplv16qi3_internal here > (the "3" is a misnomer), the expander is one_cmplv16qi2). Changed, removed the vnor definition I added. > > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/powerpc/builtins-7-runnable.c > > @@ -0,0 +1,112 @@ > > +/* { dg-do run { target { powerpc*-*-* && { lp64 && p8vector_hw } > > } } } */ > > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "- > > mcpu=*" } { "-mcpu=power8" } } */ > > +/* { dg-options "-mcpu=power8 -O2" } */ > > Does this need lp64? Nope, removed. I have retested the patch on powerpc64-unknown-linux-gnu (Power 8 BE) powerpc64le-unknown-linux-gnu (Power 8 LE) powerpc64le-unknown-linux-gnu (Power 9 LE) The patch is below. Please let me know if it looks OK now. Thanks. Carl Love ------------------------------------------------------------ gcc/ChangeLog: 2018-03-14 Carl Love <c...@us.ibm.com> * config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add entries for P8V_BUILTIN_VEC_VPERMXOR. * config/rs6000/altivec.h: Add #define for vec_permxor builtin. * config/rs6000/rs6000-builtin.def: Add macro expansions for VPERMXOR. * config/rs6000/altivec.md (altivec_vpermxor): New define_expand. * config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Add case UNSPEC_VPERMXOR. * doc/extend.texi: Add prototypes for vec_permxor. 
gcc/testsuite/ChangeLog: 2018-03-13 Carl Love <c...@us.ibm.com> * gcc.target/powerpc/builtins-7-runnable.c: Add tests for vec_permxor. --- gcc/config/rs6000/altivec.h | 1 + gcc/config/rs6000/altivec.md | 23 +++++ gcc/config/rs6000/rs6000-builtin.def | 3 + gcc/config/rs6000/rs6000-c.c | 10 ++ gcc/config/rs6000/rs6000-p8swap.c | 1 + gcc/doc/extend.texi | 6 ++ .../gcc.target/powerpc/builtins-7-runnable.c | 112 +++++++++++++++++++++ 7 files changed, 156 insertions(+) create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-7-runnable.c diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 1e495e6..5a34162 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -76,6 +76,7 @@ #define vec_vor vec_or #define vec_vpkpx vec_packpx #define vec_vperm vec_perm +#define vec_permxor __builtin_vec_vpermxor #define vec_vrefp vec_re #define vec_vrfin vec_round #define vec_vrsqrtefp vec_rsqrte diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 2759f2d..646275c 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -3827,6 +3827,29 @@ DONE; }) +(define_expand "altivec_vpermxor" + [(use (match_operand:V16QI 0 "register_operand")) + (use (match_operand:V16QI 1 "register_operand")) + (use (match_operand:V16QI 2 "register_operand")) + (use (match_operand:V16QI 3 "register_operand"))] + "TARGET_P8_VECTOR" +{ + if (!BYTES_BIG_ENDIAN) + { + /* vpermxor indexes the bytes using Big Endian numbering. If LE, + change indexing in operand[3] to BE index. 
*/ + rtx be_index = gen_reg_rtx (V16QImode); + + emit_insn (gen_one_cmplv16qi2 (be_index, operands[3])); + emit_insn (gen_crypto_vpermxor_v16qi (operands[0], operands[1], + operands[2], be_index)); + } + else + emit_insn (gen_crypto_vpermxor_v16qi (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + (define_expand "altivec_negv4sf2" [(use (match_operand:V4SF 0 "register_operand")) (use (match_operand:V4SF 1 "register_operand"))] diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 9942d65..1efa8b6 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2008,6 +2008,8 @@ BU_P8V_AV_P (VCMPEQUD_P, "vcmpequd_p", CONST, vector_eq_v2di_p) BU_P8V_AV_P (VCMPGTSD_P, "vcmpgtsd_p", CONST, vector_gt_v2di_p) BU_P8V_AV_P (VCMPGTUD_P, "vcmpgtud_p", CONST, vector_gtu_v2di_p) +BU_P8V_AV_3 (VPERMXOR, "vpermxor", CONST, altivec_vpermxor) + /* ISA 2.05 overloaded 2 argument functions. */ BU_P6_OVERLOAD_2 (CMPB, "cmpb") @@ -2069,6 +2071,7 @@ BU_P8V_OVERLOAD_3 (VADDECUQ, "vaddecuq") BU_P8V_OVERLOAD_3 (VADDEUQM, "vaddeuqm") BU_P8V_OVERLOAD_3 (VSUBECUQ, "vsubecuq") BU_P8V_OVERLOAD_3 (VSUBEUQM, "vsubeuqm") +BU_P8V_OVERLOAD_3 (VPERMXOR, "vpermxor") /* ISA 3.0 vector overloaded 2-argument functions. 
*/ BU_P9V_AV_2 (VSLV, "vslv", CONST, vslv) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index cc8e4e1..9ffb253 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -3585,6 +3585,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI }, { ALTIVEC_BUILTIN_VEC_PERM, ALTIVEC_BUILTIN_VPERM_16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI }, + + { P8V_BUILTIN_VEC_VPERMXOR, P8V_BUILTIN_VPERMXOR, + RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, + RS6000_BTI_bool_V16QI }, + { P8V_BUILTIN_VEC_VPERMXOR, P8V_BUILTIN_VPERMXOR, + RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI }, + { P8V_BUILTIN_VEC_VPERMXOR, P8V_BUILTIN_VPERMXOR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI }, + { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_bool_V2DI }, { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DF, diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c index ffcbba9..d2b39f3 100644 --- a/gcc/config/rs6000/rs6000-p8swap.c +++ b/gcc/config/rs6000/rs6000-p8swap.c @@ -753,6 +753,7 @@ rtx_is_swappable_p (rtx op, unsigned int *special) case UNSPEC_VPERM_UNS: case UNSPEC_VPERMHI: case UNSPEC_VPERMSI: + case UNSPEC_VPERMXOR: case UNSPEC_VPKPX: case UNSPEC_VSLDOI: case UNSPEC_VSLO: diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 1379502..d10fb46 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -18448,6 +18448,12 @@ vector double vec_perm (vector double, vector double, vector unsigned char); vector long vec_perm (vector long, vector long, vector unsigned char); vector unsigned long vec_perm (vector unsigned long, vector unsigned long, vector unsigned 
char); +vector bool char vec_permxor (vector bool char, vector bool char, + vector bool char); +vector signed char vec_permxor (vector signed char, vector signed char, + vector signed char); +vector unsigned char vec_permxor (vector unsigned char, vector unsigned char, + vector unsigned char); vector double vec_rint (vector double); vector double vec_recip (vector double, vector double); vector double vec_rsqrt (vector double); diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-7-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-7-runnable.c new file mode 100644 index 0000000..965b1cd --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/builtins-7-runnable.c @@ -0,0 +1,112 @@ +/* { dg-do run { target { powerpc*-*-* && p8vector_hw } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-mcpu=power8 -O2" } */ + +#include <stdint.h> +#include <stdio.h> +#include <inttypes.h> +#include <altivec.h> + +#ifdef DEBUG +#include <stdio.h> +#endif + +void abort (void); + +int main() { + int i; + vector bool char ubc_arg1, ubc_arg2, ubc_arg3; + vector unsigned char uc_arg1, uc_arg2, uc_arg3; + vector signed char sc_arg1, sc_arg2, sc_arg3; + + vector bool char vec_ubc_expected1, vec_ubc_result1; + vector unsigned char vec_uc_expected1, vec_uc_result1; + vector signed char vec_sc_expected1, vec_sc_result1; + + /* vec_permxor: bool char args, result */ + ubc_arg1 = (vector bool char){0xA, 0x2, 0xB0, 0x4, + 0x5, 0x6, 0x7, 0x8, + 0x9, 0x10, 0x11, 0x12, + 0x13, 0x15, 0x15, 0x16}; + ubc_arg2 = (vector bool char){0x5, 0x20, 0xC, 0x40, + 0x55, 0x66, 0x77, 0x88, + 0x9, 0xFF, 0x0, 0xED, + 0x4, 0x5, 0x6, 0x7}; + ubc_arg3 = (vector bool char){0x08, 0x19, 0x2A, 0x3B, + 0x4D, 0x5C, 0x6D, 0x7E, + 0x8F, 0x90, 0xA1, 0xB2, + 0xC3, 0xD4, 0xE5, 0xF6}; + vec_ubc_expected1 = (vector bool char){0x3, 0xFD, 0xB0, 0xE9, + 0x0, 0x2, 0x2, 0xE, + 0xE, 0x15, 0x31, 0x1E, + 0x53, 0x40, 0x73, 0x61}; + vec_ubc_result1 = vec_permxor 
(ubc_arg1, ubc_arg2, ubc_arg3); + + for (i = 0; i < 16; i++) { + if (vec_ubc_expected1[i] != vec_ubc_result1[i]) +#ifdef DEBUG + printf("ERROR vec_permxor (ubc, ubc, ubc) result[%d]=0x%x != expected[%d]=0x%x\n", + i, vec_ubc_result1[i], i, vec_ubc_expected1[i]); +#else + abort(); +#endif + } + + /* vec_permxor: signed char args, result */ + sc_arg1 = (vector signed char){0x1, 0x2, 0x3, 0x4, + 0x5, 0x6, 0x7, 0x8, + 0x9, 0x10, 0xA, 0xB, + 0xC, 0xD, 0xE, 0xF}; + sc_arg2 = (vector signed char){0x5, 0x5, 0x7, 0x8, + 0x9, 0xA, 0xB, 0xC, + 0xD, 0xE, 0xF, 0x0, + 0x1, 0x2, 0x3, 0x4}; + sc_arg3 = (vector signed char){0x08, 0x19, 0x2A, 0x3B, + 0x4D, 0x5C, 0x6D, 0x7E, + 0x8F, 0x90, 0xA1, 0xB2, + 0xC3, 0xD4, 0xE5, 0xF6}; + vec_sc_expected1 = (vector signed char){0xC, 0xC, 0xC, 0x4, + 0x7, 0x7, 0x5, 0xB, + 0xD, 0x15, 0xF, 0xC, + 0x4, 0x4, 0x4, 0x4}; + vec_sc_result1 = vec_permxor (sc_arg1, sc_arg2, sc_arg3); + + for (i = 0; i < 16; i++) { + if (vec_sc_expected1[i] != vec_sc_result1[i]) +#ifdef DEBUG + printf("ERROR vec_permxor (sc, sc, sc) result[%d]=0x%x != expected[%d]=0x%x\n", + i, vec_sc_result1[i], i, vec_sc_expected1[i]); +#else + abort(); +#endif + } + + /* vec_permxor: unsigned char args, result */ + uc_arg1 = (vector unsigned char){0xA, 0xB, 0xC, 0xD, + 0xE, 0xF, 0x0, 0x1, + 0x2, 0x3, 0x4, 0x5, + 0x6, 0x7, 0x8, 0x9}; + uc_arg2 = (vector unsigned char){0x5, 0x6, 0x7, 0x8, + 0x9, 0xA, 0xB, 0xC, + 0xD, 0xE, 0xF, 0x0, + 0x1, 0x2, 0x3, 0x4}; + uc_arg3 = (vector unsigned char){0x08, 0x19, 0x2A, 0x3B, + 0x4D, 0x5C, 0x6D, 0x7E, + 0x8F, 0x90, 0xA1, 0xB2, + 0xC3, 0xD4, 0xE5, 0xF6}; + vec_uc_expected1 = (vector unsigned char){0x7, 0x5, 0x3, 0xD, + 0xC, 0xE, 0x2, 0x2, + 0x6, 0x6, 0x2, 0x2, + 0xE, 0xE, 0x2, 0x2}; + vec_uc_result1 = vec_permxor (uc_arg1, uc_arg2, uc_arg3); + + for (i = 0; i < 16; i++) { + if (vec_uc_expected1[i] != vec_uc_result1[i]) +#ifdef DEBUG + printf("ERROR vec_permxor (uc, uc, uc) result[%d]=0x%x != expected[%d]=0x%x\n", + i, vec_uc_result1[i], i, 
vec_uc_expected1[i]); +#else + abort(); +#endif + } +} -- 2.7.4