This patch adds support for the vec_vinsert4b and vec_vextract4b built-in functions that generate the ISA 3.0 XXINSERTW and XXEXTRACTUW/VEXTUW{L,R}X instructions. These functions are part of the OpenPOWER 64-bit ELF V2 ABI.
In doing the work, I noticed the P9V built-in ternary functions were incorrectly declared to be binary. I have fixed these functions. The built-ins added are: long long vec_vextract4b (const vector signed char, const int); long long vec_vextract4b (const vector unsigned char, const int); vector signed char vec_vinsert4b (vector int, vector signed char, const int); vector unsigned char vec_vinsert4b (vector unsigned int, vector unsigned char, const int); vector signed char vec_vinsert4b (long long, vector signed char, const int); vector unsigned char vec_vinsert4b (long long, vector unsigned char, const int); Note, the ABI only adds the form of vec_insert4b that takes a vector int as the first argument. On little endian systems, you have to swap double words to get the desired element into the scalar position for the XXINSERTW instruction. I have added a GCC extension to alternatively take a long long (or long in 64-bit) for the value to be inserted, since IMHO, it makes the built-in much easier to use. I have done bootstrap builds on a 64-bit power8 little endian system and a 32/64-bit power7 big endian system. There were no regressions. Can I check this into the GCC trunk? [gcc] 2016-12-13 Michael Meissner <meiss...@linux.vnet.ibm.com> * config/rs6000/predicates.md (const_0_to_11_operand): New predicate, match 0..11. * config/rs6000/rs6000-builtin.def (BU_P9V_VSX_3): Set built-in type to ternary, not binary. (BU_P9V_64BIT_VSX_3): Likewise. (P9V_BUILTIN_VEXTRACT4B): Add support for vec_vinsert4b and vec_vextract4b non-overloaded built-in functions. (P9V_BUILTIN_VINSERT4B): Likewise. (P9V_BUILTIN_VINSERT4B_DI): Likewise. (P9V_BUILTIN_VEC_VEXTULX): Move to section that adds 2 operand ISA 3.0 built-in functions. (P9V_BUILTIN_VEC_VEXTURX): Likewise. (P9V_BUILTIN_VEC_VEXTRACT4B): Add support for overloaded vec_vinsert4b and vec_vextract4b built-in functions. (P9V_BUILTIN_VEC_VINSERT4B): Likewise. 
* config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add overloaded support for vec_vinsert4b and vec_vextract4b. * config/rs6000/rs6000.c (altivec_expand_builtin): Add checks for the vec_vinsert4b and vec_vextract4b byte number being a constant in the range 0..11. * config/rs6000/vsx.md (UNSPEC_XXEXTRACTUW): New unspec. (UNSPEC_XXINSERTW): Likewise. (vextract4b): Add support for the vec_vextract4b built-in function. (vextract4b_internal): Likewise. (vinsert4b): Add support for the vec_vinsert4b built-in function. Include both a version that inserts element 1 from a V4SI object and one that inserts a DI object. (vinsert4b_internal): Likewise. (vinsert4b_di): Likewise. (vinsert4b_di_internal): Likewise. * config/rs6000/altivec.h (vec_vextract4b, vec_vinsert4b): Support vec_vinsert4b and vec_vextract4b built-in functions. * doc/extend.texi (PowerPC VSX built-in functions): Document vec_insert4b and vec_vextract4b. [gcc/testsuite] 2016-12-13 Michael Meissner <meiss...@linux.vnet.ibm.com> * gcc.target/powerpc/p9-vinsert4b-1.c: New test. * gcc.target/powerpc/p9-vinsert4b-2.c: Likewise. -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/predicates.md =================================================================== --- gcc/config/rs6000/predicates.md (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 243590) +++ gcc/config/rs6000/predicates.md (.../gcc/config/rs6000) (working copy) @@ -210,6 +210,11 @@ (define_predicate "const_0_to_7_operand" (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 0, 7)"))) +;; Match op = 0..11 +(define_predicate "const_0_to_11_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 0, 11)"))) + ;; Match op = 0..15 (define_predicate "const_0_to_15_operand" (and (match_code "const_int") Index: gcc/config/rs6000/rs6000-builtin.def =================================================================== --- gcc/config/rs6000/rs6000-builtin.def (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 243590) +++ gcc/config/rs6000/rs6000-builtin.def (.../gcc/config/rs6000) (working copy) @@ -877,7 +877,16 @@ "__builtin_vsx_" NAME, /* NAME */ \ RS6000_BTM_P9_VECTOR, /* MASK */ \ (RS6000_BTC_ ## ATTR /* ATTR */ \ - | RS6000_BTC_BINARY), \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + +#define BU_P9V_64BIT_VSX_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (P9V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_vsx_" NAME, /* NAME */ \ + (RS6000_BTM_64BIT \ + | RS6000_BTM_P9_VECTOR), /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ CODE_FOR_ ## ICODE) /* ICODE */ /* See the comment on BU_ALTIVEC_P. */ @@ -1967,6 +1976,11 @@ BU_P9V_AV_2 (VEXTUHRX, "vextuhrx", CONS BU_P9V_AV_2 (VEXTUWLX, "vextuwlx", CONST, vextuwlx) BU_P9V_AV_2 (VEXTUWRX, "vextuwrx", CONST, vextuwrx) +/* Insert/extract 4 byte word into a vector. 
*/ +BU_P9V_VSX_2 (VEXTRACT4B, "vextract4b", CONST, vextract4b) +BU_P9V_VSX_3 (VINSERT4B, "vinsert4b", CONST, vinsert4b) +BU_P9V_VSX_3 (VINSERT4B_DI, "vinsert4b_di", CONST, vinsert4b_di) + /* 3 argument vector functions returning void, treated as SPECIAL, added in ISA 3.0 (power9). */ BU_P9V_64BIT_AV_X (STXVL, "stxvl", MISC) @@ -2008,12 +2022,13 @@ BU_P9V_AV_P (VCMPNEZW_P, "vcmpnezw_p", C /* ISA 3.0 Vector scalar overloaded 2 argument functions */ BU_P9V_OVERLOAD_2 (LXVL, "lxvl") +BU_P9V_OVERLOAD_2 (VEXTULX, "vextulx") +BU_P9V_OVERLOAD_2 (VEXTURX, "vexturx") +BU_P9V_OVERLOAD_2 (VEXTRACT4B, "vextract4b") /* ISA 3.0 Vector scalar overloaded 3 argument functions */ BU_P9V_OVERLOAD_3 (STXVL, "stxvl") - -BU_P9V_OVERLOAD_2 (VEXTULX, "vextulx") -BU_P9V_OVERLOAD_2 (VEXTURX, "vexturx") +BU_P9V_OVERLOAD_3 (VINSERT4B, "vinsert4b") /* Overloaded CMPNE support was implemented prior to Power 9, so is not mentioned here. */ Index: gcc/config/rs6000/rs6000-c.c =================================================================== --- gcc/config/rs6000/rs6000-c.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 243590) +++ gcc/config/rs6000/rs6000-c.c (.../gcc/config/rs6000) (working copy) @@ -4682,6 +4682,11 @@ const struct altivec_builtin_types altiv { P9V_BUILTIN_VEC_VCTZLSBB, P9V_BUILTIN_VCTZLSBB, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B, + RS6000_BTI_INTDI, RS6000_BTI_V16QI, RS6000_BTI_UINTSI, 0 }, + { P9V_BUILTIN_VEC_VEXTRACT4B, P9V_BUILTIN_VEXTRACT4B, + RS6000_BTI_INTDI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI, 0 }, + { P9V_BUILTIN_VEC_VEXTULX, P9V_BUILTIN_VEXTUBLX, RS6000_BTI_INTQI, RS6000_BTI_UINTSI, RS6000_BTI_V16QI, 0 }, @@ -4735,6 +4740,28 @@ const struct altivec_builtin_types altiv { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B, + RS6000_BTI_V16QI, 
RS6000_BTI_V4SI, + RS6000_BTI_V16QI, RS6000_BTI_UINTSI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B, + RS6000_BTI_V16QI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_V16QI, RS6000_BTI_UINTSI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V4SI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTSI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI, + RS6000_BTI_V16QI, RS6000_BTI_INTDI, + RS6000_BTI_V16QI, RS6000_BTI_UINTDI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI, + RS6000_BTI_V16QI, RS6000_BTI_UINTDI, + RS6000_BTI_V16QI, RS6000_BTI_UINTDI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTDI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTDI }, + { P9V_BUILTIN_VEC_VINSERT4B, P9V_BUILTIN_VINSERT4B_DI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTDI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_UINTDI }, + { P8V_BUILTIN_VEC_VADDECUQ, P8V_BUILTIN_VADDECUQ, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, { P8V_BUILTIN_VEC_VADDECUQ, P8V_BUILTIN_VADDECUQ, Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 243590) +++ gcc/config/rs6000/rs6000.c (.../gcc/config/rs6000) (working copy) @@ -15546,7 +15546,7 @@ altivec_expand_builtin (tree exp, rtx ta size_t i; enum insn_code icode; tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); - tree arg0; + tree arg0, arg1, arg2; rtx op0, pat; machine_mode tmode, mode0; enum rs6000_builtins fcode @@ -15766,6 +15766,40 @@ altivec_expand_builtin (tree exp, rtx ta case VSX_BUILTIN_VEC_EXT_V1TI: return altivec_expand_vec_ext_builtin (exp, target); + case P9V_BUILTIN_VEXTRACT4B: + case P9V_BUILTIN_VEC_VEXTRACT4B: + arg1 = CALL_EXPR_ARG (exp, 1); + STRIP_NOPS (arg1); + + /* Generate a normal call if it is invalid. 
*/ + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg1 == error_mark_node) + return expand_call (exp, target, false); + + if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 11) + { + error ("second argument to vec_vextract4b must 0..11"); + return expand_call (exp, target, false); + } + break; + + case P9V_BUILTIN_VINSERT4B: + case P9V_BUILTIN_VINSERT4B_DI: + case P9V_BUILTIN_VEC_VINSERT4B: + arg2 = CALL_EXPR_ARG (exp, 2); + STRIP_NOPS (arg2); + + /* If we got invalid arguments bail out before generating bad rtl. */ + if (arg2 == error_mark_node) + return expand_call (exp, target, false); + + if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 11) + { + error ("third argument to vec_vinsert4b must 0..11"); + return expand_call (exp, target, false); + } + break; + default: break; /* Fall through. */ Index: gcc/config/rs6000/vsx.md =================================================================== --- gcc/config/rs6000/vsx.md (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 243590) +++ gcc/config/rs6000/vsx.md (.../gcc/config/rs6000) (working copy) @@ -366,6 +366,8 @@ (define_c_enum "unspec" UNSPEC_VCMPNEZH UNSPEC_VCMPNEW UNSPEC_VCMPNEZW + UNSPEC_XXEXTRACTUW + UNSPEC_XXINSERTW ]) ;; VSX moves @@ -3686,3 +3688,94 @@ (define_insn "vextuwrx" "TARGET_P9_VECTOR" "vextuwrx %0,%1,%2" [(set_attr "type" "vecsimple")]) + +;; Vector insert/extract word at arbitrary byte values. Note, the little +;; endian version needs to adjust the byte number, and the V4SI element in +;; vinsert4b. 
+(define_expand "vextract4b" + [(set (match_operand:DI 0 "gpc_reg_operand") + (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand") + (match_operand:QI 2 "const_0_to_11_operand")] + UNSPEC_XXEXTRACTUW))] + "TARGET_P9_VECTOR" +{ + if (!VECTOR_ELT_ORDER_BIG) + operands[2] = GEN_INT (12 - INTVAL (operands[2])); +}) + +(define_insn_and_split "*vextract4b_internal" + [(set (match_operand:DI 0 "gpc_reg_operand" "=wj,r") + (unspec:DI [(match_operand:V16QI 1 "vsx_register_operand" "wa,v") + (match_operand:QI 2 "const_0_to_11_operand" "n,n")] + UNSPEC_XXEXTRACTUW))] + "TARGET_P9_VECTOR" + "@ + xxextractuw %x0,%x1,%2 + #" + "&& reload_completed && int_reg_operand (operands[0], DImode)" + [(const_int 0)] +{ + rtx op0 = operands[0]; + rtx op1 = operands[1]; + rtx op2 = operands[2]; + rtx op0_si = gen_rtx_REG (SImode, REGNO (op0)); + rtx op1_v4si = gen_rtx_REG (V4SImode, REGNO (op1)); + + emit_move_insn (op0, op2); + if (VECTOR_ELT_ORDER_BIG) + emit_insn (gen_vextuwlx (op0_si, op0_si, op1_v4si)); + else + emit_insn (gen_vextuwrx (op0_si, op0_si, op1_v4si)); + DONE; +} + [(set_attr "type" "vecperm")]) + +(define_expand "vinsert4b" + [(set (match_operand:V16QI 0 "vsx_register_operand") + (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand") + (match_operand:V16QI 2 "vsx_register_operand") + (match_operand:QI 3 "const_0_to_11_operand")] + UNSPEC_XXINSERTW))] + "TARGET_P9_VECTOR" +{ + if (!VECTOR_ELT_ORDER_BIG) + { + rtx op1 = operands[1]; + rtx v4si_tmp = gen_reg_rtx (V4SImode); + emit_insn (gen_vsx_xxpermdi_v4si (v4si_tmp, op1, op1, const1_rtx)); + operands[1] = v4si_tmp; + operands[3] = GEN_INT (12 - INTVAL (operands[3])); + } +}) + +(define_insn "*vinsert4b_internal" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (unspec:V16QI [(match_operand:V4SI 1 "vsx_register_operand" "wa") + (match_operand:V16QI 2 "vsx_register_operand" "0") + (match_operand:QI 3 "const_0_to_11_operand" "n")] + UNSPEC_XXINSERTW))] + "TARGET_P9_VECTOR" + "xxinsertw %x0,%x1,%3" + 
[(set_attr "type" "vecperm")]) + +(define_expand "vinsert4b_di" + [(set (match_operand:V16QI 0 "vsx_register_operand") + (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand") + (match_operand:V16QI 2 "vsx_register_operand") + (match_operand:QI 3 "const_0_to_11_operand")] + UNSPEC_XXINSERTW))] + "TARGET_P9_VECTOR" +{ + if (!VECTOR_ELT_ORDER_BIG) + operands[3] = GEN_INT (12 - INTVAL (operands[3])); +}) + +(define_insn "*vinsert4b_di_internal" + [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa") + (unspec:V16QI [(match_operand:DI 1 "vsx_register_operand" "wj") + (match_operand:V16QI 2 "vsx_register_operand" "0") + (match_operand:QI 3 "const_0_to_11_operand" "n")] + UNSPEC_XXINSERTW))] + "TARGET_P9_VECTOR" + "xxinsertw %x0,%x1,%3" + [(set_attr "type" "vecperm")]) Index: gcc/config/rs6000/altivec.h =================================================================== --- gcc/config/rs6000/altivec.h (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000) (revision 243590) +++ gcc/config/rs6000/altivec.h (.../gcc/config/rs6000) (working copy) @@ -394,6 +394,8 @@ #define vec_vctzd __builtin_vec_vctzd #define vec_vctzh __builtin_vec_vctzh #define vec_vctzw __builtin_vec_vctzw +#define vec_vextract4b __builtin_vec_vextract4b +#define vec_vinsert4b __builtin_vec_vinsert4b #define vec_vprtyb __builtin_vec_vprtyb #define vec_vprtybd __builtin_vec_vprtybd #define vec_vprtybw __builtin_vec_vprtybw Index: gcc/doc/extend.texi =================================================================== --- gcc/doc/extend.texi (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/doc) (revision 243590) +++ gcc/doc/extend.texi (.../gcc/doc) (working copy) @@ -17988,6 +17988,15 @@ vector unsigned short vec_vctzh (vector vector int vec_vctzw (vector int); vector unsigned int vec_vctzw (vector int); +long long vec_vextract4b (const vector signed char, const int); +long long vec_vextract4b (const vector unsigned char, const int); + +vector signed char vec_insert4b (vector 
int, vector signed char, const int); +vector unsigned char vec_insert4b (vector unsigned int, vector unsigned char, + const int); +vector signed char vec_insert4b (long long, vector signed char, const int); +vector unsigned char vec_insert4b (long long, vector unsigned char, const int); + vector int vec_vprtyb (vector int); vector unsigned int vec_vprtyb (vector unsigned int); vector long long vec_vprtyb (vector long long); Index: gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-1.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-1.c (.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-1.c (.../gcc/testsuite/gcc.target/powerpc) (revision 243618) @@ -0,0 +1,39 @@ +/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +#include <altivec.h> + +vector signed char +vins_v4si (vector int *vi, vector signed char *vc) +{ + return vec_vinsert4b (*vi, *vc, 1); +} + +vector unsigned char +vins_di (long di, vector unsigned char *vc) +{ + return vec_vinsert4b (di, *vc, 2); +} + +vector char +vins_di2 (long *p_di, vector char *vc) +{ + return vec_vinsert4b (*p_di, *vc, 3); +} + +vector unsigned char +vins_di0 (vector unsigned char *vc) +{ + return vec_vinsert4b (0, *vc, 4); +} + +long +vext (vector signed char *vc) +{ + return vec_vextract4b (*vc, 5); +} + +/* { dg-final { scan-assembler "xxextractuw\|vextuw\[lr\]x" } } */ +/* { dg-final { scan-assembler "xxinsertw" } } */ Index: gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-2.c =================================================================== --- gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-2.c 
(.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc) (revision 0) +++ gcc/testsuite/gcc.target/powerpc/p9-vinsert4b-2.c (.../gcc/testsuite/gcc.target/powerpc) (revision 243618) @@ -0,0 +1,30 @@ +/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-options "-mcpu=power9 -O2" } */ + +#include <altivec.h> + +vector signed char +ins_v4si (vector int vi, vector signed char vc) +{ + return vec_vinsert4b (vi, vc, 12); /* { dg-error "vec_vinsert4b" } */ +} + +vector unsigned char +ins_di (long di, vector unsigned char vc, long n) +{ + return vec_vinsert4b (di, vc, n); /* { dg-error "vec_vinsert4b" } */ +} + +long +vext1 (vector signed char vc) +{ + return vec_vextract4b (vc, 12); /* { dg-error "vec_vextract4b" } */ +} + +long +vextn (vector unsigned char vc, long n) +{ + return vec_vextract4b (vc, n); /* { dg-error "vec_vextract4b" } */ +}