On Fri, Jan 12, 2018 at 10:22 PM, Will Schmidt <will_schm...@vnet.ibm.com> wrote: > Hi, > Add support for gimple folding of the mergeh, mergel intrinsics. > Since the merge low and merge high variants are almost identical, a > new helper function has been added so that code can be shared. > > This also adds define_insn for xxmrghw, xxmrglw instructions, allowing us > to generate xxmrglw instead of vmrglw after folding. A few whitespace > fixes have been made to the existing vmrg?w defines. > > The changes introduced here affect the existing target testcases > gcc.target/powerpc/builtins-1-be.c and builtins-1-le.c, such that > a number of the scan-assembler tests would fail due to instruction counts > changing. Since the purpose of that test is to primarily ensure those > intrinsics are accepted by the compiler, I have disabled gimple-folding for > the existing tests that count instructions, and created new variants of those > tests with folding enabled and a higher optimization level, that do not count > instructions. > > Regtests are currently running across assorted power systems. > OK for trunk, pending successful results? > > Thanks, > -Will > > [gcc] > > 2018-01-12 Will Schmidt <will_schm...@vnet.ibm.com> > > * config/rs6000/rs6000.c: (rs6000_gimple_fold_builtin) Add gimple folding > support for merge[hl]. (fold_mergehl_helper): New helper function. > * config/rs6000/altivec.md (altivec_xxmrghw_direct): New. > (altivec_xxmrglw_direct): New. > > [testsuite] > > 2018-01-12 Will Schmidt <will_schm...@vnet.ibm.com> > > * gcc.target/powerpc/fold-vec-mergehl-char.c: New. > * gcc.target/powerpc/fold-vec-mergehl-double.c: New. > * gcc.target/powerpc/fold-vec-mergehl-float.c: New. > * gcc.target/powerpc/fold-vec-mergehl-int.c: New. > * gcc.target/powerpc/fold-vec-mergehl-longlong.c: New. > * gcc.target/powerpc/fold-vec-mergehl-pixel.c: New. > * gcc.target/powerpc/builtins-1-be.c: Disable gimple-folding. 
> * gcc.target/powerpc/builtins-1-le.c: Disable gimple-folding. > * gcc.target/powerpc/builtins-1-be-folded.c: New. > * gcc.target/powerpc/builtins-1-le-folded.c: New. > * gcc.target/powerpc/builtins-1.fold.h: New. > > diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md > index 733d920..65d4548 100644 > --- a/gcc/config/rs6000/altivec.md > +++ b/gcc/config/rs6000/altivec.md > @@ -1101,10 +1101,20 @@ > else > return "vmrglw %0,%2,%1"; > } > [(set_attr "type" "vecperm")]) > > + > +(define_insn "altivec_xxmrghw_direct" > + [(set (match_operand:V4SI 0 "register_operand" "=v") > + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") > + (match_operand:V4SI 2 "register_operand" "v")] > + UNSPEC_VMRGH_DIRECT))] > + "TARGET_P8_VECTOR" > + "xxmrghw %x0,%x1,%x2" > + [(set_attr "type" "vecperm")]) > + > (define_insn "altivec_vmrghw_direct" > [(set (match_operand:V4SI 0 "register_operand" "=v") > (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") > (match_operand:V4SI 2 "register_operand" "v")] > UNSPEC_VMRGH_DIRECT))] > @@ -1185,12 +1195,12 @@ > [(set_attr "type" "vecperm")]) > > (define_insn "altivec_vmrglb_direct" > [(set (match_operand:V16QI 0 "register_operand" "=v") > (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v") > - (match_operand:V16QI 2 "register_operand" "v")] > - UNSPEC_VMRGL_DIRECT))] > + (match_operand:V16QI 2 "register_operand" "v")] > + UNSPEC_VMRGL_DIRECT))] > "TARGET_ALTIVEC" > "vmrglb %0,%1,%2" > [(set_attr "type" "vecperm")]) > > (define_expand "altivec_vmrglh" > @@ -1242,11 +1252,11 @@ > > (define_insn "altivec_vmrglh_direct" > [(set (match_operand:V8HI 0 "register_operand" "=v") > (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v") > (match_operand:V8HI 2 "register_operand" "v")] > - UNSPEC_VMRGL_DIRECT))] > + UNSPEC_VMRGL_DIRECT))] > "TARGET_ALTIVEC" > "vmrglh %0,%1,%2" > [(set_attr "type" "vecperm")]) > > (define_expand "altivec_vmrglw" > @@ -1290,10 +1300,19 @@ > else > return "vmrghw 
%0,%2,%1"; > } > [(set_attr "type" "vecperm")]) > > +(define_insn "altivec_xxmrglw_direct" > + [(set (match_operand:V4SI 0 "register_operand" "=v") > + (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") > + (match_operand:V4SI 2 "register_operand" "v")] > + UNSPEC_VMRGL_DIRECT))] > + "TARGET_P8_VECTOR" > + "xxmrglw %x0,%x1,%x2" > + [(set_attr "type" "vecperm")]) > + > (define_insn "altivec_vmrglw_direct" > [(set (match_operand:V4SI 0 "register_operand" "=v") > (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v") > (match_operand:V4SI 2 "register_operand" "v")] > UNSPEC_VMRGL_DIRECT))] > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index 840b83c..18a7424 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -16121,10 +16121,45 @@ fold_compare_helper (gimple_stmt_iterator *gsi, > tree_code code, gimple *stmt) > gimple *g = gimple_build_assign (lhs, cmp); > gimple_set_location (g, gimple_location (stmt)); > gsi_replace (gsi, g, true); > } > > +/* Helper function to handle the vector merge[hl] built-ins. The > + implementation difference between h and l versions for this code are in > + the values used when building of the permute vector for high word versus > + low word merge. The variance is keyed off the use_high parameter. 
*/ > +static void > +fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high) > +{ > + tree arg0 = gimple_call_arg (stmt, 0); > + tree arg1 = gimple_call_arg (stmt, 1); > + tree lhs = gimple_call_lhs (stmt); > + tree lhs_type = TREE_TYPE (lhs); > + tree lhs_type_type = TREE_TYPE (lhs_type); > + gimple *g; > + int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type); > + vec<constructor_elt, va_gc> *ctor_elts = NULL; > + int midpoint = n_elts / 2; > + int offset = 0; > + if (use_high == 1) > + offset = midpoint; > + for (int i = 0; i < midpoint; i++) > + { > + tree tmp1 = build_int_cst (lhs_type_type, offset + i); > + tree tmp2 = build_int_cst (lhs_type_type, offset + n_elts + i); > + CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, tmp1); > + CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, tmp2); > + } > + tree permute = create_tmp_reg_or_ssa_name (lhs_type); > + g = gimple_build_assign (permute, build_constructor (lhs_type, ctor_elts));
I think this is no longer canonical GIMPLE (Richard?) and given it is also a constant you shouldn't emit a CONSTRUCTOR here but directly construct the appropriate VECTOR_CST. So it looks like the mergel/h intrinsics interleave the low or high part of two vectors? > + gimple_set_location (g, gimple_location (stmt)); > + gsi_insert_before (gsi, g, GSI_SAME_STMT); > + g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute); > + gimple_set_location (g, gimple_location (stmt)); > + gsi_replace (gsi, g, true); > +} > + > /* Fold a machine-dependent built-in in GIMPLE. (For folding into > a constant, use rs6000_fold_builtin.) */ > > bool > rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) > @@ -16649,10 +16684,32 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator > *gsi) > gimple_set_location (g, gimple_location (stmt)); > gsi_replace (gsi, g, true); > return true; > } > > + /* vec_mergel (integrals). */ > + case ALTIVEC_BUILTIN_VMRGLH: > + case ALTIVEC_BUILTIN_VMRGLW: > + case VSX_BUILTIN_XXMRGLW_4SI: > + case ALTIVEC_BUILTIN_VMRGLB: > + case VSX_BUILTIN_VEC_MERGEL_V2DI: > + /* Do not fold for -maltivec=be on LE targets. */ > + if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN) > + return false; > + fold_mergehl_helper (gsi, stmt, 1); > + return true; > + /* vec_mergeh (integrals). */ > + case ALTIVEC_BUILTIN_VMRGHH: > + case ALTIVEC_BUILTIN_VMRGHW: > + case VSX_BUILTIN_XXMRGHW_4SI: > + case ALTIVEC_BUILTIN_VMRGHB: > + case VSX_BUILTIN_VEC_MERGEH_V2DI: > + /* Do not fold for -maltivec=be on LE targets. 
*/ > + if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN) > + return false; > + fold_mergehl_helper (gsi, stmt, 0); > + return true; > default: > if (TARGET_DEBUG_BUILTIN) > fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n", > fn_code, fn_name1, fn_name2); > break; > diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1-be-folded.c > b/gcc/testsuite/gcc.target/powerpc/builtins-1-be-folded.c > new file mode 100644 > index 0000000..06ba1cb > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/builtins-1-be-folded.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile { target { powerpc64-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power8" } } */ > +/* { dg-options "-mcpu=power8 -O2 -mfold-gimple" } */ > + > +/* Based on builtins-1-le.c ; ensure that the power8 builtins are accepted by > + the compiler, at O2 with gimple folding enabled. */ > +/* Test that a number of newly added builtin overloads are accepted > + by the compiler. */ > + > +/* The test code is in builtins-1.fold.h. */ > +#include "builtins-1.fold.h" > diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1-be.c > b/gcc/testsuite/gcc.target/powerpc/builtins-1-be.c > index f6db3c2..aaaf79d 100644 > --- a/gcc/testsuite/gcc.target/powerpc/builtins-1-be.c > +++ b/gcc/testsuite/gcc.target/powerpc/builtins-1-be.c > @@ -1,8 +1,8 @@ > /* { dg-do compile { target { powerpc64-*-* } } } */ > /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power8" } } */ > -/* { dg-options "-mcpu=power8 -O0" } */ > +/* { dg-options "-mcpu=power8 -O0 -mno-fold-gimple" } */ > > /* Test that a number of newly added builtin overloads are accepted > by the compiler. 
*/ > > /* Expected results for Big Endian: > diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1-le-folded.c > b/gcc/testsuite/gcc.target/powerpc/builtins-1-le-folded.c > new file mode 100644 > index 0000000..f8490b5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/builtins-1-le-folded.c > @@ -0,0 +1,11 @@ > +/* { dg-do compile { target { powerpc64le-*-* } } } */ > +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power8" } } */ > +/* { dg-options "-mcpu=power8 -O2 -mfold-gimple" } */ > + > +/* Based on builtins-1-le.c ; ensure that the power8 builtins are accepted by > + the compiler, at O2 with gimple folding enabled. */ > +/* Test that a number of newly added builtin overloads are accepted > + by the compiler. */ > + > +/* The test code is in builtins-1.fold.h. */ > +#include "builtins-1.fold.h" > diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1-le.c > b/gcc/testsuite/gcc.target/powerpc/builtins-1-le.c > index 8763c68..e029d5f 100644 > --- a/gcc/testsuite/gcc.target/powerpc/builtins-1-le.c > +++ b/gcc/testsuite/gcc.target/powerpc/builtins-1-le.c > @@ -1,8 +1,8 @@ > /* { dg-do compile { target { powerpc64le-*-* } } } */ > /* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { > "-mcpu=power8" } } */ > -/* { dg-options "-mcpu=power8 -O0" } */ > +/* { dg-options "-mcpu=power8 -O0 -mno-fold-gimple" } */ > > /* Test that a number of newly added builtin overloads are accepted > by the compiler. */ > > /* Expected results for Little Endian: > diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h > b/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h > new file mode 100644 > index 0000000..8bc5f5e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h > @@ -0,0 +1,232 @@ > +/* This test is included into builtins-1-be.c and builtins-1-le.c to test on > + Big Endian and Little Endian machines. */ > +/* This file is based on builtins-1.h. 
In this variant, most variables have > + been marked as extern to prevent optimization-away. */ > + > +#include <altivec.h> > + > +vector double y = { 2.0, 4.0 }; > +extern vector double z; > + > +int main () > +{ > + vector float fa = {1.0, 2.0, 3.0, -4.0}; > + vector float fb = {-2.0, -3.0, -4.0, -5.0}; > + extern vector float fd; fd = vec_and (fa, fb); > + extern vector float fc; fc = vec_cpsgn (fa, fb); > + extern vector float fe; fe = vec_mergeh (fa, fb); > + extern vector float ff; ff = vec_mergel (fa, fb); > + > + vector double da = {1.0, 2.0}; > + vector double db = {-2.0, -3.0}; > + extern vector double dz; dz = vec_and (da, db); > + > + vector long long la = {5L, 14L}; > + vector long long lb = {3L, 86L}; > + extern vector long long lc; lc = vec_and (la, lb); > + vector bool long long ld = {0, -1}; > + extern vector long long le; le = vec_and (la, ld); > + extern vector long long lf; lf = vec_and (ld, lb); > + > + vector unsigned long long ua = {5L, 14L}; > + vector unsigned long long ub = {3L, 86L}; > + extern vector unsigned long long uc; uc = vec_and (ua, ub); > + vector bool long long ud = {0, -1}; > + extern vector unsigned long long ue; ue = vec_and (ua, ud); > + extern vector unsigned long long uf; uf = vec_and (ud, ub); > + > + extern vector long long lg; lg = vec_andc (la, lb); > + extern vector long long lh; lh = vec_andc (la, ld); > + extern vector long long li; li = vec_andc (ld, lb); > + > + extern vector unsigned long long ug; ug = vec_andc (ua, ub); > + extern vector unsigned long long uh; uh = vec_andc (ua, ud); > + extern vector unsigned long long ui; ui = vec_andc (ud, ub); > + > + vector double de = {1.0, -4.0}; > + vector double df = {-2.0, 5.0}; > + extern vector double dg; dg = vec_cpsgn (de, df); > + extern vector double dzw; dzw = vec_mergeh (de, df); > + extern vector double dze; dze = vec_mergel (de, df); > + > + extern vector long long lj; lj = vec_mergeh (la, lb); > + extern vector long long lk; lk = vec_mergeh (la, ld); > + 
extern vector long long ll; ll = vec_mergeh (ld, la); > + > + extern vector unsigned long long uj; uj = vec_mergeh (ua, ub); > + extern vector unsigned long long uk; uk = vec_mergeh (ua, ud); > + extern vector unsigned long long ul; ul = vec_mergeh (ud, ua); > + > + vector pixel pa = {9, 16, 25, 36, 1, 2, 3, 4}; > + vector pixel pb = {25, 36, 1, 2, 45, 3, 4, 99}; > + extern vector pixel pc; pc = vec_mergeh (pa, pb); > + extern vector pixel pd; pd = vec_mergel (pa, pb); > + > + extern vector long long lm; lm = vec_mergel (la, lb); > + extern vector long long ln; ln = vec_mergel (la, ld); > + extern vector long long lo; lo = vec_mergel (ld, la); > + > + extern vector unsigned long long um; um = vec_mergel (ua, ub); > + extern vector unsigned long long un; un = vec_mergel (ua, ud); > + extern vector unsigned long long uo; uo = vec_mergel (ud, ua); > + > + extern vector long long lp; lp = vec_nor (la, lb); > + extern vector long long lq; lq = vec_nor (la, ld); > + extern vector long long lr; lr = vec_nor (ld, la); > + > + extern vector unsigned long long up; up = vec_nor (ua, ub); > + extern vector unsigned long long uq; uq = vec_nor (ua, ud); > + extern vector unsigned long long ur; ur = vec_nor (ud, ua); > + > + extern vector long long ls; ls = vec_or (la, lb); > + extern vector long long lt; lt = vec_or (la, ld); > + extern vector long long lu; lu = vec_or (ld, la); > + > + extern vector unsigned long long us; us = vec_or (ua, ub); > + extern vector unsigned long long ut; ut = vec_or (ua, ud); > + extern vector unsigned long long uu; uu = vec_or (ud, ua); > + > + vector unsigned char ca = {0,4,8,1,5,9,2,6,10,3,7,11,15,12,14,13}; > + vector unsigned char cbb = {5,4,8,3,1,9,2,6,10,3,7,11,15,12,14,13}; > + > + extern vector long long lv; lv = vec_perm (la, lb, ca); > + > + extern vector unsigned char ucm; ucm = vec_and (ca, cbb); > + extern vector unsigned char ucn; ucn = vec_andc (ca, cbb); > + extern vector unsigned char uco; uco = vec_mergel (ca, cbb); > + > + 
extern vector unsigned long long uv; uv = vec_perm (ua, ub, ca); > + > + extern vector long long lw; lw = vec_sel (la, lb, lc); > + extern vector long long lx; lx = vec_sel (la, lb, uc); > + extern vector long long ly; ly = vec_sel (la, lb, ld); > + > + extern vector unsigned long long uw; uw = vec_sel (ua, ub, lc); > + extern vector unsigned long long ux; ux = vec_sel (ua, ub, uc); > + extern vector unsigned long long uy; uy = vec_sel (ua, ub, ld); > + > + extern vector long long lz; lz = vec_xor (la, lb); > + extern vector long long l0; l0 = vec_xor (la, ld); > + extern vector long long l1; l1 = vec_xor (ld, la); > + > + extern vector unsigned long long uz; uz = vec_xor (ua, ub); > + extern vector unsigned long long u0; u0 = vec_xor (ua, ud); > + extern vector unsigned long long u1; u1 = vec_xor (ud, ua); > + > + extern int ia; ia = vec_all_eq (ua, ub); > + extern int ib; ib = vec_all_ge (ua, ub); > + extern int ic; ic = vec_all_gt (ua, ub); > + extern int id; id = vec_all_le (ua, ub); > + extern int ie; ie = vec_all_lt (ua, ub); > + extern int ig; ig = vec_all_ne (ua, ub); > + > + extern int ih; ih = vec_any_eq (ua, ub); > + extern int ii; ii = vec_any_ge (ua, ub); > + extern int ij; ij = vec_any_gt (ua, ub); > + extern int ik; ik = vec_any_le (ua, ub); > + extern int il; il = vec_any_lt (ua, ub); > + extern int im; im = vec_any_ne (ua, ub); > + > + vector short ssa = {9, 16, 25, 36, 1, 2, 3, 4}; > + vector short ssb = {-8, -27, -64, -125, 2, 3, 5, 3}; > + extern vector short sscc; sscc = vec_and (ssa, ssb); > + extern vector short sscd; sscd = vec_mergeh (ssa, ssb); > + extern vector short ssce; ssce = vec_mergel (ssa, ssb); > + > + vector int sia = {9, 16, 25, 36}; > + vector int sib = {-8, -27, -64, -125}; > + extern vector int sicc; sicc = vec_and (sia, sib); > + extern vector int sicd; sicd = vec_andc (sia, sib); > + extern vector int sig; sig = vec_mergel (sia, sib); > + > + vector unsigned int uia = {9, 16, 25, 36}; > + vector unsigned int uib = {8, 27, 
64, 125}; > + extern vector unsigned int uicc; uicc = vec_and (uia, uib); > + extern vector unsigned int uidd; uidd = vec_andc (uia, uib); > + extern vector unsigned int uig; uig = vec_mergel (uia, uib); > + > + vector bool char bca = {0, 1, 4, 7}; > + vector bool char bcb = {-8, 9, 2, 9}; > + extern vector bool char bcc; bcc= vec_and (bca, bcb); > + extern vector bool char bcd; bcd = vec_andc (bca, bcb); > + extern vector bool char bce; bce = vec_mergel (bca, bcb); > + > + vector bool short bsa = {0, -1, -1, 0, 3, 4, 6, 7}; > + vector bool short bsb = {-1, -1, 0, -1, 0, 0, 0, 0}; > + extern vector bool short bscc; bscc = vec_and (bsa, bsb); > + extern vector bool short bscd; bscd = vec_andc (bsa, bsb); > + extern vector bool short bsce; bsce = vec_mergel (bsa, bsb); > + > + vector bool int bia = {0, -1, -1, 0}; > + vector bool int bib = {-1, -1, 0, -1}; > + extern vector bool int bicc; bicc = vec_and (bia, bib); > + extern vector bool int bicd; bicd = vec_andc (bia, bib); > + extern vector bool int bide; bide = vec_mergel (bia, bib); > + > + extern vector unsigned int uie; uie = vec_packsu (ua, ub); > + > + extern vector long long l2; l2 = vec_cntlz (la); > + extern vector unsigned long long u2; u2 = vec_cntlz (ua); > + extern vector int sie; sie = vec_cntlz (sia); > + extern vector unsigned int uif; uif = vec_cntlz (uia); > + extern vector short sszz; sszz = vec_cntlz (ssa); > + > + vector unsigned short usa = {81, 72, 63, 54, 45, 36, 27, 18}; > + vector unsigned short usb = {81, 72, 63, 54, 45, 36, 27, 18}; > + extern vector unsigned short usd; usd = vec_and (usa, usb); > + extern vector unsigned short use; use = vec_andc (usa, usb); > + extern vector unsigned short usc; usc = vec_cntlz (usa); > + extern vector unsigned short uscd; uscd = vec_mergeh (usa, usb); > + extern vector unsigned short usce; usce = vec_mergel (usa, usb); > + > + vector signed char sca = {-4, 3, -9, 15, -31, 31, 0, 0, > + 1, 117, -36, 99, 98, 97, 96, 95}; > + extern vector signed char 
scb; scb = vec_cntlz (sca); > + extern vector signed char scc; scc = vec_mergel (sca, scb); > + > + extern vector unsigned char cb; cb = vec_cntlz (ca); > + > + extern vector double dd; dd = vec_xl (0, &y); > + vec_xst (dd, 0, &z); > + > + extern vector double dzz; dzz = vec_round (dd); > + extern vector double dzz1; dzz1 = vec_rsqrt (dd); > + extern vector double dzz2; dzz2 = vec_rsqrte (dd); > + > + extern vector double dff; dff = vec_splat (de, 0); > + extern vector double dgg; dgg = vec_splat (de, 1); > + extern vector long long l3; l3 = vec_splat (l2, 0); > + extern vector long long l4; l4 = vec_splat (l2, 1); > + extern vector unsigned long long u3; u3 = vec_splat (u2, 0); > + extern vector unsigned long long u4; u4 = vec_splat (u2, 1); > + extern vector bool long long l5; l5 = vec_splat (ld, 0); > + extern vector bool long long l6; l6 = vec_splat (ld, 1); > + extern vector bool long long l10; l10 = vec_mergee (ld, ld); > + extern vector bool long long l11; l11 = vec_mergeo (ld, ld); > + > + extern vector long long l7; l7 = vec_div (l3, l4); > + extern vector unsigned long long u5; u5 = vec_div (u3, u4); > + extern vector long long l12; l12 = vec_mergee (la, lb); > + extern vector long long l13; l13 = vec_mergeo (la, lb); > + extern vector unsigned long long u8; u8 = vec_mergee (u3, u4); > + extern vector unsigned long long u9; u9 = vec_mergeo (u3, u4); > + > + extern vector long long l8; l8 = vec_mul (l3, l4); > + extern vector unsigned long long u6; u6 = vec_mul (u3, u4); > + > + extern vector double dh; dh = vec_ctf (la, -2); > + extern vector double di; di = vec_ctf (ua, 2); > + extern vector int sz; sz = vec_cts (fa, 0x1F); > + extern vector long long l9; l9 = vec_cts (dh, -2); > + extern vector unsigned long long u7; u7 = vec_ctu (di, 2); > + extern vector unsigned int usz; usz = vec_ctu (fa, 0x1F); > + > + extern vector float f1; f1 = vec_mergee (fa, fb); > + extern vector float f2; f2 = vec_mergeo (fa, fb); > + > + extern vector double d1; d1 = 
vec_mergee (da, db); > + extern vector double d2; d2 = vec_mergeo (da, db); > + > + return 0; > +} > + > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-char.c > b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-char.c > new file mode 100644 > index 0000000..1762603 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-char.c > @@ -0,0 +1,55 @@ > +/* Verify that overloaded built-ins for vec_merge* with char > + inputs produce the right code. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_altivec_ok } */ > +/* { dg-options "-maltivec -O2" } */ > + > +#include <altivec.h> > + > +vector signed char > +test_misc () { > + vector signed char vsc1c = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; > + vector signed char vsc2c = {9,10,11,12,13,14,15,16,0,1,2,3,4,5,6,7}; > + return vec_mergel (vsc1c, vsc2c); > +} > + > +vector bool char > +testbc_l (vector bool char vbc2, vector bool char vbc3) > +{ > + return vec_mergel (vbc2, vbc3); > +} > + > +vector signed char > +testsc_l (vector signed char vsc2, vector signed char vsc3) > +{ > + return vec_mergel (vsc2, vsc3); > +} > + > +vector unsigned char > +testuc_l (vector unsigned char vuc2, vector unsigned char vuc3) > +{ > + return vec_mergel (vuc2, vuc3); > +} > + > +vector bool char > +testbc_h (vector bool char vbc2, vector bool char vbc3) > +{ > + return vec_mergeh (vbc2, vbc3); > +} > + > +vector signed char > +testsc_h (vector signed char vsc2, vector signed char vsc3) > +{ > + return vec_mergeh (vsc2, vsc3); > +} > + > +vector unsigned char > +testuc_h (vector unsigned char vuc2, vector unsigned char vuc3) > +{ > + return vec_mergeh (vuc2, vuc3); > +} > + > +/* { dg-final { scan-assembler-times "vmrghb" 3 } } */ > +/* { dg-final { scan-assembler-times "vmrglb" 3 } } */ > + > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-double.c > b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-double.c > new file mode 100644 > index 0000000..25f4bc6 > --- 
/dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-double.c > @@ -0,0 +1,25 @@ > +/* Verify that overloaded built-ins for vec_merge* with float and > + double inputs produce the right code. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_vsx_ok } */ > +/* { dg-options "-mvsx -O2" } */ > + > +#include <altivec.h> > + > +vector double > +testd_l (vector double vd2, vector double vd3) > +{ > + return vec_mergel (vd2, vd3); > +} > + > +vector double > +testd_h (vector double vd2, vector double vd3) > +{ > + return vec_mergeh (vd2, vd3); > +} > + > +/* vec_merge with doubles tend to just use xxpermdi (3 ea for BE, 1 ea for > LE). */ > +/* { dg-final { scan-assembler-times "xxpermdi" 2 { target { powerpc*le-*-* > } } } } */ > +/* { dg-final { scan-assembler-times "xxpermdi" 6 { target { powerpc-*-* } > } } } */ > + > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-float.c > b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-float.c > new file mode 100644 > index 0000000..b362587 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-float.c > @@ -0,0 +1,26 @@ > +/* Verify that overloaded built-ins for vec_merge* with float > + inputs produce the right code. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_vsx_ok } */ > +/* { dg-options "-maltivec -O2" } */ > + > +#include <altivec.h> > + > +vector float > +testf_l (vector float vf2, vector float vf3) > +{ > + return vec_mergel (vf2, vf3); > +} > + > +vector float > +testf_h (vector float vf2, vector float vf3) > +{ > + return vec_mergeh (vf2, vf3); > +} > + > +/* mergeh with floats use xxmrgh{l,w} (1 ea) insns. 
*/ > + > +/* { dg-final { scan-assembler-times "xxmrghw|vmrghw" 1 } } */ > +/* { dg-final { scan-assembler-times "xxmrglw|vmrglw" 1 } } */ > + > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-int.c > b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-int.c > new file mode 100644 > index 0000000..4da09fd > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-int.c > @@ -0,0 +1,48 @@ > +/* Verify that overloaded built-ins for vec_merge* with int > + inputs produce the right code. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_altivec_ok } */ > +/* { dg-options "-maltivec -O2 " } */ > + > +#include <altivec.h> > + > +vector bool int > +testbi_l (vector bool int vbi2, vector bool int vbi3) > +{ > + return vec_mergel (vbi2, vbi3); > +} > + > +vector signed int > +testsi_l (vector signed int vsi2, vector signed int vsi3) > +{ > + return vec_mergel (vsi2, vsi3); > +} > + > +vector unsigned int > +testui_l (vector unsigned int vui2, vector unsigned int vui3) > +{ > + return vec_mergel (vui2, vui3); > +} > + > +vector bool int > +testbi_h (vector bool int vbi2, vector bool int vbi3) > +{ > + return vec_mergeh (vbi2, vbi3); > +} > + > +vector signed int > +testsi_h (vector signed int vsi2, vector signed int vsi3) > +{ > + return vec_mergeh (vsi2, vsi3); > +} > + > +vector unsigned int > +testui_h (vector unsigned int vui2, vector unsigned int vui3) > +{ > + return vec_mergeh (vui2, vui3); > +} > + > +/* { dg-final { scan-assembler-times "vmrghw|xxmrghw" 3 } } */ > +/* { dg-final { scan-assembler-times "vmrglw|xxmrglw" 3 } } */ > + > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-longlong.c > b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-longlong.c > new file mode 100644 > index 0000000..ab5f54e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-longlong.c > @@ -0,0 +1,48 @@ > +/* Verify that overloaded built-ins for vec_merge* with long long > + inputs produce the 
right code. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_p8vector_ok } */ > +/* { dg-options "-mvsx -O2" } */ > + > +#include <altivec.h> > + > +vector bool long long > +testbl_l (vector bool long long vbl2, vector bool long long vbl3) > +{ > + return vec_mergel (vbl2, vbl3); > +} > + > +vector signed long long > +testsl_l (vector signed long long vsl2, vector signed long long vsl3) > +{ > + return vec_mergel (vsl2, vsl3); > +} > + > +vector unsigned long long > +testul_l (vector unsigned long long vul2, vector unsigned long long vul3) > +{ > + return vec_mergel (vul2, vul3); > +} > + > +vector bool long long > +testbl_h (vector bool long long vbl2, vector bool long long vbl3) > +{ > + return vec_mergeh (vbl2, vbl3); > +} > + > +vector signed long long > +testsl_h (vector signed long long vsl2, vector signed long long vsl3) > +{ > + return vec_mergeh (vsl2, vsl3); > +} > + > +vector unsigned long long > +testul_h (vector unsigned long long vul2, vector unsigned long long vul3) > +{ > + return vec_mergeh (vul2, vul3); > +} > + > +/* mergeh with longlong types use xxpermdi (1 ea). */ > +/* { dg-final { scan-assembler-times "xxpermdi" 6 } } */ > + > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-pixel.c > b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-pixel.c > new file mode 100644 > index 0000000..59463ae > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-pixel.c > @@ -0,0 +1,24 @@ > +/* Verify that overloaded built-ins for vec_splat with pixel > + inputs produce the right code. 
*/ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_vsx_ok } */ > +/* { dg-options "-maltivec -mvsx -O2" } */ > + > +#include <altivec.h> > + > +vector pixel > +testf_el (vector pixel px2, vector pixel px3) > +{ > + return vec_mergel (px2, px3); > +} > + > +vector pixel > +testf_eh (vector pixel px2, vector pixel px3) > +{ > + return vec_mergeh (px2, px3); > +} > + > +/* { dg-final { scan-assembler-times "vmrghh" 1 } } */ > +/* { dg-final { scan-assembler-times "vmrglh" 1 } } */ > + > diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-short.c > b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-short.c > new file mode 100644 > index 0000000..b3a0362 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-mergehl-short.c > @@ -0,0 +1,48 @@ > +/* Verify that overloaded built-ins for vec_merge* with int > + inputs produce the right code. */ > + > +/* { dg-do compile } */ > +/* { dg-require-effective-target powerpc_altivec_ok } */ > +/* { dg-options "-maltivec -O2" } */ > + > +#include <altivec.h> > + > +vector bool short > +testbi_el (vector bool short vbs2, vector bool short vbs3) > +{ > + return vec_mergel (vbs2, vbs3); > +} > + > +vector signed short > +testsi_el (vector signed short vss2, vector signed short vss3) > +{ > + return vec_mergel (vss2, vss3); > +} > + > +vector unsigned short > +testui_el (vector unsigned short vus2, vector unsigned short vus3) > +{ > + return vec_mergel (vus2, vus3); > +} > + > +vector bool short > +testbi_eh (vector bool short vbs2, vector bool short vbs3) > +{ > + return vec_mergeh (vbs2, vbs3); > +} > + > +vector signed short > +testsi_eh (vector signed short vss2, vector signed short vss3) > +{ > + return vec_mergeh (vss2, vss3); > +} > + > +vector unsigned short > +testui_eh (vector unsigned short vus2, vector unsigned short vus3) > +{ > + return vec_mergeh (vus2, vus3); > +} > + > +/* { dg-final { scan-assembler-times "vmrghh" 3 } } */ > +/* { dg-final { scan-assembler-times 
"vmrglh" 3 } } */ > + > >