Here's a new version of the patch.
On 01/12/2022 14:16, Jakub Jelinek wrote:
+void __attribute__((noinline))
You should use noipa attribute instead of noinline on callers
which aren't declare simd (on declare simd it would prevent cloning
which is essential for the declare simd behavior), so that you don't
get surprises e.g. from extra ipa cp etc.
Fixed.
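For reference, a minimal sketch of how the revised tests are now annotated (excerpted from vect-simd-clone-16.c below; prototypes only):

#pragma omp declare simd
TYPE __attribute__((noinline))   /* noinline only: the simd clones must still be created.  */
foo (TYPE a);

void __attribute__((noipa))      /* noipa on the plain callers, as suggested.  */
masked (TYPE * __restrict a, TYPE * __restrict b, int size);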
+/* Ensure the in-branch simd clones are used on targets that support
+ them. These counts include all calls and definitions. */
+
+/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */
Drop lines like the above.
I don't want to drop the comment because I get so frustrated by
testcases that fail when something changes and it's not obvious what the
original author was actually trying to test.
I've tried to fix the -flto thing and I can't figure out how. The
problem seems to be that there are two dump files from the two compiler
invocations and it scans the wrong one. AArch64 has the same problem.
+/* { dg-final { scan-tree-dump-times "simdclone" 18 "optimized" { target x86_64-*-* } } } */
+/* { dg-final { scan-tree-dump-times "simdclone" 7 "optimized" { target amdgcn-*-* } } } */
And scan-tree-dump-times " = foo.simdclone" 2 "optimized"; I'd think that
should be the right number for all of x86_64, amdgcn and aarch64. And
please don't forget about i?86-*-* too.
I've switched the pattern and changed to scanning the "vect" dump (instead
of "optimized") so that the later transformations don't mess up the counts.
However, there are still other reasons why the count varies. It might be
that those can be turned off with options somehow, but testing those cases
is probably valuable too. The counts are now 2, 3, or 4, instead of 18, so
that's an improvement.
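For example, the int variant (vect-simd-clone-16.c below) now checks:

/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! aarch64*-*-* } } } } */
/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target aarch64*-*-* } } } */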
+/* TODO: aarch64 */
For aarch64, one would need to include it in
check_effective_target_vect_simd_clones
first...
I've done so and tested it, but that's not included in the patch because
there were other testcases that started reporting failures. None of the new
testcases fail for AArch64.
OK now?
Andrew
vect: inbranch SIMD clones
There has been support for generating "inbranch" SIMD clones for a long time,
but nothing actually uses them (as far as I can see).
This patch adds support for a subset of the possible cases (those using
mask_mode == VOIDmode). The other cases fail to vectorize, just as before,
so there should be no regressions.
This subset should cover all the cases needed by amdgcn, at present.
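As an illustration, this is the kind of loop that can now be vectorized (a minimal sketch along the lines of the new testcases; the names are only illustrative):

#pragma omp declare simd
int foo (int a);

void
bar (int *__restrict a, int *__restrict b, int n)
{
  #pragma omp simd
  for (int i = 0; i < n; i++)
    /* If-conversion rewrites the conditional call to IFN_MASK_CALL; the
       vectorizer then matches it to an "inbranch" clone of foo and builds
       the mask vector argument.  */
    b[i] = a[i] < 1 ? foo (a[i]) : a[i];
}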
gcc/ChangeLog:
* internal-fn.cc (expand_MASK_CALL): New.
* internal-fn.def (MASK_CALL): New.
* internal-fn.h (expand_MASK_CALL): New prototype.
* omp-simd-clone.cc (simd_clone_adjust_argument_types): Set vector_type
for mask arguments also.
* tree-if-conv.cc: Include cgraph.h.
(if_convertible_stmt_p): Do if-conversion for calls to functions with SIMD clones.
(predicate_statements): Convert functions to IFN_MASK_CALL.
* tree-vect-loop.cc (vect_get_datarefs_in_loop): Recognise
IFN_MASK_CALL as a SIMD function call.
* tree-vect-stmts.cc (vectorizable_simd_clone_call): Handle
IFN_MASK_CALL as an inbranch SIMD function call.
Generate the mask vector arguments.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/vect-simd-clone-16.c: New test.
* gcc.dg/vect/vect-simd-clone-16b.c: New test.
* gcc.dg/vect/vect-simd-clone-16c.c: New test.
* gcc.dg/vect/vect-simd-clone-16d.c: New test.
* gcc.dg/vect/vect-simd-clone-16e.c: New test.
* gcc.dg/vect/vect-simd-clone-16f.c: New test.
* gcc.dg/vect/vect-simd-clone-17.c: New test.
* gcc.dg/vect/vect-simd-clone-17b.c: New test.
* gcc.dg/vect/vect-simd-clone-17c.c: New test.
* gcc.dg/vect/vect-simd-clone-17d.c: New test.
* gcc.dg/vect/vect-simd-clone-17e.c: New test.
* gcc.dg/vect/vect-simd-clone-17f.c: New test.
* gcc.dg/vect/vect-simd-clone-18.c: New test.
* gcc.dg/vect/vect-simd-clone-18b.c: New test.
* gcc.dg/vect/vect-simd-clone-18c.c: New test.
* gcc.dg/vect/vect-simd-clone-18d.c: New test.
* gcc.dg/vect/vect-simd-clone-18e.c: New test.
* gcc.dg/vect/vect-simd-clone-18f.c: New test.
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 9471f543191..d9e11bfc62a 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4527,3 +4527,10 @@ void
expand_ASSUME (internal_fn, gcall *)
{
}
+
+void
+expand_MASK_CALL (internal_fn, gcall *)
+{
+ /* This IFN should only exist between ifcvt and vect passes. */
+ gcc_unreachable ();
+}
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 61516dab66d..301c3780659 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -466,6 +466,9 @@ DEF_INTERNAL_FN (TRAP, ECF_CONST | ECF_LEAF | ECF_NORETURN
DEF_INTERNAL_FN (ASSUME, ECF_CONST | ECF_LEAF | ECF_NOTHROW
| ECF_LOOPING_CONST_OR_PURE, NULL)
+/* For if-conversion of inbranch SIMD clones. */
+DEF_INTERNAL_FN (MASK_CALL, ECF_NOVOPS, NULL)
+
#undef DEF_INTERNAL_INT_FN
#undef DEF_INTERNAL_FLT_FN
#undef DEF_INTERNAL_FLT_FLOATN_FN
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 21b1ce43df6..ced92c041bb 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -244,6 +244,7 @@ extern void expand_SHUFFLEVECTOR (internal_fn, gcall *);
extern void expand_SPACESHIP (internal_fn, gcall *);
extern void expand_TRAP (internal_fn, gcall *);
extern void expand_ASSUME (internal_fn, gcall *);
+extern void expand_MASK_CALL (internal_fn, gcall *);
extern bool vectorized_internal_fn_supported_p (internal_fn, tree);
diff --git a/gcc/omp-simd-clone.cc b/gcc/omp-simd-clone.cc
index 21d69aa8747..afb7d99747b 100644
--- a/gcc/omp-simd-clone.cc
+++ b/gcc/omp-simd-clone.cc
@@ -937,6 +937,7 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
}
sc->args[i].orig_type = base_type;
sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK;
+ sc->args[i].vector_type = adj.type;
}
if (node->definition)
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c
new file mode 100644
index 00000000000..ce9a6dad1b7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c
@@ -0,0 +1,89 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+/* Test that simd inbranch clones work correctly. */
+
+#ifndef TYPE
+#define TYPE int
+#endif
+
+/* A simple function that will be cloned. */
+#pragma omp declare simd
+TYPE __attribute__((noinline))
+foo (TYPE a)
+{
+ return a + 1;
+}
+
+/* Check that "inbranch" clones are called correctly. */
+
+void __attribute__((noipa))
+masked (TYPE * __restrict a, TYPE * __restrict b, int size)
+{
+ #pragma omp simd
+ for (int i = 0; i < size; i++)
+ b[i] = a[i]<1 ? foo(a[i]) : a[i];
+}
+
+/* Check that "inbranch" works when there might be unrolling. */
+
+void __attribute__((noipa))
+masked_fixed (TYPE * __restrict a, TYPE * __restrict b)
+{
+ #pragma omp simd
+ for (int i = 0; i < 128; i++)
+ b[i] = a[i]<1 ? foo(a[i]) : a[i];
+}
+
+/* Validate the outputs. */
+
+void
+check_masked (TYPE *b, int size)
+{
+ for (int i = 0; i < size; i++)
+ if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1))
+ || ((TYPE)i >= 1 && b[i] != (TYPE)i))
+ {
+ __builtin_printf ("error at %d\n", i);
+ __builtin_exit (1);
+ }
+}
+
+int
+main ()
+{
+ TYPE a[1024];
+ TYPE b[1024];
+
+ for (int i = 0; i < 1024; i++)
+ a[i] = i;
+
+ masked_fixed (a, b);
+ check_masked (b, 128);
+
+ /* Test various sizes to cover machines with different vectorization
+ factors. */
+ for (int size = 8; size <= 1024; size *= 2)
+ {
+ masked (a, b, size);
+ check_masked (b, size);
+ }
+
+ /* Test sizes that might exercise the partial vector code-path. */
+ for (int size = 8; size <= 1024; size *= 2)
+ {
+ masked (a, b, size-4);
+ check_masked (b, size-4);
+ }
+
+ return 0;
+}
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! aarch64*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target aarch64*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c
new file mode 100644
index 00000000000..af543b6573d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE float
+#include "vect-simd-clone-16.c"
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { avx_runtime || aarch64*-*-* } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime || aarch64*-*-* } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c
new file mode 100644
index 00000000000..677548a9439
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE short
+#include "vect-simd-clone-16.c"
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=short. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c
new file mode 100644
index 00000000000..a9ae9932b30
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE char
+#include "vect-simd-clone-16.c"
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=char. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c
new file mode 100644
index 00000000000..c8b482bf2e7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c
@@ -0,0 +1,16 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE double
+#include "vect-simd-clone-16.c"
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops.
+ Some targets use pairs of vectors and do twice the calls. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { { ! avx_runtime } && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c
new file mode 100644
index 00000000000..f42ac082678
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE __INT64_TYPE__
+#include "vect-simd-clone-16.c"
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use pairs of vectors and do twice the calls. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { i686*-*-* && { ! lp64 } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c
new file mode 100644
index 00000000000..756225e4306
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c
@@ -0,0 +1,89 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+/* Test that simd inbranch clones work correctly. */
+
+#ifndef TYPE
+#define TYPE int
+#endif
+
+/* A simple function that will be cloned. */
+#pragma omp declare simd uniform(b)
+TYPE __attribute__((noinline))
+foo (TYPE a, TYPE b)
+{
+ return a + b;
+}
+
+/* Check that "inbranch" clones are called correctly. */
+
+void __attribute__((noipa))
+masked (TYPE * __restrict a, TYPE * __restrict b, int size)
+{
+ #pragma omp simd
+ for (int i = 0; i < size; i++)
+ b[i] = a[i]<1 ? foo(a[i], 1) : a[i];
+}
+
+/* Check that "inbranch" works when there might be unrolling. */
+
+void __attribute__((noipa))
+masked_fixed (TYPE * __restrict a, TYPE * __restrict b)
+{
+ #pragma omp simd
+ for (int i = 0; i < 128; i++)
+ b[i] = a[i]<1 ? foo(a[i], 1) : a[i];
+}
+
+/* Validate the outputs. */
+
+void
+check_masked (TYPE *b, int size)
+{
+ for (int i = 0; i < size; i++)
+ if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1))
+ || ((TYPE)i >= 1 && b[i] != (TYPE)i))
+ {
+ __builtin_printf ("error at %d\n", i);
+ __builtin_exit (1);
+ }
+}
+
+int
+main ()
+{
+ TYPE a[1024];
+ TYPE b[1024];
+
+ for (int i = 0; i < 1024; i++)
+ a[i] = i;
+
+ masked_fixed (a, b);
+ check_masked (b, 128);
+
+ /* Test various sizes to cover machines with different vectorization
+ factors. */
+ for (int size = 8; size <= 1024; size *= 2)
+ {
+ masked (a, b, size);
+ check_masked (b, size);
+ }
+
+ /* Test sizes that might exercise the partial vector code-path. */
+ for (int size = 8; size <= 1024; size *= 2)
+ {
+ masked (a, b, size-4);
+ check_masked (b, size-4);
+ }
+
+ return 0;
+}
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! aarch64*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target aarch64*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c
new file mode 100644
index 00000000000..8731c268644
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE float
+#include "vect-simd-clone-17.c"
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { avx_runtime || aarch64*-*-* } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime || aarch64*-*-* } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c
new file mode 100644
index 00000000000..6683d1a9cbf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE short
+#include "vect-simd-clone-17.c"
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=short. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c
new file mode 100644
index 00000000000..d38bde6d85e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE char
+#include "vect-simd-clone-17.c"
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=char. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c
new file mode 100644
index 00000000000..f2a428c62c1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c
@@ -0,0 +1,16 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE double
+#include "vect-simd-clone-17.c"
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops.
+ Some targets use pairs of vectors and do twice the calls. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { { ! avx_runtime } && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c
new file mode 100644
index 00000000000..cd05dec9632
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE __INT64_TYPE__
+#include "vect-simd-clone-17.c"
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use pairs of vectors and do twice the calls. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { i686*-*-* && { ! lp64 } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c
new file mode 100644
index 00000000000..febf9fdf85e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c
@@ -0,0 +1,89 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+/* Test that simd inbranch clones work correctly. */
+
+#ifndef TYPE
+#define TYPE int
+#endif
+
+/* A simple function that will be cloned. */
+#pragma omp declare simd uniform(b)
+TYPE __attribute__((noinline))
+foo (TYPE b, TYPE a)
+{
+ return a + b;
+}
+
+/* Check that "inbranch" clones are called correctly. */
+
+void __attribute__((noipa))
+masked (TYPE * __restrict a, TYPE * __restrict b, int size)
+{
+ #pragma omp simd
+ for (int i = 0; i < size; i++)
+ b[i] = a[i]<1 ? foo(1, a[i]) : a[i];
+}
+
+/* Check that "inbranch" works when there might be unrolling. */
+
+void __attribute__((noipa))
+masked_fixed (TYPE * __restrict a, TYPE * __restrict b)
+{
+ #pragma omp simd
+ for (int i = 0; i < 128; i++)
+ b[i] = a[i]<1 ? foo(1, a[i]) : a[i];
+}
+
+/* Validate the outputs. */
+
+void
+check_masked (TYPE *b, int size)
+{
+ for (int i = 0; i < size; i++)
+ if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1))
+ || ((TYPE)i >= 1 && b[i] != (TYPE)i))
+ {
+ __builtin_printf ("error at %d\n", i);
+ __builtin_exit (1);
+ }
+}
+
+int
+main ()
+{
+ TYPE a[1024];
+ TYPE b[1024];
+
+ for (int i = 0; i < 1024; i++)
+ a[i] = i;
+
+ masked_fixed (a, b);
+ check_masked (b, 128);
+
+ /* Test various sizes to cover machines with different vectorization
+ factors. */
+ for (int size = 8; size <= 1024; size *= 2)
+ {
+ masked (a, b, size);
+ check_masked (b, size);
+ }
+
+ /* Test sizes that might exercise the partial vector code-path. */
+ for (int size = 8; size <= 1024; size *= 2)
+ {
+ masked (a, b, size-4);
+ check_masked (b, size-4);
+ }
+
+ return 0;
+}
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! aarch64*-*-* } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target aarch64*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c
new file mode 100644
index 00000000000..120993e517a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE float
+#include "vect-simd-clone-18.c"
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { avx_runtime || aarch64*-*-* } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime || aarch64*-*-* } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c
new file mode 100644
index 00000000000..0d1fc6de4e4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE short
+#include "vect-simd-clone-18.c"
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=short. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c
new file mode 100644
index 00000000000..1e6c028fc47
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c
@@ -0,0 +1,17 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE char
+#include "vect-simd-clone-18.c"
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { x86_64*-*-* || { i686*-*-* || aarch64*-*-* } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { aarch64*-*-* } } } } */
+
+/* x86_64 fails to use in-branch clones for TYPE=char. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 0 "vect" { target x86_64*-*-* i686*-*-* } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c
new file mode 100644
index 00000000000..9d20e52cb9a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c
@@ -0,0 +1,16 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE double
+#include "vect-simd-clone-18.c"
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use another call for the epilogue loops.
+ Some targets use pairs of vectors and do twice the calls. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { { ! avx_runtime } && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 3 "vect" { target { avx_runtime && { ! { i686*-*-* && { ! lp64 } } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c
new file mode 100644
index 00000000000..09ee7ff60fd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c
@@ -0,0 +1,14 @@
+/* { dg-require-effective-target vect_simd_clones } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+
+#define TYPE __INT64_TYPE__
+#include "vect-simd-clone-18.c"
+
+/* Ensure the in-branch simd clones are used on targets that support them.
+ Some targets use pairs of vectors and do twice the calls. */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 2 "vect" { target { ! { i686*-*-* && { ! lp64 } } } } } } */
+/* { dg-final { scan-tree-dump-times {[\n\r] [^\n]* = foo\.simdclone} 4 "vect" { target { i686*-*-* && { ! lp64 } } } } } */
+
+/* The LTO test produces two dump files and we scan the wrong one. */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 64b20b4a9e1..54f687b1172 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -123,6 +123,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-ssa-dse.h"
#include "tree-vectorizer.h"
#include "tree-eh.h"
+#include "cgraph.h"
/* For lang_hooks.types.type_for_mode. */
#include "langhooks.h"
@@ -1063,7 +1064,8 @@ if_convertible_gimple_assign_stmt_p (gimple *stmt,
A statement is if-convertible if:
- it is an if-convertible GIMPLE_ASSIGN,
- it is a GIMPLE_LABEL or a GIMPLE_COND,
- - it is builtins call. */
+ - it is builtins call,
+ - it is a call to a function with a SIMD clone. */
static bool
if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
@@ -1083,13 +1085,23 @@ if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
tree fndecl = gimple_call_fndecl (stmt);
if (fndecl)
{
+ /* We can vectorize some builtins and functions with SIMD
+ "inbranch" clones. */
int flags = gimple_call_flags (stmt);
+ struct cgraph_node *node = cgraph_node::get (fndecl);
if ((flags & ECF_CONST)
&& !(flags & ECF_LOOPING_CONST_OR_PURE)
- /* We can only vectorize some builtins at the moment,
- so restrict if-conversion to those. */
&& fndecl_built_in_p (fndecl))
return true;
+ if (node && node->simd_clones != NULL)
+ /* Ensure that at least one clone can be "inbranch". */
+ for (struct cgraph_node *n = node->simd_clones; n != NULL;
+ n = n->simdclone->next_clone)
+ if (n->simdclone->inbranch)
+ {
+ need_to_predicate = true;
+ return true;
+ }
}
return false;
}
@@ -2603,6 +2615,29 @@ predicate_statements (loop_p loop)
gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
update_stmt (stmt);
}
+
+ /* Convert functions that have a SIMD clone to IFN_MASK_CALL. This
+ will cause the vectorizer to match the "in branch" clone variants,
+ and serves to build the mask vector in a natural way. */
+ gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
+ if (call && !gimple_call_internal_p (call))
+ {
+ tree orig_fn = gimple_call_fn (call);
+ int orig_nargs = gimple_call_num_args (call);
+ auto_vec<tree> args;
+ args.safe_push (orig_fn);
+ for (int i = 0; i < orig_nargs; i++)
+ args.safe_push (gimple_call_arg (call, i));
+ args.safe_push (cond);
+
+ /* Replace the call with an IFN_MASK_CALL that has the extra
+ condition parameter. */
+ gcall *new_call = gimple_build_call_internal_vec (IFN_MASK_CALL,
+ args);
+ gimple_call_set_lhs (new_call, gimple_call_lhs (call));
+ gsi_replace (&gsi, new_call, true);
+ }
+
lhs = gimple_get_lhs (gsi_stmt (gsi));
if (lhs && TREE_CODE (lhs) == SSA_NAME)
ssa_names.add (lhs);
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index aacbb12580c..fce5738aa58 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2121,6 +2121,14 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
if (is_gimple_call (stmt) && loop->safelen)
{
tree fndecl = gimple_call_fndecl (stmt), op;
+ if (fndecl == NULL_TREE
+ && gimple_call_internal_p (stmt, IFN_MASK_CALL))
+ {
+ fndecl = gimple_call_arg (stmt, 0);
+ gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
+ fndecl = TREE_OPERAND (fndecl, 0);
+ gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
+ }
if (fndecl != NULL_TREE)
{
cgraph_node *node = cgraph_node::get (fndecl);
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 5485da58b38..48048938291 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3987,6 +3987,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
size_t i, nargs;
tree lhs, rtype, ratype;
vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
+ int arg_offset = 0;
/* Is STMT a vectorizable call? */
gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
@@ -3994,6 +3995,15 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
return false;
fndecl = gimple_call_fndecl (stmt);
+ if (fndecl == NULL_TREE
+ && gimple_call_internal_p (stmt, IFN_MASK_CALL))
+ {
+ fndecl = gimple_call_arg (stmt, 0);
+ gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
+ fndecl = TREE_OPERAND (fndecl, 0);
+ gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
+ arg_offset = 1;
+ }
if (fndecl == NULL_TREE)
return false;
@@ -4024,7 +4034,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
return false;
/* Process function arguments. */
- nargs = gimple_call_num_args (stmt);
+ nargs = gimple_call_num_args (stmt) - arg_offset;
/* Bail out if the function has zero arguments. */
if (nargs == 0)
@@ -4042,7 +4052,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
thisarginfo.op = NULL_TREE;
thisarginfo.simd_lane_linear = false;
- op = gimple_call_arg (stmt, i);
+ op = gimple_call_arg (stmt, i + arg_offset);
if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
&thisarginfo.vectype)
|| thisarginfo.dt == vect_uninitialized_def)
@@ -4057,16 +4067,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
|| thisarginfo.dt == vect_external_def)
gcc_assert (thisarginfo.vectype == NULL_TREE);
else
- {
- gcc_assert (thisarginfo.vectype != NULL_TREE);
- if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "vector mask arguments are not supported\n");
- return false;
- }
- }
+ gcc_assert (thisarginfo.vectype != NULL_TREE);
/* For linear arguments, the analyze phase should have saved
the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
@@ -4159,9 +4160,6 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
if (target_badness < 0)
continue;
this_badness += target_badness * 512;
- /* FORNOW: Have to add code to add the mask argument. */
- if (n->simdclone->inbranch)
- continue;
for (i = 0; i < nargs; i++)
{
switch (n->simdclone->args[i].arg_type)
@@ -4169,7 +4167,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
case SIMD_CLONE_ARG_TYPE_VECTOR:
if (!useless_type_conversion_p
(n->simdclone->args[i].orig_type,
- TREE_TYPE (gimple_call_arg (stmt, i))))
+ TREE_TYPE (gimple_call_arg (stmt, i + arg_offset))))
i = -1;
else if (arginfo[i].dt == vect_constant_def
|| arginfo[i].dt == vect_external_def
@@ -4199,7 +4197,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
i = -1;
break;
case SIMD_CLONE_ARG_TYPE_MASK:
- gcc_unreachable ();
+ break;
}
if (i == (size_t) -1)
break;
@@ -4225,18 +4223,55 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
return false;
for (i = 0; i < nargs; i++)
- if ((arginfo[i].dt == vect_constant_def
- || arginfo[i].dt == vect_external_def)
- && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
- {
- tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
- arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
- slp_node);
- if (arginfo[i].vectype == NULL
- || !constant_multiple_p (bestn->simdclone->simdlen,
- simd_clone_subparts (arginfo[i].vectype)))
+ {
+ if ((arginfo[i].dt == vect_constant_def
+ || arginfo[i].dt == vect_external_def)
+ && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
+ {
+ tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i + arg_offset));
+ arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
+ slp_node);
+ if (arginfo[i].vectype == NULL
+ || !constant_multiple_p (bestn->simdclone->simdlen,
+ simd_clone_subparts (arginfo[i].vectype)))
+ return false;
+ }
+
+ if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
+ && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "vector mask arguments are not supported.\n");
return false;
- }
+ }
+
+ if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
+ && bestn->simdclone->mask_mode == VOIDmode
+ && (simd_clone_subparts (bestn->simdclone->args[i].vector_type)
+ != simd_clone_subparts (arginfo[i].vectype)))
+ {
+ /* FORNOW we only have partial support for vector-type masks that
+ can't hold all of simdlen. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+ vect_location,
+ "in-branch vector clones are not yet"
+ " supported for mismatched vector sizes.\n");
+ return false;
+ }
+ if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
+ && bestn->simdclone->mask_mode != VOIDmode)
+ {
+ /* FORNOW don't support integer-type masks. */
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION,
+ vect_location,
+ "in-branch vector clones are not yet"
+ " supported for integer mask modes.\n");
+ return false;
+ }
+ }
fndecl = bestn->decl;
nunits = bestn->simdclone->simdlen;
@@ -4326,7 +4361,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
{
unsigned int k, l, m, o;
tree atype;
- op = gimple_call_arg (stmt, i);
+ op = gimple_call_arg (stmt, i + arg_offset);
switch (bestn->simdclone->args[i].arg_type)
{
case SIMD_CLONE_ARG_TYPE_VECTOR:
@@ -4425,6 +4460,65 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
}
}
break;
+ case SIMD_CLONE_ARG_TYPE_MASK:
+ atype = bestn->simdclone->args[i].vector_type;
+ if (bestn->simdclone->mask_mode != VOIDmode)
+ {
+ /* FORNOW: this is disabled above. */
+ gcc_unreachable ();
+ }
+ else
+ {
+ tree elt_type = TREE_TYPE (atype);
+ tree one = fold_convert (elt_type, integer_one_node);
+ tree zero = fold_convert (elt_type, integer_zero_node);
+ o = vector_unroll_factor (nunits,
+ simd_clone_subparts (atype));
+ for (m = j * o; m < (j + 1) * o; m++)
+ {
+ if (simd_clone_subparts (atype)
+ < simd_clone_subparts (arginfo[i].vectype))
+ {
+ /* The mask type has fewer elements than simdlen. */
+
+ /* FORNOW */
+ gcc_unreachable ();
+ }
+ else if (simd_clone_subparts (atype)
+ == simd_clone_subparts (arginfo[i].vectype))
+ {
+ /* The SIMD clone function has the same number of
+ elements as the current function. */
+ if (m == 0)
+ {
+ vect_get_vec_defs_for_operand (vinfo, stmt_info,
+ o * ncopies,
+ op,
+ &vec_oprnds[i]);
+ vec_oprnds_i[i] = 0;
+ }
+ vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
+ vec_oprnd0
+ = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
+ build_vector_from_val (atype, one),
+ build_vector_from_val (atype, zero));
+ gassign *new_stmt
+ = gimple_build_assign (make_ssa_name (atype),
+ vec_oprnd0);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
+ vargs.safe_push (gimple_assign_lhs (new_stmt));
+ }
+ else
+ {
+ /* The mask type has more elements than simdlen. */
+
+ /* FORNOW */
+ gcc_unreachable ();
+ }
+ }
+ }
+ break;
case SIMD_CLONE_ARG_TYPE_UNIFORM:
vargs.safe_push (op);
break;