From 898b4908b32452091da79373129933d5d816cdfc Mon Sep 17 00:00:00 2001
From: Alan Hayward <alan.hayward@arm.com>
Date: Fri, 28 Aug 2015 10:01:15 +0100
Subject: [PATCH] Support for vectorizing conditional expressions

2015-08-28  Alan Hayward <alan.hayward@arm.com>

	PR tree-optimization/65947
	* tree-vect-loop.c
	(vect_is_simple_reduction_1): Find condition reductions.
	(vect_model_reduction_cost): Add condition reduction costs.
	(get_initial_def_for_reduction): Add condition reduction initial var.
	(vect_create_epilog_for_reduction): Add condition reduction epilog.
	(vectorizable_reduction): Condition reduction support.
	* tree-vect-stmts.c
	(vectorizable_condition): Add vect reduction arg.
	* doc/sourcebuild.texi (Vector-specific attributes): Document
	vect_max_reduc.

    testsuite/ChangeLog:

	PR tree-optimization/65947
	* lib/target-supports.exp
	(check_effective_target_vect_max_reduc): Add.
	* gcc.dg/vect/pr65947-1.c: New test.
	* gcc.dg/vect/pr65947-2.c: New test.
	* gcc.dg/vect/pr65947-3.c: New test.
	* gcc.dg/vect/pr65947-4.c: New test.
	* gcc.dg/vect/pr65947-5.c: New test.
	* gcc.dg/vect/pr65947-6.c: New test.
	* gcc.dg/vect/pr65947-7.c: New test.
	* gcc.dg/vect/pr65947-8.c: New test.
	* gcc.dg/vect/pr65947-9.c: New test.
	* gcc.dg/vect/pr65947-10.c: New test.
	* gcc.dg/vect/pr65947-11.c: New test.
---
 gcc/doc/sourcebuild.texi               |   3 +
 gcc/testsuite/gcc.dg/vect/pr65947-1.c  |  39 +++
 gcc/testsuite/gcc.dg/vect/pr65947-10.c |  40 +++
 gcc/testsuite/gcc.dg/vect/pr65947-11.c |  48 +++
 gcc/testsuite/gcc.dg/vect/pr65947-2.c  |  40 +++
 gcc/testsuite/gcc.dg/vect/pr65947-3.c  |  50 ++++
 gcc/testsuite/gcc.dg/vect/pr65947-4.c  |  40 +++
 gcc/testsuite/gcc.dg/vect/pr65947-5.c  |  41 +++
 gcc/testsuite/gcc.dg/vect/pr65947-6.c  |  39 +++
 gcc/testsuite/gcc.dg/vect/pr65947-7.c  |  51 ++++
 gcc/testsuite/gcc.dg/vect/pr65947-8.c  |  41 +++
 gcc/testsuite/gcc.dg/vect/pr65947-9.c  |  42 +++
 gcc/testsuite/lib/target-supports.exp  |  10 +
 gcc/tree-vect-loop.c                   | 515 ++++++++++++++++++++++++++-------
 gcc/tree-vect-stmts.c                  |  44 +--
 gcc/tree-vectorizer.h                  |  11 +-
 16 files changed, 931 insertions(+), 123 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr65947-1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr65947-10.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr65947-11.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr65947-2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr65947-3.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr65947-4.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr65947-5.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr65947-6.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr65947-7.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr65947-8.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr65947-9.c

diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 5dc7c81..61de4a5 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1466,6 +1466,9 @@ Target supports conversion from @code{float} to @code{signed int}.
 
 @item vect_floatuint_cvt
 Target supports conversion from @code{float} to @code{unsigned int}.
+
+@item vect_max_reduc
+Target supports max reduction for vectors.
 @end table
 
 @subsubsection Thread Local Storage attributes
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-1.c b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
new file mode 100644
index 0000000..7933f5c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-1.c
@@ -0,0 +1,39 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 32
+
+/* Simple condition reduction.  */
+
+int
+condition_reduction (int *a, int min_v)
+{
+  int last = -1;
+
+  for (int i = 0; i < N; i++)
+    if (a[i] < min_v)
+      last = i;
+
+  return last;
+}
+
+int
+main (void)
+{
+  int a[N] = {
+  11, -12, 13, 14, 15, 16, 17, 18, 19, 20,
+  1, 2, -3, 4, 5, 6, 7, -8, 9, 10,
+  21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+  31, 32
+  };
+
+  int ret = condition_reduction (a, 16);
+
+  if (ret != 19)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-10.c b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
new file mode 100644
index 0000000..9a43a60
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-10.c
@@ -0,0 +1,40 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 32
+
+/* Non-integer data types.  */
+
+float
+condition_reduction (float *a, float min_v)
+{
+  float last = 0;
+
+  for (int i = 0; i < N; i++)
+    if (a[i] < min_v)
+      last = a[i];
+
+  return last;
+}
+
+int
+main (void)
+{
+  float a[N] = {
+  11.5, 12.2, 13.22, 14.1, 15.2, 16.3, 17, 18.7, 19, 20,
+  1, 2, 3.3, 4.3333, 5.5, 6.23, 7, 8.63, 9, 10.6,
+  21, 22.12, 23.55, 24.76, 25, 26, 27.34, 28.765, 29, 30,
+  31.111, 32.322
+  };
+
+  float ret = condition_reduction (a, 16.7);
+
+  if (ret != (float)10.6)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-11.c b/gcc/testsuite/gcc.dg/vect/pr65947-11.c
new file mode 100644
index 0000000..25064bb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-11.c
@@ -0,0 +1,48 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 37
+
+/* Re-use the result of the condition inside the loop.  Will fail to
+   vectorize.  */
+
+unsigned int
+condition_reduction (unsigned int *a, unsigned int min_v, unsigned int *b)
+{
+  unsigned int last = N + 65;
+
+  for (unsigned int i = 0; i < N; i++)
+    {
+      if (b[i] < min_v)
+	last = i;
+      a[i] = last;
+    }
+  return last;
+}
+
+int
+main (void)
+{
+  unsigned int a[N] = {
+  31, 32, 33, 34, 35, 36, 37,
+  1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+  21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+  11, 12, 13, 14, 15, 16, 17, 18, 19, 20
+  };
+  unsigned int b[N] = {
+  11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+  21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+  1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+  31, 32, 33, 34, 35, 36, 37
+  };
+
+  unsigned int ret = condition_reduction (a, 16, b);
+
+  if (ret != 29)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 0 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-2.c b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
new file mode 100644
index 0000000..9c627d9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-2.c
@@ -0,0 +1,40 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 254
+
+/* Non-simple condition reduction.  */
+
+unsigned char
+condition_reduction (unsigned char *a, unsigned char min_v)
+{
+  unsigned char last = 65;
+
+  for (unsigned char i = 0; i < N; i++)
+    if (a[i] < min_v)
+      last = a[i];
+
+  return last;
+}
+
+int
+main (void)
+{
+  unsigned char a[N] = {
+  11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+  1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+  21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+  31, 32
+  };
+  __builtin_memset (a+32, 43, N-32);
+
+  unsigned char ret = condition_reduction (a, 16);
+
+  if (ret != 10)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
new file mode 100644
index 0000000..e115de2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c
@@ -0,0 +1,50 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 37
+
+/* Non-simple condition reduction with additional variable and unsigned
+   types.  */
+
+unsigned int
+condition_reduction (unsigned int *a, unsigned int min_v, unsigned int *b)
+{
+  unsigned int last = N + 65;
+  unsigned int aval;
+
+  for (unsigned int i = 0; i < N; i++)
+    {
+      aval = a[i];
+      if (b[i] < min_v)
+	last = aval;
+    }
+  return last;
+}
+
+
+int
+main (void)
+{
+  unsigned int a[N] = {
+  31, 32, 33, 34, 35, 36, 37,
+  1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+  21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+  11, 12, 13, 14, 15, 16, 17, 18, 19, 20
+  };
+  unsigned int b[N] = {
+  11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+  21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+  1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+  31, 32, 33, 34, 35, 36, 37
+  };
+
+  unsigned int ret = condition_reduction (a, 16, b);
+
+  if (ret != 13)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-4.c b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
new file mode 100644
index 0000000..76a0567
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-4.c
@@ -0,0 +1,40 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 27
+
+/* Condition reduction with no valid matches at runtime.  */
+
+int
+condition_reduction (int *a, int min_v)
+{
+  int last = N + 96;
+
+  for (int i = 0; i < N; i++)
+    if (a[i] > min_v)
+      last = i;
+
+  return last;
+}
+
+int
+main (void)
+{
+  int a[N] = {
+  11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+  1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+  21, 22, 23, 24, 25, 26, 27
+  };
+
+  int ret = condition_reduction (a, 46);
+
+  /* loop should never have found a value.  */
+  if (ret != N + 96)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-5.c b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
new file mode 100644
index 0000000..360e3b5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-5.c
@@ -0,0 +1,41 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 32
+
+/* Condition reduction where loop size is not known at compile time.  Will fail
+   to vectorize.  Version inlined into main loop will vectorize.  */
+
+unsigned char
+condition_reduction (unsigned char *a, unsigned char min_v, int count)
+{
+  unsigned char last = 65;
+
+  for (int i = 0; i < count; i++)
+    if (a[i] < min_v)
+      last = a[i];
+
+  return last;
+}
+
+int
+main (void)
+{
+  unsigned char a[N] = {
+  11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+  1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+  21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+  31, 32
+  };
+
+  unsigned char ret = condition_reduction (a, 16, N);
+
+  if (ret != 10)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { xfail { ! vect_max_reduc } } } } */
+/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail { ! vect_max_reduc } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-6.c b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
new file mode 100644
index 0000000..4997ef7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-6.c
@@ -0,0 +1,39 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 30
+
+/* Condition reduction where loop type is different than the data type.  */
+
+int
+condition_reduction (int *a, int min_v)
+{
+  int last = N + 65;
+
+  for (char i = 0; i < N; i++)
+    if (a[i] < min_v)
+      last = a[i];
+
+  return last;
+}
+
+
+int
+main (void)
+{
+  int a[N] = {
+  67, 32, 45, 43, 21, -11, 12, 3, 4, 5,
+  6, 76, -32, 56, -32, -1, 4, 5, 6, 99,
+  43, 22, -3, 22, 16, 34, 55, 31, 87, 324
+  };
+
+  int ret = condition_reduction (a, 16);
+
+  if (ret != -3)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { xfail { ! vect_max_reduc } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-7.c b/gcc/testsuite/gcc.dg/vect/pr65947-7.c
new file mode 100644
index 0000000..c86f1fd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-7.c
@@ -0,0 +1,51 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 43
+
+/* Condition reduction where the comparison is of a different type to the
+   data.  Will fail to vectorize.  */
+
+int
+condition_reduction (short *a, int min_v, int *b)
+{
+  int last = N + 65;
+  short aval;
+
+  for (int i = 0; i < N; i++)
+    {
+      aval = a[i];
+      if (b[i] < min_v)
+	last = aval;
+    }
+  return last;
+}
+
+int
+main (void)
+{
+  short a[N] = {
+  31, -32, 133, 324, 335, 36, 37, 45, 11, 65,
+  1, -28, 3, 48, 5, -68, 7, 88, 89, 180,
+  121, -122, 123, 124, -125, 126, 127, 128, 129, 130,
+  11, 12, 13, 14, -15, -16, 17, 18, 19, 20,
+  33, 27, 99
+  };
+  int b[N] = {
+  11, -12, -13, 14, 15, 16, 17, 18, 19, 20,
+  21, -22, 23, 24, -25, 26, 27, 28, 29, 30,
+  1, 62, 3, 14, -15, 6, 37, 48, 99, 10,
+  31, -32, 33, 34, -35, 36, 37, 56, 54, 22,
+  73, 2, 87
+  };
+
+  int ret = condition_reduction (a, 16, b);
+
+  if (ret != 27)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 0 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-8.c b/gcc/testsuite/gcc.dg/vect/pr65947-8.c
new file mode 100644
index 0000000..d2d3e44
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-8.c
@@ -0,0 +1,41 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 27
+
+/* Condition reduction with multiple types in the comparison.  Will fail to
+   vectorize.  */
+
+int
+condition_reduction (char *a, int min_v)
+{
+  int last = N + 65;
+
+  for (int i = 0; i < N; i++)
+    if (a[i] < min_v)
+      last = a[i];
+
+  return last;
+}
+
+
+int
+main (void)
+{
+  char a[N] = {
+  1, 28, 3, 48, 5, 68, 7, -88, 89, 180,
+  121, 122, -123, 124, 12, -12, 12, 67, 84, 122,
+  67, 55, 112, 22, 45, 23, 111
+  };
+
+  int ret = condition_reduction (a, 16);
+
+  if (ret != 12)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 0 "vect" } } */
+/* { dg-final { scan-tree-dump "multiple types in double reduction or condition reduction" "vect" { xfail { ! vect_max_reduc } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-9.c b/gcc/testsuite/gcc.dg/vect/pr65947-9.c
new file mode 100644
index 0000000..d2ffea9
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65947-9.c
@@ -0,0 +1,42 @@
+/* { dg-require-effective-target vect_condition } */
+
+extern void abort (void) __attribute__ ((noreturn));
+
+#define N 255
+
+/* Condition reduction with maximum possible loop size.  Will fail to
+   vectorize because the vectorisation requires a slot for default values.  */
+
+char
+condition_reduction (char *a, char min_v)
+{
+  char last = -72;
+
+  for (int i = 0; i < N; i++)
+    if (a[i] < min_v)
+      last = a[i];
+
+  return last;
+}
+
+int
+main (void)
+{
+  char a[N] = {
+  11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
+  1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+  21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+  31, 32
+  };
+  __builtin_memset (a+32, 43, N-32);  /* 43 >= 16: tail never matches.  */
+
+  char ret = condition_reduction (a, 16);
+
+  if (ret != 10)  /* a[19] == 10 is the last element below 16.  */
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 0 "vect" } } */
+/* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail { ! vect_max_reduc } } } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index a465eb1..cf07a56 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6449,3 +6449,13 @@ proc check_effective_target_comdat_group {} {
 	int (*fn) () = foo;
     }]
 }
+
+
+# Return 1 if the target supports max reduction for vectors.
+
+proc check_effective_target_vect_max_reduc { } {
+    if { [istarget aarch64*-*-*] || [istarget arm*-*-*] } {
+	return 1
+    }
+    return 0
+}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 59c75af..528c80e 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2331,6 +2331,11 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
      inner loop (def of a3)
      a2 = phi < a3 >
 
+   (4) Detect condition expressions, ie:
+     for (int i = 0; i < N; i++)
+       if (a[i] < val)
+	ret_val = a[i];
+
    If MODIFY is true it tries also to rework the code in-place to enable
    detection of more reduction patterns.  For the time being we rewrite
    "res -= RHS" into "rhs += -RHS" when it seems worthwhile.
@@ -2339,7 +2344,8 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
 static gimple
 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
 			    bool check_reduction, bool *double_reduc,
-			    bool modify, bool need_wrapping_integral_overflow)
+			    bool modify, bool need_wrapping_integral_overflow,
+			    enum vect_reduction_type *v_reduc_type)
 {
   struct loop *loop = (gimple_bb (phi))->loop_father;
   struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
@@ -2356,6 +2362,7 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
   bool phi_def;
 
   *double_reduc = false;
+  *v_reduc_type = TREE_CODE_REDUCTION;
 
   /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
      otherwise, we assume outer loop vectorization.  */
@@ -2501,13 +2508,19 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
       && SSA_NAME_DEF_STMT (op1) == phi)
     code = PLUS_EXPR;
 
-  if (check_reduction
-      && (!commutative_tree_code (code) || !associative_tree_code (code)))
+  if (check_reduction)
     {
-      if (dump_enabled_p ())
-        report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			"reduction: not commutative/associative: ");
-      return NULL;
+      if (code != COND_EXPR
+	  && (!commutative_tree_code (code) || !associative_tree_code (code)))
+	{
+	  if (dump_enabled_p ())
+	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+			    "reduction: not commutative/associative: ");
+	  return NULL;
+	}
+
+      if (code == COND_EXPR)
+	*v_reduc_type = COND_REDUCTION;
     }
 
   if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
@@ -2603,47 +2616,50 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
      and therefore vectorizing reductions in the inner-loop during
      outer-loop vectorization is safe.  */
 
-  /* CHECKME: check for !flag_finite_math_only too?  */
-  if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
-      && check_reduction)
+  if (*v_reduc_type != COND_REDUCTION)
     {
-      /* Changing the order of operations changes the semantics.  */
-      if (dump_enabled_p ())
-	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			"reduction: unsafe fp math optimization: ");
-      return NULL;
-    }
-  else if (INTEGRAL_TYPE_P (type) && check_reduction)
-    {
-      if (!operation_no_trapping_overflow (type, code))
+      /* CHECKME: check for !flag_finite_math_only too?  */
+      if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
+	  && check_reduction)
 	{
 	  /* Changing the order of operations changes the semantics.  */
 	  if (dump_enabled_p ())
 	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			    "reduction: unsafe int math optimization"
-			    " (overflow traps): ");
+			"reduction: unsafe fp math optimization: ");
 	  return NULL;
 	}
-      if (need_wrapping_integral_overflow
-	  && !TYPE_OVERFLOW_WRAPS (type)
-	  && operation_can_overflow (code))
+      else if (INTEGRAL_TYPE_P (type) && check_reduction)
+	{
+	  if (!operation_no_trapping_overflow (type, code))
+	    {
+	      /* Changing the order of operations changes the semantics.  */
+	      if (dump_enabled_p ())
+		report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+				"reduction: unsafe int math optimization"
+				" (overflow traps): ");
+	      return NULL;
+	    }
+	  if (need_wrapping_integral_overflow
+	      && !TYPE_OVERFLOW_WRAPS (type)
+	      && operation_can_overflow (code))
+	    {
+	      /* Changing the order of operations changes the semantics.  */
+	      if (dump_enabled_p ())
+		report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+				"reduction: unsafe int math optimization"
+				" (overflow doesn't wrap): ");
+	      return NULL;
+	    }
+	}
+      else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
 	{
 	  /* Changing the order of operations changes the semantics.  */
 	  if (dump_enabled_p ())
-	    report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			    "reduction: unsafe int math optimization"
-			    " (overflow doesn't wrap): ");
+	  report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
+			  "reduction: unsafe fixed-point math optimization: ");
 	  return NULL;
 	}
     }
-  else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
-    {
-      /* Changing the order of operations changes the semantics.  */
-      if (dump_enabled_p ())
-	report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
-			"reduction: unsafe fixed-point math optimization: ");
-      return NULL;
-    }
 
   /* If we detected "res -= x[i]" earlier, rewrite it into
      "res += -x[i]" now.  If this turns out to be useless reassoc
@@ -2719,6 +2735,16 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
     {
       if (check_reduction)
         {
+	  if (code == COND_EXPR)
+	    {
+	      /* No currently known use where this case would be useful.  */
+	      if (dump_enabled_p ())
+		report_vect_op (MSG_NOTE, def_stmt,
+				"detected reduction: cannot currently swap "
+				"operands for cond_expr");
+	      return NULL;
+	    }
+
           /* Swap operands (just for simplicity - so that the rest of the code
 	     can assume that the reduction variable is always the last (second)
 	     argument).  */
@@ -2742,7 +2768,8 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
     }
 
   /* Try to find SLP reduction chain.  */
-  if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
+  if (check_reduction && code != COND_EXPR &&
+      vect_is_slp_reduction (loop_info, phi, def_stmt))
     {
       if (dump_enabled_p ())
         report_vect_op (MSG_NOTE, def_stmt,
@@ -2764,11 +2791,13 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
 static gimple
 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
 			  bool check_reduction, bool *double_reduc,
-			  bool need_wrapping_integral_overflow)
+			  bool need_wrapping_integral_overflow,
+			  enum vect_reduction_type *v_reduc_type)
 {
   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
 				     double_reduc, false,
-				     need_wrapping_integral_overflow);
+				     need_wrapping_integral_overflow,
+				     v_reduc_type);
 }
 
 /* Wrapper around vect_is_simple_reduction_1, which will modify code
@@ -2780,9 +2809,11 @@ vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
 			     bool check_reduction, bool *double_reduc,
 			     bool need_wrapping_integral_overflow)
 {
+  enum vect_reduction_type v_reduc_type;
   return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
 				     double_reduc, true,
-				     need_wrapping_integral_overflow);
+				     need_wrapping_integral_overflow,
+				     &v_reduc_type);
 }
 
 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times.  */
@@ -3266,7 +3297,8 @@ get_reduction_op (gimple stmt, int reduc_index)
 
 static bool
 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
-			   int ncopies, int reduc_index)
+			   int ncopies, int reduc_index,
+			   enum vect_reduction_type v_reduc_type)
 {
   int prologue_cost = 0, epilogue_cost = 0;
   enum tree_code code;
@@ -3287,6 +3319,10 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
   else
     target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
 
+  /* Condition reductions generate two reductions in the loop.  */
+  if (v_reduc_type == COND_REDUCTION)
+    ncopies *= 2;
+
   /* Cost of reduction op inside loop.  */
   unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
 					stmt_info, 0, vect_body);
@@ -3316,9 +3352,13 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
 
   code = gimple_assign_rhs_code (orig_stmt);
 
-  /* Add in cost for initial definition.  */
-  prologue_cost += add_stmt_cost (target_cost_data, 1, scalar_to_vec,
-				  stmt_info, 0, vect_prologue);
+  /* Add in cost for initial definition.
+     For cond reduction we have four vectors: initial index, step, initial
+     result of the data reduction, initial value of the index reduction.  */
+  int prologue_stmts = v_reduc_type == COND_REDUCTION ? 4 : 1;
+  prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts,
+				  scalar_to_vec, stmt_info, 0,
+				  vect_prologue);
 
   /* Determine cost of epilogue code.
 
@@ -3329,10 +3369,29 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
     {
       if (reduc_code != ERROR_MARK)
 	{
-	  epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
-					  stmt_info, 0, vect_epilogue);
-	  epilogue_cost += add_stmt_cost (target_cost_data, 1, vec_to_scalar,
-					  stmt_info, 0, vect_epilogue);
+	  if (v_reduc_type == COND_REDUCTION)
+	    {
+	      /* An EQ stmt and an AND stmt.  */
+	      epilogue_cost += add_stmt_cost (target_cost_data, 2,
+					      vector_stmt, stmt_info, 0,
+					      vect_epilogue);
+	      /* Reduction of the max index and a reduction of the found
+		 values.  */
+	      epilogue_cost += add_stmt_cost (target_cost_data, 1,
+					      vec_to_scalar, stmt_info, 0,
+					      vect_epilogue);
+	      /* A broadcast of the max value.  */
+	      epilogue_cost += add_stmt_cost (target_cost_data, 2,
+					      scalar_to_vec, stmt_info, 0,
+					      vect_epilogue);
+	    }
+	  else
+	    {
+	      epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
+					      stmt_info, 0, vect_epilogue);
+	      epilogue_cost += add_stmt_cost (target_cost_data, 1, vec_to_scalar,
+					      stmt_info, 0, vect_epilogue);
+	    }
 	}
       else
 	{
@@ -3705,7 +3764,7 @@ get_initial_def_for_induction (gimple iv_phi)
          the final vector of induction results:  */
       exit_phi = NULL;
       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
-        {
+	{
 	  gimple use_stmt = USE_STMT (use_p);
 	  if (is_gimple_debug (use_stmt))
 	    continue;
@@ -3774,7 +3833,8 @@ get_initial_def_for_induction (gimple iv_phi)
 
    Input:
    STMT - a stmt that performs a reduction operation in the loop.
-   INIT_VAL - the initial value of the reduction variable
+   INIT_VAL - the initial value of the reduction variable.
+   V_REDUC_TYPE - the type of reduction.
 
    Output:
    ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
@@ -3784,16 +3844,20 @@ get_initial_def_for_induction (gimple iv_phi)
         vector of partial results.
 
    Option1 (adjust in epilog): Initialize the vector as follows:
-     add/bit or/xor:    [0,0,...,0,0]
-     mult/bit and:      [1,1,...,1,1]
-     min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
+     add/bit or/xor:     [0,0,...,0,0]
+     mult/bit and:       [1,1,...,1,1]
+     min/max:		 [init_val,init_val,..,init_val,init_val]
+     nested cond_expr:   [init_val,init_val,..,init_val,init_val]
+     unnested cond_expr: [init_val,0,0,...,0]
    and when necessary (e.g. add/mult case) let the caller know
    that it needs to adjust the result by init_val.
 
    Option2: Initialize the vector as follows:
-     add/bit or/xor:    [init_val,0,0,...,0]
-     mult/bit and:      [init_val,1,1,...,1]
-     min/max/cond_expr: [init_val,init_val,...,init_val]
+     add/bit or/xor:     [init_val,0,0,...,0]
+     mult/bit and:       [init_val,1,1,...,1]
+     min/max:		 [init_val,init_val,...,init_val]
+     nested cond_expr:   [init_val,init_val,...,init_val]
+     unnested cond_expr: [init_val,0,0,...,0]
    and no adjustments are needed.
 
    For example, for the following code:
@@ -3815,7 +3879,8 @@ get_initial_def_for_induction (gimple iv_phi)
 
 tree
 get_initial_def_for_reduction (gimple stmt, tree init_val,
-                               tree *adjustment_def)
+			       tree *adjustment_def,
+			       enum vect_reduction_type v_reduc_type)
 {
   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
@@ -3936,18 +4001,39 @@ get_initial_def_for_reduction (gimple stmt, tree init_val,
 
         break;
 
+      case COND_EXPR:
+	if (v_reduc_type == COND_REDUCTION)
+	  {
+	    if (adjustment_def)
+	      *adjustment_def = NULL_TREE;
+
+	    /* Create a vector of {init_value, 0, 0, 0...}.  */
+	    vec<constructor_elt, va_gc> *v;
+	    vec_alloc (v, nunits);
+	    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
+	    if (SCALAR_FLOAT_TYPE_P (scalar_type))
+	      for (i = 1; i < nunits; ++i)
+		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+					build_real (scalar_type, dconst0));
+	    else
+	      for (i = 1; i < nunits; ++i)
+		CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
+					build_int_cst (scalar_type, 0));
+	    init_def = build_constructor (vectype, v);
+	    break;
+	  }
+	/* Fall through.  */
+
       case MIN_EXPR:
       case MAX_EXPR:
-      case COND_EXPR:
-        if (adjustment_def)
+	if (adjustment_def)
           {
-            *adjustment_def = NULL_TREE;
+	    *adjustment_def = NULL_TREE;
             init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
             break;
           }
-
 	init_def = build_vector_from_val (vectype, init_value);
-        break;
+	break;
 
       default:
         gcc_unreachable ();
@@ -3977,6 +4063,9 @@ get_initial_def_for_reduction (gimple stmt, tree init_val,
    DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
    SLP_NODE is an SLP node containing a group of reduction statements. The 
      first one in this group is STMT.
+   V_REDUC_TYPE is the type of reduction.
+   INDUCTION_INDEX is the index of the loop for condition reductions. Otherwise
+     it is undefined.
 
    This function:
    1. Creates the reduction def-use cycles: sets the arguments for 
@@ -4022,7 +4111,9 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
 				  int ncopies, enum tree_code reduc_code,
 				  vec<gimple> reduction_phis,
                                   int reduc_index, bool double_reduc, 
-                                  slp_tree slp_node)
+				  slp_tree slp_node,
+				  enum vect_reduction_type v_reduc_type,
+				  tree induction_index)
 {
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   stmt_vec_info prev_phi_info;
@@ -4321,11 +4412,97 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
     }
   else
     new_phi_result = PHI_RESULT (new_phis[0]);
- 
+
+  if (v_reduc_type == COND_REDUCTION)
+    {
+      tree index_vec_type = TREE_TYPE (induction_index);
+      tree index_vec_type_signed = signed_type_for (index_vec_type);
+      tree index_scalar_type = TREE_TYPE (index_vec_type);
+      machine_mode index_vector_mode = TYPE_MODE (index_vec_type);
+
+      /* Find maximum value from the vector of found indexes.  */
+      tree max_index = make_temp_ssa_name (index_scalar_type, NULL, "");
+      gimple max_index_stmt = gimple_build_assign (max_index, REDUC_MAX_EXPR,
+						   induction_index);
+      gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT);
+
+      /* Vector of {max_index, max_index, max_index,...}.  */
+      tree max_index_vec = make_temp_ssa_name (index_vec_type, NULL, "");
+      tree max_index_vec_rhs = build_vector_from_val (index_vec_type,
+						      max_index);
+      gimple max_index_vec_stmt = gimple_build_assign (max_index_vec,
+						       max_index_vec_rhs);
+      gsi_insert_before (&exit_gsi, max_index_vec_stmt, GSI_SAME_STMT);
+
+      /* Compare the max index vector to the vector of found indexes to find
+	 the position of the max value.  This will result in either a single
+	 match or all of the values.  */
+      tree vec_compare = make_temp_ssa_name (index_vec_type_signed, NULL, "");
+      gimple vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR,
+						     induction_index,
+						     max_index_vec);
+      gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT);
+
+      /* Convert the vector of data to the same type as the EQ.  */
+      tree vec_data_cast;
+      if ( TYPE_UNSIGNED (index_vec_type))
+	{
+	  vec_data_cast = make_temp_ssa_name (index_vec_type_signed, NULL,
+					       "");
+	  tree vec_data_cast_rhs = build1 (VIEW_CONVERT_EXPR,
+					   index_vec_type_signed,
+					   new_phi_result);
+	  gimple vec_data_cast_stmt = gimple_build_assign (vec_data_cast,
+							   VIEW_CONVERT_EXPR,
+							   vec_data_cast_rhs);
+	  gsi_insert_before (&exit_gsi, vec_data_cast_stmt, GSI_SAME_STMT);
+	}
+      else
+	vec_data_cast = new_phi_result;
+
+      /* Where the max index occurred, use the value from the data vector.  */
+      tree vec_and = make_temp_ssa_name (index_vec_type_signed, NULL, "");
+      gimple vec_and_stmt = gimple_build_assign (vec_and, BIT_AND_EXPR,
+						 vec_compare, vec_data_cast);
+      gsi_insert_before (&exit_gsi, vec_and_stmt, GSI_SAME_STMT);
+
+      /* Make the matched data values unsigned.  */
+      tree vec_and_cast = make_temp_ssa_name (index_vec_type, NULL, "");
+      tree vec_and_cast_rhs = build1 (VIEW_CONVERT_EXPR, index_vec_type,
+				      vec_and);
+      gimple vec_and_cast_stmt = gimple_build_assign (vec_and_cast,
+						      VIEW_CONVERT_EXPR,
+						      vec_and_cast_rhs);
+      gsi_insert_before (&exit_gsi, vec_and_cast_stmt, GSI_SAME_STMT);
+
+      /* Reduce down to a scalar value.  */
+      tree matched_data_reduc = make_temp_ssa_name (index_scalar_type, NULL,
+						    "");
+      gimple matched_data_reduc_stmt;
+      optab ot = optab_for_tree_code (REDUC_MAX_EXPR, index_vec_type,
+				      optab_default);
+      gcc_assert (optab_handler (ot, index_vector_mode) != CODE_FOR_nothing);
+      matched_data_reduc_stmt = gimple_build_assign (matched_data_reduc,
+						     REDUC_MAX_EXPR,
+						     vec_and_cast);
+      gsi_insert_before (&exit_gsi, matched_data_reduc_stmt, GSI_SAME_STMT);
+
+      /* Convert the reduced value to the result type and set as the
+	 result.  */
+      tree matched_data_reduc_cast = build1 (VIEW_CONVERT_EXPR, scalar_type,
+					     matched_data_reduc);
+      epilog_stmt = gimple_build_assign (new_scalar_dest,
+					 matched_data_reduc_cast);
+      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
+      gimple_assign_set_lhs (epilog_stmt, new_temp);
+      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
+      scalar_results.safe_push (new_temp);
+    }
+
   /* 2.3 Create the reduction code, using one of the three schemes described
          above. In SLP we simply need to extract all the elements from the 
          vector (without reducing them), so we use scalar shifts.  */
-  if (reduc_code != ERROR_MARK && !slp_reduc)
+  else if (reduc_code != ERROR_MARK && !slp_reduc)
     {
       tree tmp;
       tree vec_elem_type;
@@ -4847,6 +5024,15 @@ vect_finalize_reduction:
    and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
    sequence that had been detected and replaced by the pattern-stmt (STMT).
 
+   This function also handles reduction of condition expressions, for example:
+     for (int i = 0; i < N; i++)
+       if (a[i] < value)
+	 last = a[i];
+   This is handled by vectorising the loop and creating an additional vector
+   containing the loop indexes for which "a[i] < value" was true.  In the
+   function epilogue this is reduced to a single max value and then used to
+   index into the vector of results.
+
    In some cases of reduction patterns, the type of the reduction variable X is
    different than the type of the other arguments of STMT.
    In such cases, the vectype that is used when transforming STMT into a vector
@@ -4922,6 +5108,8 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   int vec_num;
   tree def0, def1, tem, op0, op1 = NULL_TREE;
   bool first_p = true;
+  enum vect_reduction_type v_reduc_type = TREE_CODE_REDUCTION;
+  tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
 
   /* In case of reduction chain we switch to the first stmt in the chain, but
      we don't update STMT_INFO, since only the last stmt is marked as reduction
@@ -5092,7 +5280,8 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
     }
 
   gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
-					 !nested_cycle, &dummy, false);
+					 !nested_cycle, &dummy, false,
+					 &v_reduc_type);
   if (orig_stmt)
     gcc_assert (tmp == orig_stmt
 		|| GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);
@@ -5117,12 +5306,12 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
 
   if (code == COND_EXPR)
     {
-      if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
+      if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL,
+				   v_reduc_type))
         {
           if (dump_enabled_p ())
 	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 			     "unsupported condition in reduction\n");
-
 	  return false;
         }
     }
@@ -5153,7 +5342,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
         }
 
       if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
-        {
+	{
           if (dump_enabled_p ())
             dump_printf (MSG_NOTE, "op not supported by target.\n");
 
@@ -5246,49 +5435,71 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
     }
 
   epilog_reduc_code = ERROR_MARK;
-  if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
+
+  if (v_reduc_type == TREE_CODE_REDUCTION)
     {
-      reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
+      if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
+	{
+	  reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
                                          optab_default);
-      if (!reduc_optab)
-        {
-          if (dump_enabled_p ())
-	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "no optab for reduction.\n");
-
-          epilog_reduc_code = ERROR_MARK;
-        }
-      else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
-        {
-          optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
-          if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
-            {
-              if (dump_enabled_p ())
-	        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				 "reduc op not supported by target.\n");
+	  if (!reduc_optab)
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "no optab for reduction.\n");
 
 	      epilog_reduc_code = ERROR_MARK;
 	    }
-        }
+	  else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
+	    {
+	      optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
+	      if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
+		{
+		  if (dump_enabled_p ())
+		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				     "reduc op not supported by target.\n");
+
+		  epilog_reduc_code = ERROR_MARK;
+		}
+	    }
+	}
+      else
+	{
+	  if (!nested_cycle || double_reduc)
+	    {
+	      if (dump_enabled_p ())
+		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+				 "no reduc code for scalar code.\n");
+
+	      return false;
+	    }
+	}
     }
   else
     {
-      if (!nested_cycle || double_reduc)
-        {
-          if (dump_enabled_p ())
-	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			     "no reduc code for scalar code.\n");
+      int scalar_precision = GET_MODE_PRECISION (TYPE_MODE (scalar_type));
+      cr_index_scalar_type = make_unsigned_type (scalar_precision);
+      cr_index_vector_type = build_vector_type
+	(cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out));
 
-          return false;
-        }
+      epilog_reduc_code = REDUC_MAX_EXPR;
+      optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type,
+				   optab_default);
+      if (optab_handler (optab, TYPE_MODE (cr_index_vector_type)) == CODE_FOR_nothing)
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			     "reduc max op not supported by target.\n");
+	  return false;
+	}
     }
 
-  if (double_reduc && ncopies > 1)
+  if ((double_reduc || v_reduc_type == COND_REDUCTION) && ncopies > 1)
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-			 "multiple types in double reduction\n");
-
+			 "multiple types in double reduction or condition "
+			 "reduction.\n");
       return false;
     }
 
@@ -5312,11 +5523,39 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
         }
     }
 
+  if (v_reduc_type == COND_REDUCTION)
+    {
+      widest_int ni;
+
+      if (! max_loop_iterations (loop, &ni))
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "loop count not known, cannot create cond "
+			     "reduction.\n");
+	  return false;
+	}
+      /* Convert backedges to iterations.  */
+      ni += 1;
+
+      /* The additional index will be the same type as the condition.  Check
+	 that the iteration count fits in this type with one value to spare
+	 (because index zero is reserved for when there are no matches).  */
+      tree max_index = TYPE_MAX_VALUE (cr_index_scalar_type);
+      if (wi::geu_p (ni, wi::to_widest (max_index)))
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "loop size is greater than data size.\n");
+	  return false;
+	}
+    }
+
   if (!vec_stmt) /* transformation not required.  */
     {
       if (first_p
 	  && !vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies,
-					 reduc_index))
+					 reduc_index, v_reduc_type))
         return false;
       STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
       return true;
@@ -5327,6 +5566,8 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
 
+  STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+
   /* FORNOW: Multiple types are not supported for condition.  */
   if (code == COND_EXPR)
     gcc_assert (ncopies == 1);
@@ -5406,9 +5647,8 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
       if (code == COND_EXPR)
         {
           gcc_assert (!slp_node);
-          vectorizable_condition (stmt, gsi, vec_stmt, 
-                                  PHI_RESULT (phis[0]), 
-                                  reduc_index, NULL);
+	  vectorizable_condition (stmt, gsi, vec_stmt, PHI_RESULT (phis[0]),
+				  reduc_index, NULL, v_reduc_type);
           /* Multiple types are not supported for condition.  */
           break;
         }
@@ -5528,17 +5768,88 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
       prev_phi_info = vinfo_for_stmt (new_phi);
     }
 
+  tree indx_before_incr, indx_after_incr, cond_name = NULL;
+
   /* Finalize the reduction-phi (set its arguments) and create the
      epilog reduction code.  */
   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
     {
       new_temp = gimple_assign_lhs (*vec_stmt);
       vect_defs[0] = new_temp;
+
+      /* For cond reductions we need to add an additional conditional based on
+	 the loop index.  */
+      if (v_reduc_type == COND_REDUCTION)
+	{
+	  int nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
+	  int k;
+
+	  gcc_assert (gimple_assign_rhs_code (*vec_stmt) == VEC_COND_EXPR);
+
+	  /* Create a {1,2,3,...} vector.  */
+	  tree *vtemp = XALLOCAVEC (tree, nunits_out);
+	  for (k = 0; k < nunits_out; ++k)
+	    vtemp[k] = build_int_cst (cr_index_scalar_type, k + 1);
+	  tree series_vect = build_vector (cr_index_vector_type, vtemp);
+
+	  /* Create a vector of the step value.  */
+	  tree step = build_int_cst (cr_index_scalar_type, nunits_out);
+	  tree vec_step = build_vector_from_val (cr_index_vector_type, step);
+
+	  /* Create a vector of 0s.  */
+	  tree zero = build_zero_cst (cr_index_scalar_type);
+	  tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);
+
+	  /* Create an induction variable, starting at series_vect, and
+	     incrementing by vec_step.  */
+	  gimple_stmt_iterator incr_gsi;
+	  bool insert_after;
+	  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
+	  create_iv (series_vect, vec_step, NULL_TREE, loop, &incr_gsi,
+		     insert_after, &indx_before_incr, &indx_after_incr);
+
+	  /* Create a vector phi node from the VEC_COND_EXPR (see below) and
+	     0s.  */
+	  tree new_phi_tree = make_temp_ssa_name (cr_index_vector_type, NULL, "");
+	  new_phi = create_phi_node (new_phi_tree, loop->header);
+	  set_vinfo_for_stmt (new_phi, new_stmt_vec_info (new_phi, loop_vinfo,
+							  NULL));
+	  add_phi_arg (new_phi, vec_zero, loop_preheader_edge (loop),
+		       UNKNOWN_LOCATION);
+
+	  /* Turn the condition from vec_stmt into an ssa name.  */
+	  gimple index_condition;
+	  gimple_stmt_iterator vec_stmt_gsi = gsi_for_stmt (*vec_stmt);
+	  tree ccompare = gimple_assign_rhs1 (*vec_stmt);
+	  tree ccompare_name = make_temp_ssa_name (TREE_TYPE (ccompare), NULL,
+						   "");
+	  gimple ccompare_stmt = gimple_build_assign (ccompare_name, ccompare);
+	  gsi_insert_before (&vec_stmt_gsi, ccompare_stmt, GSI_SAME_STMT);
+	  gimple_assign_set_rhs1 (*vec_stmt, ccompare_name);
+	  update_stmt (*vec_stmt);
+
+	  /* Create a conditional, where the condition is the same as from
+	     vec_stmt, then is the induction index, else is the phi.  */
+	  tree cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type, ccompare_name,
+				   indx_before_incr, new_phi_tree);
+	  cond_name = make_temp_ssa_name (cr_index_vector_type, NULL, "");
+	  index_condition = gimple_build_assign (cond_name, cond_expr);
+	  gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT);
+	  stmt_vec_info index_vec_info = new_stmt_vec_info (index_condition,
+							    loop_vinfo, NULL);
+	  STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
+	  set_vinfo_for_stmt (index_condition, index_vec_info);
+
+	  /* Update the phi with the vec cond.  */
+	  add_phi_arg (new_phi, cond_name, loop_latch_edge (loop),
+		       UNKNOWN_LOCATION);
+	}
     }
 
   vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
                                     epilog_reduc_code, phis, reduc_index,
-                                    double_reduc, slp_node);
+				    double_reduc, slp_node, v_reduc_type,
+				    cond_name);
 
   return true;
 }
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 359e010..7bcf575 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -7293,7 +7293,7 @@ vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
 bool
 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
 			gimple *vec_stmt, tree reduc_def, int reduc_index,
-			slp_tree slp_node)
+			slp_tree slp_node, enum vect_reduction_type v_reduc_type)
 {
   tree scalar_dest = NULL_TREE;
   tree vec_dest = NULL_TREE;
@@ -7321,21 +7321,24 @@ vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
   if (reduc_index && STMT_SLP_TYPE (stmt_info))
     return false;
 
-  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
-    return false;
+  if (v_reduc_type == TREE_CODE_REDUCTION)
+    {
+      if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
+	return false;
 
-  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
-      && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
-           && reduc_def))
-    return false;
+      if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
+	  && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
+	       && reduc_def))
+	return false;
 
-  /* FORNOW: not yet supported.  */
-  if (STMT_VINFO_LIVE_P (stmt_info))
-    {
-      if (dump_enabled_p ())
-        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                         "value used after loop.\n");
-      return false;
+      /* FORNOW: not yet supported.  */
+      if (STMT_VINFO_LIVE_P (stmt_info))
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			     "value used after loop.\n");
+	  return false;
+	}
     }
 
   /* Is vectorizable conditional operation?  */
@@ -7739,7 +7742,8 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
 	  || vectorizable_call (stmt, NULL, NULL, node)
 	  || vectorizable_store (stmt, NULL, NULL, node)
 	  || vectorizable_reduction (stmt, NULL, NULL, node)
-	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
+	  || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node,
+				     TREE_CODE_REDUCTION));
   else
     {
       if (bb_vinfo)
@@ -7751,7 +7755,8 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
 	      || vectorizable_load (stmt, NULL, NULL, node, NULL)
 	      || vectorizable_call (stmt, NULL, NULL, node)
 	      || vectorizable_store (stmt, NULL, NULL, node)
-	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
+	      || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node,
+					 TREE_CODE_REDUCTION));
     }
 
   if (!ok)
@@ -7863,7 +7868,8 @@ vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
       break;
 
     case condition_vec_info_type:
-      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
+      done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node,
+				     TREE_CODE_REDUCTION);
       gcc_assert (done);
       break;
 
@@ -8262,8 +8268,8 @@ vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
   if (TREE_CODE (operand) != SSA_NAME)
     {
       if (dump_enabled_p ())
-        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                         "not ssa-name.\n");
+	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			 "not ssa-name.\n");
       return false;
     }
 
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 95276fa..34f76d4 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -60,6 +60,12 @@ enum vect_def_type {
   vect_unknown_def_type
 };
 
+/* Define type of reduction.  */
+enum vect_reduction_type {
+  TREE_CODE_REDUCTION,  /* Epilog code derived from the scalar tree code.  */
+  COND_REDUCTION  /* Reduction of a condition expression.  */
+};
+
 #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def)           \
                                    || ((D) == vect_double_reduction_def) \
                                    || ((D) == vect_nested_cycle))
@@ -1037,7 +1043,7 @@ extern bool vect_transform_stmt (gimple, gimple_stmt_iterator *,
 extern void vect_remove_stores (gimple);
 extern bool vect_analyze_stmt (gimple, bool *, slp_tree);
 extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *,
-                                    tree, int, slp_tree);
+				    tree, int, slp_tree, enum vect_reduction_type);
 extern void vect_get_load_cost (struct data_reference *, int, bool,
 				unsigned int *, unsigned int *,
 				stmt_vector_for_cost *,
@@ -1105,7 +1111,8 @@ extern bool vectorizable_live_operation (gimple, gimple_stmt_iterator *,
 extern bool vectorizable_reduction (gimple, gimple_stmt_iterator *, gimple *,
                                     slp_tree);
 extern bool vectorizable_induction (gimple, gimple_stmt_iterator *, gimple *);
-extern tree get_initial_def_for_reduction (gimple, tree, tree *);
+extern tree get_initial_def_for_reduction
+	(gimple, tree, tree *, enum vect_reduction_type = TREE_CODE_REDUCTION);
 extern int vect_min_worthwhile_factor (enum tree_code);
 extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
 					stmt_vector_for_cost *,
-- 
1.9.3 (Apple Git-50)

