Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi	(revision 179744)
+++ gcc/doc/invoke.texi	(working copy)
@@ -271,7 +271,8 @@ Objective-C and Objective-C++ Dialects}.
 -Wunused-label  -Wunused-local-typedefs -Wunused-parameter @gol
 -Wno-unused-result -Wunused-value @gol -Wunused-variable @gol
 -Wunused-but-set-parameter -Wunused-but-set-variable @gol
--Wvariadic-macros -Wvla -Wvolatile-register-var  -Wwrite-strings}
+-Wvariadic-macros -Wvector-operation-performance -Wvla @gol
+-Wvolatile-register-var  -Wwrite-strings}
 
 @item C and Objective-C-only Warning Options
 @gccoptlist{-Wbad-function-cast  -Wmissing-declarations @gol
@@ -4535,6 +4536,18 @@ Warn if variadic macros are used in peda
 alternate syntax when in pedantic ISO C99 mode.  This is default.
 To inhibit the warning messages, use @option{-Wno-variadic-macros}.
 
+@item -Wvector-operation-performance
+@opindex Wvector-operation-performance
+@opindex Wno-vector-operation-performance
+Warn if a vector operation is not implemented via the SIMD capabilities
+of the architecture.  This is mainly useful for performance tuning.
+A vector operation can be implemented @code{piecewise}, which means that
+the scalar operation is performed on every vector element;
+@code{in parallel}, which means that the vector operation is implemented
+using scalars of a wider type, which is normally more efficient;
+and @code{as a single scalar}, which means that the vector fits into a
+scalar type.
+
 @item -Wvla
 @opindex Wvla
 @opindex Wno-vla
Index: gcc/testsuite/gcc.target/i386/warn-vect-op-3.c
===================================================================
--- gcc/testsuite/gcc.target/i386/warn-vect-op-3.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/warn-vect-op-3.c	(revision 0)
@@ -0,0 +1,21 @@
+/* { dg-do compile { target { ! ia32 } } }  */
+/* { dg-options "-mno-sse -Wvector-operation-performance" }  */
+#define vector(elcount, type)  \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+/* ia32 is skipped: short +/- needs 64-bit words to expand "in parallel".  */
+int main (int argc, char *argv[])
+{
+  vector (8, short) v0 = {argc, 1, 15, 38, 12, -1, argc, 2};
+  vector (8, short) v1 = {-4, argc, 2, 11, 1, 17, -8, argc};
+  vector (8, short) res[] =
+  {
+    v0 + v1,	      /* { dg-warning "expanded in parallel" }  */
+    v0 - v1,          /* { dg-warning "expanded in parallel" }  */
+    v0 > v1,          /* { dg-warning "expanded piecewise" }  */
+    v0 & v1,          /* { dg-warning "expanded in parallel" }  */
+    __builtin_shuffle (v0, v1),	      /* { dg-warning "expanded piecewise" }  */
+    __builtin_shuffle (v0, v1, v1)    /* { dg-warning "expanded piecewise" }  */
+  };
+  /* Return one element of one result; both indices come from argc.  */
+  return res[argc][argc];
+}
Index: gcc/testsuite/gcc.target/i386/warn-vect-op-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/warn-vect-op-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/warn-vect-op-1.c	(revision 0)
@@ -0,0 +1,21 @@
+/* { dg-do compile }  */
+/* { dg-options "-mno-sse -Wvector-operation-performance" }  */
+#define vector(elcount, type)  \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+/* Expected -Wvector-operation-performance diagnostics for 4 x int, no SSE.  */
+int main (int argc, char *argv[])
+{
+  vector (4, int) v0 = {argc, 1, 15, 38};
+  vector (4, int) v1 = {-4, argc, 2, 11};
+  vector (4, int) res[] = 
+  {
+    v0 + v1,	  /* { dg-warning "expanded piecewise" }  */
+    v0 - v1,	  /* { dg-warning "expanded piecewise" }  */
+    v0 > v1,	  /* { dg-warning "expanded piecewise" }  */
+    v0 & v1,	  /* { dg-warning "expanded in parallel" }  */
+    __builtin_shuffle (v0, v1),	    /* { dg-warning "expanded piecewise" }  */
+    __builtin_shuffle (v0, v1, v1)  /* { dg-warning "expanded piecewise" }  */  
+  };
+  /* Return one element of one result; both indices come from argc.  */
+  return res[argc][argc];
+}
Index: gcc/testsuite/gcc.target/i386/warn-vect-op-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/warn-vect-op-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/warn-vect-op-2.c	(revision 0)
@@ -0,0 +1,23 @@
+/* { dg-do compile }  */
+/* { dg-options "-mno-sse -Wvector-operation-performance" }  */
+#define vector(elcount, type)  \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+/* Expected -Wvector-operation-performance diagnostics for 16 x signed char, no SSE.  */
+int main (int argc, char *argv[])
+{
+  vector (16, signed char) v0 = {argc, 1, 15, 38, 12, -1, argc, 2, 
+				 argc, 1, 15, 38, 12, -1, argc, 2};
+  vector (16, signed char) v1 = {-4, argc, 2, 11, 1, 17, -8, argc,
+				 argc, 1, 15, 38, 12, -1, argc, 2};
+  vector (16, signed char) res[] = 
+  {
+    v0 + v1,		  /* { dg-warning "expanded in parallel" }  */
+    v0 - v1,              /* { dg-warning "expanded in parallel" }  */
+    v0 > v1,              /* { dg-warning "expanded piecewise" }  */
+    v0 & v1,              /* { dg-warning "expanded in parallel" }  */
+    __builtin_shuffle (v0, v1),        /* { dg-warning "expanded piecewise" }  */
+    __builtin_shuffle (v0, v1, v1)     /* { dg-warning "expanded piecewise" }  */
+  };
+  /* Return one element of one result; both indices come from argc.  */
+  return res[argc][argc];
+}
Index: gcc/c-typeck.c
===================================================================
--- gcc/c-typeck.c	(revision 179744)
+++ gcc/c-typeck.c	(working copy)
@@ -2934,7 +2934,8 @@ c_build_vec_perm_expr (location_t loc, t
 
   if (!wrap)
     ret = c_wrap_maybe_const (ret, true);
-
+  
+  SET_EXPR_LOCATION (ret, loc);
   return ret;
 }
 
Index: gcc/common.opt
===================================================================
--- gcc/common.opt	(revision 179744)
+++ gcc/common.opt	(working copy)
@@ -694,6 +694,10 @@ Wcoverage-mismatch
 Common Var(warn_coverage_mismatch) Init(1) Warning
 Warn in case profiles in -fprofile-use do not match
 
+Wvector-operation-performance
+Common Var(warn_vector_operation_performance) Warning
+Warn when a vector operation is compiled without using SIMD instructions
+
 Xassembler
 Driver Separate
 
Index: gcc/tree-vect-generic.c
===================================================================
--- gcc/tree-vect-generic.c	(revision 179744)
+++ gcc/tree-vect-generic.c	(working copy)
@@ -235,6 +235,14 @@ expand_vector_piecewise (gimple_stmt_ite
   int delta = tree_low_cst (part_width, 1)
 	      / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
   int i;
+  location_t loc = gimple_location (gsi_stmt (*gsi));
+
+  if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
+    warning_at (loc, OPT_Wvector_operation_performance,
+		"vector operation will be expanded piecewise");
+  else
+    warning_at (loc, OPT_Wvector_operation_performance,
+		"vector operation will be expanded in parallel");
 
   v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
   for (i = 0; i < nunits;
@@ -260,6 +268,7 @@ expand_vector_parallel (gimple_stmt_iter
   tree result, compute_type;
   enum machine_mode mode;
   int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;
+  location_t loc = gimple_location (gsi_stmt (*gsi));
 
   /* We have three strategies.  If the type is already correct, just do
      the operation an element at a time.  Else, if the vector is wider than
@@ -284,6 +293,9 @@ expand_vector_parallel (gimple_stmt_iter
       mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
       compute_type = lang_hooks.types.type_for_mode (mode, 1);
       result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
+      warning_at (loc, OPT_Wvector_operation_performance,
+	          "vector operation will be expanded with a "
+		  "single scalar operation");
     }
 
   return result;
@@ -400,8 +412,8 @@ expand_vector_operation (gimple_stmt_ite
       case PLUS_EXPR:
       case MINUS_EXPR:
         if (!TYPE_OVERFLOW_TRAPS (type))
-          return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
-		      		         gimple_assign_rhs1 (assign),
+	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
+					 gimple_assign_rhs1 (assign),
 					 gimple_assign_rhs2 (assign), code);
 	break;
 
@@ -626,10 +638,15 @@ lower_vec_perm (gimple_stmt_iterator *gs
   tree constr, t, si, i_val;
   tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
   bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
+  location_t loc = gimple_location (gsi_stmt (*gsi));
   unsigned i;
 
   if (expand_vec_perm_expr_p (TYPE_MODE (vect_type), vec0, vec1, mask))
     return;
+  
+  warning_at (loc, OPT_Wvector_operation_performance,
+              "vector shuffling operation will be expanded piecewise");
+
 
   v = VEC_alloc (constructor_elt, gc, elements);
   for (i = 0; i < elements; i++)
