Hi,
gather loads and scatter stores tend to be expensive (at least on x86), but
we currently account for them as ordinary vector loads/stores, which are
cheap.  This patch adds vectorizer cost entries for them so that they can be
modelled more realistically.

Bootstrapped/regtested x86_64-linux, OK?

Honza

2017-10-17  Jan Hubicka  <hubi...@ucw.cz>

        * target.h (enum vect_cost_for_stmt): Add vector_gather_load and
        vector_scatter_store.
        * tree-vect-stmts.c (record_stmt_cost): Distinguish between normal
        and scatter/gather ops.

        * aarch64/aarch64.c (aarch64_builtin_vectorization_cost): Add
        vector_gather_load and vector_scatter_store.
        * arm/arm.c (arm_builtin_vectorization_cost): Likewise.
        * powerpcspe/powerpcspe.c (rs6000_builtin_vectorization_cost): Likewise.
        * rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Likewise.
        * s390/s390.c (s390_builtin_vectorization_cost): Likewise.
        * spu/spu.c (spu_builtin_vectorization_cost): Likewise.

Index: config/aarch64/aarch64.c
===================================================================
--- config/aarch64/aarch64.c    (revision 253789)
+++ config/aarch64/aarch64.c    (working copy)
@@ -8547,9 +8547,10 @@ aarch64_builtin_vectorization_cost (enum
        return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost;
 
       case vector_load:
+      case vector_gather_load:
        return costs->vec_align_load_cost;
 
-      case vector_store:
+      case vector_scatter_store:
        return costs->vec_store_cost;
 
       case vec_to_scalar:
Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c    (revision 253789)
+++ config/arm/arm.c    (working copy)
@@ -11241,9 +11241,11 @@ arm_builtin_vectorization_cost (enum vec
         return current_tune->vec_costs->vec_stmt_cost;
 
       case vector_load:
+      case vector_gather_load:
         return current_tune->vec_costs->vec_align_load_cost;
 
       case vector_store:
+      case vector_scatter_store:
         return current_tune->vec_costs->vec_store_cost;
 
       case vec_to_scalar:
Index: config/powerpcspe/powerpcspe.c
===================================================================
--- config/powerpcspe/powerpcspe.c      (revision 253789)
+++ config/powerpcspe/powerpcspe.c      (working copy)
@@ -5834,6 +5834,8 @@ rs6000_builtin_vectorization_cost (enum
       case vector_stmt:
       case vector_load:
       case vector_store:
+      case vector_gather_load:
+      case vector_scatter_store:
       case vec_to_scalar:
       case scalar_to_vec:
       case cond_branch_not_taken:
Index: config/rs6000/rs6000.c
===================================================================
--- config/rs6000/rs6000.c      (revision 253789)
+++ config/rs6000/rs6000.c      (working copy)
@@ -5398,6 +5398,8 @@ rs6000_builtin_vectorization_cost (enum
       case vector_stmt:
       case vector_load:
       case vector_store:
+      case vector_gather_load:
+      case vector_scatter_store:
       case vec_to_scalar:
       case scalar_to_vec:
       case cond_branch_not_taken:
Index: config/s390/s390.c
===================================================================
--- config/s390/s390.c  (revision 253789)
+++ config/s390/s390.c  (working copy)
@@ -3717,6 +3717,8 @@ s390_builtin_vectorization_cost (enum ve
       case vector_stmt:
       case vector_load:
       case vector_store:
+      case vector_gather_load:
+      case vector_scatter_store:
       case vec_to_scalar:
       case scalar_to_vec:
       case cond_branch_not_taken:
Index: config/spu/spu.c
===================================================================
--- config/spu/spu.c    (revision 253789)
+++ config/spu/spu.c    (working copy)
@@ -6625,6 +6625,8 @@ spu_builtin_vectorization_cost (enum vec
       case vector_stmt:
       case vector_load:
       case vector_store:
+      case vector_gather_load:
+      case vector_scatter_store:
       case vec_to_scalar:
       case scalar_to_vec:
       case cond_branch_not_taken:
Index: target.h
===================================================================
--- target.h    (revision 253789)
+++ target.h    (working copy)
@@ -171,9 +171,11 @@ enum vect_cost_for_stmt
   scalar_store,
   vector_stmt,
   vector_load,
+  vector_gather_load,
   unaligned_load,
   unaligned_store,
   vector_store,
+  vector_scatter_store,
   vec_to_scalar,
   scalar_to_vec,
   cond_branch_not_taken,
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c   (revision 253789)
+++ tree-vect-stmts.c   (working copy)
@@ -95,6 +95,12 @@ record_stmt_cost (stmt_vector_for_cost *
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  int misalign, enum vect_cost_model_location where)
 {
+  if ((kind == vector_load || kind == unaligned_load)
+      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+    kind = vector_gather_load;
+  if ((kind == vector_store || kind == unaligned_store)
+      && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+    kind = vector_scatter_store;
   if (body_cost_vec)
     {
       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;

Reply via email to