Hi, gather/scatter loads and stores tend to be expensive (at least on x86), but we currently account for them as ordinary vector loads/stores, which are cheap. This patch adds separate vectorizer cost entries for them so that their cost can be modelled more realistically.
Bootstrapped/regtested x86_64-linux, OK? Honza 2017-10-17 Jan Hubicka <hubi...@ucw.cz> * target.h (enum vect_cost_for_stmt): Add vector_gather_load and vector_scatter_store. * tree-vect-stmts.c (record_stmt_cost): Distinguish between normal and scatter/gather ops. * config/aarch64/aarch64.c (aarch64_builtin_vectorization_cost): Add vector_gather_load and vector_scatter_store. * config/arm/arm.c (arm_builtin_vectorization_cost): Likewise. * config/powerpcspe/powerpcspe.c (rs6000_builtin_vectorization_cost): Likewise. * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Likewise. * config/s390/s390.c (s390_builtin_vectorization_cost): Likewise. * config/spu/spu.c (spu_builtin_vectorization_cost): Likewise. Index: config/aarch64/aarch64.c =================================================================== --- config/aarch64/aarch64.c (revision 253789) +++ config/aarch64/aarch64.c (working copy) @@ -8547,9 +8547,10 @@ aarch64_builtin_vectorization_cost (enum return fp ? costs->vec_fp_stmt_cost : costs->vec_int_stmt_cost; case vector_load: + case vector_gather_load: return costs->vec_align_load_cost; - case vector_store: + case vector_scatter_store: return costs->vec_store_cost; case vec_to_scalar: Index: config/arm/arm.c =================================================================== --- config/arm/arm.c (revision 253789) +++ config/arm/arm.c (working copy) @@ -11241,9 +11241,11 @@ arm_builtin_vectorization_cost (enum vec return current_tune->vec_costs->vec_stmt_cost; case vector_load: + case vector_gather_load: return current_tune->vec_costs->vec_align_load_cost; case vector_store: + case vector_scatter_store: return current_tune->vec_costs->vec_store_cost; case vec_to_scalar: Index: config/powerpcspe/powerpcspe.c =================================================================== --- config/powerpcspe/powerpcspe.c (revision 253789) +++ config/powerpcspe/powerpcspe.c (working copy) @@ -5834,6 +5834,8 @@ rs6000_builtin_vectorization_cost (enum case vector_stmt: case vector_load: case vector_store: + case 
vector_gather_load: + case vector_scatter_store: case vec_to_scalar: case scalar_to_vec: case cond_branch_not_taken: Index: config/rs6000/rs6000.c =================================================================== --- config/rs6000/rs6000.c (revision 253789) +++ config/rs6000/rs6000.c (working copy) @@ -5398,6 +5398,8 @@ rs6000_builtin_vectorization_cost (enum case vector_stmt: case vector_load: case vector_store: + case vector_gather_load: + case vector_scatter_store: case vec_to_scalar: case scalar_to_vec: case cond_branch_not_taken: Index: config/s390/s390.c =================================================================== --- config/s390/s390.c (revision 253789) +++ config/s390/s390.c (working copy) @@ -3717,6 +3717,8 @@ s390_builtin_vectorization_cost (enum ve case vector_stmt: case vector_load: case vector_store: + case vector_gather_load: + case vector_scatter_store: case vec_to_scalar: case scalar_to_vec: case cond_branch_not_taken: Index: config/spu/spu.c =================================================================== --- config/spu/spu.c (revision 253789) +++ config/spu/spu.c (working copy) @@ -6625,6 +6625,8 @@ spu_builtin_vectorization_cost (enum vec case vector_stmt: case vector_load: case vector_store: + case vector_gather_load: + case vector_scatter_store: case vec_to_scalar: case scalar_to_vec: case cond_branch_not_taken: Index: target.h =================================================================== --- target.h (revision 253789) +++ target.h (working copy) @@ -171,9 +171,11 @@ enum vect_cost_for_stmt scalar_store, vector_stmt, vector_load, + vector_gather_load, unaligned_load, unaligned_store, vector_store, + vector_scatter_store, vec_to_scalar, scalar_to_vec, cond_branch_not_taken, Index: tree-vect-stmts.c =================================================================== --- tree-vect-stmts.c (revision 253789) +++ tree-vect-stmts.c (working copy) @@ -95,6 +95,12 @@ record_stmt_cost (stmt_vector_for_cost * enum vect_cost_for_stmt 
kind, stmt_vec_info stmt_info, int misalign, enum vect_cost_model_location where) { + if ((kind == vector_load || kind == unaligned_load) + && STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + kind = vector_gather_load; + if ((kind == vector_store || kind == unaligned_store) + && STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + kind = vector_scatter_store; if (body_cost_vec) { tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;