Hi,

Presently the decision as to whether to completely scalarize an aggregate
or not is made based on MOVE_RATIO. This is an undocumented, and unexpected,
overloading of the target macro.

In this patch we fix this.

First, we add a new target hook
TARGET_DEFAULT_MAX_TOTAL_SCALARIZATION_SIZE, which returns MOVE_RATIO
by default.

Then we add two new parameters:

  sra-max-total-scalarization-size-Ospeed - The maximum size of aggregate
  to consider when compiling for speed
  sra-max-total-scalarization-size-Osize - The maximum size of aggregate
  to consider when compiling for size.

Both default to 0, which means "unset": SRA then falls back to the target hook.

Finally we wire up SRA to prefer using the parameters and, if it doesn't
find values for them, to fall back to the target hook.

Bootstrapped and regression tested for x86, arm and aarch64 with no
issues.  I've also thrown a smoke-test of popular small benchmarks at
each platform without seeing meaningful differences (as you would expect).

OK?

Thanks,
James

---
gcc/

2014-08-20  James Greenhalgh  <james.greenha...@arm.com>

        * doc/invoke.texi (sra-max-total-scalarization-size-Ospeed): Document.
        (sra-max-total-scalarization-size-Osize): Likewise.
        * doc/tm.texi.in
        (TARGET_DEFAULT_MAX_TOTAL_SCALARIZATION_SIZE): Add hook.
        * doc/tm.texi: Regenerate.
        * params.def (sra-max-total-scalarization-size-Ospeed): New.
        (sra-max-total-scalarization-size-Osize): Likewise.
        * target.def (default_max_total_scalarization_size): New.
        * targhooks.c (default_max_total_scalarization_size): New.
        * targhooks.h (default_max_total_scalarization_size): New.
        * tree-sra.c (get_max_total_scalarization_size): New.
        (analyze_all_variable_accesses): Use it.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 6374261..2b6593d 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -10232,6 +10232,15 @@ parameters only when their cumulative size is less or equal to
 @option{ipa-sra-ptr-growth-factor} times the size of the original
 pointer parameter.
 
+@item sra-max-total-scalarization-size-Ospeed
+@item sra-max-total-scalarization-size-Osize
+The two Scalar Replacement of Aggregates passes (SRA and IPA-SRA) aim to
+replace scalar parts of aggregates with uses of independent scalar
+variables. These parameters control the maximum size of aggregate
+which will be considered for replacement when compiling for speed
+(@option{sra-max-total-scalarization-size-Ospeed}) or size
+(@option{sra-max-total-scalarization-size-Osize}) respectively.
+
 @item tm-max-aggregate-size
 When making copies of thread-local variables in a transaction, this
 parameter specifies the size in bytes after which variables are
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 9dd8d68..42ef37f 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6118,6 +6118,16 @@ value to the result of that function.  The arguments to that function
 are the same as to this target hook.
 @end deftypefn
 
+@deftypefn {Target Hook} {unsigned int} TARGET_DEFAULT_MAX_TOTAL_SCALARIZATION_SIZE (bool @var{size_p})
+This target hook is used by the Scalar Replacement of Aggregates pass
+  to determine the maximum size, in words, of aggregate to consider for
+  replacement.  @code{size_p} is used to indicate whether we are compiling
+  for size or speed.  By default, the maximum total scalarization size
+  is determined by MOVE_RATIO and can be further controlled using the
+  parameters @code{sra-max-total-scalarization-size-Ospeed} and
+  @code{sra-max-total-scalarization-size-Osize}.
+@end deftypefn
+
 @defmac BRANCH_COST (@var{speed_p}, @var{predictable_p})
 A C expression for the cost of a branch instruction.  A value of 1 is
 the default; other values are interpreted relative to that. Parameter
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index dd72b98..d560521 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4607,6 +4607,8 @@ These macros are obsolete, new ports should use the target hook
 
 @hook TARGET_MEMORY_MOVE_COST
 
+@hook TARGET_DEFAULT_MAX_TOTAL_SCALARIZATION_SIZE
+
 @defmac BRANCH_COST (@var{speed_p}, @var{predictable_p})
 A C expression for the cost of a branch instruction.  A value of 1 is
 the default; other values are interpreted relative to that. Parameter
diff --git a/gcc/params.def b/gcc/params.def
index aefdd07..dea6fb3 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -942,6 +942,18 @@ DEFPARAM (PARAM_TM_MAX_AGGREGATE_SIZE,
 	  "pairs",
 	  9, 0, 0)
 
+DEFPARAM (PARAM_SRA_TOTAL_SCALARIZATION_SIZE_SPEED,
+	  "sra-max-total-scalarization-size-Ospeed",
+	  "Maximum size, in words, of an aggregate which should be "
+	  "considered for scalarization when compiling for speed",
+	  0, 0, 0)
+
+DEFPARAM (PARAM_SRA_TOTAL_SCALARIZATION_SIZE_SIZE,
+	  "sra-max-total-scalarization-size-Osize",
+	  "Maximum size, in words, of an aggregate which should be "
+	  "considered for scalarization when compiling for size",
+	  0, 0, 0)
+
 DEFPARAM (PARAM_IPA_CP_VALUE_LIST_SIZE,
 	  "ipa-cp-value-list-size",
 	  "Maximum size of a list of values associated with each parameter for "
diff --git a/gcc/target.def b/gcc/target.def
index 3a41db1..f879a3f 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -3037,6 +3037,20 @@ are the same as to this target hook.",
  int, (enum machine_mode mode, reg_class_t rclass, bool in),
  default_memory_move_cost)
 
+/* Return the maximum size in words of aggregate which will be considered
+   for replacement by SRA/IPA-SRA.  */
+DEFHOOK
+(default_max_total_scalarization_size,
+ "This target hook is used by the Scalar Replacement of Aggregates pass\n\
+  to determine the maximum size, in words, of aggregate to consider for\n\
+  replacement.  @code{size_p} is used to indicate whether we are compiling\n\
+  for size or speed.  By default, the maximum total scalarization size\n\
+  is determined by MOVE_RATIO and can be further controlled using the\n\
+  parameters @code{sra-max-total-scalarization-size-Ospeed} and\n\
+  @code{sra-max-total-scalarization-size-Osize}.",
+ unsigned int, (bool size_p),
+ default_max_total_scalarization_size)
+
 /* True for MODE if the target expects that registers in this mode will
    be allocated to registers in a small register class.  The compiler is
    allowed to use registers explicitly used in the rtl as spill registers
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 0f27a5a..3b2d1b8 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1375,6 +1375,15 @@ default_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
 #endif
 }
 
+/* Return the maximum size, in words, of an aggregate that SRA/IPA-SRA will
+   consider for replacement.  MOVE_RATIO takes the inverse of SIZE_P.  */
+
+unsigned int
+default_max_total_scalarization_size (bool size_p ATTRIBUTE_UNUSED)
+{
+  return MOVE_RATIO (!size_p);
+}
+
 bool
 default_profile_before_prologue (void)
 {
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 4be33f8..20168f4 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -177,6 +177,8 @@ extern int default_memory_move_cost (enum machine_mode, reg_class_t, bool);
 extern int default_register_move_cost (enum machine_mode, reg_class_t,
 				       reg_class_t);
 
+extern unsigned int default_max_total_scalarization_size (bool size_p);
+
 extern bool default_profile_before_prologue (void);
 extern reg_class_t default_preferred_reload_class (rtx, reg_class_t);
 extern reg_class_t default_preferred_output_reload_class (rtx, reg_class_t);
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 2f80497..90ad068 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -2482,6 +2482,24 @@ propagate_all_subaccesses (void)
     }
 }
 
+/* Return the maximum total scalarization size, in words: the user-supplied
+   --param for size (SIZE_P) or speed if it is nonzero, otherwise the value
+   of the TARGET_DEFAULT_MAX_TOTAL_SCALARIZATION_SIZE target hook.  */
+
+static unsigned int
+get_max_total_scalarization_size (bool size_p)
+{
+  unsigned param_max_scalarization_size
+    = size_p
+      ? PARAM_VALUE (PARAM_SRA_TOTAL_SCALARIZATION_SIZE_SIZE)
+      : PARAM_VALUE (PARAM_SRA_TOTAL_SCALARIZATION_SIZE_SPEED);
+
+  if (param_max_scalarization_size > 0)
+    return param_max_scalarization_size;
+  /* A zero parameter means it was not set; defer to the target hook.  */
+  return targetm.default_max_total_scalarization_size (size_p);
+}
+
 /* Go through all accesses collected throughout the (intraprocedural) analysis
    stage, exclude overlapping ones, identify representatives and build trees
    out of them, making decisions about scalarization on the way.  Return true
@@ -2493,10 +2511,10 @@ analyze_all_variable_accesses (void)
   int res = 0;
   bitmap tmp = BITMAP_ALLOC (NULL);
   bitmap_iterator bi;
-  unsigned i, max_total_scalarization_size;
-
-  max_total_scalarization_size = UNITS_PER_WORD * BITS_PER_UNIT
-    * MOVE_RATIO (optimize_function_for_speed_p (cfun));
+  unsigned i;
+  unsigned max_total_scalarization_size
+    = get_max_total_scalarization_size (optimize_function_for_size_p (cfun))
+      * UNITS_PER_WORD * BITS_PER_UNIT;
 
   EXECUTE_IF_SET_IN_BITMAP (candidate_bitmap, 0, i, bi)
     if (bitmap_bit_p (should_scalarize_away_bitmap, i)

Reply via email to