From: Ramana Radhakrishnan <ramana.radhakrish...@linaro.org>

Hi,

While investigating cases where we generate excessive moves
between VFP and integer registers I came across a situation
where ivopts wasn't considering the mode in which auto-increment
was allowed in this situation. Changing legitimate_address_p
wasn't enough to catch all these situations as HAVE_PRE_INCREMENT
or HAVE_POST_INCREMENT are not predicated on the modes. I did
think about making this mode specific but these changes were
enough for me to go ahead.

I've tried a few options like disabling the auto-increment
modes for the necessary floating point modes as well as the
vector modes ( for neon for instance only  etc. but that
wasn't enough of a hammer to stop auto-inc from generating
loops of this form. The motivating example for this is
from the bug report here :

https://bugs.launchpad.net/gcc-linaro/+bug/640518

With this patch now we are able to generate:

oo:
        @ args = 0, pretend = 0, frame = 0
        @ frame_needed = 0, uses_anonymous_args = 0
        @ link register save eliminated.
        mov     r3, #0
        fconsts s14, #112
.L2:
        fldmias r1!, {s15}
        add     r3, r3, #1
        cmp     r3, #100
        fadds   s15, s15, s14
        fstmias r0!, {s15}
        bne     .L2
        bx      lr

rather than the kind of code as shown in the bug report.

This hasn't yet run through a full test cycle but I'd like to
push this out for some review comments and also because I promised
Richard sometime last week that I'd have something out for him.

Ok after regression tests and a round of benchmark runs ?

One possible enhancement for architectures that care about whether
something is a valid load vs store auto-increment ( I think SH) is to try
and arrange things such that auto-inc would decide whether
to use the load or the store depending on the kind of memory access.

cheers
Ramana

2011-07-21  Ramana Radhakrishnan  <ramana.radhakrish...@linaro.org>

        * config/arm/arm-protos.h (arm_autoinc_modes_ok_p): Declare.
        * config/arm/arm.c (arm_autoinc_modes_ok_p): Define.
        * config/arm/arm.h (ARM_AUTOINC_VALID_FOR_MODE_P): New.
        (USE_LOAD_POST_INCREMENT, USE_LOAD_PRE_INCREMENT)
        (USE_LOAD_POST_DECREMENT, USE_LOAD_PRE_DECREMENT): Define.
        (USE_STORE_PRE_DECREMENT, USE_STORE_PRE_INCREMENT): Likewise.
        (USE_STORE_POST_DECREMENT, USE_STORE_POST_INCREMENT): Likewise.
        * tree-ssa-loop-ivopts.c (rtl.h): Include.
        (add_autoinc_candidates): Use modes to decide
        whether we have auto-increment.
        (get_address_cost): Likewise.
---
 gcc/ChangeLog               |   14 ++++++++++++++
 gcc/Makefile.in             |    2 +-
 gcc/config/arm/arm-protos.h |    1 +
 gcc/config/arm/arm.c        |   35 +++++++++++++++++++++++++++++++++++
 gcc/config/arm/arm.h        |   16 ++++++++++++++++
 gcc/tree-ssa-loop-ivopts.c  |   29 +++++++++++++++++++++--------
 6 files changed, 88 insertions(+), 9 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index cf57537..262acf8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@
+2011-07-21  Ramana Radhakrishnan  <ramana.radhakrish...@linaro.org>
+
+       * config/arm/arm-protos.h (arm_autoinc_modes_ok_p): Declare.
+       * config/arm/arm.c (arm_autoinc_modes_ok_p): Define.
+       * config/arm/arm.h (ARM_AUTOINC_VALID_FOR_MODE_P): New.
+       (USE_LOAD_POST_INCREMENT, USE_LOAD_PRE_INCREMENT)
+       (USE_LOAD_POST_DECREMENT, USE_LOAD_PRE_DECREMENT): Define.
+       (USE_STORE_PRE_DECREMENT, USE_STORE_PRE_INCREMENT): Likewise.
+       (USE_STORE_POST_DECREMENT, USE_STORE_POST_INCREMENT): Likewise.
+       * tree-ssa-loop-ivopts.c (rtl.h): Include.
+       (add_autoinc_candidates): Use modes to decide
+       whether we have auto-increment.
+       (get_address_cost): Likewise.
+
 2011-07-21  Richard Sandiford  <richard.sandif...@linaro.org>
 
        * regcprop.c (maybe_mode_change): Check HARD_REGNO_MODE_OK.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index d924fb6..b0c361d 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2595,7 +2595,7 @@ tree-predcom.o: tree-predcom.c $(CONFIG_H) $(SYSTEM_H) 
$(TREE_H) $(TM_P_H) \
    $(PARAMS_H) $(DIAGNOSTIC_H) $(TREE_PASS_H) $(TM_H) coretypes.h \
    tree-affine.h $(TREE_INLINE_H) tree-pretty-print.h
 tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
-   $(SYSTEM_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
+   $(SYSTEM_H) $(TREE_H) $(RTL_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
    output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
    $(TREE_PASS_H) $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
    $(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) \
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 2f7c508..6eecce8 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -238,6 +238,7 @@ struct tune_params
 };
 
 extern const struct tune_params *current_tune;
+extern bool arm_autoinc_modes_ok_p (enum machine_mode, RTX_CODE);
 #endif /* RTX_CODE */
 
 #endif /* ! GCC_ARM_PROTOS_H */
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 6e2b799..1e0431f 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -23988,4 +23988,39 @@ arm_attr_length_push_multi(rtx parallel_op, rtx 
first_op)
   return 4;
 }
 
+bool
+arm_autoinc_modes_ok_p (enum machine_mode mode, enum rtx_code code)
+{
+  if (TARGET_SOFT_FLOAT)
+    return true;
+
+  switch (code)
+    {
+    case POST_INC:
+    case PRE_DEC:
+      if (VECTOR_MODE_P (mode))
+       {
+         if (code != PRE_DEC)
+           return true;
+         else 
+           return false;
+       }
+      
+      return true;
+
+    case POST_DEC:
+    case PRE_INC:
+      if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
+       return false;
+      else
+       return true;
+     
+    default:
+      return false;
+      
+    }
+
+  return false;
+}
+
 #include "gt-arm.h"
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 3810f9e..9d36377 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1660,6 +1660,22 @@ typedef struct
 #define HAVE_PRE_MODIFY_REG   TARGET_32BIT
 #define HAVE_POST_MODIFY_REG  TARGET_32BIT
 
+#define ARM_AUTOINC_VALID_FOR_MODE_P(mode, code) \
+  (TARGET_32BIT && arm_autoinc_modes_ok_p (mode, code))
+#define USE_LOAD_POST_INCREMENT(mode) \
+  ARM_AUTOINC_VALID_FOR_MODE_P(mode, POST_INC)
+#define USE_LOAD_PRE_INCREMENT(mode)  \
+  ARM_AUTOINC_VALID_FOR_MODE_P(mode, PRE_INC)
+#define USE_LOAD_POST_DECREMENT(mode) \
+  ARM_AUTOINC_VALID_FOR_MODE_P(mode, POST_DEC)
+#define USE_LOAD_PRE_DECREMENT(mode)  \
+  ARM_AUTOINC_VALID_FOR_MODE_P(mode, PRE_DEC)
+
+#define USE_STORE_PRE_DECREMENT(mode) USE_LOAD_PRE_DECREMENT(mode)
+#define USE_STORE_PRE_INCREMENT(mode) USE_LOAD_PRE_INCREMENT(mode)
+#define USE_STORE_POST_DECREMENT(mode) USE_LOAD_POST_DECREMENT(mode)
+#define USE_STORE_POST_INCREMENT(mode) USE_LOAD_POST_INCREMENT(mode)
+
 /* Macros to check register numbers against specific register classes.  */
 
 /* These assume that REGNO is a hard or pseudo reg number.
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 4d4b67a..7026a41 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -67,6 +67,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "tm.h"
 #include "tree.h"
+#include "rtl.h"
 #include "tm_p.h"
 #include "basic-block.h"
 #include "output.h"
@@ -2356,8 +2357,12 @@ add_autoinc_candidates (struct ivopts_data *data, tree 
base, tree step,
   cstepi = int_cst_value (step);
 
   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
-  if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
-      || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
+  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
+       || USE_STORE_PRE_INCREMENT (mem_mode))
+       && GET_MODE_SIZE (mem_mode) == cstepi)
+      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
+          || USE_STORE_PRE_DECREMENT (mem_mode))
+         && GET_MODE_SIZE (mem_mode) == -cstepi))
     {
       enum tree_code code = MINUS_EXPR;
       tree new_base;
@@ -2374,8 +2379,12 @@ add_autoinc_candidates (struct ivopts_data *data, tree 
base, tree step,
       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
                       use->stmt);
     }
-  if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
-      || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
+  if (((USE_LOAD_POST_INCREMENT (mem_mode)
+       || USE_STORE_POST_INCREMENT (mem_mode))
+       && GET_MODE_SIZE (mem_mode) == cstepi)
+      || ((USE_LOAD_POST_DECREMENT (mem_mode)
+          || USE_STORE_POST_DECREMENT (mem_mode))
+         && GET_MODE_SIZE (mem_mode) == -cstepi))
     {
       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
                       use->stmt);
@@ -3329,25 +3338,29 @@ get_address_cost (bool symbol_present, bool var_present,
       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
 
-      if (HAVE_PRE_DECREMENT)
+      if (USE_LOAD_PRE_DECREMENT (mem_mode) 
+         || USE_STORE_PRE_DECREMENT (mem_mode))
        {
          addr = gen_rtx_PRE_DEC (address_mode, reg0);
          has_predec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
-      if (HAVE_POST_DECREMENT)
+      if (USE_LOAD_POST_DECREMENT (mem_mode) 
+         || USE_STORE_POST_DECREMENT (mem_mode))
        {
          addr = gen_rtx_POST_DEC (address_mode, reg0);
          has_postdec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
-      if (HAVE_PRE_INCREMENT)
+      if (USE_LOAD_PRE_INCREMENT (mem_mode) 
+         || USE_STORE_PRE_DECREMENT (mem_mode))
        {
          addr = gen_rtx_PRE_INC (address_mode, reg0);
          has_preinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
-      if (HAVE_POST_INCREMENT)
+      if (USE_LOAD_POST_INCREMENT (mem_mode) 
+         || USE_STORE_POST_INCREMENT (mem_mode))
        {
          addr = gen_rtx_POST_INC (address_mode, reg0);
          has_postinc[mem_mode]
-- 
1.7.4.1

Reply via email to