Hi All,
The patch is refactored a little according to the last comment. Do you have
more comments? If no, I will commit it later.
Tested on X86_64 and AArch64.
gcc/:
PR tree-optimization/89430
* tree-ssa-phiopt.c (cond_store_replacement): Extend non-trap checking
to support ARRAY_REFs and COMPONENT_REFs. Support a special case: if
there is a dominating load of local variable without address escape,
a store is not trapped (as local stack is always writable).
The logic is also simplified to ignore other loads, as they don't
help to check if a store is trapped (may be read-only).
gcc/testsuite/:
PR tree-optimization/89430
* gcc.dg/tree-ssa/pr89430-1.c: Remove xfail.
* gcc.dg/tree-ssa/pr89430-2.c: Remove xfail.
* gcc.dg/tree-ssa/pr89430-5.c: Remove xfail.
* gcc.dg/tree-ssa/pr89430-6.c: Remove xfail.
* gcc.dg/tree-ssa/pr89430-7-comp-ref.c: New test.
* gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c: New test.
* gcc.dg/tree-ssa/ssa-pre-17.c: Add -fno-tree-cselim.
---
gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c | 2 +-
.../gcc.dg/tree-ssa/pr89430-7-comp-ref.c | 17 +++
.../gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c | 15 ++
gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c | 2 +-
gcc/tree-ssa-phiopt.c | 129 ++++++++++--------
8 files changed, 107 insertions(+), 64 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
index ce242ba569b..8ee1850ac63 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
@@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) {
return a[0]+a[1];
}
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" {
xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
index 90ae36bfce2..9b96875ac7a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
@@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) {
return a[0]+a[1];
}
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" {
xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
index c633cbe947d..b2d04119381 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
@@ -13,4 +13,4 @@ int test(int b, int k) {
return a.data[0] + a.data[1];
}
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" {
xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
index 7cad563128d..8d3c4f7cc6a 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
@@ -16,4 +16,4 @@ int test(int b, int k) {
return a.data[0].x + a.data[1].x;
}
-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" {
xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
new file mode 100644
index 00000000000..c35a2afc70b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
+
+typedef union {
+ int i;
+ float f;
+} U;
+
+int foo(U *u, int b, int i)
+{
+ u->i = 0;
+ if (b)
+ u->i = i;
+ return u->i;
+}
+
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c
b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c
new file mode 100644
index 00000000000..f9e66aefb13
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
+
+int *t;
+
+int f1 (int tt)
+{
+ int *t1 = t;
+ *t1 = -5;
+ if (*t1 < tt)
+ *((unsigned *) t1) = 5;
+ return *t1;
+}
+
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
index 09313716598..a06f339f0bb 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-pre-stats" } */
+/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */
typedef union {
int i;
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index bb97dcf63b4..9d69a9565b4 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -1987,26 +1987,33 @@ abs_replacement (basic_block cond_bb, basic_block
middle_bb,
??? We currently are very conservative and assume that a load might
trap even if a store doesn't (write-only memory). This probably is
- overly conservative. */
+ overly conservative.
-/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
- through it was seen, which would constitute a no-trap region for
- same accesses. */
-struct name_to_bb
+ We currently support a special case that for !TREE_ADDRESSABLE automatic
+ variables, it could ignore whether something is a load or store because the
+ local stack should be always writable. */
+
+/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which
+ basic block an *_REF through it was seen, which would constitute a
+ no-trap region for same accesses.
+
+ Size is needed to support 2 MEM_REFs of different types, like
+ MEM<double>(s_1) and MEM<long>(s_1), which would compare equal with
+ OEP_ADDRESS_OF. */
+struct ref_to_bb
{
- unsigned int ssa_name_ver;
+ tree exp;
+ HOST_WIDE_INT size;
unsigned int phase;
- bool store;
- HOST_WIDE_INT offset, size;
basic_block bb;
};
/* Hashtable helpers. */
-struct ssa_names_hasher : free_ptr_hash <name_to_bb>
+struct refs_hasher : free_ptr_hash<ref_to_bb>
{
- static inline hashval_t hash (const name_to_bb *);
- static inline bool equal (const name_to_bb *, const name_to_bb *);
+ static inline hashval_t hash (const ref_to_bb *);
+ static inline bool equal (const ref_to_bb *, const ref_to_bb *);
};
/* Used for quick clearing of the hash-table when we see calls.
@@ -2016,28 +2023,29 @@ static unsigned int nt_call_phase;
/* The hash function. */
inline hashval_t
-ssa_names_hasher::hash (const name_to_bb *n)
+refs_hasher::hash (const ref_to_bb *n)
{
- return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
- ^ (n->offset << 6) ^ (n->size << 3);
+ inchash::hash hstate;
+ inchash::add_expr (n->exp, hstate, OEP_ADDRESS_OF);
+ hstate.add_hwi (n->size);
+ return hstate.end ();
}
/* The equality function of *P1 and *P2. */
inline bool
-ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)
+refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2)
{
- return n1->ssa_name_ver == n2->ssa_name_ver
- && n1->store == n2->store
- && n1->offset == n2->offset
- && n1->size == n2->size;
+ return operand_equal_p (n1->exp, n2->exp, OEP_ADDRESS_OF)
+ && n1->size == n2->size;
}
class nontrapping_dom_walker : public dom_walker
{
public:
nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
- : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}
+ : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128)
+ {}
virtual edge before_dom_children (basic_block);
virtual void after_dom_children (basic_block);
@@ -2054,7 +2062,7 @@ private:
hash_set<tree> *m_nontrapping;
/* The hash table for remembering what we've seen. */
- hash_table<ssa_names_hasher> m_seen_ssa_names;
+ hash_table<refs_hasher> m_seen_refs;
};
/* Called by walk_dominator_tree, when entering the block BB. */
@@ -2103,65 +2111,68 @@ nontrapping_dom_walker::after_dom_children (basic_block
bb)
}
/* We see the expression EXP in basic block BB. If it's an interesting
- expression (an MEM_REF through an SSA_NAME) possibly insert the
- expression into the set NONTRAP or the hash table of seen expressions.
- STORE is true if this expression is on the LHS, otherwise it's on
- the RHS. */
+ expression of:
+ 1) MEM_REF
+ 2) ARRAY_REF
+ 3) COMPONENT_REF
+ possibly insert the expression into the set NONTRAP or the hash table
+ of seen expressions. STORE is true if this expression is on the LHS,
+ otherwise it's on the RHS. */
void
nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store)
{
HOST_WIDE_INT size;
- if (TREE_CODE (exp) == MEM_REF
- && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
- && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
+ if ((TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF
+ || TREE_CODE (exp) == COMPONENT_REF)
&& (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
{
- tree name = TREE_OPERAND (exp, 0);
- struct name_to_bb map;
- name_to_bb **slot;
- struct name_to_bb *n2bb;
+ struct ref_to_bb map;
+ ref_to_bb **slot;
+ struct ref_to_bb *r2bb;
basic_block found_bb = 0;
- /* Try to find the last seen MEM_REF through the same
- SSA_NAME, which can trap. */
- map.ssa_name_ver = SSA_NAME_VERSION (name);
- map.phase = 0;
- map.bb = 0;
- map.store = store;
- map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
- map.size = size;
+ if (!store)
+ {
+ tree base = get_base_address (exp);
+ /* Only record a LOAD of a local variable without address-taken, as
+ the local stack is always writable. This allows cselim on a STORE
+ with a dominating LOAD. */
+ if (!auto_var_p (base) || TREE_ADDRESSABLE (base))
+ return;
+ }
- slot = m_seen_ssa_names.find_slot (&map, INSERT);
- n2bb = *slot;
- if (n2bb && n2bb->phase >= nt_call_phase)
- found_bb = n2bb->bb;
+ /* Try to find the last seen *_REF, which can trap. */
+ map.exp = exp;
+ map.size = size;
+ slot = m_seen_refs.find_slot (&map, INSERT);
+ r2bb = *slot;
+ if (r2bb && r2bb->phase >= nt_call_phase)
+ found_bb = r2bb->bb;
- /* If we've found a trapping MEM_REF, _and_ it dominates EXP
- (it's in a basic block on the path from us to the dominator root)
+ /* If we've found a trapping *_REF, _and_ it dominates EXP
+ (it's in a basic block on the path from us to the dominator root)
then we can't trap. */
- if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
+ if (found_bb && (((size_t) found_bb->aux) & 1) == 1)
{
m_nontrapping->add (exp);
}
else
- {
+ {
/* EXP might trap, so insert it into the hash table. */
- if (n2bb)
+ if (r2bb)
{
- n2bb->phase = nt_call_phase;
- n2bb->bb = bb;
+ r2bb->phase = nt_call_phase;
+ r2bb->bb = bb;
}
else
{
- n2bb = XNEW (struct name_to_bb);
- n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
- n2bb->phase = nt_call_phase;
- n2bb->bb = bb;
- n2bb->store = store;
- n2bb->offset = map.offset;
- n2bb->size = size;
- *slot = n2bb;
+ r2bb = XNEW (struct ref_to_bb);
+ r2bb->phase = nt_call_phase;
+ r2bb->bb = bb;
+ r2bb->exp = exp;
+ r2bb->size = size;
+ *slot = r2bb;
}
}
}
--
2.17.1
________________________________________
From: Richard Biener <[email protected]>
Sent: Tuesday, June 2, 2020 19:29
To: Hao Liu OS
Cc: [email protected]; Jakub Jelinek
Subject: RE: [PATCH] extend cselim to check non-trapping for more references
(PR tree-optimizaton/89430)
On Fri, 29 May 2020, Hao Liu OS wrote:
> Hi Richard,
>
> Thanks for your comments. It's a good idea to simplify the code and remove
> get_inner_reference. I've updated the patch accordingly. I also simplified
> the code to ignore other loads, which can not help to check if a store can be
> trapped.
>
> About tests:
> 1. All previously XFAIL tests (gcc.dg/tree-ssa/pr89430-*) for pr89430
> are passed with this patch.
> 2. ssa-pre-17.c is failed as cselim optimizes the conditional store, so
> "-fno-tree-cselim" is added. That case is added as a new test case for
> pr89430.
> 3. Other test cases (including the regression test for pr94734) in gcc
> testsuit are not affected by this patch, according to gcc "make check".
> 4. Some other benchmarks are also tested for correctness and
> performance. The performance regression mentioned in pr89430 can be fixed.
>
> Review, please.
Looks mostly good now, some more comments inline below
> gcc/ChangeLog:
>
> PR tree-optimization/89430
> * tree-ssa-phiopt.c (cond_store_replacement): Extend non-trap checking to
> support ARRAY_REFs and COMPONENT_REFs. Support a special case: if there
> is
> a dominating load of local variable without address escape, a store is not
> trapped (as local stack is always writable). Other loads are ignored for
> simplicity, as they don't help to check if a store can be trapped.
>
> gcc/testsuite/ChangeLog:
>
> PR tree-optimization/89430
> * gcc.dg/tree-ssa/pr89430-1.c: Remove xfail.
> * gcc.dg/tree-ssa/pr89430-2.c: Remove xfail.
> * gcc.dg/tree-ssa/pr89430-5.c: Remove xfail.
> * gcc.dg/tree-ssa/pr89430-6.c: Remove xfail.
> * gcc.dg/tree-ssa/pr89430-7-comp-ref.c: New test.
> * gcc.dg/tree-ssa/ssa-pre-17.c: Add -fno-tree-cselim.
>
> ---
> gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c | 2 +-
> gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c | 2 +-
> gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c | 2 +-
> gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c | 2 +-
> .../gcc.dg/tree-ssa/pr89430-7-comp-ref.c | 17 +++
> gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c | 2 +-
> gcc/tree-ssa-phiopt.c | 140 +++++++++---------
> 7 files changed, 91 insertions(+), 76 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
>
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
> b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
> index ce242ba569b..8ee1850ac63 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
> @@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) {
> return a[0]+a[1];
> }
>
> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" {
> xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } }
> */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
> b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
> index 90ae36bfce2..9b96875ac7a 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
> @@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) {
> return a[0]+a[1];
> }
>
> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" {
> xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } }
> */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
> b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
> index c633cbe947d..b2d04119381 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
> @@ -13,4 +13,4 @@ int test(int b, int k) {
> return a.data[0] + a.data[1];
> }
>
> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" {
> xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } }
> */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
> b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
> index 7cad563128d..8d3c4f7cc6a 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
> @@ -16,4 +16,4 @@ int test(int b, int k) {
> return a.data[0].x + a.data[1].x;
> }
>
> -/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" {
> xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } }
> */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
> b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
> new file mode 100644
> index 00000000000..c35a2afc70b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-cselim-details" } */
> +
> +typedef union {
> + int i;
> + float f;
> +} U;
> +
> +int foo(U *u, int b, int i)
> +{
> + u->i = 0;
> + if (b)
> + u->i = i;
> + return u->i;
> +}
> +
> +/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } }
> */
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
> index 09313716598..a06f339f0bb 100644
> --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c
> @@ -1,5 +1,5 @@
> /* { dg-do compile } */
> -/* { dg-options "-O2 -fdump-tree-pre-stats" } */
> +/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */
>
> typedef union {
> int i;
> diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
> index b1e0dce93d8..7c67b75486c 100644
> --- a/gcc/tree-ssa-phiopt.c
> +++ b/gcc/tree-ssa-phiopt.c
> @@ -1969,43 +1969,44 @@ abs_replacement (basic_block cond_bb, basic_block
> middle_bb,
> return true;
> }
>
> -/* Auxiliary functions to determine the set of memory accesses which
> +/* Auxiliary functions to determine the set of memory write accesses which
> can't trap because they are preceded by accesses to the same memory
> - portion. We do that for MEM_REFs, so we only need to track
> - the SSA_NAME of the pointer indirectly referenced. The algorithm
> + portion. We do that for MEM_REFs/ARRAY_REFs/COMPONENT_REFs. The
> algorithm
> simply is a walk over all instructions in dominator order. When
> - we see an MEM_REF we determine if we've already seen a same
> + we see an *_REF we determine if we've already seen a same
> ref anywhere up to the root of the dominator tree. If we do the
> current access can't trap. If we don't see any dominating access
> the current access might trap, but might also make later accesses
> non-trapping, so we remember it. We need to be careful with loads
> or stores, for instance a load might not trap, while a store would,
> so if we see a dominating read access this doesn't mean that a later
> - write access would not trap. Hence we also need to differentiate the
> - type of access(es) seen.
> + write access would not trap.
>
> - ??? We currently are very conservative and assume that a load might
> - trap even if a store doesn't (write-only memory). This probably is
> - overly conservative. */
> + We care about following memory accesses:
> + 1) a store.
> + 2) a load of local variable without address-taken (as local stack is
> + always writable). It helps to optimize a conditoinal store with
> + a dominating load.
>
> -/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF
> - through it was seen, which would constitute a no-trap region for
> - same accesses. */
> -struct name_to_bb
> + other loads are ignored as we don't know if the accessed memory is
> writable,
> + so they don't help conditional store replacement. */
> +
> +/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which
> + basic block an *_REF through it was seen, which would constitute a
> + no-trap region for same accesses. */
> +struct ref_to_bb
> {
> - unsigned int ssa_name_ver;
> + tree exp;
> unsigned int phase;
> - bool store;
> - HOST_WIDE_INT offset, size;
> basic_block bb;
> };
>
> /* Hashtable helpers. */
>
> -struct ssa_names_hasher : free_ptr_hash <name_to_bb>
> +struct refs_hasher : free_ptr_hash<ref_to_bb>
> {
> - static inline hashval_t hash (const name_to_bb *);
> - static inline bool equal (const name_to_bb *, const name_to_bb *);
> + static inline hashval_t hash (const ref_to_bb *);
> + static inline bool equal (const ref_to_bb *, const ref_to_bb *);
> };
>
> /* Used for quick clearing of the hash-table when we see calls.
> @@ -2015,28 +2016,27 @@ static unsigned int nt_call_phase;
> /* The hash function. */
>
> inline hashval_t
> -ssa_names_hasher::hash (const name_to_bb *n)
> +refs_hasher::hash (const ref_to_bb *n)
> {
> - return n->ssa_name_ver ^ (((hashval_t) n->store) << 31)
> - ^ (n->offset << 6) ^ (n->size << 3);
> + inchash::hash hstate;
> + inchash::add_expr (n->exp, hstate);
> + return hstate.end ();
> }
>
> /* The equality function of *P1 and *P2. */
>
> inline bool
> -ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2)
> +refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2)
> {
> - return n1->ssa_name_ver == n2->ssa_name_ver
> - && n1->store == n2->store
> - && n1->offset == n2->offset
> - && n1->size == n2->size;
> + return operand_equal_p (n1->exp, n2->exp, 0);
So I figured we're going to regress some cases here that do not
have semantically agreeing MEM_REFs, like MEM<double>(s_1)
and MEM<long>(s_1) they would not compare equal but did before,
just considering offset and size. So we'd like to pass
OEP_ADDRESS_OF as flag argument to operand_equal_p here
and to add_expr above. This part would then just check whether
the accesses are to the same address.
This means you have to preserve the ->size member (and check it).
The rest of the patch looks good to me.
Thanks,
Richard.
> }
>
> class nontrapping_dom_walker : public dom_walker
> {
> public:
> nontrapping_dom_walker (cdi_direction direction, hash_set<tree> *ps)
> - : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {}
> + : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128)
> + {}
>
> virtual edge before_dom_children (basic_block);
> virtual void after_dom_children (basic_block);
> @@ -2053,7 +2053,7 @@ private:
> hash_set<tree> *m_nontrapping;
>
> /* The hash table for remembering what we've seen. */
> - hash_table<ssa_names_hasher> m_seen_ssa_names;
> + hash_table<refs_hasher> m_seen_refs;
> };
>
> /* Called by walk_dominator_tree, when entering the block BB. */
> @@ -2102,65 +2102,63 @@ nontrapping_dom_walker::after_dom_children
> (basic_block bb)
> }
>
> /* We see the expression EXP in basic block BB. If it's an interesting
> - expression (an MEM_REF through an SSA_NAME) possibly insert the
> - expression into the set NONTRAP or the hash table of seen expressions.
> - STORE is true if this expression is on the LHS, otherwise it's on
> - the RHS. */
> + expression of:
> + 1) MEM_REF
> + 2) ARRAY_REF
> + 3) COMPONENT_REF
> + possibly insert the expression into the set NONTRAP or the hash table
> + of seen expressions. STORE is true if this expression is on the LHS,
> + otherwise it's on the RHS. */
> void
> nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool
> store)
> {
> - HOST_WIDE_INT size;
> -
> - if (TREE_CODE (exp) == MEM_REF
> - && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME
> - && tree_fits_shwi_p (TREE_OPERAND (exp, 1))
> - && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0)
> + if (TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF
> + || TREE_CODE (exp) == COMPONENT_REF)
> {
> - tree name = TREE_OPERAND (exp, 0);
> - struct name_to_bb map;
> - name_to_bb **slot;
> - struct name_to_bb *n2bb;
> + struct ref_to_bb map;
> + ref_to_bb **slot;
> + struct ref_to_bb *r2bb;
> basic_block found_bb = 0;
>
> - /* Try to find the last seen MEM_REF through the same
> - SSA_NAME, which can trap. */
> - map.ssa_name_ver = SSA_NAME_VERSION (name);
> - map.phase = 0;
> - map.bb = 0;
> - map.store = store;
> - map.offset = tree_to_shwi (TREE_OPERAND (exp, 1));
> - map.size = size;
> -
> - slot = m_seen_ssa_names.find_slot (&map, INSERT);
> - n2bb = *slot;
> - if (n2bb && n2bb->phase >= nt_call_phase)
> - found_bb = n2bb->bb;
> -
> - /* If we've found a trapping MEM_REF, _and_ it dominates EXP
> - (it's in a basic block on the path from us to the dominator root)
> + if (!store)
> + {
> + tree base = get_base_address (exp);
> + /* Only record a LOAD of local variable without address-taken, as
> + the local stack is always writable. This allows cselim on a STORE
> + with a dominating LOAD. */
> + if (!auto_var_p (base) || TREE_ADDRESSABLE (base))
> + return;
> + }
> +
> + /* Try to find the last seen *_REF, which can trap. */
> + map.exp = exp;
> + slot = m_seen_refs.find_slot (&map, INSERT);
> + r2bb = *slot;
> + if (r2bb && r2bb->phase >= nt_call_phase)
> + found_bb = r2bb->bb;
> +
> + /* If we've found a trapping *_REF, _and_ it dominates EXP
> + (it's in a basic block on the path from us to the dominator root)
> then we can't trap. */
> if (found_bb && (((size_t)found_bb->aux) & 1) == 1)
> {
> m_nontrapping->add (exp);
> }
> else
> - {
> + {
> /* EXP might trap, so insert it into the hash table. */
> - if (n2bb)
> + if (r2bb)
> {
> - n2bb->phase = nt_call_phase;
> - n2bb->bb = bb;
> + r2bb->phase = nt_call_phase;
> + r2bb->bb = bb;
> }
> else
> {
> - n2bb = XNEW (struct name_to_bb);
> - n2bb->ssa_name_ver = SSA_NAME_VERSION (name);
> - n2bb->phase = nt_call_phase;
> - n2bb->bb = bb;
> - n2bb->store = store;
> - n2bb->offset = map.offset;
> - n2bb->size = size;
> - *slot = n2bb;
> + r2bb = XNEW (struct ref_to_bb);
> + r2bb->phase = nt_call_phase;
> + r2bb->bb = bb;
> + r2bb->exp = exp;
> + *slot = r2bb;
> }
> }
> }
>
--
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imend?rffer; HRB 36809 (AG Nuernberg)