Hi Richard, > -----Original Message----- > From: Richard Biener [mailto:richard.guent...@gmail.com] > Sent: Friday, October 30, 2015 5:00 PM > To: Kumar, Venkataramanan > Cc: Andrew Pinski; gcc-patches@gcc.gnu.org > Subject: Re: [RFC] [Patch] Relax tree-if-conv.c trap assumptions. > > On Fri, Oct 30, 2015 at 11:21 AM, Kumar, Venkataramanan > <venkataramanan.ku...@amd.com> wrote: > > Hi Andrew, > > > >> -----Original Message----- > >> From: Andrew Pinski [mailto:pins...@gmail.com] > >> Sent: Friday, October 30, 2015 3:38 PM > >> To: Kumar, Venkataramanan > >> Cc: Richard Beiner (richard.guent...@gmail.com); > >> gcc-patches@gcc.gnu.org > >> Subject: Re: [RFC] [Patch] Relax tree-if-conv.c trap assumptions. > >> > >> On Fri, Oct 30, 2015 at 6:06 PM, Kumar, Venkataramanan > >> <venkataramanan.ku...@amd.com> wrote: > >> > Hi Richard, > >> > > >> > I am trying to "if covert the store" in the below test case and > >> > later help it to get vectorized under -Ofast > >> > -ftree-loop-if-convert-stores -fno-common > >> > > >> > #define LEN 4096 > >> > __attribute__((aligned(32))) float array[LEN]; void test() { for > >> > (int i = 0; i < > >> LEN; i++) { > >> > if (array[i] > (float)0.) > >> > array[i] =3 ; > >> > > >> > } > >> > } > >> > > >> > Currently in GCC 5.2 does not vectorize it. > >> > https://goo.gl/9nS6Pd > >> > > >> > However ICC seems to vectorize it > >> > https://goo.gl/y1yGHx > >> > > >> > As discussed with you earlier, to allow "if convert store" I am > >> > checking the > >> following: > >> > > >> > (1) We already read the reference "array[i]" unconditionally once . > >> > (2) I am now checking if we are conditionally writing to memory > >> > which is > >> defined as read and write and is bound to the definition we are seeing. > >> > >> > >> I don't think this is thread safe .... > >> > > > > I thought under -ftree-loop-if-convert-stores it is ok to do this > transformation. > > Yes, that's what we have done in the past here. Note that I think we should > remove the flag in favor of the --param allow-store-data-races and if-convert > safe stores always (stores to thread-local memory). Esp. using masked > stores should be always safe. > > > Regards, > > Venkat. > > > >> Thanks, > >> Andrew > >> > >> > > >> > With this change, I get able to if convert and the vectorize the case > >> > also. > >> > > >> > /build/gcc/xgcc -B ./build/gcc/ ifconv.c -Ofast -fopt-info-vec -S > >> > -ftree-loop-if-convert-stores -fno-common > >> > ifconv.c:2:63: note: loop vectorized > >> > > >> > Patch > >> > ------ > >> > diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c index > >> > f201ab5..6475cc0 100644 > >> > --- a/gcc/tree-if-conv.c > >> > +++ b/gcc/tree-if-conv.c > >> > @@ -727,6 +727,34 @@ write_memrefs_written_at_least_once > (gimple > >> *stmt, > >> > return true; > >> > } > >> > > >> > +static bool > >> > +write_memrefs_safe_to_access_unconditionally (gimple *stmt, > >> > + > >> > +vec<data_reference_p> drs) { > > { to the next line > > The function has a bad name it should be write_memrefs_writable () > > >> > + int i; > >> > + data_reference_p a; > >> > + bool found = false; > >> > + > >> > + for (i = 0; drs.iterate (i, &a); i++) > >> > + { > >> > + if (DR_STMT (a) == stmt > >> > + && DR_IS_WRITE (a) > >> > + && (DR_WRITTEN_AT_LEAST_ONCE (a) == 0) > >> > + && (DR_RW_UNCONDITIONALLY (a) == 1)) > >> > + { > >> > + tree base = get_base_address (DR_REF (a)); > >> > + found = false; > >> > + if (DECL_P (base) > >> > + && decl_binds_to_current_def_p (base) > >> > + && !TREE_READONLY (base)) > >> > + { > >> > + found = true; > > So if the vector ever would contain more than one write you'd return true if > only one of them is not readonly. > > IMHO all the routines need refactoring to operate on single DRs which AFAIK > is the only case if-conversion handles anyway (can't if-convert calls or > aggregate assignments or asms). Ugh, it seems the whole thing is quadratic, > doing linear walks to find the DRs for a stmt ... > > A simple > > Index: gcc/tree-if-conv.c > ========================================================== > ========= > --- gcc/tree-if-conv.c (revision 229572) > +++ gcc/tree-if-conv.c (working copy) > @@ -612,9 +612,10 @@ memrefs_read_or_written_unconditionally > data_reference_p a, b; > tree ca = bb_predicate (gimple_bb (stmt)); > > - for (i = 0; drs.iterate (i, &a); i++) > - if (DR_STMT (a) == stmt) > - { > + for (i = gimple_uid (stmt) - 1; drs.iterate (i, &a); i++) > + { > + if (DR_STMT (a) != stmt) > + break; > bool found = false; > int x = DR_RW_UNCONDITIONALLY (a); > > @@ -684,10 +685,13 @@ write_memrefs_written_at_least_once (gim > data_reference_p a, b; > tree ca = bb_predicate (gimple_bb (stmt)); > > - for (i = 0; drs.iterate (i, &a); i++) > - if (DR_STMT (a) == stmt > - && DR_IS_WRITE (a)) > - { > + for (i = gimple_uid (stmt) - 1; drs.iterate (i, &a); i++) > + { > + if (DR_STMT (a) != stmt) > + break; > + if (! DR_IS_WRITE (a)) > + continue; > + > bool found = false; > int x = DR_WRITTEN_AT_LEAST_ONCE (a); > > @@ -721,7 +725,7 @@ write_memrefs_written_at_least_once (gim > DR_WRITTEN_AT_LEAST_ONCE (a) = 0; > return false; > } > - } > + } > > return true; > } > @@ -1291,6 +1297,7 @@ if_convertible_loop_p_1 (struct loop *lo > case GIMPLE_CALL: > case GIMPLE_DEBUG: > case GIMPLE_COND: > + gimple_set_uid (gsi_stmt (gsi), 0); > break; > default: > return false; > @@ -1304,6 +1311,8 @@ if_convertible_loop_p_1 (struct loop *lo > dr->aux = XNEW (struct ifc_dr); > DR_WRITTEN_AT_LEAST_ONCE (dr) = -1; > DR_RW_UNCONDITIONALLY (dr) = -1; > + if (gimple_uid (DR_STMT (dr)) == 0) > + gimple_set_uid (DR_STMT (dr), i + 1); > } > predicate_bbs (loop); > > > would improve that tremendously ... (well, given the array of DRs is sorted > by stmt which is an implementation detail not documented). > > Computing all the DR flags in separate loops also doesn't make much sense to > me and just complicates code. > > Richard.
I have now implemented storing of DR and references using hash maps. Please find attached patch. As discussed, I am now storing the ref, DR and baseref, DR pairs along with unconditional read/write information in hash tables while iterating over DR during its initialization. Then during checking for possible traps for if-converting, just check if the memory reference for a gimple statement is read/written unconditionally by querying the hash table instead of quadratic walk. Boot strapped and regression tested on x86_64. gcc/ChangeLog 2015-11-07 Venkataramanan <venkataramanan.ku...@amd.com> *tree-hash-traits.h (struct tree_operand_hash). Use compare_type and value_type. (equal_keys): Rename to equal and use compare_type and value_type. * tree-if-conv.c (ref_DR_map): Define. (baseref_DR_map): Like wise (struct ifc_dr): New tree predicate field. (hash_memrefs_baserefs_and_store_DRs_read_written_info): New function. (memrefs_read_or_written_unconditionally): Use hashmap to query unconditional read/written information. (write_memrefs_written_at_least_once) : Remove. (ifcvt_memrefs_wont_trap): Remove call to write_memrefs_written_at_least_once. (if_convertible_loop_p_1): Initialize/delete hash maps an initialize predicates before the call to hash_memrefs_baserefs_and_store_DRs_read_written_info. gcc/testsuite/ChangeLog 2015-11-07 Venkataramanan <venkataramanan.ku...@amd.com> *gcc.dg/tree-ssa/ifc-8.c: Add new. Ok for trunk? Regards, Venkat. > > >> > + } > >> > + } > >> > + } > >> > + return found; > >> > +} > >> > + > >> > /* Return true when the memory references of STMT won't trap in the > >> > if-converted code. There are two things that we have to check for: > >> > > >> > @@ -748,8 +776,20 @@ write_memrefs_written_at_least_once > (gimple > >> > *stmt, static bool ifcvt_memrefs_wont_trap (gimple *stmt, > >> > vec<data_reference_p> refs) { > >> > - return write_memrefs_written_at_least_once (stmt, refs) > >> > - && memrefs_read_or_written_unconditionally (stmt, refs); > >> > + bool memrefs_written_once, > memrefs_read_written_unconditionally; > >> > + bool memrefs_safe_to_access; > >> > + > >> > + memrefs_written_once > >> > + = write_memrefs_written_at_least_once (stmt, refs); > >> > + > >> > + memrefs_read_written_unconditionally > >> > + = memrefs_read_or_written_unconditionally (stmt, > >> > + refs); > >> > + > >> > + memrefs_safe_to_access > >> > + = write_memrefs_safe_to_access_unconditionally (stmt, > >> > + refs); > >> > + > >> > + return ((memrefs_written_once || memrefs_safe_to_access) > >> > + && memrefs_read_written_unconditionally); > >> > } > >> > > >> > /* Wrapper around gimple_could_trap_p refined for the needs of the > >> > > >> > > >> > do I need this function write_memrefs_written_at_least_once > anymore? > >> > Please suggest if there a better way to do this. > >> > > >> > Bootstrapped and regression tested on x86_64. > >> > I am not adding change log and comments now, as I wanted to check > >> approach first. > >> > > >> > Regards, > >> > Venkat. > >> > > >> >
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c new file mode 100644 index 0000000..c155e5b --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-8.c @@ -0,0 +1,17 @@ + +/* { dg-do compile } */ +/* { dg-options "-Ofast -fdump-tree-ifcvt-details -fno-common -ftree-loop-if-convert-stores" } */ + +#define LEN 4096 + __attribute__((aligned(32))) float array[LEN]; + +void test () +{ + for (int i = 0; i < LEN; i++) + { + if (array[i] > (float)0.) + array[i] =3 ; + } +} + +/* { dg-final { scan-tree-dump-times "Applying if-conversion" 1 "ifcvt" } } */ diff --git a/gcc/tree-hash-traits.h b/gcc/tree-hash-traits.h index 1edc49e..be93106 100644 --- a/gcc/tree-hash-traits.h +++ b/gcc/tree-hash-traits.h @@ -19,22 +19,23 @@ along with GCC; see the file COPYING3. If not see #ifndef tree_hash_traits_h #define tree_hash_traits_h - /* Hash for trees based on operand_equal_p. */ struct tree_operand_hash : ggc_ptr_hash <tree_node> { - static inline hashval_t hash (const_tree); - static inline bool equal_keys (const_tree, const_tree); + static inline hashval_t hash (const value_type &); + static inline bool equal (const value_type &, + const compare_type &); }; inline hashval_t -tree_operand_hash::hash (const_tree t) +tree_operand_hash::hash (const value_type &t) { return iterative_hash_expr (t, 0); } inline bool -tree_operand_hash::equal_keys (const_tree t1, const_tree t2) +tree_operand_hash::equal (const value_type &t1, + const compare_type &t2) { return operand_equal_p (t1, t2, 0); } diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c index f201ab5..8795faa 100644 --- a/gcc/tree-if-conv.c +++ b/gcc/tree-if-conv.c @@ -129,6 +129,12 @@ static basic_block *ifc_bbs; /* Apply more aggressive (extended) if-conversion if true. */ static bool aggressive_if_conv; +/* Hash table to store references, DR pairs. */ +static hash_map<tree_operand_hash, data_reference_p> *ref_DR_map; + +/* Hash table to store base reference, DR pairs. */ +static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map; + /* Structure used to predicate basic blocks. This is attached to the ->aux field of the BBs in the loop to be if-converted. */ struct bb_predicate { @@ -592,137 +598,153 @@ struct ifc_dr { /* -1 when not initialized, 0 when false, 1 when true. */ int rw_unconditionally; + + tree ored_result; + }; #define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux) #define DR_WRITTEN_AT_LEAST_ONCE(DR) (IFC_DR (DR)->written_at_least_once) #define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally) -/* Returns true when the memory references of STMT are read or written - unconditionally. In other words, this function returns true when - for every data reference A in STMT there exist other accesses to - a data reference with the same base with predicates that add up (OR-up) to - the true predicate: this ensures that the data reference A is touched - (read or written) on every iteration of the if-converted loop. */ - -static bool -memrefs_read_or_written_unconditionally (gimple *stmt, - vec<data_reference_p> drs) +/* Iterates over DR's and stores refs, DR and base refs, DR pairs in + HASH tables. While storing them in HASH table, it checks if the + reference is unconditionally read or written and stores that as a flag + information. For base reference it checks if it is written atlest once + unconditionally and stores it as flag information along with DR. + In other words for every data reference A in STMT there exist other + accesses to a data reference with the same base with predicates that + add up (OR-up) to the true predicate: this ensures that the data + reference A is touched (read or written) on every iteration of the + if-converted loop. */ +static void +hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a) { - int i, j; - data_reference_p a, b; - tree ca = bb_predicate (gimple_bb (stmt)); - for (i = 0; drs.iterate (i, &a); i++) - if (DR_STMT (a) == stmt) - { - bool found = false; - int x = DR_RW_UNCONDITIONALLY (a); - - if (x == 0) - return false; + data_reference_p *master_dr, *base_master_dr; + tree ref = DR_REF (a); + tree base_ref = DR_BASE_OBJECT (a); + tree ca = bb_predicate (gimple_bb (DR_STMT (a))); + bool exsist1, exsist2; - if (x == 1) - continue; + while (TREE_CODE (ref) == COMPONENT_REF + || TREE_CODE (ref) == IMAGPART_EXPR + || TREE_CODE (ref) == REALPART_EXPR) + ref = TREE_OPERAND (ref, 0); - for (j = 0; drs.iterate (j, &b); j++) - { - tree ref_base_a = DR_REF (a); - tree ref_base_b = DR_REF (b); + master_dr = &ref_DR_map->get_or_insert (ref, &exsist1); - if (DR_STMT (b) == stmt) - continue; + if (!exsist1) + { + if (is_true_predicate (ca)) + { + DR_RW_UNCONDITIONALLY (a) = 1; + } - while (TREE_CODE (ref_base_a) == COMPONENT_REF - || TREE_CODE (ref_base_a) == IMAGPART_EXPR - || TREE_CODE (ref_base_a) == REALPART_EXPR) - ref_base_a = TREE_OPERAND (ref_base_a, 0); + IFC_DR (a)->ored_result = ca; + *master_dr = a; + } + else + { + IFC_DR (*master_dr)->ored_result + = fold_or_predicates + (EXPR_LOCATION (IFC_DR (*master_dr)->ored_result), + ca, IFC_DR (*master_dr)->ored_result); - while (TREE_CODE (ref_base_b) == COMPONENT_REF - || TREE_CODE (ref_base_b) == IMAGPART_EXPR - || TREE_CODE (ref_base_b) == REALPART_EXPR) - ref_base_b = TREE_OPERAND (ref_base_b, 0); + if (is_true_predicate (ca) + || is_true_predicate (IFC_DR (*master_dr)->ored_result)) + { + DR_RW_UNCONDITIONALLY (*master_dr) = 1; + } + } - if (operand_equal_p (ref_base_a, ref_base_b, 0)) - { - tree cb = bb_predicate (gimple_bb (DR_STMT (b))); - - if (DR_RW_UNCONDITIONALLY (b) == 1 - || is_true_predicate (cb) - || is_true_predicate (ca - = fold_or_predicates (EXPR_LOCATION (cb), ca, cb))) - { - DR_RW_UNCONDITIONALLY (a) = 1; - DR_RW_UNCONDITIONALLY (b) = 1; - found = true; - break; - } - } - } + base_master_dr = &baseref_DR_map->get_or_insert (base_ref,&exsist2); - if (!found) - { - DR_RW_UNCONDITIONALLY (a) = 0; - return false; - } - } + if (!exsist2) + { + if (DR_IS_WRITE (a) && is_true_predicate (ca)) + { + DR_WRITTEN_AT_LEAST_ONCE (a) = 1; + } + IFC_DR (a)->ored_result = ca; + *base_master_dr = a; + } + else + { + IFC_DR (*base_master_dr)->ored_result + = fold_or_predicates + (EXPR_LOCATION (IFC_DR (*base_master_dr)->ored_result), + ca, IFC_DR (*base_master_dr)->ored_result); - return true; + if (DR_IS_WRITE (a) + && (is_true_predicate (ca) + || (is_true_predicate + (IFC_DR (*base_master_dr)->ored_result)))) + { + DR_WRITTEN_AT_LEAST_ONCE (*base_master_dr) = 1; + } + } } -/* Returns true when the memory references of STMT are unconditionally - written. In other words, this function returns true when for every - data reference A written in STMT, there exist other writes to the - same data reference with predicates that add up (OR-up) to the true - predicate: this ensures that the data reference A is written on - every iteration of the if-converted loop. */ - +/* Returns true for the memory reference in STMT, same memory reference + is read or written unconditionally atleast once and the base memory + reference is written unconditionally once. This is to check reference + will not write fault. Also retuns true if the memory reference is + unconditionally read once then we are conditionally writing to memory + which is defined as read and write and is bound to the definition + we are seeing. */ static bool -write_memrefs_written_at_least_once (gimple *stmt, - vec<data_reference_p> drs) +memrefs_read_or_written_unconditionally (gimple *stmt, + vec<data_reference_p> drs) { - int i, j; - data_reference_p a, b; - tree ca = bb_predicate (gimple_bb (stmt)); + int i; + data_reference_p a, *master_dr, *base_master_dr; + bool found = false; + for (i = gimple_uid (stmt) - 1; drs.iterate (i, &a); i++) + { + if (DR_STMT (a) != stmt) + break; - for (i = 0; drs.iterate (i, &a); i++) - if (DR_STMT (a) == stmt - && DR_IS_WRITE (a)) - { - bool found = false; - int x = DR_WRITTEN_AT_LEAST_ONCE (a); + tree ref_base_a = DR_REF (a); + tree base = DR_BASE_OBJECT (a); - if (x == 0) - return false; + while (TREE_CODE (ref_base_a) == COMPONENT_REF + || TREE_CODE (ref_base_a) == IMAGPART_EXPR + || TREE_CODE (ref_base_a) == REALPART_EXPR) + ref_base_a = TREE_OPERAND (ref_base_a, 0); - if (x == 1) - continue; + master_dr = ref_DR_map->get (ref_base_a); + base_master_dr = baseref_DR_map->get (base); - for (j = 0; drs.iterate (j, &b); j++) - if (DR_STMT (b) != stmt - && DR_IS_WRITE (b) - && same_data_refs_base_objects (a, b)) - { - tree cb = bb_predicate (gimple_bb (DR_STMT (b))); + gcc_assert (master_dr != NULL && base_master_dr != NULL); - if (DR_WRITTEN_AT_LEAST_ONCE (b) == 1 - || is_true_predicate (cb) - || is_true_predicate (ca = fold_or_predicates (EXPR_LOCATION (cb), - ca, cb))) + if (DR_RW_UNCONDITIONALLY (*master_dr) == 1) + { + if (DR_WRITTEN_AT_LEAST_ONCE (*base_master_dr) == 1) + { + found = true; + break; + } + else + { + tree base_tree = get_base_address (DR_REF (a)); + if (DECL_P (base_tree) + && decl_binds_to_current_def_p (base_tree) + && !TREE_READONLY (base_tree)) { - DR_WRITTEN_AT_LEAST_ONCE (a) = 1; - DR_WRITTEN_AT_LEAST_ONCE (b) = 1; - found = true; + found = true; break; } } + } - if (!found) - { - DR_WRITTEN_AT_LEAST_ONCE (a) = 0; - return false; - } - } + if (!found) + { + DR_WRITTEN_AT_LEAST_ONCE (a) =0; + DR_RW_UNCONDITIONALLY (a) = 0; + return false; + } + } return true; } @@ -748,8 +770,7 @@ write_memrefs_written_at_least_once (gimple *stmt, static bool ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> refs) { - return write_memrefs_written_at_least_once (stmt, refs) - && memrefs_read_or_written_unconditionally (stmt, refs); + return memrefs_read_or_written_unconditionally (stmt, refs); } /* Wrapper around gimple_could_trap_p refined for the needs of the @@ -1292,6 +1313,7 @@ if_convertible_loop_p_1 (struct loop *loop, case GIMPLE_CALL: case GIMPLE_DEBUG: case GIMPLE_COND: + gimple_set_uid (gsi_stmt (gsi), 0); break; default: return false; @@ -1300,13 +1322,20 @@ if_convertible_loop_p_1 (struct loop *loop, data_reference_p dr; + ref_DR_map = new hash_map<tree_operand_hash, data_reference_p>; + baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>; + + predicate_bbs (loop); + for (i = 0; refs->iterate (i, &dr); i++) { dr->aux = XNEW (struct ifc_dr); DR_WRITTEN_AT_LEAST_ONCE (dr) = -1; DR_RW_UNCONDITIONALLY (dr) = -1; + if (gimple_uid (DR_STMT (dr)) == 0) + gimple_set_uid (DR_STMT (dr), i + 1); + hash_memrefs_baserefs_and_store_DRs_read_written_info (dr); } - predicate_bbs (loop); for (i = 0; i < loop->num_nodes; i++) { @@ -1403,6 +1432,15 @@ if_convertible_loop_p (struct loop *loop, bool *any_mask_load_store) free_data_refs (refs); free_dependence_relations (ddrs); + + if (ref_DR_map) + delete ref_DR_map; + ref_DR_map = NULL; + + if (baseref_DR_map) + delete baseref_DR_map; + baseref_DR_map = NULL; + return res; }