On Wed, 30 May 2012, Richard Guenther wrote: > > The patch below extents memset recognition to cover a few more > non-byte-size store loops and all byte-size store loops. This exposes > issues with our builtins.exp testsuite which has custom memset > routines like > > void * > my_memset (void *d, int c, size_t n) > { > char *dst = (char *) d; > while (n--) > *dst++ = c; > return (char *) d; > } > > Now, for LTO we have papered over similar issues by attaching > the used attribute to the functions. But the general question is - when > can we be sure the function we are dealing with are not the actual > implementation for the builtin call we want to generate? A few > things come to my mind: > > 1) the function already calls the function we want to generate (well, > it might be a tail-recursive memset implementation ...) > > 2) the function availability is AVAIL_LOCAL > > 3) ... ? > > For sure 2) would work, but it would severely restrict the transform > (do we care?). > > We have a similar issue with sin/cos -> sincos transform and a > trivial sincos implementation. > > Any ideas? > > Bootstrapped (with memset recognition enabled by default) and tested > on x86_64-unknown-linux-gnu with the aforementioned issues.
The following fixes it by simply always adding -fno-tree-loop-distribute-patterns to builtins.exp. Bootstrapped and tested on x86_64-unknown-linux-gnu. If there are no further comments I'll go with the local advise from Micha who says "who cares". Thanks, Richard. 2012-05-30 Richard Guenther <rguent...@suse.de> PR tree-optimization/53081 * tree-data-ref.h (stores_zero_from_loop): Rename to ... (stores_bytes_from_loop): ... this. (stmt_with_adjacent_zero_store_dr_p): Rename to ... (stmt_with_adjacent_byte_store_dr_p): ... this. * tree-data-ref.c (stmt_with_adjacent_zero_store_dr_p): Rename to ... (stmt_with_adjacent_byte_store_dr_p): ... this. Handle all kinds of byte-sized stores. (stores_zero_from_loop): Rename to ... (stores_bytes_from_loop): ... this. * tree-loop-distribution.c (generate_memset_zero): Rename to ... (generate_memset): ... this. Handle all kinds of byte-sized stores. (generate_builtin): Adjust. (can_generate_builtin): Likewise. (tree_loop_distribution): Likewise. * gcc.dg/tree-ssa/ldist-19.c: New testcase. * gcc.c-torture/execute/builtins/builtins.exp: Always pass -fno-tree-loop-distribute-patterns. Index: gcc/tree-data-ref.h =================================================================== *** gcc/tree-data-ref.h.orig 2012-05-30 13:24:52.000000000 +0200 --- gcc/tree-data-ref.h 2012-05-30 13:24:56.128827666 +0200 *************** index_in_loop_nest (int var, VEC (loop_p *** 606,616 **** } void stores_from_loop (struct loop *, VEC (gimple, heap) **); ! void stores_zero_from_loop (struct loop *, VEC (gimple, heap) **); void remove_similar_memory_refs (VEC (gimple, heap) **); bool rdg_defs_used_in_other_loops_p (struct graph *, int); bool have_similar_memory_accesses (gimple, gimple); ! bool stmt_with_adjacent_zero_store_dr_p (gimple); /* Returns true when STRIDE is equal in absolute value to the size of the unit type of TYPE. */ --- 606,616 ---- } void stores_from_loop (struct loop *, VEC (gimple, heap) **); ! void stores_bytes_from_loop (struct loop *, VEC (gimple, heap) **); void remove_similar_memory_refs (VEC (gimple, heap) **); bool rdg_defs_used_in_other_loops_p (struct graph *, int); bool have_similar_memory_accesses (gimple, gimple); ! bool stmt_with_adjacent_byte_store_dr_p (gimple); /* Returns true when STRIDE is equal in absolute value to the size of the unit type of TYPE. */ Index: gcc/tree-data-ref.c =================================================================== *** gcc/tree-data-ref.c.orig 2012-05-30 13:24:52.000000000 +0200 --- gcc/tree-data-ref.c 2012-05-30 13:24:56.141827666 +0200 *************** stores_from_loop (struct loop *loop, VEC *** 5248,5259 **** free (bbs); } ! /* Returns true when the statement at STMT is of the form "A[i] = 0" that contains a data reference on its LHS with a stride of the same ! size as its unit type. */ bool ! stmt_with_adjacent_zero_store_dr_p (gimple stmt) { tree lhs, rhs; bool res; --- 5248,5260 ---- free (bbs); } ! /* Returns true when the statement at STMT is of the form "A[i] = x" that contains a data reference on its LHS with a stride of the same ! size as its unit type that can be rewritten as a series of byte ! stores with the same value. */ bool ! stmt_with_adjacent_byte_store_dr_p (gimple stmt) { tree lhs, rhs; bool res; *************** stmt_with_adjacent_zero_store_dr_p (gimp *** 5272,5278 **** && DECL_BIT_FIELD (TREE_OPERAND (lhs, 1))) return false; ! if (!(integer_zerop (rhs) || real_zerop (rhs))) return false; dr = XCNEW (struct data_reference); --- 5273,5286 ---- && DECL_BIT_FIELD (TREE_OPERAND (lhs, 1))) return false; ! if (!(integer_zerop (rhs) ! || integer_all_onesp (rhs) ! || real_zerop (rhs) ! || (TREE_CODE (rhs) == CONSTRUCTOR ! && !TREE_CLOBBER_P (rhs)) ! || (INTEGRAL_TYPE_P (TREE_TYPE (rhs)) ! && (TYPE_MODE (TREE_TYPE (lhs)) ! == TYPE_MODE (unsigned_char_type_node))))) return false; dr = XCNEW (struct data_reference); *************** stmt_with_adjacent_zero_store_dr_p (gimp *** 5291,5297 **** store to memory of the form "A[i] = 0". */ void ! stores_zero_from_loop (struct loop *loop, VEC (gimple, heap) **stmts) { unsigned int i; basic_block bb; --- 5299,5305 ---- store to memory of the form "A[i] = 0". */ void ! stores_bytes_from_loop (struct loop *loop, VEC (gimple, heap) **stmts) { unsigned int i; basic_block bb; *************** stores_zero_from_loop (struct loop *loop *** 5302,5308 **** for (i = 0; i < loop->num_nodes; i++) for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) if ((stmt = gsi_stmt (si)) ! && stmt_with_adjacent_zero_store_dr_p (stmt)) VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si)); free (bbs); --- 5310,5316 ---- for (i = 0; i < loop->num_nodes; i++) for (bb = bbs[i], si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) if ((stmt = gsi_stmt (si)) ! && stmt_with_adjacent_byte_store_dr_p (stmt)) VEC_safe_push (gimple, heap, *stmts, gsi_stmt (si)); free (bbs); Index: gcc/tree-loop-distribution.c =================================================================== *** gcc/tree-loop-distribution.c.orig 2012-05-30 13:24:52.000000000 +0200 --- gcc/tree-loop-distribution.c 2012-05-30 13:59:08.657756607 +0200 *************** build_size_arg_loc (location_t loc, tree *** 300,307 **** /* Generate a call to memset. Return true when the operation succeeded. */ static void ! generate_memset_zero (gimple stmt, tree op0, tree nb_iter, ! gimple_stmt_iterator bsi) { tree addr_base, nb_bytes; bool res = false; --- 300,307 ---- /* Generate a call to memset. Return true when the operation succeeded. */ static void ! generate_memset (gimple stmt, tree op0, tree nb_iter, ! gimple_stmt_iterator bsi) { tree addr_base, nb_bytes; bool res = false; *************** generate_memset_zero (gimple stmt, tree *** 310,315 **** --- 310,316 ---- tree mem, fn; struct data_reference *dr = XCNEW (struct data_reference); location_t loc = gimple_location (stmt); + tree val; DR_STMT (dr) = stmt; DR_REF (dr) = op0; *************** generate_memset_zero (gimple stmt, tree *** 334,346 **** mem = force_gimple_operand (addr_base, &stmts, true, NULL); gimple_seq_add_seq (&stmt_list, stmts); fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); ! fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes); gimple_seq_add_stmt (&stmt_list, fn_call); gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING); if (dump_file && (dump_flags & TDF_DETAILS)) ! fprintf (dump_file, "generated memset zero\n"); free_data_ref (dr); } --- 335,379 ---- mem = force_gimple_operand (addr_base, &stmts, true, NULL); gimple_seq_add_seq (&stmt_list, stmts); + /* This exactly matches stmt_with_adjacent_byte_store_dr_p which detects + stores of zero or byte-size integer stores. */ + val = gimple_assign_rhs1 (stmt); + if (integer_zerop (val) + || real_zerop (val) + || TREE_CODE (val) == CONSTRUCTOR) + val = integer_zero_node; + else if (integer_all_onesp (val)) + val = build_int_cst (integer_type_node, -1); + else + { + if (TREE_CODE (val) == INTEGER_CST) + val = fold_convert (integer_type_node, val); + else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val))) + { + gimple cstmt; + tree tem = create_tmp_reg (integer_type_node, NULL); + tem = make_ssa_name (tem, NULL); + cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE); + gimple_seq_add_stmt (&stmt_list, cstmt); + val = tem; + } + } + fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); ! fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes); gimple_seq_add_stmt (&stmt_list, fn_call); gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING); if (dump_file && (dump_flags & TDF_DETAILS)) ! { ! fprintf (dump_file, "generated memset"); ! if (integer_zerop (val)) ! fprintf (dump_file, " zero\n"); ! else if (integer_all_onesp (val)) ! fprintf (dump_file, " minus one\n"); ! else ! fprintf (dump_file, "\n"); ! } free_data_ref (dr); } *************** generate_builtin (struct loop *loop, bit *** 386,399 **** if (stmt_has_scalar_dependences_outside_loop (stmt)) goto end; ! if (is_gimple_assign (stmt) && !is_gimple_reg (gimple_assign_lhs (stmt))) { /* Don't generate the builtins when there are more than one memory write. */ if (write != NULL) goto end; write = stmt; if (bb == loop->latch) nb_iter = number_of_latch_executions (loop); --- 419,443 ---- if (stmt_has_scalar_dependences_outside_loop (stmt)) goto end; ! if (gimple_assign_single_p (stmt) && !is_gimple_reg (gimple_assign_lhs (stmt))) { + tree rhs; + /* Don't generate the builtins when there are more than one memory write. */ if (write != NULL) goto end; + /* If the store is from a non-constant, verify the value + is defined outside of the loop. */ + rhs = gimple_assign_rhs1 (stmt); + if (TREE_CODE (rhs) == SSA_NAME + && !SSA_NAME_IS_DEFAULT_DEF (rhs) + && flow_bb_inside_loop_p + (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs)))) + goto end; + write = stmt; if (bb == loop->latch) nb_iter = number_of_latch_executions (loop); *************** generate_builtin (struct loop *loop, bit *** 401,412 **** } } ! if (!stmt_with_adjacent_zero_store_dr_p (write)) goto end; /* The new statements will be placed before LOOP. */ bsi = gsi_last_bb (loop_preheader_edge (loop)->src); ! generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi); res = true; /* If this is the last partition for which we generate code, we have --- 445,456 ---- } } ! if (!stmt_with_adjacent_byte_store_dr_p (write)) goto end; /* The new statements will be placed before LOOP. */ bsi = gsi_last_bb (loop_preheader_edge (loop)->src); ! generate_memset (write, gimple_assign_lhs (write), nb_iter, bsi); res = true; /* If this is the last partition for which we generate code, we have *************** can_generate_builtin (struct graph *rdg, *** 825,831 **** gimple stmt = RDG_STMT (rdg, i); nb_writes++; if (!gimple_has_volatile_ops (stmt) ! && stmt_with_adjacent_zero_store_dr_p (stmt)) stores_zero++; } --- 869,875 ---- gimple stmt = RDG_STMT (rdg, i); nb_writes++; if (!gimple_has_volatile_ops (stmt) ! && stmt_with_adjacent_byte_store_dr_p (stmt)) stores_zero++; } *************** tree_loop_distribution (void) *** 1266,1272 **** /* With the following working list, we're asking distribute_loop to separate from the rest of the loop the stores of the form "A[i] = 0". */ ! stores_zero_from_loop (loop, &work_list); /* Do nothing if there are no patterns to be distributed. */ if (VEC_length (gimple, work_list) > 0) --- 1310,1316 ---- /* With the following working list, we're asking distribute_loop to separate from the rest of the loop the stores of the form "A[i] = 0". */ ! stores_bytes_from_loop (loop, &work_list); /* Do nothing if there are no patterns to be distributed. */ if (VEC_length (gimple, work_list) > 0) Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-19.c =================================================================== *** /dev/null 1970-01-01 00:00:00.000000000 +0000 --- gcc/testsuite/gcc.dg/tree-ssa/ldist-19.c 2012-05-30 13:54:18.824766637 +0200 *************** *** 0 **** --- 1,63 ---- + /* { dg-do compile } */ + /* { dg-options "-O3 -fdump-tree-ldist-details" } */ + + struct Foo + { + char a; + }; + + struct Foo x[256]; + + static void __attribute__((noinline,noclone)) + foo() + { + int i; + for (i = 0; i < 256; ++i) + x[i] = (struct Foo){}; + } + + static void __attribute__((noinline,noclone)) + bar() + { + int i; + for (i = 0; i < 256; ++i) + x[i].a = 1; + } + + static void __attribute__((noinline,noclone)) + foobar(unsigned char c) + { + int i; + for (i = 0; i < 256; ++i) + x[i].a = c; + } + + struct Baz + { + short a; + }; + + struct Baz y[256]; + + static void __attribute__((noinline,noclone)) + baz() + { + int i; + for (i = 0; i < 256; ++i) + y[i].a = -1; + } + + int main() + { + volatile int x; + foo(); + bar(); + foobar(x); + baz(); + return 0; + } + + /* { dg-final { scan-tree-dump-times "generated memset zero" 1 "ldist" } } */ + /* { dg-final { scan-tree-dump-times "generated memset minus one" 1 "ldist" } } */ + /* { dg-final { scan-tree-dump-times "generated memset" 4 "ldist" } } */ + /* { dg-final { cleanup-tree-dump "ldist" } } */ Index: gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp =================================================================== *** gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp.orig 2012-05-30 13:55:10.000000000 +0200 --- gcc/testsuite/gcc.c-torture/execute/builtins/builtins.exp 2012-05-30 13:59:38.878755539 +0200 *************** load_lib c-torture.exp *** 37,43 **** torture-init set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS ! set additional_flags "" if [istarget "powerpc-*-darwin*"] { lappend additional_flags "-Wl,-multiply_defined,suppress" } --- 37,43 ---- torture-init set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS ! set additional_flags "-fno-tree-loop-distribute-patterns" if [istarget "powerpc-*-darwin*"] { lappend additional_flags "-Wl,-multiply_defined,suppress" }