Hello,

I am posting this patch to get some feedback on the approach. The goal is
to replace malloc+free with a stack allocation (a decl, actually) when the
size is a small constant.

For testing, I hijacked the "leaf" attribute, but it isn't right; I'll
remove it from the list (I am not sure what I'll do for the testcases then).
What I'd want instead is a "returns" attribute meaning that the function
will return (either through "return" or an exception), as opposed to
having an infinite loop, calling exit or longjmp, etc. (sched-deps.c has a
related predicate, call_may_noreturn_p). The situation I am trying to
avoid is:
  p = malloc (12);
  f (p);
  free (p);
where f somewhere contains:

  free (p); exit (42);

(or longjmp, or something else that takes the regular call to free out of
the execution flow).
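For illustration, a declaration using the proposed attribute might look
like this (hypothetical syntax, nothing implements it today):

  /* Hypothetical: promise that f always returns, normally or by
     throwing, so the code after a call to it stays reachable.  */
  void f (void *) __attribute__ ((returns));

With such a guarantee, the walk introduced by the patch could treat a
call to f like any other harmless statement.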
It passes most of the testsuite, but breaks a couple of
__builtin_object_size tests:
#include <assert.h>
#include <stdlib.h>

struct A
{
  char a[10];
  int b;
  char c[10];
};

int main ()
{
  struct A *p = malloc (2 * sizeof (struct A));
  assert (__builtin_object_size (&p->a, 1) == sizeof (p->a));
  free (p);
}
__builtin_object_size now returns 56 instead of 10. That makes sense in a
way: mode 1 asks for the size of the closest enclosing subobject, here the
10-byte array a, but once malloc is replaced by a plain array decl of
2 * sizeof (struct A) == 56 bytes (on a typical target), the subobject
information is gone and the size of the whole decl is all that is left. I
am not sure what to do about that.
The size above which the malloc->stack transformation is not applied
should depend on a parameter; I don't know if it should get its own or
reuse an existing one. In any case, I'd like to avoid ending up with a
ridiculously low threshold: my programs use GMP, which internally uses
alloca for blocks up to 65536 bytes (even in recursive functions that do a
dozen allocations), so I don't want gcc to tell me that 50 bytes is too
much.
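If the transformation gets its own knob, a params.def entry could look
roughly like this (the name and default value are placeholders, not part
of the patch, which currently reuses PARAM_LARGE_STACK_FRAME):

  DEFPARAM (PARAM_MALLOC_TO_STACK_LIMIT,
            "malloc-to-stack-limit",
            "Maximal size in bytes of a malloc'ed block that may be "
            "replaced by a stack allocation",
            4096, 0, 0)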
A program with a double free may, with this patch, end up crashing on the
first free instead of the second, but such a program is invalid anyway.
I don't know if tree-ssa-forwprop is a good place for it, but it was
convenient for a prototype. I like the idea of running it several times:
replacing malloc with a decl early can help other optimizations, and other
optimizations can make this one possible later.
The walk could be a bit expensive, but we only do it if we detected a
malloc of a small constant and at least one matching free. I guess I
should mark the malloc somehow to avoid performing the walk twice when
there are several (but not enough) matching frees; a sketch of one way to
do that follows.
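The pass-local statement flags could serve as the mark. A sketch only,
using GF_PLF_1 to mean "the walk from this malloc already failed" (the
flag would have to be cleared when the pass starts):

  /* Don't redo the walk if a previous matching free already failed.  */
  if (gimple_plf (stmt1, GF_PLF_1))
    return false;
  if (!malloc_safe_on_stack (stmt1, &list_of_frees))
    {
      /* Remember the failure for the other frees of this malloc.  */
      gimple_set_plf (stmt1, GF_PLF_1, true);
      return false;
    }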
stack_vec is nice; it would also be convenient if bitmaps had a variant
with a destructor, so we wouldn't need to deallocate them explicitly,
along the lines of the sketch below.
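A minimal sketch of such a wrapper (the name is made up; it only ties
BITMAP_ALLOC/BITMAP_FREE to a scope):

  class auto_bitmap
  {
  public:
    auto_bitmap () : m_bits (BITMAP_ALLOC (NULL)) {}
    ~auto_bitmap () { BITMAP_FREE (m_bits); }
    operator bitmap () { return m_bits; }
  private:
    /* Not copyable: the bitmap would be freed twice.  */
    auto_bitmap (const auto_bitmap &);
    auto_bitmap &operator= (const auto_bitmap &);
    bitmap m_bits;
  };

malloc_safe_on_stack could then return directly from its unsafe/safe
labels instead of funneling through the explicit BITMAP_FREE calls.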
--
Marc Glisse
Index: gcc/testsuite/g++.dg/tree-ssa/heapstack-1.C
===================================================================
--- gcc/testsuite/g++.dg/tree-ssa/heapstack-1.C (revision 0)
+++ gcc/testsuite/g++.dg/tree-ssa/heapstack-1.C (working copy)
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+struct A {
+  void*p;
+  A(void*q) : p(q) {}
+  ~A(){ __builtin_free(p); }
+};
+void f(void*)__attribute__((__leaf__));
+void h(void*)__attribute__((__leaf__,__nothrow__));
+void g(){
+  void*p=__builtin_malloc(12);
+  A a(p);
+  f(p);
+}
+
+void i(){
+  void*p=__builtin_malloc(12);
+  h(p);
+  __builtin_free(p);
+}
+
+/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Property changes on: gcc/testsuite/g++.dg/tree-ssa/heapstack-1.C
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision URL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: gcc/testsuite/g++.dg/tree-ssa/heapstack-2.C
===================================================================
--- gcc/testsuite/g++.dg/tree-ssa/heapstack-2.C (revision 0)
+++ gcc/testsuite/g++.dg/tree-ssa/heapstack-2.C (working copy)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void f(void*)__attribute__((__leaf__));
+void g(){
+  void*p=__builtin_malloc(12);
+  f(p);
+  __builtin_free(p);
+}
+
+/* { dg-final { scan-tree-dump-times "malloc" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Property changes on: gcc/testsuite/g++.dg/tree-ssa/heapstack-2.C
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision URL
\ No newline at end of property
Index: gcc/testsuite/gcc.dg/tree-ssa/heapstack-1.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/heapstack-1.c (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/heapstack-1.c (working copy)
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void f(void*)__attribute__((__leaf__));
+void g(int m,int n){
+  int i;
+  void*p=__builtin_malloc(12);
+  switch(n){
+  case 1:
+    for (i=0; i<m; ++i)
+      f(p);
+    break;
+  case 2:
+    __builtin_free(p);
+    __builtin_exit(42);
+  default:;
+  }
+  __builtin_free(p);
+}
+
+/* { dg-final { scan-tree-dump-not "malloc" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Property changes on: gcc/testsuite/gcc.dg/tree-ssa/heapstack-1.c
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision URL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: gcc/testsuite/gcc.dg/tree-ssa/heapstack-2.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/heapstack-2.c (revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/heapstack-2.c (working copy)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+void f(void*);
+void g(){
+  void*p=__builtin_malloc(12);
+  f(p);
+  __builtin_free(p);
+}
+
+/* { dg-final { scan-tree-dump-times "malloc" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
Property changes on: gcc/testsuite/gcc.dg/tree-ssa/heapstack-2.c
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Author Date Id Revision URL
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Index: gcc/testsuite/gcc.dg/tree-ssa/pr19831-3.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/pr19831-3.c (revision 204620)
+++ gcc/testsuite/gcc.dg/tree-ssa/pr19831-3.c (working copy)
@@ -1,31 +1,31 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-optimized" } */
-void test2(void)
+void test2(int n)
{
-  int *p = __builtin_malloc (sizeof (int) * 4);
+  int *p = __builtin_malloc (sizeof (int) * n);
  if (p == (void *)0)
    __builtin_abort ();
  __builtin_free (p);
}
-void test5(int b)
+void test5(int n)
{
-  int *p = __builtin_malloc (sizeof (int) * 4);
+  int *p = __builtin_malloc (sizeof (int) * n);
  if (p)
    __builtin_free (p);
}
-void test6(void)
+void test6(int n)
{
-  int *p = __builtin_malloc (sizeof (int) * 4);
+  int *p = __builtin_malloc (sizeof (int) * n);
  if (p == (void *)0)
    __builtin_abort ();
  if (p)
    __builtin_free (p);
}
/* We should be able to remove all malloc/free pairs with CDDCE.
   Assume p was non-NULL for test2.
   For test5, it doesn't matter if p is NULL or non-NULL.  */
Index: gcc/tree-ssa-forwprop.c
===================================================================
--- gcc/tree-ssa-forwprop.c (revision 204620)
+++ gcc/tree-ssa-forwprop.c (working copy)
@@ -33,20 +33,21 @@ along with GCC; see the file COPYING3.
#include "tree-ssanames.h"
#include "tree-dfa.h"
#include "tree-pass.h"
#include "langhooks.h"
#include "flags.h"
#include "expr.h"
#include "cfgloop.h"
#include "optabs.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-dom.h"
+#include "params.h"
/* This pass propagates the RHS of assignment statements into use
   sites of the LHS of the assignment.  It's basically a specialized
   form of tree combination.  It is hoped all of this can disappear
   when we have a generalized tree combiner.

   One class of common cases we handle is forward propagating a single use
   variable into a COND_EXPR.

     bb0:
@@ -1478,20 +1479,113 @@ constant_pointer_difference (tree p1, tr
}
  for (i = 0; i < cnt[0]; i++)
    for (j = 0; j < cnt[1]; j++)
      if (exps[0][i] == exps[1][j])
        return size_binop (MINUS_EXPR, offs[0][i], offs[1][j]);
  return NULL_TREE;
}
+/* We want to be sure that free (VAR) can't be called in another function,
+   and the easiest way to ensure that is to prove that it is called in this
+   function.  The hardest part is avoiding calls to a function that may not
+   return (because of exit, an infinite loop, longjmp, etc.) and that might
+   call free on VAR.  */
+static bool
+malloc_safe_on_stack (gimple stmt, vec<gimple, va_heap> *list_of_frees)
+{
+  tree var = gimple_call_lhs (stmt);
+  basic_block bb = gimple_bb (stmt);
+  stack_vec<basic_block, 20> bb_to_visit;
+  bitmap bb_visited = BITMAP_ALLOC (NULL);
+  bitmap_set_bit (bb_visited, bb->index);
+  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
+
+next_stmt:
+  gsi_next_nondebug (&gsi);
+
+handle_stmt:
+  if (gsi_end_p (gsi))
+    {
+      edge e;
+      edge_iterator ei;
+      FOR_EACH_EDGE (e, ei, bb->succs)
+        {
+          bb_to_visit.safe_push (e->dest);
+        }
+      goto next_bb;
+    }
+  stmt = gsi_stmt (gsi);
+  if (stmt_can_throw_external (stmt))
+    /* We could give up only if VAR has escaped (same for return), but that
+       means there is a memory leak, so don't bother.  */
+    goto unsafe;
+
+  switch (gimple_code (stmt))
+    // TODO: GIMPLE_ASM, EH related gimples?
+    {
+    /* We leave the function without calling free.  */
+    case GIMPLE_RETURN:
+      goto unsafe;
+
+    /* Statements that are irrelevant.  */
+    case GIMPLE_ASSIGN:
+    case GIMPLE_LABEL:
+    case GIMPLE_NOP:
+    /* Last stmt of the bb, handled by looking at the outgoing edges.  */
+    case GIMPLE_GOTO:
+    case GIMPLE_COND:
+      // TODO: special case: if (VAR == 0) ...
+    case GIMPLE_SWITCH:
+      goto next_stmt;
+
+    case GIMPLE_CALL:
+      {
+        // TODO: __builtin_(abort|trap|exit|unreachable)
+        // should be safe as well.
+        if (gimple_call_builtin_p (stmt, BUILT_IN_FREE)
+            && gimple_call_arg (stmt, 0) == var)
+          {
+            /* Nothing could throw an exception earlier in the block,
+               so we don't need to check the EH edges.  */
+            list_of_frees->safe_push (stmt);
+            goto next_bb;
+          }
+        int flags = gimple_call_flags (stmt);
+        // FIXME: leaf is actually not safe, we need some new ECF_* flags.
+        if (flags & (ECF_CONST | ECF_PURE | ECF_LEAF | ECF_NOVOPS))
+          goto next_stmt;
+        goto unsafe;
+      }
+    default:
+      goto unsafe;
+    }
+
+next_bb:
+  if (bb_to_visit.is_empty ())
+    goto safe;
+  bb = bb_to_visit.last ();
+  bb_to_visit.pop ();
+  if (!bitmap_set_bit (bb_visited, bb->index))
+    goto next_bb;
+  gsi = gsi_start_bb (bb);
+  goto handle_stmt;
+
+unsafe:
+  BITMAP_FREE (bb_visited);
+  return false;
+safe:
+  BITMAP_FREE (bb_visited);
+  return true;
+}
+
/* *GSI_P is a GIMPLE_CALL to a builtin function.
   Optimize
   memcpy (p, "abcd", 4);
   memset (p + 4, ' ', 3);
   into
   memcpy (p, "abcd   ", 7);
   call if the latter can be stored by pieces during expansion.  */
static bool
simplify_builtin_call (gimple_stmt_iterator *gsi_p, tree callee2)
@@ -1681,20 +1775,64 @@ simplify_builtin_call (gimple_stmt_itera
              gimple_call_set_arg (stmt2, 2,
                                   build_int_cst (TREE_TYPE (len2), src_len));
              unlink_stmt_vdef (stmt1);
              gsi_remove (&gsi, true);
              release_defs (stmt1);
              update_stmt (stmt2);
              return false;
            }
        }
      break;
+
+    case BUILT_IN_FREE:
+      {
+        tree size;
+        tree ptr = gimple_call_arg (stmt2, 0);
+        if (TREE_CODE (ptr) != SSA_NAME)
+          return false;
+        stmt1 = SSA_NAME_DEF_STMT (ptr);
+        /* TODO: handle calloc.  */
+        if (gimple_call_builtin_p (stmt1, BUILT_IN_MALLOC)
+            && host_integerp ((size = gimple_call_arg (stmt1, 0)), 1)
+            /* param_value(...) / 10?  Some new parameter?  */
+            && TREE_INT_CST_LOW (size)
+               <= (unsigned HOST_WIDE_INT) PARAM_VALUE (PARAM_LARGE_STACK_FRAME))
+          {
+            gcc_checking_assert (gimple_call_lhs (stmt1) == ptr);
+            stack_vec<gimple, 20> list_of_frees;
+            if (!malloc_safe_on_stack (stmt1, &list_of_frees))
+              return false;
+            /* Declare the replacement array.  */
+            tree elem_type = build_nonstandard_integer_type (BITS_PER_UNIT, 1);
+            tree array_type = build_array_type_nelts (elem_type,
+                                                      TREE_INT_CST_LOW (size));
+            tree var = create_tmp_var (array_type, NULL);
+            tree p = fold_convert (TREE_TYPE (ptr), build_fold_addr_expr (var));
+            /* Replace each free with a clobber.  */
+            int i;
+            gimple free_stmt;
+            FOR_EACH_VEC_ELT (list_of_frees, i, free_stmt)
+              {
+                tree clobber = build_constructor (array_type, NULL);
+                TREE_THIS_VOLATILE (clobber) = 1;
+                gimple clobber_stmt = gimple_build_assign (var, clobber);
+                gimple_stmt_iterator gsi_free = gsi_for_stmt (free_stmt);
+                gsi_replace (&gsi_free, clobber_stmt, false);
+              }
+            /* Replace the malloc with the address of the variable.  */
+            gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt1);
+            update_call_from_tree (&gsi1, p);
+            update_stmt (gsi_stmt (gsi1));
+            return true;
+          }
+
+      }
    default:
      break;
    }
  return false;
}
/* Checks if expression has type of one-bit precision, or is a known
   truth-valued expression.  */
static bool
truth_valued_ssa_name (tree name)