Hi Richard, I have a patch for PR43814. It introduces an option, -faligned-pointer-argument, that assumes that function arguments of pointer type are aligned to the alignment of the pointed-to type, and uses that information in tree-ssa-ccp. This enables the memcpy in pr43814-2.c to be inlined.
I tested the patch successfully, with the option enabled by default, on x86_64 and i686 (both gcc-only builds). I also tested the patch, with the option enabled by default, on ARM (gcc/glibc build). On ARM, the patch generated wrong code for glibc's uselocale.c: ... glibc/locale/locale.h: ... /* This value can be passed to `uselocale' and may be returned by it. Passing this value to any other function has undefined behavior. */ # define LC_GLOBAL_LOCALE ((__locale_t) -1L) ... glibc/locale/uselocale.c: ... locale_t __uselocale (locale_t newloc) { locale_t oldloc = _NL_CURRENT_LOCALE; if (newloc != NULL) { const locale_t locobj = newloc == LC_GLOBAL_LOCALE ? &_nl_global_locale : newloc; ... The assumption that function arguments of pointer type are aligned allowed the compiler to fold the test 'newloc == LC_GLOBAL_LOCALE' to false, since the all-ones value ((__locale_t) -1L) cannot be an aligned pointer. But the use of ((__locale_t) -1L) as a function argument to uselocale violates that assumption. Fixing the definition of LC_GLOBAL_LOCALE allowed the gcc tests to run without regressions on ARM. Furthermore, the patch fixes the ipa-sra-2.c and ipa-sra-6.c regressions on ARM, discussed here: - http://gcc.gnu.org/ml/gcc-patches/2011-08/msg00930.html - http://gcc.gnu.org/ml/gcc-patches/2011-09/msg00459.html However, since glibc currently uses this construct, the option is off by default for now. OK for trunk? Thanks, - Tom 2011-09-20 Tom de Vries <t...@codesourcery.com> PR target/43814 * tree-ssa-ccp.c (get_align_value): New function, factored out of get_value_from_alignment. (get_value_from_alignment): Use get_align_value. (get_value_for_expr): Use get_align_value to handle alignment of function argument pointers. * common.opt (faligned-pointer-argument): New option. * doc/invoke.texi (Optimization Options): Add -faligned-pointer-argument. (-faligned-pointer-argument): New item. * gcc/testsuite/gcc.dg/pr43814.c: New test. * gcc/testsuite/gcc.target/arm/pr43814-2.c: New test.
Index: gcc/tree-ssa-ccp.c =================================================================== --- gcc/tree-ssa-ccp.c (revision 178804) +++ gcc/tree-ssa-ccp.c (working copy) @@ -500,20 +500,14 @@ value_to_double_int (prop_value_t val) return double_int_zero; } -/* Return the value for the address expression EXPR based on alignment - information. */ +/* Return the value for an expr of type TYPE with alignment ALIGN and offset + BITPOS relative to the alignment. */ static prop_value_t -get_value_from_alignment (tree expr) +get_align_value (unsigned int align, tree type, unsigned HOST_WIDE_INT bitpos) { - tree type = TREE_TYPE (expr); prop_value_t val; - unsigned HOST_WIDE_INT bitpos; - unsigned int align; - - gcc_assert (TREE_CODE (expr) == ADDR_EXPR); - align = get_object_alignment_1 (TREE_OPERAND (expr, 0), &bitpos); val.mask = double_int_and_not (POINTER_TYPE_P (type) || TYPE_UNSIGNED (type) ? double_int_mask (TYPE_PRECISION (type)) @@ -529,6 +523,21 @@ get_value_from_alignment (tree expr) return val; } +/* Return the value for the address expression EXPR based on alignment + information. */ + +static prop_value_t +get_value_from_alignment (tree expr) +{ + unsigned int align; + unsigned HOST_WIDE_INT bitpos; + + gcc_assert (TREE_CODE (expr) == ADDR_EXPR); + + align = get_object_alignment_1 (TREE_OPERAND (expr, 0), &bitpos); + return get_align_value (align, TREE_TYPE (expr), bitpos); +} + /* Return the value for the tree operand EXPR. If FOR_BITS_P is true return constant bits extracted from alignment information for invariant addresses. 
*/ @@ -540,11 +549,21 @@ get_value_for_expr (tree expr, bool for_ if (TREE_CODE (expr) == SSA_NAME) { + tree var = SSA_NAME_VAR (expr); val = *get_value (expr); - if (for_bits_p - && val.lattice_val == CONSTANT + if (!for_bits_p) + return val; + + if (val.lattice_val == CONSTANT && TREE_CODE (val.value) == ADDR_EXPR) val = get_value_from_alignment (val.value); + else if (flag_aligned_pointer_argument + && val.lattice_val == VARYING + && SSA_NAME_IS_DEFAULT_DEF (expr) + && TREE_CODE (var) == PARM_DECL + && POINTER_TYPE_P (TREE_TYPE (var))) + val = get_align_value (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (var))), + TREE_TYPE (var), 0); } else if (is_gimple_min_invariant (expr) && (!for_bits_p || TREE_CODE (expr) != ADDR_EXPR)) Index: gcc/common.opt =================================================================== --- gcc/common.opt (revision 178804) +++ gcc/common.opt (working copy) @@ -786,6 +786,10 @@ Driver Undocumented fabi-version= Common Joined RejectNegative UInteger Var(flag_abi_version) Init(2) +faligned-pointer-argument +Common Report Var(flag_aligned_pointer_argument) Optimization +Assume function arguments of pointer type are aligned. + falign-functions Common Report Var(align_functions,0) Optimization UInteger Align the start of functions Index: gcc/doc/invoke.texi =================================================================== --- gcc/doc/invoke.texi (revision 178804) +++ gcc/doc/invoke.texi (working copy) @@ -345,7 +345,8 @@ Objective-C and Objective-C++ Dialects}. @item Optimization Options @xref{Optimize Options,,Options that Control Optimization}. 
-@gccoptlist{-falign-functions[=@var{n}] -falign-jumps[=@var{n}] @gol +@gccoptlist{-faligned-pointer-argument -falign-functions[=@var{n}] @gol +-falign-jumps[=@var{n}] @gol -falign-labels[=@var{n}] -falign-loops[=@var{n}] -fassociative-math @gol -fauto-inc-dec -fbranch-probabilities -fbranch-target-load-optimize @gol -fbranch-target-load-optimize2 -fbtr-bb-exclusive -fcaller-saves @gol @@ -7552,6 +7553,10 @@ arithmetic on constants, the overflowed The @option{-fstrict-overflow} option is enabled at levels @option{-O2}, @option{-O3}, @option{-Os}. +@item -faligned-pointer-argument +@opindex faligned-pointer-argument +Assume function arguments of pointer type are aligned. + @item -falign-functions @itemx -falign-functions=@var{n} @opindex falign-functions Index: gcc/testsuite/gcc.dg/pr43814.c =================================================================== --- /dev/null (new file) +++ gcc/testsuite/gcc.dg/pr43814.c (revision 0) @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -faligned-pointer-argument -fdump-tree-ccp1-all" } */ + +void f (int *a) +{ + *(a+1) = 0; +} + +/* { dg-final { scan-tree-dump-times "ALIGN = \[2-9\]\[0-9\]*, MISALIGN = 0" 1 "ccp1"} } */ +/* { dg-final { cleanup-tree-dump "ccp1" } } */ + Index: gcc/testsuite/gcc.target/arm/pr43814-2.c =================================================================== --- /dev/null (new file) +++ gcc/testsuite/gcc.target/arm/pr43814-2.c (revision 0) @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -faligned-pointer-argument -fdump-rtl-expand" } */ + +typedef unsigned int size_t; +extern void* memcpy (void *, const void *, size_t); + +typedef union JValue { + void* l; +} JValue; +typedef struct Object { + int x; +} Object; + +extern __inline__ long long +dvmGetArgLong (const unsigned int* args, int elem) +{ + long long val; + memcpy (&val, &args[elem], 8); + return val; +} + +void +Dalvik_sun_misc_Unsafe_getObject (const unsigned int* args, JValue* pResult) +{ + Object* obj = 
(Object*) args[1]; + long long offset = dvmGetArgLong (args, 2); + Object** address = (Object**) (((unsigned char*) obj) + offset); + pResult->l = ((void*) *address); +} + +/* { dg-final { scan-rtl-dump-times "memcpy" 0 "expand"} } */ +/* { dg-final { cleanup-tree-dump "expand" } } */ +