GCC is able to fold references to members of global aggregate constants in many expressions but it doesn't known how do it for strlen() arguments. As a result, strlen calls with such arguments such the one below are not optimized:
const struct { char a[4]; } s = { "123" }; void f (void) { if (s.a[3]) abort (); // folded/eliminated } void g (void) { if (strlen (s.a) != 3) abort (); // not folded } The attached patch enables gimple_fold_builtin_strlen() to extract constant strings from aggregate initializers, analogously to how it extracts data of other types. Martin
PR tree-optimization/83693 - missing strlen optimization for array of arrays gcc/testsuite/ChangeLog: PR tree-optimization/83693 * gcc.dg/strlenopt-42.c: New test. gcc/ChangeLog: PR tree-optimization/83693 * expr.c (string_constant): Handle a bare STRING_CST. * gimple-fold.c (gimple_fold_builtin_strlen): Try to extract a string from an aggregate CONSTRUCTOR first. diff --git a/gcc/expr.c b/gcc/expr.c index cd1e57d..fbb50ad 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -11371,7 +11371,7 @@ string_constant (tree arg, tree *ptr_offset) array = TREE_OPERAND (TREE_OPERAND (arg, 0), 0); offset = TREE_OPERAND (TREE_OPERAND (arg, 0), 1); if (TREE_CODE (array) != STRING_CST && !VAR_P (array)) - return 0; + return NULL_TREE; /* Check if the array has a nonzero lower bound. */ lower_bound = array_ref_low_bound (TREE_OPERAND (arg, 0)); @@ -11379,9 +11379,9 @@ string_constant (tree arg, tree *ptr_offset) { /* If the offset and base aren't both constants, return 0. */ if (TREE_CODE (lower_bound) != INTEGER_CST) - return 0; + return NULL_TREE; if (TREE_CODE (offset) != INTEGER_CST) - return 0; + return NULL_TREE; /* Adjust offset by the lower bound. */ offset = size_diffop (fold_convert (sizetype, offset), fold_convert (sizetype, lower_bound)); @@ -11392,13 +11392,13 @@ string_constant (tree arg, tree *ptr_offset) array = TREE_OPERAND (TREE_OPERAND (arg, 0), 0); offset = TREE_OPERAND (TREE_OPERAND (arg, 0), 1); if (TREE_CODE (array) != ADDR_EXPR) - return 0; + return NULL_TREE; array = TREE_OPERAND (array, 0); if (TREE_CODE (array) != STRING_CST && !VAR_P (array)) - return 0; + return NULL_TREE; } else - return 0; + return NULL_TREE; } else if (TREE_CODE (arg) == PLUS_EXPR || TREE_CODE (arg) == POINTER_PLUS_EXPR) { @@ -11423,10 +11423,15 @@ string_constant (tree arg, tree *ptr_offset) offset = arg0; } else - return 0; + return NULL_TREE; + } + else if (TREE_CODE (arg) == STRING_CST) + { + *ptr_offset = size_zero_node; + return arg; } else - return 0; + return NULL_TREE; if (TREE_CODE (array) == STRING_CST) { @@ -11442,14 +11447,14 @@ string_constant (tree arg, tree *ptr_offset) if (init == error_mark_node || !init || TREE_CODE (init) != STRING_CST) - return 0; + return NULL_TREE; /* Avoid const char foo[4] = "abcde"; */ if (DECL_SIZE_UNIT (array) == NULL_TREE || TREE_CODE (DECL_SIZE_UNIT (array)) != INTEGER_CST || (length = TREE_STRING_LENGTH (init)) <= 0 || compare_tree_int (DECL_SIZE_UNIT (array), length) < 0) - return 0; + return NULL_TREE; /* If variable is bigger than the string literal, OFFSET must be constant and inside of the bounds of the string literal. */ @@ -11457,13 +11462,13 @@ string_constant (tree arg, tree *ptr_offset) if (compare_tree_int (DECL_SIZE_UNIT (array), length) > 0 && (! tree_fits_uhwi_p (offset) || compare_tree_int (offset, length) >= 0)) - return 0; + return NULL_TREE; *ptr_offset = offset; return init; } - return 0; + return NULL_TREE; } /* Generate code to calculate OPS, and exploded expression diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c index fe3e0b4..f277324 100644 --- a/gcc/gimple-fold.c +++ b/gcc/gimple-fold.c @@ -3517,8 +3517,14 @@ gimple_fold_builtin_strlen (gimple_stmt_iterator *gsi) wide_int minlen; wide_int maxlen; + /* Try to extract a constant from an object's CONSTRUCTOR first. */ + tree arg = gimple_call_arg (stmt, 0); + if (TREE_CODE (arg) == ADDR_EXPR) + if (tree str = fold_const_aggregate_ref (TREE_OPERAND (arg, 0))) + arg = str; + tree lenrange[2]; - if (!get_range_strlen (gimple_call_arg (stmt, 0), lenrange) + if (!get_range_strlen (arg, lenrange) && lenrange[0] && TREE_CODE (lenrange[0]) == INTEGER_CST && lenrange[1] && TREE_CODE (lenrange[1]) == INTEGER_CST) { diff --git a/gcc/testsuite/gcc.dg/strlenopt-42.c b/gcc/testsuite/gcc.dg/strlenopt-42.c new file mode 100644 index 0000000..c0b4ece --- /dev/null +++ b/gcc/testsuite/gcc.dg/strlenopt-42.c @@ -0,0 +1,158 @@ +/* PR tree-optimization/83693 - missing strlen optimization for array + of arrays + { dg-do compile } + { dg-options "-O1 -fdump-tree-optimized" } */ + +#include "strlenopt.h" + +#define DIFF_MAX __PTRDIFF_MAX__ + +#define CAT(x, y) x ## y +#define CONCAT(x, y) CAT (x, y) +#define FAILNAME(name) CONCAT (call_ ## name ##_on_line_, __LINE__) + +#define FAIL(name) do { \ + extern void FAILNAME (name) (void); \ + FAILNAME (name)(); \ + } while (0) + +/* Macro to emit a call to function named + call_in_true_branch_not_eliminated_on_line_NNN() + for each call that's expected to be eliminated. The dg-final + scan-tree-dump-time directive at the bottom of the test verifies + that no such call appears in output. */ +#define ELIM(expr) \ + if (!(expr)) FAIL (in_true_branch_not_eliminated); \ + else (void)0 + +#define STR "0123456789abcdefghijklmnopqrtsuvwxyz" +#define S(N) (STR + sizeof (STR) - N - 1) + +const char a5_5[5][5] = { + "", "1", "12", "123", "1234" +}; + +const char b5_5[5][5] = { + [1] = "1", [0] = "", [3] = "123", [4] = "1234", [2] = "12" +}; + +const char* const a3_6_17[3][6] = { + { S (0), S (1), S (2), S (3), S (4), S (5) }, + { S (6), S (7), S (8), S (9), S (10), S (11) }, + { S (12), S (13), S (14), S (15), S (16), S (17) }, +}; + +void test_arrays (void) +{ + ELIM (strlen (a5_5[0]) == 0); + ELIM (strlen (a5_5[1]) == 1); + ELIM (strlen (a5_5[2]) == 2); + ELIM (strlen (a5_5[3]) == 3); + ELIM (strlen (a5_5[4]) == 4); + + ELIM (strlen (b5_5[0]) == 0); + ELIM (strlen (b5_5[1]) == 1); + ELIM (strlen (b5_5[2]) == 2); + ELIM (strlen (b5_5[3]) == 3); + ELIM (strlen (b5_5[4]) == 4); + + ELIM (strlen (a3_6_17[0][0]) == 0); + ELIM (strlen (a3_6_17[0][1]) == 1); + ELIM (strlen (a3_6_17[0][2]) == 2); + ELIM (strlen (a3_6_17[0][3]) == 3); + ELIM (strlen (a3_6_17[0][4]) == 4); + ELIM (strlen (a3_6_17[0][5]) == 5); + ELIM (strlen (a3_6_17[1][0]) == 6); + ELIM (strlen (a3_6_17[1][1]) == 7); + ELIM (strlen (a3_6_17[1][2]) == 8); + ELIM (strlen (a3_6_17[1][3]) == 9); + ELIM (strlen (a3_6_17[1][4]) == 10); + ELIM (strlen (a3_6_17[1][5]) == 11); + ELIM (strlen (a3_6_17[2][0]) == 12); + ELIM (strlen (a3_6_17[2][1]) == 13); + ELIM (strlen (a3_6_17[2][2]) == 14); + ELIM (strlen (a3_6_17[2][3]) == 15); + ELIM (strlen (a3_6_17[2][4]) == 16); + ELIM (strlen (a3_6_17[2][5]) == 17); +} + +struct MemberStrings +{ + char a5_5[5][5]; + const char* a3_6_17[3][6]; +}; + +const struct MemberStrings ma[] = { + { + { "", "1", "12", "123", "1234" }, + { + { S (0), S (1), S (2), S (3), S (4), S (5) }, + { S (6), S (7), S (8), S (9), S (10), S (11) }, + { S (12), S (13), S (14), S (15), S (16), S (17) }, + } + }, + + { + { "1234", "123", "12", "1", "" }, + { + { S (17), S (16), S (15), S (14), S (13), S (12) }, + { S (11), S (10), S (9), S (8), S (7) , S (6) }, + { S (5), S (4), S (3), S (2), S (1), S (0) }, + } + } +}; + +void test_member_arrays (void) +{ + ELIM (strlen (ma[0].a5_5[0]) == 0); + ELIM (strlen (ma[0].a5_5[1]) == 1); + ELIM (strlen (ma[0].a5_5[2]) == 2); + ELIM (strlen (ma[0].a5_5[3]) == 3); + ELIM (strlen (ma[0].a5_5[4]) == 4); + + ELIM (strlen (ma[0].a3_6_17[0][0]) == 0); + ELIM (strlen (ma[0].a3_6_17[0][1]) == 1); + ELIM (strlen (ma[0].a3_6_17[0][2]) == 2); + ELIM (strlen (ma[0].a3_6_17[0][3]) == 3); + ELIM (strlen (ma[0].a3_6_17[0][4]) == 4); + ELIM (strlen (ma[0].a3_6_17[0][5]) == 5); + ELIM (strlen (ma[0].a3_6_17[1][0]) == 6); + ELIM (strlen (ma[0].a3_6_17[1][1]) == 7); + ELIM (strlen (ma[0].a3_6_17[1][2]) == 8); + ELIM (strlen (ma[0].a3_6_17[1][3]) == 9); + ELIM (strlen (ma[0].a3_6_17[1][4]) == 10); + ELIM (strlen (ma[0].a3_6_17[1][5]) == 11); + ELIM (strlen (ma[0].a3_6_17[2][0]) == 12); + ELIM (strlen (ma[0].a3_6_17[2][1]) == 13); + ELIM (strlen (ma[0].a3_6_17[2][2]) == 14); + ELIM (strlen (ma[0].a3_6_17[2][3]) == 15); + ELIM (strlen (ma[0].a3_6_17[2][4]) == 16); + ELIM (strlen (ma[0].a3_6_17[2][5]) == 17); + + ELIM (strlen (ma[1].a5_5[0]) == 4); + ELIM (strlen (ma[1].a5_5[1]) == 3); + ELIM (strlen (ma[1].a5_5[2]) == 2); + ELIM (strlen (ma[1].a5_5[3]) == 1); + ELIM (strlen (ma[1].a5_5[4]) == 0); + + ELIM (strlen (ma[1].a3_6_17[0][0]) == 17); + ELIM (strlen (ma[1].a3_6_17[0][1]) == 16); + ELIM (strlen (ma[1].a3_6_17[0][2]) == 15); + ELIM (strlen (ma[1].a3_6_17[0][3]) == 14); + ELIM (strlen (ma[1].a3_6_17[0][4]) == 13); + ELIM (strlen (ma[1].a3_6_17[0][5]) == 12); + ELIM (strlen (ma[1].a3_6_17[1][0]) == 11); + ELIM (strlen (ma[1].a3_6_17[1][1]) == 10); + ELIM (strlen (ma[1].a3_6_17[1][2]) == 9); + ELIM (strlen (ma[1].a3_6_17[1][3]) == 8); + ELIM (strlen (ma[1].a3_6_17[1][4]) == 7); + ELIM (strlen (ma[1].a3_6_17[1][5]) == 6); + ELIM (strlen (ma[1].a3_6_17[2][0]) == 5); + ELIM (strlen (ma[1].a3_6_17[2][1]) == 4); + ELIM (strlen (ma[1].a3_6_17[2][2]) == 3); + ELIM (strlen (ma[1].a3_6_17[2][3]) == 2); + ELIM (strlen (ma[1].a3_6_17[2][4]) == 1); + ELIM (strlen (ma[1].a3_6_17[2][5]) == 0); +} + +/* { dg-final { scan-tree-dump-times "_not_eliminated" 0 "optimized" } } */