Martin, when I look at tree-ssa-forwprop.c:
str1 = string_constant (src1, &off1); if (str1 == NULL_TREE) break; if (!tree_fits_uhwi_p (off1) || compare_tree_int (off1, TREE_STRING_LENGTH (str1) - 1) > 0 || compare_tree_int (len1, TREE_STRING_LENGTH (str1) - tree_to_uhwi (off1)) > 0 || TREE_CODE (TREE_TYPE (str1)) != ARRAY_TYPE || TYPE_MODE (TREE_TYPE (TREE_TYPE (str1))) != TYPE_MODE (char_type_node)) break; I don't think it is a good idea to let string_constant return strings which have not necessarily valid content after the first NUL byte. It looks like the content has to be valid up to TREE_STRING_LENGTH. So may I suggest to do something like the following (diff to your last patch): --- expr.c.jj 2018-07-20 10:51:51.983605588 +0200 +++ expr.c 2018-07-20 15:07:29.769423479 +0200 @@ -11277,6 +11277,7 @@ tree string_constant (tree arg, tree *ptr_offset) { tree array; + tree array_size; STRIP_NOPS (arg); /* Non-constant index into the character array in an ARRAY_REF @@ -11295,9 +11296,11 @@ string_constant (tree arg, tree *ptr_off arg = TREE_OPERAND (arg, 0); tree ref = arg; + tree reftype = TREE_TYPE (ref); if (TREE_CODE (arg) == ARRAY_REF) { tree idx = TREE_OPERAND (arg, 1); + reftype = TREE_TYPE (TREE_OPERAND (arg, 0)); if (TREE_CODE (idx) != INTEGER_CST && TREE_CODE (argtype) == POINTER_TYPE) { @@ -11313,6 +11316,11 @@ string_constant (tree arg, tree *ptr_off return NULL_TREE; } } + if (TREE_CODE (reftype) != ARRAY_TYPE) + return NULL_TREE; + while (TREE_CODE (TREE_TYPE (reftype)) == ARRAY_TYPE) + reftype = TREE_TYPE (reftype); + array_size = TYPE_SIZE_UNIT (reftype); array = get_addr_base_and_unit_offset (ref, &base_off); if (!array || (TREE_CODE (array) != VAR_DECL @@ -11345,7 +11353,10 @@ string_constant (tree arg, tree *ptr_off return NULL_TREE; } else if (DECL_P (arg)) - array = arg; + { + array = arg; + array_size = DECL_SIZE_UNIT (array); + } else return NULL_TREE; @@ -11410,24 +11421,18 @@ string_constant (tree arg, tree *ptr_off if (!init || TREE_CODE (init) != STRING_CST) return NULL_TREE; - tree array_size = DECL_SIZE_UNIT (array); if (!array_size || TREE_CODE (array_size) != INTEGER_CST) return NULL_TREE; /* Avoid returning a string that doesn't fit in the array - it is stored in, like + it is stored in, like: const char a[4] = "abcde"; - but do handle those that fit even if they have excess - initializers, such as in - const char a[4] = "abc\000\000"; - The excess elements contribute to TREE_STRING_LENGTH() - but not to strlen(). */ - unsigned HOST_WIDE_INT charsize - = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (init)))); + ... or: + const char a[4] = "abc\000"; + ... because some callers access the string up to the limit + imposed by TREE_STRING_LENGTH. */ unsigned HOST_WIDE_INT length = TREE_STRING_LENGTH (init); - length = string_length (TREE_STRING_POINTER (init), charsize, - length / charsize); - if (compare_tree_int (array_size, length + 1) < 0) + if (compare_tree_int (array_size, length) < 0) return NULL_TREE; *ptr_offset = offset; Considering Richard's last comment, I don't know if TYPE_SIZE_UNIT of the ARRAY_TYPE is the correct way to get the size of the innermost arrayhere, but I think we will need it. Bernd.