Hi! On the following testcase, we've regressed in bar since 8.x. In 8.x, store merging came up with mem = 64-bit constant, but starting with the change to transform {0,1,2,3,4,5,6,7} char initializers into STRING_CSTs, we don't do that anymore. The mem = STRING_CST expansion can do that, but only if there are no embedded zeros. The following patch improves it even for embedded zeros, by using a new callback for the can_store_by_pieces/store_by_pieces calls which knows how to handle STRING_CST. We don't need strlen in that case; we can use TREE_STRING_LENGTH instead. Additionally, if the STRING_CST is slightly shorter than the destination region, it might generate better code to try to store_by_pieces it all in one go (bytes from the STRING_CST until the last one, followed by artificially added zeros), and only if that doesn't seem to be beneficial (e.g. a very small STRING_CST followed by kilobytes of zeros) fall back to store_by_pieces of the STRING_CST (rounded up to the next multiple of STORE_MAX_PIECES) followed by a clear_storage.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2019-02-16 Jakub Jelinek <ja...@redhat.com> PR rtl-optimization/66152 * builtins.h (c_readstr): Declare. * builtins.c (c_readstr): Remove forward declaration. Add null_terminated_p argument, if false, read all bytes from the string instead of stopping after '\0'. * expr.c (string_cst_read_str): New function. (store_expr): Use string_cst_read_str instead of builtin_strncpy_read_str. Try to store by pieces the whole exp_len first, and only if that fails, split it up into store by pieces followed by clear_storage. Formatting fix. * gcc.target/i386/pr66152.c: New test. --- gcc/builtins.h.jj 2019-02-14 08:06:37.878546571 +0100 +++ gcc/builtins.h 2019-02-15 11:33:50.208180171 +0100 @@ -103,6 +103,7 @@ struct c_strlen_data }; extern tree c_strlen (tree, int, c_strlen_data * = NULL, unsigned = 1); +extern rtx c_readstr (const char *, scalar_int_mode, bool = true); extern void expand_builtin_setjmp_setup (rtx, rtx); extern void expand_builtin_setjmp_receiver (rtx); extern void expand_builtin_update_setjmp_buf (rtx); --- gcc/builtins.c.jj 2019-02-11 20:58:48.509965578 +0100 +++ gcc/builtins.c 2019-02-15 11:37:00.046029652 +0100 @@ -95,7 +95,6 @@ builtin_info_type builtin_info[(int)END_ /* Non-zero if __builtin_constant_p should be folded right away. */ bool force_folding_builtin_constant_p; -static rtx c_readstr (const char *, scalar_int_mode); static int target_char_cast (tree, char *); static rtx get_memory_rtx (tree, tree); static int apply_args_size (void); @@ -802,10 +801,14 @@ c_strlen (tree src, int only_value, c_st } /* Return a constant integer corresponding to target reading - GET_MODE_BITSIZE (MODE) bits from string constant STR. */ - -static rtx -c_readstr (const char *str, scalar_int_mode mode) + GET_MODE_BITSIZE (MODE) bits from string constant STR. 
If + NULL_TERMINATED_P, reading stops after '\0' character, all further ones + are assumed to be zero, otherwise it reads as many characters + as needed. */ + +rtx +c_readstr (const char *str, scalar_int_mode mode, + bool null_terminated_p/*=true*/) { HOST_WIDE_INT ch; unsigned int i, j; @@ -830,7 +833,7 @@ c_readstr (const char *str, scalar_int_m j = j + UNITS_PER_WORD - 2 * (j % UNITS_PER_WORD) - 1; j *= BITS_PER_UNIT; - if (ch) + if (ch || !null_terminated_p) ch = (unsigned char) str[i]; tmp[j / HOST_BITS_PER_WIDE_INT] |= ch << (j % HOST_BITS_PER_WIDE_INT); } --- gcc/expr.c.jj 2019-02-08 20:00:40.309835608 +0100 +++ gcc/expr.c 2019-02-15 11:37:18.715719809 +0100 @@ -5453,6 +5453,30 @@ emit_storent_insn (rtx to, rtx from) return maybe_expand_insn (code, 2, ops); } +/* Helper function for store_expr storing of STRING_CST. */ + +static rtx +string_cst_read_str (void *data, HOST_WIDE_INT offset, scalar_int_mode mode) +{ + tree str = (tree) data; + + gcc_assert (offset >= 0); + if (offset >= TREE_STRING_LENGTH (str)) + return const0_rtx; + + if ((unsigned HOST_WIDE_INT) offset + GET_MODE_SIZE (mode) + > (unsigned HOST_WIDE_INT) TREE_STRING_LENGTH (str)) + { + char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode)); + size_t l = TREE_STRING_LENGTH (str) - offset; + memcpy (p, TREE_STRING_POINTER (str) + offset, l); + memset (p + l, '\0', GET_MODE_SIZE (mode) - l); + return c_readstr (p, mode, false); + } + + return c_readstr (TREE_STRING_POINTER (str) + offset, mode, false); +} + /* Generate code for computing expression EXP, and storing the value into TARGET. 
@@ -5472,7 +5496,7 @@ emit_storent_insn (rtx to, rtx from) rtx store_expr (tree exp, rtx target, int call_param_p, - bool nontemporal, bool reverse) + bool nontemporal, bool reverse) { rtx temp; rtx alt_rtl = NULL_RTX; @@ -5606,36 +5630,32 @@ store_expr (tree exp, rtx target, int ca if (TREE_STRING_LENGTH (str) <= 0) goto normal_expr; - str_copy_len = strlen (TREE_STRING_POINTER (str)); - if (str_copy_len < TREE_STRING_LENGTH (str) - 1) - goto normal_expr; + if (can_store_by_pieces (exp_len, string_cst_read_str, (void *) str, + MEM_ALIGN (target), false)) + { + store_by_pieces (target, exp_len, string_cst_read_str, (void *) str, + MEM_ALIGN (target), false, RETURN_BEGIN); + return NULL_RTX; + } str_copy_len = TREE_STRING_LENGTH (str); - if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0 - && TREE_STRING_POINTER (str)[TREE_STRING_LENGTH (str) - 1] == '\0') + if ((STORE_MAX_PIECES & (STORE_MAX_PIECES - 1)) == 0) { str_copy_len += STORE_MAX_PIECES - 1; str_copy_len &= ~(STORE_MAX_PIECES - 1); } - str_copy_len = MIN (str_copy_len, exp_len); - if (!can_store_by_pieces (str_copy_len, builtin_strncpy_read_str, - CONST_CAST (char *, TREE_STRING_POINTER (str)), - MEM_ALIGN (target), false)) + if (str_copy_len >= exp_len) goto normal_expr; - dest_mem = target; + if (!can_store_by_pieces (str_copy_len, string_cst_read_str, + (void *) str, MEM_ALIGN (target), false)) + goto normal_expr; - memop_ret retmode = exp_len > str_copy_len ? 
RETURN_END : RETURN_BEGIN; - dest_mem = store_by_pieces (dest_mem, - str_copy_len, builtin_strncpy_read_str, - CONST_CAST (char *, - TREE_STRING_POINTER (str)), - MEM_ALIGN (target), false, - retmode); - if (exp_len > str_copy_len) - clear_storage (adjust_address (dest_mem, BLKmode, 0), - GEN_INT (exp_len - str_copy_len), - BLOCK_OP_NORMAL); + dest_mem = store_by_pieces (target, str_copy_len, string_cst_read_str, + (void *) str, MEM_ALIGN (target), false, + RETURN_END); + clear_storage (adjust_address (dest_mem, BLKmode, 0), + GEN_INT (exp_len - str_copy_len), BLOCK_OP_NORMAL); return NULL_RTX; } else --- gcc/testsuite/gcc.target/i386/pr66152.c.jj 2019-02-15 11:55:56.212164557 +0100 +++ gcc/testsuite/gcc.target/i386/pr66152.c 2019-02-15 11:56:47.769308378 +0100 @@ -0,0 +1,25 @@ +/* PR rtl-optimization/66152 */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler "movabs\[^\n\r]*506097522914230528" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "movabs\[^\n\r]*505813836079825408" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler "mov\[^\n\r]*50462976" { target ia32 } } } */ +/* { dg-final { scan-assembler "mov\[^\n\r]*117835012" { target ia32 } } } */ +/* { dg-final { scan-assembler "mov\[^\n\r]*100925952" { target ia32 } } } */ +/* { dg-final { scan-assembler "mov\[^\n\r]*117768961" { target ia32 } } } */ + +void foo (char *); + +void +bar (void) +{ + char a[] = {0,1,2,3,4,5,6,7}; + foo (a); +} + +void +baz (void) +{ + char a[8] = "\0\2\4\6\1\3\5\7"; + foo (a); +} Jakub