Hi! When using the bswap pass infrastructure, BIT_NOT_EXPRs aren't allowed in the middle, but due to the way process_store handles those they can appear around the value, which is something output_merged_store didn't handle.
Fixed thusly, where we handle not just the case when the bswap (or nop) value needs inversion as a whole, but also cases where only a few portions of it need xoring with some mask. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2018-01-15 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/83843 * gimple-ssa-store-merging.c (imm_store_chain_info::output_merged_store): Handle bit_not_p on store_immediate_info for bswap/nop orig_stores. * gcc.dg/store_merging_18.c: New test. --- gcc/gimple-ssa-store-merging.c.jj 2018-01-04 00:43:17.629703230 +0100 +++ gcc/gimple-ssa-store-merging.c 2018-01-15 12:29:14.105789381 +0100 @@ -3619,6 +3619,15 @@ imm_store_chain_info::output_merged_stor gimple_seq_add_stmt_without_update (&seq, stmt); src = gimple_assign_lhs (stmt); } + inv_op = invert_op (split_store, 2, int_type, xor_mask); + if (inv_op != NOP_EXPR) + { + stmt = gimple_build_assign (make_ssa_name (int_type), + inv_op, src, xor_mask); + gimple_set_location (stmt, loc); + gimple_seq_add_stmt_without_update (&seq, stmt); + src = gimple_assign_lhs (stmt); + } break; default: src = ops[0]; --- gcc/testsuite/gcc.dg/store_merging_18.c.jj 2018-01-15 12:43:49.607227365 +0100 +++ gcc/testsuite/gcc.dg/store_merging_18.c 2018-01-15 12:43:24.882245004 +0100 @@ -0,0 +1,51 @@ +/* PR tree-optimization/83843 */ +/* { dg-do run } */ +/* { dg-options "-O2 -fdump-tree-store-merging" } */ +/* { dg-final { scan-tree-dump-times "Merging successful" 3 "store-merging" { target store_merge } } } */ + +__attribute__((noipa)) void +foo (unsigned char *buf, unsigned char *tab) +{ + unsigned v = tab[1] ^ (tab[0] << 8); + buf[0] = ~(v >> 8); + buf[1] = ~v; +} + +__attribute__((noipa)) void +bar (unsigned char *buf, unsigned char *tab) +{ + unsigned v = tab[1] ^ (tab[0] << 8); + buf[0] = (v >> 8); + buf[1] = ~v; +} + +__attribute__((noipa)) void +baz (unsigned char *buf, unsigned char *tab) +{ + unsigned v = tab[1] ^ (tab[0] << 8); + buf[0] = ~(v >> 8); + buf[1] = v; +} + +int 
+main () +{ + volatile unsigned char l1 = 0; + volatile unsigned char l2 = 1; + unsigned char buf[2]; + unsigned char tab[2] = { l1 + 1, l2 * 2 }; + foo (buf, tab); + if (buf[0] != (unsigned char) ~1 || buf[1] != (unsigned char) ~2) + __builtin_abort (); + buf[0] = l1 + 7; + buf[1] = l2 * 8; + bar (buf, tab); + if (buf[0] != 1 || buf[1] != (unsigned char) ~2) + __builtin_abort (); + buf[0] = l1 + 9; + buf[1] = l2 * 10; + baz (buf, tab); + if (buf[0] != (unsigned char) ~1 || buf[1] != 2) + __builtin_abort (); + return 0; +} Jakub