On Tue, 24 Nov 2020, Jakub Jelinek wrote: > Hi! > > Currently the __builtin_clear_padding expansion code emits no code for > full words that don't have any padding bits, and most of the time if > the only padding bytes are from the start of the word it attempts to merge > them with previous padding store (via {}) or if the only padding bytes are > from the end of the word, it attempts to merge it with following padding > bytes. For everything else it was using a RMW, except when it found > an aligned char/short/int covering all the padding bytes and all those > padding bytes were all ones in that store. > > The following patch changes it, such that we only use RMW if the padding has > any bytes which have some padding and some non-padding bits (i.e. bitfields > are involved), often it is the same amount of instructions in the end and > avoids being thread-unsafe unless necessary (and avoids having to wait for > the reads to make it into the CPU). So, if there are no bitfields, > the function will just store some zero bytes, shorts, ints, long longs etc. > where needed. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
OK. Richard. > 2020-11-24 Jakub Jelinek <ja...@redhat.com> > > * gimple-fold.c (clear_padding_flush): If a word contains only 0 > or 0xff bytes of padding other than all set, all clear, all set > followed by all clear or all clear followed by all set, don't emit > a RMW operation on the whole word or parts of it, but instead > clear the individual bytes of padding. For paddings of one byte > size, don't use char[1] and {}, but instead just char and 0. > > --- gcc/gimple-fold.c.jj 2020-11-23 11:59:18.790279409 +0100 > +++ gcc/gimple-fold.c 2020-11-23 15:48:36.723510615 +0100 > @@ -4033,7 +4033,9 @@ clear_padding_flush (clear_padding_struc > { > size_t nonzero_first = wordsize; > size_t nonzero_last = 0; > - bool all_ones = true; > + size_t zero_first = wordsize; > + size_t zero_last = 0; > + bool all_ones = true, bytes_only = true; > if ((unsigned HOST_WIDE_INT) (buf->off + i + wordsize) > > (unsigned HOST_WIDE_INT) buf->sz) > { > @@ -4055,9 +4057,19 @@ clear_padding_flush (clear_padding_struc > all_ones = false; > nonzero_last = j + 1 - i; > } > + else > + { > + if (zero_first == wordsize) > + zero_first = j - i; > + zero_last = j + 1 - i; > + } > if (buf->buf[j] != 0 && buf->buf[j] != (unsigned char) ~0) > - all_ones = false; > + { > + all_ones = false; > + bytes_only = false; > + } > } > + size_t padding_end = i; > if (padding_bytes) > { > if (nonzero_first == 0 > @@ -4069,7 +4081,6 @@ clear_padding_flush (clear_padding_struc > padding_bytes += wordsize; > continue; > } > - size_t padding_end = i; > if (all_ones && nonzero_first == 0) > { > padding_bytes += nonzero_last; > @@ -4077,12 +4088,27 @@ clear_padding_flush (clear_padding_struc > nonzero_first = wordsize; > nonzero_last = 0; > } > - tree atype = build_array_type_nelts (char_type_node, padding_bytes); > + else if (bytes_only && nonzero_first == 0) > + { > + gcc_assert (zero_first && zero_first != wordsize); > + padding_bytes += zero_first; > + padding_end += zero_first; > + } > + tree atype, src; > + if (padding_bytes == 1) > + { > + atype = char_type_node; > + src = build_zero_cst (char_type_node); > + } > + else > + { > + atype = build_array_type_nelts (char_type_node, padding_bytes); > + src = build_constructor (atype, NULL); > + } > tree dst = build2_loc (buf->loc, MEM_REF, atype, buf->base, > build_int_cst (buf->alias_type, > buf->off + padding_end > - padding_bytes)); > - tree src = build_constructor (atype, NULL); > gimple *g = gimple_build_assign (dst, src); > gimple_set_location (g, buf->loc); > gsi_insert_before (buf->gsi, g, GSI_SAME_STMT); > @@ -4099,6 +4125,45 @@ clear_padding_flush (clear_padding_struc > padding_bytes = nonzero_last - nonzero_first; > continue; > } > + if (bytes_only) > + { > + /* If bitfields aren't involved in this word, prefer storing > + individual bytes or groups of them over performing a RMW > + operation on the whole word. */ > + gcc_assert (i + zero_last <= end); > + for (size_t j = padding_end; j < i + zero_last; j++) > + { > + if (buf->buf[j]) > + { > + size_t k; > + for (k = j; k < i + zero_last; k++) > + if (buf->buf[k] == 0) > + break; > + HOST_WIDE_INT off = buf->off + j; > + tree atype, src; > + if (k - j == 1) > + { > + atype = char_type_node; > + src = build_zero_cst (char_type_node); > + } > + else > + { > + atype = build_array_type_nelts (char_type_node, k - j); > + src = build_constructor (atype, NULL); > + } > + tree dst = build2_loc (buf->loc, MEM_REF, atype, > + buf->base, > + build_int_cst (buf->alias_type, off)); > + gimple *g = gimple_build_assign (dst, src); > + gimple_set_location (g, buf->loc); > + gsi_insert_before (buf->gsi, g, GSI_SAME_STMT); > + j = k; > + } > + } > + if (nonzero_last == wordsize) > + padding_bytes = nonzero_last - zero_last; > + continue; > + } > for (size_t eltsz = 1; eltsz <= wordsize; eltsz <<= 1) > { > if (nonzero_last - nonzero_first <= eltsz > @@ -4153,12 +4218,21 @@ clear_padding_flush (clear_padding_struc > { > if (padding_bytes) > { > - tree atype = build_array_type_nelts (char_type_node, padding_bytes); > + tree atype, src; > + if (padding_bytes == 1) > + { > + atype = char_type_node; > + src = build_zero_cst (char_type_node); > + } > + else > + { > + atype = build_array_type_nelts (char_type_node, padding_bytes); > + src = build_constructor (atype, NULL); > + } > tree dst = build2_loc (buf->loc, MEM_REF, atype, buf->base, > build_int_cst (buf->alias_type, > buf->off + end > - padding_bytes)); > - tree src = build_constructor (atype, NULL); > gimple *g = gimple_build_assign (dst, src); > gimple_set_location (g, buf->loc); > gsi_insert_before (buf->gsi, g, GSI_SAME_STMT); > > Jakub > > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany; GF: Felix Imend