On Sun, Aug 30, 2020 at 11:24 AM Jakub Jelinek <ja...@redhat.com> wrote:
>
> On Fri, Aug 28, 2020 at 06:25:46PM +0200, Jakub Jelinek via Gcc-patches wrote:
> > You're right, thanks for spotting it, I've missed native_encode_rtx will do
> > quick_push rather than safe_push.
> >
> > Updated patch below, it shouldn't be needed in the second loop, because
> > the first loop should already grow it to the largest size.
>
> Further testing, besides exposing a bug in i386.md, also revealed that I had
> lost a cast to long that is needed to avoid breaking 32-bit bootstrap.
>
> This is the version that passed bootstrap/regtest on both x86_64-linux and
> i686-linux. Across both bootstraps/regtests combined, it saved (according to
> the statistics I've gathered) 63104 .rodata bytes (before constant merging),
> over 6814 hits of the data->desc->mark = ~(*slot)->desc->labelno; statement.
>
> Ok for trunk?
OK.
Thanks,
Richard.
> 2020-08-30 Jakub Jelinek <ja...@redhat.com>
>
> PR middle-end/54201
> * varasm.c: Include alloc-pool.h.
> (output_constant_pool_contents): Emit desc->mark < 0 entries as
> aliases.
> (struct constant_descriptor_rtx_data): New type.
> (constant_descriptor_rtx_data_cmp): New function.
> (struct const_rtx_data_hasher): New type.
> (const_rtx_data_hasher::hash, const_rtx_data_hasher::equal): New
> methods.
> (optimize_constant_pool): New function.
> (output_shared_constant_pool): Call it if TARGET_SUPPORTS_ALIASES.
>
> --- gcc/varasm.c.jj 2020-07-28 15:39:10.091755086 +0200
> +++ gcc/varasm.c 2020-08-28 18:21:58.943759578 +0200
> @@ -57,6 +57,7 @@ along with GCC; see the file COPYING3.
> #include "asan.h"
> #include "rtl-iter.h"
> #include "file-prefix-map.h" /* remap_debug_filename() */
> +#include "alloc-pool.h"
>
> #ifdef XCOFF_DEBUGGING_INFO
> #include "xcoffout.h" /* Needed for external data declarations. */
> @@ -4198,7 +4199,27 @@ output_constant_pool_contents (struct rt
> class constant_descriptor_rtx *desc;
>
> for (desc = pool->first; desc ; desc = desc->next)
> - if (desc->mark)
> + if (desc->mark < 0)
> + {
> +#ifdef ASM_OUTPUT_DEF
> + const char *name = targetm.strip_name_encoding (XSTR (desc->sym, 0));
> + char label[256];
> + char buffer[256 + 32];
> + const char *p;
> +
> + ASM_GENERATE_INTERNAL_LABEL (label, "LC", ~desc->mark);
> + p = targetm.strip_name_encoding (label);
> + if (desc->offset)
> + {
> + sprintf (buffer, "%s+%ld", p, (long) (desc->offset));
> + p = buffer;
> + }
> + ASM_OUTPUT_DEF (asm_out_file, name, p);
> +#else
> + gcc_unreachable ();
> +#endif
> + }
> + else if (desc->mark)
> {
> /* If the constant is part of an object_block, make sure that
> the constant has been positioned within its block, but do not
> @@ -4216,6 +4237,160 @@ output_constant_pool_contents (struct rt
> }
> }
>
> +struct constant_descriptor_rtx_data {
> + constant_descriptor_rtx *desc;
> + target_unit *bytes;
> + unsigned short size;
> + unsigned short offset;
> + unsigned int hash;
> +};
> +
> +/* qsort callback to sort constant_descriptor_rtx_data * vector by
> + decreasing size. */
> +
> +static int
> +constant_descriptor_rtx_data_cmp (const void *p1, const void *p2)
> +{
> + constant_descriptor_rtx_data *const data1
> + = *(constant_descriptor_rtx_data * const *) p1;
> + constant_descriptor_rtx_data *const data2
> + = *(constant_descriptor_rtx_data * const *) p2;
> + if (data1->size > data2->size)
> + return -1;
> + if (data1->size < data2->size)
> + return 1;
> + if (data1->hash < data2->hash)
> + return -1;
> + gcc_assert (data1->hash > data2->hash);
> + return 1;
> +}
> +
> +struct const_rtx_data_hasher : nofree_ptr_hash<constant_descriptor_rtx_data>
> +{
> + static hashval_t hash (constant_descriptor_rtx_data *);
> + static bool equal (constant_descriptor_rtx_data *,
> + constant_descriptor_rtx_data *);
> +};
> +
> +/* Hash and compare functions for const_rtx_data_htab. */
> +
> +hashval_t
> +const_rtx_data_hasher::hash (constant_descriptor_rtx_data *data)
> +{
> + return data->hash;
> +}
> +
> +bool
> +const_rtx_data_hasher::equal (constant_descriptor_rtx_data *x,
> + constant_descriptor_rtx_data *y)
> +{
> + if (x->hash != y->hash || x->size != y->size)
> + return 0;
> + unsigned int align1 = x->desc->align;
> + unsigned int align2 = y->desc->align;
> + unsigned int offset1 = (x->offset * BITS_PER_UNIT) & (align1 - 1);
> + unsigned int offset2 = (y->offset * BITS_PER_UNIT) & (align2 - 1);
> + if (offset1)
> + align1 = least_bit_hwi (offset1);
> + if (offset2)
> + align2 = least_bit_hwi (offset2);
> + if (align2 > align1)
> + return 0;
> + if (memcmp (x->bytes, y->bytes, x->size * sizeof (target_unit)) != 0)
> + return 0;
> + return 1;
> +}
> +
> +/* Attempt to optimize constant pool POOL. If it contains both CONST_VECTOR
> + constants and scalar constants with the values of CONST_VECTOR elements,
> + try to alias the scalar constants with the CONST_VECTOR elements. */
> +
> +static void
> +optimize_constant_pool (struct rtx_constant_pool *pool)
> +{
> + auto_vec<target_unit, 128> buffer;
> + auto_vec<constant_descriptor_rtx_data *, 128> vec;
> + object_allocator<constant_descriptor_rtx_data>
> + data_pool ("constant_descriptor_rtx_data_pool");
> + int idx = 0;
> + size_t size = 0;
> + for (constant_descriptor_rtx *desc = pool->first; desc; desc = desc->next)
> + if (desc->mark > 0
> + && ! (SYMBOL_REF_HAS_BLOCK_INFO_P (desc->sym)
> + && SYMBOL_REF_BLOCK (desc->sym)))
> + {
> + buffer.truncate (0);
> + buffer.reserve (GET_MODE_SIZE (desc->mode));
> + if (native_encode_rtx (desc->mode, desc->constant, buffer, 0,
> + GET_MODE_SIZE (desc->mode)))
> + {
> + constant_descriptor_rtx_data *data = data_pool.allocate ();
> + data->desc = desc;
> + data->bytes = NULL;
> + data->size = GET_MODE_SIZE (desc->mode);
> + data->offset = 0;
> + data->hash = idx++;
> + size += data->size;
> + vec.safe_push (data);
> + }
> + }
> + if (idx)
> + {
> + vec.qsort (constant_descriptor_rtx_data_cmp);
> + unsigned min_size = vec.last ()->size;
> + target_unit *bytes = XNEWVEC (target_unit, size);
> + unsigned int i;
> + constant_descriptor_rtx_data *data;
> + hash_table<const_rtx_data_hasher> * htab
> + = new hash_table<const_rtx_data_hasher> (31);
> + size = 0;
> + FOR_EACH_VEC_ELT (vec, i, data)
> + {
> + buffer.truncate (0);
> + native_encode_rtx (data->desc->mode, data->desc->constant,
> + buffer, 0, data->size);
> + memcpy (bytes + size, buffer.address (), data->size);
> + data->bytes = bytes + size;
> + data->hash = iterative_hash (data->bytes,
> + data->size * sizeof (target_unit), 0);
> + size += data->size;
> + constant_descriptor_rtx_data **slot
> + = htab->find_slot_with_hash (data, data->hash, INSERT);
> + if (*slot)
> + {
> + data->desc->mark = ~(*slot)->desc->labelno;
> + data->desc->offset = (*slot)->offset;
> + }
> + else
> + {
> + unsigned int sz = 1 << floor_log2 (data->size);
> +
> + *slot = data;
> + for (sz >>= 1; sz >= min_size; sz >>= 1)
> + for (unsigned off = 0; off + sz <= data->size; off += sz)
> + {
> + constant_descriptor_rtx_data tmp;
> + tmp.desc = data->desc;
> + tmp.bytes = data->bytes + off;
> + tmp.size = sz;
> + tmp.offset = off;
> + tmp.hash = iterative_hash (tmp.bytes,
> + sz * sizeof (target_unit), 0);
> + slot = htab->find_slot_with_hash (&tmp, tmp.hash, INSERT);
> + if (*slot == NULL)
> + {
> + *slot = data_pool.allocate ();
> + **slot = tmp;
> + }
> + }
> + }
> + }
> + delete htab;
> + XDELETE (bytes);
> + }
> + data_pool.release ();
> +}
> +
> /* Mark all constants that are used in the current function, then write
> out the function's private constant pool. */
>
> @@ -4251,6 +4426,10 @@ output_constant_pool (const char *fnname
> void
> output_shared_constant_pool (void)
> {
> + if (optimize
> + && TARGET_SUPPORTS_ALIASES)
> + optimize_constant_pool (shared_constant_pool);
> +
> output_constant_pool_contents (shared_constant_pool);
> }
>
>
>
> Jakub
>