On 2018-07-24 09:57 AM, Michael Ploujnikov wrote:
> On 2018-07-20 06:05 AM, Richard Biener wrote:
>>>  /* Return a new assembler name for a clone with SUFFIX of a decl named
>>>     NAME.  */
>>> @@ -521,14 +521,13 @@ tree
>>>  clone_function_name_1 (const char *name, const char *suffix)
>>
>> pass this function the counter to use....
>>
>>>  {
>>>    size_t len = strlen (name);
>>> -  char *tmp_name, *prefix;
>>> +  char *prefix;
>>>
>>>    prefix = XALLOCAVEC (char, len + strlen (suffix) + 2);
>>>    memcpy (prefix, name, len);
>>>    strcpy (prefix + len + 1, suffix);
>>>    prefix[len] = symbol_table::symbol_suffix_separator ();
>>> -  ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, clone_fn_id_num++);
>>
>> and keep using ASM_FORMAT_PRIVATE_NAME here.  You need to change
>> the lto/lto-partition.c caller (just use zero as counter).
>>
>>> -  return get_identifier (tmp_name);
>>> +  return get_identifier (prefix);
>>>  }
>>>
>>>  /* Return a new assembler name for a clone of DECL with SUFFIX.  */
>>> @@ -537,7 +536,17 @@ tree
>>>  clone_function_name (tree decl, const char *suffix)
>>>  {
>>>    tree name = DECL_ASSEMBLER_NAME (decl);
>>> -  return clone_function_name_1 (IDENTIFIER_POINTER (name), suffix);
>>> +  const char *decl_name = IDENTIFIER_POINTER (name);
>>> +  char *numbered_name;
>>> +  unsigned int *suffix_counter;
>>> +  if (!clone_fn_ids) {
>>> +    /* Initialize the per-function counter hash table if this is the first 
>>> call */
>>> +    clone_fn_ids = hash_map<const char *, unsigned>::create_ggc (64);
>>> +  }
>>
>> I still do not like throwing memory at the problem in this way for the
>> little benefit
>> this change provides :/
>>
>> So no approval from me at this point...
>>
>> Richard.
> 
> Can you give me an idea of the memory constraints that are involved?
> 
> The highest memory usage increase that I could find was when compiling
> a source file (from Linux) with roughly 10,000 functions. It showed a 2kB
> increase over the before-patch use of 6936kB which is barely 0.03%.
> 
> Using a single counter can result in more confusing namespacing when
> you have .bar.clone.4 despite there only being 3 clones of .bar.
> 
> From a practical point of view this change is helpful to anyone
> diffing binary output such as forensic analysts, Debian Reproducible
> Builds or even someone validating compiler output (before and after an input
> source patch). The extra changes that this patch alleviates are a
> distraction and could even be misleading. For example, applying a
> source patch to the same Linux source produces the following binary
> diff before my change:
> 
> --- /tmp/output.o.objdump
> +++ /tmp/patched-output.o.objdump
> @@ -1,5 +1,5 @@
> 
> -/tmp/uverbs_cmd/output.o:     file format elf32-i386
> +/tmp/uverbs_cmd/patched-output.o:     file format elf32-i386
> 
> 
>  Disassembly of section .text.get_order:
> @@ -265,12 +265,12 @@
>     3:        e9 fc ff ff ff          jmp    4 <put_cq_read+0x4>
>                       4: R_386_PC32   .text.put_uobj_read
> 
> -Disassembly of section .text.trace_kmalloc.constprop.3:
> +Disassembly of section .text.trace_kmalloc.constprop.4:
> 
> -00000000 <trace_kmalloc.constprop.3>:
> +00000000 <trace_kmalloc.constprop.4>:
>     0:        83 3d 04 00 00 00 00    cmpl   $0x0,0x4
>                       2: R_386_32     __tracepoint_kmalloc
> -   7:        74 34                   je     3d 
> <trace_kmalloc.constprop.3+0x3d>
> +   7:        74 34                   je     3d 
> <trace_kmalloc.constprop.4+0x3d>
>     9:        55                      push   %ebp
>     a:        89 cd                   mov    %ecx,%ebp
>     c:        57                      push   %edi
> @@ -281,7 +281,7 @@
>    13:        8b 1d 10 00 00 00       mov    0x10,%ebx
>                       15: R_386_32    __tracepoint_kmalloc
>    19:        85 db                   test   %ebx,%ebx
> -  1b:        74 1b                   je     38 
> <trace_kmalloc.constprop.3+0x38>
> +  1b:        74 1b                   je     38 
> <trace_kmalloc.constprop.4+0x38>
>    1d:        68 d0 00 00 00          push   $0xd0
>    22:        89 fa                   mov    %edi,%edx
>    24:        89 f0                   mov    %esi,%eax
> @@ -292,7 +292,7 @@
>    31:        58                      pop    %eax
>    32:        83 3b 00                cmpl   $0x0,(%ebx)
>    35:        5a                      pop    %edx
> -  36:        eb e3                   jmp    1b 
> <trace_kmalloc.constprop.3+0x1b>
> +  36:        eb e3                   jmp    1b 
> <trace_kmalloc.constprop.4+0x1b>
>    38:        5b                      pop    %ebx
>    39:        5e                      pop    %esi
>    3a:        5f                      pop    %edi
> @@ -846,7 +846,7 @@
>    78:        b8 5f 00 00 00          mov    $0x5f,%eax
>                       79: R_386_32    .text.ib_uverbs_alloc_pd
>    7d:        e8 fc ff ff ff          call   7e <ib_uverbs_alloc_pd+0x7e>
> -                     7e: R_386_PC32  .text.trace_kmalloc.constprop.3
> +                     7e: R_386_PC32  .text.trace_kmalloc.constprop.4
>    82:        c7 45 d4 f4 ff ff ff    movl   $0xfffffff4,-0x2c(%ebp)
>    89:        59                      pop    %ecx
>    8a:        85 db                   test   %ebx,%ebx
> @@ -1068,7 +1068,7 @@
>    9c:        b8 83 00 00 00          mov    $0x83,%eax
>                       9d: R_386_32    .text.ib_uverbs_reg_mr
>    a1:        e8 fc ff ff ff          call   a2 <ib_uverbs_reg_mr+0xa2>
> -                     a2: R_386_PC32  .text.trace_kmalloc.constprop.3
> +                     a2: R_386_PC32  .text.trace_kmalloc.constprop.4
>    a6:        ba f4 ff ff ff          mov    $0xfffffff4,%edx
>    ab:        58                      pop    %eax
>    ac:        85 db                   test   %ebx,%ebx
> @@ -1385,7 +1385,7 @@
>    99:        b8 7b 00 00 00          mov    $0x7b,%eax
>                       9a: R_386_32    .text.ib_uverbs_create_cq
>    9e:        e8 fc ff ff ff          call   9f <ib_uverbs_create_cq+0x9f>
> -                     9f: R_386_PC32  .text.trace_kmalloc.constprop.3
> +                     9f: R_386_PC32  .text.trace_kmalloc.constprop.4
>    a3:        58                      pop    %eax
>    a4:        85 db                   test   %ebx,%ebx
>    a6:        75 0a                   jne    b2 <ib_uverbs_create_cq+0xb2>
> @@ -1607,129 +1607,107 @@
>  00000000 <ib_uverbs_poll_cq>:
>     0:        55                      push   %ebp
>     1:        57                      push   %edi
> -   2:        89 c7                   mov    %eax,%edi
> -   4:        56                      push   %esi
> -   5:        89 ce                   mov    %ecx,%esi
> -   7:        b9 10 00 00 00          mov    $0x10,%ecx
> -   c:        53                      push   %ebx
> -   d:        83 ec 18                sub    $0x18,%esp
> -  10:        8d 44 24 08             lea    0x8(%esp),%eax
> -  14:        e8 fc ff ff ff          call   15 <ib_uverbs_poll_cq+0x15>
> -                     15: R_386_PC32  copy_from_user
> -  19:        85 c0                   test   %eax,%eax
> -  1b:        0f 85 34 01 00 00       jne    155 <ib_uverbs_poll_cq+0x155>
> -  21:        6b 44 24 14 34          imul   $0x34,0x14(%esp),%eax
> -  26:        ba d0 00 00 00          mov    $0xd0,%edx
> -  2b:        e8 fc ff ff ff          call   2c <ib_uverbs_poll_cq+0x2c>
> -                     2c: R_386_PC32  __kmalloc
> -  30:        89 c5                   mov    %eax,%ebp
> -  32:        85 c0                   test   %eax,%eax
> -  34:        0f 84 22 01 00 00       je     15c <ib_uverbs_poll_cq+0x15c>
> -  3a:        6b 44 24 14 30          imul   $0x30,0x14(%esp),%eax
> -  3f:        ba d0 00 00 00          mov    $0xd0,%edx
> -  44:        83 c0 08                add    $0x8,%eax
> -  47:        89 44 24 04             mov    %eax,0x4(%esp)
> -  4b:        e8 fc ff ff ff          call   4c <ib_uverbs_poll_cq+0x4c>
> -                     4c: R_386_PC32  __kmalloc
> -  50:        ba f4 ff ff ff          mov    $0xfffffff4,%edx
> -  55:        89 04 24                mov    %eax,(%esp)
> -  58:        85 c0                   test   %eax,%eax
> -  5a:        0f 84 e1 00 00 00       je     141 <ib_uverbs_poll_cq+0x141>
> -  60:        8b 4f 58                mov    0x58(%edi),%ecx
> -  63:        6a 00                   push   $0x0
> -  65:        b8 00 00 00 00          mov    $0x0,%eax
> -                     66: R_386_32    ib_uverbs_cq_idr
> -  6a:        8b 54 24 14             mov    0x14(%esp),%edx
> -  6e:        e8 fc ff ff ff          call   6f <ib_uverbs_poll_cq+0x6f>
> -                     6f: R_386_PC32  .text.idr_read_obj
> -  73:        ba ea ff ff ff          mov    $0xffffffea,%edx
> -  78:        89 c7                   mov    %eax,%edi
> -  7a:        58                      pop    %eax
> -  7b:        85 ff                   test   %edi,%edi
> -  7d:        0f 84 ae 00 00 00       je     131 <ib_uverbs_poll_cq+0x131>
> -  83:        8b 1f                   mov    (%edi),%ebx
> -  85:        8b 54 24 14             mov    0x14(%esp),%edx
> -  89:        89 e9                   mov    %ebp,%ecx
> -  8b:        89 f8                   mov    %edi,%eax
> -  8d:        ff 93 70 01 00 00       call   *0x170(%ebx)
> -  93:        8b 1c 24                mov    (%esp),%ebx
> -  96:        89 03                   mov    %eax,(%ebx)
> -  98:        89 f8                   mov    %edi,%eax
> -  9a:        e8 fc ff ff ff          call   9b <ib_uverbs_poll_cq+0x9b>
> -                     9b: R_386_PC32  .text.put_cq_read
> -  9f:        8b 1c 24                mov    (%esp),%ebx
> -  a2:        89 e8                   mov    %ebp,%eax
> -  a4:        6b 3b 34                imul   $0x34,(%ebx),%edi
> -  a7:        8d 53 08                lea    0x8(%ebx),%edx
> -  aa:        01 ef                   add    %ebp,%edi
> -  ac:        39 f8                   cmp    %edi,%eax
> -  ae:        74 67                   je     117 <ib_uverbs_poll_cq+0x117>
> -  b0:        8b 08                   mov    (%eax),%ecx
> -  b2:        8b 58 04                mov    0x4(%eax),%ebx
> -  b5:        83 c2 30                add    $0x30,%edx
> -  b8:        83 c0 34                add    $0x34,%eax
> -  bb:        89 4a d0                mov    %ecx,-0x30(%edx)
> -  be:        89 5a d4                mov    %ebx,-0x2c(%edx)
> -  c1:        8b 48 d4                mov    -0x2c(%eax),%ecx
> -  c4:        89 4a d8                mov    %ecx,-0x28(%edx)
> -  c7:        8b 48 d8                mov    -0x28(%eax),%ecx
> -  ca:        89 4a dc                mov    %ecx,-0x24(%edx)
> -  cd:        8b 48 dc                mov    -0x24(%eax),%ecx
> -  d0:        89 4a e0                mov    %ecx,-0x20(%edx)
> -  d3:        8b 48 e0                mov    -0x20(%eax),%ecx
> -  d6:        89 4a e4                mov    %ecx,-0x1c(%edx)
> -  d9:        8b 48 e8                mov    -0x18(%eax),%ecx
> -  dc:        89 4a e8                mov    %ecx,-0x18(%edx)
> -  df:        8b 48 e4                mov    -0x1c(%eax),%ecx
> -  e2:        8b 49 20                mov    0x20(%ecx),%ecx
> -  e5:        89 4a ec                mov    %ecx,-0x14(%edx)
> -  e8:        8b 48 ec                mov    -0x14(%eax),%ecx
> -  eb:        89 4a f0                mov    %ecx,-0x10(%edx)
> -  ee:        8b 48 f0                mov    -0x10(%eax),%ecx
> -  f1:        89 4a f4                mov    %ecx,-0xc(%edx)
> -  f4:        8b 48 f4                mov    -0xc(%eax),%ecx
> -  f7:        66 89 4a f8             mov    %cx,-0x8(%edx)
> -  fb:        66 8b 48 f6             mov    -0xa(%eax),%cx
> -  ff:        66 89 4a fa             mov    %cx,-0x6(%edx)
> - 103:        8a 48 f8                mov    -0x8(%eax),%cl
> - 106:        88 4a fc                mov    %cl,-0x4(%edx)
> - 109:        8a 48 f9                mov    -0x7(%eax),%cl
> - 10c:        88 4a fd                mov    %cl,-0x3(%edx)
> - 10f:        8a 48 fa                mov    -0x6(%eax),%cl
> - 112:        88 4a fe                mov    %cl,-0x2(%edx)
> - 115:        eb 95                   jmp    ac <ib_uverbs_poll_cq+0xac>
> - 117:        8b 14 24                mov    (%esp),%edx
> - 11a:        8b 4c 24 04             mov    0x4(%esp),%ecx
> - 11e:        8b 44 24 08             mov    0x8(%esp),%eax
> - 122:        e8 fc ff ff ff          call   123 <ib_uverbs_poll_cq+0x123>
> -                     123: R_386_PC32 copy_to_user
> - 127:        83 f8 01                cmp    $0x1,%eax
> - 12a:        19 d2                   sbb    %edx,%edx
> - 12c:        f7 d2                   not    %edx
> - 12e:        83 e2 f2                and    $0xfffffff2,%edx
> - 131:        8b 04 24                mov    (%esp),%eax
> - 134:        89 54 24 04             mov    %edx,0x4(%esp)
> - 138:        e8 fc ff ff ff          call   139 <ib_uverbs_poll_cq+0x139>
> -                     139: R_386_PC32 kfree
> - 13d:        8b 54 24 04             mov    0x4(%esp),%edx
> - 141:        89 e8                   mov    %ebp,%eax
> - 143:        89 14 24                mov    %edx,(%esp)
> - 146:        e8 fc ff ff ff          call   147 <ib_uverbs_poll_cq+0x147>
> -                     147: R_386_PC32 kfree
> - 14b:        8b 14 24                mov    (%esp),%edx
> - 14e:        85 d2                   test   %edx,%edx
> - 150:        0f 45 f2                cmovne %edx,%esi
> - 153:        eb 0c                   jmp    161 <ib_uverbs_poll_cq+0x161>
> - 155:        be f2 ff ff ff          mov    $0xfffffff2,%esi
> - 15a:        eb 05                   jmp    161 <ib_uverbs_poll_cq+0x161>
> - 15c:        be f4 ff ff ff          mov    $0xfffffff4,%esi
> - 161:        83 c4 18                add    $0x18,%esp
> - 164:        89 f0                   mov    %esi,%eax
> - 166:        5b                      pop    %ebx
> - 167:        5e                      pop    %esi
> - 168:        5f                      pop    %edi
> - 169:        5d                      pop    %ebp
> - 16a:        c3                      ret
> +   2:        bf f2 ff ff ff          mov    $0xfffffff2,%edi
> +   7:        56                      push   %esi
> +   8:        53                      push   %ebx
> +   9:        89 c3                   mov    %eax,%ebx
> +   b:        81 ec 84 00 00 00       sub    $0x84,%esp
> +  11:        89 4c 24 04             mov    %ecx,0x4(%esp)
> +  15:        8d 44 24 10             lea    0x10(%esp),%eax
> +  19:        b9 10 00 00 00          mov    $0x10,%ecx
> +  1e:        e8 fc ff ff ff          call   1f <ib_uverbs_poll_cq+0x1f>
> +                     1f: R_386_PC32  copy_from_user
> +  23:        89 04 24                mov    %eax,(%esp)
> +  26:        85 c0                   test   %eax,%eax
> +  28:        0f 85 1f 01 00 00       jne    14d <ib_uverbs_poll_cq+0x14d>
> +  2e:        8b 4b 58                mov    0x58(%ebx),%ecx
> +  31:        6a 00                   push   $0x0
> +  33:        b8 00 00 00 00          mov    $0x0,%eax
> +                     34: R_386_32    ib_uverbs_cq_idr
> +  38:        bf ea ff ff ff          mov    $0xffffffea,%edi
> +  3d:        8b 54 24 1c             mov    0x1c(%esp),%edx
> +  41:        e8 fc ff ff ff          call   42 <ib_uverbs_poll_cq+0x42>
> +                     42: R_386_PC32  .text.idr_read_obj
> +  46:        89 c3                   mov    %eax,%ebx
> +  48:        58                      pop    %eax
> +  49:        85 db                   test   %ebx,%ebx
> +  4b:        0f 84 fc 00 00 00       je     14d <ib_uverbs_poll_cq+0x14d>
> +  51:        8b 6c 24 10             mov    0x10(%esp),%ebp
> +  55:        b9 02 00 00 00          mov    $0x2,%ecx
> +  5a:        8d 7c 24 08             lea    0x8(%esp),%edi
> +  5e:        8b 04 24                mov    (%esp),%eax
> +  61:        8d 75 08                lea    0x8(%ebp),%esi
> +  64:        f3 ab                   rep stos %eax,%es:(%edi)
> +  66:        8b 44 24 1c             mov    0x1c(%esp),%eax
> +  6a:        39 44 24 08             cmp    %eax,0x8(%esp)
> +  6e:        73 1f                   jae    8f <ib_uverbs_poll_cq+0x8f>
> +  70:        8b 3b                   mov    (%ebx),%edi
> +  72:        8d 4c 24 50             lea    0x50(%esp),%ecx
> +  76:        ba 01 00 00 00          mov    $0x1,%edx
> +  7b:        89 d8                   mov    %ebx,%eax
> +  7d:        ff 97 70 01 00 00       call   *0x170(%edi)
> +  83:        89 c7                   mov    %eax,%edi
> +  85:        85 c0                   test   %eax,%eax
> +  87:        0f 88 b9 00 00 00       js     146 <ib_uverbs_poll_cq+0x146>
> +  8d:        75 21                   jne    b0 <ib_uverbs_poll_cq+0xb0>
> +  8f:        b9 08 00 00 00          mov    $0x8,%ecx
> +  94:        8d 54 24 08             lea    0x8(%esp),%edx
> +  98:        89 e8                   mov    %ebp,%eax
> +  9a:        bf f2 ff ff ff          mov    $0xfffffff2,%edi
> +  9f:        e8 fc ff ff ff          call   a0 <ib_uverbs_poll_cq+0xa0>
> +                     a0: R_386_PC32  copy_to_user
> +  a4:        85 c0                   test   %eax,%eax
> +  a6:        0f 44 7c 24 04          cmove  0x4(%esp),%edi
> +  ab:        e9 96 00 00 00          jmp    146 <ib_uverbs_poll_cq+0x146>
> +  b0:        8b 44 24 50             mov    0x50(%esp),%eax
> +  b4:        8b 54 24 54             mov    0x54(%esp),%edx
> +  b8:        b9 30 00 00 00          mov    $0x30,%ecx
> +  bd:        89 44 24 20             mov    %eax,0x20(%esp)
> +  c1:        8b 44 24 58             mov    0x58(%esp),%eax
> +  c5:        89 54 24 24             mov    %edx,0x24(%esp)
> +  c9:        8b 54 24 74             mov    0x74(%esp),%edx
> +  cd:        89 44 24 28             mov    %eax,0x28(%esp)
> +  d1:        8b 44 24 5c             mov    0x5c(%esp),%eax
> +  d5:        89 44 24 2c             mov    %eax,0x2c(%esp)
> +  d9:        8b 44 24 60             mov    0x60(%esp),%eax
> +  dd:        89 44 24 30             mov    %eax,0x30(%esp)
> +  e1:        8b 44 24 64             mov    0x64(%esp),%eax
> +  e5:        89 44 24 34             mov    %eax,0x34(%esp)
> +  e9:        8b 44 24 6c             mov    0x6c(%esp),%eax
> +  ed:        89 44 24 38             mov    %eax,0x38(%esp)
> +  f1:        8b 44 24 68             mov    0x68(%esp),%eax
> +  f5:        8b 40 20                mov    0x20(%eax),%eax
> +  f8:        89 54 24 44             mov    %edx,0x44(%esp)
> +  fc:        8d 54 24 20             lea    0x20(%esp),%edx
> + 100:        c6 44 24 4f 00          movb   $0x0,0x4f(%esp)
> + 105:        89 44 24 3c             mov    %eax,0x3c(%esp)
> + 109:        8b 44 24 70             mov    0x70(%esp),%eax
> + 10d:        89 44 24 40             mov    %eax,0x40(%esp)
> + 111:        8b 44 24 78             mov    0x78(%esp),%eax
> + 115:        89 44 24 48             mov    %eax,0x48(%esp)
> + 119:        8b 44 24 7c             mov    0x7c(%esp),%eax
> + 11d:        66 89 44 24 4c          mov    %ax,0x4c(%esp)
> + 122:        8a 44 24 7e             mov    0x7e(%esp),%al
> + 126:        88 44 24 4e             mov    %al,0x4e(%esp)
> + 12a:        89 f0                   mov    %esi,%eax
> + 12c:        e8 fc ff ff ff          call   12d <ib_uverbs_poll_cq+0x12d>
> +                     12d: R_386_PC32 copy_to_user
> + 131:        85 c0                   test   %eax,%eax
> + 133:        75 0c                   jne    141 <ib_uverbs_poll_cq+0x141>
> + 135:        83 c6 30                add    $0x30,%esi
> + 138:        ff 44 24 08             incl   0x8(%esp)
> + 13c:        e9 25 ff ff ff          jmp    66 <ib_uverbs_poll_cq+0x66>
> + 141:        bf f2 ff ff ff          mov    $0xfffffff2,%edi
> + 146:        89 d8                   mov    %ebx,%eax
> + 148:        e8 fc ff ff ff          call   149 <ib_uverbs_poll_cq+0x149>
> +                     149: R_386_PC32 .text.put_cq_read
> + 14d:        81 c4 84 00 00 00       add    $0x84,%esp
> + 153:        89 f8                   mov    %edi,%eax
> + 155:        5b                      pop    %ebx
> + 156:        5e                      pop    %esi
> + 157:        5f                      pop    %edi
> + 158:        5d                      pop    %ebp
> + 159:        c3                      ret
> 
>  Disassembly of section .text.ib_uverbs_req_notify_cq:
> 
> @@ -1915,7 +1893,7 @@
>    94:        b8 7b 00 00 00          mov    $0x7b,%eax
>                       95: R_386_32    .text.ib_uverbs_create_qp
>    99:        e8 fc ff ff ff          call   9a <ib_uverbs_create_qp+0x9a>
> -                     9a: R_386_PC32  .text.trace_kmalloc.constprop.3
> +                     9a: R_386_PC32  .text.trace_kmalloc.constprop.4
>    9e:        59                      pop    %ecx
>    9f:        c7 85 50 ff ff ff f4    movl   $0xfffffff4,-0xb0(%ebp)
>    a6:        ff ff ff
> @@ -2241,7 +2219,7 @@
>    68:        b8 4f 00 00 00          mov    $0x4f,%eax
>                       69: R_386_32    .text.ib_uverbs_query_qp
>    6d:        e8 fc ff ff ff          call   6e <ib_uverbs_query_qp+0x6e>
> -                     6e: R_386_PC32  .text.trace_kmalloc.constprop.3
> +                     6e: R_386_PC32  .text.trace_kmalloc.constprop.4
>    72:        59                      pop    %ecx
>    73:        c7 85 5c ff ff ff 10    movl   $0x10,-0xa4(%ebp)
>    7a:        00 00 00
> @@ -2260,7 +2238,7 @@
>    a3:        b8 86 00 00 00          mov    $0x86,%eax
>                       a4: R_386_32    .text.ib_uverbs_query_qp
>    a8:        e8 fc ff ff ff          call   a9 <ib_uverbs_query_qp+0xa9>
> -                     a9: R_386_PC32  .text.trace_kmalloc.constprop.3
> +                     a9: R_386_PC32  .text.trace_kmalloc.constprop.4
>    ad:        5a                      pop    %edx
>    ae:        85 db                   test   %ebx,%ebx
>    b0:        0f 84 c1 01 00 00       je     277 <ib_uverbs_query_qp+0x277>
> @@ -2462,7 +2440,7 @@
>    88:        b8 6f 00 00 00          mov    $0x6f,%eax
>                       89: R_386_32    .text.ib_uverbs_modify_qp
>    8d:        e8 fc ff ff ff          call   8e <ib_uverbs_modify_qp+0x8e>
> -                     8e: R_386_PC32  .text.trace_kmalloc.constprop.3
> +                     8e: R_386_PC32  .text.trace_kmalloc.constprop.4
>    92:        5a                      pop    %edx
>    93:        ba f4 ff ff ff          mov    $0xfffffff4,%edx
>    98:        85 db                   test   %ebx,%ebx
> @@ -3129,7 +3107,7 @@
>    6a:        b8 4c 00 00 00          mov    $0x4c,%eax
>                       6b: R_386_32    .text.ib_uverbs_create_ah
>    6f:        e8 fc ff ff ff          call   70 <ib_uverbs_create_ah+0x70>
> -                     70: R_386_PC32  .text.trace_kmalloc.constprop.3
> +                     70: R_386_PC32  .text.trace_kmalloc.constprop.4
>    74:        58                      pop    %eax
>    75:        85 db                   test   %ebx,%ebx
>    77:        75 0a                   jne    83 <ib_uverbs_create_ah+0x83>
> @@ -3396,7 +3374,7 @@
>    af:        b8 91 00 00 00          mov    $0x91,%eax
>                       b0: R_386_32    .text.ib_uverbs_attach_mcast
>    b4:        e8 fc ff ff ff          call   b5 <ib_uverbs_attach_mcast+0xb5>
> -                     b5: R_386_PC32  .text.trace_kmalloc.constprop.3
> +                     b5: R_386_PC32  .text.trace_kmalloc.constprop.4
>    b9:        58                      pop    %eax
>    ba:        85 db                   test   %ebx,%ebx
>    bc:        75 07                   jne    c5 <ib_uverbs_attach_mcast+0xc5>
> @@ -3572,7 +3550,7 @@
>    77:        b8 5e 00 00 00          mov    $0x5e,%eax
>                       78: R_386_32    .text.ib_uverbs_create_srq
>    7c:        e8 fc ff ff ff          call   7d <ib_uverbs_create_srq+0x7d>
> -                     7d: R_386_PC32  .text.trace_kmalloc.constprop.3
> +                     7d: R_386_PC32  .text.trace_kmalloc.constprop.4
>    81:        ba f4 ff ff ff          mov    $0xfffffff4,%edx
>    86:        58                      pop    %eax
>    87:        85 db                   test   %ebx,%ebx
> 
> Needless to say, after my change the diff only shows the truly changed
> functions.
> 
> - Michael
> 

Updated patch


gcc:
2018-07-26  Michael Ploujnikov  <michael.ploujni...@oracle.com>

       Make function clone name numbering independent.
       * cgraph.h (clone_function_name_1): Add clone_num argument
       * cgraphclones.c: Replace clone_fn_id_num with clone_fn_ids.
       (clone_function_name): Use counters from the hashtable.

lto:
2018-07-26  Michael Ploujnikov  <michael.ploujni...@oracle.com>

       * lto-partition.c (privatize_symbol_name_1): Pass 0 to
       clone_function_name_1.


testsuite:
2018-07-26  Michael Ploujnikov  <michael.ploujni...@oracle.com>

        Clone numbering should be independent for each function.
        * gcc/testsuite/gcc.dg/independent-cloneids-1.c: New test.

---
 gcc/cgraph.h                                  |  2 +-
 gcc/cgraphclones.c                            | 16 ++++++++---
 gcc/lto/lto-partition.c                       |  2 +-
 gcc/testsuite/gcc.dg/independent-cloneids-1.c | 38 +++++++++++++++++++++++++++
 4 files changed, 52 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/independent-cloneids-1.c

diff --git gcc/cgraph.h gcc/cgraph.h
index a8b1b4c..fe44bd0 100644
--- gcc/cgraph.h
+++ gcc/cgraph.h
@@ -2368,7 +2368,7 @@ basic_block init_lowered_empty_function (tree, bool,
profile_count);
 tree thunk_adjust (gimple_stmt_iterator *, tree, bool, HOST_WIDE_INT, tree);
 /* In cgraphclones.c  */

-tree clone_function_name_1 (const char *, const char *);
+tree clone_function_name_1 (const char *, const char *, unsigned int);
 tree clone_function_name (tree decl, const char *);

 void tree_function_versioning (tree, tree, vec<ipa_replace_map *, va_gc> *,
diff --git gcc/cgraphclones.c gcc/cgraphclones.c
index 6e84a31..7de7a65 100644
--- gcc/cgraphclones.c
+++ gcc/cgraphclones.c
@@ -512,13 +512,13 @@ cgraph_node::create_clone (tree new_decl, profile_count
prof_count,
   return new_node;
 }

-static GTY(()) unsigned int clone_fn_id_num;
+static GTY(()) hash_map<const char *, unsigned> *clone_fn_ids;

 /* Return a new assembler name for a clone with SUFFIX of a decl named
    NAME.  */

 tree
-clone_function_name_1 (const char *name, const char *suffix)
+clone_function_name_1 (const char *name, const char *suffix, unsigned int
clone_num)
 {
   size_t len = strlen (name);
   char *tmp_name, *prefix;
@@ -527,7 +527,7 @@ clone_function_name_1 (const char *name, const char *suffix)
   memcpy (prefix, name, len);
   strcpy (prefix + len + 1, suffix);
   prefix[len] = symbol_table::symbol_suffix_separator ();
-  ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, clone_fn_id_num++);
+  ASM_FORMAT_PRIVATE_NAME (tmp_name, prefix, clone_num);
   return get_identifier (tmp_name);
 }

@@ -537,7 +537,15 @@ tree
 clone_function_name (tree decl, const char *suffix)
 {
   tree name = DECL_ASSEMBLER_NAME (decl);
-  return clone_function_name_1 (IDENTIFIER_POINTER (name), suffix);
+  const char *decl_name = IDENTIFIER_POINTER (name);
+  unsigned int *suffix_counter;
+  if (!clone_fn_ids) {
+    /* Initialize the per-function counter hash table on first call */
+    clone_fn_ids = hash_map<const char *, unsigned>::create_ggc (64);
+  }
+  suffix_counter = &clone_fn_ids->get_or_insert(decl_name);
+  *suffix_counter = *suffix_counter + 1;
+  return clone_function_name_1 (decl_name, suffix, *suffix_counter);
 }


diff --git gcc/lto/lto-partition.c gcc/lto/lto-partition.c
index c7a5710..4b15232 100644
--- gcc/lto/lto-partition.c
+++ gcc/lto/lto-partition.c
@@ -965,7 +965,7 @@ privatize_symbol_name_1 (symtab_node *node, tree decl)
   name = maybe_rewrite_identifier (name);
   symtab->change_decl_assembler_name (decl,
                                      clone_function_name_1 (name,
-                                                            "lto_priv"));
+                                                            "lto_priv", 0));

   if (node->lto_file_data)
     lto_record_renamed_decl (node->lto_file_data, name,
diff --git gcc/testsuite/gcc.dg/independent-cloneids-1.c
gcc/testsuite/gcc.dg/independent-cloneids-1.c
new file mode 100644
index 0000000..d723e20
--- /dev/null
+++ gcc/testsuite/gcc.dg/independent-cloneids-1.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fipa-cp -fipa-cp-clone -fdump-ipa-cp"  } */
+
+extern int printf (const char *, ...);
+
+static int __attribute__ ((noinline))
+foo (int arg)
+{
+  return 7 * arg;
+}
+
+static int __attribute__ ((noinline))
+bar (int arg)
+{
+  return arg * arg;
+}
+
+int
+baz (int arg)
+{
+  printf("%d\n", bar (3));
+  printf("%d\n", bar (4));
+  printf("%d\n", foo (5));
+  printf("%d\n", foo (6));
+  /* adding or removing the following call should not affect foo
+     function's clone numbering */
+  printf("%d\n", bar (7));
+  return foo (8);
+}
+
+/* { dg-final { scan-ipa-dump "Function bar.constprop.0" "cp" } } */
+/* { dg-final { scan-ipa-dump "Function bar.constprop.1" "cp" } } */
+/* { dg-final { scan-ipa-dump "Function bar.constprop.3" "cp" } } */
+/* { dg-final { scan-ipa-dump "Function foo.constprop.0" "cp" } } */
+/* { dg-final { scan-ipa-dump "Function foo.constprop.1" "cp" } } */
+/* { dg-final { scan-ipa-dump "Function foo.constprop.2" "cp" } } */
+/* { dg-final { scan-ipa-dump-not "Function foo.constprop.3" "cp" } } */
+/* { dg-final { scan-ipa-dump-not "Function foo.constprop.4" "cp" } } */
-- 
2.7.4

Attachment: signature.asc
Description: OpenPGP digital signature

Reply via email to