Hi,

Gentle ping this patch:

https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651025.html

BR,
Kewen

on 2024/7/12 00:15, Martin Jambor wrote:
> Hi,
> 
> can I add myself to the bunch of people who are pinging this?  Having
> this in will make our life easier.
> 
> Thanks a lot,
> 
> Martin
> 
> 
> On Wed, May 08 2024, Kewen.Lin wrote:
>> Hi,
>>
>> As the discussion in PR112980, although the current
>> implementation for -fpatchable-function-entry* conforms
>> with the documentation (making N NOPs be consecutive),
>> it's inefficient for both kernel and userspace livepatching
>> (see comments in PR for the details).
>>
>> So this patch is to change the current implementation by
>> emitting the "before" NOPs before global entry point and
>> the "after" NOPs after local entry point.  The new behavior
>> would not keep NOPs to be consecutive, so the documentation
>> is updated to emphasize this.
>>
>> Bootstrapped and regress-tested on powerpc64-linux-gnu
>> P8/P9 and powerpc64le-linux-gnu P9 and P10.
>>
>> Is it ok for trunk?  And backporting to active branches
>> after burn-in time?  I guess we should also mention this
>> change in changes.html?
>>
>> BR,
>> Kewen
>> -----
>>      PR target/112980
>>
>> gcc/ChangeLog:
>>
>>      * config/rs6000/rs6000-logue.cc (rs6000_output_function_prologue):
>>      Adjust the handling on patch area emitting with dual entry, remove
>>      the restriction on "before" NOPs count, not emit "before" NOPs any
>>      more but only emit "after" NOPs.
>>      * config/rs6000/rs6000.cc (rs6000_print_patchable_function_entry):
>>      Adjust by respecting cfun->machine->stop_patch_area_print.
>>      (rs6000_elf_declare_function_name): For ELFv2 with dual entry, set
>>      cfun->machine->stop_patch_area_print as true.
>>      * config/rs6000/rs6000.h (struct machine_function): Remove member
>>      global_entry_emitted, add new member stop_patch_area_print.
>>      * doc/invoke.texi (option -fpatchable-function-entry): Adjust the
>>      documentation for PowerPC ELFv2 dual entry.
>>
>> gcc/testsuite/ChangeLog:
>>
>>      * c-c++-common/patchable_function_entry-default.c: Adjust.
>>      * gcc.target/powerpc/pr99888-4.c: Likewise.
>>      * gcc.target/powerpc/pr99888-5.c: Likewise.
>>      * gcc.target/powerpc/pr99888-6.c: Likewise.
>> ---
>>  gcc/config/rs6000/rs6000-logue.cc             | 40 +++++--------------
>>  gcc/config/rs6000/rs6000.cc                   | 15 +++++--
>>  gcc/config/rs6000/rs6000.h                    | 10 +++--
>>  gcc/doc/invoke.texi                           |  8 ++--
>>  .../patchable_function_entry-default.c        |  3 --
>>  gcc/testsuite/gcc.target/powerpc/pr99888-4.c  |  4 +-
>>  gcc/testsuite/gcc.target/powerpc/pr99888-5.c  |  4 +-
>>  gcc/testsuite/gcc.target/powerpc/pr99888-6.c  |  4 +-
>>  8 files changed, 33 insertions(+), 55 deletions(-)
>>
>> diff --git a/gcc/config/rs6000/rs6000-logue.cc 
>> b/gcc/config/rs6000/rs6000-logue.cc
>> index 60ba15a8bc3..0eb019b44b3 100644
>> --- a/gcc/config/rs6000/rs6000-logue.cc
>> +++ b/gcc/config/rs6000/rs6000-logue.cc
>> @@ -4006,43 +4006,21 @@ rs6000_output_function_prologue (FILE *file)
>>        fprintf (file, "\tadd 2,2,12\n");
>>      }
>>
>> -      unsigned short patch_area_size = crtl->patch_area_size;
>> -      unsigned short patch_area_entry = crtl->patch_area_entry;
>> -      /* Need to emit the patching area.  */
>> -      if (patch_area_size > 0)
>> -    {
>> -      cfun->machine->global_entry_emitted = true;
>> -      /* As ELFv2 ABI shows, the allowable bytes between the global
>> -         and local entry points are 0, 4, 8, 16, 32 and 64 when
>> -         there is a local entry point.  Considering there are two
>> -         non-prefixed instructions for global entry point prologue
>> -         (8 bytes), the count for patchable nops before local entry
>> -         point would be 2, 6 and 14.  It's possible to support those
>> -         other counts of nops by not making a local entry point, but
>> -         we don't have clear use cases for them, so leave them
>> -         unsupported for now.  */
>> -      if (patch_area_entry > 0)
>> -        {
>> -          if (patch_area_entry != 2
>> -              && patch_area_entry != 6
>> -              && patch_area_entry != 14)
>> -            error ("unsupported number of nops before function entry (%u)",
>> -                   patch_area_entry);
>> -          rs6000_print_patchable_function_entry (file, patch_area_entry,
>> -                                                 true);
>> -          patch_area_size -= patch_area_entry;
>> -        }
>> -    }
>> -
>>        fputs ("\t.localentry\t", file);
>>        assemble_name (file, name);
>>        fputs (",.-", file);
>>        assemble_name (file, name);
>>        fputs ("\n", file);
>>        /* Emit the nops after local entry.  */
>> -      if (patch_area_size > 0)
>> -    rs6000_print_patchable_function_entry (file, patch_area_size,
>> -                                           patch_area_entry == 0);
>> +      unsigned short patch_area_size = crtl->patch_area_size;
>> +      unsigned short patch_area_entry = crtl->patch_area_entry;
>> +      if (patch_area_size > patch_area_entry)
>> +    {
>> +      cfun->machine->stop_patch_area_print = false;
>> +      patch_area_size -= patch_area_entry;
>> +      rs6000_print_patchable_function_entry (file, patch_area_size,
>> +                                             patch_area_entry == 0);
>> +    }
>>      }
>>
>>    else if (rs6000_pcrel_p ())
>> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
>> index 6ba9df4f02e..7e70741d59f 100644
>> --- a/gcc/config/rs6000/rs6000.cc
>> +++ b/gcc/config/rs6000/rs6000.cc
>> @@ -15190,12 +15190,14 @@ rs6000_print_patchable_function_entry (FILE *file,
>>                                     bool record_p)
>>  {
>>    bool global_entry_needed_p = rs6000_global_entry_point_prologue_needed_p 
>> ();
>> -  /* For a function which needs global entry point, we will emit the
>> -     patchable area before and after local entry point under the control of
>> -     cfun->machine->global_entry_emitted, see the handling in function
>> +  /* For a function which needs global entry point, we will only emit the
>> +     patchable area after local entry point under the control of
>> +     !cfun->machine->stop_patch_area_print, see the handling in functions
>>       rs6000_output_function_prologue.  */
>> -  if (!global_entry_needed_p || cfun->machine->global_entry_emitted)
>> +  if (!cfun->machine->stop_patch_area_print)
>>      default_print_patchable_function_entry (file, patch_area_size, 
>> record_p);
>> +  else
>> +    gcc_assert (global_entry_needed_p);
>>  }
>>
>>
>>  enum rtx_code
>> @@ -21381,6 +21383,11 @@ rs6000_elf_declare_function_name (FILE *file, const 
>> char *name, tree decl)
>>        fprintf (file, "\t.previous\n");
>>      }
>>    ASM_OUTPUT_FUNCTION_LABEL (file, name, decl);
>> +  /* At this time, the "before" NOPs have been already emitted,
>> +     let's stop generic code from printing the "after" NOPs and
>> +     emit just after local entry later.  */
>> +  if (rs6000_global_entry_point_prologue_needed_p ())
>> +    cfun->machine->stop_patch_area_print = true;
>>  }
>>
>>  static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
>> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
>> index 68bc45d65ba..73767fdae06 100644
>> --- a/gcc/config/rs6000/rs6000.h
>> +++ b/gcc/config/rs6000/rs6000.h
>> @@ -2434,10 +2434,12 @@ typedef struct GTY(()) machine_function
>>    bool lr_is_wrapped_separately;
>>    bool toc_is_wrapped_separately;
>>    bool mma_return_type_error;
>> -  /* Indicate global entry is emitted, only useful when the function 
>> requires
>> -     global entry.  It helps to control the patchable area before and after
>> -     local entry.  */
>> -  bool global_entry_emitted;
>> +  /* With ELFv2 ABI dual entry points being adopted, generic framework
>> +     targetm.asm_out.print_patchable_function_entry would generate "after"
>> +     NOPs before local entry, it is wrong.  This flag is to stop it from
>> +     printing patch area before local entry, it is only useful when the
>> +     function requires dual entry points.  */
>> +  bool stop_patch_area_print;
>>  } machine_function;
>>  #endif
>>
>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>> index c584664e168..58e48f7dc55 100644
>> --- a/gcc/doc/invoke.texi
>> +++ b/gcc/doc/invoke.texi
>> @@ -18363,11 +18363,11 @@ If @code{N=0}, no pad location is recorded.
>>  The NOP instructions are inserted at---and maybe before, depending on
>>  @var{M}---the function entry address, even before the prologue.  On
>>  PowerPC with the ELFv2 ABI, for a function with dual entry points,
>> -the local entry point is this function entry address.
>> +@var{M} NOP instructions are inserted before the global entry point and
>> +@var{N} - @var{M} NOP instructions are inserted after the local entry
>> +point, which means the NOP instructions may not be consecutive.
>>
>> -The maximum value of @var{N} and @var{M} is 65535.  On PowerPC with the
>> -ELFv2 ABI, for a function with dual entry points, the supported values
>> -for @var{M} are 0, 2, 6 and 14.
>> +The maximum value of @var{N} and @var{M} is 65535.
>>  @end table
>>
>>
>> diff --git a/gcc/testsuite/c-c++-common/patchable_function_entry-default.c 
>> b/gcc/testsuite/c-c++-common/patchable_function_entry-default.c
>> index 3ccbafc87db..899938b4aa3 100644
>> --- a/gcc/testsuite/c-c++-common/patchable_function_entry-default.c
>> +++ b/gcc/testsuite/c-c++-common/patchable_function_entry-default.c
>> @@ -1,9 +1,6 @@
>>  /* { dg-do compile { target { ! { nvptx*-*-* visium-*-* } } } } */
>>  /* { dg-options "-O2 -fpatchable-function-entry=3,1" } */
>>  /* { dg-additional-options "-fno-pie" { target sparc*-*-* } } */
>> -/* See PR99888, one single preceding nop isn't allowed on powerpc_elfv2,
>> -   so overriding with two preceding nops to make it pass there.  */
>> -/* { dg-additional-options "-fpatchable-function-entry=3,2" { target 
>> powerpc_elfv2 } } */
>>  /* { dg-final { scan-assembler-times "nop|NOP|SWYM" 3 { target { ! { 
>> alpha*-*-* riscv*-*-* } } } } } */
>>  /* { dg-final { scan-assembler-times "bis" 3 { target alpha*-*-* } } } */
>>  /* { dg-final { scan-assembler-times "nop\n" 3 { target riscv*-*-* } } } */
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr99888-4.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr99888-4.c
>> index 00a8d4d316e..6f23f2bb939 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/pr99888-4.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-4.c
>> @@ -2,12 +2,10 @@
>>  /* There is no global entry point prologue with pcrel.  */
>>  /* { dg-options "-mno-pcrel -fpatchable-function-entry=1,1" } */
>>
>> -/* Verify one error emitted for unexpected 1 nop before local
>> -   entry.  */
>> +/* Verify there is no error with 1 nop before local entry.  */
>>
>>  extern int a;
>>
>>  int test (int b) {
>>    return a + b;
>>  }
>> -/* { dg-error "unsupported number of nops before function entry \\(1\\)" "" 
>> { target *-*-* } .-1 } */
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr99888-5.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr99888-5.c
>> index 39d3b4465f1..13f192ebd20 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/pr99888-5.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-5.c
>> @@ -2,12 +2,10 @@
>>  /* There is no global entry point prologue with pcrel.  */
>>  /* { dg-options "-mno-pcrel -fpatchable-function-entry=7,3" } */
>>
>> -/* Verify one error emitted for unexpected 3 nops before local
>> -   entry.  */
>> +/* Verify no error emitted for 3 nops before local entry.  */
>>
>>  extern int a;
>>
>>  int test (int b) {
>>    return a + b;
>>  }
>> -/* { dg-error "unsupported number of nops before function entry \\(3\\)" "" 
>> { target *-*-* } .-1 } */
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr99888-6.c 
>> b/gcc/testsuite/gcc.target/powerpc/pr99888-6.c
>> index c6c18dcc7ac..431c69cae9a 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/pr99888-6.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-6.c
>> @@ -2,8 +2,7 @@
>>  /* There is no global entry point prologue with pcrel.  */
>>  /* { dg-options "-mno-pcrel" } */
>>
>> -/* Verify one error emitted for unexpected 4 nops before local
>> -   entry.  */
>> +/* Verify no error emitted for 4 nops before local entry.  */
>>
>>  extern int a;
>>
>> @@ -11,4 +10,3 @@ __attribute__ ((patchable_function_entry (20, 4)))
>>  int test (int b) {
>>    return a + b;
>>  }
>> -/* { dg-error "unsupported number of nops before function entry \\(4\\)" "" 
>> { target *-*-* } .-1 } */
>> --
>> 2.39.1

Reply via email to