Hi, Gentle ping this patch:
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651025.html BR, Kewen on 2024/7/12 00:15, Martin Jambor wrote: > Hi, > > can I add myself to the bunch of people who are pinging this? Having > this in will make our life easier. > > Thanks a lot, > > Martin > > > On Wed, May 08 2024, Kewen.Lin wrote: >> Hi, >> >> As the discussion in PR112980, although the current >> implementation for -fpatchable-function-entry* conforms >> with the documentation (making N NOPs be consecutive), >> it's inefficient for both kernel and userspace livepatching >> (see comments in PR for the details). >> >> So this patch is to change the current implementation by >> emitting the "before" NOPs before global entry point and >> the "after" NOPs after local entry point. The new behavior >> would not keep NOPs to be consecutive, so the documentation >> is updated to emphasize this. >> >> Bootstrapped and regress-tested on powerpc64-linux-gnu >> P8/P9 and powerpc64le-linux-gnu P9 and P10. >> >> Is it ok for trunk? And backporting to active branches >> after burn-in time? I guess we should also mention this >> change in changes.html? >> >> BR, >> Kewen >> ----- >> PR target/112980 >> >> gcc/ChangeLog: >> >> * config/rs6000/rs6000-logue.cc (rs6000_output_function_prologue): >> Adjust the handling on patch area emitting with dual entry, remove >> the restriction on "before" NOPs count, not emit "before" NOPs any >> more but only emit "after" NOPs. >> * config/rs6000/rs6000.cc (rs6000_print_patchable_function_entry): >> Adjust by respecting cfun->machine->stop_patch_area_print. >> (rs6000_elf_declare_function_name): For ELFv2 with dual entry, set >> cfun->machine->stop_patch_area_print as true. >> * config/rs6000/rs6000.h (struct machine_function): Remove member >> global_entry_emitted, add new member stop_patch_area_print. >> * doc/invoke.texi (option -fpatchable-function-entry): Adjust the >> documentation for PowerPC ELFv2 dual entry. >> >> gcc/testsuite/ChangeLog: >> >> * c-c++-common/patchable_function_entry-default.c: Adjust. >> * gcc.target/powerpc/pr99888-4.c: Likewise. >> * gcc.target/powerpc/pr99888-5.c: Likewise. >> * gcc.target/powerpc/pr99888-6.c: Likewise. >> --- >> gcc/config/rs6000/rs6000-logue.cc | 40 +++++-------------- >> gcc/config/rs6000/rs6000.cc | 15 +++++-- >> gcc/config/rs6000/rs6000.h | 10 +++-- >> gcc/doc/invoke.texi | 8 ++-- >> .../patchable_function_entry-default.c | 3 -- >> gcc/testsuite/gcc.target/powerpc/pr99888-4.c | 4 +- >> gcc/testsuite/gcc.target/powerpc/pr99888-5.c | 4 +- >> gcc/testsuite/gcc.target/powerpc/pr99888-6.c | 4 +- >> 8 files changed, 33 insertions(+), 55 deletions(-) >> >> diff --git a/gcc/config/rs6000/rs6000-logue.cc >> b/gcc/config/rs6000/rs6000-logue.cc >> index 60ba15a8bc3..0eb019b44b3 100644 >> --- a/gcc/config/rs6000/rs6000-logue.cc >> +++ b/gcc/config/rs6000/rs6000-logue.cc >> @@ -4006,43 +4006,21 @@ rs6000_output_function_prologue (FILE *file) >> fprintf (file, "\tadd 2,2,12\n"); >> } >> >> - unsigned short patch_area_size = crtl->patch_area_size; >> - unsigned short patch_area_entry = crtl->patch_area_entry; >> - /* Need to emit the patching area. */ >> - if (patch_area_size > 0) >> - { >> - cfun->machine->global_entry_emitted = true; >> - /* As ELFv2 ABI shows, the allowable bytes between the global >> - and local entry points are 0, 4, 8, 16, 32 and 64 when >> - there is a local entry point. Considering there are two >> - non-prefixed instructions for global entry point prologue >> - (8 bytes), the count for patchable nops before local entry >> - point would be 2, 6 and 14. It's possible to support those >> - other counts of nops by not making a local entry point, but >> - we don't have clear use cases for them, so leave them >> - unsupported for now. */ >> - if (patch_area_entry > 0) >> - { >> - if (patch_area_entry != 2 >> - && patch_area_entry != 6 >> - && patch_area_entry != 14) >> - error ("unsupported number of nops before function entry (%u)", >> - patch_area_entry); >> - rs6000_print_patchable_function_entry (file, patch_area_entry, >> - true); >> - patch_area_size -= patch_area_entry; >> - } >> - } >> - >> fputs ("\t.localentry\t", file); >> assemble_name (file, name); >> fputs (",.-", file); >> assemble_name (file, name); >> fputs ("\n", file); >> /* Emit the nops after local entry. */ >> - if (patch_area_size > 0) >> - rs6000_print_patchable_function_entry (file, patch_area_size, >> - patch_area_entry == 0); >> + unsigned short patch_area_size = crtl->patch_area_size; >> + unsigned short patch_area_entry = crtl->patch_area_entry; >> + if (patch_area_size > patch_area_entry) >> + { >> + cfun->machine->stop_patch_area_print = false; >> + patch_area_size -= patch_area_entry; >> + rs6000_print_patchable_function_entry (file, patch_area_size, >> + patch_area_entry == 0); >> + } >> } >> >> else if (rs6000_pcrel_p ()) >> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc >> index 6ba9df4f02e..7e70741d59f 100644 >> --- a/gcc/config/rs6000/rs6000.cc >> +++ b/gcc/config/rs6000/rs6000.cc >> @@ -15190,12 +15190,14 @@ rs6000_print_patchable_function_entry (FILE *file, >> bool record_p) >> { >> bool global_entry_needed_p = rs6000_global_entry_point_prologue_needed_p >> (); >> - /* For a function which needs global entry point, we will emit the >> - patchable area before and after local entry point under the control of >> - cfun->machine->global_entry_emitted, see the handling in function >> + /* For a function which needs global entry point, we will only emit the >> + patchable area after local entry point under the control of >> + !cfun->machine->stop_patch_area_print, see the handling in functions >> rs6000_output_function_prologue. */ >> - if (!global_entry_needed_p || cfun->machine->global_entry_emitted) >> + if (!cfun->machine->stop_patch_area_print) >> default_print_patchable_function_entry (file, patch_area_size, >> record_p); >> + else >> + gcc_assert (global_entry_needed_p); >> } >> >> >> enum rtx_code >> @@ -21381,6 +21383,11 @@ rs6000_elf_declare_function_name (FILE *file, const >> char *name, tree decl) >> fprintf (file, "\t.previous\n"); >> } >> ASM_OUTPUT_FUNCTION_LABEL (file, name, decl); >> + /* At this time, the "before" NOPs have been already emitted, >> + let's stop generic code from printing the "after" NOPs and >> + emit just after local entry later. */ >> + if (rs6000_global_entry_point_prologue_needed_p ()) >> + cfun->machine->stop_patch_area_print = true; >> } >> >> static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED; >> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h >> index 68bc45d65ba..73767fdae06 100644 >> --- a/gcc/config/rs6000/rs6000.h >> +++ b/gcc/config/rs6000/rs6000.h >> @@ -2434,10 +2434,12 @@ typedef struct GTY(()) machine_function >> bool lr_is_wrapped_separately; >> bool toc_is_wrapped_separately; >> bool mma_return_type_error; >> - /* Indicate global entry is emitted, only useful when the function >> requires >> - global entry. It helps to control the patchable area before and after >> - local entry. */ >> - bool global_entry_emitted; >> + /* With ELFv2 ABI dual entry points being adopted, generic framework >> + targetm.asm_out.print_patchable_function_entry would generate "after" >> + NOPs before local entry, it is wrong. This flag is to stop it from >> + printing patch area before local entry, it is only useful when the >> + function requires dual entry points. */ >> + bool stop_patch_area_print; >> } machine_function; >> #endif >> >> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi >> index c584664e168..58e48f7dc55 100644 >> --- a/gcc/doc/invoke.texi >> +++ b/gcc/doc/invoke.texi >> @@ -18363,11 +18363,11 @@ If @code{N=0}, no pad location is recorded. >> The NOP instructions are inserted at---and maybe before, depending on >> @var{M}---the function entry address, even before the prologue. On >> PowerPC with the ELFv2 ABI, for a function with dual entry points, >> -the local entry point is this function entry address. >> +@var{M} NOP instructions are inserted before the global entry point and >> +@var{N} - @var{M} NOP instructions are inserted after the local entry >> +point, which means the NOP instructions may not be consecutive. >> >> -The maximum value of @var{N} and @var{M} is 65535. On PowerPC with the >> -ELFv2 ABI, for a function with dual entry points, the supported values >> -for @var{M} are 0, 2, 6 and 14. >> +The maximum value of @var{N} and @var{M} is 65535. >> @end table >> >> >> diff --git a/gcc/testsuite/c-c++-common/patchable_function_entry-default.c >> b/gcc/testsuite/c-c++-common/patchable_function_entry-default.c >> index 3ccbafc87db..899938b4aa3 100644 >> --- a/gcc/testsuite/c-c++-common/patchable_function_entry-default.c >> +++ b/gcc/testsuite/c-c++-common/patchable_function_entry-default.c >> @@ -1,9 +1,6 @@ >> /* { dg-do compile { target { ! { nvptx*-*-* visium-*-* } } } } */ >> /* { dg-options "-O2 -fpatchable-function-entry=3,1" } */ >> /* { dg-additional-options "-fno-pie" { target sparc*-*-* } } */ >> -/* See PR99888, one single preceding nop isn't allowed on powerpc_elfv2, >> - so overriding with two preceding nops to make it pass there. */ >> -/* { dg-additional-options "-fpatchable-function-entry=3,2" { target >> powerpc_elfv2 } } */ >> /* { dg-final { scan-assembler-times "nop|NOP|SWYM" 3 { target { ! { >> alpha*-*-* riscv*-*-* } } } } } */ >> /* { dg-final { scan-assembler-times "bis" 3 { target alpha*-*-* } } } */ >> /* { dg-final { scan-assembler-times "nop\n" 3 { target riscv*-*-* } } } */ >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr99888-4.c >> b/gcc/testsuite/gcc.target/powerpc/pr99888-4.c >> index 00a8d4d316e..6f23f2bb939 100644 >> --- a/gcc/testsuite/gcc.target/powerpc/pr99888-4.c >> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-4.c >> @@ -2,12 +2,10 @@ >> /* There is no global entry point prologue with pcrel. */ >> /* { dg-options "-mno-pcrel -fpatchable-function-entry=1,1" } */ >> >> -/* Verify one error emitted for unexpected 1 nop before local >> - entry. */ >> +/* Verify there is no error with 1 nop before local entry. */ >> >> extern int a; >> >> int test (int b) { >> return a + b; >> } >> -/* { dg-error "unsupported number of nops before function entry \\(1\\)" "" >> { target *-*-* } .-1 } */ >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr99888-5.c >> b/gcc/testsuite/gcc.target/powerpc/pr99888-5.c >> index 39d3b4465f1..13f192ebd20 100644 >> --- a/gcc/testsuite/gcc.target/powerpc/pr99888-5.c >> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-5.c >> @@ -2,12 +2,10 @@ >> /* There is no global entry point prologue with pcrel. */ >> /* { dg-options "-mno-pcrel -fpatchable-function-entry=7,3" } */ >> >> -/* Verify one error emitted for unexpected 3 nops before local >> - entry. */ >> +/* Verify no error emitted for 3 nops before local entry. */ >> >> extern int a; >> >> int test (int b) { >> return a + b; >> } >> -/* { dg-error "unsupported number of nops before function entry \\(3\\)" "" >> { target *-*-* } .-1 } */ >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr99888-6.c >> b/gcc/testsuite/gcc.target/powerpc/pr99888-6.c >> index c6c18dcc7ac..431c69cae9a 100644 >> --- a/gcc/testsuite/gcc.target/powerpc/pr99888-6.c >> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-6.c >> @@ -2,8 +2,7 @@ >> /* There is no global entry point prologue with pcrel. */ >> /* { dg-options "-mno-pcrel" } */ >> >> -/* Verify one error emitted for unexpected 4 nops before local >> - entry. */ >> +/* Verify no error emitted for 4 nops before local entry. */ >> >> extern int a; >> >> @@ -11,4 +10,3 @@ __attribute__ ((patchable_function_entry (20, 4))) >> int test (int b) { >> return a + b; >> } >> -/* { dg-error "unsupported number of nops before function entry \\(4\\)" "" >> { target *-*-* } .-1 } */ >> -- >> 2.39.1