Hi, On Fri, Aug 09 2024, Kewen.Lin wrote: > Hi, > > Gentle ping this patch: > > https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651025.html
I'd like to second this ping, please. Thank you, Martin > > BR, > Kewen > >>> on 2024/7/12 00:15, Martin Jambor wrote: >>>> Hi, >>>> >>>> can I add myself to the bunch of people who are pinging this? >>>> Having >>>> this in will make our life easier. >>>> >>>> Thanks a lot, >>>> >>>> Martin >>>> >>>> >>>> On Wed, May 08 2024, Kewen.Lin wrote: >>>>> Hi, >>>>> >>>>> As the discussion in PR112980, although the current >>>>> implementation for -fpatchable-function-entry* conforms >>>>> with the documentation (making N NOPs be consecutive), >>>>> it's inefficient for both kernel and userspace livepatching >>>>> (see comments in PR for the details). >>>>> >>>>> So this patch is to change the current implementation by >>>>> emitting the "before" NOPs before global entry point and >>>>> the "after" NOPs after local entry point. The new behavior >>>>> would not keep NOPs to be consecutive, so the documentation >>>>> is updated to emphasize this. >>>>> >>>>> Bootstrapped and regress-tested on powerpc64-linux-gnu >>>>> P8/P9 and powerpc64le-linux-gnu P9 and P10. >>>>> >>>>> Is it ok for trunk? And backporting to active branches >>>>> after burn-in time? I guess we should also mention this >>>>> change in changes.html? >>>>> >>>>> BR, >>>>> Kewen >>>>> ----- >>>>> PR target/112980 >>>>> >>>>> gcc/ChangeLog: >>>>> >>>>> * config/rs6000/rs6000-logue.cc >>>>> (rs6000_output_function_prologue): >>>>> Adjust the handling on patch area emitting with dual >>>>> entry, remove >>>>> the restriction on "before" NOPs count, not emit >>>>> "before" NOPs any >>>>> more but only emit "after" NOPs. >>>>> * config/rs6000/rs6000.cc >>>>> (rs6000_print_patchable_function_entry): >>>>> Adjust by respecting cfun->machine- >>>>>> stop_patch_area_print. >>>>> (rs6000_elf_declare_function_name): For ELFv2 with dual >>>>> entry, set >>>>> cfun->machine->stop_patch_area_print as true. >>>>> * config/rs6000/rs6000.h (struct machine_function): >>>>> Remove member >>>>> global_entry_emitted, add new member >>>>> stop_patch_area_print. >>>>> * doc/invoke.texi (option -fpatchable-function-entry): >>>>> Adjust the >>>>> documentation for PowerPC ELFv2 dual entry. >>>>> >>>>> gcc/testsuite/ChangeLog: >>>>> >>>>> * c-c++-common/patchable_function_entry-default.c: >>>>> Adjust. >>>>> * gcc.target/powerpc/pr99888-4.c: Likewise. >>>>> * gcc.target/powerpc/pr99888-5.c: Likewise. >>>>> * gcc.target/powerpc/pr99888-6.c: Likewise. >>>>> --- >>>>> gcc/config/rs6000/rs6000-logue.cc | 40 +++++-------- >>>>> ------ >>>>> gcc/config/rs6000/rs6000.cc | 15 +++++-- >>>>> gcc/config/rs6000/rs6000.h | 10 +++-- >>>>> gcc/doc/invoke.texi | 8 ++-- >>>>> .../patchable_function_entry-default.c | 3 -- >>>>> gcc/testsuite/gcc.target/powerpc/pr99888-4.c | 4 +- >>>>> gcc/testsuite/gcc.target/powerpc/pr99888-5.c | 4 +- >>>>> gcc/testsuite/gcc.target/powerpc/pr99888-6.c | 4 +- >>>>> 8 files changed, 33 insertions(+), 55 deletions(-) >>>>> >>>>> diff --git a/gcc/config/rs6000/rs6000-logue.cc >>>>> b/gcc/config/rs6000/rs6000-logue.cc >>>>> index 60ba15a8bc3..0eb019b44b3 100644 >>>>> --- a/gcc/config/rs6000/rs6000-logue.cc >>>>> +++ b/gcc/config/rs6000/rs6000-logue.cc >>>>> @@ -4006,43 +4006,21 @@ rs6000_output_function_prologue (FILE >>>>> *file) >>>>> fprintf (file, "\tadd 2,2,12\n"); >>>>> } >>>>> >>>>> - unsigned short patch_area_size = crtl->patch_area_size; >>>>> - unsigned short patch_area_entry = crtl->patch_area_entry; >>>>> - /* Need to emit the patching area. */ >>>>> - if (patch_area_size > 0) >>>>> - { >>>>> - cfun->machine->global_entry_emitted = true; >>>>> - /* As ELFv2 ABI shows, the allowable bytes between the >>>>> global >>>>> - and local entry points are 0, 4, 8, 16, 32 and 64 >>>>> when >>>>> - there is a local entry point. Considering there >>>>> are two >>>>> - non-prefixed instructions for global entry point >>>>> prologue >>>>> - (8 bytes), the count for patchable nops before >>>>> local entry >>>>> - point would be 2, 6 and 14. It's possible to >>>>> support those >>>>> - other counts of nops by not making a local entry >>>>> point, but >>>>> - we don't have clear use cases for them, so leave >>>>> them >>>>> - unsupported for now. */ >>>>> - if (patch_area_entry > 0) >>>>> - { >>>>> - if (patch_area_entry != 2 >>>>> - && patch_area_entry != 6 >>>>> - && patch_area_entry != 14) >>>>> - error ("unsupported number of nops before >>>>> function entry (%u)", >>>>> - patch_area_entry); >>>>> - rs6000_print_patchable_function_entry (file, >>>>> patch_area_entry, >>>>> - true); >>>>> - patch_area_size -= patch_area_entry; >>>>> - } >>>>> - } >>>>> - >>>>> fputs ("\t.localentry\t", file); >>>>> assemble_name (file, name); >>>>> fputs (",.-", file); >>>>> assemble_name (file, name); >>>>> fputs ("\n", file); >>>>> /* Emit the nops after local entry. */ >>>>> - if (patch_area_size > 0) >>>>> - rs6000_print_patchable_function_entry (file, >>>>> patch_area_size, >>>>> - patch_area_entry >>>>> == 0); >>>>> + unsigned short patch_area_size = crtl->patch_area_size; >>>>> + unsigned short patch_area_entry = crtl->patch_area_entry; >>>>> + if (patch_area_size > patch_area_entry) >>>>> + { >>>>> + cfun->machine->stop_patch_area_print = false; >>>>> + patch_area_size -= patch_area_entry; >>>>> + rs6000_print_patchable_function_entry (file, >>>>> patch_area_size, >>>>> + >>>>> patch_area_entry == 0); >>>>> + } >>>>> } >>>>> >>>>> else if (rs6000_pcrel_p ()) >>>>> diff --git a/gcc/config/rs6000/rs6000.cc >>>>> b/gcc/config/rs6000/rs6000.cc >>>>> index 6ba9df4f02e..7e70741d59f 100644 >>>>> --- a/gcc/config/rs6000/rs6000.cc >>>>> +++ b/gcc/config/rs6000/rs6000.cc >>>>> @@ -15190,12 +15190,14 @@ rs6000_print_patchable_function_entry >>>>> (FILE *file, >>>>> bool record_p) >>>>> { >>>>> bool global_entry_needed_p = >>>>> rs6000_global_entry_point_prologue_needed_p (); >>>>> - /* For a function which needs global entry point, we will emit >>>>> the >>>>> - patchable area before and after local entry point under the >>>>> control of >>>>> - cfun->machine->global_entry_emitted, see the handling in >>>>> function >>>>> + /* For a function which needs global entry point, we will only >>>>> emit the >>>>> + patchable area after local entry point under the control of >>>>> + !cfun->machine->stop_patch_area_print, see the handling in >>>>> functions >>>>> rs6000_output_function_prologue. */ >>>>> - if (!global_entry_needed_p || cfun->machine- >>>>>> global_entry_emitted) >>>>> + if (!cfun->machine->stop_patch_area_print) >>>>> default_print_patchable_function_entry (file, >>>>> patch_area_size, record_p); >>>>> + else >>>>> + gcc_assert (global_entry_needed_p); >>>>> } >>>>> >>>>> >>>>> enum rtx_code >>>>> @@ -21381,6 +21383,11 @@ rs6000_elf_declare_function_name (FILE >>>>> *file, const char *name, tree decl) >>>>> fprintf (file, "\t.previous\n"); >>>>> } >>>>> ASM_OUTPUT_FUNCTION_LABEL (file, name, decl); >>>>> + /* At this time, the "before" NOPs have been already emitted, >>>>> + let's stop generic code from printing the "after" NOPs and >>>>> + emit just after local entry later. */ >>>>> + if (rs6000_global_entry_point_prologue_needed_p ()) >>>>> + cfun->machine->stop_patch_area_print = true; >>>>> } >>>>> >>>>> static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED; >>>>> diff --git a/gcc/config/rs6000/rs6000.h >>>>> b/gcc/config/rs6000/rs6000.h >>>>> index 68bc45d65ba..73767fdae06 100644 >>>>> --- a/gcc/config/rs6000/rs6000.h >>>>> +++ b/gcc/config/rs6000/rs6000.h >>>>> @@ -2434,10 +2434,12 @@ typedef struct GTY(()) machine_function >>>>> bool lr_is_wrapped_separately; >>>>> bool toc_is_wrapped_separately; >>>>> bool mma_return_type_error; >>>>> - /* Indicate global entry is emitted, only useful when the >>>>> function requires >>>>> - global entry. It helps to control the patchable area >>>>> before and after >>>>> - local entry. */ >>>>> - bool global_entry_emitted; >>>>> + /* With ELFv2 ABI dual entry points being adopted, generic >>>>> framework >>>>> + targetm.asm_out.print_patchable_function_entry would >>>>> generate "after" >>>>> + NOPs before local entry, it is wrong. This flag is to stop >>>>> it from >>>>> + printing patch area before local entry, it is only useful >>>>> when the >>>>> + function requires dual entry points. */ >>>>> + bool stop_patch_area_print; >>>>> } machine_function; >>>>> #endif >>>>> >>>>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi >>>>> index c584664e168..58e48f7dc55 100644 >>>>> --- a/gcc/doc/invoke.texi >>>>> +++ b/gcc/doc/invoke.texi >>>>> @@ -18363,11 +18363,11 @@ If @code{N=0}, no pad location is >>>>> recorded. >>>>> The NOP instructions are inserted at---and maybe before, >>>>> depending on >>>>> @var{M}---the function entry address, even before the prologue. >>>>> On >>>>> PowerPC with the ELFv2 ABI, for a function with dual entry >>>>> points, >>>>> -the local entry point is this function entry address. >>>>> +@var{M} NOP instructions are inserted before the global entry >>>>> point and >>>>> +@var{N} - @var{M} NOP instructions are inserted after the local >>>>> entry >>>>> +point, which means the NOP instructions may not be consecutive. >>>>> >>>>> -The maximum value of @var{N} and @var{M} is 65535. On PowerPC >>>>> with the >>>>> -ELFv2 ABI, for a function with dual entry points, the supported >>>>> values >>>>> -for @var{M} are 0, 2, 6 and 14. >>>>> +The maximum value of @var{N} and @var{M} is 65535. >>>>> @end table >>>>> >>>>> >>>>> diff --git a/gcc/testsuite/c-c++-common/patchable_function_entry- >>>>> default.c b/gcc/testsuite/c-c++-common/patchable_function_entry- >>>>> default.c >>>>> index 3ccbafc87db..899938b4aa3 100644 >>>>> --- a/gcc/testsuite/c-c++-common/patchable_function_entry- >>>>> default.c >>>>> +++ b/gcc/testsuite/c-c++-common/patchable_function_entry- >>>>> default.c >>>>> @@ -1,9 +1,6 @@ >>>>> /* { dg-do compile { target { ! { nvptx*-*-* visium-*-* } } } } >>>>> */ >>>>> /* { dg-options "-O2 -fpatchable-function-entry=3,1" } */ >>>>> /* { dg-additional-options "-fno-pie" { target sparc*-*-* } } */ >>>>> -/* See PR99888, one single preceding nop isn't allowed on >>>>> powerpc_elfv2, >>>>> - so overriding with two preceding nops to make it pass there. >>>>> */ >>>>> -/* { dg-additional-options "-fpatchable-function-entry=3,2" { >>>>> target powerpc_elfv2 } } */ >>>>> /* { dg-final { scan-assembler-times "nop|NOP|SWYM" 3 { target { >>>>> ! { alpha*-*-* riscv*-*-* } } } } } */ >>>>> /* { dg-final { scan-assembler-times "bis" 3 { target alpha*-*-* >>>>> } } } */ >>>>> /* { dg-final { scan-assembler-times "nop\n" 3 { target riscv*- >>>>> *-* } } } */ >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr99888-4.c >>>>> b/gcc/testsuite/gcc.target/powerpc/pr99888-4.c >>>>> index 00a8d4d316e..6f23f2bb939 100644 >>>>> --- a/gcc/testsuite/gcc.target/powerpc/pr99888-4.c >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-4.c >>>>> @@ -2,12 +2,10 @@ >>>>> /* There is no global entry point prologue with pcrel. */ >>>>> /* { dg-options "-mno-pcrel -fpatchable-function-entry=1,1" } */ >>>>> >>>>> -/* Verify one error emitted for unexpected 1 nop before local >>>>> - entry. */ >>>>> +/* Verify there is no error with 1 nop before local entry. */ >>>>> >>>>> extern int a; >>>>> >>>>> int test (int b) { >>>>> return a + b; >>>>> } >>>>> -/* { dg-error "unsupported number of nops before function entry >>>>> \\(1\\)" "" { target *-*-* } .-1 } */ >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr99888-5.c >>>>> b/gcc/testsuite/gcc.target/powerpc/pr99888-5.c >>>>> index 39d3b4465f1..13f192ebd20 100644 >>>>> --- a/gcc/testsuite/gcc.target/powerpc/pr99888-5.c >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-5.c >>>>> @@ -2,12 +2,10 @@ >>>>> /* There is no global entry point prologue with pcrel. */ >>>>> /* { dg-options "-mno-pcrel -fpatchable-function-entry=7,3" } */ >>>>> >>>>> -/* Verify one error emitted for unexpected 3 nops before local >>>>> - entry. */ >>>>> +/* Verify no error emitted for 3 nops before local entry. */ >>>>> >>>>> extern int a; >>>>> >>>>> int test (int b) { >>>>> return a + b; >>>>> } >>>>> -/* { dg-error "unsupported number of nops before function entry >>>>> \\(3\\)" "" { target *-*-* } .-1 } */ >>>>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr99888-6.c >>>>> b/gcc/testsuite/gcc.target/powerpc/pr99888-6.c >>>>> index c6c18dcc7ac..431c69cae9a 100644 >>>>> --- a/gcc/testsuite/gcc.target/powerpc/pr99888-6.c >>>>> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-6.c >>>>> @@ -2,8 +2,7 @@ >>>>> /* There is no global entry point prologue with pcrel. */ >>>>> /* { dg-options "-mno-pcrel" } */ >>>>> >>>>> -/* Verify one error emitted for unexpected 4 nops before local >>>>> - entry. */ >>>>> +/* Verify no error emitted for 4 nops before local entry. */ >>>>> >>>>> extern int a; >>>>> >>>>> @@ -11,4 +10,3 @@ __attribute__ ((patchable_function_entry (20, >>>>> 4))) >>>>> int test (int b) { >>>>> return a + b; >>>>> } >>>>> -/* { dg-error "unsupported number of nops before function entry >>>>> \\(4\\)" "" { target *-*-* } .-1 } */ >>>>> -- >>>>> 2.39.1 >>> >>