Hi,

Gentle ping: https://gcc.gnu.org/pipermail/gcc-patches/2022-August/600277.html

BR,
Kewen

on 2022/8/25 13:50, Kewen.Lin via Gcc-patches wrote:
> Hi,
> 
> As PR99888 and its related show, the current support for
> -fpatchable-function-entry on powerpc ELFv2 doesn't work
> well with global entry existence.  For example, with one
> command line option -fpatchable-function-entry=3,2, it got
> below w/o this patch:
> 
>   .LPFE1:
>         nop
>         nop
>         .type   foo, @function
>   foo:
>         nop
>   .LFB0:
>         .cfi_startproc
>   .LCF0:
>   0:      addis 2,12,.TOC.-.LCF0@ha
>         addi 2,2,.TOC.-.LCF0@l
>         .localentry     foo,.-foo
> 
> , the assembly is unexpected since the patched nops have
> no effects when being entered from local entry.
> 
> This patch is to update the nops patched before and after
> local entry, it looks like:
> 
>         .type   foo, @function
>   foo:
>   .LFB0:
>         .cfi_startproc
>   .LCF0:
>   0:      addis 2,12,.TOC.-.LCF0@ha
>         addi 2,2,.TOC.-.LCF0@l
>         nop
>         nop
>         .localentry     foo,.-foo
>         nop
> 
> Bootstrapped and regtested on powerpc64-linux-gnu P7 & P8,
> and powerpc64le-linux-gnu P9 & P10.
> 
> v4: Change the remaining NOP to nop and update documentation of option
>     -fpatchable-function-entry for PowerPC ELFv2 ABI dual entry points
>     as Segher suggested.
> 
> v3: https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599925.html
> 
> v2: https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599617.html
> 
> v1: https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599461.html
> 
> Is it ok for trunk?
> 
> BR,
> Kewen
> ----
>       PR target/99888
>       PR target/105649
> 
> gcc/ChangeLog:
> 
>       * doc/invoke.texi (option -fpatchable-function-entry): Adjust the
>       documentation for PowerPC ELFv2 ABI dual entry points.
>       * config/rs6000/rs6000-internal.h
>       (rs6000_print_patchable_function_entry): New function declaration.
>       * config/rs6000/rs6000-logue.cc (rs6000_output_function_prologue):
>       Support patchable-function-entry by emitting nops before and after
>       local entry for the function that needs global entry.
>       * config/rs6000/rs6000.cc (rs6000_print_patchable_function_entry): Skip
>       the function that needs global entry till global entry has been
>       emitted.
>       * config/rs6000/rs6000.h (struct machine_function): New bool member
>       global_entry_emitted.
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/powerpc/pr99888-1.c: New test.
>       * gcc.target/powerpc/pr99888-2.c: New test.
>       * gcc.target/powerpc/pr99888-3.c: New test.
>       * gcc.target/powerpc/pr99888-4.c: New test.
>       * gcc.target/powerpc/pr99888-5.c: New test.
>       * gcc.target/powerpc/pr99888-6.c: New test.
>       * c-c++-common/patchable_function_entry-default.c: Adjust for
>       powerpc_elfv2 to avoid compilation error.
> ---
>  gcc/config/rs6000/rs6000-internal.h           |  5 +++
>  gcc/config/rs6000/rs6000-logue.cc             | 32 ++++++++++++++
>  gcc/config/rs6000/rs6000.cc                   | 10 ++++-
>  gcc/config/rs6000/rs6000.h                    |  4 ++
>  gcc/doc/invoke.texi                           |  8 +++-
>  .../patchable_function_entry-default.c        |  3 ++
>  gcc/testsuite/gcc.target/powerpc/pr99888-1.c  | 43 +++++++++++++++++++
>  gcc/testsuite/gcc.target/powerpc/pr99888-2.c  | 43 +++++++++++++++++++
>  gcc/testsuite/gcc.target/powerpc/pr99888-3.c  | 11 +++++
>  gcc/testsuite/gcc.target/powerpc/pr99888-4.c  | 13 ++++++
>  gcc/testsuite/gcc.target/powerpc/pr99888-5.c  | 13 ++++++
>  gcc/testsuite/gcc.target/powerpc/pr99888-6.c  | 14 ++++++
>  12 files changed, 195 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-1.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-2.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-3.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-4.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-5.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-6.c
> 
> diff --git a/gcc/config/rs6000/rs6000-internal.h 
> b/gcc/config/rs6000/rs6000-internal.h
> index 8ee8c987b81..d80c04b5ae5 100644
> --- a/gcc/config/rs6000/rs6000-internal.h
> +++ b/gcc/config/rs6000/rs6000-internal.h
> @@ -183,10 +183,15 @@ extern tree rs6000_fold_builtin (tree fndecl 
> ATTRIBUTE_UNUSED,
>                                tree *args ATTRIBUTE_UNUSED,
>                                bool ignore ATTRIBUTE_UNUSED);
> 
> +extern void rs6000_print_patchable_function_entry (FILE *,
> +                                                unsigned HOST_WIDE_INT,
> +                                                bool);
> +
>  extern bool rs6000_passes_float;
>  extern bool rs6000_passes_long_double;
>  extern bool rs6000_passes_vector;
>  extern bool rs6000_returns_struct;
>  extern bool cpu_builtin_p;
> 
> +
>  #endif
> diff --git a/gcc/config/rs6000/rs6000-logue.cc 
> b/gcc/config/rs6000/rs6000-logue.cc
> index 59fe1c8cb8b..51f55d1d527 100644
> --- a/gcc/config/rs6000/rs6000-logue.cc
> +++ b/gcc/config/rs6000/rs6000-logue.cc
> @@ -4013,11 +4013,43 @@ rs6000_output_function_prologue (FILE *file)
>         fprintf (file, "\tadd 2,2,12\n");
>       }
> 
> +      unsigned short patch_area_size = crtl->patch_area_size;
> +      unsigned short patch_area_entry = crtl->patch_area_entry;
> +      /* Need to emit the patching area.  */
> +      if (patch_area_size > 0)
> +     {
> +       cfun->machine->global_entry_emitted = true;
> +       /* As ELFv2 ABI shows, the allowable bytes between the global
> +          and local entry points are 0, 4, 8, 16, 32 and 64 when
> +          there is a local entry point.  Considering there are two
> +          non-prefixed instructions for global entry point prologue
> +          (8 bytes), the count for patchable nops before local entry
> +          point would be 2, 6 and 14.  It's possible to support those
> +          other counts of nops by not making a local entry point, but
> +          we don't have clear user cases for them, so leave them
> +          unsupported for now.  */
> +       if (patch_area_entry > 0)
> +         {
> +           if (patch_area_entry != 2
> +               && patch_area_entry != 6
> +               && patch_area_entry != 14)
> +             error ("unsupported number of nops before function entry (%u)",
> +                    patch_area_entry);
> +           rs6000_print_patchable_function_entry (file, patch_area_entry,
> +                                                  true);
> +           patch_area_size -= patch_area_entry;
> +         }
> +     }
> +
>        fputs ("\t.localentry\t", file);
>        assemble_name (file, name);
>        fputs (",.-", file);
>        assemble_name (file, name);
>        fputs ("\n", file);
> +      /* Emit the nops after local entry.  */
> +      if (patch_area_size > 0)
> +     rs6000_print_patchable_function_entry (file, patch_area_size,
> +                                            patch_area_entry == 0);
>      }
> 
>    else if (rs6000_pcrel_p ())
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index 777a06599c3..c79a0ee7a49 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -14898,8 +14898,14 @@ rs6000_print_patchable_function_entry (FILE *file,
>    if (!(TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
>        && HAVE_GAS_SECTION_LINK_ORDER)
>      flags |= SECTION_LINK_ORDER;
> -  default_print_patchable_function_entry_1 (file, patch_area_size, record_p,
> -                                         flags);
> +  bool global_entry_needed_p = rs6000_global_entry_point_prologue_needed_p 
> ();
> +  /* For a function which needs global entry point, we will emit the
> +     patchable area before and after local entry point under the control of
> +     cfun->machine->global_entry_emitted, see the handling in function
> +     rs6000_output_function_prologue.  */
> +  if (!global_entry_needed_p || cfun->machine->global_entry_emitted)
> +    default_print_patchable_function_entry_1 (file, patch_area_size, 
> record_p,
> +                                           flags);
>  }
> 
> 
>  enum rtx_code
> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> index ad9bf0f7358..c352421e87c 100644
> --- a/gcc/config/rs6000/rs6000.h
> +++ b/gcc/config/rs6000/rs6000.h
> @@ -2439,6 +2439,10 @@ typedef struct GTY(()) machine_function
>    bool lr_is_wrapped_separately;
>    bool toc_is_wrapped_separately;
>    bool mma_return_type_error;
> +  /* Indicate global entry is emitted, only useful when the function requires
> +     global entry.  It helps to control the patchable area before and after
> +     local entry.  */
> +  bool global_entry_emitted;
>  } machine_function;
>  #endif
> 
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index b7c75028789..c98f5ceb0b9 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -16717,9 +16717,13 @@ the area size or to remove it completely on a single 
> function.
>  If @code{N=0}, no pad location is recorded.
> 
>  The NOP instructions are inserted at---and maybe before, depending on
> -@var{M}---the function entry address, even before the prologue.
> +@var{M}---the function entry address, even before the prologue.  On
> +PowerPC with the ELFv2 ABI, for one function with dual entry points,
> +the local entry point is taken as the function entry for generation.
> 
> -The maximum value of @var{N} and @var{M} is 65535.
> +The maximum value of @var{N} and @var{M} is 65535.  On PowerPC with the
> +ELFv2 ABI, for one function with dual entry points, the supported values
> +for @var{M} are 0, 2, 6 and 14.
>  @end table
> 
> 
> diff --git a/gcc/testsuite/c-c++-common/patchable_function_entry-default.c 
> b/gcc/testsuite/c-c++-common/patchable_function_entry-default.c
> index 7036f7bfbea..a501efccb19 100644
> --- a/gcc/testsuite/c-c++-common/patchable_function_entry-default.c
> +++ b/gcc/testsuite/c-c++-common/patchable_function_entry-default.c
> @@ -1,6 +1,9 @@
>  /* { dg-do compile { target { ! { nvptx*-*-* visium-*-* } } } } */
>  /* { dg-options "-O2 -fpatchable-function-entry=3,1" } */
>  /* { dg-additional-options "-fno-pie" { target sparc*-*-* } } */
> +/* See PR99888, one single preceding nop isn't allowed on powerpc_elfv2,
> +   so overriding with two preceding nops to make it pass there.  */
> +/* { dg-additional-options "-fpatchable-function-entry=3,2" { target 
> powerpc_elfv2 } } */
>  /* { dg-final { scan-assembler-times "nop|NOP|SWYM" 3 { target { ! { 
> alpha*-*-* } } } } } */
>  /* { dg-final { scan-assembler-times "bis" 3 { target alpha*-*-* } } } */
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr99888-1.c 
> b/gcc/testsuite/gcc.target/powerpc/pr99888-1.c
> new file mode 100644
> index 00000000000..9370b4e7438
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-1.c
> @@ -0,0 +1,43 @@
> +/* Verify no errors for different nops after local entry on ELFv2.  */
> +
> +extern int a;
> +
> +__attribute__ ((noipa, patchable_function_entry (1, 0)))
> +int test1 (int b) {
> +  return a + b;
> +}
> +
> +__attribute__ ((noipa, patchable_function_entry (2, 0)))
> +int test2 (int b) {
> +  return a + b;
> +}
> +
> +__attribute__ ((noipa, patchable_function_entry (3, 0)))
> +int test3 (int b) {
> +  return a + b;
> +}
> +
> +__attribute__ ((noipa, patchable_function_entry (4, 0)))
> +int test4 (int b) {
> +  return a + b;
> +}
> +
> +__attribute__ ((noipa, patchable_function_entry (5, 0)))
> +int test5 (int b) {
> +  return a + b;
> +}
> +
> +__attribute__ ((noipa, patchable_function_entry (6, 0)))
> +int test6 (int b) {
> +  return a + b;
> +}
> +
> +__attribute__ ((noipa, patchable_function_entry (7, 0)))
> +int test7 (int b) {
> +  return a + b;
> +}
> +
> +__attribute__ ((noipa, patchable_function_entry (8, 0)))
> +int test8 (int b) {
> +  return a + b;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr99888-2.c 
> b/gcc/testsuite/gcc.target/powerpc/pr99888-2.c
> new file mode 100644
> index 00000000000..45061712602
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-2.c
> @@ -0,0 +1,43 @@
> +/* Verify no errors for 2, 6 and 14 nops before local entry on ELFv2.  */
> +
> +extern int a;
> +
> +__attribute__ ((noipa, patchable_function_entry (2, 2)))
> +int test1 (int b) {
> +  return a + b;
> +}
> +
> +__attribute__ ((noipa, patchable_function_entry (4, 2)))
> +int test2 (int b) {
> +  return a + b;
> +}
> +
> +__attribute__ ((noipa, patchable_function_entry (6, 6)))
> +int test3 (int b) {
> +  return a + b;
> +}
> +
> +__attribute__ ((noipa, patchable_function_entry (8, 6)))
> +int test4 (int b) {
> +  return a + b;
> +}
> +
> +__attribute__ ((noipa, patchable_function_entry (16, 6)))
> +int test5 (int b) {
> +  return a + b;
> +}
> +
> +__attribute__ ((noipa, patchable_function_entry (14, 14)))
> +int test6 (int b) {
> +  return a + b;
> +}
> +
> +__attribute__ ((noipa, patchable_function_entry (28, 14)))
> +int test7 (int b) {
> +  return a + b;
> +}
> +
> +__attribute__ ((noipa, patchable_function_entry (64, 14)))
> +int test8 (int b) {
> +  return a + b;
> +}
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr99888-3.c 
> b/gcc/testsuite/gcc.target/powerpc/pr99888-3.c
> new file mode 100644
> index 00000000000..4531ae32036
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-3.c
> @@ -0,0 +1,11 @@
> +/* { dg-options "-fpatchable-function-entry=1" } */
> +
> +/* Verify no errors on ELFv2, using command line option instead of
> +   function attribute.  */
> +
> +extern int a;
> +
> +int test (int b) {
> +  return a + b;
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr99888-4.c 
> b/gcc/testsuite/gcc.target/powerpc/pr99888-4.c
> new file mode 100644
> index 00000000000..00a8d4d316e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-4.c
> @@ -0,0 +1,13 @@
> +/* { dg-require-effective-target powerpc_elfv2 } */
> +/* There is no global entry point prologue with pcrel.  */
> +/* { dg-options "-mno-pcrel -fpatchable-function-entry=1,1" } */
> +
> +/* Verify one error emitted for unexpected 1 nop before local
> +   entry.  */
> +
> +extern int a;
> +
> +int test (int b) {
> +  return a + b;
> +}
> +/* { dg-error "unsupported number of nops before function entry \\(1\\)" "" 
> { target *-*-* } .-1 } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr99888-5.c 
> b/gcc/testsuite/gcc.target/powerpc/pr99888-5.c
> new file mode 100644
> index 00000000000..39d3b4465f1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-5.c
> @@ -0,0 +1,13 @@
> +/* { dg-require-effective-target powerpc_elfv2 } */
> +/* There is no global entry point prologue with pcrel.  */
> +/* { dg-options "-mno-pcrel -fpatchable-function-entry=7,3" } */
> +
> +/* Verify one error emitted for unexpected 3 nops before local
> +   entry.  */
> +
> +extern int a;
> +
> +int test (int b) {
> +  return a + b;
> +}
> +/* { dg-error "unsupported number of nops before function entry \\(3\\)" "" 
> { target *-*-* } .-1 } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr99888-6.c 
> b/gcc/testsuite/gcc.target/powerpc/pr99888-6.c
> new file mode 100644
> index 00000000000..c6c18dcc7ac
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr99888-6.c
> @@ -0,0 +1,14 @@
> +/* { dg-require-effective-target powerpc_elfv2 } */
> +/* There is no global entry point prologue with pcrel.  */
> +/* { dg-options "-mno-pcrel" } */
> +
> +/* Verify one error emitted for unexpected 4 nops before local
> +   entry.  */
> +
> +extern int a;
> +
> +__attribute__ ((patchable_function_entry (20, 4)))
> +int test (int b) {
> +  return a + b;
> +}
> +/* { dg-error "unsupported number of nops before function entry \\(4\\)" "" 
> { target *-*-* } .-1 } */
> --
> 2.27.0
> 
> 
> 

Reply via email to