Andi Kleen <a...@linux.intel.com> writes:

Ping^3!

> Andi Kleen <a...@linux.intel.com> writes:
>
> Ping!^2
>
>> Andi Kleen <a...@firstfloor.org> writes:
>>
>> Ping!
>>
>>> From: Andi Kleen <a...@linux.intel.com>
>>>
>>> When instrumenting programs using __fentry__ it is often useful
>>> to instrument the function return too. Traditionally this
>>> has been done by patching the return address on the stack
>>> frame on entry. However this is fairly complicated (trace
>>> function has to emulate a stack) and also slow because
>>> it causes a branch misprediction on every return.
>>>
>>> Add an option to generate call or nop instrumentation for
>>> every return instead, including patch sections.
>>>
>>> This will increase the program size slightly, but can be a
>>> lot faster and simpler.
>>>
>>> This version only instruments true returns, not sibling
>>> calls or tail recursion. This matches the semantics of the
>>> original stack.
>>>
>>> gcc/:
>>>
>>> 2018-11-04  Andi Kleen  <a...@linux.intel.com>
>>>
>>>     * config/i386/i386-opts.h (enum instrument_return): Add.
>>>     * config/i386/i386.c (output_return_instrumentation): Add.
>>>     (ix86_output_function_return): Call output_return_instrumentation.
>>>     (ix86_output_call_insn): Call output_return_instrumentation.
>>>     * config/i386/i386.opt: Add -minstrument-return=.
>>>     * doc/invoke.texi (-minstrument-return): Document.
>>>
>>> gcc/testsuite/:
>>>
>>> 2018-11-04  Andi Kleen  <a...@linux.intel.com>
>>>
>>>     * gcc.target/i386/returninst1.c: New test.
>>>     * gcc.target/i386/returninst2.c: New test.
>>>     * gcc.target/i386/returninst3.c: New test.
>>> ---
>>>  gcc/config/i386/i386-opts.h                 |  6 ++++
>>>  gcc/config/i386/i386.c                      | 36 +++++++++++++++++++++
>>>  gcc/config/i386/i386.opt                    | 21 ++++++++++++
>>>  gcc/doc/invoke.texi                         | 14 ++++++++
>>>  gcc/testsuite/gcc.target/i386/returninst1.c | 14 ++++++++
>>>  gcc/testsuite/gcc.target/i386/returninst2.c | 21 ++++++++++++
>>>  gcc/testsuite/gcc.target/i386/returninst3.c |  9 ++++++
>>>  7 files changed, 121 insertions(+)
>>>  create mode 100644 gcc/testsuite/gcc.target/i386/returninst1.c
>>>  create mode 100644 gcc/testsuite/gcc.target/i386/returninst2.c
>>>  create mode 100644 gcc/testsuite/gcc.target/i386/returninst3.c
>>>
>>> diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
>>> index 46366cbfa72..35e9413100e 100644
>>> --- a/gcc/config/i386/i386-opts.h
>>> +++ b/gcc/config/i386/i386-opts.h
>>> @@ -119,4 +119,10 @@ enum indirect_branch {
>>>    indirect_branch_thunk_extern
>>>  };
>>>  
>>> +enum instrument_return {
>>> +  instrument_return_none = 0,
>>> +  instrument_return_call,
>>> +  instrument_return_nop5
>>> +};
>>> +
>>>  #endif
>>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>>> index f9ef0b4445b..f7cd94a8139 100644
>>> --- a/gcc/config/i386/i386.c
>>> +++ b/gcc/config/i386/i386.c
>>> @@ -28336,12 +28336,47 @@ ix86_output_indirect_jmp (rtx call_op)
>>>      return "%!jmp\t%A0";
>>>  }
>>>  
>>> +/* Output return instrumentation for current function if needed.  */
>>> +
>>> +static void
>>> +output_return_instrumentation (void)
>>> +{
>>> +  if (ix86_instrument_return != instrument_return_none
>>> +      && flag_fentry
>>> +      && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
>>> +    {
>>> +      if (ix86_flag_record_return)
>>> +   fprintf (asm_out_file, "1:\n");
>>> +      switch (ix86_instrument_return)
>>> +   {
>>> +   case instrument_return_call:
>>> +     fprintf (asm_out_file, "\tcall\t__return__\n");
>>> +     break;
>>> +   case instrument_return_nop5:
>>> +     /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1)  */
>>> +     fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
>>> +     break;
>>> +   case instrument_return_none:
>>> +     break;
>>> +   }
>>> +
>>> +      if (ix86_flag_record_return)
>>> +   {
>>> +     fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
>>> +     fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
>>> +     fprintf (asm_out_file, "\t.previous\n");
>>> +   }
>>> +    }
>>> +}
>>> +
>>>  /* Output function return.  CALL_OP is the jump target.  Add a REP
>>>     prefix to RET if LONG_P is true and function return is kept.  */
>>>  
>>>  const char *
>>>  ix86_output_function_return (bool long_p)
>>>  {
>>> +  output_return_instrumentation ();
>>> +
>>>    if (cfun->machine->function_return_type != indirect_branch_keep)
>>>      {
>>>        char thunk_name[32];
>>> @@ -28454,6 +28489,7 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
>>>  
>>>    if (SIBLING_CALL_P (insn))
>>>      {
>>> +      output_return_instrumentation ();
>>>        if (direct_p)
>>>     {
>>>       if (ix86_nopic_noplt_attribute_p (call_op))
>>> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
>>> index e7fbf9b6f99..5925b75244f 100644
>>> --- a/gcc/config/i386/i386.opt
>>> +++ b/gcc/config/i386/i386.opt
>>> @@ -1063,3 +1063,24 @@ Support WAITPKG built-in functions and code 
>>> generation.
>>>  mcldemote
>>>  Target Report Mask(ISA_CLDEMOTE) Var(ix86_isa_flags2) Save
>>>  Support CLDEMOTE built-in functions and code generation.
>>> +
>>> +minstrument-return=
>>> +Target Report RejectNegative Joined Enum(instrument_return) 
>>> Var(ix86_instrument_return) Init(instrument_return_none)
>>> +Instrument function exit in instrumented functions with __fentry__.
>>> +
>>> +Enum
>>> +Name(instrument_return) Type(enum instrument_return)
>>> +Known choices for return instrumentation with -minstrument-return=
>>> +
>>> +EnumValue
>>> +Enum(instrument_return) String(none) Value(instrument_return_none)
>>> +
>>> +EnumValue
>>> +Enum(instrument_return) String(call) Value(instrument_return_call)
>>> +
>>> +EnumValue
>>> +Enum(instrument_return) String(nop5) Value(instrument_return_nop5)
>>> +
>>> +mrecord-return
>>> +Target Report Var(ix86_flag_record_return) Init(0)
>>> +Generate a __return_loc section pointing to all return instrumentation 
>>> code.
>>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>>> index 1743c64582e..939be3e251b 100644
>>> --- a/gcc/doc/invoke.texi
>>> +++ b/gcc/doc/invoke.texi
>>> @@ -1301,6 +1301,7 @@ See RS/6000 and PowerPC Options.
>>>  -mcmodel=@var{code-model}  -mabi=@var{name}  -maddress-mode=@var{mode} @gol
>>>  -m32  -m64  -mx32  -m16  -miamcu  -mlarge-data-threshold=@var{num} @gol
>>>  -msse2avx  -mfentry  -mrecord-mcount  -mnop-mcount  -m8bit-idiv @gol
>>> +-minstrument-return=@var{type} @gol
>>>  -mavx256-split-unaligned-load  -mavx256-split-unaligned-store @gol
>>>  -malign-data=@var{type}  -mstack-protector-guard=@var{guard} @gol
>>>  -mstack-protector-guard-reg=@var{reg} @gol
>>> @@ -28442,6 +28443,19 @@ the profiling functions as NOPs. This is useful 
>>> when they
>>>  should be patched in later dynamically. This is likely only
>>>  useful together with @option{-mrecord-mcount}.
>>>  
>>> +@item -minstrument-return=@var{type}
>>> +@opindex minstrument-return
>>> +Instrument function exit in -pg -mfentry instrumented functions with
>>> +call to specified function. This only instruments true returns ending
>>> +with ret, but not sibling calls ending with jump. Valid types
>>> +are @var{none} to not instrument, @var{call} to generate a call to 
>>> __return__,
>>> +or @var{nop5} to generate a 5 byte nop.
>>> +
>>> +@item -mrecord-return
>>> +@itemx -mno-record-return
>>> +@opindex mrecord-return
>>> +Generate a __return_loc section pointing to all return instrumentation 
>>> code.
>>> +
>>>  @item -mskip-rax-setup
>>>  @itemx -mno-skip-rax-setup
>>>  @opindex mskip-rax-setup
>>> diff --git a/gcc/testsuite/gcc.target/i386/returninst1.c 
>>> b/gcc/testsuite/gcc.target/i386/returninst1.c
>>> new file mode 100644
>>> index 00000000000..f970e75a774
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/i386/returninst1.c
>>> @@ -0,0 +1,14 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-pg -mfentry -minstrument-return=call -mrecord-return" } 
>>> */
>>> +/* { dg-final { scan-assembler "call.*__return__" } } */
>>> +/* { dg-final { scan-assembler "section.*return_loc" } } */
>>> +
>>> +int func(int a)
>>> +{
>>> +  return a+1;
>>> +}
>>> +
>>> +int func2(int a)
>>> +{
>>> +  return a+1;
>>> +}
>>> diff --git a/gcc/testsuite/gcc.target/i386/returninst2.c 
>>> b/gcc/testsuite/gcc.target/i386/returninst2.c
>>> new file mode 100644
>>> index 00000000000..716b38556dd
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/i386/returninst2.c
>>> @@ -0,0 +1,21 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-pg -mfentry -minstrument-return=nop5 -mrecord-return" } 
>>> */
>>> +/* { dg-final { scan-assembler-times "0x0f, 0x1f, 0x44, 0x00, 0x00" 3 } } 
>>> */
>>> +/* { dg-final { scan-assembler "section.*return_loc" } } */
>>> +
>>> +int func(int a)
>>> +{
>>> +  return a+1;
>>> +}
>>> +
>>> +int func2(int a)
>>> +{
>>> +  return a+1;
>>> +}
>>> +
>>> +extern void func4(int);
>>> +
>>> +int func3(int a)
>>> +{
>>> +  func4(a + 1);
>>> +}
>>> diff --git a/gcc/testsuite/gcc.target/i386/returninst3.c 
>>> b/gcc/testsuite/gcc.target/i386/returninst3.c
>>> new file mode 100644
>>> index 00000000000..5bbc60e8bd4
>>> --- /dev/null
>>> +++ b/gcc/testsuite/gcc.target/i386/returninst3.c
>>> @@ -0,0 +1,9 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-pg -mfentry -minstrument-return=call" } */
>>> +/* { dg-final { scan-assembler-not "call.*__return__" } } */
>>> +
>>> +__attribute__((no_instrument_function))
>>> +int func(int a)
>>> +{
>>> +  return a+1;
>>> +}

Reply via email to