Andi Kleen <a...@linux.intel.com> writes: Ping^3!
> Andi Kleen <a...@linux.intel.com> writes: > > Ping!^2 > >> Andi Kleen <a...@firstfloor.org> writes: >> >> Ping! >> >>> From: Andi Kleen <a...@linux.intel.com> >>> >>> When instrumenting programs using __fentry__ it is often useful >>> to instrument the function return too. Traditionally this >>> has been done by patching the return address on the stack >>> frame on entry. However this is fairly complicated (trace >>> function has to emulate a stack) and also slow because >>> it causes a branch misprediction on every return. >>> >>> Add an option to generate call or nop instrumentation for >>> every return instead, including patch sections. >>> >>> This will increase the program size slightly, but can be a >>> lot faster and simpler. >>> >>> This version only instruments true returns, not sibling >>> calls or tail recursion. This matches the semantics of the >>> original stack. >>> >>> gcc/: >>> >>> 2018-11-04 Andi Kleen <a...@linux.intel.com> >>> >>> * config/i386/i386-opts.h (enum instrument_return): Add. >>> * config/i386/i386.c (output_return_instrumentation): Add. >>> (ix86_output_function_return): Call output_return_instrumentation. >>> (ix86_output_call_insn): Call output_return_instrumentation. >>> * config/i386/i386.opt: Add -minstrument-return=. >>> * doc/invoke.texi (-minstrument-return): Document. >>> >>> gcc/testsuite/: >>> >>> 2018-11-04 Andi Kleen <a...@linux.intel.com> >>> >>> * gcc.target/i386/returninst1.c: New test. >>> * gcc.target/i386/returninst2.c: New test. >>> * gcc.target/i386/returninst3.c: New test. >>> --- >>> gcc/config/i386/i386-opts.h | 6 ++++ >>> gcc/config/i386/i386.c | 36 +++++++++++++++++++++ >>> gcc/config/i386/i386.opt | 21 ++++++++++++ >>> gcc/doc/invoke.texi | 14 ++++++++ >>> gcc/testsuite/gcc.target/i386/returninst1.c | 14 ++++++++ >>> gcc/testsuite/gcc.target/i386/returninst2.c | 21 ++++++++++++ >>> gcc/testsuite/gcc.target/i386/returninst3.c | 9 ++++++ >>> 7 files changed, 121 insertions(+) >>> create mode 100644 gcc/testsuite/gcc.target/i386/returninst1.c >>> create mode 100644 gcc/testsuite/gcc.target/i386/returninst2.c >>> create mode 100644 gcc/testsuite/gcc.target/i386/returninst3.c >>> >>> diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h >>> index 46366cbfa72..35e9413100e 100644 >>> --- a/gcc/config/i386/i386-opts.h >>> +++ b/gcc/config/i386/i386-opts.h >>> @@ -119,4 +119,10 @@ enum indirect_branch { >>> indirect_branch_thunk_extern >>> }; >>> >>> +enum instrument_return { >>> + instrument_return_none = 0, >>> + instrument_return_call, >>> + instrument_return_nop5 >>> +}; >>> + >>> #endif >>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c >>> index f9ef0b4445b..f7cd94a8139 100644 >>> --- a/gcc/config/i386/i386.c >>> +++ b/gcc/config/i386/i386.c >>> @@ -28336,12 +28336,47 @@ ix86_output_indirect_jmp (rtx call_op) >>> return "%!jmp\t%A0"; >>> } >>> >>> +/* Output return instrumentation for current function if needed. */ >>> + >>> +static void >>> +output_return_instrumentation (void) >>> +{ >>> + if (ix86_instrument_return != instrument_return_none >>> + && flag_fentry >>> + && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl)) >>> + { >>> + if (ix86_flag_record_return) >>> + fprintf (asm_out_file, "1:\n"); >>> + switch (ix86_instrument_return) >>> + { >>> + case instrument_return_call: >>> + fprintf (asm_out_file, "\tcall\t__return__\n"); >>> + break; >>> + case instrument_return_nop5: >>> + /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ >>> + fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); >>> + break; >>> + case instrument_return_none: >>> + break; >>> + } >>> + >>> + if (ix86_flag_record_return) >>> + { >>> + fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n"); >>> + fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); >>> + fprintf (asm_out_file, "\t.previous\n"); >>> + } >>> + } >>> +} >>> + >>> /* Output function return. CALL_OP is the jump target. Add a REP >>> prefix to RET if LONG_P is true and function return is kept. */ >>> >>> const char * >>> ix86_output_function_return (bool long_p) >>> { >>> + output_return_instrumentation (); >>> + >>> if (cfun->machine->function_return_type != indirect_branch_keep) >>> { >>> char thunk_name[32]; >>> @@ -28454,6 +28489,7 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op) >>> >>> if (SIBLING_CALL_P (insn)) >>> { >>> + output_return_instrumentation (); >>> if (direct_p) >>> { >>> if (ix86_nopic_noplt_attribute_p (call_op)) >>> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt >>> index e7fbf9b6f99..5925b75244f 100644 >>> --- a/gcc/config/i386/i386.opt >>> +++ b/gcc/config/i386/i386.opt >>> @@ -1063,3 +1063,24 @@ Support WAITPKG built-in functions and code >>> generation. >>> mcldemote >>> Target Report Mask(ISA_CLDEMOTE) Var(ix86_isa_flags2) Save >>> Support CLDEMOTE built-in functions and code generation. >>> + >>> +minstrument-return= >>> +Target Report RejectNegative Joined Enum(instrument_return) >>> Var(ix86_instrument_return) Init(instrument_return_none) >>> +Instrument function exit in instrumented functions with __fentry__. >>> + >>> +Enum >>> +Name(instrument_return) Type(enum instrument_return) >>> +Known choices for return instrumentation with -minstrument-return= >>> + >>> +EnumValue >>> +Enum(instrument_return) String(none) Value(instrument_return_none) >>> + >>> +EnumValue >>> +Enum(instrument_return) String(call) Value(instrument_return_call) >>> + >>> +EnumValue >>> +Enum(instrument_return) String(nop5) Value(instrument_return_nop5) >>> + >>> +mrecord-return >>> +Target Report Var(ix86_flag_record_return) Init(0) >>> +Generate a __return_loc section pointing to all return instrumentation >>> code. >>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi >>> index 1743c64582e..939be3e251b 100644 >>> --- a/gcc/doc/invoke.texi >>> +++ b/gcc/doc/invoke.texi >>> @@ -1301,6 +1301,7 @@ See RS/6000 and PowerPC Options. >>> -mcmodel=@var{code-model} -mabi=@var{name} -maddress-mode=@var{mode} @gol >>> -m32 -m64 -mx32 -m16 -miamcu -mlarge-data-threshold=@var{num} @gol >>> -msse2avx -mfentry -mrecord-mcount -mnop-mcount -m8bit-idiv @gol >>> +-minstrument-return=@var{type} @gol >>> -mavx256-split-unaligned-load -mavx256-split-unaligned-store @gol >>> -malign-data=@var{type} -mstack-protector-guard=@var{guard} @gol >>> -mstack-protector-guard-reg=@var{reg} @gol >>> @@ -28442,6 +28443,19 @@ the profiling functions as NOPs. This is useful >>> when they >>> should be patched in later dynamically. This is likely only >>> useful together with @option{-mrecord-mcount}. >>> >>> +@item -minstrument-return=@var{type} >>> +@opindex minstrument-return >>> +Instrument function exit in -pg -mfentry instrumented functions with >>> +call to specified function. This only instruments true returns ending >>> +with ret, but not sibling calls ending with jump. Valid types >>> +are @var{none} to not instrument, @var{call} to generate a call to >>> __return__, >>> +or @var{nop5} to generate a 5 byte nop. >>> + >>> +@item -mrecord-return >>> +@itemx -mno-record-return >>> +@opindex mrecord-return >>> +Generate a __return_loc section pointing to all return instrumentation >>> code. >>> + >>> @item -mskip-rax-setup >>> @itemx -mno-skip-rax-setup >>> @opindex mskip-rax-setup >>> diff --git a/gcc/testsuite/gcc.target/i386/returninst1.c >>> b/gcc/testsuite/gcc.target/i386/returninst1.c >>> new file mode 100644 >>> index 00000000000..f970e75a774 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/i386/returninst1.c >>> @@ -0,0 +1,14 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-pg -mfentry -minstrument-return=call -mrecord-return" } >>> */ >>> +/* { dg-final { scan-assembler "call.*__return__" } } */ >>> +/* { dg-final { scan-assembler "section.*return_loc" } } */ >>> + >>> +int func(int a) >>> +{ >>> + return a+1; >>> +} >>> + >>> +int func2(int a) >>> +{ >>> + return a+1; >>> +} >>> diff --git a/gcc/testsuite/gcc.target/i386/returninst2.c >>> b/gcc/testsuite/gcc.target/i386/returninst2.c >>> new file mode 100644 >>> index 00000000000..716b38556dd >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/i386/returninst2.c >>> @@ -0,0 +1,21 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-pg -mfentry -minstrument-return=nop5 -mrecord-return" } >>> */ >>> +/* { dg-final { scan-assembler-times "0x0f, 0x1f, 0x44, 0x00, 0x00" 3 } } >>> */ >>> +/* { dg-final { scan-assembler "section.*return_loc" } } */ >>> + >>> +int func(int a) >>> +{ >>> + return a+1; >>> +} >>> + >>> +int func2(int a) >>> +{ >>> + return a+1; >>> +} >>> + >>> +extern void func4(int); >>> + >>> +int func3(int a) >>> +{ >>> + func4(a + 1); >>> +} >>> diff --git a/gcc/testsuite/gcc.target/i386/returninst3.c >>> b/gcc/testsuite/gcc.target/i386/returninst3.c >>> new file mode 100644 >>> index 00000000000..5bbc60e8bd4 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/i386/returninst3.c >>> @@ -0,0 +1,9 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-options "-pg -mfentry -minstrument-return=call" } */ >>> +/* { dg-final { scan-assembler-not "call.*__return__" } } */ >>> + >>> +__attribute__((no_instrument_function)) >>> +int func(int a) >>> +{ >>> + return a+1; >>> +}