Hi Christophe,

On 29/09/23 2:09 pm, Christophe Leroy wrote:


Le 28/09/2023 à 21:48, Hari Bathini a écrit :
patch_instruction() entails setting up pte, patching the instruction,
clearing the pte and flushing the tlb. If multiple instructions need
to be patched, every instruction would have to go through the above
drill unnecessarily. Instead, introduce function patch_instructions()
that sets up the pte, clears the pte and flushes the tlb only once per
page range of instructions to be patched. This adds a slight overhead
to patch_instruction() call while improving the patching time for
scenarios where more than one instruction needs to be patched.

On my powerpc8xx, this patch leads to an increase of about 8% of the
time needed to activate ftrace function tracer.

Interesting! My observation on ppc64le was somewhat different.
With single cpu, average ticks were almost similar with and without
the patch (~1580). I saw a performance degradation of less than
0.6% without vs with this patch to activate function tracer.

Ticks to activate function tracer in 15 attempts without
this patch (avg: 108734089):
106619626
111712292
111030404
111021344
111313530
106253773
107156175
106887038
107215379
108646636
108040287
108311770
107842343
106894310
112066423

Ticks to activate function tracer in 15 attempts with
this patch (avg: 109328578):
109378357
108794095
108595381
107622142
110689418
107287276
107132093
112540481
111311830
112608265
102883923
112054554
111762570
109874309
107393979

I used the below patch for the experiment:

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index b00112d7ad4..0979d12d00c 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -19,6 +19,10 @@
 #include <asm/page.h>
 #include <asm/code-patching.h>
 #include <asm/inst.h>
+#include <asm/time.h>
+
+unsigned long patching_time;
+unsigned long num_times;

static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr)
 {
@@ -353,7 +357,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
        return err;
 }

-int patch_instruction(u32 *addr, ppc_inst_t instr)
+int ___patch_instruction(u32 *addr, ppc_inst_t instr)
 {
        int err;
        unsigned long flags;
@@ -376,6 +380,19 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)

        return err;
 }
+
+int patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+       u64 start;
+       int err;
+
+       start = get_tb();
+       err = ___patch_instruction(addr, instr);
+       patching_time += (get_tb() - start);
+       num_times++;
+
+       return err;
+}
 NOKPROBE_SYMBOL(patch_instruction);

 int patch_branch(u32 *addr, unsigned long target, int flags)
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 1d4bc493b2f..f52694cfeab 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -35,6 +35,18 @@ static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
 #define KERNEL_ATTR_RW(_name) \
 static struct kobj_attribute _name##_attr = __ATTR_RW(_name)

+unsigned long patch_avgtime;
+extern unsigned long patching_time;
+extern unsigned long num_times;
+
+static ssize_t patching_avgtime_show(struct kobject *kobj,
+                                    struct kobj_attribute *attr, char *buf)
+{
+       patch_avgtime = patching_time / num_times;
+       return sysfs_emit(buf, "%lu\n", patch_avgtime);
+}
+KERNEL_ATTR_RO(patching_avgtime);
+
 /* current uevent sequence number */
 static ssize_t uevent_seqnum_show(struct kobject *kobj,
                                  struct kobj_attribute *attr, char *buf)
@@ -250,6 +262,7 @@ struct kobject *kernel_kobj;
 EXPORT_SYMBOL_GPL(kernel_kobj);

 static struct attribute * kernel_attrs[] = {
+       &patching_avgtime_attr.attr,
        &fscaps_attr.attr,
        &uevent_seqnum_attr.attr,
        &cpu_byteorder_attr.attr,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index abaaf516fca..5eb950bcab9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -50,6 +50,7 @@
 #include <linux/workqueue.h>

 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
+#include <asm/time.h>

 #include "trace.h"
 #include "trace_output.h"
@@ -6517,6 +6518,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
        bool had_max_tr;
 #endif
        int ret = 0;
+       u64 start;

        mutex_lock(&trace_types_lock);

@@ -6536,6 +6538,10 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
                ret = -EINVAL;
                goto out;
        }
+
+       pr_warn("Current tracer: %s, Changing to tracer: %s\n",
+               tr->current_trace->name, t->name);
+       start = get_tb();
        if (t == tr->current_trace)
                goto out;

@@ -6614,6 +6620,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
        tr->current_trace->enabled++;
        trace_branch_enable(tr);
  out:
+       pr_warn("Time taken to enable tracer is %llu\n", (get_tb() - start));
        mutex_unlock(&trace_types_lock);

        return ret;

Thanks
Hari

Reply via email to