From: Masami Hiramatsu (Google) <[email protected]> When tracing kernel local variables, we sometimes also need to read per-CPU variables. The existing simple dereference is not enough to access them.
Thus, introduce a special +CPU() dereference to access a per-CPU variable on the current CPU (accessing another CPU's variable may race with updates on that CPU). Also introduce +PCPU() for getting the per-CPU pointer. +CPU(pcp) is equal to this_cpu_read(pcp), and +PCPU(pcp) is equal to this_cpu_ptr(pcp). Signed-off-by: Masami Hiramatsu (Google) <[email protected]> --- Documentation/trace/eprobetrace.rst | 3 ++ Documentation/trace/fprobetrace.rst | 3 ++ Documentation/trace/kprobetrace.rst | 3 ++ kernel/trace/trace.c | 1 + kernel/trace/trace_probe.c | 48 +++++++++++++++++++++-------------- kernel/trace/trace_probe.h | 2 + kernel/trace/trace_probe_tmpl.h | 30 ++++++++++++++++++---- 7 files changed, 65 insertions(+), 25 deletions(-) diff --git a/Documentation/trace/eprobetrace.rst b/Documentation/trace/eprobetrace.rst index dcf92d5b4175..0c7878df02f6 100644 --- a/Documentation/trace/eprobetrace.rst +++ b/Documentation/trace/eprobetrace.rst @@ -40,6 +40,9 @@ Synopsis of eprobe_events $comm : Fetch current task comm. $current : Fetch the address of the current task_struct. +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4) + +CPU(FETCHARG) : Fetch the current CPU's copy of the per-CPU variable at FETCHARG. + This is useful for fetching per-CPU variables. + +PCPU(FETCHARG) : Fetch the current CPU's address of the per-CPU variable at FETCHARG. \IMM : Store an immediate value to the argument. NAME=FETCHARG : Set NAME as the argument name of FETCHARG. FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types diff --git a/Documentation/trace/fprobetrace.rst b/Documentation/trace/fprobetrace.rst index 3392cab016b3..c851f98bb310 100644 --- a/Documentation/trace/fprobetrace.rst +++ b/Documentation/trace/fprobetrace.rst @@ -52,6 +52,9 @@ Synopsis of fprobe-events $comm : Fetch current task comm. $current : Fetch the address of the current task_struct. 
+|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*4)(\*5) + +CPU(FETCHARG) : Fetch the current CPU's copy of the per-CPU variable at FETCHARG. + This is useful for fetching per-CPU variables. + +PCPU(FETCHARG) : Fetch the current CPU's address of the per-CPU variable at FETCHARG. \IMM : Store an immediate value to the argument. NAME=FETCHARG : Set NAME as the argument name of FETCHARG. FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index 81e4fe38791d..bc806fd82a91 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -55,6 +55,9 @@ Synopsis of kprobe_events $comm : Fetch current task comm. $current : Fetch the address of the current task_struct. +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4) + +CPU(FETCHARG) : Fetch the current CPU's copy of the per-CPU variable at FETCHARG. + This is useful for fetching per-CPU variables. + +PCPU(FETCHARG) : Fetch the current CPU's address of the per-CPU variable at FETCHARG. \IMM : Store an immediate value to the argument. NAME=FETCHARG : Set NAME as the argument name of FETCHARG. FETCHARG:TYPE : Set TYPE as the type of FETCHARG. 
Currently, basic types diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index e185a006cb08..2b8c8ac4036a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4332,6 +4332,7 @@ static const char readme_msg[] = "\t $stack<index>, $stack, $retval, $comm, $current\n" #endif "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n" + "\t +CPU(<fetcharg>), +PCPU(<fetcharg>)\n" "\t kernel return probes support: $retval, $arg<N>, $comm\n" "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n" "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n" diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 2c5deb1e1463..fa6757222fe6 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -1396,26 +1396,36 @@ parse_probe_arg(char *arg, const struct fetch_type *type, case '+': /* deref memory */ case '-': - if (arg[1] == 'u') { - deref = FETCH_OP_UDEREF; - arg[1] = arg[0]; - arg++; - } - if (arg[0] == '+') - arg++; /* Skip '+', because kstrtol() rejects it. */ - tmp = strchr(arg, '('); - if (!tmp) { - trace_probe_log_err(ctx->offset, DEREF_NEED_BRACE); - return -EINVAL; - } - *tmp = '\0'; - ret = kstrtol(arg, 0, &offset); - if (ret) { - trace_probe_log_err(ctx->offset, BAD_DEREF_OFFS); - break; + if (str_has_prefix(arg, "+CPU(")) { + deref = FETCH_OP_DEREF_CPU; + arg += 5; + ctx->offset += 5; + } else if (str_has_prefix(arg, "+PCPU(")) { + deref = FETCH_OP_CPU_PTR; + arg += 6; + ctx->offset += 6; + } else { + if (arg[1] == 'u') { + deref = FETCH_OP_UDEREF; + arg[1] = arg[0]; + arg++; + } + if (arg[0] == '+') + arg++; /* Skip '+', because kstrtol() rejects it. */ + tmp = strchr(arg, '('); + if (!tmp) { + trace_probe_log_err(ctx->offset, DEREF_NEED_BRACE); + return -EINVAL; + } + *tmp = '\0'; + ret = kstrtol(arg, 0, &offset); + if (ret) { + trace_probe_log_err(ctx->offset, BAD_DEREF_OFFS); + break; + } + ctx->offset += (tmp + 1 - arg) + (arg[0] != '-' ? 
1 : 0); + arg = tmp + 1; } - ctx->offset += (tmp + 1 - arg) + (arg[0] != '-' ? 1 : 0); - arg = tmp + 1; tmp = strrchr(arg, ')'); if (!tmp) { trace_probe_log_err(ctx->offset + strlen(arg), diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index f2b31089779c..bec04bcc4226 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -100,6 +100,8 @@ enum fetch_op { // Stage 2 (dereference) op FETCH_OP_DEREF, /* Dereference: .offset */ FETCH_OP_UDEREF, /* User-space Dereference: .offset */ + FETCH_OP_DEREF_CPU, /* Per-CPU Dereference for this CPU */ + FETCH_OP_CPU_PTR, /* Per-CPU pointer for this CPU */ // Stage 3 (store) ops FETCH_OP_ST_RAW, /* Raw: .size */ FETCH_OP_ST_MEM, /* Mem: .offset, .size */ diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index f630930288d2..82d753decf48 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -129,25 +129,43 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val, struct fetch_insn *s3 = NULL; int total = 0, ret = 0, i = 0; u32 loc = 0; - unsigned long lval = val; + unsigned long lval, llval = val; stage2: /* 2nd stage: dereference memory if needed */ do { - if (code->op == FETCH_OP_DEREF) { - lval = val; + lval = val; + switch (code->op) { + case FETCH_OP_DEREF: ret = probe_mem_read(&val, (void *)val + code->offset, sizeof(val)); - } else if (code->op == FETCH_OP_UDEREF) { - lval = val; + break; + case FETCH_OP_UDEREF: ret = probe_mem_read_user(&val, (void *)val + code->offset, sizeof(val)); - } else break; + case FETCH_OP_DEREF_CPU: + case FETCH_OP_CPU_PTR: + if (!is_kernel_percpu_address(val)) { + ret = -EFAULT; + break; + } + val = (unsigned long)this_cpu_ptr((void __percpu *)val); + if (code->op == FETCH_OP_DEREF_CPU) + ret = probe_mem_read(&val, (void *)val, sizeof(val)); + else + ret = 0; + break; + default: + lval = llval; + goto out; + } if (ret) return ret; + llval = lval; code++; } while (1); +out: s3 = 
code; stage3:
