On 11/02/2017 11:58 PM, Sandipan Das wrote:
For added security, the layout of some structures can be
randomized by enabling CONFIG_GCC_PLUGIN_RANDSTRUCT. One
such structure is task_struct. To build BPF programs, we
use Clang which does not support this feature. So, if we
attempt to read a field of a structure with a randomized
layout within a BPF program, we do not get the expected
value because of incorrect offsets. To observe this, it
is not mandatory to have CONFIG_GCC_PLUGIN_RANDSTRUCT
enabled because the structure annotations/members added
for this purpose are enough to cause this. So, all kernel
builds are affected.

For example, considering samples/bpf/offwaketime_kern.c,
if we try to print the values of pid and comm inside the
task_struct passed to waker() by adding the following
lines of code at the appropriate place

   char fmt[] = "waker(): p->pid = %u, p->comm = %s\n";
   bpf_trace_printk(fmt, sizeof(fmt), _(p->pid), _(p->comm));

it is seen that upon rebuilding and running this sample
followed by inspecting /sys/kernel/debug/tracing/trace,
the output looks like the following

                                _-----=> irqs-off
                               / _----=> need-resched
                              | / _---=> hardirq/softirq
                              || / _--=> preempt-depth
                              ||| /     delay
             TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
                | |       |   ||||       |         |
           <idle>-0     [007] d.s.  1883.443594: 0x00000001: waker(): p->pid = 0, 
p->comm =
           <idle>-0     [018] d.s.  1883.453588: 0x00000001: waker(): p->pid = 0, 
p->comm =
           <idle>-0     [007] d.s.  1883.463584: 0x00000001: waker(): p->pid = 0, 
p->comm =
           <idle>-0     [009] d.s.  1883.483586: 0x00000001: waker(): p->pid = 0, 
p->comm =
           <idle>-0     [005] d.s.  1883.493583: 0x00000001: waker(): p->pid = 0, 
p->comm =
           <idle>-0     [009] d.s.  1883.503583: 0x00000001: waker(): p->pid = 0, 
p->comm =
           <idle>-0     [018] d.s.  1883.513578: 0x00000001: waker(): p->pid = 0, 
p->comm =
  systemd-journal-3140  [003] d...  1883.627660: 0x00000001: waker(): p->pid = 0, 
p->comm =
  systemd-journal-3140  [003] d...  1883.627704: 0x00000001: waker(): p->pid = 0, 
p->comm =
  systemd-journal-3140  [003] d...  1883.627723: 0x00000001: waker(): p->pid = 0, 
p->comm =

To avoid this, we add new BPF helpers that read the
correct values for some of the important task_struct
members such as pid, tgid, comm and flags which are
extensively used in BPF-based analysis tools such as
bcc. Since these helpers are built with GCC, they use
the correct offsets when referencing a member.
Just to add that we were seeing the same issue (but had no clue until looked at this patch , thanks). Its easy to reproduce by running bcc example task_switch.py where pid (prev_pid) is retrieved from struct task_struct and that is always zero. we tried printing other task_struct members such as 'comm' and see that as empty string as well.


-Tushar

Signed-off-by: Sandipan Das <sandi...@linux.vnet.ibm.com>
---
  include/linux/bpf.h                       |  3 ++
  include/uapi/linux/bpf.h                  | 13 ++++++
  kernel/bpf/core.c                         |  3 ++
  kernel/bpf/helpers.c                      | 75 +++++++++++++++++++++++++++++++
  kernel/trace/bpf_trace.c                  |  6 +++
  tools/testing/selftests/bpf/bpf_helpers.h |  9 ++++
  6 files changed, 109 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f1af7d63d678..5993a0f5262b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -418,6 +418,9 @@ extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
  extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
  extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
  extern const struct bpf_func_proto bpf_get_current_comm_proto;
+extern const struct bpf_func_proto bpf_get_task_pid_tgid_proto;
+extern const struct bpf_func_proto bpf_get_task_comm_proto;
+extern const struct bpf_func_proto bpf_get_task_flags_proto;
  extern const struct bpf_func_proto bpf_skb_vlan_push_proto;
  extern const struct bpf_func_proto bpf_skb_vlan_pop_proto;
  extern const struct bpf_func_proto bpf_get_stackid_proto;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f90860d1f897..324508d27bd2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -338,6 +338,16 @@ union bpf_attr {
   *     @skb: pointer to skb
   *     Return: classid if != 0
   *
+ * u64 bpf_get_task_pid_tgid(struct task_struct *task)
+ *     Return: task->tgid << 32 | task->pid
+ *
+ * int bpf_get_task_comm(struct task_struct *task)
+ *     Stores task->comm into buf
+ *     Return: 0 on success or negative error
+ *
+ * u32 bpf_get_task_flags(struct task_struct *task)
+ *     Return: task->flags
+ *
   * int bpf_skb_vlan_push(skb, vlan_proto, vlan_tci)
   *     Return: 0 on success or negative error
   *
@@ -602,6 +612,9 @@ union bpf_attr {
        FN(get_current_uid_gid),        \
        FN(get_current_comm),           \
        FN(get_cgroup_classid),         \
+       FN(get_task_pid_tgid),          \
+       FN(get_task_comm),              \
+       FN(get_task_flags),             \
        FN(skb_vlan_push),              \
        FN(skb_vlan_pop),               \
        FN(skb_get_tunnel_key),         \
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7b62df86be1d..c69c17d6514a 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1438,6 +1438,9 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
  const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
  const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
  const struct bpf_func_proto bpf_get_current_comm_proto __weak;
+const struct bpf_func_proto bpf_get_task_pid_tgid_proto __weak;
+const struct bpf_func_proto bpf_get_task_comm_proto __weak;
+const struct bpf_func_proto bpf_get_task_flags_proto __weak;
  const struct bpf_func_proto bpf_sock_map_update_proto __weak;
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 3d24e238221e..f45259dce117 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -179,3 +179,78 @@ const struct bpf_func_proto bpf_get_current_comm_proto = {
        .arg1_type      = ARG_PTR_TO_UNINIT_MEM,
        .arg2_type      = ARG_CONST_SIZE,
  };
+
+BPF_CALL_1(bpf_get_task_pid_tgid, struct task_struct *, task)
+{
+       int ret;
+       u32 pid, tgid;
+
+       ret = probe_kernel_read(&pid, &task->pid, sizeof(pid));
+       if (unlikely(ret < 0))
+               goto err;
+
+       ret = probe_kernel_read(&tgid, &task->tgid, sizeof(tgid));
+       if (unlikely(ret < 0))
+               goto err;
+
+       return (u64) tgid << 32 | pid;
+err:
+       return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_get_task_pid_tgid_proto = {
+       .func           = bpf_get_task_pid_tgid,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_get_task_comm, struct task_struct *, task, char *, buf, u32, 
size)
+{
+       int ret;
+       char comm[TASK_COMM_LEN];
+
+       ret = probe_kernel_read(comm, task->comm, sizeof(comm));
+       if (unlikely(ret < 0))
+               goto err_clear;
+
+       strncpy(buf, comm, size);
+
+       /* Verifier guarantees that size > 0. For task->comm exceeding
+        * size, guarantee that buf is %NUL-terminated. Unconditionally
+        * done here to save the size test.
+        */
+       buf[size - 1] = 0;
+       return 0;
+err_clear:
+       memset(buf, 0, size);
+       return -EINVAL;
+}
+
+const struct bpf_func_proto bpf_get_task_comm_proto = {
+       .func           = bpf_get_task_comm,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_ANYTHING,
+       .arg2_type      = ARG_PTR_TO_UNINIT_MEM,
+       .arg3_type      = ARG_CONST_SIZE,
+};
+
+BPF_CALL_1(bpf_get_task_flags, struct task_struct *, task)
+{
+       int ret;
+       unsigned int flags;
+
+       ret = probe_kernel_read(&flags, &task->flags, sizeof(flags));
+       if (unlikely(ret < 0))
+               return -EINVAL;
+
+       return flags;
+}
+
+const struct bpf_func_proto bpf_get_task_flags_proto = {
+       .func           = bpf_get_task_flags,
+       .gpl_only       = false,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_ANYTHING,
+};
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index dc498b605d5d..a31f5cf68cbc 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -477,6 +477,12 @@ static const struct bpf_func_proto 
*tracing_func_proto(enum bpf_func_id func_id)
                return &bpf_get_smp_processor_id_proto;
        case BPF_FUNC_get_numa_node_id:
                return &bpf_get_numa_node_id_proto;
+       case BPF_FUNC_get_task_pid_tgid:
+               return &bpf_get_task_pid_tgid_proto;
+       case BPF_FUNC_get_task_comm:
+               return &bpf_get_task_comm_proto;
+       case BPF_FUNC_get_task_flags:
+               return &bpf_get_task_flags_proto;
        case BPF_FUNC_perf_event_read:
                return &bpf_perf_event_read_proto;
        case BPF_FUNC_probe_write_user:
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h 
b/tools/testing/selftests/bpf/bpf_helpers.h
index b2e02bdcd098..8c64df027d2c 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -1,6 +1,8 @@
  #ifndef __BPF_HELPERS_H
  #define __BPF_HELPERS_H
+struct task_struct;
+
  /* helper macro to place programs, maps, license in
   * different sections in elf_bpf file. Section names
   * are interpreted by elf_bpf loader
@@ -31,6 +33,13 @@ static unsigned long long (*bpf_get_current_uid_gid)(void) =
        (void *) BPF_FUNC_get_current_uid_gid;
  static int (*bpf_get_current_comm)(void *buf, int buf_size) =
        (void *) BPF_FUNC_get_current_comm;
+static unsigned long long (*bpf_get_task_pid_tgid)(struct task_struct *task) =
+       (void *) BPF_FUNC_get_task_pid_tgid;
+static int (*bpf_get_task_comm)(struct task_struct *task,
+                               void *buf, int buf_size) =
+       (void *) BPF_FUNC_get_task_comm;
+static unsigned int (*bpf_get_task_flags)(struct task_struct *task) =
+       (void *) BPF_FUNC_get_task_flags;
  static unsigned long long (*bpf_perf_event_read)(void *map,
                                                 unsigned long long flags) =
        (void *) BPF_FUNC_perf_event_read;

Reply via email to