> On Jun 26, 2019, at 6:32 AM, Daniel Borkmann <dan...@iogearbox.net> wrote:
> 
> On 06/25/2019 08:23 PM, Song Liu wrote:
>> This patch introduce unprivileged BPF access. The access control is
>> achieved via device /dev/bpf. Users with access to /dev/bpf are able
>> to access BPF syscall.
>> 
>> Two ioctl command are added to /dev/bpf:
>> 
>> The first two commands get/put permission to access sys_bpf. This
>> permission is noted by setting bit TASK_BPF_FLAG_PERMITTED of
>> current->bpf_flags. This permission cannot be inherited via fork().
>> 
>> Helper function bpf_capable() is added to check whether the task has got
>> permission via /dev/bpf.
>> 
>> Signed-off-by: Song Liu <songliubrav...@fb.com>
> 
> [ Lets Cc Jann so he has a chance to review as he was the one who suggested
>  the idea. ]
> 
>> ---
>> Documentation/ioctl/ioctl-number.txt |  1 +
>> include/linux/bpf.h                  | 12 +++++
>> include/linux/sched.h                |  8 ++++
>> include/uapi/linux/bpf.h             |  5 ++
>> kernel/bpf/arraymap.c                |  2 +-
>> kernel/bpf/cgroup.c                  |  2 +-
>> kernel/bpf/core.c                    |  4 +-
>> kernel/bpf/cpumap.c                  |  2 +-
>> kernel/bpf/devmap.c                  |  2 +-
>> kernel/bpf/hashtab.c                 |  4 +-
>> kernel/bpf/lpm_trie.c                |  2 +-
>> kernel/bpf/offload.c                 |  2 +-
>> kernel/bpf/queue_stack_maps.c        |  2 +-
>> kernel/bpf/reuseport_array.c         |  2 +-
>> kernel/bpf/stackmap.c                |  2 +-
>> kernel/bpf/syscall.c                 | 72 +++++++++++++++++++++-------
>> kernel/bpf/verifier.c                |  2 +-
>> kernel/bpf/xskmap.c                  |  2 +-
>> kernel/fork.c                        |  4 ++
>> net/core/filter.c                    |  6 +--
>> 20 files changed, 104 insertions(+), 34 deletions(-)
>> 
>> diff --git a/Documentation/ioctl/ioctl-number.txt 
>> b/Documentation/ioctl/ioctl-number.txt
>> index c9558146ac58..19998b99d603 100644
>> --- a/Documentation/ioctl/ioctl-number.txt
>> +++ b/Documentation/ioctl/ioctl-number.txt
>> @@ -327,6 +327,7 @@ Code  Seq#(hex)  Include File            Comments
>> 0xB4 00-0F   linux/gpio.h            <mailto:linux-g...@vger.kernel.org>
>> 0xB5 00-0F   uapi/linux/rpmsg.h      
>> <mailto:linux-remotep...@vger.kernel.org>
>> 0xB6 all     linux/fpga-dfl.h
>> +0xBP        01-02   uapi/linux/bpf.h        <mailto:b...@vger.kernel.org>
>> 0xC0 00-0F   linux/usb/iowarrior.h
>> 0xCA 00-0F   uapi/misc/cxl.h
>> 0xCA 10-2F   uapi/misc/ocxl.h
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index a62e7889b0b6..dbba7870f6df 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -14,6 +14,10 @@
>> #include <linux/numa.h>
>> #include <linux/wait.h>
>> #include <linux/u64_stats_sync.h>
>> +#include <linux/sched.h>
>> +#include <linux/capability.h>
>> +
>> +#include <asm/current.h>
>> 
>> struct bpf_verifier_env;
>> struct perf_event;
>> @@ -742,6 +746,12 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const 
>> union bpf_attr *kattr,
>> int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
>>                                   const union bpf_attr *kattr,
>>                                   union bpf_attr __user *uattr);
>> +
>> +static inline bool bpf_capable(int cap)
>> +{
>> +    return test_bit(TASK_BPF_FLAG_PERMITTED, &current->bpf_flags) ||
>> +            capable(cap);
>> +}
>> #else /* !CONFIG_BPF_SYSCALL */
>> static inline struct bpf_prog *bpf_prog_get(u32 ufd)
>> {
>> @@ -874,6 +884,8 @@ static inline int 
>> bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
>> {
>>      return -ENOTSUPP;
>> }
>> +
>> +#define bpf_capable(cap) capable((cap))
>> #endif /* CONFIG_BPF_SYSCALL */
>> 
>> static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
>> diff --git a/include/linux/sched.h b/include/linux/sched.h
>> index 11837410690f..ddd33d4476c5 100644
>> --- a/include/linux/sched.h
>> +++ b/include/linux/sched.h
>> @@ -1200,6 +1200,10 @@ struct task_struct {
>>      unsigned long                   prev_lowest_stack;
>> #endif
>> 
>> +#ifdef CONFIG_BPF_SYSCALL
>> +    unsigned long                   bpf_flags;
>> +#endif
> 
> There are plenty of bits available here:
> 
>        /* --- cacheline 14 boundary (896 bytes) --- */
>        unsigned int               in_execve:1;          /*   896:31  4 */
>        unsigned int               in_iowait:1;          /*   896:30  4 */
>        unsigned int               restore_sigmask:1;    /*   896:29  4 */
>        unsigned int               in_user_fault:1;      /*   896:28  4 */
>        unsigned int               no_cgroup_migration:1; /*   896:27  4 */
>        unsigned int               frozen:1;             /*   896:26  4 */
>        unsigned int               use_memdelay:1;       /*   896:25  4 */
> 
>        /* XXX 25 bits hole, try to pack */
>        /* XXX 4 bytes hole, try to pack */
> 
> Given that bpf is pretty much enabled by default everywhere, I don't think we
> should waste so much space in task_struct just for this flag (pretty sure that
> task_struct is the equivalent of sk_buff that rather needs a diet). Other 
> options
> could be to add to atomic_flags which also still has space.

Good point. Let me find a free bit for it. 

> 
>>      /*
>>       * New fields for task_struct should be added above here, so that
>>       * they are included in the randomized portion of task_struct.
>> @@ -1772,6 +1776,10 @@ static inline void set_task_cpu(struct task_struct 
>> *p, unsigned int cpu)
>> 
>> #endif /* CONFIG_SMP */
[...]
>> +
>> +static long bpf_dev_ioctl(struct file *filp,
>> +                      unsigned int ioctl, unsigned long arg)
>> +{
>> +    switch (ioctl) {
>> +    case BPF_DEV_IOCTL_GET_PERM:
>> +            set_bit(TASK_BPF_FLAG_PERMITTED, &current->bpf_flags);
>> +            break;
>> +    case BPF_DEV_IOCTL_PUT_PERM:
>> +            clear_bit(TASK_BPF_FLAG_PERMITTED, &current->bpf_flags);
> 
> I think the get/put for uapi is a bit misleading, first thought at least for
> me is on get/put_user() when I read the name.

I am not good at naming things. What would be better names here? 

> 
>> +            break;
>> +    default:
>> +            return -EINVAL;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static const struct file_operations bpf_chardev_ops = {
>> +    .unlocked_ioctl = bpf_dev_ioctl,
>> +};
>> +
>> +static struct miscdevice bpf_dev = {
>> +    .minor          = MISC_DYNAMIC_MINOR,
>> +    .name           = "bpf",
>> +    .fops           = &bpf_chardev_ops,
>> +    .mode           = 0440,
>> +    .nodename       = "bpf",
> 
> Here's what kvm does:
> 
> static struct miscdevice kvm_dev = {
>        KVM_MINOR,
>        "kvm",
>        &kvm_chardev_ops,
> };
> 
> Is there an actual reason that mode is not 0 by default in bpf case? Why
> we need to define nodename?

Based on my understanding, mode of 0440 is what we want. If we leave it 
as 0, it will use default value of 0600. I guess we can just set it to 
0440, as user space can change it later anyway. 

I guess we really don't need nodename. I will remove it. 

Thanks,
Song




Reply via email to