On 08/25/2017 09:05 PM, David Ahern wrote:
Add support for recursively applying sock filters attached to a cgroup.
For now, start with the inner cgroup attached to the socket and work back
to the root or first cgroup without the recursive flag set. Once the
recursive flag is set for a cgroup, all descendant cgroups must have the
flag as well.
Signed-off-by: David Ahern <dsah...@gmail.com>
[...]
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f71f5e07d82d..595e31b30f23 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -151,6 +151,15 @@ enum bpf_attach_type {
*/
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
+/* If BPF_F_RECURSIVE flag is used in BPF_PROG_ATTACH command
+ * cgroups are walked recursively back to the root cgroup or the
+ * first cgroup without the flag set, running any attached program.
+ * Once the flag is set, it MUST be set for all descendant cgroups.
+ */
+#define BPF_F_RECURSIVE (1U << 1)
+
+#define BPF_F_ALL_ATTACH_FLAGS (BPF_F_ALLOW_OVERRIDE | BPF_F_RECURSIVE)
+
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
* verifier will perform strict alignment checking as if the kernel
* has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 546113430049..eb1f436c18fb 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -47,10 +47,16 @@ void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
unsigned int type;
for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
- struct bpf_prog *e;
+ struct bpf_prog *e = NULL;
+
+ /* do not need to set effective program if cgroups are
+ * walked recursively
+ */
+ cgrp->bpf.is_recursive[type] = parent->bpf.is_recursive[type];
+ if (!cgrp->bpf.is_recursive[type])
+		e = rcu_dereference_protected(parent->bpf.effective[type],
+					      lockdep_is_held(&cgroup_mutex));
[...]
- e = rcu_dereference_protected(parent->bpf.effective[type],
- lockdep_is_held(&cgroup_mutex));
rcu_assign_pointer(cgrp->bpf.effective[type], e);
cgrp->bpf.disallow_override[type] =
parent->bpf.disallow_override[type];
}
[...]
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d5774a6851f1..a1ab5dbaae89 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1187,7 +1187,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
if (CHECK_ATTR(BPF_PROG_ATTACH))
return -EINVAL;
- if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
+ if (attr->attach_flags & ~BPF_F_ALL_ATTACH_FLAGS)
return -EINVAL;
switch (attr->attach_type) {
@@ -1222,7 +1222,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
}
ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
- attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
+ attr->attach_flags);
if (ret)
bpf_prog_put(prog);
cgroup_put(cgrp);
@@ -1252,7 +1252,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
if (IS_ERR(cgrp))
return PTR_ERR(cgrp);
- ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
+ ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, 0);
cgroup_put(cgrp);
break;
Can you elaborate on the semantic changes for programs that set the new
flag but do not use the cgroup_bpf_run_filter_sk() helper below to walk
back to the root?
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index df2e0f14a95d..27a4f14435a3 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -5176,14 +5176,35 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
#ifdef CONFIG_CGROUP_BPF
int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
- enum bpf_attach_type type, bool overridable)
+ enum bpf_attach_type type, u32 flags)
{
struct cgroup *parent = cgroup_parent(cgrp);
int ret;
mutex_lock(&cgroup_mutex);
- ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable);
+ ret = __cgroup_bpf_update(cgrp, parent, prog, type, flags);
mutex_unlock(&cgroup_mutex);
return ret;
}
+
+int cgroup_bpf_run_filter_sk(struct sock *sk,
+ enum bpf_attach_type type)
+{
+ struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+ int ret = 0;
+
+ while (cgrp) {
+ ret = __cgroup_bpf_run_filter_sk(cgrp, sk, type);
+ if (ret)
+ break;
+
+ if (!cgrp->bpf.is_recursive[type])
+ break;
+
+ cgrp = cgroup_parent(cgrp);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(cgroup_bpf_run_filter_sk);
#endif /* CONFIG_CGROUP_BPF */