Previously, memory allocated for a map was not accounted, and therefore it
could not be taken into consideration by the cgroup memory controller.
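The essence of the change is to conditionally OR __GFP_ACCOUNT into the gfp
mask of every allocation backing a map, so that those allocations are charged
to the allocating task's memory cgroup. A minimal sketch of that pattern (the
helper name is made up for illustration; the real call sites are in the diff
below):

    #include <linux/gfp.h>
    #include <linux/slab.h>

    /* Illustrative only, not part of the patch: with __GFP_ACCOUNT set,
     * the allocation is charged to the memory cgroup of the current
     * task, which lets the cgroup controller see and limit it.
     */
    static void *example_map_kmalloc(size_t size, bool account_mem)
    {
            gfp_t gfp = GFP_USER | __GFP_NOWARN;

            if (account_mem)
                    gfp |= __GFP_ACCOUNT;

            return kmalloc(size, gfp);
    }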
This patch introduces the BPF_F_ACCOUNT_MEM flag, which enables memory
accounting for a map. It can be set in map_flags when the map is created
(BPF_MAP_CREATE). When enabled, only the amount of memory which is charged
against the RLIMIT_MEMLOCK limit is accounted.

To validate the change, first create the memory cgroup "test-map":

  # mkdir /sys/fs/cgroup/memory/test-map

Then run the following program against the cgroup:

  $ cat test_map.c
  <..>
  int main() {
          usleep(3 * 1000000);
          assert(bpf_create_map(BPF_MAP_TYPE_HASH, 8, 16, 65536, 0) > 0);
          usleep(3 * 1000000);
  }

  # cgexec -g memory:test-map ./test_map &
  # cat /sys/fs/cgroup/memory/test-map/memory{,.kmem}.usage_in_bytes
  397312
  258048

  <after 3 sec the map has been created>

  # bpftool map list
  19: hash  flags 0x0
          key 8B  value 16B  max_entries 65536  memlock 5771264B

  # cat /sys/fs/cgroup/memory/test-map/memory{,.kmem}.usage_in_bytes
  401408
  262144

As we can see, the memory allocated for the map is not accounted, as
397312B + 5771264B > 401408B.

Next, enable the accounting and re-run the test:

  $ cat test_map.c
  <..>
  int main() {
          usleep(3 * 1000000);
          assert(bpf_create_map(BPF_MAP_TYPE_HASH, 8, 16, 65536, BPF_F_ACCOUNT_MEM) > 0);
          usleep(3 * 1000000);
  }

  # cgexec -g memory:test-map ./test_map &
  # cat /sys/fs/cgroup/memory/test-map/memory{,.kmem}.usage_in_bytes
  450560
  307200

  <after 3 sec the map has been created>

  # bpftool map list
  20: hash  flags 0x80
          key 8B  value 16B  max_entries 65536  memlock 5771264B

  # cat /sys/fs/cgroup/memory/test-map/memory{,.kmem}.usage_in_bytes
  6221824
  6078464

This time, the memory (including kmem) is accounted, as
450560B + 5771264B <= 6221824B.

Signed-off-by: Martynas Pumputis <m...@lambda.lt>
---
 include/linux/bpf.h            |  5 +++--
 include/uapi/linux/bpf.h       |  2 ++
 kernel/bpf/arraymap.c          | 14 +++++++++-----
 kernel/bpf/bpf_lru_list.c      | 11 +++++++++--
 kernel/bpf/bpf_lru_list.h      |  1 +
 kernel/bpf/cpumap.c            | 12 +++++++++---
 kernel/bpf/devmap.c            | 10 ++++++++--
 kernel/bpf/hashtab.c           | 19 ++++++++++++++-----
 kernel/bpf/lpm_trie.c          | 19 +++++++++++++------
 kernel/bpf/queue_stack_maps.c  |  5 +++--
 kernel/bpf/reuseport_array.c   |  3 ++-
 kernel/bpf/stackmap.c          | 12 ++++++++----
 kernel/bpf/syscall.c           | 12 ++++++++----
 kernel/bpf/xskmap.c            |  9 +++++++--
 net/core/sock_map.c            | 13 +++++++++----
 tools/include/uapi/linux/bpf.h |  3 +++
 16 files changed, 108 insertions(+), 42 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e734f163bd0b..353a3f4304fe 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -79,7 +79,8 @@ struct bpf_map {
         u32 btf_value_type_id;
         struct btf *btf;
         bool unpriv_array;
-        /* 55 bytes hole */
+        bool account_mem;
+        /* 54 bytes hole */
 
         /* The 3rd and 4th cacheline with misc members to avoid false sharing
          * particularly with refcounting.
@@ -506,7 +507,7 @@ void bpf_map_put(struct bpf_map *map);
 int bpf_map_precharge_memlock(u32 pages);
 int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
 void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
-void *bpf_map_area_alloc(size_t size, int numa_node);
+void *bpf_map_area_alloc(size_t size, int numa_node, bool account_mem);
 void bpf_map_area_free(void *base);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 91c43884f295..a374ccbaa51b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -278,6 +278,8 @@ enum bpf_attach_type {
 #define BPF_F_NO_COMMON_LRU     (1U << 1)
 /* Specify numa node during map creation */
 #define BPF_F_NUMA_NODE         (1U << 2)
+/* Enable memory accounting for map */
+#define BPF_F_ACCOUNT_MEM       (1U << 7)
 
 #define BPF_OBJ_NAME_LEN 16U
 
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 25632a75d630..86417f2e6f1b 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -34,14 +34,17 @@ static void bpf_array_free_percpu(struct bpf_array *array)
         }
 }
 
-static int bpf_array_alloc_percpu(struct bpf_array *array)
+static int bpf_array_alloc_percpu(struct bpf_array *array, bool account_mem)
 {
         void __percpu *ptr;
+        gfp_t gfp = GFP_USER | __GFP_NOWARN;
         int i;
 
+        if (account_mem)
+                gfp |= __GFP_ACCOUNT;
+
         for (i = 0; i < array->map.max_entries; i++) {
-                ptr = __alloc_percpu_gfp(array->elem_size, 8,
-                                         GFP_USER | __GFP_NOWARN);
+                ptr = __alloc_percpu_gfp(array->elem_size, 8, gfp);
                 if (!ptr) {
                         bpf_array_free_percpu(array);
                         return -ENOMEM;
@@ -82,6 +85,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
         u32 elem_size, index_mask, max_entries;
         bool unpriv = !capable(CAP_SYS_ADMIN);
         u64 cost, array_size, mask64;
+        bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
         struct bpf_array *array;
 
         elem_size = round_up(attr->value_size, 8);
@@ -129,7 +133,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
                 return ERR_PTR(ret);
 
         /* allocate all map elements and zero-initialize them */
-        array = bpf_map_area_alloc(array_size, numa_node);
+        array = bpf_map_area_alloc(array_size, numa_node, account_mem);
         if (!array)
                 return ERR_PTR(-ENOMEM);
         array->index_mask = index_mask;
@@ -140,7 +144,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
         array->map.pages = cost;
         array->elem_size = elem_size;
 
-        if (percpu && bpf_array_alloc_percpu(array)) {
+        if (percpu && bpf_array_alloc_percpu(array, account_mem)) {
                 bpf_map_area_free(array);
                 return ERR_PTR(-ENOMEM);
         }
diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index e6ef4401a138..4d58537e0af2 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -7,6 +7,7 @@
 #include <linux/cpumask.h>
 #include <linux/spinlock.h>
 #include <linux/percpu.h>
+#include <linux/gfp.h>
 
 #include "bpf_lru_list.h"
 
@@ -646,12 +647,17 @@ static void bpf_lru_list_init(struct bpf_lru_list *l)
 }
 
 int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
+                 bool account_mem,
                  del_from_htab_func del_from_htab, void *del_arg)
 {
+        gfp_t gfp = GFP_KERNEL;
         int cpu;
 
+        if (account_mem)
+                gfp |= __GFP_ACCOUNT;
+
         if (percpu) {
-                lru->percpu_lru = alloc_percpu(struct bpf_lru_list);
+                lru->percpu_lru = alloc_percpu_gfp(struct bpf_lru_list, gfp);
                 if (!lru->percpu_lru)
                         return -ENOMEM;
 
@@ -665,7 +671,8 @@ int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
         } else {
                 struct bpf_common_lru *clru = &lru->common_lru;
 
-                clru->local_list = alloc_percpu(struct bpf_lru_locallist);
+                clru->local_list = alloc_percpu_gfp(struct bpf_lru_locallist,
+                                                    gfp);
                 if (!clru->local_list)
                         return -ENOMEM;
 
diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
index 7d4f89b7cb84..89566665592b 100644
--- a/kernel/bpf/bpf_lru_list.h
+++ b/kernel/bpf/bpf_lru_list.h
@@ -74,6 +74,7 @@ static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node)
 }
 
 int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
+                 bool account_mem,
                  del_from_htab_func del_from_htab, void *delete_arg);
 void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
                       u32 elem_size, u32 nr_elems);
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 8974b3755670..1e84bf78716e 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -81,6 +81,8 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
         struct bpf_cpu_map *cmap;
         int err = -ENOMEM;
         u64 cost;
+        bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
+        gfp_t gfp = GFP_KERNEL;
         int ret;
 
         if (!capable(CAP_SYS_ADMIN))
@@ -117,16 +119,20 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
                 goto free_cmap;
         }
 
+        if (account_mem)
+                gfp |= __GFP_ACCOUNT;
         /* A per cpu bitfield with a bit per possible CPU in map */
-        cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr),
-                                            __alignof__(unsigned long));
+        cmap->flush_needed = __alloc_percpu_gfp(cpu_map_bitmap_size(attr),
+                                                __alignof__(unsigned long),
+                                                gfp);
         if (!cmap->flush_needed)
                 goto free_cmap;
 
         /* Alloc array for possible remote "destination" CPUs */
         cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
                                            sizeof(struct bpf_cpu_map_entry *),
-                                           cmap->map.numa_node);
+                                           cmap->map.numa_node,
+                                           account_mem);
         if (!cmap->cpu_map)
                 goto free_percpu;
 
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 191b79948424..acfc1b35aa51 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -90,6 +90,8 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
         struct bpf_dtab *dtab;
         int err = -EINVAL;
         u64 cost;
+        bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
+        gfp_t gfp;
 
         if (!capable(CAP_NET_ADMIN))
                 return ERR_PTR(-EPERM);
@@ -120,16 +122,20 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 
         err = -ENOMEM;
 
+        gfp = GFP_KERNEL | __GFP_NOWARN;
+        if (account_mem)
+                gfp |= __GFP_ACCOUNT;
         /* A per cpu bitfield with a bit per possible net device */
         dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr),
                                                 __alignof__(unsigned long),
-                                                GFP_KERNEL | __GFP_NOWARN);
+                                                gfp);
         if (!dtab->flush_needed)
                 goto free_dtab;
 
         dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
                                               sizeof(struct bpf_dtab_netdev *),
-                                              dtab->map.numa_node);
+                                              dtab->map.numa_node,
+                                              account_mem);
         if (!dtab->netdev_map)
                 goto free_dtab;
 
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 4b7c76765d9d..fc2f44451256 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -23,6 +23,7 @@
 
 #define HTAB_CREATE_FLAG_MASK                                           \
         (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE |    \
+         BPF_F_ACCOUNT_MEM |                                            \
          BPF_F_RDONLY | BPF_F_WRONLY | BPF_F_ZERO_SEED)
 
 struct bucket {
@@ -139,27 +140,32 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
         return NULL;
 }
 
-static int prealloc_init(struct bpf_htab *htab)
+static int prealloc_init(struct bpf_htab *htab, bool account_mem)
 {
         u32 num_entries = htab->map.max_entries;
+        gfp_t gfp = GFP_USER | __GFP_NOWARN;
         int err = -ENOMEM, i;
 
         if (!htab_is_percpu(htab) && !htab_is_lru(htab))
                 num_entries += num_possible_cpus();
 
         htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries,
-                                         htab->map.numa_node);
+                                         htab->map.numa_node,
+                                         account_mem);
         if (!htab->elems)
                 return -ENOMEM;
 
         if (!htab_is_percpu(htab))
                 goto skip_percpu_elems;
 
+        if (account_mem)
+                gfp |= __GFP_ACCOUNT;
+
         for (i = 0; i < num_entries; i++) {
                 u32 size = round_up(htab->map.value_size, 8);
                 void __percpu *pptr;
 
-                pptr = __alloc_percpu_gfp(size, 8, GFP_USER | __GFP_NOWARN);
+                pptr = __alloc_percpu_gfp(size, 8, gfp);
                 if (!pptr)
                         goto free_elems;
                 htab_elem_set_ptr(get_htab_elem(htab, i), htab->map.key_size,
@@ -173,6 +179,7 @@ static int prealloc_init(struct bpf_htab *htab)
                                  htab->map.map_flags & BPF_F_NO_COMMON_LRU,
                                  offsetof(struct htab_elem, hash) -
                                  offsetof(struct htab_elem, lru_node),
+                                 account_mem,
                                  htab_lru_map_delete_node,
                                  htab);
         else
@@ -313,6 +320,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
          */
         bool percpu_lru = (attr->map_flags & BPF_F_NO_COMMON_LRU);
         bool prealloc = !(attr->map_flags & BPF_F_NO_PREALLOC);
+        bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
         struct bpf_htab *htab;
         int err, i;
         u64 cost;
@@ -374,7 +382,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
         err = -ENOMEM;
         htab->buckets = bpf_map_area_alloc(htab->n_buckets *
                                            sizeof(struct bucket),
-                                           htab->map.numa_node);
+                                           htab->map.numa_node,
+                                           account_mem);
         if (!htab->buckets)
                 goto free_htab;
 
@@ -389,7 +398,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
         }
 
         if (prealloc) {
-                err = prealloc_init(htab);
+                err = prealloc_init(htab, account_mem);
                 if (err)
                         goto free_buckets;
 
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index abf1002080df..8421fdb816f3 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -277,16 +277,19 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
 }
 
 static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
-                                                 const void *value)
+                                                 const void *value,
+                                                 bool account_mem)
 {
         struct lpm_trie_node *node;
         size_t size = sizeof(struct lpm_trie_node) + trie->data_size;
+        gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
 
         if (value)
                 size += trie->map.value_size;
 
-        node = kmalloc_node(size, GFP_ATOMIC | __GFP_NOWARN,
-                            trie->map.numa_node);
+        if (account_mem)
+                gfp |= __GFP_ACCOUNT;
+        node = kmalloc_node(size, gfp, trie->map.numa_node);
         if (!node)
                 return NULL;
 
@@ -327,7 +330,7 @@ static int trie_update_elem(struct bpf_map *map,
                 goto out;
         }
 
-        new_node = lpm_trie_node_alloc(trie, value);
+        new_node = lpm_trie_node_alloc(trie, value, map->account_mem);
         if (!new_node) {
                 ret = -ENOMEM;
                 goto out;
@@ -394,7 +397,7 @@ static int trie_update_elem(struct bpf_map *map,
                 goto out;
         }
 
-        im_node = lpm_trie_node_alloc(trie, NULL);
+        im_node = lpm_trie_node_alloc(trie, NULL, map->account_mem);
         if (!im_node) {
                 ret = -ENOMEM;
                 goto out;
@@ -542,6 +545,8 @@ static int trie_delete_elem(struct bpf_map *map, void *_key)
 static struct bpf_map *trie_alloc(union bpf_attr *attr)
 {
         struct lpm_trie *trie;
+        bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
+        gfp_t gfp = GFP_USER | __GFP_NOWARN;
         u64 cost = sizeof(*trie), cost_per_node;
         int ret;
 
@@ -558,7 +563,9 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
             attr->value_size > LPM_VAL_SIZE_MAX)
                 return ERR_PTR(-EINVAL);
 
-        trie = kzalloc(sizeof(*trie), GFP_USER | __GFP_NOWARN);
+        if (account_mem)
+                gfp |= __GFP_ACCOUNT;
+        trie = kzalloc(sizeof(*trie), gfp);
         if (!trie)
                 return ERR_PTR(-ENOMEM);
 
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index b384ea9f3254..040ec350af3d 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -11,7 +11,7 @@
 #include "percpu_freelist.h"
 
 #define QUEUE_STACK_CREATE_FLAG_MASK \
-        (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+        (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY | BPF_F_ACCOUNT_MEM)
 
 
 struct bpf_queue_stack {
@@ -69,6 +69,7 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
         int ret, numa_node = bpf_map_attr_numa_node(attr);
         struct bpf_queue_stack *qs;
         u64 size, queue_size, cost;
+        bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
 
         size = (u64) attr->max_entries + 1;
         cost = queue_size = sizeof(*qs) + size * attr->value_size;
@@ -81,7 +82,7 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
         if (ret < 0)
                 return ERR_PTR(ret);
 
-        qs = bpf_map_area_alloc(queue_size, numa_node);
+        qs = bpf_map_area_alloc(queue_size, numa_node, account_mem);
         if (!qs)
                 return ERR_PTR(-ENOMEM);
 
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 18e225de80ff..a9a2709c7507 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -152,6 +152,7 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
         int err, numa_node = bpf_map_attr_numa_node(attr);
         struct reuseport_array *array;
         u64 cost, array_size;
+        bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
 
         if (!capable(CAP_SYS_ADMIN))
                 return ERR_PTR(-EPERM);
@@ -170,7 +171,7 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
                 return ERR_PTR(err);
 
         /* allocate all map elements and zero-initialize them */
-        array = bpf_map_area_alloc(array_size, numa_node);
+        array = bpf_map_area_alloc(array_size, numa_node, account_mem);
         if (!array)
                 return ERR_PTR(-ENOMEM);
 
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index d43b14535827..46d37c7e09a2 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -61,13 +61,15 @@ static inline int stack_map_data_size(struct bpf_map *map)
                 sizeof(struct bpf_stack_build_id) : sizeof(u64);
 }
 
-static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
+static int prealloc_elems_and_freelist(struct bpf_stack_map *smap,
+                                       bool account_mem)
 {
         u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
         int err;
 
         smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries,
-                                         smap->map.numa_node);
+                                         smap->map.numa_node,
+                                         account_mem);
         if (!smap->elems)
                 return -ENOMEM;
 
@@ -90,6 +92,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
         u32 value_size = attr->value_size;
         struct bpf_stack_map *smap;
         u64 cost, n_buckets;
+        bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
         int err;
 
         if (!capable(CAP_SYS_ADMIN))
@@ -119,7 +122,8 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
         if (cost >= U32_MAX - PAGE_SIZE)
                 return ERR_PTR(-E2BIG);
 
-        smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
+        smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr),
+                                  account_mem);
         if (!smap)
                 return ERR_PTR(-ENOMEM);
 
@@ -141,7 +145,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
         if (err)
                 goto free_smap;
 
-        err = prealloc_elems_and_freelist(smap);
+        err = prealloc_elems_and_freelist(smap, account_mem);
         if (err)
                 goto put_buffers;
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b155cd17c1bd..13f2e1731a47 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -131,25 +131,29 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
                 return map;
         map->ops = ops;
         map->map_type = type;
+        map->account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
         return map;
 }
 
-void *bpf_map_area_alloc(size_t size, int numa_node)
+void *bpf_map_area_alloc(size_t size, int numa_node, bool account_mem)
 {
         /* We definitely need __GFP_NORETRY, so OOM killer doesn't
          * trigger under memory pressure as we really just want to
          * fail instead.
          */
-        const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
+        gfp_t gfp = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
         void *area;
 
+        if (account_mem)
+                gfp |= __GFP_ACCOUNT;
+
         if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
-                area = kmalloc_node(size, GFP_USER | flags, numa_node);
+                area = kmalloc_node(size, GFP_USER | gfp, numa_node);
                 if (area != NULL)
                         return area;
         }
 
-        return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
+        return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | gfp,
                                            __builtin_return_address(0));
 }
 
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 686d244e798d..bbc1f142326f 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -20,6 +20,8 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
         int cpu, err = -EINVAL;
         struct xsk_map *m;
         u64 cost;
+        bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
+        gfp_t gfp = GFP_KERNEL;
 
         if (!capable(CAP_NET_ADMIN))
                 return ERR_PTR(-EPERM);
@@ -49,7 +51,9 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 
         err = -ENOMEM;
 
-        m->flush_list = alloc_percpu(struct list_head);
+        if (account_mem)
+                gfp |= __GFP_ACCOUNT;
+        m->flush_list = alloc_percpu_gfp(struct list_head, gfp);
         if (!m->flush_list)
                 goto free_m;
 
@@ -58,7 +62,8 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 
         m->xsk_map = bpf_map_area_alloc(m->map.max_entries *
                                         sizeof(struct xdp_sock *),
-                                        m->map.numa_node);
+                                        m->map.numa_node,
+                                        account_mem);
         if (!m->xsk_map)
                 goto free_percpu;
         return &m->map;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index be6092ac69f8..eefcfd1294c0 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -18,13 +18,15 @@ struct bpf_stab {
         raw_spinlock_t lock;
 };
 
-#define SOCK_CREATE_FLAG_MASK \
-        (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+#define SOCK_CREATE_FLAG_MASK                           \
+        (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY |        \
+         BPF_F_ACCOUNT_MEM)
 
 static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 {
         struct bpf_stab *stab;
         u64 cost;
+        bool account_mem = (attr->map_flags & BPF_F_ACCOUNT_MEM);
         int err;
 
         if (!capable(CAP_NET_ADMIN))
@@ -56,7 +58,8 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 
         stab->sks = bpf_map_area_alloc(stab->map.max_entries *
                                        sizeof(struct sock *),
-                                       stab->map.numa_node);
+                                       stab->map.numa_node,
+                                       account_mem);
         if (stab->sks)
                 return &stab->map;
         err = -ENOMEM;
@@ -788,6 +791,7 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
         struct bpf_htab *htab;
         int i, err;
         u64 cost;
+        bool account = (attr->map_flags & BPF_F_ACCOUNT_MEM);
 
         if (!capable(CAP_NET_ADMIN))
                 return ERR_PTR(-EPERM);
@@ -823,7 +827,8 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
 
         htab->buckets = bpf_map_area_alloc(htab->buckets_num *
                                            sizeof(struct bpf_htab_bucket),
-                                           htab->map.numa_node);
+                                           htab->map.numa_node,
+                                           account);
         if (!htab->buckets) {
                 err = -ENOMEM;
                 goto free_htab;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 91c43884f295..dc490e3fdce3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -278,6 +278,9 @@ enum bpf_attach_type {
 #define BPF_F_NO_COMMON_LRU     (1U << 1)
 /* Specify numa node during map creation */
 #define BPF_F_NUMA_NODE         (1U << 2)
+/* Enable memory accounting for map */
+#define BPF_F_ACCOUNT_MEM       (1U << 7)
+
 #define BPF_OBJ_NAME_LEN 16U
-- 
2.20.1