[ snip ]

+
+config PROTECTABLE_MEMORY
+    bool
+    depends on MMU


Curious: would you also want to depend on "SECURITY"? This is being advertised as a complement to __read_only_after_init, per the file header comments, and I'm assuming ro_after_init would be disabled if the SECURITY Kconfig option is *NOT* selected.


+    depends on ARCH_HAS_SET_MEMORY
+    select GENERIC_ALLOCATOR
+    default y
diff --git a/mm/Makefile b/mm/Makefile
index e669f02c5a54..959fdbdac118 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -65,6 +65,7 @@ obj-$(CONFIG_SPARSEMEM)       += sparse.o
  obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o
  obj-$(CONFIG_SLOB) += slob.o
  obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
+obj-$(CONFIG_PROTECTABLE_MEMORY) += pmalloc.o
  obj-$(CONFIG_KSM) += ksm.o
  obj-$(CONFIG_PAGE_POISONING) += page_poison.o
  obj-$(CONFIG_SLAB) += slab.o
diff --git a/mm/pmalloc.c b/mm/pmalloc.c
new file mode 100644
index 000000000000..acdec0fbdde6
--- /dev/null
+++ b/mm/pmalloc.c
@@ -0,0 +1,468 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * pmalloc.c: Protectable Memory Allocator
+ *
+ * (C) Copyright 2017 Huawei Technologies Co. Ltd.
+ * Author: Igor Stoppa<igor.sto...@huawei.com>
+ */
+
+#include <linux/printk.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/genalloc.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/atomic.h>
+#include <linux/rculist.h>
+#include <linux/set_memory.h>
+#include <linux/bug.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+
+#include <linux/pmalloc.h>
+/*
+ * pmalloc_data contains the data specific to a pmalloc pool,
+ * in a format compatible with the design of gen_alloc.
+ * Some of the fields are used for exposing the corresponding parameter
+ * to userspace, through sysfs.
+ */
+struct pmalloc_data {
+       struct gen_pool *pool;  /* Link back to the associated pool. */
+       bool protected;     /* Status of the pool: RO or RW. */

Nitpick: you could probably get slightly better byte packing of this struct if "bool protected" were placed as the last member of the data structure.
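For illustration, a minimal sketch of the reordering I have in mind (member names copied from the patch; whether it actually shrinks the struct depends on the padding the compiler inserts, which pahole can confirm):

/*
 * Hypothetical reordering for the nitpick above: keep the pointer-sized,
 * pointer-aligned members together and push the lone bool to the end, so
 * it can land in tail padding instead of leaving a hole after "pool".
 */
struct pmalloc_data {
        struct gen_pool *pool;  /* Link back to the associated pool. */
        struct kobj_attribute attr_protected; /* Sysfs attribute. */
        struct kobj_attribute attr_avail;     /* Sysfs attribute. */
        struct kobj_attribute attr_size;      /* Sysfs attribute. */
        struct kobj_attribute attr_chunks;    /* Sysfs attribute. */
        struct kobject *pool_kobject;
        struct list_head node; /* list of pools */
        bool protected;     /* Status of the pool: RO or RW. */
};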

+       struct kobj_attribute attr_protected; /* Sysfs attribute. */
+       struct kobj_attribute attr_avail;     /* Sysfs attribute. */
+       struct kobj_attribute attr_size;      /* Sysfs attribute. */
+       struct kobj_attribute attr_chunks;    /* Sysfs attribute. */
+       struct kobject *pool_kobject;
+       struct list_head node; /* list of pools */
+};
+
+static LIST_HEAD(pmalloc_final_list);
+static LIST_HEAD(pmalloc_tmp_list);
+static struct list_head *pmalloc_list = &pmalloc_tmp_list;
+static DEFINE_MUTEX(pmalloc_mutex);
+static struct kobject *pmalloc_kobject;
+
+static ssize_t pmalloc_pool_show_protected(struct kobject *dev,
+                                          struct kobj_attribute *attr,
+                                          char *buf)
+{
+       struct pmalloc_data *data;
+
+       data = container_of(attr, struct pmalloc_data, attr_protected);
+       if (data->protected)
+               return sprintf(buf, "protected\n");
+       else
+               return sprintf(buf, "unprotected\n");
+}
+
+static ssize_t pmalloc_pool_show_avail(struct kobject *dev,
+                                      struct kobj_attribute *attr,
+                                      char *buf)
+{
+       struct pmalloc_data *data;
+
+       data = container_of(attr, struct pmalloc_data, attr_avail);
+       return sprintf(buf, "%lu\n",
+                      (unsigned long)gen_pool_avail(data->pool));
+}
+
+static ssize_t pmalloc_pool_show_size(struct kobject *dev,
+                                     struct kobj_attribute *attr,
+                                     char *buf)
+{
+       struct pmalloc_data *data;
+
+       data = container_of(attr, struct pmalloc_data, attr_size);
+       return sprintf(buf, "%lu\n",
+                      (unsigned long)gen_pool_size(data->pool));
+}


Curious: will this show the size in bytes?


+
+static void pool_chunk_number(struct gen_pool *pool,
+                             struct gen_pool_chunk *chunk, void *data)
+{
+       unsigned long *counter = data;
+
+       (*counter)++;
+}
+
+static ssize_t pmalloc_pool_show_chunks(struct kobject *dev,
+                                       struct kobj_attribute *attr,
+                                       char *buf)
+{
+       struct pmalloc_data *data;
+       unsigned long chunks_num = 0;
+
+       data = container_of(attr, struct pmalloc_data, attr_chunks);
+       gen_pool_for_each_chunk(data->pool, pool_chunk_number, &chunks_num);
+       return sprintf(buf, "%lu\n", chunks_num);
+}
+
+/* Exposes the pool and its attributes through sysfs. */
+static struct kobject *pmalloc_connect(struct pmalloc_data *data)
+{
+       const struct attribute *attrs[] = {
+               &data->attr_protected.attr,
+               &data->attr_avail.attr,
+               &data->attr_size.attr,
+               &data->attr_chunks.attr,
+               NULL
+       };
+       struct kobject *kobj;
+
+       kobj = kobject_create_and_add(data->pool->name, pmalloc_kobject);
+       if (unlikely(!kobj))
+               return NULL;
+
+       if (unlikely(sysfs_create_files(kobj, attrs) < 0)) {
+               kobject_put(kobj);
+               kobj = NULL;
+       }
+       return kobj;
+}
+
+/* Removes the pool and its attributes from sysfs. */
+static void pmalloc_disconnect(struct pmalloc_data *data,
+                              struct kobject *kobj)
+{
+       const struct attribute *attrs[] = {
+               &data->attr_protected.attr,
+               &data->attr_avail.attr,
+               &data->attr_size.attr,
+               &data->attr_chunks.attr,
+               NULL
+       };
+
+       sysfs_remove_files(kobj, attrs);
+       kobject_put(kobj);
+}
+
+/* Declares an attribute of the pool. */
+#define pmalloc_attr_init(data, attr_name) \
+do { \
+       sysfs_attr_init(&data->attr_##attr_name.attr); \
+       data->attr_##attr_name.attr.name = #attr_name; \
+       data->attr_##attr_name.attr.mode = VERIFY_OCTAL_PERMISSIONS(0400); \
+       data->attr_##attr_name.show = pmalloc_pool_show_##attr_name; \

Why are these ##attr constructs being used rather than the #define macros already in the kernel (DEVICE_ATTR(), DEVICE_ATTR_RO(), etc., found in include/linux/device.h)? Those macros are much easier to read and use.
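For reference, the sketch below is roughly what the existing helpers buy you. Note these are kobj_attributes rather than device_attributes, so the matching macros would be __ATTR()/__ATTR_RO() from <linux/sysfs.h>, and since pmalloc builds one attribute set per pool at runtime this may not be a drop-in replacement, just an illustration:

/*
 * Sketch only: the static-attribute style the existing macros support.
 * __ATTR_RO(name) wires .show to name_show and sets mode 0444, whereas
 * the patch uses 0400 and per-pool dynamic attributes.
 */
static ssize_t protected_show(struct kobject *kobj,
                              struct kobj_attribute *attr, char *buf)
{
        return sprintf(buf, "unprotected\n");
}

static struct kobj_attribute protected_attr = __ATTR_RO(protected);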

+} while (0)
+
+struct gen_pool *pmalloc_create_pool(const char *name, int min_alloc_order)
+{
+       struct gen_pool *pool;
+       const char *pool_name;
+       struct pmalloc_data *data;
+
+       if (unlikely(!name)) {
+               WARN(true, "unnamed pool");
+               return NULL;
+       }
+
+       if (min_alloc_order < 0)
+               min_alloc_order = ilog2(sizeof(unsigned long));
+
+       pool = gen_pool_create(min_alloc_order, NUMA_NO_NODE);
+       if (unlikely(!pool))
+               return NULL;
+
+       mutex_lock(&pmalloc_mutex);
+       list_for_each_entry(data, pmalloc_list, node)
+               if (!strcmp(name, data->pool->name))
+                       goto same_name_err;
+
+       pool_name = kstrdup(name, GFP_KERNEL);
+       if (unlikely(!pool_name))
+               goto name_alloc_err;
+
+       data = kzalloc(sizeof(struct pmalloc_data), GFP_KERNEL);
+       if (unlikely(!data))
+               goto data_alloc_err;
+
+       data->protected = false;
+       data->pool = pool;
+       pmalloc_attr_init(data, protected);
+       pmalloc_attr_init(data, avail);
+       pmalloc_attr_init(data, size);
+       pmalloc_attr_init(data, chunks);
+       pool->data = data;
+       pool->name = pool_name;
+
+       list_add(&data->node, pmalloc_list);
+       if (pmalloc_list == &pmalloc_final_list)
+               data->pool_kobject = pmalloc_connect(data);
+       mutex_unlock(&pmalloc_mutex);
+       return pool;
+
+data_alloc_err:
+       kfree(pool_name);
+name_alloc_err:
+same_name_err:
+       mutex_unlock(&pmalloc_mutex);
+       gen_pool_destroy(pool);
+       return NULL;
+}
+
+static inline bool chunk_tagging(void *chunk, bool tag)
+{
+       struct vm_struct *area;
+       struct page *page;
+
+       if (!is_vmalloc_addr(chunk))
+               return false;
+
+       page = vmalloc_to_page(chunk);
+       if (unlikely(!page))
+               return false;
+
+       area = page->area;
+       if (tag)
+               area->flags |= VM_PMALLOC;
+       else
+               area->flags &= ~VM_PMALLOC;
+       return true;
+}
+
+
+static inline bool tag_chunk(void *chunk)
+{
+       return chunk_tagging(chunk, true);
+}
+
+
+static inline bool untag_chunk(void *chunk)
+{
+       return chunk_tagging(chunk, false);
+}
+
+enum {
+       INVALID_PMALLOC_OBJECT = -1,
+       NOT_PMALLOC_OBJECT = 0,
+       VALID_PMALLOC_OBJECT = 1,
+};
+
+int is_pmalloc_object(const void *ptr, const unsigned long n)
+{
+       struct vm_struct *area;
+       struct page *page;
+       unsigned long area_start;
+       unsigned long area_end;
+       unsigned long object_start;
+       unsigned long object_end;
+
+
+       /*
+        * is_pmalloc_object gets called pretty late, so chances are high
+        * that the object is indeed of vmalloc type
+        */
+       if (unlikely(!is_vmalloc_addr(ptr)))
+               return NOT_PMALLOC_OBJECT;
+
+       page = vmalloc_to_page(ptr);
+       if (unlikely(!page))
+               return NOT_PMALLOC_OBJECT;
+
+       area = page->area;
+
+       if (likely(!(area->flags & VM_PMALLOC)))
+               return NOT_PMALLOC_OBJECT;
+
+       area_start = (unsigned long)area->addr;
+       area_end = area_start + area->nr_pages * PAGE_SIZE - 1;
+       object_start = (unsigned long)ptr;
+       object_end = object_start + n - 1;
+
+       if (likely((area_start <= object_start) &&
+                  (object_end <= area_end)))
+               return VALID_PMALLOC_OBJECT;
+       else
+               return INVALID_PMALLOC_OBJECT;
+}
+
+
+bool pmalloc_prealloc(struct gen_pool *pool, size_t size)
+{
+       void *chunk;
+       size_t chunk_size;
+       bool add_error;
+
+       /* Expand pool */
+       chunk_size = roundup(size, PAGE_SIZE);
+       chunk = vmalloc(chunk_size);
+       if (unlikely(chunk == NULL))
+               return false;
+
+       /* Locking is already done inside gen_pool_add */
+       add_error = gen_pool_add(pool, (unsigned long)chunk, chunk_size,
+                                NUMA_NO_NODE);
+       if (unlikely(add_error != 0))
+               goto abort;
+
+       return true;
+abort:
+       vfree_atomic(chunk);
+       return false;
+
+}
+
+void *pmalloc(struct gen_pool *pool, size_t size, gfp_t gfp)
+{
+       void *chunk;
+       size_t chunk_size;
+       bool add_error;
+       unsigned long retval;
+
+       if (unlikely(((struct pmalloc_data *)(pool->data))->protected)) {
+               WARN(true, "pool %s is already protected", pool->name);
+               return NULL;
+       }
+
+retry_alloc_from_pool:
+       retval = gen_pool_alloc(pool, size);
+       if (retval)
+               goto return_allocation;
+
+       if (unlikely((gfp & __GFP_ATOMIC))) {
+               if (unlikely((gfp & __GFP_NOFAIL)))
+                       goto retry_alloc_from_pool;
+               else
+                       return NULL;
+       }
+
+       /* Expand pool */
+       chunk_size = roundup(size, PAGE_SIZE);
+       chunk = vmalloc(chunk_size);
+       if (unlikely(!chunk)) {
+               if (unlikely((gfp & __GFP_NOFAIL)))
+                       goto retry_alloc_from_pool;
+               else
+                       return NULL;
+       }
+       if (unlikely(!tag_chunk(chunk)))
+               goto free;
+
+       /* Locking is already done inside gen_pool_add */
+       add_error = gen_pool_add(pool, (unsigned long)chunk, chunk_size,
+                                NUMA_NO_NODE);
+       if (unlikely(add_error))
+               goto abort;
+
+       retval = gen_pool_alloc(pool, size);
+       if (retval) {
+return_allocation:
+               *(size_t *)retval = size;
+               if (gfp & __GFP_ZERO)
+                       memset((void *)retval, 0, size);
+               return (void *)retval;
+       }
+       /*
+        * Here there is no test for __GFP_NO_FAIL because, in case of
+        * concurrent allocation, one thread might add a chunk to the
+        * pool and this memory could be allocated by another thread,
+        * before the first thread gets a chance to use it.
+        * As long as vmalloc succeeds, it's ok to retry.
+        */
+       goto retry_alloc_from_pool;
+abort:
+       untag_chunk(chunk);
+free:
+       vfree_atomic(chunk);
+       return NULL;
+}
+
+static void pmalloc_chunk_set_protection(struct gen_pool *pool,
+                                        struct gen_pool_chunk *chunk,
+                                        void *data)
+{
+       const bool *flag = data;
+       size_t chunk_size = chunk->end_addr + 1 - chunk->start_addr;
+       unsigned long pages = chunk_size / PAGE_SIZE;
+
+       if (unlikely(chunk_size & (PAGE_SIZE - 1))) {
+               WARN(true, "Chunk size is not a multiple of PAGE_SIZE.");
+               return;
+       }
+
+       if (*flag)
+               set_memory_ro(chunk->start_addr, pages);
+       else
+               set_memory_rw(chunk->start_addr, pages);
+}
+
+static int pmalloc_pool_set_protection(struct gen_pool *pool, bool protection)
+{
+       struct pmalloc_data *data;
+       struct gen_pool_chunk *chunk;
+
+       data = pool->data;
+       if (unlikely(data->protected == protection)) {
+               WARN(true, "The pool %s is already protected as requested",
+                    pool->name);
+               return 0;
+       }
+       data->protected = protection;
+       list_for_each_entry(chunk, &(pool)->chunks, next_chunk)
+               pmalloc_chunk_set_protection(pool, chunk, &protection);
+       return 0;
+}
+
+int pmalloc_protect_pool(struct gen_pool *pool)
+{
+       return pmalloc_pool_set_protection(pool, true);
+}
+
+
+static void pmalloc_chunk_free(struct gen_pool *pool,
+                              struct gen_pool_chunk *chunk, void *data)
+{
+       untag_chunk(chunk);
+       gen_pool_flush_chunk(pool, chunk);
+       vfree_atomic((void *)chunk->start_addr);
+}
+
+
+int pmalloc_destroy_pool(struct gen_pool *pool)
+{
+       struct pmalloc_data *data;
+
+       data = pool->data;
+
+       mutex_lock(&pmalloc_mutex);
+       list_del(&data->node);
+       mutex_unlock(&pmalloc_mutex);
+
+       if (likely(data->pool_kobject))
+               pmalloc_disconnect(data, data->pool_kobject);
+
+       pmalloc_pool_set_protection(pool, false);
+       gen_pool_for_each_chunk(pool, pmalloc_chunk_free, NULL);
+       gen_pool_destroy(pool);
+       kfree(data);
+       return 0;
+}
+
+/*
+ * When the sysfs is ready to receive registrations, connect all the
+ * pools previously created. Also enable further pools to be connected
+ * right away.
+ */
+static int __init pmalloc_late_init(void)
+{
+       struct pmalloc_data *data, *n;
+
+       pmalloc_kobject = kobject_create_and_add("pmalloc", kernel_kobj);
+
+       mutex_lock(&pmalloc_mutex);
+       pmalloc_list = &pmalloc_final_list;
+
+       if (likely(pmalloc_kobject != NULL)) {
+               list_for_each_entry_safe(data, n, &pmalloc_tmp_list, node) {
+                       list_move(&data->node, &pmalloc_final_list);
+                       pmalloc_connect(data);
+               }
+       }
+       mutex_unlock(&pmalloc_mutex);
+       return 0;

I'd just go ahead and return a different value if pmalloc_kobject ends up NULL. The __init function is already returning a value and already checks this failure case, so you might as well go all the way, put a little icing on the cake, and return a distinct (errno) value for this case.
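Something along these lines is what I mean; just a sketch against the quoted code, with -ENOMEM as one possible errno:

/*
 * Sketch of the suggestion above: propagate a failure to create the
 * "pmalloc" kobject instead of always returning 0.
 */
static int __init pmalloc_late_init(void)
{
        struct pmalloc_data *data, *n;
        int ret = 0;

        pmalloc_kobject = kobject_create_and_add("pmalloc", kernel_kobj);

        mutex_lock(&pmalloc_mutex);
        pmalloc_list = &pmalloc_final_list;

        if (likely(pmalloc_kobject)) {
                list_for_each_entry_safe(data, n, &pmalloc_tmp_list, node) {
                        list_move(&data->node, &pmalloc_final_list);
                        pmalloc_connect(data);
                }
        } else {
                ret = -ENOMEM;
        }
        mutex_unlock(&pmalloc_mutex);
        return ret;
}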



Thanks,
Jay
