Since the original zram code did not implement any per-cpu operations,
my previous patch ("staging: zram: add Crypto API support") did not
include them either.

This patch complements the first one with per-cpu support for the
Crypto API, allocating tfms separately for each online processor.
The changes are based on the per-cpu code in zswap and zcache.
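
For reference, a condensed sketch of the resulting fast path (taken from
zram_comp_op() in the diff below; error handling and the decompress case
omitted): the caller pins the current CPU with get_cpu(), looks up that
CPU's tfm, and drops the pin with put_cpu() once the crypto call returns.

	/*
	 * Sketch only -- condensed from zram_comp_op() below, not a
	 * standalone build unit.  get_cpu() disables preemption, so the
	 * per-cpu tfm cannot be picked up by another task via migration
	 * while the compression call is in flight.
	 */
	tfm = *per_cpu_ptr(zram_comp_pcpu_tfms, get_cpu());
	ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
	put_cpu();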

Basic tests (concurrently writing several 10-40MB chunks to zram),
performed on an ARM-based EXYNOS4412 Quad-Core, showed that the per-cpu
code provides noticeable time savings, in the range of 30-40% for the
LZO and LZ4 compressors. Sample data (LZO): writing 160MB, 40MB per
thread, took 0.60s with the per-cpu code included and approximately
0.80s without per-cpu support.

Signed-off-by: Piotr Sarna <p.sa...@partner.samsung.com>
Acked-by: Bartlomiej Zolnierkiewicz <b.zolnier...@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.p...@samsung.com>
---
 drivers/staging/zram/zram_drv.c |  146 +++++++++++++++++++++++++++++++++------
 drivers/staging/zram/zram_drv.h |    1 -
 2 files changed, 125 insertions(+), 22 deletions(-)

diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index d6f1f67..3dd5085 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -30,6 +30,7 @@
 #include <linux/highmem.h>
 #include <linux/slab.h>
 #include <linux/crypto.h>
+#include <linux/cpu.h>
 #include <linux/string.h>
 #include <linux/vmalloc.h>
 
@@ -46,7 +47,7 @@ static unsigned int num_devices = 1;
 
 /* Cryptographic API features */
 static char *zram_compressor = ZRAM_COMPRESSOR_DEFAULT;
-static struct crypto_comp *zram_comp_tfm;
+static struct crypto_comp * __percpu *zram_comp_pcpu_tfms;
 
 enum comp_op {
        ZRAM_COMPOP_COMPRESS,
@@ -59,7 +60,7 @@ static int zram_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
        struct crypto_comp *tfm;
        int ret;
 
-       tfm = zram_comp_tfm;
+       tfm = *per_cpu_ptr(zram_comp_pcpu_tfms, get_cpu());
        switch (op) {
        case ZRAM_COMPOP_COMPRESS:
                ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
@@ -70,6 +71,7 @@ static int zram_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
        default:
                ret = -EINVAL;
        }
+       put_cpu();
 
        return ret;
 }
@@ -87,9 +89,9 @@ static int __init zram_comp_init(void)
        }
        pr_info("using %s compressor\n", zram_compressor);
 
-       /* alloc transform */
-       zram_comp_tfm = crypto_alloc_comp(zram_compressor, 0, 0);
-       if (!zram_comp_tfm)
+       /* alloc percpu transforms */
+       zram_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
+       if (!zram_comp_pcpu_tfms)
                return -ENOMEM;
 
        return 0;
@@ -97,8 +99,110 @@ static int __init zram_comp_init(void)
 
 static inline void zram_comp_exit(void)
 {
-       if (zram_comp_tfm)
-               crypto_free_comp(zram_comp_tfm);
+       /* free percpu transforms */
+       if (zram_comp_pcpu_tfms)
+               free_percpu(zram_comp_pcpu_tfms);
+}
+
+
+/* Crypto API features: percpu code */
+#define ZRAM_DSTMEM_ORDER 1
+static DEFINE_PER_CPU(u8 *, zram_dstmem);
+
+static int zram_comp_cpu_up(int cpu)
+{
+       struct crypto_comp *tfm;
+
+       tfm = crypto_alloc_comp(zram_compressor, 0, 0);
+       if (IS_ERR(tfm))
+               return NOTIFY_BAD;
+       *per_cpu_ptr(zram_comp_pcpu_tfms, cpu) = tfm;
+       return NOTIFY_OK;
+}
+
+static void zram_comp_cpu_down(int cpu)
+{
+       struct crypto_comp *tfm;
+
+       tfm = *per_cpu_ptr(zram_comp_pcpu_tfms, cpu);
+       crypto_free_comp(tfm);
+       *per_cpu_ptr(zram_comp_pcpu_tfms, cpu) = NULL;
+}
+
+static int zram_cpu_notifier(struct notifier_block *nb,
+                               unsigned long action, void *pcpu)
+{
+       int ret;
+       int cpu = (long) pcpu;
+
+       switch (action) {
+       case CPU_UP_PREPARE:
+               ret = zram_comp_cpu_up(cpu);
+               if (ret != NOTIFY_OK) {
+                       pr_err("zram: can't allocate compressor xform\n");
+                       return ret;
+               }
+               per_cpu(zram_dstmem, cpu) = (void *)__get_free_pages(
+                       GFP_KERNEL | __GFP_REPEAT, ZRAM_DSTMEM_ORDER);
+               break;
+       case CPU_DEAD:
+       case CPU_UP_CANCELED:
+               zram_comp_cpu_down(cpu);
+               free_pages((unsigned long) per_cpu(zram_dstmem, cpu),
+                           ZRAM_DSTMEM_ORDER);
+               per_cpu(zram_dstmem, cpu) = NULL;
+               break;
+       default:
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block zram_cpu_notifier_block = {
+       .notifier_call = zram_cpu_notifier
+};
+
+/* Helper function releasing tfms from online cpus */
+static inline void zram_comp_cpus_down(void)
+{
+       int cpu;
+
+       get_online_cpus();
+       for_each_online_cpu(cpu) {
+               void *pcpu = (void *)(long)cpu;
+               zram_cpu_notifier(&zram_cpu_notifier_block,
+                                 CPU_UP_CANCELED, pcpu);
+       }
+       put_online_cpus();
+}
+
+static int zram_cpu_init(void)
+{
+       int ret;
+       unsigned int cpu;
+
+       ret = register_cpu_notifier(&zram_cpu_notifier_block);
+       if (ret) {
+               pr_err("zram: can't register cpu notifier\n");
+               goto out;
+       }
+
+       get_online_cpus();
+       for_each_online_cpu(cpu) {
+               void *pcpu = (void *)(long)cpu;
+               if (zram_cpu_notifier(&zram_cpu_notifier_block,
+                                     CPU_UP_PREPARE, pcpu) != NOTIFY_OK)
+                       goto cleanup;
+       }
+       put_online_cpus();
+       return ret;
+
+cleanup:
+       zram_comp_cpus_down();
+
+out:
+       put_online_cpus();
+       return -ENOMEM;
 }
 /* end of Cryptographic API features */
 
@@ -250,7 +354,6 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio)
 static void zram_meta_free(struct zram_meta *meta)
 {
        zs_destroy_pool(meta->mem_pool);
-       free_pages((unsigned long)meta->compress_buffer, 1);
        vfree(meta->table);
        kfree(meta);
 }
@@ -262,18 +365,11 @@ static struct zram_meta *zram_meta_alloc(u64 disksize)
        if (!meta)
                goto out;
 
-       meta->compress_buffer =
-               (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
-       if (!meta->compress_buffer) {
-               pr_err("Error allocating compressor buffer space\n");
-               goto free_meta;
-       }
-
        num_pages = disksize >> PAGE_SHIFT;
        meta->table = vzalloc(num_pages * sizeof(*meta->table));
        if (!meta->table) {
                pr_err("Error allocating zram address table\n");
-               goto free_buffer;
+               goto free_meta;
        }
 
        meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
@@ -286,8 +382,6 @@ static struct zram_meta *zram_meta_alloc(u64 disksize)
 
 free_table:
        vfree(meta->table);
-free_buffer:
-       free_pages((unsigned long)meta->compress_buffer, 1);
 free_meta:
        kfree(meta);
        meta = NULL;
@@ -455,7 +549,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
        struct zram_meta *meta = zram->meta;
 
        page = bvec->bv_page;
-       src = meta->compress_buffer;
+       src = __get_cpu_var(zram_dstmem);
+       BUG_ON(src == NULL);
 
        if (is_partial_io(bvec)) {
                /*
@@ -916,18 +1011,24 @@ static int __init zram_init(void)
                goto out;
        }
 
+       if (zram_cpu_init()) {
+               pr_err("Per-cpu initialization failed\n");
+               ret = -ENOMEM;
+               goto free_comp;
+       }
+
        if (num_devices > max_num_devices) {
                pr_warn("Invalid value for num_devices: %u\n",
                                num_devices);
                ret = -EINVAL;
-               goto free_comp;
+               goto free_cpu_comp;
        }
 
        zram_major = register_blkdev(0, "zram");
        if (zram_major <= 0) {
                pr_warn("Unable to get major number\n");
                ret = -EBUSY;
-               goto free_comp;
+               goto free_cpu_comp;
        }
 
        /* Allocate the device array and initialize each one */
@@ -953,6 +1054,8 @@ free_devices:
        kfree(zram_devices);
 unregister:
        unregister_blkdev(zram_major, "zram");
+free_cpu_comp:
+       zram_comp_cpus_down();
 free_comp:
        zram_comp_exit();
 out:
@@ -976,6 +1079,7 @@ static void __exit zram_exit(void)
        unregister_blkdev(zram_major, "zram");
 
        kfree(zram_devices);
+       zram_comp_cpus_down();
        zram_comp_exit();
        pr_debug("Cleanup done!\n");
 }
diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h
index 93f4d14..474474a 100644
--- a/drivers/staging/zram/zram_drv.h
+++ b/drivers/staging/zram/zram_drv.h
@@ -88,7 +88,6 @@ struct zram_stats {
 };
 
 struct zram_meta {
-       void *compress_buffer;
        struct table *table;
        struct zs_pool *mem_pool;
 };
-- 
1.7.9.5
