Signed-off-by: Thomas Schoebel-Theuer <t...@schoebel-theuer.de>
---
 drivers/staging/mars/xio_bricks/lib_mapfree.c | 380 ++++++++++++++++++++++++++
 include/linux/xio/lib_mapfree.h               |  84 ++++++
 2 files changed, 464 insertions(+)
 create mode 100644 drivers/staging/mars/xio_bricks/lib_mapfree.c
 create mode 100644 include/linux/xio/lib_mapfree.h

diff --git a/drivers/staging/mars/xio_bricks/lib_mapfree.c b/drivers/staging/mars/xio_bricks/lib_mapfree.c
new file mode 100644
index 0000000..6b464d7
--- /dev/null
+++ b/drivers/staging/mars/xio_bricks/lib_mapfree.c
@@ -0,0 +1,380 @@
+/*
+ * MARS Long Distance Replication Software
+ *
+ * Copyright (C) 2010-2014 Thomas Schoebel-Theuer
+ * Copyright (C) 2011-2014 1&1 Internet AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/xio/lib_mapfree.h>
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/blkdev.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <linux/file.h>
+
+/*  time to wait between background mapfree operations */
+int mapfree_period_sec = 10;
+
+/*  some grace space where no regular cleanup should occur */
+int mapfree_grace_keep_mb = 16;
+
+static
+DECLARE_RWSEM(mapfree_mutex);
+
+static
+LIST_HEAD(mapfree_list);
+
+void mapfree_pages(struct mapfree_info *mf, int grace_keep)
+{
+       struct address_space *mapping;
+       pgoff_t start;
+       pgoff_t end;
+
+       if (unlikely(!mf))
+               goto done;
+       if (unlikely(!mf->mf_filp))
+               goto done;
+
+       mapping = mf->mf_filp->f_mapping;
+       if (unlikely(!mapping))
+               goto done;
+
+       if (grace_keep < 0) { /*  force full flush */
+               start = 0;
+               end = -1;
+       } else {
+               unsigned long flags;
+               loff_t tmp;
+               loff_t min;
+
+               spin_lock_irqsave(&mf->mf_lock, flags);
+
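+               /* mf_min[0] collects the lowest offset reported via
+                * mapfree_set() during the current round, while mf_min[1]
+                * keeps the value from the previous round.  Take the smaller
+                * of the two and rotate the slots, so one extra round of
+                * grace is granted before pages are invalidated.
+                */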
+               min = tmp = mf->mf_min[0];
+               if (likely(mf->mf_min[1] < min))
+                       min = mf->mf_min[1];
+               if (tmp) {
+                       mf->mf_min[1] = tmp;
+                       mf->mf_min[0] = 0;
+               }
+
+               spin_unlock_irqrestore(&mf->mf_lock, flags);
+
+               min -= (loff_t)grace_keep * (1024 * 1024); /*  megabytes */
+               end = 0;
+
+               if (min > 0 || mf->mf_last) {
+                       start = mf->mf_last / PAGE_SIZE;
+                       /*  add some grace overlapping */
+                       if (likely(start > 0))
+                               start--;
+                       mf->mf_last = min;
+                       end = min / PAGE_SIZE;
+               } else  { /*  there was no progress for at least 2 rounds */
+                       start = 0;
+                       if (!grace_keep) /*  also flush thoroughly */
+                               end = -1;
+               }
+
+               XIO_DBG("file = '%s' start = %lu end = %lu\n", mf->mf_name, start, end);
+       }
+
+       if (end > start || end == -1)
+               invalidate_mapping_pages(mapping, start, end);
+
+done:;
+}
+
+static
+void _mapfree_put(struct mapfree_info *mf)
+{
+       if (atomic_dec_and_test(&mf->mf_count)) {
+               XIO_DBG("closing file '%s' filp = %p\n", mf->mf_name, mf->mf_filp);
+               list_del_init(&mf->mf_head);
+               CHECK_HEAD_EMPTY(&mf->mf_dirty_anchor);
+               if (likely(mf->mf_filp)) {
+                       mapfree_pages(mf, -1);
+                       filp_close(mf->mf_filp, NULL);
+               }
+               brick_string_free(mf->mf_name);
+               brick_mem_free(mf);
+       }
+}
+
+void mapfree_put(struct mapfree_info *mf)
+{
+       if (likely(mf)) {
+               down_write(&mapfree_mutex);
+               _mapfree_put(mf);
+               up_write(&mapfree_mutex);
+       }
+}
+
+struct mapfree_info *mapfree_get(const char *name, int flags)
+{
+       struct mapfree_info *mf = NULL;
+       struct list_head *tmp;
+
+       if (!(flags & O_DIRECT)) {
+               down_read(&mapfree_mutex);
+               for (tmp = mapfree_list.next; tmp != &mapfree_list; tmp = tmp->next) {
+                       struct mapfree_info *_mf = container_of(tmp, struct mapfree_info, mf_head);
+
+                       if (_mf->mf_flags == flags && !strcmp(_mf->mf_name, name)) {
+                               mf = _mf;
+                               atomic_inc(&mf->mf_count);
+                               break;
+                       }
+               }
+               up_read(&mapfree_mutex);
+
+               if (mf)
+                       goto done;
+       }
+
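+       /* Single-pass loop: every path below leaves via "break"; the loop
+        * construct only exists so that error paths can bail out early.
+        */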
+       for (;;) {
+               struct address_space *mapping;
+               struct inode *inode = NULL;
+               int ra = 1;
+               int prot = 0600;
+
+               mm_segment_t oldfs;
+
+               mf = brick_zmem_alloc(sizeof(struct mapfree_info));
+
+               mf->mf_name = brick_strdup(name);
+
+               mf->mf_flags = flags;
+               INIT_LIST_HEAD(&mf->mf_head);
+               INIT_LIST_HEAD(&mf->mf_dirty_anchor);
+               atomic_set(&mf->mf_count, 1);
+               spin_lock_init(&mf->mf_lock);
+               mf->mf_max = -1;
+
+               oldfs = get_fs();
+               set_fs(get_ds());
+               mf->mf_filp = filp_open(name, flags, prot);
+               set_fs(oldfs);
+
+               XIO_DBG("file '%s' flags = %d prot = %d filp = %p\n", name, flags, prot, mf->mf_filp);
+
+               if (unlikely(!mf->mf_filp || IS_ERR(mf->mf_filp))) {
+                       int err = PTR_ERR(mf->mf_filp);
+
+                       XIO_ERR("can't open file '%s' status=%d\n", name, err);
+                       mf->mf_filp = NULL;
+                       _mapfree_put(mf);
+                       mf = NULL;
+                       break;
+               }
+
+               mapping = mf->mf_filp->f_mapping;
+               if (likely(mapping))
+                       inode = mapping->host;
+               if (unlikely(!mapping || !inode)) {
+                       XIO_ERR("file '%s' has no mapping\n", name);
+                       mf->mf_filp = NULL;
+                       _mapfree_put(mf);
+                       mf = NULL;
+                       break;
+               }
+
+               mapping_set_gfp_mask(mapping, mapping_gfp_mask(mapping) & ~(__GFP_IO | __GFP_FS));
+
+               mf->mf_max = i_size_read(inode);
+
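+               /* Reduce block device readahead to a single page; presumably
+                * a large readahead would only refill the page cache which
+                * the background cleanup tries to release.
+                */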
+               if (S_ISBLK(inode->i_mode)) {
+                       XIO_INF("changing blkdev readahead from %lu to %d\n",
+                               inode->i_bdev->bd_disk->queue->backing_dev_info.ra_pages,
+                               ra);
+                       inode->i_bdev->bd_disk->queue->backing_dev_info.ra_pages = ra;
+               }
+
+               if (flags & O_DIRECT) { /*  never share them */
+                       break;
+               }
+
+               /*  maintain global list of all open files */
+               down_write(&mapfree_mutex);
+               for (tmp = mapfree_list.next; tmp != &mapfree_list; tmp = tmp->next) {
+                       struct mapfree_info *_mf = container_of(tmp, struct mapfree_info, mf_head);
+
+                       if (unlikely(_mf->mf_flags == flags && !strcmp(_mf->mf_name, name))) {
+                               XIO_WRN("race on creation of '%s' detected\n", name);
+                               _mapfree_put(mf);
+                               mf = _mf;
+                               atomic_inc(&mf->mf_count);
+                               goto leave;
+                       }
+               }
+               list_add_tail(&mf->mf_head, &mapfree_list);
+leave:
+               up_write(&mapfree_mutex);
+               break;
+       }
+done:
+       return mf;
+}
+
+void mapfree_set(struct mapfree_info *mf, loff_t min, loff_t max)
+{
+       unsigned long flags;
+
+       if (likely(mf)) {
+               spin_lock_irqsave(&mf->mf_lock, flags);
+               if (!mf->mf_min[0] || mf->mf_min[0] > min)
+                       mf->mf_min[0] = min;
+               if (max >= 0 && mf->mf_max < max)
+                       mf->mf_max = max;
+               spin_unlock_irqrestore(&mf->mf_lock, flags);
+       }
+}
+
+static
+int mapfree_thread(void *data)
+{
+       while (!brick_thread_should_stop()) {
+               struct mapfree_info *mf = NULL;
+               struct list_head *tmp;
+               long long eldest = 0;
+
+               brick_msleep(500);
+
+               if (mapfree_period_sec <= 0)
+                       continue;
+
+               down_read(&mapfree_mutex);
+
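+               /* Select the file whose last cleanup lies furthest in the
+                * past, but only among those older than mapfree_period_sec.
+                */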
+               for (tmp = mapfree_list.next; tmp != &mapfree_list; tmp = tmp->next) {
+                       struct mapfree_info *_mf = container_of(tmp, struct mapfree_info, mf_head);
+
+                       if (unlikely(!_mf->mf_jiffies)) {
+                               _mf->mf_jiffies = jiffies;
+                               continue;
+                       }
+                       if ((long long)jiffies - _mf->mf_jiffies > mapfree_period_sec * HZ &&
+                           (!mf || _mf->mf_jiffies < eldest)) {
+                               mf = _mf;
+                               eldest = _mf->mf_jiffies;
+                       }
+               }
+               if (mf)
+                       atomic_inc(&mf->mf_count);
+
+               up_read(&mapfree_mutex);
+
+               if (!mf)
+                       continue;
+
+               mapfree_pages(mf, mapfree_grace_keep_mb);
+
+               mf->mf_jiffies = jiffies;
+               mapfree_put(mf);
+       }
+       return 0;
+}
+
+/***************** dirty IOs on the fly  *****************/
+
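+/* Writers register their in-flight aios as dirty_info records; mf_get_dirty()
+ * then reports the byte range covered by them, filtered by dirty_stage.
+ */
+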
+void mf_insert_dirty(struct mapfree_info *mf, struct dirty_info *di)
+{
+       unsigned long flags;
+
+       if (likely(di->dirty_aio && mf)) {
+               spin_lock_irqsave(&mf->mf_lock, flags);
+               list_del(&di->dirty_head);
+               list_add(&di->dirty_head, &mf->mf_dirty_anchor);
+               spin_unlock_irqrestore(&mf->mf_lock, flags);
+       }
+}
+
+void mf_remove_dirty(struct mapfree_info *mf, struct dirty_info *di)
+{
+       unsigned long flags;
+
+       if (!list_empty(&di->dirty_head) && mf) {
+               spin_lock_irqsave(&mf->mf_lock, flags);
+               list_del_init(&di->dirty_head);
+               spin_unlock_irqrestore(&mf->mf_lock, flags);
+       }
+}
+
+void mf_get_dirty(struct mapfree_info *mf, loff_t *min, loff_t *max, int min_stage, int max_stage)
+{
+       unsigned long flags;
+
+       struct list_head *tmp;
+
+       if (unlikely(!mf))
+               goto done;
+
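+       /* Widen the caller-supplied [*min, *max) interval so that it covers
+        * all dirty aios of this file whose stage lies in the given range.
+        */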
+       spin_lock_irqsave(&mf->mf_lock, flags);
+       for (tmp = mf->mf_dirty_anchor.next; tmp != &mf->mf_dirty_anchor; tmp = tmp->next) {
+               struct dirty_info *di = container_of(tmp, struct dirty_info, dirty_head);
+               struct aio_object *aio = di->dirty_aio;
+
+               if (unlikely(!aio))
+                       continue;
+               if (di->dirty_stage < min_stage || di->dirty_stage > max_stage)
+                       continue;
+               if (aio->io_pos < *min)
+                       *min = aio->io_pos;
+               if (aio->io_pos + aio->io_len > *max)
+                       *max = aio->io_pos + aio->io_len;
+       }
+       spin_unlock_irqrestore(&mf->mf_lock, flags);
+done:;
+}
+
+void mf_get_any_dirty(const char *filename, loff_t *min, loff_t *max, int min_stage, int max_stage)
+{
+       struct list_head *tmp;
+
+       down_read(&mapfree_mutex);
+       for (tmp = mapfree_list.next; tmp != &mapfree_list; tmp = tmp->next) {
+               struct mapfree_info *mf = container_of(tmp, struct mapfree_info, mf_head);
+
+               if (!strcmp(mf->mf_name, filename))
+                       mf_get_dirty(mf, min, max, min_stage, max_stage);
+       }
+       up_read(&mapfree_mutex);
+}
+
+/***************** module init stuff ************************/
+
+static
+struct task_struct *mf_thread;
+
+int __init init_xio_mapfree(void)
+{
+       XIO_DBG("init_mapfree()\n");
+       mf_thread = brick_thread_create(mapfree_thread, NULL, "xio_mapfree");
+       if (unlikely(!mf_thread)) {
+               XIO_ERR("could not create mapfree thread\n");
+               return -ENOMEM;
+       }
+       return 0;
+}
+
+void exit_xio_mapfree(void)
+{
+       XIO_DBG("exit_mapfree()\n");
+       if (likely(mf_thread)) {
+               brick_thread_stop(mf_thread);
+               mf_thread = NULL;
+       }
+}
diff --git a/include/linux/xio/lib_mapfree.h b/include/linux/xio/lib_mapfree.h
new file mode 100644
index 0000000..e7594e12
--- /dev/null
+++ b/include/linux/xio/lib_mapfree.h
@@ -0,0 +1,84 @@
+/*
+ * MARS Long Distance Replication Software
+ *
+ * Copyright (C) 2010-2014 Thomas Schoebel-Theuer
+ * Copyright (C) 2011-2014 1&1 Internet AG
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef XIO_LIB_MAPFREE_H
+#define XIO_LIB_MAPFREE_H
+
+/* Mapfree infrastructure.
+ *
+ * Purposes:
+ *
+ * 1) Open files only once when possible, do ref-counting on struct mapfree_info
+ *
+ * 2) Automatically call invalidate_mapping_pages() in the background on
+ *    "unused" areas to free resources.
+ *    Used areas can be indicated by calling mapfree_set() frequently.
+ *    Usage model: tailored to sequential logfiles.
+ *
+ * 3) Do it all in a completely decoupled manner, in order to prevent resource deadlocks.
+ *
+ * 4) Also to prevent deadlocks: always set mapping_set_gfp_mask() accordingly.
+ */
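+
+/* Typical usage (illustrative sketch only; the path below is made up and does
+ * not refer to an actual caller):
+ *
+ *      mf = mapfree_get("/mars/resource-x/log-0001", O_RDWR);
+ *      if (unlikely(!mf))
+ *              goto fail;
+ *      ...
+ *      mapfree_set(mf, still_needed_pos, new_file_size);
+ *      ...
+ *      mapfree_put(mf);
+ *
+ * mapfree_set() should be called regularly during sequential progress: the
+ * background thread may then drop anything sufficiently far below
+ * still_needed_pos from the page cache, while mf_max tracks the known size.
+ */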
+
+#include <linux/xio/xio.h>
+
+extern int mapfree_period_sec;
+extern int mapfree_grace_keep_mb;
+
+struct mapfree_info {
+       struct list_head mf_head;
+       struct list_head mf_dirty_anchor;
+       char            *mf_name;
+       struct file     *mf_filp;
+       int              mf_flags;
+       int              mf_mode;
+       atomic_t         mf_count;
+       spinlock_t       mf_lock;
+       loff_t           mf_min[2];
+       loff_t           mf_last;
+       loff_t           mf_max;
+       long long        mf_jiffies;
+};
+
+struct dirty_info {
+       struct list_head dirty_head;
+       struct aio_object *dirty_aio;
+       int dirty_stage;
+};
+
+struct mapfree_info *mapfree_get(const char *filename, int flags);
+
+void mapfree_put(struct mapfree_info *mf);
+
+void mapfree_set(struct mapfree_info *mf, loff_t min, loff_t max);
+
+void mapfree_pages(struct mapfree_info *mf, int grace_keep);
+
+/***************** dirty IOs on the fly  *****************/
+
+void mf_insert_dirty(struct mapfree_info *mf, struct dirty_info *di);
+void mf_remove_dirty(struct mapfree_info *mf, struct dirty_info *di);
+void mf_get_dirty(struct mapfree_info *mf, loff_t *min, loff_t *max, int min_stage, int max_stage);
+void mf_get_any_dirty(const char *filename, loff_t *min, loff_t *max, int min_stage, int max_stage);
+
+/***************** module init stuff ************************/
+
+int __init init_xio_mapfree(void);
+
+void exit_xio_mapfree(void);
+
+#endif
-- 
2.6.4
