[PATCH 13/13] GFS: lock_dlm module

David Teigland Thu, 01 Sep 2005 06:56:29 -0700

The lock_dlm module uses the DLM in linux/drivers/dlm/ for inter-node
locking.


Signed-off-by: Ken Preslan <[EMAIL PROTECTED]>
Signed-off-by: David Teigland <[EMAIL PROTECTED]>

---

 fs/gfs2/locking/dlm/Makefile   |    3 
 fs/gfs2/locking/dlm/lock.c     |  533 +++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/locking/dlm/lock_dlm.h |  200 +++++++++++++++
 fs/gfs2/locking/dlm/main.c     |   62 ++++
 fs/gfs2/locking/dlm/mount.c    |  218 ++++++++++++++++
 fs/gfs2/locking/dlm/plock.c    |  274 +++++++++++++++++++++
 fs/gfs2/locking/dlm/sysfs.c    |  283 +++++++++++++++++++++
 fs/gfs2/locking/dlm/thread.c   |  355 +++++++++++++++++++++++++++
 include/linux/lock_dlm_plock.h |   40 +++
 9 files changed, 1968 insertions(+)

diff -urpN a/fs/gfs2/locking/dlm/Makefile b/fs/gfs2/locking/dlm/Makefile
--- a/fs/gfs2/locking/dlm/Makefile      1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/Makefile      2005-09-01 17:48:48.143749048 +0800
@@ -0,0 +1,3 @@
+obj-$(CONFIG_GFS2_FS) += lock_dlm.o
+lock_dlm-y := lock.o main.o mount.o sysfs.o thread.o plock.o
+
diff -urpN a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c
--- a/fs/gfs2/locking/dlm/lock.c        1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/lock.c        2005-09-01 17:48:48.139749656 +0800
@@ -0,0 +1,533 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include "lock_dlm.h"
+
+static char junk_lvb[GDLM_LVB_SIZE];
+
+/*
+ * queue_complete - hand a finished request to the lock_dlm threads
+ * @lp: the lock whose request has completed
+ *
+ * Clears LFL_ACTIVE on the lock, appends it to the lockspace's "complete"
+ * list under async_lock and wakes whoever sleeps on ls->thread_wait.
+ */
+static void queue_complete(struct gdlm_lock *lp)
+{
+       struct gdlm_ls *ls = lp->ls;
+
+       clear_bit(LFL_ACTIVE, &lp->flags);
+
+       spin_lock(&ls->async_lock);
+       list_add_tail(&lp->clist, &ls->complete);
+       spin_unlock(&ls->async_lock);
+       wake_up(&ls->thread_wait);
+}
+
+/*
+ * gdlm_ast - completion callback passed to dlm_lock()
+ * @astarg: the struct gdlm_lock that was given to dlm_lock() as its argument
+ */
+static inline void gdlm_ast(void *astarg)
+{
+       struct gdlm_lock *lp = astarg;
+
+       queue_complete(lp);
+}
+
+/*
+ * gdlm_bast - blocking callback passed to dlm_lock()
+ * @astarg: the struct gdlm_lock the callback is for
+ * @mode: the mode reported by the dlm; zero is logged and ignored
+ *
+ * The first callback for a lock queues it on the lockspace's "blocking"
+ * list and records the mode; later callbacks only raise the recorded mode.
+ * bast_mode and the list are protected by async_lock.
+ */
+static inline void gdlm_bast(void *astarg, int mode)
+{
+       struct gdlm_lock *lp = astarg;
+       struct gdlm_ls *ls = lp->ls;
+       int already_queued;
+
+       if (mode == 0) {
+               printk("lock_dlm: bast mode zero %x,%"PRIx64"\n",
+                       lp->lockname.ln_type, lp->lockname.ln_number);
+               return;
+       }
+
+       spin_lock(&ls->async_lock);
+       already_queued = (lp->bast_mode != 0);
+       if (!already_queued)
+               list_add_tail(&lp->blist, &ls->blocking);
+       if (!already_queued || lp->bast_mode < mode)
+               lp->bast_mode = mode;
+       spin_unlock(&ls->async_lock);
+
+       wake_up(&ls->thread_wait);
+}
+
+/*
+ * gdlm_queue_delayed - park a request on the lockspace's "delayed" list
+ * @lp: the lock whose request is being postponed
+ *
+ * Entries are later moved to the "submit" list by gdlm_submit_delayed().
+ */
+void gdlm_queue_delayed(struct gdlm_lock *lp)
+{
+       struct gdlm_ls *ls = lp->ls;
+
+       spin_lock(&ls->async_lock);
+       list_add_tail(&lp->delay_list, &ls->delayed);
+       spin_unlock(&ls->async_lock);
+}
+
+/*
+ * make_mode - convert gfs lock-state to dlm lock-mode
+ * @lmstate: one of the LM_ST_* states
+ *
+ * Returns the matching DLM_LOCK_* mode.  An unknown state trips
+ * GDLM_ASSERT, which does not return; the trailing return only exists so
+ * that every path through this non-void function yields a value.
+ */
+
+static int16_t make_mode(int16_t lmstate)
+{
+       switch (lmstate) {
+       case LM_ST_UNLOCKED:
+               return DLM_LOCK_NL;
+       case LM_ST_EXCLUSIVE:
+               return DLM_LOCK_EX;
+       case LM_ST_DEFERRED:
+               return DLM_LOCK_CW;
+       case LM_ST_SHARED:
+               return DLM_LOCK_PR;
+       default:
+               GDLM_ASSERT(0, printk("unknown LM state %d\n", lmstate););
+       }
+       return -1;
+}
+
+/*
+ * gdlm_make_lmstate - convert dlm lock-mode to gfs lock-state
+ * @dlmmode: one of the DLM_LOCK_* modes
+ *
+ * DLM_LOCK_IV and DLM_LOCK_NL both map to LM_ST_UNLOCKED.  An unknown mode
+ * trips GDLM_ASSERT, which does not return; the trailing return only exists
+ * so that every path through this non-void function yields a value.
+ */
+
+int16_t gdlm_make_lmstate(int16_t dlmmode)
+{
+       switch (dlmmode) {
+       case DLM_LOCK_IV:
+       case DLM_LOCK_NL:
+               return LM_ST_UNLOCKED;
+       case DLM_LOCK_EX:
+               return LM_ST_EXCLUSIVE;
+       case DLM_LOCK_CW:
+               return LM_ST_DEFERRED;
+       case DLM_LOCK_PR:
+               return LM_ST_SHARED;
+       default:
+               GDLM_ASSERT(0, printk("unknown DLM mode %d\n", dlmmode););
+       }
+       return -1;
+}
+
+/* verify agreement with GFS on the current lock state, NB: DLM_LOCK_NL and
+   DLM_LOCK_IV are both considered LM_ST_UNLOCKED by GFS.
+
+   @lp:        the lock to check
+   @cur_state: the LM_ST_* state GFS believes the lock is in
+
+   A lock still at its initial DLM_LOCK_IV mode is not checked; any other
+   disagreement trips GDLM_ASSERT. */
+
+static void check_cur_state(struct gdlm_lock *lp, unsigned int cur_state)
+{
+       int16_t cur = make_mode(cur_state);
+       if (lp->cur != DLM_LOCK_IV)
+               GDLM_ASSERT(lp->cur == cur, printk("%d, %d\n", lp->cur, cur););
+}
+
+/*
+ * make_flags - build the DLM_LKF_* flags for a request
+ * @lp: the lock being requested (its lkid, lvb and flags are consulted)
+ * @gfs_flags: LM_FLAG_* bits passed in by GFS
+ * @cur: current dlm mode of the lock
+ * @req: dlm mode being requested
+ *
+ * Returns the flag word to pass to dlm_lock().
+ */
+static inline unsigned int make_flags(struct gdlm_lock *lp,
+                                     unsigned int gfs_flags,
+                                     int16_t cur, int16_t req)
+{
+       unsigned int dlm_flags = 0;
+       int converting = (lp->lksb.sb_lkid != 0);
+
+       /* both "try" variants refuse to queue; TRY_1CB also asks for a bast */
+       if (gfs_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
+               dlm_flags |= DLM_LKF_NOQUEUE;
+       if (gfs_flags & LM_FLAG_TRY_1CB)
+               dlm_flags |= DLM_LKF_NOQUEUEBAST;
+
+       if (gfs_flags & LM_FLAG_PRIORITY)
+               dlm_flags |= DLM_LKF_NOORDER | DLM_LKF_HEADQUE;
+
+       /* LM_FLAG_ANY: PR and CW are acceptable alternates for each other */
+       if (gfs_flags & LM_FLAG_ANY) {
+               switch (req) {
+               case DLM_LOCK_PR:
+                       dlm_flags |= DLM_LKF_ALTCW;
+                       break;
+               case DLM_LOCK_CW:
+                       dlm_flags |= DLM_LKF_ALTPR;
+                       break;
+               }
+       }
+
+       /* a non-zero lkid means the dlm already knows this lock: convert it */
+       if (converting) {
+               dlm_flags |= DLM_LKF_CONVERT;
+
+               /* Conversion deadlock avoidance by DLM */
+
+               if (cur != req &&
+                   cur > DLM_LOCK_NL && req > DLM_LOCK_NL &&
+                   !(dlm_flags & DLM_LKF_NOQUEUE) &&
+                   !test_bit(LFL_FORCE_PROMOTE, &lp->flags))
+                       dlm_flags |= DLM_LKF_CONVDEADLK;
+       }
+
+       if (lp->lvb)
+               dlm_flags |= DLM_LKF_VALBLK;
+
+       return dlm_flags;
+}
+
+/*
+ * make_strname - convert GFS lock numbers to a string
+ * @lockname: the GFS lock type and number
+ * @str: receives the GDLM_STRNAME_BYTES-character hex name and its length
+ *
+ * The formatted text is exactly GDLM_STRNAME_BYTES characters long, so
+ * printing it straight into str->name would put the terminating NUL one
+ * byte past the end of the array.  Format into a local buffer with room
+ * for the terminator and copy only the name bytes.
+ */
+
+static inline void make_strname(struct lm_lockname *lockname,
+                               struct gdlm_strname *str)
+{
+       char buf[GDLM_STRNAME_BYTES + 1];
+
+       snprintf(buf, sizeof(buf), "%8x%16"PRIx64, lockname->ln_type,
+                lockname->ln_number);
+       memcpy(str->name, buf, GDLM_STRNAME_BYTES);
+       str->namelen = GDLM_STRNAME_BYTES;
+}
+
+/*
+ * gdlm_create_lp - allocate and register a new gdlm_lock
+ * @ls: the lockspace the lock belongs to
+ * @name: the GFS lock name, copied into the new lock
+ * @lpp: receives the new lock on success; left untouched on failure
+ *
+ * The lock starts at mode DLM_LOCK_IV with no lvb and no hold_null lock,
+ * and is added to ls->all_locks.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name,
+                  struct gdlm_lock **lpp)
+{
+       struct gdlm_lock *lp;
+
+       /* zeroed allocation: lvb and hold_null start out NULL */
+       lp = kzalloc(sizeof(*lp), GFP_KERNEL);
+       if (!lp)
+               return -ENOMEM;
+
+       lp->lockname = *name;
+       lp->ls = ls;
+       lp->cur = DLM_LOCK_IV;
+       init_completion(&lp->ast_wait);
+       INIT_LIST_HEAD(&lp->clist);
+       INIT_LIST_HEAD(&lp->blist);
+       INIT_LIST_HEAD(&lp->delay_list);
+
+       spin_lock(&ls->async_lock);
+       list_add(&lp->all_list, &ls->all_locks);
+       ls->all_locks_count++;
+       spin_unlock(&ls->async_lock);
+
+       *lpp = lp;
+       return 0;
+}
+
+/*
+ * gdlm_delete_lp - unlink a gdlm_lock from every list and free it
+ * @lp: the lock to destroy
+ *
+ * Removes the lock from the complete, blocking and delayed lists if it is
+ * on them, and from all_locks (asserting that it is on that list), all
+ * under async_lock.  Only the gdlm_lock itself is freed; lp->lvb is not
+ * touched here.
+ */
+void gdlm_delete_lp(struct gdlm_lock *lp)
+{
+       struct gdlm_ls *ls = lp->ls;
+
+       spin_lock(&ls->async_lock);
+       if (!list_empty(&lp->clist))
+               list_del_init(&lp->clist);
+       if (!list_empty(&lp->blist))
+               list_del_init(&lp->blist);
+       if (!list_empty(&lp->delay_list))
+               list_del_init(&lp->delay_list);
+       GDLM_ASSERT(!list_empty(&lp->all_list),);
+       list_del_init(&lp->all_list);
+       ls->all_locks_count--;
+       spin_unlock(&ls->async_lock);
+
+       kfree(lp);
+}
+
+/*
+ * gdlm_get_lock - lm_get_lock operation: create the lock for a GFS name
+ * @lockspace: the struct gdlm_ls for this mount
+ * @name: the GFS lock name
+ * @lockp: receives the new lock, or NULL when creation fails
+ *
+ * Returns 0 on success or the error from gdlm_create_lp().
+ */
+int gdlm_get_lock(lm_lockspace_t *lockspace, struct lm_lockname *name,
+                 lm_lock_t **lockp)
+{
+       /* gdlm_create_lp() leaves lp untouched on failure, so start at NULL
+          rather than handing an indeterminate pointer back to the caller */
+       struct gdlm_lock *lp = NULL;
+       int error;
+
+       error = gdlm_create_lp((struct gdlm_ls *) lockspace, name, &lp);
+
+       *lockp = (lm_lock_t *) lp;
+       return error;
+}
+
+/*
+ * gdlm_put_lock - lm_put_lock operation: destroy a lock
+ * @lock: the struct gdlm_lock returned by gdlm_get_lock()
+ */
+void gdlm_put_lock(lm_lock_t *lock)
+{
+       struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+
+       gdlm_delete_lp(lp);
+}
+
+/*
+ * gdlm_do_lock - submit lp->req / lp->lkf to the dlm, or delay the request
+ * @lp: the lock; req and lkf must already be filled in
+ * @range: passed through to dlm_lock() unchanged (may be NULL)
+ *
+ * The result arrives asynchronously through gdlm_ast().  Any error from
+ * dlm_lock() other than the -EAGAIN case handled below trips GDLM_ASSERT.
+ */
+void gdlm_do_lock(struct gdlm_lock *lp, struct dlm_range *range)
+{
+       struct gdlm_ls *ls = lp->ls;
+       struct gdlm_strname str;
+       int error, bast = 1;
+
+       /*
+        * When recovery is in progress, delay lock requests for submission
+        * once recovery is done.  Requests for recovery (NOEXP) and unlocks
+        * can pass.
+        */
+
+       if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
+           !test_bit(LFL_NOBLOCK, &lp->flags) && lp->req != DLM_LOCK_NL) {
+               gdlm_queue_delayed(lp);
+               return;
+       }
+
+       /*
+        * Submit the actual lock request.
+        */
+
+       if (test_bit(LFL_NOBAST, &lp->flags))
+               bast = 0;
+
+       make_strname(&lp->lockname, &str);
+
+       set_bit(LFL_ACTIVE, &lp->flags);
+
+       log_debug("lk %x,%"PRIx64" id %x %d,%d %x", lp->lockname.ln_type,
+                 lp->lockname.ln_number, lp->lksb.sb_lkid,
+                 lp->cur, lp->req, lp->lkf);
+
+       error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf,
+                        str.name, str.namelen, 0, gdlm_ast, (void *) lp,
+                        bast ? gdlm_bast : NULL, range);
+
+       /* an immediate -EAGAIN for a NOQUEUE request is reported through the
+          normal completion path instead of being treated as a failure */
+       if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) {
+               lp->lksb.sb_status = -EAGAIN;
+               queue_complete(lp);
+               error = 0;
+       }
+
+       GDLM_ASSERT(!error,
+                  printk("%s: num=%x,%"PRIx64" err=%d cur=%d req=%d lkf=%x\n",
+                         ls->fsname, lp->lockname.ln_type,
+                         lp->lockname.ln_number, error, lp->cur, lp->req,
+                         lp->lkf););
+}
+
+/*
+ * gdlm_do_unlock - submit an unlock for @lp to the dlm
+ * @lp: the lock to unlock
+ *
+ * Marks the lock LFL_DLM_UNLOCK and LFL_ACTIVE, then calls dlm_unlock()
+ * with DLM_LKF_VALBLK when the lock has an lvb.  Any error from
+ * dlm_unlock() trips GDLM_ASSERT.
+ */
+void gdlm_do_unlock(struct gdlm_lock *lp)
+{
+       unsigned int lkf = 0;
+       int error;
+
+       set_bit(LFL_DLM_UNLOCK, &lp->flags);
+       set_bit(LFL_ACTIVE, &lp->flags);
+
+       if (lp->lvb)
+               lkf = DLM_LKF_VALBLK;
+
+       log_debug("un %x,%"PRIx64" %x %d %x", lp->lockname.ln_type,
+                 lp->lockname.ln_number, lp->lksb.sb_lkid, lp->cur, lkf);
+
+       error = dlm_unlock(lp->ls->dlm_lockspace, lp->lksb.sb_lkid, lkf,
+                          NULL, lp);
+
+       GDLM_ASSERT(!error,
+                  printk("%s: error=%d num=%x,%"PRIx64" lkf=%x flags=%lx\n",
+                         lp->ls->fsname, error, lp->lockname.ln_type,
+                         lp->lockname.ln_number, lkf, lp->flags););
+}
+
+/*
+ * gdlm_lock - lm_lock operation: request a lock state change
+ * @lock: the struct gdlm_lock
+ * @cur_state: LM_ST_* state GFS believes the lock is in (verified)
+ * @req_state: LM_ST_* state being requested
+ * @flags: LM_FLAG_* request flags
+ *
+ * Always returns LM_OUT_ASYNC; the request is handed to gdlm_do_lock().
+ */
+unsigned int gdlm_lock(lm_lock_t *lock, unsigned int cur_state,
+                      unsigned int req_state, unsigned int flags)
+{
+       struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+
+       clear_bit(LFL_DLM_CANCEL, &lp->flags);
+       /* LFL_NOBLOCK lets gdlm_do_lock() submit this request even while
+          DFL_BLOCK_LOCKS is set */
+       if (flags & LM_FLAG_NOEXP)
+               set_bit(LFL_NOBLOCK, &lp->flags);
+
+       check_cur_state(lp, cur_state);
+       lp->req = make_mode(req_state);
+       lp->lkf = make_flags(lp, flags, lp->cur, lp->req);
+
+       gdlm_do_lock(lp, NULL);
+       return LM_OUT_ASYNC;
+}
+
+/*
+ * gdlm_unlock - lm_unlock operation
+ * @lock: the struct gdlm_lock
+ * @cur_state: not used by this implementation
+ *
+ * Returns 0 without contacting the dlm when the lock is still at
+ * DLM_LOCK_IV, otherwise submits the unlock and returns LM_OUT_ASYNC.
+ */
+unsigned int gdlm_unlock(lm_lock_t *lock, unsigned int cur_state)
+{
+       struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+
+       clear_bit(LFL_DLM_CANCEL, &lp->flags);
+       if (lp->cur == DLM_LOCK_IV)
+               return 0;
+       gdlm_do_unlock(lp);
+       return LM_OUT_ASYNC;
+}
+
+/*
+ * gdlm_cancel - lm_cancel operation: try to cancel an outstanding request
+ * @lock: the struct gdlm_lock
+ *
+ * Three cases are handled in order:
+ *  - the request is still on the delayed list: take it off, mark it
+ *    LFL_CANCEL and queue it as complete;
+ *  - the lock is not active, or an unlock is in flight: nothing to do;
+ *  - otherwise ask the dlm to cancel with DLM_LKF_CANCEL.
+ * Does nothing if a dlm cancel has already been issued (LFL_DLM_CANCEL).
+ */
+void gdlm_cancel(lm_lock_t *lock)
+{
+       struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+       struct gdlm_ls *ls = lp->ls;
+       int error, delay_list = 0;
+
+       if (test_bit(LFL_DLM_CANCEL, &lp->flags))
+               return;
+
+       log_info("gdlm_cancel %x,%"PRIx64" flags %lx",
+                lp->lockname.ln_type, lp->lockname.ln_number, lp->flags);
+
+       spin_lock(&ls->async_lock);
+       if (!list_empty(&lp->delay_list)) {
+               list_del_init(&lp->delay_list);
+               delay_list = 1;
+       }
+       spin_unlock(&ls->async_lock);
+
+       if (delay_list) {
+               set_bit(LFL_CANCEL, &lp->flags);
+               set_bit(LFL_ACTIVE, &lp->flags);
+               queue_complete(lp);
+               return;
+       }
+
+       if (!test_bit(LFL_ACTIVE, &lp->flags) ||
+           test_bit(LFL_DLM_UNLOCK, &lp->flags))       {
+               log_info("gdlm_cancel skip %x,%"PRIx64" flags %lx",
+                        lp->lockname.ln_type, lp->lockname.ln_number,
+                        lp->flags);
+               return;
+       }
+
+       /* the lock is blocked in the dlm */
+
+       set_bit(LFL_DLM_CANCEL, &lp->flags);
+       set_bit(LFL_ACTIVE, &lp->flags);
+
+       error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, DLM_LKF_CANCEL,
+                          NULL, lp);
+
+       log_info("gdlm_cancel rv %d %x,%"PRIx64" flags %lx", error,
+                lp->lockname.ln_type, lp->lockname.ln_number, lp->flags);
+
+       /* -EBUSY: drop LFL_DLM_CANCEL again so a later call can retry */
+       if (error == -EBUSY)
+               clear_bit(LFL_DLM_CANCEL, &lp->flags);
+}
+
+/*
+ * gdlm_add_lvb - attach a zeroed lock value block to a lock
+ * @lp: the lock
+ *
+ * Allocates GDLM_LVB_SIZE bytes and points both lp->lvb and the lksb at
+ * them.  Any buffer already attached is overwritten, not freed.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+int gdlm_add_lvb(struct gdlm_lock *lp)
+{
+       char *lvb;
+
+       lvb = kzalloc(GDLM_LVB_SIZE, GFP_KERNEL);
+       if (!lvb)
+               return -ENOMEM;
+
+       lp->lksb.sb_lvbptr = lvb;
+       lp->lvb = lvb;
+       return 0;
+}
+
+/*
+ * gdlm_del_lvb - free a lock's value block and clear both references to it
+ * @lp: the lock
+ */
+void gdlm_del_lvb(struct gdlm_lock *lp)
+{
+       char *old_lvb = lp->lvb;
+
+       kfree(old_lvb);
+       lp->lksb.sb_lvbptr = NULL;
+       lp->lvb = NULL;
+}
+
+/* This can do a synchronous dlm request (requiring a lock_dlm thread to get
+   the completion) because gfs won't call hold_lvb() during a callback (from
+   the context of a lock_dlm thread).
+
+   Creates a second gdlm_lock on the same resource, requests it in NL mode
+   with the shared junk_lvb as its value block, and waits for the request
+   to complete.  On success the new lock is stored in lp->hold_null.
+
+   Returns 0 on success (or if an NL lock is already held), otherwise the
+   error from gdlm_create_lp() or the dlm status of the NL request. */
+
+static int hold_null_lock(struct gdlm_lock *lp)
+{
+       struct gdlm_lock *lpn = NULL;
+       int error;
+
+       if (lp->hold_null) {
+               printk("lock_dlm: lvb already held\n");
+               return 0;
+       }
+
+       error = gdlm_create_lp(lp->ls, &lp->lockname, &lpn);
+       if (error)
+               goto out;
+
+       lpn->lksb.sb_lvbptr = junk_lvb;
+       lpn->lvb = junk_lvb;
+
+       lpn->req = DLM_LOCK_NL;
+       lpn->lkf = DLM_LKF_VALBLK | DLM_LKF_EXPEDITE;
+       set_bit(LFL_NOBAST, &lpn->flags);
+       set_bit(LFL_INLOCK, &lpn->flags);
+
+       init_completion(&lpn->ast_wait);
+       gdlm_do_lock(lpn, NULL);
+       wait_for_completion(&lpn->ast_wait);
+
+       /* the request was made on lpn, so its lksb carries the result;
+          lp's lksb says nothing about this NL lock */
+       error = lpn->lksb.sb_status;
+       if (error) {
+               printk("lock_dlm: hold_null_lock dlm error %d\n", error);
+               gdlm_delete_lp(lpn);
+               lpn = NULL;
+       }
+ out:
+       lp->hold_null = lpn;
+       return error;
+}
+
+/* This cannot do a synchronous dlm request (requiring a lock_dlm thread to get
+   the completion) because gfs may call unhold_lvb() during a callback (from
+   the context of a lock_dlm thread) which could cause a deadlock since the
+   other lock_dlm thread could be engaged in recovery.
+
+   Detaches junk_lvb from the NL lock, marks it LFL_UNLOCK_DELETE, submits
+   the unlock without waiting, and clears lp->hold_null.  Asserts that an
+   NL lock is actually held. */
+
+static void unhold_null_lock(struct gdlm_lock *lp)
+{
+       struct gdlm_lock *lpn = lp->hold_null;
+
+       GDLM_ASSERT(lpn,);
+       lpn->lksb.sb_lvbptr = NULL;
+       lpn->lvb = NULL;
+       set_bit(LFL_UNLOCK_DELETE, &lpn->flags);
+       gdlm_do_unlock(lpn);
+       lp->hold_null = NULL;
+}
+
+/* Acquire a NL lock because gfs requires the value block to remain
+   intact on the resource while the lvb is "held" even if it's holding no locks
+   on the resource.
+
+   @lock: the struct gdlm_lock
+   @lvbp: receives the address of the newly allocated value block
+
+   Returns 0 on success or the error from gdlm_add_lvb() / hold_null_lock().
+   NOTE: when hold_null_lock() fails, *lvbp has already been written and the
+   buffer it points to is freed by gdlm_del_lvb(); callers must not use
+   *lvbp after an error return. */
+
+int gdlm_hold_lvb(lm_lock_t *lock, char **lvbp)
+{
+       struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+       int error;
+
+       error = gdlm_add_lvb(lp);
+       if (error)
+               return error;
+
+       *lvbp = lp->lvb;
+
+       error = hold_null_lock(lp);
+       if (error)
+               gdlm_del_lvb(lp);
+
+       return error;
+}
+
+/*
+ * gdlm_unhold_lvb - lm_unhold_lvb operation
+ * @lock: the struct gdlm_lock
+ * @lvb: not used; the buffer recorded in the lock itself is released
+ *
+ * Drops the NL lock taken by gdlm_hold_lvb(), then frees the value block.
+ */
+void gdlm_unhold_lvb(lm_lock_t *lock, char *lvb)
+{
+       struct gdlm_lock *held = (struct gdlm_lock *) lock;
+
+       unhold_null_lock(held);
+
+       gdlm_del_lvb(held);
+}
+
+/*
+ * gdlm_sync_lvb - lm_sync_lvb operation
+ * @lock: the struct gdlm_lock
+ * @lvb: not used by this implementation
+ *
+ * Does nothing unless the lock is currently held in DLM_LOCK_EX.  Otherwise
+ * issues an EX request on the already-EX lock, flagged LFL_SYNC_LVB, and
+ * blocks until ast_wait is completed.
+ */
+void gdlm_sync_lvb(lm_lock_t *lock, char *lvb)
+{
+       struct gdlm_lock *lp = (struct gdlm_lock *) lock;
+
+       if (lp->cur != DLM_LOCK_EX)
+               return;
+
+       init_completion(&lp->ast_wait);
+       set_bit(LFL_SYNC_LVB, &lp->flags);
+
+       lp->req = DLM_LOCK_EX;
+       lp->lkf = make_flags(lp, 0, lp->cur, lp->req);
+
+       gdlm_do_lock(lp, NULL);
+       wait_for_completion(&lp->ast_wait);
+}
+
+/*
+ * gdlm_submit_delayed - release every delayed request for submission
+ * @ls: the lockspace
+ *
+ * Moves each lock from the "delayed" list to the tail of the "submit" list
+ * (preserving order) under async_lock, then wakes ls->thread_wait.
+ */
+void gdlm_submit_delayed(struct gdlm_ls *ls)
+{
+       struct gdlm_lock *lp, *safe;
+
+       spin_lock(&ls->async_lock);
+       list_for_each_entry_safe(lp, safe, &ls->delayed, delay_list) {
+               list_del_init(&lp->delay_list);
+               list_add_tail(&lp->delay_list, &ls->submit);
+       }
+       spin_unlock(&ls->async_lock);
+       wake_up(&ls->thread_wait);
+}
+
+/*
+ * gdlm_release_all_locks - free every lock still on ls->all_locks
+ * @ls: the lockspace
+ *
+ * Each lock's value block is freed too, except when it is the static
+ * junk_lvb.  Returns the number of locks that were freed.
+ */
+int gdlm_release_all_locks(struct gdlm_ls *ls)
+{
+       struct gdlm_lock *cur, *next;
+       int freed = 0;
+
+       spin_lock(&ls->async_lock);
+       list_for_each_entry_safe(cur, next, &ls->all_locks, all_list) {
+               list_del_init(&cur->all_list);
+
+               /* kfree(NULL) is a no-op, so only junk_lvb needs excluding */
+               if (cur->lvb != junk_lvb)
+                       kfree(cur->lvb);
+               kfree(cur);
+               freed++;
+       }
+       spin_unlock(&ls->async_lock);
+
+       return freed;
+}
+
diff -urpN a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
--- a/fs/gfs2/locking/dlm/lock_dlm.h    1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/lock_dlm.h    2005-09-01 17:48:48.147748440 +0800
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef LOCK_DLM_DOT_H
+#define LOCK_DLM_DOT_H
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/socket.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/kobject.h>
+#include <linux/fcntl.h>
+#include <linux/wait.h>
+#include <net/sock.h>
+
+#include <linux/dlm.h>
+#include "../harness/lm_interface.h"
+
+/*
+ * Internally, we prefix things with gdlm_ and GDLM_ (for gfs-dlm) since a
+ * prefix of lock_dlm_ gets awkward.  Externally, GFS refers to this module
+ * as "lock_dlm".
+ */
+
+#define GDLM_STRNAME_BYTES     24
+#define GDLM_LVB_SIZE          32
+#define GDLM_DROP_COUNT                50000
+#define GDLM_DROP_PERIOD       60
+
+/* GFS uses 12 bytes to identify a resource (32 bit type + 64 bit number).
+   We sprintf these numbers into a 24 byte string of hex values to make them
+   human-readable (to make debugging simpler.)
+
+   name holds exactly GDLM_STRNAME_BYTES characters with no room for a
+   terminating NUL; namelen is the length passed to the dlm. */
+
+struct gdlm_strname {
+       unsigned char           name[GDLM_STRNAME_BYTES];
+       unsigned short          namelen;
+};
+
+#define DFL_BLOCK_LOCKS                0
+#define DFL_JOIN_DONE          1
+#define DFL_LEAVE_DONE         2
+#define DFL_TERMINATE          3
+#define DFL_SPECTATOR          4
+#define DFL_WITHDRAW           5
+
+/*
+ * Per-mount lockspace state.
+ *
+ * flags holds the DFL_* bits above.  clustername and fsname are the two
+ * halves of the "cluster:fs" table name given at mount time.  fscb, fsdata
+ * and fsflags are the callback, private data and flags supplied by the
+ * caller of lm_mount.
+ *
+ * async_lock protects the complete, blocking, delayed, submit and all_locks
+ * lists and all_locks_count.  thread_wait is woken whenever work is queued
+ * on those lists; wait_control is what mount, unmount and withdraw sleep on
+ * while waiting for a DFL_* bit to be set.
+ */
+struct gdlm_ls {
+       uint32_t                id;
+       int                     jid;
+       int                     first;
+       int                     first_done;
+       unsigned long           flags;
+       struct kobject          kobj;
+       char                    clustername[128];
+       char                    fsname[128];
+       int                     fsflags;
+       dlm_lockspace_t         *dlm_lockspace;
+       lm_callback_t           fscb;
+       lm_fsdata_t             *fsdata;
+       int                     recover_jid;
+       int                     recover_done;
+       spinlock_t              async_lock;
+       struct list_head        complete;
+       struct list_head        blocking;
+       struct list_head        delayed;
+       struct list_head        submit;
+       struct list_head        all_locks;
+       uint32_t                all_locks_count;
+       wait_queue_head_t       wait_control;
+       struct task_struct      *thread1;
+       struct task_struct      *thread2;
+       wait_queue_head_t       thread_wait;
+       unsigned long           drop_time;
+       int                     drop_locks_count;
+       int                     drop_locks_period;
+};
+
+#define LFL_NOBLOCK            0
+#define LFL_NOCACHE            1
+#define LFL_DLM_UNLOCK         2
+#define LFL_DLM_CANCEL         3
+#define LFL_SYNC_LVB           4
+#define LFL_FORCE_PROMOTE      5
+#define LFL_REREQUEST          6
+#define LFL_ACTIVE             7
+#define LFL_INLOCK             8
+#define LFL_CANCEL             9
+#define LFL_NOBAST             10
+#define LFL_HEADQUE            11
+#define LFL_UNLOCK_DELETE      12
+
+/*
+ * One lock as seen by lock_dlm.
+ *
+ * lvb is the value block buffer (NULL when none is attached) and is also
+ * referenced from lksb.sb_lvbptr.  cur and req are DLM_LOCK_* modes; cur
+ * starts out as DLM_LOCK_IV.  The list_head members link the lock onto the
+ * corresponding lists in struct gdlm_ls.
+ */
+struct gdlm_lock {
+       struct gdlm_ls          *ls;
+       struct lm_lockname      lockname;
+       char                    *lvb;
+       struct dlm_lksb         lksb;
+
+       int16_t                 cur;
+       int16_t                 req;
+       int16_t                 prev_req;
+       uint32_t                lkf;            /* dlm flags DLM_LKF_ */
+       unsigned long           flags;          /* lock_dlm flags LFL_ */
+
+       int                     bast_mode;      /* protected by async_lock */
+       struct completion       ast_wait;
+
+       struct list_head        clist;          /* complete */
+       struct list_head        blist;          /* blocking */
+       struct list_head        delay_list;     /* delayed */
+       struct list_head        all_list;       /* all locks for the fs */
+       struct gdlm_lock        *hold_null;     /* NL lock for hold_lvb */
+};
+
+#if (BITS_PER_LONG == 64)
+#define PRIx64 "lx"
+#else
+#define PRIx64 "Lx"
+#endif
+
+/*
+ * GDLM_ASSERT(x, do) - if x is false, print the failed expression with its
+ * file, line and the current jiffies, run the statements in "do" (which may
+ * be empty), then BUG().
+ *
+ * NOTE: the expansion is a bare { } block rather than do { } while (0), so
+ * "if (c) GDLM_ASSERT(...); else ..." would not compile.
+ */
+#define GDLM_ASSERT(x, do) \
+{ \
+  if (!(x)) \
+  { \
+    printk("\nlock_dlm:  Assertion failed on line %d of file %s\n" \
+           "lock_dlm:  assertion:  \"%s\"\n" \
+           "lock_dlm:  time = %lu\n", \
+           __LINE__, __FILE__, #x, jiffies); \
+    {do} \
+    printk("\n"); \
+    BUG(); \
+    panic("lock_dlm:  Record message above and reboot.\n"); \
+  } \
+}
+
+#define log_print(lev, fmt, arg...) printk(lev "lock_dlm: " fmt "\n" , ## arg)
+#define log_info(fmt, arg...)  log_print(KERN_INFO , fmt , ## arg)
+#define log_error(fmt, arg...) log_print(KERN_ERR , fmt , ## arg)
+#ifdef LOCK_DLM_LOG_DEBUG
+#define log_debug(fmt, arg...) log_print(KERN_DEBUG , fmt , ## arg)
+#else
+#define log_debug(fmt, arg...)
+#endif
+
+/* sysfs.c */
+
+int gdlm_sysfs_init(void);
+void gdlm_sysfs_exit(void);
+int gdlm_kobject_setup(struct gdlm_ls *);
+void gdlm_kobject_release(struct gdlm_ls *);
+
+/* thread.c */
+
+int gdlm_init_threads(struct gdlm_ls *);
+void gdlm_release_threads(struct gdlm_ls *);
+
+/* lock.c */
+
+int16_t gdlm_make_lmstate(int16_t);
+void gdlm_queue_delayed(struct gdlm_lock *);
+void gdlm_submit_delayed(struct gdlm_ls *);
+int gdlm_release_all_locks(struct gdlm_ls *);
+/* allocates a gdlm_lock for the named resource; returned via the last arg */
+int gdlm_create_lp(struct gdlm_ls *, struct lm_lockname *,
+                  struct gdlm_lock **);
+void gdlm_delete_lp(struct gdlm_lock *);
+int gdlm_add_lvb(struct gdlm_lock *);
+void gdlm_del_lvb(struct gdlm_lock *);
+void gdlm_do_lock(struct gdlm_lock *, struct dlm_range *);
+void gdlm_do_unlock(struct gdlm_lock *);
+
+int gdlm_get_lock(lm_lockspace_t *, struct lm_lockname *, lm_lock_t **);
+void gdlm_put_lock(lm_lock_t *);
+unsigned int gdlm_lock(lm_lock_t *, unsigned int, unsigned int, unsigned int);
+unsigned int gdlm_unlock(lm_lock_t *, unsigned int);
+void gdlm_cancel(lm_lock_t *);
+int gdlm_hold_lvb(lm_lock_t *, char **);
+void gdlm_unhold_lvb(lm_lock_t *, char *);
+void gdlm_sync_lvb(lm_lock_t *, char *);
+
+/* plock.c */
+
+int gdlm_plock_init(void);
+void gdlm_plock_exit(void);
+int gdlm_plock(lm_lockspace_t *, struct lm_lockname *, struct file *, int,
+               struct file_lock *);
+int gdlm_plock_get(lm_lockspace_t *, struct lm_lockname *, struct file *,
+               struct file_lock *);
+int gdlm_punlock(lm_lockspace_t *, struct lm_lockname *, struct file *,
+               struct file_lock *);
+#endif
+
diff -urpN a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
--- a/fs/gfs2/locking/dlm/main.c        1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/main.c        2005-09-01 17:48:48.140749504 +0800
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/init.h>
+
+#include "lock_dlm.h"
+
+extern int gdlm_drop_count;
+extern int gdlm_drop_period;
+
+extern struct lm_lockops gdlm_ops;
+
+/*
+ * init_lock_dlm - module init
+ *
+ * Registers the lock_dlm protocol, then sets up sysfs and the plock
+ * device.  Each failure undoes the steps that already succeeded and
+ * returns the error.  On success the drop count and period defaults are
+ * installed.
+ */
+int __init init_lock_dlm(void)
+{
+       int error;
+
+       error = lm_register_proto(&gdlm_ops);
+       if (error) {
+               printk("lock_dlm:  can't register protocol: %d\n", error);
+               goto out;
+       }
+
+       error = gdlm_sysfs_init();
+       if (error)
+               goto out_proto;
+
+       error = gdlm_plock_init();
+       if (error)
+               goto out_sysfs;
+
+       gdlm_drop_count = GDLM_DROP_COUNT;
+       gdlm_drop_period = GDLM_DROP_PERIOD;
+
+       printk("Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
+       return 0;
+
+ out_sysfs:
+       gdlm_sysfs_exit();
+ out_proto:
+       lm_unregister_proto(&gdlm_ops);
+ out:
+       return error;
+}
+
+/* exit_lock_dlm - module exit; undoes init_lock_dlm() in reverse order */
+void __exit exit_lock_dlm(void)
+{
+       gdlm_plock_exit();
+       gdlm_sysfs_exit();
+       lm_unregister_proto(&gdlm_ops);
+}
+
+module_init(init_lock_dlm);
+module_exit(exit_lock_dlm);
+
+MODULE_DESCRIPTION("GFS DLM Locking Module");
+MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_LICENSE("GPL");
+
diff -urpN a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
--- a/fs/gfs2/locking/dlm/mount.c       1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/mount.c       2005-09-01 17:48:48.140749504 +0800
@@ -0,0 +1,218 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include "lock_dlm.h"
+
+int gdlm_drop_count;
+int gdlm_drop_period;
+struct lm_lockops gdlm_ops;
+
+
+/*
+ * init_gdlm - allocate and initialise the per-mount gdlm_ls
+ * @cb: stored as ls->fscb
+ * @fsdata: stored as ls->fsdata
+ * @flags: stored as ls->fsflags
+ * @table_name: "<clustername>:<fsname>"
+ *
+ * Returns the new structure, or NULL when the allocation fails or
+ * table_name contains no ':'.
+ */
+static struct gdlm_ls *init_gdlm(lm_callback_t cb, lm_fsdata_t *fsdata,
+                                int flags, char *table_name)
+{
+       struct gdlm_ls *ls;
+       char buf[256], *p;
+
+       ls = kmalloc(sizeof(struct gdlm_ls), GFP_KERNEL);
+       if (!ls)
+               return NULL;
+
+       memset(ls, 0, sizeof(struct gdlm_ls));
+
+       ls->drop_locks_count = gdlm_drop_count;
+       ls->drop_locks_period = gdlm_drop_period;
+
+       ls->fscb = cb;
+       ls->fsdata = fsdata;
+       ls->fsflags = flags;
+
+       spin_lock_init(&ls->async_lock);
+
+       INIT_LIST_HEAD(&ls->complete);
+       INIT_LIST_HEAD(&ls->blocking);
+       INIT_LIST_HEAD(&ls->delayed);
+       INIT_LIST_HEAD(&ls->submit);
+       INIT_LIST_HEAD(&ls->all_locks);
+
+       init_waitqueue_head(&ls->thread_wait);
+       init_waitqueue_head(&ls->wait_control);
+       ls->thread1 = NULL;
+       ls->thread2 = NULL;
+       ls->drop_time = jiffies;
+       ls->jid = -1;
+
+       strncpy(buf, table_name, 256);
+       buf[255] = '\0';
+
+       p = strstr(buf, ":");
+       if (!p) {
+               printk("lock_dlm: invalid table_name \"%s\"\n", table_name);
+               kfree(ls);
+               return NULL;
+       }
+       *p = '\0';
+       p++;
+
+       /* Either half of the table name can be up to 254 characters, longer
+          than the destination arrays.  Copy at most size - 1 bytes so the
+          last byte, already zero from the memset above, always terminates
+          the string. */
+       strncpy(ls->clustername, buf, sizeof(ls->clustername) - 1);
+       strncpy(ls->fsname, p, sizeof(ls->fsname) - 1);
+
+       return ls;
+}
+
+/*
+ * gdlm_mount - lm_mount operation
+ * @table_name: "<clustername>:<fsname>"
+ * @host_data: not used by this function
+ * @cb, @fsdata, @flags: stored in the new gdlm_ls
+ * @min_lvb_size: must not exceed GDLM_LVB_SIZE
+ * @lockstruct: filled in on success
+ *
+ * Sets up the threads, the dlm lockspace and the kobject, emits a
+ * KOBJ_MOUNT uevent and then sleeps until DFL_JOIN_DONE is set.
+ *
+ * Returns 0 on success.  -ENOMEM is returned both for allocation failure
+ * and for an oversized min_lvb_size or malformed table_name (init_gdlm()
+ * returning NULL); -ERESTARTSYS if DFL_TERMINATE is set once the wait
+ * ends; otherwise the error of the step that failed.
+ */
+static int gdlm_mount(char *table_name, char *host_data,
+                       lm_callback_t cb, lm_fsdata_t *fsdata,
+                       unsigned int min_lvb_size, int flags,
+                       struct lm_lockstruct *lockstruct)
+{
+       struct gdlm_ls *ls;
+       int error = -ENOMEM;
+
+       if (min_lvb_size > GDLM_LVB_SIZE)
+               goto out;
+
+       ls = init_gdlm(cb, fsdata, flags, table_name);
+       if (!ls)
+               goto out;
+
+       error = gdlm_init_threads(ls);
+       if (error)
+               goto out_free;
+
+       error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname),
+                                 &ls->dlm_lockspace, 0, GDLM_LVB_SIZE);
+       if (error) {
+               printk("lock_dlm: dlm_new_lockspace error %d\n", error);
+               goto out_thread;
+       }
+
+       error = gdlm_kobject_setup(ls);
+       if (error)
+               goto out_dlm;
+       kobject_uevent(&ls->kobj, KOBJ_MOUNT, NULL);
+
+       /* Now we depend on userspace to notice the new mount,
+          join the appropriate group, and do a write to our sysfs
+          "mounted" or "terminate" file.  Before the start, userspace
+          must set "jid" and "first". */
+
+       error = wait_event_interruptible(ls->wait_control,
+                       test_bit(DFL_JOIN_DONE, &ls->flags));
+       if (error)
+               goto out_sysfs;
+
+       if (test_bit(DFL_TERMINATE, &ls->flags)) {
+               error = -ERESTARTSYS;
+               goto out_sysfs;
+       }
+
+       lockstruct->ls_jid = ls->jid;
+       lockstruct->ls_first = ls->first;
+       lockstruct->ls_lockspace = ls;
+       lockstruct->ls_ops = &gdlm_ops;
+       lockstruct->ls_flags = 0;
+       lockstruct->ls_lvb_size = GDLM_LVB_SIZE;
+       return 0;
+
+       /* unwind in the reverse order of setup */
+ out_sysfs:
+       gdlm_kobject_release(ls);
+ out_dlm:
+       dlm_release_lockspace(ls->dlm_lockspace, 2);
+ out_thread:
+       gdlm_release_threads(ls);
+ out_free:
+       kfree(ls);
+ out:
+       return error;
+}
+
+/*
+ * gdlm_unmount - lm_unmount operation
+ * @lockspace: the struct gdlm_ls for this mount
+ *
+ * If DFL_WITHDRAW is set, only the kobject is released and the gdlm_ls
+ * freed; gdlm_withdraw() has already released the lockspace, threads and
+ * locks.  Otherwise a KOBJ_UMOUNT uevent is sent, the caller sleeps until
+ * DFL_LEAVE_DONE is set (the result of the interruptible wait is not
+ * checked), and everything is torn down.
+ */
+static void gdlm_unmount(lm_lockspace_t *lockspace)
+{
+       struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+       int rv;
+
+       log_debug("unmount flags %lx", ls->flags);
+
+       if (test_bit(DFL_WITHDRAW, &ls->flags)) {
+               gdlm_kobject_release(ls);
+               goto out;
+       }
+
+       kobject_uevent(&ls->kobj, KOBJ_UMOUNT, NULL);
+
+       wait_event_interruptible(ls->wait_control,
+                                test_bit(DFL_LEAVE_DONE, &ls->flags));
+
+       gdlm_kobject_release(ls);
+       dlm_release_lockspace(ls->dlm_lockspace, 2);
+       gdlm_release_threads(ls);
+       rv = gdlm_release_all_locks(ls);
+       if (rv)
+               log_info("lm_dlm_unmount: %d stray locks freed", rv);
+ out:
+       kfree(ls);
+}
+
+/*
+ * gdlm_recovery_done - lm_recovery_done operation
+ * @lockspace: the struct gdlm_ls for this mount
+ * @jid: recorded in ls->recover_done
+ * @message: not used by this implementation
+ *
+ * Records the jid and emits a KOBJ_CHANGE uevent.
+ */
+static void gdlm_recovery_done(lm_lockspace_t *lockspace, unsigned int jid,
+                               unsigned int message)
+{
+       struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+       ls->recover_done = jid;
+       kobject_uevent(&ls->kobj, KOBJ_CHANGE, NULL);
+}
+
+/*
+ * gdlm_others_may_mount - lm_others_may_mount operation
+ * @lockspace: the struct gdlm_ls for this mount
+ *
+ * Sets ls->first_done and emits a KOBJ_CHANGE uevent.
+ */
+static void gdlm_others_may_mount(lm_lockspace_t *lockspace)
+{
+       struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+       ls->first_done = 1;
+       kobject_uevent(&ls->kobj, KOBJ_CHANGE, NULL);
+}
+
+/*
+ * gdlm_withdraw - lm_withdraw operation
+ * @lockspace: the struct gdlm_ls for this mount
+ *
+ * Emits KOBJ_OFFLINE, sleeps until DFL_WITHDRAW is set (the result of the
+ * interruptible wait is not checked), then releases the dlm lockspace, the
+ * threads and all locks and emits KOBJ_UMOUNT.  The gdlm_ls itself and its
+ * kobject are left for gdlm_unmount().
+ */
+static void gdlm_withdraw(lm_lockspace_t *lockspace)
+{
+       struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+
+       /* userspace suspends locking on all other members */
+
+       kobject_uevent(&ls->kobj, KOBJ_OFFLINE, NULL);
+
+       wait_event_interruptible(ls->wait_control,
+                                test_bit(DFL_WITHDRAW, &ls->flags));
+
+       dlm_release_lockspace(ls->dlm_lockspace, 2);
+       gdlm_release_threads(ls);
+       gdlm_release_all_locks(ls);
+
+       kobject_uevent(&ls->kobj, KOBJ_UMOUNT, NULL);
+
+       /* userspace leaves the mount group, we don't need to wait for
+          that to complete */
+}
+
+/*
+ * The lock_dlm operations table.  main.c registers it with
+ * lm_register_proto(), and gdlm_mount() hands it back to the caller in
+ * lockstruct->ls_ops.
+ */
+struct lm_lockops gdlm_ops = {
+       .lm_proto_name = "lock_dlm",
+       .lm_mount = gdlm_mount,
+       .lm_others_may_mount = gdlm_others_may_mount,
+       .lm_unmount = gdlm_unmount,
+       .lm_withdraw = gdlm_withdraw,
+       .lm_get_lock = gdlm_get_lock,
+       .lm_put_lock = gdlm_put_lock,
+       .lm_lock = gdlm_lock,
+       .lm_unlock = gdlm_unlock,
+       .lm_plock = gdlm_plock,
+       .lm_punlock = gdlm_punlock,
+       .lm_plock_get = gdlm_plock_get,
+       .lm_cancel = gdlm_cancel,
+       .lm_hold_lvb = gdlm_hold_lvb,
+       .lm_unhold_lvb = gdlm_unhold_lvb,
+       .lm_sync_lvb = gdlm_sync_lvb,
+       .lm_recovery_done = gdlm_recovery_done,
+       .lm_owner = THIS_MODULE,
+};
+
diff -urpN a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
--- a/fs/gfs2/locking/dlm/plock.c       1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/plock.c       2005-09-01 17:48:48.148748288 +0800
@@ -0,0 +1,274 @@
+/*
+ * Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include "lock_dlm.h"
+#include <linux/lock_dlm_plock.h>
+
+#include <linux/miscdevice.h>
+
+/* ops_lock protects send_list and recv_list */
+static spinlock_t ops_lock;
+/* requests queued by gdlm_plock()/gdlm_punlock(), not yet read by userspace */
+static struct list_head send_list;
+/* requests handed out by dev_read(), waiting for a matching dev_write() */
+static struct list_head recv_list;
+/* woken when a request is added to send_list; dev_poll() polls on it */
+static wait_queue_head_t send_wq;
+/* woken by dev_write() when a result arrives; requesters sleep on it */
+static wait_queue_head_t recv_wq;
+
+/* One posix lock request in flight between the kernel and userspace. */
+struct plock_op {
+       struct list_head list;          /* link on send_list or recv_list */
+       int done;                       /* set to 1 by dev_write() on a match */
+       struct gdlm_plock_info info;    /* request out, overwritten by reply */
+};
+
+/* Stamp info with the kernel's plock interface version (major, minor,
+   patch in version[0..2]). */
+static inline void set_version(struct gdlm_plock_info *info)
+{
+       __u32 *v = info->version;
+
+       v[0] = GDLM_PLOCK_VERSION_MAJOR;
+       v[1] = GDLM_PLOCK_VERSION_MINOR;
+       v[2] = GDLM_PLOCK_VERSION_PATCH;
+}
+
+/* Validate the version carried in a message from userspace.
+   Accepted when the major number equals the kernel's and the minor number
+   is not newer than the kernel's; the patch level is not compared.
+   Returns 0 if acceptable, otherwise logs both versions and returns
+   -EINVAL. */
+static int check_version(struct gdlm_plock_info *info)
+{
+       if ((GDLM_PLOCK_VERSION_MAJOR == info->version[0]) &&
+           (GDLM_PLOCK_VERSION_MINOR >= info->version[1]))
+               return 0;
+
+       log_error("plock device version mismatch: "
+                 "kernel (%u.%u.%u), user (%u.%u.%u)",
+                 GDLM_PLOCK_VERSION_MAJOR,
+                 GDLM_PLOCK_VERSION_MINOR,
+                 GDLM_PLOCK_VERSION_PATCH,
+                 info->version[0],
+                 info->version[1],
+                 info->version[2]);
+       return -EINVAL;
+}
+
+/* lm_plock operation: request a posix lock through userspace.
+   Builds a GDLM_PLOCK_OP_LOCK request from fl, queues it on send_list for
+   the reader of the plock misc device and sleeps (uninterruptibly) until
+   dev_write() marks the op done.  When the returned rv is 0 the lock is
+   also recorded locally with posix_lock_file_wait(); a failure there is
+   only logged.
+   Returns -ENOMEM if the op cannot be allocated, otherwise the rv value
+   written back by userspace. */
+int gdlm_plock(lm_lockspace_t *lockspace, struct lm_lockname *name,
+              struct file *file, int cmd, struct file_lock *fl)
+{
+       struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+       struct plock_op *op;
+       int rv;
+
+       op = kzalloc(sizeof(*op), GFP_KERNEL);
+       if (!op)
+               return -ENOMEM;
+
+       log_debug("en plock %x,%"PRIx64"", name->ln_type, name->ln_number);
+
+       set_version(&op->info);
+       op->info.optype         = GDLM_PLOCK_OP_LOCK;
+       /* fl_owner is a pointer: go through unsigned long so the narrowing
+          to 32 bits is explicit and well formed on 64-bit builds */
+       op->info.pid            = (uint32_t) (unsigned long) fl->fl_owner;
+       op->info.ex             = (fl->fl_type == F_WRLCK);
+       op->info.wait           = IS_SETLKW(cmd);
+       op->info.fsid           = ls->id;
+       op->info.number         = name->ln_number;
+       op->info.start          = fl->fl_start;
+       op->info.end            = fl->fl_end;
+
+       INIT_LIST_HEAD(&op->list);
+       spin_lock(&ops_lock);
+       list_add_tail(&op->list, &send_list);
+       spin_unlock(&ops_lock);
+       wake_up(&send_wq);
+
+       wait_event(recv_wq, (op->done != 0));
+
+       /* dev_write() unlinks the op before setting done, so finding it
+          still linked here is unexpected; unlink it before freeing */
+       spin_lock(&ops_lock);
+       if (!list_empty(&op->list)) {
+               printk("plock op on list\n");
+               list_del(&op->list);
+       }
+       spin_unlock(&ops_lock);
+
+       log_debug("ex plock done %d rv %d", op->done, op->info.rv);
+
+       rv = op->info.rv;
+
+       if (!rv) {
+               if (posix_lock_file_wait(file, fl) < 0)
+                       log_error("gdlm_plock: vfs lock error %x,%"PRIx64"",
+                                 name->ln_type, name->ln_number);
+       }
+
+       kfree(op);
+       return rv;
+}
+
+/* lm_punlock operation: release a posix lock through userspace.
+   The local vfs lock state is updated first with posix_lock_file_wait()
+   (a failure is only logged), then a GDLM_PLOCK_OP_UNLOCK request is
+   queued on send_list and the caller sleeps (uninterruptibly) until
+   dev_write() marks the op done.
+   Returns -ENOMEM if the op cannot be allocated, otherwise the rv value
+   written back by userspace. */
+int gdlm_punlock(lm_lockspace_t *lockspace, struct lm_lockname *name,
+                struct file *file, struct file_lock *fl)
+{
+       struct gdlm_ls *ls = (struct gdlm_ls *) lockspace;
+       struct plock_op *op;
+       int rv;
+
+       op = kzalloc(sizeof(*op), GFP_KERNEL);
+       if (!op)
+               return -ENOMEM;
+
+       log_debug("en punlock %x,%"PRIx64"", name->ln_type, name->ln_number);
+
+       if (posix_lock_file_wait(file, fl) < 0)
+               log_error("gdlm_punlock: vfs unlock error %x,%"PRIx64"",
+                         name->ln_type, name->ln_number);
+
+       set_version(&op->info);
+       op->info.optype         = GDLM_PLOCK_OP_UNLOCK;
+       /* fl_owner is a pointer: go through unsigned long so the narrowing
+          to 32 bits is explicit and well formed on 64-bit builds */
+       op->info.pid            = (uint32_t) (unsigned long) fl->fl_owner;
+       op->info.fsid           = ls->id;
+       op->info.number         = name->ln_number;
+       op->info.start          = fl->fl_start;
+       op->info.end            = fl->fl_end;
+
+       INIT_LIST_HEAD(&op->list);
+       spin_lock(&ops_lock);
+       list_add_tail(&op->list, &send_list);
+       spin_unlock(&ops_lock);
+       wake_up(&send_wq);
+
+       wait_event(recv_wq, (op->done != 0));
+
+       /* dev_write() unlinks the op before setting done, so finding it
+          still linked here is unexpected; unlink it before freeing */
+       spin_lock(&ops_lock);
+       if (!list_empty(&op->list)) {
+               printk("plock op on list\n");
+               list_del(&op->list);
+       }
+       spin_unlock(&ops_lock);
+
+       log_debug("ex punlock done %d rv %d", op->done, op->info.rv);
+
+       rv = op->info.rv;
+
+       kfree(op);
+       return rv;
+}
+
+/* lm_plock_get operation: not implemented, always returns -ENOSYS. */
+int gdlm_plock_get(lm_lockspace_t *lockspace, struct lm_lockname *name,
+                  struct file *file, struct file_lock *fl)
+{
+       return -ENOSYS;
+}
+
+/* A read hands the oldest request on send_list to userspace.
+   The op is moved to recv_list and its info is copied out.
+   Returns sizeof(struct gdlm_plock_info) on success, -EINVAL if the user
+   buffer is too small, -EAGAIN if nothing is queued and -EFAULT if the
+   copy to userspace fails (the op has already been moved to recv_list by
+   then). */
+static ssize_t dev_read(struct file *file, char __user *u, size_t count,
+                       loff_t *ppos)
+{
+       struct gdlm_plock_info info;
+       struct plock_op *op;
+
+       if (count < sizeof(info))
+               return -EINVAL;
+
+       spin_lock(&ops_lock);
+       if (list_empty(&send_list)) {
+               spin_unlock(&ops_lock);
+               return -EAGAIN;
+       }
+       op = list_entry(send_list.next, struct plock_op, list);
+       list_move(&op->list, &recv_list);
+       /* snapshot under the lock; copy_to_user() runs after unlocking */
+       memcpy(&info, &op->info, sizeof(info));
+       spin_unlock(&ops_lock);
+
+       log_debug("send %"PRIx64" op %d ex %d wait %d", info.number,
+                 info.optype, info.ex, info.wait);
+
+       if (copy_to_user(u, &info, sizeof(info)))
+               return -EFAULT;
+       return sizeof(info);
+}
+
+/* A write delivers one plock result from userspace.
+   The first op on recv_list with the same fsid and number is unlinked,
+   marked done and has its info replaced by the reply; sleepers on recv_wq
+   are then woken.  A reply with no matching op is only reported with
+   printk.
+   Returns count on success (matched or not), -EINVAL for a wrong size or
+   version mismatch, -EFAULT if the copy from userspace fails. */
+static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
+                        loff_t *ppos)
+{
+       struct gdlm_plock_info reply;
+       struct plock_op *op;
+       int matched = 0;
+
+       if (count != sizeof(reply))
+               return -EINVAL;
+
+       if (copy_from_user(&reply, u, sizeof(reply)))
+               return -EFAULT;
+
+       if (check_version(&reply))
+               return -EINVAL;
+
+       log_debug("recv %"PRIx64" op %d ex %d wait %d", reply.number,
+                 reply.optype, reply.ex, reply.wait);
+
+       spin_lock(&ops_lock);
+       list_for_each_entry(op, &recv_list, list) {
+               if (op->info.fsid != reply.fsid ||
+                   op->info.number != reply.number)
+                       continue;
+
+               list_del_init(&op->list);
+               matched = 1;
+               op->done = 1;
+               memcpy(&op->info, &reply, sizeof(reply));
+               break;
+       }
+       spin_unlock(&ops_lock);
+
+       if (!matched)
+               printk("gdlm dev_write no op %x %"PRIx64"\n", reply.fsid,
+                       reply.number);
+       else
+               wake_up(&recv_wq);
+       return count;
+}
+
+/* Poll handler: the device is readable (POLLIN | POLLRDNORM) whenever
+   send_list is non-empty; otherwise 0 is returned after registering on
+   send_wq. */
+static unsigned int dev_poll(struct file *file, poll_table *wait)
+{
+       unsigned int mask = 0;
+
+       poll_wait(file, &send_wq, wait);
+
+       spin_lock(&ops_lock);
+       if (!list_empty(&send_list))
+               mask = POLLIN | POLLRDNORM;
+       spin_unlock(&ops_lock);
+
+       return mask;
+}
+
+/* File operations of the plock misc device: userspace reads requests,
+   writes results and polls for pending requests. */
+static struct file_operations dev_fops = {
+       .read    = dev_read,
+       .write   = dev_write,
+       .poll    = dev_poll,
+       .owner   = THIS_MODULE
+};
+
+/* Misc device named GDLM_PLOCK_MISC_NAME with a dynamically assigned
+   minor number, served by dev_fops. */
+static struct miscdevice plock_dev_misc = {
+       .minor = MISC_DYNAMIC_MINOR,
+       .name = GDLM_PLOCK_MISC_NAME,
+       .fops = &dev_fops
+};
+
+/* Initialise the plock lock, lists and wait queues and register the plock
+   misc device.
+   Returns 0 on success or the error from misc_register(). */
+int gdlm_plock_init(void)
+{
+       int rv;
+
+       spin_lock_init(&ops_lock);
+       INIT_LIST_HEAD(&send_list);
+       INIT_LIST_HEAD(&recv_list);
+       init_waitqueue_head(&send_wq);
+       init_waitqueue_head(&recv_wq);
+
+       rv = misc_register(&plock_dev_misc);
+       if (rv)
+               /* terminate the line so later messages are not appended */
+               printk("gdlm_plock_init: misc_register failed %d\n", rv);
+       return rv;
+}
+
+/* Unregister the plock misc device; a failure is only reported. */
+void gdlm_plock_exit(void)
+{
+       if (misc_deregister(&plock_dev_misc) < 0)
+               /* terminate the line so later messages are not appended */
+               printk("gdlm_plock_exit: misc_deregister failed\n");
+}
+
diff -urpN a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
--- a/fs/gfs2/locking/dlm/sysfs.c       1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/sysfs.c       2005-09-01 17:48:48.140749504 +0800
@@ -0,0 +1,283 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <linux/ctype.h>
+#include <linux/stat.h>
+
+#include "lock_dlm.h"
+
+/* sysfs "block" read: prints 1 if DFL_BLOCK_LOCKS is set, else 0. */
+static ssize_t gdlm_block_show(struct gdlm_ls *ls, char *buf)
+{
+       int blocked = test_bit(DFL_BLOCK_LOCKS, &ls->flags) ? 1 : 0;
+
+       return sprintf(buf, "%d\n", blocked);
+}
+
+/* sysfs "block" write.
+   1 sets DFL_BLOCK_LOCKS; 0 clears it and calls gdlm_submit_delayed();
+   any other value is rejected.
+   Returns len on success or -EINVAL.
+   NOTE(review): the input is parsed with simple_strtol() and no end
+   pointer, so text that does not start with a number parses as 0. */
+static ssize_t gdlm_block_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+       ssize_t ret = len;
+       int val;
+
+       val = simple_strtol(buf, NULL, 0);
+
+       if (val == 1)
+               set_bit(DFL_BLOCK_LOCKS, &ls->flags);
+       else if (val == 0) {
+               clear_bit(DFL_BLOCK_LOCKS, &ls->flags);
+               gdlm_submit_delayed(ls);
+       } else
+               ret = -EINVAL;
+       return ret;
+}
+
+/* sysfs "mounted" read.  Prints, in order of precedence: -1 if
+   DFL_TERMINATE is set, 0 if DFL_LEAVE_DONE is set, 1 if DFL_JOIN_DONE is
+   set, and -2 when none of them is set. */
+static ssize_t gdlm_mounted_show(struct gdlm_ls *ls, char *buf)
+{
+       int state;
+
+       if (test_bit(DFL_TERMINATE, &ls->flags))
+               state = -1;
+       else if (test_bit(DFL_LEAVE_DONE, &ls->flags))
+               state = 0;
+       else if (test_bit(DFL_JOIN_DONE, &ls->flags))
+               state = 1;
+       else
+               state = -2;
+
+       return sprintf(buf, "%d\n", state);
+}
+
+/* sysfs "mounted" write.
+   1 sets DFL_JOIN_DONE, 0 sets DFL_LEAVE_DONE, -1 sets DFL_TERMINATE
+   together with both of the others; any other value is rejected.
+   Waiters on wait_control are woken in every case, including rejection.
+   Returns len on success or -EINVAL. */
+static ssize_t gdlm_mounted_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+       ssize_t ret = len;
+       int val;
+
+       val = simple_strtol(buf, NULL, 0);
+
+       if (val == 1)
+               set_bit(DFL_JOIN_DONE, &ls->flags);
+       else if (val == 0)
+               set_bit(DFL_LEAVE_DONE, &ls->flags);
+       else if (val == -1) {
+               set_bit(DFL_TERMINATE, &ls->flags);
+               set_bit(DFL_JOIN_DONE, &ls->flags);
+               set_bit(DFL_LEAVE_DONE, &ls->flags);
+       } else
+               ret = -EINVAL;
+       wake_up(&ls->wait_control);
+       return ret;
+}
+
+/* sysfs "withdraw" read: prints 1 if DFL_WITHDRAW is set, else 0. */
+static ssize_t gdlm_withdraw_show(struct gdlm_ls *ls, char *buf)
+{
+       int withdrawn = test_bit(DFL_WITHDRAW, &ls->flags) ? 1 : 0;
+
+       return sprintf(buf, "%d\n", withdrawn);
+}
+
+/* sysfs "withdraw" write.
+   1 sets DFL_WITHDRAW; any other value is rejected.  Waiters on
+   wait_control (gdlm_withdraw() sleeps there) are woken in either case.
+   Returns len on success or -EINVAL. */
+static ssize_t gdlm_withdraw_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+       ssize_t ret = len;
+       int val;
+
+       val = simple_strtol(buf, NULL, 0);
+
+       if (val == 1)
+               set_bit(DFL_WITHDRAW, &ls->flags);
+       else
+               ret = -EINVAL;
+       wake_up(&ls->wait_control);
+       return ret;
+}
+
+/* sysfs "id" read: prints ls->id as an unsigned decimal. */
+static ssize_t gdlm_id_show(struct gdlm_ls *ls, char *buf)
+{
+       ssize_t ret;
+
+       ret = sprintf(buf, "%u\n", ls->id);
+       return ret;
+}
+
+/* sysfs "id" write: parses buf with simple_strtoul() (base auto-detected)
+   and stores the result in ls->id.  Always returns len. */
+static ssize_t gdlm_id_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+       unsigned long id = simple_strtoul(buf, NULL, 0);
+
+       ls->id = id;
+       return len;
+}
+
+/* sysfs "jid" read: prints ls->jid as a signed decimal. */
+static ssize_t gdlm_jid_show(struct gdlm_ls *ls, char *buf)
+{
+       ssize_t ret;
+
+       ret = sprintf(buf, "%d\n", ls->jid);
+       return ret;
+}
+
+/* sysfs "jid" write: parses buf with simple_strtol() (base auto-detected)
+   and stores the result in ls->jid.  Always returns len. */
+static ssize_t gdlm_jid_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+       long jid = simple_strtol(buf, NULL, 0);
+
+       ls->jid = jid;
+       return len;
+}
+
+/* sysfs "first" read: prints ls->first as a signed decimal. */
+static ssize_t gdlm_first_show(struct gdlm_ls *ls, char *buf)
+{
+       ssize_t ret;
+
+       ret = sprintf(buf, "%d\n", ls->first);
+       return ret;
+}
+
+/* sysfs "first" write: parses buf with simple_strtol() (base
+   auto-detected) and stores the result in ls->first.  Always returns
+   len. */
+static ssize_t gdlm_first_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+       ls->first = simple_strtol(buf, NULL, 0);
+       return len;
+}
+
+/* sysfs "first_done" read: prints ls->first_done as a signed decimal. */
+static ssize_t gdlm_first_done_show(struct gdlm_ls *ls, char *buf)
+{
+       ssize_t ret;
+
+       ret = sprintf(buf, "%d\n", ls->first_done);
+       return ret;
+}
+
+/* sysfs "recover" read: prints ls->recover_jid as a signed decimal. */
+static ssize_t gdlm_recover_show(struct gdlm_ls *ls, char *buf)
+{
+       ssize_t ret;
+
+       ret = sprintf(buf, "%d\n", ls->recover_jid);
+       return ret;
+}
+
+/* sysfs "recover" write: stores the parsed journal id in ls->recover_jid
+   and passes it to the filesystem callback as LM_CB_NEED_RECOVERY.
+   Always returns len. */
+static ssize_t gdlm_recover_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+       ls->recover_jid = simple_strtol(buf, NULL, 0);
+       ls->fscb(ls->fsdata, LM_CB_NEED_RECOVERY, &ls->recover_jid);
+       return len;
+}
+
+/* sysfs "recover_done" read: prints ls->recover_done as a signed
+   decimal. */
+static ssize_t gdlm_recover_done_show(struct gdlm_ls *ls, char *buf)
+{
+       return sprintf(buf, "%d\n", ls->recover_done);
+}
+
+/* sysfs "cluster" read: prints ls->clustername followed by a newline. */
+static ssize_t gdlm_cluster_show(struct gdlm_ls *ls, char *buf)
+{
+       return sprintf(buf, "%s\n", ls->clustername);
+}
+
+/* sysfs "options" read: prints "spectator " when LM_MFLAG_SPECTATOR is set
+   in ls->fsflags, otherwise writes nothing and returns 0. */
+static ssize_t gdlm_options_show(struct gdlm_ls *ls, char *buf)
+{
+       if (!(ls->fsflags & LM_MFLAG_SPECTATOR))
+               return 0;
+
+       return sprintf(buf, "spectator ");
+}
+
+/* A sysfs attribute of a lockspace with typed show/store handlers; either
+   handler may be NULL (see gdlm_attr_show()/gdlm_attr_store()). */
+struct gdlm_attr {
+       struct attribute attr;
+       ssize_t (*show)(struct gdlm_ls *, char *);
+       ssize_t (*store)(struct gdlm_ls *, const char *, size_t);
+};
+
+/* Define a static struct gdlm_attr called gdlm_attr_<name> with the given
+   mode and show/store handlers. */
+#define GDLM_ATTR(_name,_mode,_show,_store) \
+static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
+
+/* Attributes without a store handler are read-only (S_IRUGO). */
+GDLM_ATTR(block, S_IRUGO | S_IWUSR, gdlm_block_show, gdlm_block_store);
+GDLM_ATTR(mounted, S_IRUGO | S_IWUSR, gdlm_mounted_show, gdlm_mounted_store);
+GDLM_ATTR(withdraw, S_IRUGO | S_IWUSR, gdlm_withdraw_show, gdlm_withdraw_store);
+GDLM_ATTR(id, S_IRUGO | S_IWUSR, gdlm_id_show, gdlm_id_store);
+GDLM_ATTR(jid, S_IRUGO | S_IWUSR, gdlm_jid_show, gdlm_jid_store);
+GDLM_ATTR(first, S_IRUGO | S_IWUSR, gdlm_first_show, gdlm_first_store);
+GDLM_ATTR(first_done, S_IRUGO, gdlm_first_done_show, NULL);
+GDLM_ATTR(recover, S_IRUGO | S_IWUSR, gdlm_recover_show, gdlm_recover_store);
+GDLM_ATTR(recover_done, S_IRUGO, gdlm_recover_done_show, NULL);
+GDLM_ATTR(cluster, S_IRUGO, gdlm_cluster_show, NULL);
+GDLM_ATTR(options, S_IRUGO, gdlm_options_show, NULL);
+
+/* NULL-terminated list of the attributes above; installed as the default
+   attributes of gdlm_ktype. */
+static struct attribute *gdlm_attrs[] = {
+       &gdlm_attr_block.attr,
+       &gdlm_attr_mounted.attr,
+       &gdlm_attr_withdraw.attr,
+       &gdlm_attr_id.attr,
+       &gdlm_attr_jid.attr,
+       &gdlm_attr_first.attr,
+       &gdlm_attr_first_done.attr,
+       &gdlm_attr_recover.attr,
+       &gdlm_attr_recover_done.attr,
+       &gdlm_attr_cluster.attr,
+       &gdlm_attr_options.attr,
+       NULL,
+};
+
+/* sysfs_ops show hook: recovers the gdlm_ls and gdlm_attr that embed kobj
+   and attr and forwards to the attribute's show handler.  Returns 0 when
+   the attribute has no show handler. */
+static ssize_t gdlm_attr_show(struct kobject *kobj, struct attribute *attr,
+                             char *buf)
+{
+       struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
+       struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
+
+       if (!a->show)
+               return 0;
+       return a->show(ls, buf);
+}
+
+/* sysfs_ops store hook: recovers the gdlm_ls and gdlm_attr that embed kobj
+   and attr and forwards to the attribute's store handler.  Returns len
+   (input accepted and discarded) when the attribute has no store
+   handler. */
+static ssize_t gdlm_attr_store(struct kobject *kobj, struct attribute *attr,
+                              const char *buf, size_t len)
+{
+       struct gdlm_ls *ls = container_of(kobj, struct gdlm_ls, kobj);
+       struct gdlm_attr *a = container_of(attr, struct gdlm_attr, attr);
+
+       if (!a->store)
+               return len;
+       return a->store(ls, buf, len);
+}
+
+/* sysfs read/write entry points shared by every lockspace attribute. */
+static struct sysfs_ops gdlm_attr_ops = {
+       .show  = gdlm_attr_show,
+       .store = gdlm_attr_store,
+};
+
+/* kobject type of a lockspace: default attribute list plus the sysfs
+   dispatch ops. */
+static struct kobj_type gdlm_ktype = {
+       .default_attrs = gdlm_attrs,
+       .sysfs_ops     = &gdlm_attr_ops,
+};
+
+/* kset named "lock_dlm" under kernel_subsys; each lockspace kobject is
+   placed in it by gdlm_kobject_setup(). */
+static struct kset gdlm_kset = {
+       .subsys = &kernel_subsys,
+       .kobj   = {.name = "lock_dlm",},
+       .ktype  = &gdlm_ktype,
+};
+
+/* Name the lockspace kobject after ls->fsname, attach it to gdlm_kset with
+   type gdlm_ktype and register it.
+   Returns 0 on success, or the error from kobject_set_name() or
+   kobject_register(). */
+int gdlm_kobject_setup(struct gdlm_ls *ls)
+{
+       int error;
+
+       error = kobject_set_name(&ls->kobj, "%s", ls->fsname);
+       if (error)
+               return error;
+
+       ls->kobj.kset = &gdlm_kset;
+       ls->kobj.ktype = &gdlm_ktype;
+
+       /* propagate a registration failure instead of reporting success */
+       error = kobject_register(&ls->kobj);
+
+       return error;
+}
+
+/* Unregister the lockspace kobject registered by gdlm_kobject_setup(). */
+void gdlm_kobject_release(struct gdlm_ls *ls)
+{
+       kobject_unregister(&ls->kobj);
+}
+
+/* Register the "lock_dlm" kset.  Returns 0 on success; on failure the
+   error is printed and returned. */
+int gdlm_sysfs_init(void)
+{
+       int rv = kset_register(&gdlm_kset);
+
+       if (!rv)
+               return 0;
+
+       printk("lock_dlm: cannot register kset %d\n", rv);
+       return rv;
+}
+
+/* Unregister the "lock_dlm" kset registered by gdlm_sysfs_init(). */
+void gdlm_sysfs_exit(void)
+{
+       kset_unregister(&gdlm_kset);
+}
+
diff -urpN a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c
--- a/fs/gfs2/locking/dlm/thread.c      1970-01-01 07:30:00.000000000 +0730
+++ b/fs/gfs2/locking/dlm/thread.c      2005-09-01 17:48:48.140749504 +0800
@@ -0,0 +1,355 @@
+/*
+ * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include "lock_dlm.h"
+
+/* Append lp to the lockspace's submit list (under async_lock) and wake the
+   lock_dlm threads, which pass it to gdlm_do_lock() when they reach it. */
+
+static void queue_submit(struct gdlm_lock *lp)
+{
+       struct gdlm_ls *lockspace = lp->ls;
+
+       spin_lock(&lockspace->async_lock);
+       list_add_tail(&lp->delay_list, &lockspace->submit);
+       spin_unlock(&lockspace->async_lock);
+
+       wake_up(&lockspace->thread_wait);
+}
+
+/* Hand a lock taken off the submit list to gdlm_do_lock(), passing NULL as
+   its second argument. */
+static void process_submit(struct gdlm_lock *lp)
+{
+       gdlm_do_lock(lp, NULL);
+}
+
+/* Deliver a blocking callback to the filesystem.
+   bast_mode is converted with gdlm_make_lmstate() and mapped to the
+   matching LM_CB_NEED_* callback, which is passed to ls->fscb together
+   with the lock name.  Any other state trips GDLM_ASSERT. */
+static void process_blocking(struct gdlm_lock *lp, int bast_mode)
+{
+       struct gdlm_ls *ls = lp->ls;
+       unsigned int cb;
+
+       switch (gdlm_make_lmstate(bast_mode)) {
+       case LM_ST_EXCLUSIVE:
+               cb = LM_CB_NEED_E;
+               break;
+       case LM_ST_DEFERRED:
+               cb = LM_CB_NEED_D;
+               break;
+       case LM_ST_SHARED:
+               cb = LM_CB_NEED_S;
+               break;
+       default:
+               /* report the mode we were given: gdlm_thread() has already
+                  reset lp->bast_mode to 0 by the time we are called */
+               GDLM_ASSERT(0, printk("unknown bast mode %u\n",
+                                     (unsigned int) bast_mode););
+               /* never issue a callback with cb unset, in case the assert
+                  returns */
+               return;
+       }
+
+       ls->fscb(ls->fsdata, cb, &lp->lockname);
+}
+
+/* Handle the completion of a dlm request for lp (called by gdlm_thread()
+   for locks taken off ls->complete).
+   Depending on lksb status and lp->flags this either finishes the request
+   (the "out" path: an internal lock waiter is completed, or GFS is told
+   through an LM_CB_ASYNC callback), resubmits the lock in another mode, or
+   returns without notifying anyone. */
+static void process_complete(struct gdlm_lock *lp)
+{
+       struct gdlm_ls *ls = lp->ls;
+       struct lm_async_cb acb;
+       /* mode held before this completion; consulted for LM_OUT_CACHEABLE */
+       int16_t prev_mode = lp->cur;
+
+       memset(&acb, 0, sizeof(acb));
+
+       /* The dlm request was canceled: stay in the current mode. */
+
+       if (lp->lksb.sb_status == -DLM_ECANCEL) {
+               log_info("complete dlm cancel %x,%"PRIx64" flags %lx",
+                        lp->lockname.ln_type, lp->lockname.ln_number,
+                        lp->flags);
+
+               lp->req = lp->cur;
+               acb.lc_ret |= LM_OUT_CANCELED;
+               if (lp->cur == DLM_LOCK_IV)
+                       lp->lksb.sb_lkid = 0;
+               goto out;
+       }
+
+       /* Completion of an unlock: reset the lock to DLM_LOCK_IV, and
+          delete it if LFL_UNLOCK_DELETE was set. */
+
+       if (test_and_clear_bit(LFL_DLM_UNLOCK, &lp->flags)) {
+               if (lp->lksb.sb_status != -DLM_EUNLOCK) {
+                       log_info("unlock sb_status %d %x,%"PRIx64" flags %lx",
+                                lp->lksb.sb_status, lp->lockname.ln_type,
+                                lp->lockname.ln_number, lp->flags);
+                       return;
+               }
+
+               lp->cur = DLM_LOCK_IV;
+               lp->req = DLM_LOCK_IV;
+               lp->lksb.sb_lkid = 0;
+
+               if (test_and_clear_bit(LFL_UNLOCK_DELETE, &lp->flags)) {
+                       gdlm_delete_lp(lp);
+                       return;
+               }
+               goto out;
+       }
+
+       /* The lvb contents are flagged not valid: hand back zeroes. */
+
+       if (lp->lksb.sb_flags & DLM_SBF_VALNOTVALID)
+               memset(lp->lksb.sb_lvbptr, 0, GDLM_LVB_SIZE);
+
+       /* ALTMODE is flagged: swap the requested mode between PR and CW. */
+
+       if (lp->lksb.sb_flags & DLM_SBF_ALTMODE) {
+               if (lp->req == DLM_LOCK_PR)
+                       lp->req = DLM_LOCK_CW;
+               else if (lp->req == DLM_LOCK_CW)
+                       lp->req = DLM_LOCK_PR;
+       }
+
+       /*
+        * A canceled lock request.  The lock was just taken off the delayed
+        * list and was never even submitted to dlm.
+        */
+
+       if (test_and_clear_bit(LFL_CANCEL, &lp->flags)) {
+               log_info("complete internal cancel %x,%"PRIx64"",
+                        lp->lockname.ln_type, lp->lockname.ln_number);
+               lp->req = lp->cur;
+               acb.lc_ret |= LM_OUT_CANCELED;
+               goto out;
+       }
+
+       /*
+        * An error occurred.
+        */
+
+       if (lp->lksb.sb_status) {
+               /* a "normal" error */
+               if ((lp->lksb.sb_status == -EAGAIN) &&
+                   (lp->lkf & DLM_LKF_NOQUEUE)) {
+                       lp->req = lp->cur;
+                       if (lp->cur == DLM_LOCK_IV)
+                               lp->lksb.sb_lkid = 0;
+                       goto out;
+               }
+
+               /* this could only happen with cancels I think */
+               log_info("ast sb_status %d %x,%"PRIx64" flags %lx",
+                        lp->lksb.sb_status, lp->lockname.ln_type,
+                        lp->lockname.ln_number, lp->flags);
+               return;
+       }
+
+       /*
+        * This is an AST for an EX->EX conversion for sync_lvb from GFS.
+        */
+
+       if (test_and_clear_bit(LFL_SYNC_LVB, &lp->flags)) {
+               complete(&lp->ast_wait);
+               return;
+       }
+
+       /*
+        * A lock has been demoted to NL because it initially completed during
+        * BLOCK_LOCKS.  Now it must be requested in the originally requested
+        * mode.
+        */
+
+       if (test_and_clear_bit(LFL_REREQUEST, &lp->flags)) {
+               GDLM_ASSERT(lp->req == DLM_LOCK_NL,);
+               GDLM_ASSERT(lp->prev_req > DLM_LOCK_NL,);
+
+               lp->cur = DLM_LOCK_NL;
+               lp->req = lp->prev_req;
+               lp->prev_req = DLM_LOCK_IV;
+               lp->lkf &= ~DLM_LKF_CONVDEADLK;
+
+               set_bit(LFL_NOCACHE, &lp->flags);
+
+               if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
+                   !test_bit(LFL_NOBLOCK, &lp->flags))
+                       gdlm_queue_delayed(lp);
+               else
+                       queue_submit(lp);
+               return;
+       }
+
+       /*
+        * A request is granted during dlm recovery.  It may be granted
+        * because the locks of a failed node were cleared.  In that case,
+        * there may be inconsistent data beneath this lock and we must wait
+        * for recovery to complete to use it.  When gfs recovery is done this
+        * granted lock will be converted to NL and then reacquired in this
+        * granted state.
+        */
+
+       if (test_bit(DFL_BLOCK_LOCKS, &ls->flags) &&
+           !test_bit(LFL_NOBLOCK, &lp->flags) &&
+           lp->req != DLM_LOCK_NL) {
+
+               lp->cur = lp->req;
+               lp->prev_req = lp->req;
+               lp->req = DLM_LOCK_NL;
+               lp->lkf |= DLM_LKF_CONVERT;
+               lp->lkf &= ~DLM_LKF_CONVDEADLK;
+
+               log_debug("rereq %x,%"PRIx64" id %x %d,%d",
+                         lp->lockname.ln_type, lp->lockname.ln_number,
+                         lp->lksb.sb_lkid, lp->cur, lp->req);
+
+               set_bit(LFL_REREQUEST, &lp->flags);
+               queue_submit(lp);
+               return;
+       }
+
+       /*
+        * DLM demoted the lock to NL before it was granted so GFS must be
+        * told it cannot cache data for this lock.
+        */
+
+       if (lp->lksb.sb_flags & DLM_SBF_DEMOTED)
+               set_bit(LFL_NOCACHE, &lp->flags);
+
+ out:
+       /*
+        * This is an internal lock_dlm lock
+        */
+
+       if (test_bit(LFL_INLOCK, &lp->flags)) {
+               clear_bit(LFL_NOBLOCK, &lp->flags);
+               lp->cur = lp->req;
+               complete(&lp->ast_wait);
+               return;
+       }
+
+       /*
+        * Normal completion of a lock request.  Tell GFS it now has the lock.
+        */
+
+       clear_bit(LFL_NOBLOCK, &lp->flags);
+       lp->cur = lp->req;
+
+       acb.lc_name = lp->lockname;
+       acb.lc_ret |= gdlm_make_lmstate(lp->cur);
+
+       /* cacheable only if LFL_NOCACHE was not set and both the previous
+          and the new mode are above NL */
+       if (!test_and_clear_bit(LFL_NOCACHE, &lp->flags) &&
+           (lp->cur > DLM_LOCK_NL) && (prev_mode > DLM_LOCK_NL))
+               acb.lc_ret |= LM_OUT_CACHEABLE;
+
+       ls->fscb(ls->fsdata, LM_CB_ASYNC, &acb);
+}
+
+/* Return nonzero when the calling thread has nothing to process: the
+   complete and submit lists are empty and, if blocking is nonzero, the
+   blocking list is empty as well.  The lists are examined under
+   async_lock. */
+static inline int no_work(struct gdlm_ls *ls, int blocking)
+{
+       int idle;
+
+       spin_lock(&ls->async_lock);
+       idle = list_empty(&ls->complete) && list_empty(&ls->submit) &&
+              (!blocking || list_empty(&ls->blocking));
+       spin_unlock(&ls->async_lock);
+
+       return idle;
+}
+
+/* Decide whether the filesystem should be asked to drop locks.
+   Returns 0 when drop_locks_count is 0 or when drop_locks_period seconds
+   have not yet passed since drop_time.  Otherwise drop_time is reset to
+   now and 1 is returned if all_locks_count has reached drop_locks_count,
+   else 0. */
+static inline int check_drop(struct gdlm_ls *ls)
+{
+       if (!ls->drop_locks_count)
+               return 0;
+
+       if (!time_after(jiffies, ls->drop_time + ls->drop_locks_period * HZ))
+               return 0;
+
+       ls->drop_time = jiffies;
+       return (ls->all_locks_count >= ls->drop_locks_count) ? 1 : 0;
+}
+
+/* Body of both lock_dlm kernel threads.
+   Each pass sleeps on thread_wait while there is no work, then takes at
+   most one lock off a list -- blocking (thread1 only), then complete, then
+   submit -- and processes it outside async_lock.  It also issues an
+   LM_CB_DROPLOCKS callback when check_drop() asks for one.
+   Runs until kthread_stop() is called on the thread; returns 0. */
+static int gdlm_thread(void *data)
+{
+       struct gdlm_ls *ls = (struct gdlm_ls *) data;
+       struct gdlm_lock *lp = NULL;
+       int blist = 0;
+       uint8_t complete, blocking, submit, drop;
+       DECLARE_WAITQUEUE(wait, current);
+
+       /* Only thread1 is allowed to do blocking callbacks since gfs
+          may wait for a completion callback within a blocking cb. */
+
+       /* NOTE(review): this relies on ls->thread1 being assigned before
+          this thread first runs */
+       if (current == ls->thread1)
+               blist = 1;
+
+       while (!kthread_should_stop()) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               add_wait_queue(&ls->thread_wait, &wait);
+               /* Check for a stop request again now that the task state
+                  is set: a kthread_stop() issued between the loop test
+                  and set_current_state() would otherwise leave us asleep
+                  with nobody left to wake us. */
+               if (no_work(ls, blist) && !kthread_should_stop())
+                       schedule();
+               remove_wait_queue(&ls->thread_wait, &wait);
+               set_current_state(TASK_RUNNING);
+
+               complete = blocking = submit = drop = 0;
+
+               spin_lock(&ls->async_lock);
+
+               if (blist && !list_empty(&ls->blocking)) {
+                       lp = list_entry(ls->blocking.next, struct gdlm_lock,
+                                       blist);
+                       list_del_init(&lp->blist);
+                       /* take the pending mode and clear it on the lock */
+                       blocking = lp->bast_mode;
+                       lp->bast_mode = 0;
+               } else if (!list_empty(&ls->complete)) {
+                       lp = list_entry(ls->complete.next, struct gdlm_lock,
+                                       clist);
+                       list_del_init(&lp->clist);
+                       complete = 1;
+               } else if (!list_empty(&ls->submit)) {
+                       lp = list_entry(ls->submit.next, struct gdlm_lock,
+                                       delay_list);
+                       list_del_init(&lp->delay_list);
+                       submit = 1;
+               }
+
+               drop = check_drop(ls);
+               spin_unlock(&ls->async_lock);
+
+               if (complete)
+                       process_complete(lp);
+
+               else if (blocking)
+                       process_blocking(lp, blocking);
+
+               else if (submit)
+                       process_submit(lp);
+
+               if (drop)
+                       ls->fscb(ls->fsdata, LM_CB_DROPLOCKS, NULL);
+
+               schedule();
+       }
+
+       return 0;
+}
+
+/* Start the two lock_dlm threads for a lockspace and record them in
+   ls->thread1 and ls->thread2.
+   Returns 0 on success or the negative errno from thread creation; if the
+   second thread cannot be started the first one is stopped again. */
+int gdlm_init_threads(struct gdlm_ls *ls)
+{
+       struct task_struct *p;
+       int error;
+
+       /* Create thread1 without starting it: gdlm_thread() compares
+          current against ls->thread1 to decide whether it handles
+          blocking callbacks, so the pointer must be set first. */
+       p = kthread_create(gdlm_thread, ls, "lock_dlm1");
+       if (IS_ERR(p)) {
+               /* IS_ERR() only yields 0/1; PTR_ERR() gives the errno */
+               error = PTR_ERR(p);
+               log_error("can't start lock_dlm1 thread %d", error);
+               return error;
+       }
+       ls->thread1 = p;
+       wake_up_process(p);
+
+       p = kthread_run(gdlm_thread, ls, "lock_dlm2");
+       if (IS_ERR(p)) {
+               error = PTR_ERR(p);
+               log_error("can't start lock_dlm2 thread %d", error);
+               kthread_stop(ls->thread1);
+               return error;
+       }
+       ls->thread2 = p;
+
+       return 0;
+}
+
+/* Stop both lock_dlm threads of the lockspace, thread1 first. */
+void gdlm_release_threads(struct gdlm_ls *ls)
+{
+       kthread_stop(ls->thread1);
+       kthread_stop(ls->thread2);
+}
+
diff -urpN a/include/linux/lock_dlm_plock.h b/include/linux/lock_dlm_plock.h
--- a/include/linux/lock_dlm_plock.h    1970-01-01 07:30:00.000000000 +0730
+++ b/include/linux/lock_dlm_plock.h    2005-09-01 17:48:48.142749200 +0800
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#ifndef __LOCK_DLM_PLOCK_DOT_H__
+#define __LOCK_DLM_PLOCK_DOT_H__
+
+/* name under which the plock misc device is registered */
+#define GDLM_PLOCK_MISC_NAME           "lock_dlm_plock"
+
+/* interface version stamped into every request by the kernel and checked
+   on every message written by userspace */
+#define GDLM_PLOCK_VERSION_MAJOR       1
+#define GDLM_PLOCK_VERSION_MINOR       0
+#define GDLM_PLOCK_VERSION_PATCH       0
+
+/* values of gdlm_plock_info.optype */
+enum {
+       GDLM_PLOCK_OP_LOCK = 1,
+       GDLM_PLOCK_OP_UNLOCK,
+       GDLM_PLOCK_OP_GET,
+};
+
+/* Message exchanged through the plock misc device: the kernel fills in a
+   request, userspace writes the same structure back with the result. */
+struct gdlm_plock_info {
+       __u32 version[3];       /* major, minor, patch */
+       __u8 optype;            /* GDLM_PLOCK_OP_* */
+       __u8 ex;                /* 1 when the lock type is F_WRLCK */
+       __u8 wait;              /* nonzero for a blocking (SETLKW) request */
+       __u8 pad;
+       __u32 pid;              /* filled from the file_lock's fl_owner */
+       __s32 nodeid;           /* not set by the kernel side of this patch */
+       __s32 rv;               /* result, read by the kernel from the reply */
+       __u32 fsid;             /* lockspace id (ls->id) */
+       __u64 number;           /* lock name number (ln_number) */
+       __u64 start;            /* fl_start of the range */
+       __u64 end;              /* fl_end of the range */
+};
+
+#endif
+
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 13/13] GFS: lock_dlm module

Reply via email to