The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=c3d8a93126b9dd05fcfed3685bc3817f3c1eccc9

commit c3d8a93126b9dd05fcfed3685bc3817f3c1eccc9
Author:     Konstantin Belousov <k...@freebsd.org>
AuthorDate: 2023-08-07 04:45:01 +0000
Commit:     Konstantin Belousov <k...@freebsd.org>
CommitDate: 2024-08-06 04:05:58 +0000

    Re-implement rangelocks part 1
    
    Using the algorithms from https://doi.org/10.1145/3342195.3387533.
    
    For the first part, consider all range lock requests as exclusive.
    
    Reviewed by:    markj, Olivier Certner <olce.free...@certner.fr>
    Tested by:      pho
    Sponsored by:   The FreeBSD Foundation
    Differential revision:  https://reviews.freebsd.org/D41787
---
 sys/kern/kern_rangelock.c | 455 +++++++++++++++++++++++++---------------------
 sys/kern/kern_thread.c    |   1 -
 sys/kern/uipc_shm.c       |   8 +-
 sys/kern/vfs_subr.c       |   2 -
 sys/sys/rangelock.h       |  45 ++---
 sys/sys/vnode.h           |  13 +-
 6 files changed, 267 insertions(+), 257 deletions(-)
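As a quick orientation before the diff: the user-visible change is that the
interlock mutex argument disappears from the range lock calls and the offsets
become vm_ooffset_t.  A minimal caller sketch against the new interface (names
taken from the diff below; the surrounding code is illustrative only, not part
of this commit):

	struct rangelock rl;
	void *cookie;

	rangelock_init(&rl);
	/*
	 * Blocks until [0, 4096) can be granted; in this first part
	 * every request, including RL_LOCK_READ, is treated as
	 * exclusive.
	 */
	cookie = rangelock_rlock(&rl, 0, 4096);
	/* ... access the byte range ... */
	rangelock_unlock(&rl, cookie);
	rangelock_destroy(&rl);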

diff --git a/sys/kern/kern_rangelock.c b/sys/kern/kern_rangelock.c
index a9d8c342c267..186a42caebf0 100644
--- a/sys/kern/kern_rangelock.c
+++ b/sys/kern/kern_rangelock.c
@@ -27,304 +27,335 @@
  */
 
 #include <sys/param.h>
+#include <sys/kassert.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/rangelock.h>
-#include <sys/systm.h>
+#include <sys/sleepqueue.h>
+#include <sys/smr.h>
 
 #include <vm/uma.h>
 
+/*
+ * Implementation of range locks based on the paper
+ * https://doi.org/10.1145/3342195.3387533
+ * arXiv:2006.12144v1 [cs.OS] 22 Jun 2020
+ * Scalable Range Locks for Scalable Address Spaces and Beyond
+ * by Alex Kogan, Dave Dice, and Shady Issa
+ */
+
+static struct rl_q_entry *rl_e_unmark(const struct rl_q_entry *e);
+
+/*
+ * rl_q_next links all granted ranges in the lock.  We cannot free an
+ * rl_q_entry while in the smr section, and cannot reuse rl_q_next
+ * linkage since other threads might follow it even after CAS removed
+ * the range.  Use rl_q_free for local list of ranges to remove after
+ * the smr section is dropped.
+ */
 struct rl_q_entry {
-       TAILQ_ENTRY(rl_q_entry) rl_q_link;
+       struct rl_q_entry *rl_q_next;
+       struct rl_q_entry *rl_q_free;
        off_t           rl_q_start, rl_q_end;
        int             rl_q_flags;
+#ifdef INVARIANTS
+       struct thread   *rl_q_owner;
+#endif
 };
 
 static uma_zone_t rl_entry_zone;
+static smr_t rl_smr;
 
 static void
 rangelock_sys_init(void)
 {
-
        rl_entry_zone = uma_zcreate("rl_entry", sizeof(struct rl_q_entry),
-           NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+           NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct rl_q_entry),
+           UMA_ZONE_SMR);
+       rl_smr = uma_zone_get_smr(rl_entry_zone);
 }
-SYSINIT(vfs, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);
+SYSINIT(rl, SI_SUB_LOCK, SI_ORDER_ANY, rangelock_sys_init, NULL);
 
 static struct rl_q_entry *
-rlqentry_alloc(void)
+rlqentry_alloc(vm_ooffset_t start, vm_ooffset_t end, int flags)
 {
+       struct rl_q_entry *e;
+
+       e = uma_zalloc_smr(rl_entry_zone, M_WAITOK);
+       e->rl_q_next = NULL;
+       e->rl_q_free = NULL;
+       e->rl_q_start = start;
+       e->rl_q_end = end;
+       e->rl_q_flags = flags;
+#ifdef INVARIANTS
+       e->rl_q_owner = curthread;
+#endif
+       return (e);
+}
 
-       return (uma_zalloc(rl_entry_zone, M_WAITOK));
+void
+rangelock_init(struct rangelock *lock)
+{
+       lock->sleepers = false;
+       atomic_store_ptr(&lock->head, NULL);
 }
 
 void
-rlqentry_free(struct rl_q_entry *rleq)
+rangelock_destroy(struct rangelock *lock)
 {
+       struct rl_q_entry *e, *ep;
 
-       uma_zfree(rl_entry_zone, rleq);
+       MPASS(!lock->sleepers);
+       for (e = (struct rl_q_entry *)atomic_load_ptr(&lock->head);
+           e != NULL; e = rl_e_unmark(ep)) {
+               ep = atomic_load_ptr(&e->rl_q_next);
+               uma_zfree_smr(rl_entry_zone, e);
+       }
 }
 
-void
-rangelock_init(struct rangelock *lock)
+static bool
+rl_e_is_marked(const struct rl_q_entry *e)
 {
+       return (((uintptr_t)e & 1) != 0);
+}
 
-       TAILQ_INIT(&lock->rl_waiters);
-       lock->rl_currdep = NULL;
+static struct rl_q_entry *
+rl_e_unmark(const struct rl_q_entry *e)
+{
+       MPASS(rl_e_is_marked(e));
+       return ((struct rl_q_entry *)((uintptr_t)e & ~1));
 }
 
-void
-rangelock_destroy(struct rangelock *lock)
+static struct rl_q_entry *
+rl_q_load(struct rl_q_entry **p)
 {
+       return ((struct rl_q_entry *)atomic_load_acq_ptr((uintptr_t *)p));
+}
 
-       KASSERT(TAILQ_EMPTY(&lock->rl_waiters), ("Dangling waiters"));
+void
+rangelock_unlock(struct rangelock *lock, void *cookie)
+{
+       struct rl_q_entry *e;
+
+       e = cookie;
+       MPASS(lock != NULL && e != NULL);
+       MPASS(!rl_e_is_marked(rl_q_load(&e->rl_q_next)));
+       MPASS(e->rl_q_owner == curthread);
+
+       sleepq_lock(&lock->sleepers);
+#ifdef INVARIANTS
+       int r = atomic_testandset_long((uintptr_t *)&e->rl_q_next, 0);
+       MPASS(r == 0);
+#else
+       atomic_set_ptr((uintptr_t *)&e->rl_q_next, 1);
+#endif
+       lock->sleepers = false;
+       sleepq_broadcast(&lock->sleepers, SLEEPQ_SLEEP, 0, 0);
+       sleepq_release(&lock->sleepers);
 }
 
 /*
- * Two entries are compatible if their ranges do not overlap, or both
- * entries are for read.
+ * result: -1 if e1 before e2
+ *          1 if e1 after e2
+ *          0 if e1 and e2 overlap
  */
 static int
-ranges_overlap(const struct rl_q_entry *e1,
-    const struct rl_q_entry *e2)
+rl_e_compare(const struct rl_q_entry *e1, const struct rl_q_entry *e2)
 {
-
-       if (e1->rl_q_start < e2->rl_q_end && e1->rl_q_end > e2->rl_q_start)
+       if (e1 == NULL)
                return (1);
+       if (e1->rl_q_start >= e2->rl_q_end)
+               return (1);
+       if (e2->rl_q_start >= e1->rl_q_end)
+               return (-1);
        return (0);
 }
 
-/*
- * Recalculate the lock->rl_currdep after an unlock.
- */
 static void
-rangelock_calc_block(struct rangelock *lock)
+rl_insert_sleep(struct rangelock *lock)
 {
-       struct rl_q_entry *entry, *nextentry, *entry1;
-
-       for (entry = lock->rl_currdep; entry != NULL; entry = nextentry) {
-               nextentry = TAILQ_NEXT(entry, rl_q_link);
-               if (entry->rl_q_flags & RL_LOCK_READ) {
-                       /* Reads must not overlap with granted writes. */
-                       for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
-                           !(entry1->rl_q_flags & RL_LOCK_READ);
-                           entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
-                               if (ranges_overlap(entry, entry1))
-                                       goto out;
-                       }
-               } else {
-                       /* Write must not overlap with any granted locks. */
-                       for (entry1 = TAILQ_FIRST(&lock->rl_waiters);
-                           entry1 != entry;
-                           entry1 = TAILQ_NEXT(entry1, rl_q_link)) {
-                               if (ranges_overlap(entry, entry1))
-                                       goto out;
-                       }
-
-                       /* Move grantable write locks to the front. */
-                       TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
-                       TAILQ_INSERT_HEAD(&lock->rl_waiters, entry, rl_q_link);
-               }
-
-               /* Grant this lock. */
-               entry->rl_q_flags |= RL_LOCK_GRANTED;
-               wakeup(entry);
-       }
-out:
-       lock->rl_currdep = entry;
+       smr_exit(rl_smr);
+       DROP_GIANT();
+       lock->sleepers = true;
+       sleepq_add(&lock->sleepers, NULL, "rangelk", 0, 0);
+       sleepq_wait(&lock->sleepers, PRI_USER);
+       PICKUP_GIANT();
+       smr_enter(rl_smr);
 }
 
-static void
-rangelock_unlock_locked(struct rangelock *lock, struct rl_q_entry *entry,
-    struct mtx *ilk, bool do_calc_block)
+static bool
+rl_q_cas(struct rl_q_entry **prev, struct rl_q_entry *old,
+    struct rl_q_entry *new)
 {
-
-       MPASS(lock != NULL && entry != NULL && ilk != NULL);
-       mtx_assert(ilk, MA_OWNED);
-
-       if (!do_calc_block) {
-               /*
-                * This is the case where rangelock_enqueue() has been called
-                * with trylock == true and just inserted this entry in the
-                * queue.
-                * If rl_currdep is this entry, rl_currdep needs to
-                * be set to the next entry in the rl_waiters list.
-                * However, since this entry is the last entry in the
-                * list, the next entry is NULL.
-                */
-               if (lock->rl_currdep == entry) {
-                       KASSERT(TAILQ_NEXT(lock->rl_currdep, rl_q_link) == NULL,
-                           ("rangelock_enqueue: next entry not NULL"));
-                       lock->rl_currdep = NULL;
-               }
-       } else
-               KASSERT(entry != lock->rl_currdep, ("stuck currdep"));
-
-       TAILQ_REMOVE(&lock->rl_waiters, entry, rl_q_link);
-       if (do_calc_block)
-               rangelock_calc_block(lock);
-       mtx_unlock(ilk);
-       if (curthread->td_rlqe == NULL)
-               curthread->td_rlqe = entry;
-       else
-               rlqentry_free(entry);
+       return (atomic_cmpset_rel_ptr((uintptr_t *)prev, (uintptr_t)old,
+           (uintptr_t)new) != 0);
 }
 
-void
-rangelock_unlock(struct rangelock *lock, void *cookie, struct mtx *ilk)
+static bool
+rl_insert(struct rangelock *lock, struct rl_q_entry *e, bool trylock,
+    struct rl_q_entry **free)
 {
+       struct rl_q_entry *cur, *next, **prev;
+       int r;
+
+again:
+       prev = &lock->head;
+       if (rl_q_load(prev) == NULL && rl_q_cas(prev, NULL, e))
+               return (true);
+
+       for (cur = rl_q_load(prev);;) {
+               if (rl_e_is_marked(cur))
+                       goto again;
+
+               if (cur != NULL) {
+                       next = rl_q_load(&cur->rl_q_next);
+                       if (rl_e_is_marked(next)) {
+                               next = rl_e_unmark(next);
+                               if (rl_q_cas(prev, cur, next)) {
+#ifdef INVARIANTS
+                                       cur->rl_q_owner = NULL;
+#endif
+                                       cur->rl_q_free = *free;
+                                       *free = cur;
+                               }
+                               cur = next;
+                               continue;
+                       }
+               }
 
-       MPASS(lock != NULL && cookie != NULL && ilk != NULL);
-
-       mtx_lock(ilk);
-       rangelock_unlock_locked(lock, cookie, ilk, true);
-}
-
-/*
- * Unlock the sub-range of granted lock.
- */
-void *
-rangelock_unlock_range(struct rangelock *lock, void *cookie, off_t start,
-    off_t end, struct mtx *ilk)
-{
-       struct rl_q_entry *entry;
-
-       MPASS(lock != NULL && cookie != NULL && ilk != NULL);
-       entry = cookie;
-       KASSERT(entry->rl_q_flags & RL_LOCK_GRANTED,
-           ("Unlocking non-granted lock"));
-       KASSERT(entry->rl_q_start == start, ("wrong start"));
-       KASSERT(entry->rl_q_end >= end, ("wrong end"));
-
-       mtx_lock(ilk);
-       if (entry->rl_q_end == end) {
-               rangelock_unlock_locked(lock, cookie, ilk, true);
-               return (NULL);
+               r = rl_e_compare(cur, e);
+               if (r == -1) {
+                       prev = &cur->rl_q_next;
+                       cur = rl_q_load(prev);
+               } else if (r == 0) {
+                       sleepq_lock(&lock->sleepers);
+                       if (__predict_false(rl_e_is_marked(rl_q_load(
+                           &cur->rl_q_next)))) {
+                               sleepq_release(&lock->sleepers);
+                               continue;
+                       }
+                       if (trylock) {
+                               sleepq_release(&lock->sleepers);
+                               return (false);
+                       }
+                       rl_insert_sleep(lock);
+                       /* e is still valid */
+                       goto again;
+               } else /* r == 1 */ {
+                       e->rl_q_next = cur;
+                       if (rl_q_cas(prev, cur, e)) {
+                               atomic_thread_fence_acq();
+                               return (true);
+                       }
+                       /* Reset rl_q_next in case we hit fast path. */
+                       e->rl_q_next = NULL;
+                       cur = rl_q_load(prev);
+               }
        }
-       entry->rl_q_end = end;
-       rangelock_calc_block(lock);
-       mtx_unlock(ilk);
-       return (cookie);
 }
 
-/*
- * Add the lock request to the queue of the pending requests for
- * rangelock.  Sleep until the request can be granted unless trylock == true.
- */
-static void *
-rangelock_enqueue(struct rangelock *lock, off_t start, off_t end, int mode,
-    struct mtx *ilk, bool trylock)
+static struct rl_q_entry *
+rangelock_lock_int(struct rangelock *lock, struct rl_q_entry *e,
+    bool trylock)
 {
-       struct rl_q_entry *entry;
-       struct thread *td;
-
-       MPASS(lock != NULL && ilk != NULL);
-
-       td = curthread;
-       if (td->td_rlqe != NULL) {
-               entry = td->td_rlqe;
-               td->td_rlqe = NULL;
-       } else
-               entry = rlqentry_alloc();
-       MPASS(entry != NULL);
-       entry->rl_q_flags = mode;
-       entry->rl_q_start = start;
-       entry->rl_q_end = end;
-
-       mtx_lock(ilk);
-       /*
-        * XXXKIB TODO. Check that a thread does not try to enqueue a
-        * lock that is incompatible with another request from the same
-        * thread.
-        */
-
-       TAILQ_INSERT_TAIL(&lock->rl_waiters, entry, rl_q_link);
-       /*
-        * If rl_currdep == NULL, there is no entry waiting for a conflicting
-        * range to be resolved, so set rl_currdep to this entry.  If there is
-        * no conflicting entry for this entry, rl_currdep will be set back to
-        * NULL by rangelock_calc_block().
-        */
-       if (lock->rl_currdep == NULL)
-               lock->rl_currdep = entry;
-       rangelock_calc_block(lock);
-       while (!(entry->rl_q_flags & RL_LOCK_GRANTED)) {
-               if (trylock) {
-                       /*
-                        * For this case, the range is not actually locked
-                        * yet, but removal from the list requires the same
-                        * steps, except for not doing a rangelock_calc_block()
-                        * call, since rangelock_calc_block() was called above.
-                        */
-                       rangelock_unlock_locked(lock, entry, ilk, false);
-                       return (NULL);
-               }
-               msleep(entry, ilk, 0, "range", 0);
+       struct rl_q_entry *free, *x, *xp;
+       bool res;
+
+       free = NULL;
+       smr_enter(rl_smr);
+       res = rl_insert(lock, e, trylock, &free);
+       smr_exit(rl_smr);
+       MPASS(trylock || res);
+       if (!res) {
+               e->rl_q_free = free;
+               free = e;
+               e = NULL;
+       }
+       for (x = free; x != NULL; x = xp) {
+               MPASS(!rl_e_is_marked(x));
+               xp = x->rl_q_free;
+               MPASS(!rl_e_is_marked(xp));
+               uma_zfree_smr(rl_entry_zone, x);
        }
-       mtx_unlock(ilk);
-       return (entry);
+       return (e);
 }
 
 void *
-rangelock_rlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
+rangelock_rlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
 {
+       struct rl_q_entry *e;
 
-       return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, false));
+       e = rlqentry_alloc(start, end, RL_LOCK_READ);
+       return (rangelock_lock_int(lock, e, false));
 }
 
 void *
-rangelock_tryrlock(struct rangelock *lock, off_t start, off_t end,
-    struct mtx *ilk)
+rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
 {
+       struct rl_q_entry *e;
 
-       return (rangelock_enqueue(lock, start, end, RL_LOCK_READ, ilk, true));
+       e = rlqentry_alloc(start, end, RL_LOCK_READ);
+       return (rangelock_lock_int(lock, e, true));
 }
 
 void *
-rangelock_wlock(struct rangelock *lock, off_t start, off_t end, struct mtx *ilk)
+rangelock_wlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
 {
+       struct rl_q_entry *e;
 
-       return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, false));
+       e = rlqentry_alloc(start, end, RL_LOCK_WRITE);
+       return (rangelock_lock_int(lock, e, true));
 }
 
 void *
-rangelock_trywlock(struct rangelock *lock, off_t start, off_t end,
-    struct mtx *ilk)
+rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start, vm_ooffset_t end)
 {
+       struct rl_q_entry *e;
 
-       return (rangelock_enqueue(lock, start, end, RL_LOCK_WRITE, ilk, true));
+       e = rlqentry_alloc(start, end, RL_LOCK_WRITE);
+       return (rangelock_lock_int(lock, e, true));
 }
 
 #ifdef INVARIANT_SUPPORT
 void
 _rangelock_cookie_assert(void *cookie, int what, const char *file, int line)
 {
-       struct rl_q_entry *entry;
-       int flags;
-
-       MPASS(cookie != NULL);
-       entry = cookie;
-       flags = entry->rl_q_flags;
-       switch (what) {
-       case RCA_LOCKED:
-               if ((flags & RL_LOCK_GRANTED) == 0)
-                       panic("rangelock not held @ %s:%d\n", file, line);
-               break;
-       case RCA_RLOCKED:
-               if ((flags & (RL_LOCK_GRANTED | RL_LOCK_READ)) !=
-                   (RL_LOCK_GRANTED | RL_LOCK_READ))
-                       panic("rangelock not rlocked @ %s:%d\n", file, line);
-               break;
-       case RCA_WLOCKED:
-               if ((flags & (RL_LOCK_GRANTED | RL_LOCK_WRITE)) !=
-                   (RL_LOCK_GRANTED | RL_LOCK_WRITE))
-                       panic("rangelock not wlocked @ %s:%d\n", file, line);
-               break;
-       default:
-               panic("Unknown rangelock assertion: %d @ %s:%d", what, file,
-                   line);
-       }
 }
 #endif /* INVARIANT_SUPPORT */
+
+#include "opt_ddb.h"
+#ifdef DDB
+#include <ddb/ddb.h>
+
+DB_SHOW_COMMAND(rangelock, db_show_rangelock)
+{
+       struct rangelock *lock;
+       struct rl_q_entry *e, *x;
+
+       if (!have_addr) {
+               db_printf("show rangelock addr\n");
+               return;
+       }
+
+       lock = (struct rangelock *)addr;
+       db_printf("rangelock %p sleepers %d\n", lock, lock->sleepers);
+       for (e = lock->head;;) {
+               x = rl_e_is_marked(e) ? rl_e_unmark(e) : e;
+               if (x == NULL)
+                       break;
+               db_printf("  entry %p marked %d %d start %#jx end %#jx "
+                   "flags %x next %p",
+                   e, rl_e_is_marked(e), rl_e_is_marked(x->rl_q_next),
+                   x->rl_q_start, x->rl_q_end, x->rl_q_flags, x->rl_q_next);
+#ifdef INVARIANTS
+               db_printf(" owner %p (%d)", x->rl_q_owner,
+                   x->rl_q_owner != NULL ? x->rl_q_owner->td_tid : -1);
+#endif
+               db_printf("\n");
+               e = x->rl_q_next;
+       }
+}
+
+#endif /* DDB */
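
A note on the technique above: rl_e_is_marked()/rl_e_unmark() implement the
classic lock-free-list trick of tagging the low bit of the next pointer.
Because rl_q_entry is at least pointer-aligned, bit 0 of rl_q_next is always
zero for a live entry; rangelock_unlock() sets it to mark the entry as
logically removed, and rl_insert() later unlinks marked entries with a CAS
while readers traverse under SMR.  A self-contained sketch of the idiom using
C11 atomics instead of the kernel's atomic(9) primitives (hypothetical names,
for illustration only):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>

	struct node {
		_Atomic uintptr_t next;	/* bit 0 set => logically removed */
	};

	static bool
	is_marked(uintptr_t p)
	{
		return ((p & 1) != 0);
	}

	static struct node *
	unmark(uintptr_t p)
	{
		return ((struct node *)(p & ~(uintptr_t)1));
	}

	/*
	 * Logical removal: set the mark; a later CAS on the
	 * predecessor's next pointer performs the physical unlink.
	 */
	static void
	mark_removed(struct node *n)
	{
		atomic_fetch_or(&n->next, (uintptr_t)1);
	}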
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 00f99516773c..c951e7297c89 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -478,7 +478,6 @@ thread_fini(void *mem, int size)
 
        td = (struct thread *)mem;
        EVENTHANDLER_DIRECT_INVOKE(thread_fini, td);
-       rlqentry_free(td->td_rlqe);
        turnstile_free(td->td_turnstile);
        sleepq_free(td->td_sleepqueue);
        umtx_thread_fini(td);
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index 7672ded459df..49b5b56dee17 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -184,13 +184,13 @@ SYSCTL_INT(_vm_largepages, OID_AUTO, reclaim_tries,
     "Number of contig reclaims before giving up for default alloc policy");
 
 #define        shm_rangelock_unlock(shmfd, cookie)                             \
-       rangelock_unlock(&(shmfd)->shm_rl, (cookie), &(shmfd)->shm_mtx)
+       rangelock_unlock(&(shmfd)->shm_rl, (cookie))
 #define        shm_rangelock_rlock(shmfd, start, end)                          \
-       rangelock_rlock(&(shmfd)->shm_rl, (start), (end), &(shmfd)->shm_mtx)
+       rangelock_rlock(&(shmfd)->shm_rl, (start), (end))
 #define        shm_rangelock_tryrlock(shmfd, start, end)                       \
-       rangelock_tryrlock(&(shmfd)->shm_rl, (start), (end), &(shmfd)->shm_mtx)
+       rangelock_tryrlock(&(shmfd)->shm_rl, (start), (end))
 #define        shm_rangelock_wlock(shmfd, start, end)                          \
-       rangelock_wlock(&(shmfd)->shm_rl, (start), (end), &(shmfd)->shm_mtx)
+       rangelock_wlock(&(shmfd)->shm_rl, (start), (end))
 
 static int
 uiomove_object_page(vm_object_t obj, size_t len, struct uio *uio)
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 8012fab29081..f192c6798858 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -2180,8 +2180,6 @@ freevnode(struct vnode *vp)
        VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0"));
        VNASSERT(pctrie_is_empty(&bo->bo_dirty.bv_root), vp,
            ("dirty blk trie not empty"));
-       VNASSERT(TAILQ_EMPTY(&vp->v_rl.rl_waiters), vp,
-           ("Dangling rangelock waiters"));
        VNASSERT((vp->v_iflag & (VI_DOINGINACT | VI_OWEINACT)) == 0, vp,
            ("Leaked inactivation"));
        VI_UNLOCK(vp);
diff --git a/sys/sys/rangelock.h b/sys/sys/rangelock.h
index 62ccf77c03bc..310371bef879 100644
--- a/sys/sys/rangelock.h
+++ b/sys/sys/rangelock.h
@@ -29,12 +29,14 @@
 #ifndef        _SYS_RANGELOCK_H
 #define        _SYS_RANGELOCK_H
 
-#include <sys/queue.h>
+#include <sys/types.h>
+#ifndef _KERNEL
+#include <stdbool.h>
+#endif
 
 #define        RL_LOCK_READ            0x0001
 #define        RL_LOCK_WRITE           0x0002
 #define        RL_LOCK_TYPE_MASK       0x0003
-#define        RL_LOCK_GRANTED         0x0004
 
 struct rl_q_entry;
 
@@ -44,42 +46,25 @@ struct rl_q_entry;
  * all existing lock owners are compatible with the request. Two lock
  * owners are compatible if their ranges do not overlap, or both
  * owners are for read.
- *
- * Access to the structure itself is synchronized with the externally
- * supplied mutex.
- *
- * rl_waiters is the queue containing in order (a) granted write lock
- * requests, (b) granted read lock requests, and (c) in order of arrival,
- * lock requests which cannot be granted yet.
- *
- * rl_currdep is the first lock request that cannot be granted now due
- * to the preceding requests conflicting with it (i.e., it points to
- * position (c) in the list above).
  */
 struct rangelock {
-       TAILQ_HEAD(, rl_q_entry) rl_waiters;
-       struct rl_q_entry       *rl_currdep;
+       struct rl_q_entry *head;
+       bool sleepers;
 };
 
 #ifdef _KERNEL
 
-struct mtx;
-
 void    rangelock_init(struct rangelock *lock);
 void    rangelock_destroy(struct rangelock *lock);
-void    rangelock_unlock(struct rangelock *lock, void *cookie,
-           struct mtx *ilk);
-void   *rangelock_unlock_range(struct rangelock *lock, void *cookie,
-           off_t start, off_t end, struct mtx *ilk);
-void   *rangelock_rlock(struct rangelock *lock, off_t start, off_t end,
-           struct mtx *ilk);
-void   *rangelock_tryrlock(struct rangelock *lock, off_t start, off_t end,
-           struct mtx *ilk);
-void   *rangelock_wlock(struct rangelock *lock, off_t start, off_t end,
-           struct mtx *ilk);
-void   *rangelock_trywlock(struct rangelock *lock, off_t start, off_t end,
-           struct mtx *ilk);
-void    rlqentry_free(struct rl_q_entry *rlqe);
+void    rangelock_unlock(struct rangelock *lock, void *cookie);
+void   *rangelock_rlock(struct rangelock *lock, vm_ooffset_t start,
+    vm_ooffset_t end);
+void   *rangelock_tryrlock(struct rangelock *lock, vm_ooffset_t start,
+    vm_ooffset_t end);
+void   *rangelock_wlock(struct rangelock *lock, vm_ooffset_t start,
+    vm_ooffset_t end);
+void   *rangelock_trywlock(struct rangelock *lock, vm_ooffset_t start,
+    vm_ooffset_t end);
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 void   _rangelock_cookie_assert(void *cookie, int what, const char *file,
     int line);
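
The compatibility rule restated in the header comment above (two owners are
compatible if their ranges do not overlap, or both are reads) reduces, for the
half-open ranges [rl_q_start, rl_q_end) used here, to the usual interval
overlap test that rl_e_compare() encodes.  As a standalone predicate (a
sketch; part 1 ignores the read/read case and treats every request as
exclusive):

	/* Half-open ranges [s1, e1) and [s2, e2) conflict iff they overlap. */
	static bool
	ranges_conflict(vm_ooffset_t s1, vm_ooffset_t e1,
	    vm_ooffset_t s2, vm_ooffset_t e2)
	{
		return (s1 < e2 && s2 < e1);
	}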
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index dc926d7a9c9e..0d0f228f7051 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -833,18 +833,15 @@ void      vn_seqc_write_end(struct vnode *vp);
 #define        vn_seqc_consistent(vp, seq)     seqc_consistent(&(vp)->v_seqc, seq)
 
 #define        vn_rangelock_unlock(vp, cookie)                                 \
-       rangelock_unlock(&(vp)->v_rl, (cookie), VI_MTX(vp))
-#define        vn_rangelock_unlock_range(vp, cookie, start, end)               \
-       rangelock_unlock_range(&(vp)->v_rl, (cookie), (start), (end),   \
-           VI_MTX(vp))
+       rangelock_unlock(&(vp)->v_rl, (cookie))
 #define        vn_rangelock_rlock(vp, start, end)                              \
-       rangelock_rlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
+       rangelock_rlock(&(vp)->v_rl, (start), (end))
 #define        vn_rangelock_tryrlock(vp, start, end)                           \
-       rangelock_tryrlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
+       rangelock_tryrlock(&(vp)->v_rl, (start), (end))
 #define        vn_rangelock_wlock(vp, start, end)                              \
-       rangelock_wlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
+       rangelock_wlock(&(vp)->v_rl, (start), (end))
 #define        vn_rangelock_trywlock(vp, start, end)                           \
-       rangelock_trywlock(&(vp)->v_rl, (start), (end), VI_MTX(vp))
+       rangelock_trywlock(&(vp)->v_rl, (start), (end))
 
 #define        vn_irflag_read(vp)      atomic_load_short(&(vp)->v_irflag)
 void   vn_irflag_set_locked(struct vnode *vp, short toset);

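
Call sites that go through the vnode macros keep their shape; only the
VI_MTX(vp) interlock argument drops out.  A typical ranged-I/O caller
(illustrative only, not from this commit) would now look like:

	void *cookie;

	cookie = vn_rangelock_rlock(vp, uio->uio_offset,
	    uio->uio_offset + uio->uio_resid);
	/* ... perform the ranged I/O on [offset, offset + resid) ... */
	vn_rangelock_unlock(vp, cookie);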