Author: mckusick
Date: Mon Oct 21 00:28:02 2013
New Revision: 256817
URL: http://svnweb.freebsd.org/changeset/base/256817

Log:
  Restructuring of the soft updates code to set it up so that the
  single kernel-wide soft update lock can be replaced with a
  per-filesystem soft-updates lock. This per-filesystem lock will
  allow each filesystem to have its own soft-updates flushing thread
  rather than being limited to a single soft-updates flushing thread
  for the entire kernel.
  
  Move soft update variables out of the ufsmount structure and into
  their own mount_softdeps structure referenced by ufsmount field
  um_softdep.  Eventually the per-filesystem lock will be in this
  structure. For now there is simply a pointer to the kernel-wide
  soft updates lock.
  
  Change all instances of ACQUIRE_LOCK and FREE_LOCK to pass the lock
  pointer in the mount_softdeps structure instead of a pointer to the
  kernel-wide soft-updates lock.
  
  Replace the five hash tables used by soft updates with per-filesystem
  copies of these tables allocated in the mount_softdeps structure.
  
  Several functions that flush dependencies when too many are allocated
  in the kernel used to operate across all filesystems. They are now
  parameterized to flush dependencies from a specified filesystem.
  For now, we stick with the round-robin flushing strategy when the
  kernel as a whole has too many dependencies allocated.
  
  While there are many lines of changes, there should be no functional
  change in the operation of soft updates.
  
  Tested by:    Peter Holm and Scott Long
  Sponsored by: Netflix

Modified:
  head/sys/ufs/ffs/ffs_softdep.c
  head/sys/ufs/ffs/softdep.h
  head/sys/ufs/ufs/ufsmount.h

Modified: head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- head/sys/ufs/ffs/ffs_softdep.c      Mon Oct 21 00:04:26 2013        
(r256816)
+++ head/sys/ufs/ffs/ffs_softdep.c      Mon Oct 21 00:28:02 2013        
(r256817)
@@ -616,48 +616,6 @@ softdep_freework(wkhd)
 
 FEATURE(softupdates, "FFS soft-updates support");
 
-/*
- * These definitions need to be adapted to the system to which
- * this file is being ported.
- */
-
-#define M_SOFTDEP_FLAGS        (M_WAITOK)
-
-#define        D_PAGEDEP       0
-#define        D_INODEDEP      1
-#define        D_BMSAFEMAP     2
-#define        D_NEWBLK        3
-#define        D_ALLOCDIRECT   4
-#define        D_INDIRDEP      5
-#define        D_ALLOCINDIR    6
-#define        D_FREEFRAG      7
-#define        D_FREEBLKS      8
-#define        D_FREEFILE      9
-#define        D_DIRADD        10
-#define        D_MKDIR         11
-#define        D_DIRREM        12
-#define        D_NEWDIRBLK     13
-#define        D_FREEWORK      14
-#define        D_FREEDEP       15
-#define        D_JADDREF       16
-#define        D_JREMREF       17
-#define        D_JMVREF        18
-#define        D_JNEWBLK       19
-#define        D_JFREEBLK      20
-#define        D_JFREEFRAG     21
-#define        D_JSEG          22
-#define        D_JSEGDEP       23
-#define        D_SBDEP         24
-#define        D_JTRUNC        25
-#define        D_JFSYNC        26
-#define        D_SENTINEL      27
-#define        D_LAST          D_SENTINEL
-
-unsigned long dep_current[D_LAST + 1];
-unsigned long dep_highuse[D_LAST + 1];
-unsigned long dep_total[D_LAST + 1];
-unsigned long dep_write[D_LAST + 1];
-
 static SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW, 0,
     "soft updates stats");
 static SYSCTL_NODE(_debug_softdep, OID_AUTO, total, CTLFLAG_RW, 0,
@@ -669,6 +627,11 @@ static SYSCTL_NODE(_debug_softdep, OID_A
 static SYSCTL_NODE(_debug_softdep, OID_AUTO, write, CTLFLAG_RW, 0,
     "current dependencies written");
 
+unsigned long dep_current[D_LAST + 1];
+unsigned long dep_highuse[D_LAST + 1];
+unsigned long dep_total[D_LAST + 1];
+unsigned long dep_write[D_LAST + 1];
+
 #define        SOFTDEP_TYPE(type, str, long)                                   
\
     static MALLOC_DEFINE(M_ ## type, #str, long);                      \
     SYSCTL_ULONG(_debug_softdep_total, OID_AUTO, str, CTLFLAG_RD,      \
@@ -713,6 +676,9 @@ static MALLOC_DEFINE(M_SENTINEL, "sentin
 
 static MALLOC_DEFINE(M_SAVEDINO, "savedino", "Saved inodes");
 static MALLOC_DEFINE(M_JBLOCKS, "jblocks", "Journal block locations");
+static MALLOC_DEFINE(M_MOUNTDATA, "softdep", "Softdep per-mount data");
+
+#define M_SOFTDEP_FLAGS        (M_WAITOK)
 
 /* 
  * translate from workitem type to memory type
@@ -749,8 +715,6 @@ static struct malloc_type *memtype[] = {
        M_SENTINEL
 };
 
-static LIST_HEAD(mkdirlist, mkdir) mkdirlisthd;
-
 #define DtoM(type) (memtype[type])
 
 /*
@@ -766,51 +730,16 @@ static LIST_HEAD(mkdirlist, mkdir) mkdir
 #define        DOT_OFFSET      offsetof(struct dirtemplate, dot_ino)
 
 /*
- * Forward declarations.
- */
-struct inodedep_hashhead;
-struct newblk_hashhead;
-struct pagedep_hashhead;
-struct bmsafemap_hashhead;
-
-/*
- * Private journaling structures.
- */
-struct jblocks {
-       struct jseglst  jb_segs;        /* TAILQ of current segments. */
-       struct jseg     *jb_writeseg;   /* Next write to complete. */
-       struct jseg     *jb_oldestseg;  /* Oldest segment with valid entries. */
-       struct jextent  *jb_extent;     /* Extent array. */
-       uint64_t        jb_nextseq;     /* Next sequence number. */
-       uint64_t        jb_oldestwrseq; /* Oldest written sequence number. */
-       uint8_t         jb_needseg;     /* Need a forced segment. */
-       uint8_t         jb_suspended;   /* Did journal suspend writes? */
-       int             jb_avail;       /* Available extents. */
-       int             jb_used;        /* Last used extent. */
-       int             jb_head;        /* Allocator head. */
-       int             jb_off;         /* Allocator extent offset. */
-       int             jb_blocks;      /* Total disk blocks covered. */
-       int             jb_free;        /* Total disk blocks free. */
-       int             jb_min;         /* Minimum free space. */
-       int             jb_low;         /* Low on space. */
-       int             jb_age;         /* Insertion time of oldest rec. */
-};
-
-struct jextent {
-       ufs2_daddr_t    je_daddr;       /* Disk block address. */
-       int             je_blocks;      /* Disk block count. */
-};
-
-/*
  * Internal function prototypes.
  */
+static void check_clear_deps(struct mount *);
 static void softdep_error(char *, int);
 static int softdep_process_worklist(struct mount *, int);
 static int softdep_waitidle(struct mount *);
 static void drain_output(struct vnode *);
 static struct buf *getdirtybuf(struct buf *, struct rwlock *, int);
-static void clear_remove(void);
-static void clear_inodedeps(void);
+static void clear_remove(struct mount *);
+static void clear_inodedeps(struct mount *);
 static void unlinked_inodedep(struct mount *, struct inodedep *);
 static void clear_unlinked_inodedep(struct inodedep *);
 static struct inodedep *first_unlinked_inodedep(struct ufsmount *);
@@ -954,20 +883,20 @@ static    void allocdirect_merge(struct all
            struct allocdirect *, struct allocdirect *);
 static struct freefrag *allocindir_merge(struct allocindir *,
            struct allocindir *);
-static int bmsafemap_find(struct bmsafemap_hashhead *, struct mount *, int,
+static int bmsafemap_find(struct bmsafemap_hashhead *, int,
            struct bmsafemap **);
 static struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *,
            int cg, struct bmsafemap *);
-static int newblk_find(struct newblk_hashhead *, struct mount *, ufs2_daddr_t,
-           int, struct newblk **);
+static int newblk_find(struct newblk_hashhead *, ufs2_daddr_t, int,
+           struct newblk **);
 static int newblk_lookup(struct mount *, ufs2_daddr_t, int, struct newblk **);
-static int inodedep_find(struct inodedep_hashhead *, struct fs *, ino_t,
+static int inodedep_find(struct inodedep_hashhead *, ino_t,
            struct inodedep **);
 static int inodedep_lookup(struct mount *, ino_t, int, struct inodedep **);
 static int pagedep_lookup(struct mount *, struct buf *bp, ino_t, ufs_lbn_t,
            int, struct pagedep **);
 static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t,
-           struct mount *mp, int, struct pagedep **);
+           struct pagedep **);
 static void pause_timer(void *);
 static int request_cleanup(struct mount *, int);
 static int process_worklist_item(struct mount *, int, int);
@@ -982,9 +911,9 @@ static      void remove_from_worklist(struct 
 static void softdep_flush(void);
 static void softdep_flushjournal(struct mount *);
 static int softdep_speedup(void);
-static void worklist_speedup(void);
+static void worklist_speedup(struct mount *);
 static int journal_mount(struct mount *, struct fs *, struct ucred *);
-static void journal_unmount(struct mount *);
+static void journal_unmount(struct ufsmount *);
 static int journal_space(struct ufsmount *, int);
 static void journal_suspend(struct ufsmount *);
 static int journal_unsuspend(struct ufsmount *ump);
@@ -1030,15 +959,25 @@ static   void softdep_disk_write_complete(
 static void softdep_deallocate_dependencies(struct buf *);
 static int softdep_count_dependencies(struct buf *bp, int);
 
+/*
+ * Global lock over all of soft updates.
+ */
 static struct rwlock lk;
 RW_SYSINIT(softdep_lock, &lk, "Softdep Lock");
 
-#define TRY_ACQUIRE_LOCK(lk)           rw_try_wlock(lk)
-#define ACQUIRE_LOCK(lk)               rw_wlock(lk)
-#define FREE_LOCK(lk)                  rw_wunlock(lk)
+/*
+ * Allow per-filesystem soft-updates locking.
+ * For now all use the same global lock defined above.
+ */
+#define LOCK_PTR(ump)          ((ump)->um_softdep->sd_fslock)
+#define TRY_ACQUIRE_LOCK(ump)  rw_try_wlock((ump)->um_softdep->sd_fslock)
+#define ACQUIRE_LOCK(ump)      rw_wlock((ump)->um_softdep->sd_fslock)
+#define FREE_LOCK(ump)         rw_wunlock((ump)->um_softdep->sd_fslock)
+#define LOCK_OWNED(ump)                rw_assert((ump)->um_softdep->sd_fslock, 
\
+                                   RA_WLOCKED)
 
-#define        BUF_AREC(bp)                    lockallowrecurse(&(bp)->b_lock)
-#define        BUF_NOREC(bp)                   
lockdisablerecurse(&(bp)->b_lock)
+#define        BUF_AREC(bp)            lockallowrecurse(&(bp)->b_lock)
+#define        BUF_NOREC(bp)           lockdisablerecurse(&(bp)->b_lock)
 
 /*
  * Worklist queue management.
@@ -1073,7 +1012,7 @@ worklist_insert(head, item, locked)
 {
 
        if (locked)
-               rw_assert(&lk, RA_WLOCKED);
+               LOCK_OWNED(VFSTOUFS(item->wk_mp));
        if (item->wk_state & ONWORKLIST)
                panic("worklist_insert: %p %s(0x%X) already on list",
                    item, TYPENAME(item->wk_type), item->wk_state);
@@ -1088,7 +1027,7 @@ worklist_remove(item, locked)
 {
 
        if (locked)
-               rw_assert(&lk, RA_WLOCKED);
+               LOCK_OWNED(VFSTOUFS(item->wk_mp));
        if ((item->wk_state & ONWORKLIST) == 0)
                panic("worklist_remove: %p %s(0x%X) not on list",
                    item, TYPENAME(item->wk_type), item->wk_state);
@@ -1161,7 +1100,6 @@ jwork_move(dst, src)
                        freedep = freedep_merge(WK_FREEDEP(wk), freedep);
        }
 
-       rw_assert(&lk, RA_WLOCKED);
        while ((wk = LIST_FIRST(src)) != NULL) {
                WORKLIST_REMOVE(wk);
                WORKLIST_INSERT(dst, wk);
@@ -1216,7 +1154,6 @@ workitem_free(item, type)
        int type;
 {
        struct ufsmount *ump;
-       rw_assert(&lk, RA_WLOCKED);
 
 #ifdef DEBUG
        if (item->wk_state & ONWORKLIST)
@@ -1229,6 +1166,7 @@ workitem_free(item, type)
        if (item->wk_state & IOWAITING)
                wakeup(item);
        ump = VFSTOUFS(item->wk_mp);
+       LOCK_OWNED(ump);
        KASSERT(ump->softdep_deps > 0,
            ("workitem_free: %s: softdep_deps going negative",
            ump->um_fs->fs_fsmnt));
@@ -1237,7 +1175,11 @@ workitem_free(item, type)
        KASSERT(dep_current[item->wk_type] > 0,
            ("workitem_free: %s: dep_current[%s] going negative",
            ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
+       KASSERT(ump->softdep_curdeps[item->wk_type] > 0,
+           ("workitem_free: %s: softdep_curdeps[%s] going negative",
+           ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
        dep_current[item->wk_type]--;
+       ump->softdep_curdeps[item->wk_type] -= 1;
        free(item, DtoM(type));
 }
 
@@ -1254,14 +1196,15 @@ workitem_alloc(item, type, mp)
        item->wk_state = 0;
 
        ump = VFSTOUFS(mp);
-       ACQUIRE_LOCK(&lk);
+       ACQUIRE_LOCK(ump);
        dep_current[type]++;
        if (dep_current[type] > dep_highuse[type])
                dep_highuse[type] = dep_current[type];
        dep_total[type]++;
+       ump->softdep_curdeps[type] += 1;
        ump->softdep_deps++;
        ump->softdep_accdeps++;
-       FREE_LOCK(&lk);
+       FREE_LOCK(ump);
 }
 
 static void
@@ -1269,7 +1212,15 @@ workitem_reassign(item, newtype)
        struct worklist *item;
        int newtype;
 {
+       struct ufsmount *ump;
 
+       ump = VFSTOUFS(item->wk_mp);
+       LOCK_OWNED(ump);
+       KASSERT(ump->softdep_curdeps[item->wk_type] > 0,
+           ("workitem_reassign: %s: softdep_curdeps[%s] going negative",
+           VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
+       ump->softdep_curdeps[item->wk_type] -= 1;
+       ump->softdep_curdeps[newtype] += 1;
        KASSERT(dep_current[item->wk_type] > 0,
            ("workitem_reassign: %s: dep_current[%s] going negative",
            VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
@@ -1290,7 +1241,8 @@ static int tickdelay = 2; /* number of t
 static int proc_waiting;       /* tracks whether we have a timeout posted */
 static int *stat_countp;       /* statistic to count in proc_waiting timeout */
 static struct callout softdep_callout;
-static int req_pending;
+static struct mount *req_pending;
+#define ALLCLEAN ((struct mount *)-1)
 static int req_clear_inodedeps;        /* syncer process flush some inodedeps 
*/
 static int req_clear_remove;   /* syncer process flush some freeblks */
 static int softdep_flushcache = 0; /* Should we do BIO_FLUSH? */
@@ -1298,6 +1250,7 @@ static int softdep_flushcache = 0; /* Sh
 /*
  * runtime statistics
  */
+static int stat_softdep_mounts;        /* number of softdep mounted 
filesystems */
 static int stat_worklist_push; /* number of worklist cleanups */
 static int stat_blk_limit_push;        /* number of times block limit neared */
 static int stat_ino_limit_push;        /* number of times inode limit neared */
@@ -1329,6 +1282,8 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, tic
     &tickdelay, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, maxindirdeps, CTLFLAG_RW,
     &maxindirdeps, 0, "");
+SYSCTL_INT(_debug_softdep, OID_AUTO, softdep_mounts, CTLFLAG_RD,
+    &stat_softdep_mounts, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, worklist_push, CTLFLAG_RW,
     &stat_worklist_push, 0,"");
 SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_push, CTLFLAG_RW,
@@ -1382,13 +1337,10 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, flu
 
 SYSCTL_DECL(_vfs_ffs);
 
-LIST_HEAD(bmsafemap_hashhead, bmsafemap) *bmsafemap_hashtbl;
-static u_long  bmsafemap_hash; /* size of hash table - 1 */
-
-static int compute_summary_at_mount = 0;       /* Whether to recompute the 
summary at mount time */
+/* Whether to recompute the summary at mount time */
+static int compute_summary_at_mount = 0;
 SYSCTL_INT(_vfs_ffs, OID_AUTO, compute_summary_at_mount, CTLFLAG_RW,
           &compute_summary_at_mount, 0, "Recompute summary at mount");
-
 static struct proc *softdepproc;
 static struct kproc_desc softdep_kp = {
        "softdepflush",
@@ -1413,21 +1365,6 @@ softdep_flush(void)
 
        for (;;) {      
                kproc_suspend_check(softdepproc);
-               ACQUIRE_LOCK(&lk);
-               /*
-                * If requested, try removing inode or removal dependencies.
-                */
-               if (req_clear_inodedeps) {
-                       clear_inodedeps();
-                       req_clear_inodedeps -= 1;
-                       wakeup_one(&proc_waiting);
-               }
-               if (req_clear_remove) {
-                       clear_remove();
-                       req_clear_remove -= 1;
-                       wakeup_one(&proc_waiting);
-               }
-               FREE_LOCK(&lk);
                remaining = progress = 0;
                mtx_lock(&mountlist_mtx);
                for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp)  {
@@ -1436,8 +1373,8 @@ softdep_flush(void)
                                continue;
                        if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
                                continue;
-                       progress += softdep_process_worklist(mp, 0);
                        ump = VFSTOUFS(mp);
+                       progress += softdep_process_worklist(mp, 0);
                        remaining += ump->softdep_on_worklist;
                        mtx_lock(&mountlist_mtx);
                        nmp = TAILQ_NEXT(mp, mnt_list);
@@ -1446,20 +1383,21 @@ softdep_flush(void)
                mtx_unlock(&mountlist_mtx);
                if (remaining && progress)
                        continue;
-               ACQUIRE_LOCK(&lk);
-               if (!req_pending)
+               rw_wlock(&lk);
+               if (req_pending == NULL)
                        msleep(&req_pending, &lk, PVM, "sdflush", hz);
-               req_pending = 0;
-               FREE_LOCK(&lk);
+               req_pending = NULL;
+               rw_wunlock(&lk);
        }
 }
 
 static void
-worklist_speedup(void)
+worklist_speedup(mp)
+       struct mount *mp;
 {
        rw_assert(&lk, RA_WLOCKED);
        if (req_pending == 0) {
-               req_pending = 1;
+               req_pending = mp;
                wakeup(&req_pending);
        }
 }
@@ -1468,9 +1406,9 @@ static int
 softdep_speedup(void)
 {
 
-       worklist_speedup();
+       worklist_speedup(ALLCLEAN);
        bd_speedup();
-       return speedup_syncer();
+       return (speedup_syncer());
 }
 
 /*
@@ -1491,8 +1429,8 @@ add_to_worklist(wk, flags)
 {
        struct ufsmount *ump;
 
-       rw_assert(&lk, RA_WLOCKED);
        ump = VFSTOUFS(wk->wk_mp);
+       LOCK_OWNED(ump);
        if (wk->wk_state & ONWORKLIST)
                panic("add_to_worklist: %s(0x%X) already on list",
                    TYPENAME(wk->wk_type), wk->wk_state);
@@ -1508,7 +1446,7 @@ add_to_worklist(wk, flags)
        }
        ump->softdep_on_worklist += 1;
        if (flags & WK_NODELAY)
-               worklist_speedup();
+               worklist_speedup(wk->wk_mp);
 }
 
 /*
@@ -1544,9 +1482,11 @@ wait_worklist(wk, wmesg)
        struct worklist *wk;
        char *wmesg;
 {
+       struct ufsmount *ump;
 
+       ump = VFSTOUFS(wk->wk_mp);
        wk->wk_state |= IOWAITING;
-       msleep(wk, &lk, PVM, wmesg, 0);
+       msleep(wk, LOCK_PTR(ump), PVM, wmesg, 0);
 }
 
 /*
@@ -1568,54 +1508,41 @@ softdep_process_worklist(mp, full)
        long starttime;
 
        KASSERT(mp != NULL, ("softdep_process_worklist: NULL mp"));
-       /*
-        * Record the process identifier of our caller so that we can give
-        * this process preferential treatment in request_cleanup below.
-        */
+       if (MOUNTEDSOFTDEP(mp) == 0)
+               return (0);
        matchcnt = 0;
        ump = VFSTOUFS(mp);
-       ACQUIRE_LOCK(&lk);
+       ACQUIRE_LOCK(ump);
        starttime = time_second;
-       softdep_process_journal(mp, NULL, full?MNT_WAIT:0);
+       softdep_process_journal(mp, NULL, full ? MNT_WAIT : 0);
+       check_clear_deps(mp);
        while (ump->softdep_on_worklist > 0) {
                if ((cnt = process_worklist_item(mp, 10, LK_NOWAIT)) == 0)
                        break;
                else
                        matchcnt += cnt;
-               /*
-                * If requested, try removing inode or removal dependencies.
-                */
-               if (req_clear_inodedeps) {
-                       clear_inodedeps();
-                       req_clear_inodedeps -= 1;
-                       wakeup_one(&proc_waiting);
-               }
-               if (req_clear_remove) {
-                       clear_remove();
-                       req_clear_remove -= 1;
-                       wakeup_one(&proc_waiting);
-               }
+               check_clear_deps(mp);
                /*
                 * We do not generally want to stop for buffer space, but if
                 * we are really being a buffer hog, we will stop and wait.
                 */
                if (should_yield()) {
-                       FREE_LOCK(&lk);
+                       FREE_LOCK(ump);
                        kern_yield(PRI_USER);
                        bwillwrite();
-                       ACQUIRE_LOCK(&lk);
+                       ACQUIRE_LOCK(ump);
                }
                /*
                 * Never allow processing to run for more than one
-                * second. Otherwise the other mountpoints may get
-                * excessively backlogged.
+                * second. This gives the syncer thread the opportunity
+                * to pause if appropriate.
                 */
                if (!full && starttime != time_second)
                        break;
        }
        if (full == 0)
                journal_unsuspend(ump);
-       FREE_LOCK(&lk);
+       FREE_LOCK(ump);
        return (matchcnt);
 }
 
@@ -1630,12 +1557,13 @@ process_removes(vp)
 {
        struct inodedep *inodedep;
        struct dirrem *dirrem;
+       struct ufsmount *ump;
        struct mount *mp;
        ino_t inum;
 
-       rw_assert(&lk, RA_WLOCKED);
-
        mp = vp->v_mount;
+       ump = VFSTOUFS(mp);
+       LOCK_OWNED(ump);
        inum = VTOI(vp)->i_number;
        for (;;) {
 top:
@@ -1658,12 +1586,12 @@ top:
                if (dirrem == NULL)
                        return;
                remove_from_worklist(&dirrem->dm_list);
-               FREE_LOCK(&lk);
+               FREE_LOCK(ump);
                if (vn_start_secondary_write(NULL, &mp, V_NOWAIT))
                        panic("process_removes: suspended filesystem");
                handle_workitem_remove(dirrem, 0);
                vn_finished_secondary_write(mp);
-               ACQUIRE_LOCK(&lk);
+               ACQUIRE_LOCK(ump);
        }
 }
 
@@ -1679,13 +1607,14 @@ process_truncates(vp)
 {
        struct inodedep *inodedep;
        struct freeblks *freeblks;
+       struct ufsmount *ump;
        struct mount *mp;
        ino_t inum;
        int cgwait;
 
-       rw_assert(&lk, RA_WLOCKED);
-
        mp = vp->v_mount;
+       ump = VFSTOUFS(mp);
+       LOCK_OWNED(ump);
        inum = VTOI(vp)->i_number;
        for (;;) {
                if (inodedep_lookup(mp, inum, 0, &inodedep) == 0)
@@ -1706,33 +1635,33 @@ process_truncates(vp)
                        }
                        /* Freeblks is waiting on a inode write. */
                        if ((freeblks->fb_state & COMPLETE) == 0) {
-                               FREE_LOCK(&lk);
+                               FREE_LOCK(ump);
                                ffs_update(vp, 1);
-                               ACQUIRE_LOCK(&lk);
+                               ACQUIRE_LOCK(ump);
                                break;
                        }
                        if ((freeblks->fb_state & (ALLCOMPLETE | ONWORKLIST)) ==
                            (ALLCOMPLETE | ONWORKLIST)) {
                                remove_from_worklist(&freeblks->fb_list);
                                freeblks->fb_state |= INPROGRESS;
-                               FREE_LOCK(&lk);
+                               FREE_LOCK(ump);
                                if (vn_start_secondary_write(NULL, &mp,
                                    V_NOWAIT))
                                        panic("process_truncates: "
                                            "suspended filesystem");
                                handle_workitem_freeblocks(freeblks, 0);
                                vn_finished_secondary_write(mp);
-                               ACQUIRE_LOCK(&lk);
+                               ACQUIRE_LOCK(ump);
                                break;
                        }
                        if (freeblks->fb_cgwait)
                                cgwait++;
                }
                if (cgwait) {
-                       FREE_LOCK(&lk);
+                       FREE_LOCK(ump);
                        sync_cgs(mp, MNT_WAIT);
                        ffs_sync_snap(mp, MNT_WAIT);
-                       ACQUIRE_LOCK(&lk);
+                       ACQUIRE_LOCK(ump);
                        continue;
                }
                if (freeblks == NULL)
@@ -1756,7 +1685,6 @@ process_worklist_item(mp, target, flags)
        int matchcnt;
        int error;
 
-       rw_assert(&lk, RA_WLOCKED);
        KASSERT(mp != NULL, ("process_worklist_item: NULL mp"));
        /*
         * If we are being called because of a process doing a
@@ -1767,6 +1695,7 @@ process_worklist_item(mp, target, flags)
                return (-1);
        PHOLD(curproc); /* Don't let the stack go away. */
        ump = VFSTOUFS(mp);
+       LOCK_OWNED(ump);
        matchcnt = 0;
        sentinel.wk_mp = NULL;
        sentinel.wk_type = D_SENTINEL;
@@ -1783,7 +1712,7 @@ process_worklist_item(mp, target, flags)
                            wk);
                wk->wk_state |= INPROGRESS;
                remove_from_worklist(wk);
-               FREE_LOCK(&lk);
+               FREE_LOCK(ump);
                if (vn_start_secondary_write(NULL, &mp, V_NOWAIT))
                        panic("process_worklist_item: suspended filesystem");
                switch (wk->wk_type) {
@@ -1816,7 +1745,7 @@ process_worklist_item(mp, target, flags)
                        /* NOTREACHED */
                }
                vn_finished_secondary_write(mp);
-               ACQUIRE_LOCK(&lk);
+               ACQUIRE_LOCK(ump);
                if (error == 0) {
                        if (++matchcnt == target)
                                break;
@@ -1850,6 +1779,7 @@ softdep_move_dependencies(oldbp, newbp)
        struct buf *newbp;
 {
        struct worklist *wk, *wktail;
+       struct ufsmount *ump;
        int dirty;
 
        if ((wk = LIST_FIRST(&oldbp->b_dep)) == NULL)
@@ -1858,7 +1788,8 @@ softdep_move_dependencies(oldbp, newbp)
            ("softdep_move_dependencies called on non-softdep filesystem"));
        dirty = 0;
        wktail = NULL;
-       ACQUIRE_LOCK(&lk);
+       ump = VFSTOUFS(wk->wk_mp);
+       ACQUIRE_LOCK(ump);
        while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) {
                LIST_REMOVE(wk, wk_list);
                if (wk->wk_type == D_BMSAFEMAP &&
@@ -1870,7 +1801,7 @@ softdep_move_dependencies(oldbp, newbp)
                        LIST_INSERT_AFTER(wktail, wk, wk_list);
                wktail = wk;
        }
-       FREE_LOCK(&lk);
+       FREE_LOCK(ump);
 
        return (dirty);
 }
@@ -1916,15 +1847,15 @@ softdep_waitidle(struct mount *mp)
        int i;
 
        ump = VFSTOUFS(mp);
-       ACQUIRE_LOCK(&lk);
+       ACQUIRE_LOCK(ump);
        for (i = 0; i < 10 && ump->softdep_deps; i++) {
                ump->softdep_req = 1;
                if (ump->softdep_on_worklist)
                        panic("softdep_waitidle: work added after flush.");
-               msleep(&ump->softdep_deps, &lk, PVM, "softdeps", 1);
+               msleep(&ump->softdep_deps, LOCK_PTR(ump), PVM, "softdeps", 1);
        }
        ump->softdep_req = 0;
-       FREE_LOCK(&lk);
+       FREE_LOCK(ump);
        error = 0;
        if (i == 10) {
                error = EBUSY;
@@ -2023,12 +1954,14 @@ retry_flush:
 /*
  * Structure hashing.
  * 
- * There are three types of structures that can be looked up:
+ * There are four types of structures that can be looked up:
  *     1) pagedep structures identified by mount point, inode number,
  *        and logical block.
  *     2) inodedep structures identified by mount point and inode number.
  *     3) newblk structures identified by mount point and
  *        physical block number.
+ *     4) bmsafemap structures identified by mount point and
+ *        cylinder group number.
  *
  * The "pagedep" and "inodedep" dependency structures are hashed
  * separately from the file blocks and inodes to which they correspond.
@@ -2040,7 +1973,8 @@ retry_flush:
  * their allocdirect or allocindir structure.
  *
  * The lookup routines optionally create and hash a new instance when
- * an existing entry is not found.
+ * an existing entry is not found. The bmsafemap lookup routine always
+ * allocates a new structure if an existing one is not found.
  */
 #define DEPALLOC       0x0001  /* allocate structure if lookup fails */
 #define NODELAY                0x0002  /* cannot do background work */
@@ -2048,26 +1982,20 @@ retry_flush:
 /*
  * Structures and routines associated with pagedep caching.
  */
-LIST_HEAD(pagedep_hashhead, pagedep) *pagedep_hashtbl;
-u_long pagedep_hash;           /* size of hash table - 1 */
-#define        PAGEDEP_HASH(mp, inum, lbn) \
-       (&pagedep_hashtbl[((((register_t)(mp)) >> 13) + (inum) + (lbn)) & \
-           pagedep_hash])
+#define        PAGEDEP_HASH(ump, inum, lbn) \
+       (&(ump)->pagedep_hashtbl[((inum) + (lbn)) & (ump)->pagedep_hash_size])
 
 static int
-pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp)
+pagedep_find(pagedephd, ino, lbn, pagedeppp)
        struct pagedep_hashhead *pagedephd;
        ino_t ino;
        ufs_lbn_t lbn;
-       struct mount *mp;
-       int flags;
        struct pagedep **pagedeppp;
 {
        struct pagedep *pagedep;
 
        LIST_FOREACH(pagedep, pagedephd, pd_hash) {
-               if (ino == pagedep->pd_ino && lbn == pagedep->pd_lbn &&
-                   mp == pagedep->pd_list.wk_mp) {
+               if (ino == pagedep->pd_ino && lbn == pagedep->pd_lbn) {
                        *pagedeppp = pagedep;
                        return (1);
                }
@@ -2093,10 +2021,12 @@ pagedep_lookup(mp, bp, ino, lbn, flags, 
        struct pagedep *pagedep;
        struct pagedep_hashhead *pagedephd;
        struct worklist *wk;
+       struct ufsmount *ump;
        int ret;
        int i;
 
-       rw_assert(&lk, RA_WLOCKED);
+       ump = VFSTOUFS(mp);
+       LOCK_OWNED(ump);
        if (bp) {
                LIST_FOREACH(wk, &bp->b_dep, wk_list) {
                        if (wk->wk_type == D_PAGEDEP) {
@@ -2105,8 +2035,8 @@ pagedep_lookup(mp, bp, ino, lbn, flags, 
                        }
                }
        }
-       pagedephd = PAGEDEP_HASH(mp, ino, lbn);
-       ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp);
+       pagedephd = PAGEDEP_HASH(ump, ino, lbn);
+       ret = pagedep_find(pagedephd, ino, lbn, pagedeppp);
        if (ret) {
                if (((*pagedeppp)->pd_state & ONWORKLIST) == 0 && bp)
                        WORKLIST_INSERT(&bp->b_dep, &(*pagedeppp)->pd_list);
@@ -2114,12 +2044,12 @@ pagedep_lookup(mp, bp, ino, lbn, flags, 
        }
        if ((flags & DEPALLOC) == 0)
                return (0);
-       FREE_LOCK(&lk);
+       FREE_LOCK(ump);
        pagedep = malloc(sizeof(struct pagedep),
            M_PAGEDEP, M_SOFTDEP_FLAGS|M_ZERO);
        workitem_alloc(&pagedep->pd_list, D_PAGEDEP, mp);
-       ACQUIRE_LOCK(&lk);
-       ret = pagedep_find(pagedephd, ino, lbn, mp, flags, pagedeppp);
+       ACQUIRE_LOCK(ump);
+       ret = pagedep_find(pagedephd, ino, lbn, pagedeppp);
        if (*pagedeppp) {
                /*
                 * This should never happen since we only create pagedeps
@@ -2143,22 +2073,19 @@ pagedep_lookup(mp, bp, ino, lbn, flags, 
 /*
  * Structures and routines associated with inodedep caching.
  */
-LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl;
-static u_long  inodedep_hash;  /* size of hash table - 1 */
-#define        INODEDEP_HASH(fs, inum) \
-      (&inodedep_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & 
inodedep_hash])
+#define        INODEDEP_HASH(ump, inum) \
+      (&(ump)->inodedep_hashtbl[(inum) & (ump)->inodedep_hash_size])
 
 static int
-inodedep_find(inodedephd, fs, inum, inodedeppp)
+inodedep_find(inodedephd, inum, inodedeppp)
        struct inodedep_hashhead *inodedephd;
-       struct fs *fs;
        ino_t inum;
        struct inodedep **inodedeppp;
 {
        struct inodedep *inodedep;
 
        LIST_FOREACH(inodedep, inodedephd, id_hash)
-               if (inum == inodedep->id_ino && fs == inodedep->id_fs)
+               if (inum == inodedep->id_ino)
                        break;
        if (inodedep) {
                *inodedeppp = inodedep;
@@ -2183,13 +2110,15 @@ inodedep_lookup(mp, inum, flags, inodede
 {
        struct inodedep *inodedep;
        struct inodedep_hashhead *inodedephd;
+       struct ufsmount *ump;
        struct fs *fs;
 
-       rw_assert(&lk, RA_WLOCKED);
-       fs = VFSTOUFS(mp)->um_fs;
-       inodedephd = INODEDEP_HASH(fs, inum);
+       ump = VFSTOUFS(mp);
+       LOCK_OWNED(ump);
+       fs = ump->um_fs;
+       inodedephd = INODEDEP_HASH(ump, inum);
 
-       if (inodedep_find(inodedephd, fs, inum, inodedeppp))
+       if (inodedep_find(inodedephd, inum, inodedeppp))
                return (1);
        if ((flags & DEPALLOC) == 0)
                return (0);
@@ -2198,12 +2127,12 @@ inodedep_lookup(mp, inum, flags, inodede
         */
        if (dep_current[D_INODEDEP] > max_softdeps && (flags & NODELAY) == 0)
                request_cleanup(mp, FLUSH_INODES);
-       FREE_LOCK(&lk);
+       FREE_LOCK(ump);
        inodedep = malloc(sizeof(struct inodedep),
                M_INODEDEP, M_SOFTDEP_FLAGS);
        workitem_alloc(&inodedep->id_list, D_INODEDEP, mp);
-       ACQUIRE_LOCK(&lk);
-       if (inodedep_find(inodedephd, fs, inum, inodedeppp)) {
+       ACQUIRE_LOCK(ump);
+       if (inodedep_find(inodedephd, inum, inodedeppp)) {
                WORKITEM_FREE(inodedep, D_INODEDEP);
                return (1);
        }
@@ -2235,15 +2164,12 @@ inodedep_lookup(mp, inum, flags, inodede
 /*
  * Structures and routines associated with newblk caching.
  */
-LIST_HEAD(newblk_hashhead, newblk) *newblk_hashtbl;
-u_long newblk_hash;            /* size of hash table - 1 */
-#define        NEWBLK_HASH(fs, inum) \
-       (&newblk_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & newblk_hash])
+#define        NEWBLK_HASH(ump, inum) \
+       (&(ump)->newblk_hashtbl[(inum) & (ump)->newblk_hash_size])
 
 static int
-newblk_find(newblkhd, mp, newblkno, flags, newblkpp)
+newblk_find(newblkhd, newblkno, flags, newblkpp)
        struct newblk_hashhead *newblkhd;
-       struct mount *mp;
        ufs2_daddr_t newblkno;
        int flags;
        struct newblk **newblkpp;
@@ -2253,8 +2179,6 @@ newblk_find(newblkhd, mp, newblkno, flag
        LIST_FOREACH(newblk, newblkhd, nb_hash) {
                if (newblkno != newblk->nb_newblkno)
                        continue;
-               if (mp != newblk->nb_list.wk_mp)
-                       continue;
                /*
                 * If we're creating a new dependency don't match those that
                 * have already been converted to allocdirects.  This is for
@@ -2286,18 +2210,21 @@ newblk_lookup(mp, newblkno, flags, newbl
 {
        struct newblk *newblk;
        struct newblk_hashhead *newblkhd;
+       struct ufsmount *ump;
 
-       newblkhd = NEWBLK_HASH(VFSTOUFS(mp)->um_fs, newblkno);
-       if (newblk_find(newblkhd, mp, newblkno, flags, newblkpp))
+       ump = VFSTOUFS(mp);
+       LOCK_OWNED(ump);
+       newblkhd = NEWBLK_HASH(ump, newblkno);
+       if (newblk_find(newblkhd, newblkno, flags, newblkpp))
                return (1);
        if ((flags & DEPALLOC) == 0)
                return (0);
-       FREE_LOCK(&lk);
+       FREE_LOCK(ump);
        newblk = malloc(sizeof(union allblk), M_NEWBLK,
            M_SOFTDEP_FLAGS | M_ZERO);
        workitem_alloc(&newblk->nb_list, D_NEWBLK, mp);
-       ACQUIRE_LOCK(&lk);
-       if (newblk_find(newblkhd, mp, newblkno, flags, newblkpp)) {
+       ACQUIRE_LOCK(ump);
+       if (newblk_find(newblkhd, newblkno, flags, newblkpp)) {
                WORKITEM_FREE(newblk, D_NEWBLK);
                return (1);
        }
@@ -2315,10 +2242,8 @@ newblk_lookup(mp, newblkno, flags, newbl
 /*
  * Structures and routines associated with freed indirect block caching.
  */
-struct freeworklst *indir_hashtbl;
-u_long indir_hash;             /* size of hash table - 1 */
-#define        INDIR_HASH(mp, blkno) \
-       (&indir_hashtbl[((((register_t)(mp)) >> 13) + (blkno)) & indir_hash])
+#define        INDIR_HASH(ump, blkno) \
+       (&(ump)->indir_hashtbl[(blkno) & (ump)->indir_hash_size])
 
 /*
  * Lookup an indirect block in the indir hash table.  The freework is
@@ -2331,14 +2256,14 @@ indirblk_lookup(mp, blkno)
        ufs2_daddr_t blkno;
 {
        struct freework *freework;
-       struct freeworklst *wkhd;
+       struct indir_hashhead *wkhd;
+       struct ufsmount *ump;
 
-       wkhd = INDIR_HASH(mp, blkno);
+       ump = VFSTOUFS(mp);
+       wkhd = INDIR_HASH(ump, blkno);
        TAILQ_FOREACH(freework, wkhd, fw_next) {
                if (freework->fw_blkno != blkno)
                        continue;
-               if (freework->fw_list.wk_mp != mp)
-                       continue;
                indirblk_remove(freework);
                return (1);
        }
@@ -2356,15 +2281,17 @@ indirblk_insert(freework)
 {
        struct jblocks *jblocks;
        struct jseg *jseg;
+       struct ufsmount *ump;
 
-       jblocks = VFSTOUFS(freework->fw_list.wk_mp)->softdep_jblocks;
+       ump = VFSTOUFS(freework->fw_list.wk_mp);
+       jblocks = ump->softdep_jblocks;
        jseg = TAILQ_LAST(&jblocks->jb_segs, jseglst);
        if (jseg == NULL)
                return;
        
        LIST_INSERT_HEAD(&jseg->js_indirs, freework, fw_segs);
-       TAILQ_INSERT_HEAD(INDIR_HASH(freework->fw_list.wk_mp,
-           freework->fw_blkno), freework, fw_next);
+       TAILQ_INSERT_HEAD(INDIR_HASH(ump, freework->fw_blkno), freework,
+           fw_next);
        freework->fw_state &= ~DEPCOMPLETE;
 }
 
@@ -2372,10 +2299,11 @@ static void
 indirblk_remove(freework)
        struct freework *freework;
 {
+       struct ufsmount *ump;
 
+       ump = VFSTOUFS(freework->fw_list.wk_mp);
        LIST_REMOVE(freework, fw_segs);
-       TAILQ_REMOVE(INDIR_HASH(freework->fw_list.wk_mp,
-           freework->fw_blkno), freework, fw_next);
+       TAILQ_REMOVE(INDIR_HASH(ump, freework->fw_blkno), freework, fw_next);
        freework->fw_state |= DEPCOMPLETE;
        if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE)
                WORKITEM_FREE(freework, D_FREEWORK);
@@ -2388,20 +2316,8 @@ indirblk_remove(freework)
 void 
 softdep_initialize()
 {
-       int i;
 
-       LIST_INIT(&mkdirlisthd);
        max_softdeps = desiredvnodes * 4;
-       pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, &pagedep_hash);
-       inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, &inodedep_hash);
-       newblk_hashtbl = hashinit(max_softdeps / 2,  M_NEWBLK, &newblk_hash);
-       bmsafemap_hashtbl = hashinit(1024, M_BMSAFEMAP, &bmsafemap_hash);
-       i = 1 << (ffs(desiredvnodes / 10) - 1);
-       indir_hashtbl = malloc(i * sizeof(indir_hashtbl[0]), M_FREEWORK,
-           M_WAITOK);
-       indir_hash = i - 1;
-       for (i = 0; i <= indir_hash; i++)
-               TAILQ_INIT(&indir_hashtbl[i]);
 
        /* initialise bioops hack */
        bioops.io_start = softdep_disk_io_initiation;
@@ -2421,12 +2337,13 @@ void
 softdep_uninitialize()
 {
 
+       /* clear bioops hack */
+       bioops.io_start = NULL;
+       bioops.io_complete = NULL;
+       bioops.io_deallocate = NULL;
+       bioops.io_countdeps = NULL;
+
        callout_drain(&softdep_callout);
-       hashdestroy(pagedep_hashtbl, M_PAGEDEP, pagedep_hash);
-       hashdestroy(inodedep_hashtbl, M_INODEDEP, inodedep_hash);
-       hashdestroy(newblk_hashtbl, M_NEWBLK, newblk_hash);
-       hashdestroy(bmsafemap_hashtbl, M_BMSAFEMAP, bmsafemap_hash);
-       free(indir_hashtbl, M_FREEWORK);
 }
 
 /*
@@ -2441,19 +2358,24 @@ softdep_mount(devvp, mp, fs, cred)
        struct ucred *cred;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscribe@freebsd.org"

Reply via email to