Author: jeff
Date: Wed May 19 06:18:01 2010
New Revision: 208287
URL: http://svn.freebsd.org/changeset/base/208287

Log:
   - Don't immediately re-run softdepflush if we didn't make any progress
     on the last iteration.  This can lead to a deadlock when we have
     worklist items that cannot be immediately satisfied.
  
  Reported by:  uqs, Dimitry Andric <dimi...@andric.com>
  
   - Remove some unnecessary debugging code and place some other under
     SUJ_DEBUG.
   - Examine the journal state in softdep_slowdown().
   - Re-format some comments so I may more easily add flag descriptions.

Modified:
  head/sys/ufs/ffs/ffs_softdep.c
  head/sys/ufs/ffs/softdep.h

Modified: head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- head/sys/ufs/ffs/ffs_softdep.c      Wed May 19 04:00:42 2010        (r208286)
+++ head/sys/ufs/ffs/ffs_softdep.c      Wed May 19 06:18:01 2010        (r208287)
@@ -51,7 +51,6 @@ __FBSDID("$FreeBSD$");
 #ifndef DEBUG
 #define DEBUG
 #endif
-#define        SUJ_DEBUG
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -1200,6 +1199,7 @@ softdep_flush(void)
        struct ufsmount *ump;
        struct thread *td;
        int remaining;
+       int progress;
        int vfslocked;
 
        td = curthread;
@@ -1224,7 +1224,7 @@ softdep_flush(void)
                }
                FREE_LOCK(&lk);
                VFS_UNLOCK_GIANT(vfslocked);
-               remaining = 0;
+               remaining = progress = 0;
                mtx_lock(&mountlist_mtx);
                for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp)  {
                        nmp = TAILQ_NEXT(mp, mnt_list);
@@ -1233,7 +1233,7 @@ softdep_flush(void)
                        if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK))
                                continue;
                        vfslocked = VFS_LOCK_GIANT(mp);
-                       softdep_process_worklist(mp, 0);
+                       progress += softdep_process_worklist(mp, 0);
                        ump = VFSTOUFS(mp);
                        remaining += ump->softdep_on_worklist -
                                ump->softdep_on_worklist_inprogress;
@@ -1243,7 +1243,7 @@ softdep_flush(void)
                        vfs_unbusy(mp);
                }
                mtx_unlock(&mountlist_mtx);
-               if (remaining)
+               if (remaining && progress)
                        continue;
                ACQUIRE_LOCK(&lk);
                if (!req_pending)
@@ -1449,7 +1449,7 @@ process_worklist_item(mp, flags)
        struct mount *mp;
        int flags;
 {
-       struct worklist *wk, *wkXXX;
+       struct worklist *wk;
        struct ufsmount *ump;
        struct vnode *vp;
        int matchcnt = 0;
@@ -1472,11 +1472,8 @@ process_worklist_item(mp, flags)
        vp = NULL;
        ump = VFSTOUFS(mp);
        LIST_FOREACH(wk, &ump->softdep_workitem_pending, wk_list) {
-               if (wk->wk_state & INPROGRESS) {
-                       wkXXX = wk;
+               if (wk->wk_state & INPROGRESS)
                        continue;
-               }
-               wkXXX = wk;     /* Record the last valid wk pointer. */
                if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM)
                        break;
                wk->wk_state |= INPROGRESS;
@@ -2364,7 +2361,7 @@ remove_from_journal(wk)
 
        mtx_assert(&lk, MA_OWNED);
        ump = VFSTOUFS(wk->wk_mp);
-#ifdef DEBUG   /* XXX Expensive, temporary. */
+#ifdef SUJ_DEBUG
        {
                struct worklist *wkn;
 
@@ -2401,16 +2398,15 @@ journal_space(ump, thresh)
        struct jblocks *jblocks;
        int avail;
 
+       jblocks = ump->softdep_jblocks;
+       if (jblocks == NULL)
+               return (1);
        /*
         * We use a tighter restriction here to prevent request_cleanup()
         * running in threads from running into locks we currently hold.
         */
        if (num_inodedep > (max_softdeps / 10) * 9)
                return (0);
-
-       jblocks = ump->softdep_jblocks;
-       if (jblocks == NULL)
-               return (1);
        if (thresh)
                thresh = jblocks->jb_min;
        else
@@ -2727,7 +2723,7 @@ softdep_process_journal(mp, flags)
                                break;
                        printf("softdep: Out of journal space!\n");
                        softdep_speedup();
-                       msleep(jblocks, &lk, PRIBIO, "jblocks", 1);
+                       msleep(jblocks, &lk, PRIBIO, "jblocks", hz);
                }
                FREE_LOCK(&lk);
                jseg = malloc(sizeof(*jseg), M_JSEG, M_SOFTDEP_FLAGS);
@@ -10870,18 +10866,29 @@ int
 softdep_slowdown(vp)
        struct vnode *vp;
 {
+       struct ufsmount *ump;
+       int jlow;
        int max_softdeps_hard;
 
        ACQUIRE_LOCK(&lk);
+       jlow = 0;
+       /*
+        * Check for journal space if needed.
+        */
+       if (DOINGSUJ(vp)) {
+               ump = VFSTOUFS(vp->v_mount);
+               if (journal_space(ump, 0) == 0)
+                       jlow = 1;
+       }
        max_softdeps_hard = max_softdeps * 11 / 10;
        if (num_dirrem < max_softdeps_hard / 2 &&
            num_inodedep < max_softdeps_hard &&
            VFSTOUFS(vp->v_mount)->um_numindirdeps < maxindirdeps &&
-           num_freeblkdep < max_softdeps_hard) {
+           num_freeblkdep < max_softdeps_hard && jlow == 0) {
                FREE_LOCK(&lk);
                return (0);
        }
-       if (VFSTOUFS(vp->v_mount)->um_numindirdeps >= maxindirdeps)
+       if (VFSTOUFS(vp->v_mount)->um_numindirdeps >= maxindirdeps || jlow)
                softdep_speedup();
        stat_sync_limit_hit += 1;
        FREE_LOCK(&lk);

Modified: head/sys/ufs/ffs/softdep.h
==============================================================================
--- head/sys/ufs/ffs/softdep.h  Wed May 19 04:00:42 2010        (r208286)
+++ head/sys/ufs/ffs/softdep.h  Wed May 19 06:18:01 2010        (r208287)
@@ -46,51 +46,65 @@
  * copy of the data. A particular data dependency is eliminated when
  * it is ALLCOMPLETE: that is ATTACHED, DEPCOMPLETE, and COMPLETE.
  * 
- * ATTACHED means that the data is not currently being written to
- * disk. UNDONE means that the data has been rolled back to a safe
+ * The ATTACHED flag means that the data is not currently being written
+ * to disk.
+ * 
+ * The UNDONE flag means that the data has been rolled back to a safe
  * state for writing to the disk. When the I/O completes, the data is
  * restored to its current form and the state reverts to ATTACHED.
  * The data must be locked throughout the rollback, I/O, and roll
  * forward so that the rolled back information is never visible to
- * user processes. The COMPLETE flag indicates that the item has been
- * written. For example, a dependency that requires that an inode be
- * written will be marked COMPLETE after the inode has been written
- * to disk. The DEPCOMPLETE flag indicates the completion of any other
+ * user processes.
+ *
+ * The COMPLETE flag indicates that the item has been written. For example,
+ * a dependency that requires that an inode be written will be marked
+ * COMPLETE after the inode has been written to disk.
+ * 
+ * The DEPCOMPLETE flag indicates the completion of any other
  * dependencies such as the writing of a cylinder group map has been
  * completed. A dependency structure may be freed only when both it
  * and its dependencies have completed and any rollbacks that are in
  * progress have finished as indicated by the set of ALLCOMPLETE flags
- * all being set. The two MKDIR flags indicate additional dependencies
- * that must be done when creating a new directory. MKDIR_BODY is
- * cleared when the directory data block containing the "." and ".."
- * entries has been written. MKDIR_PARENT is cleared when the parent
- * inode with the increased link count for ".." has been written. When
- * both MKDIR flags have been cleared, the DEPCOMPLETE flag is set to
- * indicate that the directory dependencies have been completed. The
- * writing of the directory inode itself sets the COMPLETE flag which
- * then allows the directory entry for the new directory to be written
- * to disk. The RMDIR flag marks a dirrem structure as representing
- * the removal of a directory rather than a file. When the removal
- * dependencies are completed, additional work needs to be done
- * (truncation of the "." and ".." entries, an additional decrement
- * of the associated inode, and a decrement of the parent inode). The
- * DIRCHG flag marks a diradd structure as representing the changing
+ * all being set.
+ * 
+ * The two MKDIR flags indicate additional dependencies that must be done
+ * when creating a new directory. MKDIR_BODY is cleared when the directory
+ * data block containing the "." and ".." entries has been written.
+ * MKDIR_PARENT is cleared when the parent inode with the increased link
+ * count for ".." has been written. When both MKDIR flags have been
+ * cleared, the DEPCOMPLETE flag is set to indicate that the directory
+ * dependencies have been completed. The writing of the directory inode
+ * itself sets the COMPLETE flag which then allows the directory entry for
+ * the new directory to be written to disk. The RMDIR flag marks a dirrem
+ * structure as representing the removal of a directory rather than a
+ * file. When the removal dependencies are completed, additional work needs
+ * to be done (an additional decrement of the associated inode, and a
+ * decrement of the parent inode).
+ *
+ * The DIRCHG flag marks a diradd structure as representing the changing
  * of an existing entry rather than the addition of a new one. When
  * the update is complete the dirrem associated with the inode for
  * the old name must be added to the worklist to do the necessary
- * reference count decrement. The GOINGAWAY flag indicates that the
- * data structure is frozen from further change until its dependencies
- * have been completed and its resources freed after which it will be
- * discarded. The IOSTARTED flag prevents multiple calls to the I/O
- * start routine from doing multiple rollbacks. The SPACECOUNTED flag
- * says that the files space has been accounted to the pending free
- * space count. The NEWBLOCK flag marks pagedep structures that have
- * just been allocated, so must be claimed by the inode before all
- * dependencies are complete. The INPROGRESS flag marks worklist
- * structures that are still on the worklist, but are being considered
- * for action by some process. The UFS1FMT flag indicates that the
- * inode being processed is a ufs1 format. The EXTDATA flag indicates
- * that the allocdirect describes an extended-attributes dependency.
+ * reference count decrement.
+ * 
+ * The GOINGAWAY flag indicates that the data structure is frozen from
+ * further change until its dependencies have been completed and its
+ * resources freed after which it will be discarded.
+ *
+ * The IOSTARTED flag prevents multiple calls to the I/O start routine from
+ * doing multiple rollbacks.
+ *
+ * The NEWBLOCK flag marks pagedep structures that have just been allocated,
+ * so must be claimed by the inode before all dependencies are complete.
+ *
+ * The INPROGRESS flag marks worklist structures that are still on the
+ * worklist, but are being considered for action by some process.
+ *
+ * The UFS1FMT flag indicates that the inode being processed is in ufs1 format.
+ *
+ * The EXTDATA flag indicates that the allocdirect describes an
+ * extended-attributes dependency.
+ *
  * The ONWORKLIST flag shows whether the structure is currently linked
  * onto a worklist.
  */
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to