- Append partial truncation freework structures to indirdeps while
truncation is proceeding. These prevent new block pointers from
becoming valid until truncation completes and serialize truncations.
- On completion of a partial truncate, journal work waits for zeroed
pointers to hit indirects.
- softdep_journal_freeblocks() handles last frag allocation and last
block zeroing.
- vtruncbuf/ffs_pages_remove moved into softdep_*_freeblocks() so it
is only implemented in one place.
- Block allocation failure handling moved up one level so it does not
proceed with buf locks held. This permits us to do more extensive
reclaims when filesystem space is exhausted.
- softdep_sync_metadata() is broken into two parts. The first executes
once at the start of ffs_syncvnode() and flushes truncations and
inode dependencies. The second is called on each locked buf. This
eliminates excessive looping and rollbacks.
- Improve the mechanism in process_worklist_item() that handles
acquiring vnode locks for handle_workitem_remove() so that it works
more generally and does not loop excessively over the same worklist
items on each call.
- Don't corrupt directories by zeroing the tail in fsck. This is only
done for regular files.
- Push a fsync complete record for files that need it so the checker
knows a truncation in the journal is no longer valid.
Discussed with: mckusick, kib (ffs_pages_remove and ffs_truncate parts)
Tested by: pho
Modified:
head/sbin/fsck_ffs/suj.c
head/sys/sys/vnode.h
head/sys/ufs/ffs/ffs_alloc.c
head/sys/ufs/ffs/ffs_balloc.c
head/sys/ufs/ffs/ffs_extern.h
head/sys/ufs/ffs/ffs_inode.c
head/sys/ufs/ffs/ffs_softdep.c
head/sys/ufs/ffs/ffs_vfsops.c
head/sys/ufs/ffs/ffs_vnops.c
head/sys/ufs/ffs/fs.h
head/sys/ufs/ffs/softdep.h
head/sys/ufs/ufs/inode.h
head/sys/ufs/ufs/ufsmount.h
Modified: head/sbin/fsck_ffs/suj.c
==============================================================================
--- head/sbin/fsck_ffs/suj.c Fri Jun 10 22:42:00 2011 (r222957)
+++ head/sbin/fsck_ffs/suj.c Fri Jun 10 22:48:35 2011 (r222958)
@@ -1604,7 +1604,7 @@ ino_trunc(ino_t ino, off_t size)
* uninitialized space later.
*/
off = blkoff(fs, size);
- if (off) {
+ if (off && DIP(ip, di_mode) != IFDIR) {
uint8_t *buf;
long clrsize;
@@ -1775,13 +1775,18 @@ cg_trunc(struct suj_cg *sc)
struct suj_ino *sino;
int i;
- for (i = 0; i < SUJ_HASHSIZE; i++)
- LIST_FOREACH(sino, &sc->sc_inohash[i], si_next)
+ for (i = 0; i < SUJ_HASHSIZE; i++) {
+ LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) {
if (sino->si_trunc) {
ino_trunc(sino->si_ino,
sino->si_trunc->jt_size);
+ sino->si_blkadj = 0;
sino->si_trunc = NULL;
}
+ if (sino->si_blkadj)
+ ino_adjblks(sino);
+ }
+ }
}
/*
@@ -1791,7 +1796,6 @@ cg_trunc(struct suj_cg *sc)
static void
cg_check_blk(struct suj_cg *sc)
{
- struct suj_ino *sino;
struct suj_blk *sblk;
int i;
@@ -1799,15 +1803,6 @@ cg_check_blk(struct suj_cg *sc)
for (i = 0; i < SUJ_HASHSIZE; i++)
LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next)
blk_check(sblk);
- /*
- * Now that we've freed blocks which are not referenced we
- * make a second pass over all inodes to adjust their block
- * counts.
- */
- for (i = 0; i < SUJ_HASHSIZE; i++)
- LIST_FOREACH(sino, &sc->sc_inohash[i], si_next)
- if (sino->si_blkadj)
- ino_adjblks(sino);
}
/*
@@ -1961,14 +1956,7 @@ ino_append(union jrec *rec)
"parent %d, diroff %jd\n",
refrec->jr_op, refrec->jr_ino, refrec->jr_nlink,
refrec->jr_parent, refrec->jr_diroff);
- /*
- * Lookup the ino and clear truncate if one is found. Partial
- * truncates are always done synchronously so if we discover
- * an operation that requires a lock the truncation has completed
- * and can be discarded.
- */
sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1);
- sino->si_trunc = NULL;
sino->si_hasrecs = 1;
srec = errmalloc(sizeof(*srec));
srec->sr_rec = rec;
@@ -2174,9 +2162,7 @@ blk_build(struct jblkrec *blkrec)
struct suj_rec *srec;
struct suj_blk *sblk;
struct jblkrec *blkrn;
- struct suj_ino *sino;
ufs2_daddr_t blk;
- off_t foff;
int frag;
if (debug)
@@ -2185,17 +2171,6 @@ blk_build(struct jblkrec *blkrec)
blkrec->jb_op, blkrec->jb_blkno, blkrec->jb_frags,
blkrec->jb_oldfrags, blkrec->jb_ino, blkrec->jb_lbn);
- /*
- * Look up the inode and clear the truncate if any lbns after the
- * truncate lbn are freed or allocated.
- */
- sino = ino_lookup(blkrec->jb_ino, 0);
- if (sino && sino->si_trunc) {
- foff = lblktosize(fs, blkrec->jb_lbn);
- foff += lfragtosize(fs, blkrec->jb_frags);
- if (foff > sino->si_trunc->jt_size)
- sino->si_trunc = NULL;
- }
blk = blknum(fs, blkrec->jb_blkno);
frag = fragnum(fs, blkrec->jb_blkno);
sblk = blk_lookup(blk, 1);
@@ -2242,10 +2217,15 @@ ino_build_trunc(struct jtrncrec *rec)
struct suj_ino *sino;
if (debug)
- printf("ino_build_trunc: ino %d, size %jd\n",
- rec->jt_ino, rec->jt_size);
+ printf("ino_build_trunc: op %d ino %d, size %jd\n",
+ rec->jt_op, rec->jt_ino, rec->jt_size);
sino = ino_lookup(rec->jt_ino, 1);
- sino->si_trunc = rec;
+ if (rec->jt_op == JOP_SYNC) {
+ sino->si_trunc = NULL;
+ return;
+ }
+ if (sino->si_trunc == NULL || sino->si_trunc->jt_size > rec->jt_size)
+ sino->si_trunc = rec;
}
/*
Modified: head/sys/sys/vnode.h
==============================================================================
--- head/sys/sys/vnode.h Fri Jun 10 22:42:00 2011 (r222957)
+++ head/sys/sys/vnode.h Fri Jun 10 22:48:35 2011 (r222958)
@@ -302,6 +302,7 @@ struct vattr {
#define IO_EXT 0x0400 /* operate on external attributes */
#define IO_NORMAL 0x0800 /* operate on regular data */
#define IO_NOMACCHECK 0x1000 /* MAC checks unnecessary */
+#define IO_BUFLOCKED 0x2000 /* ffs flag; indir buf is locked */
#define IO_SEQMAX 0x7F /* seq heuristic max value */
#define IO_SEQSHIFT 16 /* seq heuristic in upper 16 bits */
Modified: head/sys/ufs/ffs/ffs_alloc.c
==============================================================================
--- head/sys/ufs/ffs/ffs_alloc.c Fri Jun 10 22:42:00 2011 (r222957)
+++ head/sys/ufs/ffs/ffs_alloc.c Fri Jun 10 22:48:35 2011 (r222958)
@@ -217,7 +217,7 @@ nospace:
(void) chkdq(ip, -btodb(size), cred, FORCE);
UFS_LOCK(ump);
#endif
- if (reclaimed == 0) {
+ if (reclaimed == 0 && (flags & IO_BUFLOCKED) == 0) {
reclaimed = 1;
softdep_request_cleanup(fs, ITOV(ip), cred, FLUSH_BLOCKS_WAIT);
goto retry;
@@ -418,7 +418,7 @@ nospace:
/*
* no space available
*/
- if (reclaimed == 0) {
+ if (reclaimed == 0 && (flags & IO_BUFLOCKED) == 0) {
reclaimed = 1;
UFS_UNLOCK(ump);
if (bp) {
Modified: head/sys/ufs/ffs/ffs_balloc.c
==============================================================================
--- head/sys/ufs/ffs/ffs_balloc.c Fri Jun 10 22:42:00 2011 (r222957)
+++ head/sys/ufs/ffs/ffs_balloc.c Fri Jun 10 22:48:35 2011 (r222958)
@@ -105,6 +105,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t
ufs2_daddr_t *lbns_remfree, lbns[NIADDR + 1];
int unwindidx = -1;
int saved_inbdflush;
+ int reclaimed;
ip = VTOI(vp);
dp = ip->i_din1;
@@ -112,6 +113,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t
ump = ip->i_ump;
lbn = lblkno(fs, startoffset);
size = blkoff(fs, startoffset) + size;
+ reclaimed = 0;
if (size > fs->fs_bsize)
panic("ffs_balloc_ufs1: blk too big");
*bpp = NULL;
@@ -276,6 +278,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t
/*
* Fetch through the indirect blocks, allocating as necessary.
*/
+retry:
for (i = 1;;) {
error = bread(vp,
indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
@@ -296,8 +299,15 @@ ffs_balloc_ufs1(struct vnode *vp, off_t
if (pref == 0)
pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
- flags, cred, &newb)) != 0) {
+ flags | IO_BUFLOCKED, cred, &newb)) != 0) {
brelse(bp);
+ if (++reclaimed == 1) {
+ UFS_LOCK(ump);
+ softdep_request_cleanup(fs, vp, cred,
+ FLUSH_BLOCKS_WAIT);
+ UFS_UNLOCK(ump);
+ goto retry;
+ }
goto fail;
}
nb = newb;
@@ -349,10 +359,17 @@ ffs_balloc_ufs1(struct vnode *vp, off_t
if (nb == 0) {
UFS_LOCK(ump);
pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off, &bap[0]);
- error = ffs_alloc(ip,
- lbn, pref, (int)fs->fs_bsize, flags, cred, &newb);
+ error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
+ flags | IO_BUFLOCKED, cred, &newb);
if (error) {
brelse(bp);
+ if (++reclaimed == 1) {
+ UFS_LOCK(ump);
+ softdep_request_cleanup(fs, vp, cred,
+ FLUSH_BLOCKS_WAIT);
+ UFS_UNLOCK(ump);
+ goto retry;
+ }
goto fail;
}
nb = newb;
@@ -506,6 +523,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t
int deallocated, osize, nsize, num, i, error;
int unwindidx = -1;
int saved_inbdflush;
+ int reclaimed;
ip = VTOI(vp);
dp = ip->i_din2;
@@ -513,6 +531,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t
ump = ip->i_ump;
lbn = lblkno(fs, startoffset);
size = blkoff(fs, startoffset) + size;
+ reclaimed = 0;
if (size > fs->fs_bsize)
panic("ffs_balloc_ufs2: blk too big");
*bpp = NULL;
@@ -787,6 +806,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t
/*
* Fetch through the indirect blocks, allocating as necessary.
*/
+retry:
for (i = 1;;) {
error = bread(vp,
indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
@@ -807,8 +827,15 @@ ffs_balloc_ufs2(struct vnode *vp, off_t
if (pref == 0)
pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
- flags, cred, &newb)) != 0) {
+ flags | IO_BUFLOCKED, cred, &newb)) != 0) {
brelse(bp);
+ if (++reclaimed == 1) {
+ UFS_LOCK(ump);
+ softdep_request_cleanup(fs, vp, cred,
+ FLUSH_BLOCKS_WAIT);
+ UFS_UNLOCK(ump);
+ goto retry;
+ }
goto fail;
}
nb = newb;
@@ -860,10 +887,17 @@ ffs_balloc_ufs2(struct vnode *vp, off_t
if (nb == 0) {
UFS_LOCK(ump);
pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off, &bap[0]);
- error = ffs_alloc(ip,
- lbn, pref, (int)fs->fs_bsize, flags, cred, &newb);
+ error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
+ flags | IO_BUFLOCKED, cred, &newb);
if (error) {
brelse(bp);
+ if (++reclaimed == 1) {
+ UFS_LOCK(ump);
+ softdep_request_cleanup(fs, vp, cred,
+ FLUSH_BLOCKS_WAIT);
+ UFS_UNLOCK(ump);
+ goto retry;
+ }
goto fail;
}
nb = newb;
Modified: head/sys/ufs/ffs/ffs_extern.h
==============================================================================
--- head/sys/ufs/ffs/ffs_extern.h Fri Jun 10 22:42:00 2011 (r222957)
+++ head/sys/ufs/ffs/ffs_extern.h Fri Jun 10 22:48:35 2011 (r222958)
@@ -74,6 +74,7 @@ int ffs_isfreeblock(struct fs *, u_char
void ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t);
int ffs_mountroot(void);
void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
+void ffs_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end);
int ffs_reallocblks(struct vop_reallocblks_args *);
int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
ufs2_daddr_t, int, int, int, struct ucred *, struct buf **);
@@ -107,7 +108,6 @@ extern struct vop_vector ffs_fifoops2;
int softdep_check_suspend(struct mount *, struct vnode *,
int, int, int, int);
-int softdep_complete_trunc(struct vnode *, void *);
void softdep_get_depcounts(struct mount *, int *, int *);
void softdep_initialize(void);
void softdep_uninitialize(void);
@@ -139,14 +139,17 @@ void softdep_setup_blkfree(struct mount
void softdep_setup_inofree(struct mount *, struct buf *, ino_t,
struct workhead *);
void softdep_setup_sbupdate(struct ufsmount *, struct fs *, struct buf *);
-void *softdep_setup_trunc(struct vnode *vp, off_t length, int flags);
void softdep_fsync_mountdev(struct vnode *);
int softdep_sync_metadata(struct vnode *);
+int softdep_sync_buf(struct vnode *, struct buf *, int);
int softdep_process_worklist(struct mount *, int);
int softdep_fsync(struct vnode *);
int softdep_waitidle(struct mount *);
int softdep_prealloc(struct vnode *, int);
int softdep_journal_lookup(struct mount *, struct vnode **);
+void softdep_journal_freeblocks(struct inode *, struct ucred *, off_t, int);
+void softdep_journal_fsync(struct inode *);
+
/*
* Things to request flushing in softdep_request_cleanup()
Modified: head/sys/ufs/ffs/ffs_inode.c
==============================================================================
--- head/sys/ufs/ffs/ffs_inode.c Fri Jun 10 22:42:00 2011 (r222957)
+++ head/sys/ufs/ffs/ffs_inode.c Fri Jun 10 22:48:35 2011 (r222958)
@@ -120,7 +120,7 @@ ffs_update(vp, waitfor)
}
}
-static void
+void
ffs_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end)
{
vm_object_t object;
@@ -151,12 +151,12 @@ ffs_truncate(vp, length, flags, cred, td
ufs2_daddr_t bn, lbn, lastblock, lastiblock[NIADDR], indir_lbn[NIADDR];
ufs2_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
ufs2_daddr_t count, blocksreleased = 0, datablocks;
- void *cookie;
struct bufobj *bo;
struct fs *fs;
struct buf *bp;
struct ufsmount *ump;
- int needextclean, softdepslowdown, extblocks;
+ int softdeptrunc, journaltrunc;
+ int needextclean, extblocks;
int offset, size, level, nblocks;
int i, error, allerror;
off_t osize;
@@ -165,7 +165,6 @@ ffs_truncate(vp, length, flags, cred, td
fs = ip->i_fs;
ump = ip->i_ump;
bo = &vp->v_bufobj;
- cookie = NULL;
ASSERT_VOP_LOCKED(vp, "ffs_truncate");
@@ -173,6 +172,11 @@ ffs_truncate(vp, length, flags, cred, td
return (EINVAL);
if (length > fs->fs_maxfilesize)
return (EFBIG);
+#ifdef QUOTA
+ error = getinoquota(ip);
+ if (error)
+ return (error);
+#endif
/*
* Historically clients did not have to specify which data
* they were truncating. So, if not specified, we assume
@@ -191,7 +195,10 @@ ffs_truncate(vp, length, flags, cred, td
*/
allerror = 0;
needextclean = 0;
- softdepslowdown = DOINGSOFTDEP(vp) && softdep_slowdown(vp);
+ softdeptrunc = 0;
+ journaltrunc = DOINGSUJ(vp);
+ if (journaltrunc == 0 && DOINGSOFTDEP(vp) && length == 0)
+ softdeptrunc = !softdep_slowdown(vp);
extblocks = 0;
datablocks = DIP(ip, i_blocks);
if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) {
@@ -199,27 +206,23 @@ ffs_truncate(vp, length, flags, cred, td
datablocks -= extblocks;
}
if ((flags & IO_EXT) && extblocks > 0) {
- if (DOINGSOFTDEP(vp) && softdepslowdown == 0 && length == 0) {
- if ((flags & IO_NORMAL) == 0) {
- softdep_setup_freeblocks(ip, length, IO_EXT);
- return (0);
- }
+ if (length != 0)
+ panic("ffs_truncate: partial trunc of extdata");
+ if (softdeptrunc || journaltrunc) {
+ if ((flags & IO_NORMAL) == 0)
+ goto extclean;
needextclean = 1;
} else {
- if (length != 0)
- panic("ffs_truncate: partial trunc of extdata");
if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0)
return (error);
- if (DOINGSUJ(vp))
- cookie = softdep_setup_trunc(vp, length, flags);
- osize = ip->i_din2->di_extsize;
- ip->i_din2->di_blocks -= extblocks;
#ifdef QUOTA
(void) chkdq(ip, -extblocks, NOCRED, 0);
#endif
vinvalbuf(vp, V_ALT, 0, 0);
ffs_pages_remove(vp,
OFF_TO_IDX(lblktosize(fs, -extblocks)), 0);
+ osize = ip->i_din2->di_extsize;
+ ip->i_din2->di_blocks -= extblocks;
ip->i_din2->di_extsize = 0;
for (i = 0; i < NXADDR; i++) {
oldblks[i] = ip->i_din2->di_extb[i];
@@ -227,7 +230,7 @@ ffs_truncate(vp, length, flags, cred, td
}
ip->i_flag |= IN_CHANGE;
if ((error = ffs_update(vp, 1)))
- goto out;
+ return (error);
for (i = 0; i < NXADDR; i++) {
if (oldblks[i] == 0)
continue;
@@ -236,10 +239,8 @@ ffs_truncate(vp, length, flags, cred, td
}
}
}
- if ((flags & IO_NORMAL) == 0) {
- error = 0;
- goto out;
- }
+ if ((flags & IO_NORMAL) == 0)
+ return (0);
if (vp->v_type == VLNK &&
(ip->i_size < vp->v_mount->mnt_maxsymlinklen ||
datablocks == 0)) {
@@ -252,24 +253,17 @@ ffs_truncate(vp, length, flags, cred, td
DIP_SET(ip, i_size, 0);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
if (needextclean)
- softdep_setup_freeblocks(ip, length, IO_EXT);
- error = ffs_update(vp, 1);
- goto out;
+ goto extclean;
+ return ffs_update(vp, 1);
}
if (ip->i_size == length) {
ip->i_flag |= IN_CHANGE | IN_UPDATE;
if (needextclean)
- softdep_setup_freeblocks(ip, length, IO_EXT);
- error = ffs_update(vp, 0);
- goto out;
+ goto extclean;
+ return ffs_update(vp, 0);
}
if (fs->fs_ronly)
panic("ffs_truncate: read-only filesystem");
-#ifdef QUOTA
- error = getinoquota(ip);
- if (error)
- goto out;
-#endif
if ((ip->i_flags & SF_SNAPSHOT) != 0)
ffs_snapremove(vp);
vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
@@ -285,7 +279,7 @@ ffs_truncate(vp, length, flags, cred, td
error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
if (error) {
vnode_pager_setsize(vp, osize);
- goto out;
+ return (error);
}
ip->i_size = length;
DIP_SET(ip, i_size, length);
@@ -296,11 +290,10 @@ ffs_truncate(vp, length, flags, cred, td
else
bawrite(bp);
ip->i_flag |= IN_CHANGE | IN_UPDATE;
- error = ffs_update(vp, 1);
- goto out;
+ return ffs_update(vp, 1);
}
if (DOINGSOFTDEP(vp)) {
- if (length > 0 || softdepslowdown) {
+ if (softdeptrunc == 0 && journaltrunc == 0) {
/*
* If a file is only partially truncated, then
* we have to clean up the data structures
@@ -311,29 +304,20 @@ ffs_truncate(vp, length, flags, cred, td
* so that it will have no data structures left.
*/
if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0)
- goto out;
- /*
- * We have to journal the truncation before we change
- * any blocks so we don't leave the file partially
- * truncated.
- */
- if (DOINGSUJ(vp) && cookie == NULL)
- cookie = softdep_setup_trunc(vp, length, flags);
+ return (error);
} else {
-#ifdef QUOTA
- (void) chkdq(ip, -datablocks, NOCRED, 0);
-#endif
- softdep_setup_freeblocks(ip, length, needextclean ?
- IO_EXT | IO_NORMAL : IO_NORMAL);
+ flags = IO_NORMAL | (needextclean ? IO_EXT: 0);
+ if (journaltrunc)
+ softdep_journal_freeblocks(ip, cred, length,
+ flags);
+ else
+ softdep_setup_freeblocks(ip, length, flags);
ASSERT_VOP_LOCKED(vp, "ffs_truncate1");
- vinvalbuf(vp, needextclean ? 0 : V_NORMAL, 0, 0);
- if (!needextclean)
- ffs_pages_remove(vp, 0,
- OFF_TO_IDX(lblktosize(fs, -extblocks)));
- vnode_pager_setsize(vp, 0);
- ip->i_flag |= IN_CHANGE | IN_UPDATE;
- error = ffs_update(vp, 0);
- goto out;
+ if (journaltrunc == 0) {
+ ip->i_flag |= IN_CHANGE | IN_UPDATE;
+ error = ffs_update(vp, 0);
+ }
+ return (error);
}
}
/*
@@ -353,7 +337,7 @@ ffs_truncate(vp, length, flags, cred, td
flags |= BA_CLRBUF;
error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
if (error)
- goto out;
+ return (error);
/*
* When we are doing soft updates and the UFS_BALLOC
* above fills in a direct block hole with a full sized
@@ -365,7 +349,7 @@ ffs_truncate(vp, length, flags, cred, td
if (DOINGSOFTDEP(vp) && lbn < NDADDR &&
fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize &&
(error = ffs_syncvnode(vp, MNT_WAIT)) != 0)
- goto out;
+ return (error);
ip->i_size = length;
DIP_SET(ip, i_size, length);
size = blksize(fs, ip, lbn);
@@ -411,13 +395,7 @@ ffs_truncate(vp, length, flags, cred, td
DIP_SET(ip, i_db[i], 0);
}
ip->i_flag |= IN_CHANGE | IN_UPDATE;
- /*
- * When doing softupdate journaling we must preserve the size along
- * with the old pointers until they are freed or we might not
- * know how many fragments remain.
- */
- if (!DOINGSUJ(vp))
- allerror = ffs_update(vp, 1);
+ allerror = ffs_update(vp, 1);
/*
* Having written the new inode to disk, save its new configuration
@@ -541,14 +519,14 @@ done:
#ifdef QUOTA
(void) chkdq(ip, -blocksreleased, NOCRED, 0);
#endif
- error = allerror;
-out:
- if (cookie) {
- allerror = softdep_complete_trunc(vp, cookie);
- if (allerror != 0 && error == 0)
- error = allerror;
- }
- return (error);
+ return (allerror);
+
+extclean:
+ if (journaltrunc)
+ softdep_journal_freeblocks(ip, cred, length, IO_EXT);
+ else
+ softdep_setup_freeblocks(ip, length, IO_EXT);
+ return ffs_update(vp, MNT_WAIT);
}
/*
Modified: head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- head/sys/ufs/ffs/ffs_softdep.c Fri Jun 10 22:42:00 2011 (r222957)
+++ head/sys/ufs/ffs/ffs_softdep.c Fri Jun 10 22:48:35 2011 (r222958)
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
#include <sys/buf.h>
#include <sys/kdb.h>
#include <sys/kthread.h>
+#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
@@ -71,6 +72,7 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h>
#include <sys/vnode.h>
#include <sys/conf.h>
+
#include <ufs/ufs/dir.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
@@ -82,6 +84,8 @@ __FBSDID("$FreeBSD$");
#include <ufs/ufs/ufs_extern.h>
#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_object.h>
#include <ddb/ddb.h>
@@ -214,6 +218,25 @@ softdep_setup_allocindir_meta(nbp, ip, b
}
void
+softdep_journal_freeblocks(ip, cred, length, flags)
+ struct inode *ip;
+ struct ucred *cred;
+ off_t length;
+ int flags;
+{
+
+ panic("softdep_journal_freeblocks called");
+}
+
+void
+softdep_journal_fsync(ip)
+ struct inode *ip;
+{
+
+ panic("softdep_journal_fsync called");
+}
+
+void
softdep_setup_freeblocks(ip, length, flags)
struct inode *ip;
off_t length;
@@ -282,29 +305,6 @@ softdep_setup_directory_change(bp, dp, i
panic("softdep_setup_directory_change called");
}
-void *
-softdep_setup_trunc(vp, length, flags)
- struct vnode *vp;
- off_t length;
- int flags;
-{
-
- panic("%s called", __FUNCTION__);
-
- return (NULL);
-}
-
-int
-softdep_complete_trunc(vp, cookie)
- struct vnode *vp;
- void *cookie;
-{
-
- panic("%s called", __FUNCTION__);
-
- return (0);
-}
-
void
softdep_setup_blkfree(mp, bp, blkno, frags, wkhd)
struct mount *mp;
@@ -499,6 +499,13 @@ softdep_sync_metadata(struct vnode *vp)
}
int
+softdep_sync_buf(struct vnode *vp, struct buf *bp, int waitfor)
+{
+
+ return (0);
+}
+
+int
softdep_slowdown(vp)
struct vnode *vp;
{
@@ -614,10 +621,13 @@ FEATURE(softupdates, "FFS soft-updates s
#define D_JSEGDEP 23
#define D_SBDEP 24
#define D_JTRUNC 25
-#define D_LAST D_JTRUNC
+#define D_JFSYNC 26
+#define D_SENTINAL 27
+#define D_LAST D_SENTINAL
unsigned long dep_current[D_LAST + 1];
unsigned long dep_total[D_LAST + 1];
+unsigned long dep_write[D_LAST + 1];
SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW, 0, "soft updates stats");
@@ -625,13 +635,17 @@ SYSCTL_NODE(_debug_softdep, OID_AUTO, to
"total dependencies allocated");
SYSCTL_NODE(_debug_softdep, OID_AUTO, current, CTLFLAG_RW, 0,
"current dependencies allocated");
+SYSCTL_NODE(_debug_softdep, OID_AUTO, write, CTLFLAG_RW, 0,
+ "current dependencies written");
#define SOFTDEP_TYPE(type, str, long) \
static MALLOC_DEFINE(M_ ## type, #str, long); \
SYSCTL_ULONG(_debug_softdep_total, OID_AUTO, str, CTLFLAG_RD, \
&dep_total[D_ ## type], 0, ""); \
SYSCTL_ULONG(_debug_softdep_current, OID_AUTO, str, CTLFLAG_RD, \
- &dep_current[D_ ## type], 0, "");
+ &dep_current[D_ ## type], 0, ""); \
+ SYSCTL_ULONG(_debug_softdep_write, OID_AUTO, str, CTLFLAG_RD, \
+ &dep_write[D_ ## type], 0, "");
SOFTDEP_TYPE(PAGEDEP, pagedep, "File page dependencies");
SOFTDEP_TYPE(INODEDEP, inodedep, "Inode dependencies");
@@ -660,6 +674,7 @@ SOFTDEP_TYPE(JSEG, jseg, "Journal segmen
SOFTDEP_TYPE(JSEGDEP, jsegdep, "Journal segment complete");
SOFTDEP_TYPE(SBDEP, sbdep, "Superblock write dependency");
SOFTDEP_TYPE(JTRUNC, jtrunc, "Journal inode truncation");
+SOFTDEP_TYPE(JFSYNC, jfsync, "Journal fsync complete");
static MALLOC_DEFINE(M_SAVEDINO, "savedino", "Saved inodes");
static MALLOC_DEFINE(M_JBLOCKS, "jblocks", "Journal block locations");
@@ -694,7 +709,8 @@ static struct malloc_type *memtype[] = {
M_JSEG,
M_JSEGDEP,
M_SBDEP,
- M_JTRUNC
+ M_JTRUNC,
+ M_JFSYNC
};
static LIST_HEAD(mkdirlist, mkdir) mkdirlisthd;
@@ -734,10 +750,11 @@ static void clear_unlinked_inodedep(stru
static struct inodedep *first_unlinked_inodedep(struct ufsmount *);
static int flush_pagedep_deps(struct vnode *, struct mount *,
struct diraddhd *);
-static void free_pagedep(struct pagedep *);
+static int free_pagedep(struct pagedep *);
static int flush_newblk_dep(struct vnode *, struct mount *, ufs_lbn_t);
-static int flush_inodedep_deps(struct mount *, ino_t);
+static int flush_inodedep_deps(struct vnode *, struct mount *, ino_t);
static int flush_deplist(struct allocdirectlst *, int, int *);
+static int sync_cgs(struct mount *, int);
static int handle_written_filepage(struct pagedep *, struct buf *);
static int handle_written_sbdep(struct sbdep *, struct buf *);
static void initiate_write_sbdep(struct sbdep *);
@@ -750,7 +767,7 @@ static void handle_written_jaddref(struc
static void handle_written_jremref(struct jremref *);
static void handle_written_jseg(struct jseg *, struct buf *);
static void handle_written_jnewblk(struct jnewblk *);
-static void handle_written_jfreeblk(struct jfreeblk *);
+static void handle_written_jblkdep(struct jblkdep *);
static void handle_written_jfreefrag(struct jfreefrag *);
static void complete_jseg(struct jseg *);
static void jseg_write(struct ufsmount *ump, struct jseg *, uint8_t *);
@@ -758,6 +775,7 @@ static void jaddref_write(struct jaddref
static void jremref_write(struct jremref *, struct jseg *, uint8_t *);
static void jmvref_write(struct jmvref *, struct jseg *, uint8_t *);
static void jtrunc_write(struct jtrunc *, struct jseg *, uint8_t *);
+static void jfsync_write(struct jfsync *, struct jseg *, uint8_t *data);
static void jnewblk_write(struct jnewblk *, struct jseg *, uint8_t *);
static void jfreeblk_write(struct jfreeblk *, struct jseg *, uint8_t *);
static void jfreefrag_write(struct jfreefrag *, struct jseg *, uint8_t *);
@@ -768,7 +786,9 @@ static void handle_allocdirect_partdone(
static struct jnewblk *cancel_newblk(struct newblk *, struct worklist *,
struct workhead *);
static void indirdep_complete(struct indirdep *);
-static int indirblk_inseg(struct mount *, ufs2_daddr_t);
+static int indirblk_lookup(struct mount *, ufs2_daddr_t);
+static void indirblk_insert(struct freework *);
+static void indirblk_remove(struct freework *);
static void handle_allocindir_partdone(struct allocindir *);
static void initiate_write_filepage(struct pagedep *, struct buf *);
static void initiate_write_indirdep(struct indirdep*, struct buf *);
@@ -777,10 +797,12 @@ static void initiate_write_bmsafemap(str
static void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *);
static void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *);
static void handle_workitem_freefile(struct freefile *);
-static void handle_workitem_remove(struct dirrem *, struct vnode *);
+static int handle_workitem_remove(struct dirrem *, int);
static struct dirrem *newdirrem(struct buf *, struct inode *,
struct inode *, int, struct dirrem **);
-static void cancel_indirdep(struct indirdep *, struct buf *, struct inodedep *,
+static struct indirdep *indirdep_lookup(struct mount *, struct inode *,
+ struct buf *);
+static void cancel_indirdep(struct indirdep *, struct buf *,
struct freeblks *);
static void free_indirdep(struct indirdep *);
static void free_diradd(struct diradd *, struct workhead *);
@@ -795,8 +817,13 @@ static void cancel_diradd(struct diradd
struct jremref *, struct jremref *);
static void dirrem_journal(struct dirrem *, struct jremref *, struct jremref *,
struct jremref *);
-static void cancel_allocindir(struct allocindir *, struct inodedep *,
- struct freeblks *);
+static void cancel_allocindir(struct allocindir *, struct buf *bp,
+ struct freeblks *, int);
+static int setup_trunc_indir(struct freeblks *, struct inode *,
+ ufs_lbn_t, ufs_lbn_t, ufs2_daddr_t);
+static void complete_trunc_indir(struct freework *);
+static void trunc_indirdep(struct indirdep *, struct freeblks *, struct buf *,
+ int);
static void complete_mkdir(struct mkdir *);
static void free_newdirblk(struct newdirblk *);
static void free_jremref(struct jremref *);
@@ -806,7 +833,7 @@ static void free_jsegs(struct jblocks *)
static void rele_jseg(struct jseg *);
static void free_jseg(struct jseg *, struct jblocks *);
static void free_jnewblk(struct jnewblk *);
-static void free_jfreeblk(struct jfreeblk *);
+static void free_jblkdep(struct jblkdep *);
static void free_jfreefrag(struct jfreefrag *);
static void free_freedep(struct freedep *);
static void journal_jremref(struct dirrem *, struct jremref *,
@@ -818,30 +845,33 @@ static void cancel_jfreefrag(struct jfre
static inline void setup_freedirect(struct freeblks *, struct inode *,
int, int);
static inline void setup_freeext(struct freeblks *, struct inode *, int, int);
-static inline void setup_freeindir(struct freeblks *, struct inode *, int i,
+static inline void setup_freeindir(struct freeblks *, struct inode *, int,
ufs_lbn_t, int);
static inline struct freeblks *newfreeblks(struct mount *, struct inode *);
static void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t);
-static void softdep_trunc_deps(struct vnode *, struct freeblks *, ufs_lbn_t,
+ufs2_daddr_t blkcount(struct fs *, ufs2_daddr_t, off_t);
+static int trunc_check_buf(struct buf *, int *, ufs_lbn_t, int, int);
+static void trunc_dependencies(struct inode *, struct freeblks *, ufs_lbn_t,
int, int);
-static int cancel_pagedep(struct pagedep *, struct inodedep *,
- struct freeblks *);
-static int deallocate_dependencies(struct buf *, struct inodedep *,
- struct freeblks *, int off);
+static void trunc_pages(struct inode *, off_t, ufs2_daddr_t, int);
+static int cancel_pagedep(struct pagedep *, struct freeblks *, int);
+static int deallocate_dependencies(struct buf *, struct freeblks *, int);
+static void newblk_freefrag(struct newblk*);
static void free_newblk(struct newblk *);
static void cancel_allocdirect(struct allocdirectlst *,
- struct allocdirect *, struct freeblks *, int);
+ struct allocdirect *, struct freeblks *);
static int check_inode_unwritten(struct inodedep *);
static int free_inodedep(struct inodedep *);
static void freework_freeblock(struct freework *);
-static void handle_workitem_freeblocks(struct freeblks *, int);
-static void handle_complete_freeblocks(struct freeblks *);
+static void freework_enqueue(struct freework *);
+static int handle_workitem_freeblocks(struct freeblks *, int);
+static int handle_complete_freeblocks(struct freeblks *, int);
static void handle_workitem_indirblk(struct freework *);
-static void handle_written_freework(struct freework *);
+static void handle_written_freework(struct freework *, int);
static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *);
static struct worklist *jnewblk_merge(struct worklist *, struct worklist *,
struct workhead *);
-static void setup_allocindir_phase2(struct buf *, struct inode *,
+static struct freefrag *setup_allocindir_phase2(struct buf *, struct inode *,
struct inodedep *, struct allocindir *, ufs_lbn_t);
static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t,
ufs2_daddr_t, ufs_lbn_t);
@@ -862,16 +892,20 @@ static int newblk_lookup(struct mount *,
static int inodedep_find(struct inodedep_hashhead *, struct fs *, ino_t,
struct inodedep **);
static int inodedep_lookup(struct mount *, ino_t, int, struct inodedep **);
-static int pagedep_lookup(struct mount *, ino_t, ufs_lbn_t, int,
- struct pagedep **);
+static int pagedep_lookup(struct mount *, struct buf *bp, ino_t, ufs_lbn_t,
+ int, struct pagedep **);
static int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t,
struct mount *mp, int, struct pagedep **);
static void pause_timer(void *);
static int request_cleanup(struct mount *, int);
-static int process_worklist_item(struct mount *, int);
+static int process_worklist_item(struct mount *, int, int);
static void process_removes(struct vnode *);
+static void process_truncates(struct vnode *);
static void jwork_move(struct workhead *, struct workhead *);
+static void jwork_insert(struct workhead *, struct jsegdep *);
static void add_to_worklist(struct worklist *, int);
+static void wake_worklist(struct worklist *);
+static void wait_worklist(struct worklist *, char *);
static void remove_from_worklist(struct worklist *);
static void softdep_flush(void);
static int softdep_speedup(void);
@@ -889,17 +923,20 @@ static struct jremref *newjremref(struct
struct inode *ip, off_t, nlink_t);
static struct jaddref *newjaddref(struct inode *, ino_t, off_t, int16_t,
uint16_t);
-static inline void newinoref(struct inoref *, ino_t, ino_t, off_t, nlink_t,
+static inline void newinoref(struct inoref *, ino_t, ino_t, off_t, nlink_t,
uint16_t);
-static inline struct jsegdep *inoref_jseg(struct inoref *);
+static inline struct jsegdep *inoref_jseg(struct inoref *);
static struct jmvref *newjmvref(struct inode *, ino_t, off_t, off_t);
static struct jfreeblk *newjfreeblk(struct freeblks *, ufs_lbn_t,
ufs2_daddr_t, int);
+static struct jtrunc *newjtrunc(struct freeblks *, off_t, int);
+static void move_newblock_dep(struct jaddref *, struct inodedep *);
+static void cancel_jfreeblk(struct freeblks *, ufs2_daddr_t);
static struct jfreefrag *newjfreefrag(struct freefrag *, struct inode *,
ufs2_daddr_t, long, ufs_lbn_t);
static struct freework *newfreework(struct ufsmount *, struct freeblks *,
- struct freework *, ufs_lbn_t, ufs2_daddr_t, int, int);
-static void jwait(struct worklist *wk);
+ struct freework *, ufs_lbn_t, ufs2_daddr_t, int, int, int);
+static int jwait(struct worklist *, int);
static struct inodedep *inodedep_lookup_ip(struct inode *);
static int bmsafemap_rollbacks(struct bmsafemap *);
static struct freefile *handle_bufwait(struct inodedep *, struct workhead *);
@@ -1064,6 +1101,30 @@ jwork_move(dst, src)
}
}
+static void
+jwork_insert(dst, jsegdep)
+ struct workhead *dst;
+ struct jsegdep *jsegdep;
+{
+ struct jsegdep *jsegdepn;
+ struct worklist *wk;
+
+ LIST_FOREACH(wk, dst, wk_list)
+ if (wk->wk_type == D_JSEGDEP)
+ break;
+ if (wk == NULL) {
+ WORKLIST_INSERT(dst, &jsegdep->jd_list);
+ return;
+ }
+ jsegdepn = WK_JSEGDEP(wk);
+ if (jsegdep->jd_seg->js_seq < jsegdepn->jd_seg->js_seq) {
+ WORKLIST_REMOVE(wk);
+ free_jsegdep(jsegdepn);
+ WORKLIST_INSERT(dst, &jsegdep->jd_list);
+ } else
+ free_jsegdep(jsegdep);
+}
+
/*
* Routines for tracking and managing workitems.
*/
@@ -1088,6 +1149,8 @@ workitem_free(item, type)
panic("workitem_free: type mismatch %s != %s",
TYPENAME(item->wk_type), TYPENAME(type));
#endif
+ if (item->wk_state & IOWAITING)
+ wakeup(item);
ump = VFSTOUFS(item->wk_mp);
if (--ump->softdep_deps == 0 && ump->softdep_req)
wakeup(&ump->softdep_deps);
@@ -1101,14 +1164,18 @@ workitem_alloc(item, type, mp)
int type;
struct mount *mp;
{
+ struct ufsmount *ump;
+
item->wk_type = type;
item->wk_mp = mp;
item->wk_state = 0;
+
+ ump = VFSTOUFS(mp);
ACQUIRE_LOCK(&lk);
dep_current[type]++;
dep_total[type]++;
- VFSTOUFS(mp)->softdep_deps++;
- VFSTOUFS(mp)->softdep_accdeps++;
+ ump->softdep_deps++;
+ ump->softdep_accdeps++;
FREE_LOCK(&lk);
}
@@ -1270,8 +1337,7 @@ softdep_flush(void)
vfslocked = VFS_LOCK_GIANT(mp);
progress += softdep_process_worklist(mp, 0);
ump = VFSTOUFS(mp);
- remaining += ump->softdep_on_worklist -
- ump->softdep_on_worklist_inprogress;
+ remaining += ump->softdep_on_worklist;
VFS_UNLOCK_GIANT(vfslocked);
mtx_lock(&mountlist_mtx);
nmp = TAILQ_NEXT(mp, mnt_list);
@@ -1314,10 +1380,14 @@ softdep_speedup(void)
* The following routine is the only one that removes items
* and does so in order from first to last.
*/
+
+#define WK_HEAD 0x0001 /* Add to HEAD. */
+#define WK_NODELAY 0x0002 /* Process immediately. */
+
static void
-add_to_worklist(wk, nodelay)
+add_to_worklist(wk, flags)
struct worklist *wk;
- int nodelay;
+ int flags;
{
struct ufsmount *ump;
@@ -1327,13 +1397,17 @@ add_to_worklist(wk, nodelay)
panic("add_to_worklist: %s(0x%X) already on list",
TYPENAME(wk->wk_type), wk->wk_state);
wk->wk_state |= ONWORKLIST;
- if (LIST_EMPTY(&ump->softdep_workitem_pending))
+ if (ump->softdep_on_worklist == 0) {
LIST_INSERT_HEAD(&ump->softdep_workitem_pending, wk, wk_list);
- else
+ ump->softdep_worklist_tail = wk;
+ } else if (flags & WK_HEAD) {
+ LIST_INSERT_HEAD(&ump->softdep_workitem_pending, wk, wk_list);
+ } else {
LIST_INSERT_AFTER(ump->softdep_worklist_tail, wk, wk_list);
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***