svn commit: r222953 - head/sys/kern
Author: jeff
Date: Fri Jun 10 22:15:36 2011
New Revision: 222953
URL: http://svn.freebsd.org/changeset/base/222953

Log:
  - When printing bufs with show buf the lblkno is often more useful than
    the blkno.  Print them both.

Modified:
  head/sys/kern/vfs_bio.c

Modified: head/sys/kern/vfs_bio.c
==============================================================================
--- head/sys/kern/vfs_bio.c	Fri Jun 10 20:51:41 2011	(r222952)
+++ head/sys/kern/vfs_bio.c	Fri Jun 10 22:15:36 2011	(r222953)
@@ -3999,10 +3999,11 @@ DB_SHOW_COMMAND(buffer, db_show_buffer)
 	db_printf("b_flags = 0x%b\n", (u_int)bp->b_flags, PRINT_BUF_FLAGS);
 	db_printf(
 	    "b_error = %d, b_bufsize = %ld, b_bcount = %ld, b_resid = %ld\n"
-	    "b_bufobj = (%p), b_data = %p, b_blkno = %jd, b_dep = %p\n",
+	    "b_bufobj = (%p), b_data = %p, b_blkno = %jd, b_lblkno = %jd, "
+	    "b_dep = %p\n",
 	    bp->b_error, bp->b_bufsize, bp->b_bcount, bp->b_resid,
 	    bp->b_bufobj, bp->b_data, (intmax_t)bp->b_blkno,
-	    bp->b_dep.lh_first);
+	    (intmax_t)bp->b_lblkno, bp->b_dep.lh_first);
 	if (bp->b_npages) {
 		int i;
 		db_printf("b_npages = %d, pages(OBJ, IDX, PA): ", bp->b_npages);
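A minimal sketch of how the two fields relate, assuming only the standard
VOP_BMAP() interface (this fragment is illustrative and is not part of the
commit): b_lblkno is the file-relative logical block number, while b_blkno
is the device-relative address that bmap produces for it.

#include <sys/param.h>
#include <sys/vnode.h>

/*
 * Illustrative only: translate a logical (file-relative) block number
 * into the device-relative block number, the same mapping that
 * distinguishes b_lblkno from b_blkno in the output above.
 */
static int
lblk_to_devblk(struct vnode *vp, daddr_t lblkno, daddr_t *blknop)
{

	return (VOP_BMAP(vp, lblkno, NULL, blknop, NULL, NULL));
}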
svn commit: r222954 - head/sys/ufs/ufs
Author: jeff
Date: Fri Jun 10 22:18:25 2011
New Revision: 222954
URL: http://svn.freebsd.org/changeset/base/222954

Log:
  - If the fsync in ufs_direnter fails SUJ can later panic because we have
    partially added a name.  Allow ufs_direnter() to continue in the hopes
    that it is a transient error.  If it is not, the directory is corrupted
    already from IO errors and writing this new block is not likely to make
    things worse.

Modified:
  head/sys/ufs/ufs/ufs_lookup.c

Modified: head/sys/ufs/ufs/ufs_lookup.c
==============================================================================
--- head/sys/ufs/ufs/ufs_lookup.c	Fri Jun 10 22:15:36 2011	(r222953)
+++ head/sys/ufs/ufs/ufs_lookup.c	Fri Jun 10 22:18:25 2011	(r222954)
@@ -967,7 +967,7 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdir
 		return (0);
 	if (tvp != NULL)
 		VOP_UNLOCK(tvp, 0);
-	error = VOP_FSYNC(dvp, MNT_WAIT, td);
+	(void) VOP_FSYNC(dvp, MNT_WAIT, td);
 	if (tvp != NULL)
 		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
 	return (error);
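A minimal sketch of the pattern adopted here, with invented names rather
than the real ufs_direnter() body: the fsync status is deliberately
discarded so that a transient write error does not abort an operation
whose directory entry is already partially on disk.

#include <sys/param.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/vnode.h>

/* Illustrative helper, not the committed code. */
static int
flush_dir_best_effort(struct vnode *dvp, struct thread *td, int error)
{

	/*
	 * Best effort: if the flush fails the block will be retried by
	 * the syncer, and failing here would leave a half-added name
	 * that the journal cannot describe.
	 */
	(void)VOP_FSYNC(dvp, MNT_WAIT, td);
	return (error);
}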
svn commit: r222955 - head/sys/ufs/ufs
Author: jeff Date: Fri Jun 10 22:19:44 2011 New Revision: 222955 URL: http://svn.freebsd.org/changeset/base/222955 Log: - Add support for referencing quota structures without needing the inode pointer for softupdates. Submitted by: mckusick Modified: head/sys/ufs/ufs/quota.h head/sys/ufs/ufs/ufs_quota.c Modified: head/sys/ufs/ufs/quota.h == --- head/sys/ufs/ufs/quota.hFri Jun 10 22:18:25 2011(r222954) +++ head/sys/ufs/ufs/quota.hFri Jun 10 22:19:44 2011(r222955) @@ -239,6 +239,12 @@ intsetuse(struct thread *, struct mount intgetquotasize(struct thread *, struct mount *, u_long, int, void *); vfs_quotactl_t ufs_quotactl; +#ifdef SOFTUPDATES +intquotaref(struct vnode *, struct dquot **); +void quotarele(struct dquot **); +void quotaadj(struct dquot **, struct ufsmount *, int64_t); +#endif /* SOFTUPDATES */ + #else /* !_KERNEL */ #include Modified: head/sys/ufs/ufs/ufs_quota.c == --- head/sys/ufs/ufs/ufs_quota.cFri Jun 10 22:18:25 2011 (r222954) +++ head/sys/ufs/ufs/ufs_quota.cFri Jun 10 22:19:44 2011 (r222955) @@ -1613,6 +1613,101 @@ dqflush(struct vnode *vp) } /* + * The following three functions are provided for the adjustment of + * quotas by the soft updates code. + */ +#ifdef SOFTUPDATES +/* + * Acquire a reference to the quota structures associated with a vnode. + * Return count of number of quota structures found. + */ +int +quotaref(vp, qrp) + struct vnode *vp; + struct dquot **qrp; +{ + struct inode *ip; + struct dquot *dq; + int i, found; + + for (i = 0; i < MAXQUOTAS; i++) + qrp[i] = NODQUOT; + /* +* Disk quotas must be turned off for system files. Currently +* snapshot and quota files. +*/ + if ((vp->v_vflag & VV_SYSTEM) != 0) + return (0); + /* +* Iterate through and copy active quotas. +*/ + found = 0; + ip = VTOI(vp); + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = ip->i_dquot[i]) == NODQUOT) + continue; + DQREF(dq); + qrp[i] = dq; + found++; + } + return (found); +} + +/* + * Release a set of quota structures obtained from a vnode. + */ +void +quotarele(qrp) + struct dquot **qrp; +{ + struct dquot *dq; + int i; + + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = qrp[i]) == NODQUOT) + continue; + dqrele(NULL, dq); + } +} + +/* + * Adjust the number of blocks associated with a quota. + * Positive numbers when adding blocks; negative numbers when freeing blocks. + */ +void +quotaadj(qrp, ump, blkcount) + struct dquot **qrp; + struct ufsmount *ump; + int64_t blkcount; +{ + struct dquot *dq; + ufs2_daddr_t ncurblocks; + int i; + + if (blkcount == 0) + return; + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = qrp[i]) == NODQUOT) + continue; + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "adjqta"); + ncurblocks = dq->dq_curblocks + blkcount; + if (ncurblocks >= 0) + dq->dq_curblocks = ncurblocks; + else + dq->dq_curblocks = 0; + if (blkcount < 0) + dq->dq_flags &= ~DQ_BLKS; + else if (dq->dq_curblocks + blkcount >= dq->dq_bsoftlimit && +dq->dq_curblocks < dq->dq_bsoftlimit) + dq->dq_btime = time_second + ump->um_btime[i]; + dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); + } +} +#endif /* SOFTUPDATES */ + +/* * 32-bit / 64-bit conversion functions. * * 32-bit quota records are stored in native byte order. Attention must ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
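A sketch of how a caller might use the new interface; the function and
variable names below are invented for illustration and are not part of
the commit.  The point of the API is that quota references can be taken
while the vnode is still available and the adjustment applied much later,
when the freed blocks are finally released.

#include <sys/param.h>
#include <sys/vnode.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>

#if defined(_KERNEL) && defined(SOFTUPDATES)
static void
example_charge_freed_blocks(struct vnode *vp, struct ufsmount *ump,
    int64_t freedblocks)
{
	struct dquot *quota[MAXQUOTAS];

	if (quotaref(vp, quota) == 0)
		return;				/* No active quotas. */
	/* ...possibly much later, after the inode is gone... */
	quotaadj(quota, ump, -freedblocks);	/* Negative when freeing. */
	quotarele(quota);
}
#endif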
svn commit: r222956 - head/sys/conf
Author: jeff Date: Fri Jun 10 22:38:31 2011 New Revision: 222956 URL: http://svn.freebsd.org/changeset/base/222956 Log: - Eliminate an incorrect include path from the mthca build. Modified: head/sys/conf/files Modified: head/sys/conf/files == --- head/sys/conf/files Fri Jun 10 22:19:44 2011(r222955) +++ head/sys/conf/files Fri Jun 10 22:38:31 2011(r222956) @@ -3152,41 +3152,41 @@ ofed/drivers/net/mlx4/en_tx.c optional compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/infiniband/hw/mthca/mthca_allocator.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_av.coptional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_catas.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_cmd.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_cq.coptional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_eq.coptional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mad.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_main.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mcg.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_memfree.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mr.coptional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_pd.coptional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_profile.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_provider.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_qp.coptional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_reset.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_srq.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_uar.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + 
	no-depend compile-with "${OFED_C}"
# crypto support
opencrypto/cast.c	optional crypto | ipsec
svn commit: r222958 - in head: sbin/fsck_ffs sys/sys sys/ufs/ffs sys/ufs/ufs
Author: jeff Date: Fri Jun 10 22:48:35 2011 New Revision: 222958 URL: http://svn.freebsd.org/changeset/base/222958 Log: Implement fully asynchronous partial truncation with softupdates journaling to resolve errors which can cause corruption on recovery with the old synchronous mechanism. - Append partial truncation freework structures to indirdeps while truncation is proceeding. These prevent new block pointers from becoming valid until truncation completes and serialize truncations. - On completion of a partial truncate journal work waits for zeroed pointers to hit indirects. - softdep_journal_freeblocks() handles last frag allocation and last block zeroing. - vtruncbuf/ffs_page_remove moved into softdep_*_freeblocks() so it is only implemented in one place. - Block allocation failure handling moved up one level so it does not proceed with buf locks held. This permits us to do more extensive reclaims when filesystem space is exhausted. - softdep_sync_metadata() is broken into two parts, the first executes once at the start of ffs_syncvnode() and flushes truncations and inode dependencies. The second is called on each locked buf. This eliminates excessive looping and rollbacks. - Improve the mechanism in process_worklist_item() that handles acquiring vnode locks for handle_workitem_remove() so that it works more generally and does not loop excessively over the same worklist items on each call. - Don't corrupt directories by zeroing the tail in fsck. This is only done for regular files. - Push a fsync complete record for files that need it so the checker knows a truncation in the journal is no longer valid. Discussed with: mckusick, kib (ffs_pages_remove and ffs_truncate parts) Tested by:pho Modified: head/sbin/fsck_ffs/suj.c head/sys/sys/vnode.h head/sys/ufs/ffs/ffs_alloc.c head/sys/ufs/ffs/ffs_balloc.c head/sys/ufs/ffs/ffs_extern.h head/sys/ufs/ffs/ffs_inode.c head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/ffs_vfsops.c head/sys/ufs/ffs/ffs_vnops.c head/sys/ufs/ffs/fs.h head/sys/ufs/ffs/softdep.h head/sys/ufs/ufs/inode.h head/sys/ufs/ufs/ufsmount.h Modified: head/sbin/fsck_ffs/suj.c == --- head/sbin/fsck_ffs/suj.cFri Jun 10 22:42:00 2011(r222957) +++ head/sbin/fsck_ffs/suj.cFri Jun 10 22:48:35 2011(r222958) @@ -1604,7 +1604,7 @@ ino_trunc(ino_t ino, off_t size) * uninitialized space later. */ off = blkoff(fs, size); - if (off) { + if (off && DIP(ip, di_mode) != IFDIR) { uint8_t *buf; long clrsize; @@ -1775,13 +1775,18 @@ cg_trunc(struct suj_cg *sc) struct suj_ino *sino; int i; - for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) + for (i = 0; i < SUJ_HASHSIZE; i++) { + LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) { if (sino->si_trunc) { ino_trunc(sino->si_ino, sino->si_trunc->jt_size); + sino->si_blkadj = 0; sino->si_trunc = NULL; } + if (sino->si_blkadj) + ino_adjblks(sino); + } + } } /* @@ -1791,7 +1796,6 @@ cg_trunc(struct suj_cg *sc) static void cg_check_blk(struct suj_cg *sc) { - struct suj_ino *sino; struct suj_blk *sblk; int i; @@ -1799,15 +1803,6 @@ cg_check_blk(struct suj_cg *sc) for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next) blk_check(sblk); - /* -* Now that we've freed blocks which are not referenced we -* make a second pass over all inodes to adjust their block -* counts. 
-*/ - for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) - if (sino->si_blkadj) - ino_adjblks(sino); } /* @@ -1961,14 +1956,7 @@ ino_append(union jrec *rec) "parent %d, diroff %jd\n", refrec->jr_op, refrec->jr_ino, refrec->jr_nlink, refrec->jr_parent, refrec->jr_diroff); - /* -* Lookup the ino and clear truncate if one is found. Partial -* truncates are always done synchronously so if we discover -* an operation that requires a lock the truncation has completed -* and can be discarded. -*/ sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1); - sino->si_trunc = NULL; sino->si_hasrecs = 1; srec = errmalloc(sizeof(*srec)); srec->sr_rec = rec; @@ -2174,9 +2162,7
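One small but easily missed piece of the diff above is the fsck change
that stops zeroing the tail of a directory's last partial block.  The
fragment below restates that rule in isolation (simplified; the real
ino_trunc() buffer handling is omitted and the helper name is invented):

#include <sys/param.h>
#include <sys/types.h>
#include <ufs/ufs/dinode.h>
#include <ufs/ffs/fs.h>

/*
 * Only regular files may have the uninitialized tail of a partial last
 * block cleared during recovery; clearing a directory block would
 * destroy live entries.
 */
static int
may_zero_partial_tail(struct fs *fs, int mode, off_t size)
{

	return (blkoff(fs, size) != 0 && (mode & IFMT) != IFDIR);
}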
Re: svn commit: r222958 - in head: sbin/fsck_ffs sys/sys sys/ufs/ffs sys/ufs/ufs
On Fri, 10 Jun 2011, Jeff Roberson wrote: Author: jeff Date: Fri Jun 10 22:48:35 2011 New Revision: 222958 URL: http://svn.freebsd.org/changeset/base/222958 Log: Implement fully asynchronous partial truncation with softupdates journaling to resolve errors which can cause corruption on recovery with the old synchronous mechanism. This diff is enormous and took months of work. I'm sorry to get it in so close to 9.0, I had no idea it would take so long. pho has tested multiple versions of the patch with and without journaling for days of test time and it has probably racked up a week of machine time for me but there may be problems given that it is so huge. There is still a snapshot problem with SUJ that mckusick and I are working on. Expect to see some checkins for that soon. Thanks, Jeff - Append partial truncation freework structures to indirdeps while truncation is proceeding. These prevent new block pointers from becoming valid until truncation completes and serialize truncations. - On completion of a partial truncate journal work waits for zeroed pointers to hit indirects. - softdep_journal_freeblocks() handles last frag allocation and last block zeroing. - vtruncbuf/ffs_page_remove moved into softdep_*_freeblocks() so it is only implemented in one place. - Block allocation failure handling moved up one level so it does not proceed with buf locks held. This permits us to do more extensive reclaims when filesystem space is exhausted. - softdep_sync_metadata() is broken into two parts, the first executes once at the start of ffs_syncvnode() and flushes truncations and inode dependencies. The second is called on each locked buf. This eliminates excessive looping and rollbacks. - Improve the mechanism in process_worklist_item() that handles acquiring vnode locks for handle_workitem_remove() so that it works more generally and does not loop excessively over the same worklist items on each call. - Don't corrupt directories by zeroing the tail in fsck. This is only done for regular files. - Push a fsync complete record for files that need it so the checker knows a truncation in the journal is no longer valid. Discussed with:mckusick, kib (ffs_pages_remove and ffs_truncate parts) Tested by: pho Modified: head/sbin/fsck_ffs/suj.c head/sys/sys/vnode.h head/sys/ufs/ffs/ffs_alloc.c head/sys/ufs/ffs/ffs_balloc.c head/sys/ufs/ffs/ffs_extern.h head/sys/ufs/ffs/ffs_inode.c head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/ffs_vfsops.c head/sys/ufs/ffs/ffs_vnops.c head/sys/ufs/ffs/fs.h head/sys/ufs/ffs/softdep.h head/sys/ufs/ufs/inode.h head/sys/ufs/ufs/ufsmount.h Modified: head/sbin/fsck_ffs/suj.c == --- head/sbin/fsck_ffs/suj.cFri Jun 10 22:42:00 2011(r222957) +++ head/sbin/fsck_ffs/suj.cFri Jun 10 22:48:35 2011(r222958) @@ -1604,7 +1604,7 @@ ino_trunc(ino_t ino, off_t size) * uninitialized space later. 
*/ off = blkoff(fs, size); - if (off) { + if (off && DIP(ip, di_mode) != IFDIR) { uint8_t *buf; long clrsize; @@ -1775,13 +1775,18 @@ cg_trunc(struct suj_cg *sc) struct suj_ino *sino; int i; - for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) + for (i = 0; i < SUJ_HASHSIZE; i++) { + LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) { if (sino->si_trunc) { ino_trunc(sino->si_ino, sino->si_trunc->jt_size); + sino->si_blkadj = 0; sino->si_trunc = NULL; } + if (sino->si_blkadj) + ino_adjblks(sino); + } + } } /* @@ -1791,7 +1796,6 @@ cg_trunc(struct suj_cg *sc) static void cg_check_blk(struct suj_cg *sc) { - struct suj_ino *sino; struct suj_blk *sblk; int i; @@ -1799,15 +1803,6 @@ cg_check_blk(struct suj_cg *sc) for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next) blk_check(sblk); - /* -* Now that we've freed blocks which are not referenced we -* make a second pass over all inodes to adjust their block -* counts. -*/ - for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) - if (sino->si_blkadj) - ino_adjblks(sino); } /* @@ -1961,14 +1956,7 @@ ino_append(union jrec *rec) "parent %d, diroff %jd\n", refrec-&
svn commit: r223325 - head/sys/ufs/ffs
Author: jeff Date: Mon Jun 20 03:25:09 2011 New Revision: 223325 URL: http://svn.freebsd.org/changeset/base/223325 Log: - Fix directory count rollbacks by passing the mode to the journal dep earlier. - Add rollback/forward code for frag and cluster accounting. - Handle the FREEDEP case in softdep_sync_buf(). (submitted by pho) Modified: head/sys/ufs/ffs/ffs_alloc.c head/sys/ufs/ffs/ffs_extern.h head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_alloc.c == --- head/sys/ufs/ffs/ffs_alloc.cMon Jun 20 02:17:34 2011 (r223324) +++ head/sys/ufs/ffs/ffs_alloc.cMon Jun 20 03:25:09 2011 (r223325) @@ -1829,7 +1829,7 @@ gotit: } UFS_UNLOCK(ump); if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref); + softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref, mode); bdwrite(bp); if (ibp != NULL) bawrite(ibp); Modified: head/sys/ufs/ffs/ffs_extern.h == --- head/sys/ufs/ffs/ffs_extern.h Mon Jun 20 02:17:34 2011 (r223324) +++ head/sys/ufs/ffs/ffs_extern.h Mon Jun 20 03:25:09 2011 (r223325) @@ -130,7 +130,7 @@ voidsoftdep_freefile(struct vnode *, in intsoftdep_request_cleanup(struct fs *, struct vnode *, struct ucred *, int); void softdep_setup_freeblocks(struct inode *, off_t, int); -void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t); +void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t, int); void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t, int, int); void softdep_setup_allocdirect(struct inode *, ufs_lbn_t, ufs2_daddr_t, Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Mon Jun 20 02:17:34 2011 (r223324) +++ head/sys/ufs/ffs/ffs_softdep.c Mon Jun 20 03:25:09 2011 (r223325) @@ -142,10 +142,11 @@ softdep_setup_sbupdate(ump, fs, bp) } void -softdep_setup_inomapdep(bp, ip, newinum) +softdep_setup_inomapdep(bp, ip, newinum, mode) struct buf *bp; struct inode *ip; ino_t newinum; + int mode; { panic("softdep_setup_inomapdep called"); @@ -789,6 +790,8 @@ static void diradd_inode_written(struct static int handle_written_indirdep(struct indirdep *, struct buf *, struct buf**); static int handle_written_inodeblock(struct inodedep *, struct buf *); +static int jnewblk_rollforward(struct jnewblk *, struct fs *, struct cg *, + uint8_t *); static int handle_written_bmsafemap(struct bmsafemap *, struct buf *); static void handle_written_jaddref(struct jaddref *); static void handle_written_jremref(struct jremref *); @@ -820,6 +823,8 @@ static void handle_allocindir_partdone(s static void initiate_write_filepage(struct pagedep *, struct buf *); static void initiate_write_indirdep(struct indirdep*, struct buf *); static void handle_written_mkdir(struct mkdir *, int); +static int jnewblk_rollback(struct jnewblk *, struct fs *, struct cg *, + uint8_t *); static void initiate_write_bmsafemap(struct bmsafemap *, struct buf *); static void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *); static void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *); @@ -935,6 +940,7 @@ static void wake_worklist(struct worklis static void wait_worklist(struct worklist *, char *); static void remove_from_worklist(struct worklist *); static void softdep_flush(void); +static void softdep_flushjournal(struct mount *); static int softdep_speedup(void); static void worklist_speedup(void); static int journal_mount(struct mount *, struct fs *, struct ucred *); @@ -3046,6 +3052,25 @@ jfsync_write(jfsync, jseg, data) rec->jt_extsize = jfsync->jfs_extsize; } +static void +softdep_flushjournal(mp) + 
struct mount *mp; +{ + struct jblocks *jblocks; + struct ufsmount *ump; + + if ((mp->mnt_kern_flag & MNTK_SUJ) == 0) + return; + ump = VFSTOUFS(mp); + jblocks = ump->softdep_jblocks; + ACQUIRE_LOCK(&lk); + while (ump->softdep_on_journal) { + jblocks->jb_needseg = 1; + softdep_process_journal(mp, NULL, MNT_WAIT); + } + FREE_LOCK(&lk); +} + /* * Flush some journal records to disk. */ @@ -4310,7 +4335,6 @@ softdep_setup_create(dp, ip) inoreflst); KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, ("softdep_setup_create: No addref structure present.")); - jaddref->ja_mode = ip->i_mode; } softdep_prelink(dvp, NULL); FREE_LOCK(&lk); @@ -4417,7
svn commit: r223689 - head/sbin/fsck_ffs
Author: jeff
Date: Thu Jun 30 05:28:10 2011
New Revision: 223689
URL: http://svn.freebsd.org/changeset/base/223689

Log:
  - Handle the JOP_SYNC case as appropriate.

  Reported by:	pho

Modified:
  head/sbin/fsck_ffs/suj.c

Modified: head/sbin/fsck_ffs/suj.c
==============================================================================
--- head/sbin/fsck_ffs/suj.c	Thu Jun 30 05:20:02 2011	(r223688)
+++ head/sbin/fsck_ffs/suj.c	Thu Jun 30 05:28:10 2011	(r223689)
@@ -2261,6 +2261,7 @@ suj_build(void)
 			blk_build((struct jblkrec *)rec);
 			break;
 		case JOP_TRUNC:
+		case JOP_SYNC:
 			ino_build_trunc((struct jtrncrec *)rec);
 			break;
 		default:
svn commit: r223769 - head/sys/ufs/ufs
Author: jeff
Date: Mon Jul 4 20:52:23 2011
New Revision: 223769
URL: http://svn.freebsd.org/changeset/base/223769

Log:
  - Fix an inode quota leak.  We need to decrement the quota once and
    only once.

  Tested by:	pho
  Reviewed by:	mckusick

Modified:
  head/sys/ufs/ufs/ufs_inode.c

Modified: head/sys/ufs/ufs/ufs_inode.c
==============================================================================
--- head/sys/ufs/ufs/ufs_inode.c	Mon Jul 4 20:50:09 2011	(r223768)
+++ head/sys/ufs/ufs/ufs_inode.c	Mon Jul 4 20:52:23 2011	(r223769)
@@ -120,15 +120,14 @@ ufs_inactive(ap)
 	isize = ip->i_size;
 	if (ip->i_ump->um_fstype == UFS2)
 		isize += ip->i_din2->di_extsize;
-	if (ip->i_effnlink <= 0 && isize && !UFS_RDONLY(ip)) {
+	if (ip->i_effnlink <= 0 && isize && !UFS_RDONLY(ip))
+		error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL,
+		    NOCRED, td);
+	if (ip->i_nlink <= 0 && ip->i_mode && !UFS_RDONLY(ip)) {
 #ifdef QUOTA
 		if (!getinoquota(ip))
 			(void)chkiq(ip, -1, NOCRED, FORCE);
 #endif
-		error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL,
-		    NOCRED, td);
-	}
-	if (ip->i_nlink <= 0 && ip->i_mode && !UFS_RDONLY(ip)) {
 #ifdef UFS_EXTATTR
 		ufs_extattr_vnode_inactive(vp, td);
 #endif
svn commit: r223770 - head/sys/ufs/ffs
Author: jeff Date: Mon Jul 4 20:53:55 2011 New Revision: 223770 URL: http://svn.freebsd.org/changeset/base/223770 Log: - It is impossible to run request_cleanup() while doing a copyonwrite. This will most likely cause new block allocations which can recurse into request cleanup. - While here optimize the ufs locking slightly. We need only acquire and drop once. - process_removes() and process_truncates() also is only needed once. - Attempt to flush each item on the worklist once but do not loop forever if some can not be completed. Discussed with: mckusick Modified: head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Mon Jul 4 20:52:23 2011 (r223769) +++ head/sys/ufs/ffs/ffs_softdep.c Mon Jul 4 20:53:55 2011 (r223770) @@ -12510,33 +12510,36 @@ softdep_request_cleanup(fs, vp, cred, re int error; mp = vp->v_mount; - ump = VTOI(vp)->i_ump; + ump = VFSTOUFS(mp); mtx_assert(UFS_MTX(ump), MA_OWNED); if (resource == FLUSH_BLOCKS_WAIT) stat_cleanup_blkrequests += 1; else stat_cleanup_inorequests += 1; + /* * If we are being called because of a process doing a -* copy-on-write, then it is not safe to update the vnode -* as we may recurse into the copy-on-write routine. +* copy-on-write, then it is not safe to process any +* worklist items as we will recurse into the copyonwrite +* routine. This will result in an incoherent snapshot. */ - if (!(curthread->td_pflags & TDP_COWINPROGRESS)) { - UFS_UNLOCK(ump); - error = ffs_update(vp, 1); + if (curthread->td_pflags & TDP_COWINPROGRESS) + return (0); + UFS_UNLOCK(ump); + error = ffs_update(vp, 1); + if (error != 0) { UFS_LOCK(ump); - if (error != 0) - return (0); + return (0); } /* * If we are in need of resources, consider pausing for * tickdelay to give ourselves some breathing room. */ - UFS_UNLOCK(ump); ACQUIRE_LOCK(&lk); + process_removes(vp); + process_truncates(vp); request_cleanup(UFSTOVFS(ump), resource); FREE_LOCK(&lk); - UFS_LOCK(ump); /* * Now clean up at least as many resources as we will need. 
* @@ -12568,29 +12571,23 @@ softdep_request_cleanup(fs, vp, cred, re roundup((fs->fs_dsize * fs->fs_minfree / 100) - fs->fs_cstotal.cs_nffree, fs->fs_frag)); } else { + UFS_LOCK(ump); printf("softdep_request_cleanup: Unknown resource type %d\n", resource); return (0); } starttime = time_second; retry: - while ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 && - fs->fs_cstotal.cs_nbfree <= needed) || - (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 && - fs->fs_cstotal.cs_nifree <= needed)) { - UFS_UNLOCK(ump); + if ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 && + fs->fs_cstotal.cs_nbfree <= needed) || + (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 && + fs->fs_cstotal.cs_nifree <= needed)) { ACQUIRE_LOCK(&lk); - process_removes(vp); - process_truncates(vp); if (ump->softdep_on_worklist > 0 && - process_worklist_item(UFSTOVFS(ump), 1, LK_NOWAIT) != 0) { + process_worklist_item(UFSTOVFS(ump), + ump->softdep_on_worklist, LK_NOWAIT) != 0) stat_worklist_push += 1; - FREE_LOCK(&lk); - UFS_LOCK(ump); - continue; - } FREE_LOCK(&lk); - UFS_LOCK(ump); } /* * If we still need resources and there are no more worklist @@ -12604,7 +12601,6 @@ retry: fs->fs_cstotal.cs_nbfree <= needed) || (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 && fs->fs_cstotal.cs_nifree <= needed)) { - UFS_UNLOCK(ump); MNT_ILOCK(mp); MNT_VNODE_FOREACH(lvp, mp, mvp) { VI_LOCK(lvp); @@ -12633,7 +12629,6 @@ retry: VOP_FSYNC(lvp, MNT_NOWAIT, curthread); VOP_UNLOCK(lvp, 0); } - UFS_LOCK(ump); if (ump->softdep_on_worklist > 0) { stat_cleanup_retries += 1; goto retry; @@ -12642,6 +1
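The "flush each item once" behaviour can be illustrated with a toy model
(the structure and function names below are invented and stand in for the
softdep worklist machinery): by snapshotting the queue depth before the
pass, every item gets one attempt and the loop cannot spin forever on
work that is not yet able to complete.

#include <stdio.h>

struct toy_worklist {
	int	count;			/* items currently queued */
};

static int
toy_process_one(struct toy_worklist *wl)
{

	if (wl->count == 0)
		return (0);
	wl->count--;			/* pretend the item completed */
	return (1);
}

static int
toy_flush_once(struct toy_worklist *wl)
{
	int target, pushed;

	/* Snapshot: items queued after this point wait for the next pass. */
	target = wl->count;
	for (pushed = 0; pushed < target; pushed++)
		if (toy_process_one(wl) == 0)
			break;
	return (pushed);
}

int
main(void)
{
	struct toy_worklist wl = { .count = 5 };

	printf("flushed %d items, %d remain\n", toy_flush_once(&wl), wl.count);
	return (0);
}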
svn commit: r223771 - head/sys/ufs/ffs
Author: jeff
Date: Mon Jul 4 21:04:25 2011
New Revision: 223771
URL: http://svn.freebsd.org/changeset/base/223771

Log:
  - Handle D_JSEGDEP in the softdep_sync_buf() switch.  These can now find
    themselves on snapshot vnodes.

  Reported by:	pho

Modified:
  head/sys/ufs/ffs/ffs_softdep.c

Modified: head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- head/sys/ufs/ffs/ffs_softdep.c	Mon Jul 4 20:53:55 2011	(r223770)
+++ head/sys/ufs/ffs/ffs_softdep.c	Mon Jul 4 21:04:25 2011	(r223771)
@@ -12082,6 +12082,7 @@ top:
 
 		case D_FREEWORK:
 		case D_FREEDEP:
+		case D_JSEGDEP:
 			continue;
 
 		default:
svn commit: r223772 - head/sys/ufs/ffs
Author: jeff Date: Mon Jul 4 22:08:04 2011 New Revision: 223772 URL: http://svn.freebsd.org/changeset/base/223772 Log: - Speed up pendingblock processing again. Having too much delay between ffs_blkfree() and the pending adjustment causes all kinds of space related problems. Modified: head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/softdep.h Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Mon Jul 4 21:04:25 2011 (r223771) +++ head/sys/ufs/ffs/ffs_softdep.c Mon Jul 4 22:08:04 2011 (r223772) @@ -880,6 +880,7 @@ static inline void setup_freeext(struct static inline void setup_freeindir(struct freeblks *, struct inode *, int, ufs_lbn_t, int); static inline struct freeblks *newfreeblks(struct mount *, struct inode *); +static void freeblks_free(struct ufsmount *, struct freeblks *, int); static void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t); ufs2_daddr_t blkcount(struct fs *, ufs2_daddr_t, off_t); static int trunc_check_buf(struct buf *, int *, ufs_lbn_t, int, int); @@ -5751,7 +5752,6 @@ newfreeblks(mp, ip) freeblks->fb_modrev = DIP(ip, i_modrev); freeblks->fb_devvp = ip->i_devvp; freeblks->fb_chkcnt = 0; - freeblks->fb_freecnt = 0; freeblks->fb_len = 0; return (freeblks); @@ -6199,7 +6199,7 @@ softdep_journal_freeblocks(ip, cred, len quotaref(vp, freeblks->fb_quota); (void) chkdq(ip, -datablocks, NOCRED, 0); #endif - freeblks->fb_chkcnt = datablocks; + freeblks->fb_chkcnt = -datablocks; UFS_LOCK(ip->i_ump); fs->fs_pendingblocks += datablocks; UFS_UNLOCK(ip->i_ump); @@ -6429,7 +6429,7 @@ softdep_setup_freeblocks(ip, length, fla quotaref(vp, freeblks->fb_quota); (void) chkdq(ip, -datablocks, NOCRED, 0); #endif - freeblks->fb_chkcnt = datablocks; + freeblks->fb_chkcnt = -datablocks; UFS_LOCK(ip->i_ump); fs->fs_pendingblocks += datablocks; UFS_UNLOCK(ip->i_ump); @@ -7284,8 +7284,8 @@ freework_freeblock(freework) freeblks->fb_cgwait++; WORKLIST_INSERT(&wkhd, &freework->fw_list); } - freeblks->fb_freecnt += btodb(bsize); FREE_LOCK(&lk); + freeblks_free(ump, freeblks, btodb(bsize)); ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, bsize, freeblks->fb_inum, freeblks->fb_vtype, &wkhd); ACQUIRE_LOCK(&lk); @@ -7459,6 +7459,33 @@ handle_workitem_freeblocks(freeblks, fla } /* + * Handle completion of block free via truncate. This allows fs_pending + * to track the actual free block count more closely than if we only updated + * it at the end. We must be careful to handle cases where the block count + * on free was incorrect. + */ +static void +freeblks_free(ump, freeblks, blocks) + struct ufsmount *ump; + struct freeblks *freeblks; + int blocks; +{ + struct fs *fs; + ufs2_daddr_t remain; + + UFS_LOCK(ump); + remain = -freeblks->fb_chkcnt; + freeblks->fb_chkcnt += blocks; + if (remain > 0) { + if (remain < blocks) + blocks = remain; + fs = ump->um_fs; + fs->fs_pendingblocks -= blocks; + } + UFS_UNLOCK(ump); +} + +/* * Once all of the freework workitems are complete we can retire the * freeblocks dependency and any journal work awaiting completion. This * can not be called until all other dependencies are stable on disk. 
@@ -7478,7 +7505,7 @@ handle_complete_freeblocks(freeblks, fla ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; flags = LK_EXCLUSIVE | flags; - spare = freeblks->fb_freecnt - freeblks->fb_chkcnt; + spare = freeblks->fb_chkcnt; /* * If we did not release the expected number of blocks we may have @@ -7501,9 +7528,9 @@ handle_complete_freeblocks(freeblks, fla } vput(vp); } - if (freeblks->fb_chkcnt) { + if (spare < 0) { UFS_LOCK(ump); - fs->fs_pendingblocks -= freeblks->fb_chkcnt; + fs->fs_pendingblocks += spare; UFS_UNLOCK(ump); } #ifdef QUOTA @@ -7559,7 +7586,7 @@ indir_trunc(freework, dbn, lbn) ufs2_daddr_t nb, nnb, *bap2 = 0; ufs_lbn_t lbnadd, nlbn; int i, nblocks, ufs1fmt; - int fs_pendingblocks; + int freedblocks; int goingaway; int freedeps; int needj; @@ -7701,16 +7728,18 @@ indir_trunc(freework, dbn, lbn) bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); } - fs_pendingblocks = 0; + freedblocks = 0; if (level == 0) - fs_pendingblocks = (nblocks * cnt); + freedblocks = (nblocks * cnt); + if (ne
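The sign flip on fb_chkcnt and the new freeblks_free() are easier to
follow with a worked example.  The toy model below (userland, invented
names, illustrative only) mirrors the arithmetic: the counter starts at
-datablocks, each completed free credits it toward zero, and the
filesystem's pending count is trimmed as soon as blocks are really free
rather than only at the end of the truncation.

#include <stdio.h>
#include <stdint.h>

struct toy_fs {
	int64_t	pendingblocks;		/* fs_pendingblocks stand-in */
	int64_t	chkcnt;			/* fb_chkcnt stand-in */
};

static void
toy_freeblks_free(struct toy_fs *fs, int64_t blocks)
{
	int64_t remain;

	remain = -fs->chkcnt;		/* blocks still expected to free */
	fs->chkcnt += blocks;
	if (remain > 0)
		fs->pendingblocks -= (blocks < remain) ? blocks : remain;
}

int
main(void)
{
	/* A 100-block truncation: pending += 100, chkcnt = -100. */
	struct toy_fs fs = { .pendingblocks = 100, .chkcnt = -100 };

	toy_freeblks_free(&fs, 60);	/* pending drops to 40 */
	toy_freeblks_free(&fs, 40);	/* pending drops to 0 */
	printf("pending=%jd chkcnt=%jd\n",
	    (intmax_t)fs.pendingblocks, (intmax_t)fs.chkcnt);
	return (0);
}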
svn commit: r219819 - in head: sys/amd64/include sys/conf sys/dev/hptmv sys/i386/include sys/kern sys/net sys/netinet sys/netinet6 sys/sys sys/vm usr.sbin/config usr.sbin/ndp
Author: jeff Date: Mon Mar 21 09:40:01 2011 New Revision: 219819 URL: http://svn.freebsd.org/changeset/base/219819 Log: - Merge changes to the base system to support OFED. These include a wider arg2 for sysctl, updates to vlan code, IFT_INFINIBAND, and other miscellaneous small features. Modified: head/sys/amd64/include/endian.h head/sys/conf/files head/sys/conf/kern.pre.mk head/sys/conf/options head/sys/dev/hptmv/hptproc.c head/sys/i386/include/endian.h head/sys/kern/kern_intr.c head/sys/kern/kern_jail.c head/sys/kern/kern_sx.c head/sys/kern/kern_sysctl.c head/sys/kern/subr_bus.c head/sys/net/if.c head/sys/net/if_arp.h head/sys/net/if_llatbl.h head/sys/net/if_types.h head/sys/net/if_var.h head/sys/net/if_vlan.c head/sys/net/if_vlan_var.h head/sys/netinet/if_ether.c head/sys/netinet6/in6.c head/sys/netinet6/nd6.c head/sys/netinet6/nd6_nbr.c head/sys/sys/bus.h head/sys/sys/file.h head/sys/sys/interrupt.h head/sys/sys/jail.h head/sys/sys/sx.h head/sys/sys/sysctl.h head/sys/vm/uma_core.c head/sys/vm/vm_map.c head/sys/vm/vm_map.h head/usr.sbin/config/config.h head/usr.sbin/config/mkmakefile.c head/usr.sbin/ndp/ndp.c Modified: head/sys/amd64/include/endian.h == --- head/sys/amd64/include/endian.h Mon Mar 21 08:54:59 2011 (r219818) +++ head/sys/amd64/include/endian.h Mon Mar 21 09:40:01 2011 (r219819) @@ -69,73 +69,59 @@ extern "C" { #if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE_BUILTIN_CONSTANT_P) -#define __byte_swap_int_var(x) \ -__extension__ ({ register __uint32_t __X = (x); \ - __asm ("bswap %0" : "+r" (__X)); \ - __X; }) +#define__bswap64_const(_x) \ + (((_x) >> 56) | \ + (((_x) >> 40) & (0xffUL << 8)) |\ + (((_x) >> 24) & (0xffUL << 16)) | \ + (((_x) >> 8) & (0xffUL << 24)) |\ + (((_x) << 8) & (0xffUL << 32)) |\ + (((_x) << 24) & (0xffUL << 40)) | \ + (((_x) << 40) & (0xffUL << 48)) | \ + ((_x) << 56)) + +#define__bswap32_const(_x) \ + (((_x) >> 24) | \ + (((_x) & (0xff << 16)) >> 8) | \ + (((_x) & (0xff << 8)) << 8) | \ + ((_x) << 24)) -#ifdef __OPTIMIZE__ - -#define__byte_swap_int_const(x) \ - x) & 0xff00) >> 24) | \ -(((x) & 0x00ff) >> 8) | \ -(((x) & 0xff00) << 8) | \ -(((x) & 0x00ff) << 24)) -#define__byte_swap_int(x) (__builtin_constant_p(x) ? \ - __byte_swap_int_const(x) : __byte_swap_int_var(x)) - -#else /* __OPTIMIZE__ */ - -#define__byte_swap_int(x) __byte_swap_int_var(x) - -#endif /* __OPTIMIZE__ */ - -#define __byte_swap_long_var(x) \ -__extension__ ({ register __uint64_t __X = (x); \ - __asm ("bswap %0" : "+r" (__X)); \ - __X; }) - -#ifdef __OPTIMIZE__ - -#define__byte_swap_long_const(x) \ - (((x >> 56) | \ -((x >> 40) & 0xff00) | \ -((x >> 24) & 0xff) | \ -((x >> 8) & 0xff00) | \ -((x << 8) & (0xfful << 32)) | \ -((x << 24) & (0xfful << 40)) | \ -((x << 40) & (0xfful << 48)) | \ -((x << 56 - -#define__byte_swap_long(x) (__builtin_constant_p(x) ? 
\ - __byte_swap_long_const(x) : __byte_swap_long_var(x)) - -#else /* __OPTIMIZE__ */ - -#define__byte_swap_long(x) __byte_swap_long_var(x) - -#endif /* __OPTIMIZE__ */ +#define __bswap16_const(_x)(__uint16_t)((_x) << 8 | (_x) >> 8) static __inline __uint64_t -__bswap64(__uint64_t _x) +__bswap64_var(__uint64_t _x) { - return (__byte_swap_long(_x)); + __asm ("bswap %0" : "+r" (_x)); + return (_x); } static __inline __uint32_t -__bswap32(__uint32_t _x) +__bswap32_var(__uint32_t _x) { - return (__byte_swap_int(_x)); + __asm ("bswap %0" : "+r" (_x)); + return (_x); } static __inline __uint16_t -__bswap16(__uint16_t _x) +__bswap16_var(__uint16_t _x) { - return (_x << 8 | _x >> 8); + + return (__bswap16_const(_x)); } +#define__bswap64(_x) \ + (__builtin_constant_p(_x) ? \ + __bswap64_const((__uint64_t)(_x)) : __bswap64_var(_x)) + +#define__bswap32(_x) \ + (__builtin_constant_p(_x) ? \ + __bswap32_const((__uint32_t)(_x)) : __bswap32_var(_x)) + +#define__bswap16(_x) \ + (__builtin_constant_p(_x) ? \ + __bswap16_const((__uint16_t)(_x)) : __bswap16_var(_x)) + #define__htonl(x) __bswap32(x) #define__htons(x) __bswap16(x) #define__ntohl(x) __bswap32(x) Modified: head/sys/conf/files
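The practical effect of the reworked macros is that a byte swap of a
compile-time constant folds away entirely while a run-time value still
uses the bswap instruction.  A small userland-style usage sketch (not
part of the commit; the userland spellings lack the double-underscore
prefix) is below.

#include <sys/endian.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t v = 0x11223344;

	/* The first call folds at compile time, the second swaps at run time. */
	printf("%#x %#x\n", bswap32(0x11223344), bswap32(v));
	/* Both print 0x44332211. */
	return (0);
}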
Re: svn commit: r219667 - head/usr.sbin/bsdinstall/partedit
On Sun, 20 Mar 2011, Kirk McKusick wrote:

Date: Sun, 20 Mar 2011 13:25:20 -0700
From: Doug Barton
To: Marius Strobl
CC: Kirk McKusick, Nathan Whitehorn, svn-src-h...@freebsd.org,
    Jeff Roberson, Gavin Atkinson, svn-src-all@FreeBSD.org,
    src-committ...@freebsd.org, kved...@kvedulv.de
Subject: Re: svn commit: r219667 - head/usr.sbin/bsdinstall/partedit

On 03/20/2011 09:22, Marius Strobl wrote:

I fear it's still a bit premature to enable SU+J by default.  Rather
recently I was told about an SU+J filesystem lost after a panic that
happened after snapshotting it (report CC'ed, maybe he can provide some
more details), and I'm pretty sure I've seen the problem described in
PR 149022 also after the potential fix mentioned in its feedback.

+1

I tried enabling SU+J on my /var (after backing up of course) and after a
panic random files were missing entirely.  Not the last updates to those
files, the whole file, and many of them had not been written to in
days/weeks/months.

With all due respect to the hard work that went into the code, I would be
very uncomfortable with enabling it by default at this point.

Doug

With all due respect, how can we fix things that nobody reports?  If you
have a problem, let us know about it.  And of course, we need something
more specific than the above.

I have not been following current but I read any emails sent directly to
me without a mailing list in the cc.  I also was not aware of this.  I had
not heard of any filesystem corruption problems at all.  If there are any,
I also am not comfortable with enabling it by default.  I want to fix that
first.

I have blocked off next week to work on this.  I already sent an email out
to current@ requesting bug reports.  Please, if you have anything else, let
me know immediately so I can prioritize it and start investigating.

Thanks,
Jeff

Kirk McKusick
Re: svn commit: r219667 - head/usr.sbin/bsdinstall/partedit
On Sun, 20 Mar 2011, Doug Barton wrote:

On 03/20/2011 09:22, Marius Strobl wrote:

I fear it's still a bit premature to enable SU+J by default.  Rather
recently I was told about an SU+J filesystem lost after a panic that
happened after snapshotting it (report CC'ed, maybe he can provide some
more details), and I'm pretty sure I've seen the problem described in
PR 149022 also after the potential fix mentioned in its feedback.

+1

I tried enabling SU+J on my /var (after backing up of course) and after a
panic random files were missing entirely.  Not the last updates to those
files, the whole file, and many of them had not been written to in
days/weeks/months.

So you're saying the directory entry was missing?  Can you tell me how big
the directory was?  Number of files?  Approximate directory size when you
consider file names?  When you fsck'd, were inodes recovered and linked
into lost+found?  What was the actual path?

I'm trying to wrap my head around how this would be possible, where the
error could be, and whether it could be caused by SUJ.  The number of
interactions with disk writes is minimal.  Corruption, if it occurs, would
most likely be caused by a bad journal recovery.

Thanks,
Jeff

With all due respect to the hard work that went into the code, I would be
very uncomfortable with enabling it by default at this point.

Doug

--
Nothin' ever doesn't change, but nothin' changes much.
		-- OK Go

Breadth of IT experience, and depth of knowledge in the DNS.
Yours for the right price.  :)  http://SupersetSolutions.com/
Re: svn commit: r219667 - head/usr.sbin/bsdinstall/partedit
On Mon, 21 Mar 2011, Michael Moll wrote:

Hi All,

On Sun, Mar 20, 2011 at 05:22:12PM +0100, Marius Strobl wrote:

I fear it's still a bit premature to enable SU+J by default.  Rather
recently I was told about an SU+J filesystem lost after a panic that
happened after snapshotting it (report CC'ed, maybe he can provide some
more details), and I'm pretty sure I've seen the problem described in
PR 149022 also after the potential fix mentioned in its feedback.

Sorry, no details available, as I didn't record the panic and problems
back then.  However, this was not the first panic which I attribute
(maybe wrongly) to SUJ, and as a consequence all my UFS filesystems now
have SUJ turned off again.  If SUJ really is going to be the default I
would expect quite some fallout from this, judging by my experiences.

How long ago was this?  We fixed quite a number of bugs a few months ago.

Thanks,
Jeff

Kind Regards
--
Michael Moll
svn commit: r219849 - head/sys/modules
Author: jeff Date: Mon Mar 21 21:35:19 2011 New Revision: 219849 URL: http://svn.freebsd.org/changeset/base/219849 Log: - For now, disable ofed module build unless MK_OFED is set. Modified: head/sys/modules/Makefile Modified: head/sys/modules/Makefile == --- head/sys/modules/Makefile Mon Mar 21 21:34:12 2011(r219848) +++ head/sys/modules/Makefile Mon Mar 21 21:35:19 2011(r219849) @@ -185,9 +185,9 @@ SUBDIR= ${_3dfx} \ mfi \ mii \ mlx \ - mlx4 \ - mlx4ib \ - mlxen \ + ${_mlx4} \ + ${_mlx4ib} \ + ${_mlxen} \ ${_mly} \ mmc \ mmcsd \ @@ -198,7 +198,7 @@ SUBDIR= ${_3dfx} \ msdosfs_iconv \ ${_mse} \ msk \ - mthca \ + ${_mthca} \ mvs \ mwl \ mwlfw \ @@ -673,6 +673,13 @@ _zfs= zfs .endif .endif +.if ${MK_OFED} != "no" || defined(ALL_MODULES) +_mthca=mthca +_mlx4= mlx4 +_mlx4ib= mlx4ib +_mlxen=mlxen +.endif + .if defined(MODULES_OVERRIDE) && !defined(ALL_MODULES) SUBDIR=${MODULES_OVERRIDE} .endif @@ -690,5 +697,6 @@ afterinstall: kldxref ${DESTDIR}${KMODDIR}; \ fi .endif +#endif .include ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r219859 - head/sys/ofed/drivers/net/mlx4
Author: jeff Date: Tue Mar 22 04:50:47 2011 New Revision: 219859 URL: http://svn.freebsd.org/changeset/base/219859 Log: - Don't use a separate set of rx queues for UDP, hash them into the same set as TCP. - Eliminate the fully linear non-scatter/gather rx path, there is no harm in using arrays of clusters for both TCP and UDP. - Implement support for enabling/disabling per-vlan priority pause and queues via sysctl. Modified: head/sys/ofed/drivers/net/mlx4/en_main.c head/sys/ofed/drivers/net/mlx4/en_netdev.c head/sys/ofed/drivers/net/mlx4/en_rx.c head/sys/ofed/drivers/net/mlx4/mlx4_en.h Modified: head/sys/ofed/drivers/net/mlx4/en_main.c == --- head/sys/ofed/drivers/net/mlx4/en_main.cTue Mar 22 04:31:35 2011 (r219858) +++ head/sys/ofed/drivers/net/mlx4/en_main.cTue Mar 22 04:50:47 2011 (r219859) @@ -236,9 +236,8 @@ static void *mlx4_en_add(struct mlx4_dev mlx4_info(mdev, "Using %d tx rings for port:%d\n", mdev->profile.prof[i].tx_ring_num, i); mdev->profile.prof[i].rx_ring_num = rounddown_pow_of_two( - min_t(int, dev->caps.num_comp_vectors, MAX_RX_RINGS/2)) + - (mdev->profile.udp_rss ? rounddown_pow_of_two( - min_t(int, dev->caps.num_comp_vectors, MAX_RX_RINGS/2)) : 1); + min_t(int, dev->caps.num_comp_vectors, MAX_RX_RINGS)); + mlx4_info(mdev, "Defaulting to %d rx rings for port:%d\n", mdev->profile.prof[i].rx_ring_num, i); } Modified: head/sys/ofed/drivers/net/mlx4/en_netdev.c == --- head/sys/ofed/drivers/net/mlx4/en_netdev.c Tue Mar 22 04:31:35 2011 (r219858) +++ head/sys/ofed/drivers/net/mlx4/en_netdev.c Tue Mar 22 04:50:47 2011 (r219859) @@ -277,10 +277,7 @@ static void mlx4_en_netpoll(struct net_d cq = &priv->rx_cq[i]; spin_lock_irqsave(&cq->lock, flags); napi_synchronize(&cq->napi); - if (priv->rx_ring[i].use_frags) - mlx4_en_process_rx_cq(dev, cq, 0); - else - mlx4_en_process_rx_cq_mb(dev, cq, 0); + mlx4_en_process_rx_cq(dev, cq, 0); spin_unlock_irqrestore(&cq->lock, flags); } } @@ -866,10 +863,6 @@ int mlx4_en_alloc_resources(struct mlx4_ prof->rx_ring_size, i, RX)) goto err; - if (i > priv->rx_ring_num - priv->udp_rings - 1) - priv->rx_ring[i].use_frags = 0; - else - priv->rx_ring[i].use_frags = 1; if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i], prof->rx_ring_size)) goto err; @@ -880,7 +873,7 @@ int mlx4_en_alloc_resources(struct mlx4_ /* Populate Tx priority mappings */ mlx4_en_set_prio_map(priv, priv->tx_prio_map, -prof->tx_ring_num - MLX4_EN_NUM_HASH_RINGS); +priv->tx_ring_num - MLX4_EN_NUM_HASH_RINGS); return 0; @@ -1193,6 +1186,83 @@ static int mlx4_en_set_tx_ring_size(SYSC return (error); } +static int mlx4_en_set_tx_ppp(SYSCTL_HANDLER_ARGS) +{ + struct mlx4_en_priv *priv; + int ppp; + int error; + + priv = arg1; + ppp = priv->prof->tx_ppp; + error = sysctl_handle_int(oidp, &ppp, 0, req); + if (error || !req->newptr) + return (error); + if (ppp > 0xff || ppp < 0) + return (-EINVAL); + priv->prof->tx_ppp = ppp; + error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, + priv->rx_mb_size + ETHER_CRC_LEN, + priv->prof->tx_pause, + priv->prof->tx_ppp, + priv->prof->rx_pause, + priv->prof->rx_ppp); + + return (error); +} + +static int mlx4_en_set_rx_ppp(SYSCTL_HANDLER_ARGS) +{ + struct mlx4_en_priv *priv; + struct mlx4_en_dev *mdev; + int tx_ring_num; + int ppp; + int error; + int port_up; + + port_up = 0; + priv = arg1; + mdev = priv->mdev; + ppp = priv->prof->rx_ppp; + error = sysctl_handle_int(oidp, &ppp, 0, req); + if (error || !req->newptr) + return (error); + if (ppp > 0xff || ppp < 0) + return (-EINVAL); + /* See if we have to change the number of tx queues. 
*/ + if (!ppp != !priv->prof->rx_ppp) { + tx_ring_num = MLX4_EN_NUM_HASH_RINGS + 1 + + (!!ppp) * MLX4_EN_NUM_PPP_RINGS; +
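The new tx_ppp/rx_ppp controls follow the usual FreeBSD SYSCTL_PROC
pattern; a minimal generic sketch of that pattern, with invented names,
is shown below for readers who have not written a sysctl handler before:
read out the current value, let sysctl_handle_int() do the copyin/copyout,
and only commit the new value after validating it.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

static int example_value;

/* Illustrative handler, not the driver code. */
static int
example_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	int error, val;

	val = example_value;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (val < 0 || val > 0xff)
		return (EINVAL);
	example_value = val;
	return (0);
}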
svn commit: r219893 - head/sys/ofed/drivers/net/mlx4
Author: jeff Date: Wed Mar 23 02:47:04 2011 New Revision: 219893 URL: http://svn.freebsd.org/changeset/base/219893 Log: - Correct the vlan filter programming. The device filter is built in reverse order. - Name the cq taskqueues according to whether they handle rx or tx. - Default LRO to on. Modified: head/sys/ofed/drivers/net/mlx4/en_cq.c head/sys/ofed/drivers/net/mlx4/en_netdev.c head/sys/ofed/drivers/net/mlx4/en_port.c Modified: head/sys/ofed/drivers/net/mlx4/en_cq.c == --- head/sys/ofed/drivers/net/mlx4/en_cq.c Wed Mar 23 01:26:21 2011 (r219892) +++ head/sys/ofed/drivers/net/mlx4/en_cq.c Wed Mar 23 02:47:04 2011 (r219893) @@ -51,21 +51,23 @@ int mlx4_en_create_cq(struct mlx4_en_pri int err; cq->size = entries; + cq->tq = taskqueue_create_fast("mlx4_en_que", M_NOWAIT, + taskqueue_thread_enqueue, &cq->tq); if (mode == RX) { cq->buf_size = cq->size * sizeof(struct mlx4_cqe); cq->vector = (ring + priv->port) % mdev->dev->caps.num_comp_vectors; TASK_INIT(&cq->cq_task, 0, mlx4_en_rx_que, cq); + taskqueue_start_threads(&cq->tq, 1, PI_NET, "%s rx cq", + if_name(priv->dev)); } else { cq->buf_size = sizeof(struct mlx4_cqe); cq->vector = MLX4_LEAST_ATTACHED_VECTOR; TASK_INIT(&cq->cq_task, 0, mlx4_en_tx_que, cq); + taskqueue_start_threads(&cq->tq, 1, PI_NET, "%s tx cq", + if_name(priv->dev)); } - cq->tq = taskqueue_create_fast("mlx4_en_que", M_NOWAIT, - taskqueue_thread_enqueue, &cq->tq); - taskqueue_start_threads(&cq->tq, 1, PI_NET, "%s cq", - if_name(priv->dev)); cq->ring = ring; cq->is_tx = mode; mtx_init(&cq->lock.m, "mlx4 cq", NULL, MTX_DEF); Modified: head/sys/ofed/drivers/net/mlx4/en_netdev.c == --- head/sys/ofed/drivers/net/mlx4/en_netdev.c Wed Mar 23 01:26:21 2011 (r219892) +++ head/sys/ofed/drivers/net/mlx4/en_netdev.c Wed Mar 23 02:47:04 2011 (r219893) @@ -53,13 +53,11 @@ static void mlx4_en_vlan_rx_add_vid(void if ((vid == 0) || (vid > 4095))/* Invalid */ return; - en_dbg(HW, priv, "adding VLAN:%d\n", vid); - - spin_lock(&priv->vlan_lock); - priv->vlgrp_modified = true; idx = vid >> 5; field = 1 << (vid & 0x1f); + spin_lock(&priv->vlan_lock); + priv->vlgrp_modified = true; if (priv->vlan_unregister[idx] & field) priv->vlan_unregister[idx] &= ~field; else @@ -77,10 +75,10 @@ static void mlx4_en_vlan_rx_kill_vid(voi if ((vid == 0) || (vid > 4095))/* Invalid */ return; en_dbg(HW, priv, "Killing VID:%d\n", vid); - spin_lock(&priv->vlan_lock); - priv->vlgrp_modified = true; idx = vid >> 5; field = 1 << (vid & 0x1f); + spin_lock(&priv->vlan_lock); + priv->vlgrp_modified = true; if (priv->vlan_register[idx] & field) priv->vlan_register[idx] &= ~field; else @@ -1541,12 +1539,9 @@ int mlx4_en_init_netdev(struct mlx4_en_d #endif if (mdev->LSO_support) dev->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO; - - /* Don't enable LOR unless the user requests. 
*/ - dev->if_capenable = dev->if_capabilities; - if (mdev->profile.num_lro) dev->if_capabilities |= IFCAP_LRO; + dev->if_capenable = dev->if_capabilities; /* Register for VLAN events */ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, Modified: head/sys/ofed/drivers/net/mlx4/en_port.c == --- head/sys/ofed/drivers/net/mlx4/en_port.cWed Mar 23 01:26:21 2011 (r219892) +++ head/sys/ofed/drivers/net/mlx4/en_port.cWed Mar 23 02:47:04 2011 (r219893) @@ -51,7 +51,7 @@ int mlx4_SET_VLAN_FLTR(struct mlx4_dev * { struct mlx4_cmd_mailbox *mailbox; struct mlx4_set_vlan_fltr_mbox *filter; - int i; + int i, j; int err = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); @@ -61,8 +61,9 @@ int mlx4_SET_VLAN_FLTR(struct mlx4_dev * filter = mailbox->buf; memset(filter, 0, sizeof *filter); if (vlans) - for (i = 0; i < VLAN_FLTR_SIZE; i ++) - filter->entry[i] = cpu_to_be32(vlans[i]); + for (i = 0, j = VLAN_FLTR_SIZE - 1; i < VLAN_FLTR_SIZE; + i++, j--) + filter->entry[j] = cpu_to_be32(vlans[i]); err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_VLAN_FLTR, MLX4_CMD_TIME_CLA
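The filter reversal is just an index mapping.  The toy program below
(invented names, assuming a filter table of 128 32-bit words covering
4096 VLAN IDs) shows where a given VLAN ID lands in the host table and in
the reversed mailbox copy handed to the hardware.

#include <stdio.h>

#define	TOY_VLAN_FLTR_SIZE	128	/* 4096 VLAN IDs / 32 bits per word */

int
main(void)
{
	unsigned vid = 100;
	unsigned host_entry = vid >> 5;			/* word index */
	unsigned host_bit = vid & 0x1f;			/* bit within word */
	unsigned mbox_entry = TOY_VLAN_FLTR_SIZE - 1 - host_entry;

	printf("vid %u: host word %u bit %u -> mailbox word %u\n",
	    vid, host_entry, host_bit, mbox_entry);
	return (0);
}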
svn commit: r219898 - head/sys/modules
Author: jeff Date: Wed Mar 23 08:27:57 2011 New Revision: 219898 URL: http://svn.freebsd.org/changeset/base/219898 Log: - Move ofed modules into the i386 and amd64 specific sections to fix universe on other architectures. Modified: head/sys/modules/Makefile Modified: head/sys/modules/Makefile == --- head/sys/modules/Makefile Wed Mar 23 06:31:45 2011(r219897) +++ head/sys/modules/Makefile Wed Mar 23 08:27:57 2011(r219898) @@ -418,6 +418,12 @@ _linprocfs=linprocfs _linsysfs= linsysfs _linux=linux _mse= mse +.if ${MK_OFED} != "no" || defined(ALL_MODULES) +_mlx4= mlx4 +_mlx4ib= mlx4ib +_mlxen=mlxen +_mthca=mthca +.endif .if ${MK_NCP} != "no" _ncp= ncp .endif @@ -566,6 +572,12 @@ _linprocfs=linprocfs _linsysfs= linsysfs _linux=linux _mly= mly +.if ${MK_OFED} != "no" || defined(ALL_MODULES) +_mlx4= mlx4 +_mlx4ib= mlx4ib +_mlxen=mlxen +_mthca=mthca +.endif _ndis= ndis _nfe= nfe _nve= nve @@ -673,13 +685,6 @@ _zfs= zfs .endif .endif -.if ${MK_OFED} != "no" || defined(ALL_MODULES) -_mthca=mthca -_mlx4= mlx4 -_mlx4ib= mlx4ib -_mlxen=mlxen -.endif - .if defined(MODULES_OVERRIDE) && !defined(ALL_MODULES) SUBDIR=${MODULES_OVERRIDE} .endif @@ -697,6 +702,5 @@ afterinstall: kldxref ${DESTDIR}${KMODDIR}; \ fi .endif -#endif .include ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r220016 - in head/sys/ofed: drivers/net/mlx4 include/linux/mlx4
Author: jeff Date: Sat Mar 26 00:54:01 2011 New Revision: 220016 URL: http://svn.freebsd.org/changeset/base/220016 Log: - Implement wake-on-lan support in mlxen. Modified: head/sys/ofed/drivers/net/mlx4/en_ethtool.c head/sys/ofed/drivers/net/mlx4/en_netdev.c head/sys/ofed/drivers/net/mlx4/fw.c head/sys/ofed/drivers/net/mlx4/fw.h head/sys/ofed/drivers/net/mlx4/main.c head/sys/ofed/drivers/net/mlx4/mlx4_en.h head/sys/ofed/include/linux/mlx4/device.h Modified: head/sys/ofed/drivers/net/mlx4/en_ethtool.c == --- head/sys/ofed/drivers/net/mlx4/en_ethtool.c Sat Mar 26 00:34:35 2011 (r220015) +++ head/sys/ofed/drivers/net/mlx4/en_ethtool.c Sat Mar 26 00:54:01 2011 (r220016) @@ -494,6 +494,7 @@ const struct ethtool_ops mlx4_en_ethtool .get_ethtool_stats = mlx4_en_get_ethtool_stats, .self_test = mlx4_en_self_test, .get_wol = mlx4_en_get_wol, + .set_wol = mlx4_en_set_wol, .get_msglevel = mlx4_en_get_msglevel, .set_msglevel = mlx4_en_set_msglevel, .get_coalesce = mlx4_en_get_coalesce, Modified: head/sys/ofed/drivers/net/mlx4/en_netdev.c == --- head/sys/ofed/drivers/net/mlx4/en_netdev.c Sat Mar 26 00:34:35 2011 (r220015) +++ head/sys/ofed/drivers/net/mlx4/en_netdev.c Sat Mar 26 00:54:01 2011 (r220016) @@ -532,6 +532,7 @@ int mlx4_en_start_port(struct net_device struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_cq *cq; struct mlx4_en_tx_ring *tx_ring; + u64 config; int rx_index = 0; int tx_index = 0; int err = 0; @@ -662,6 +663,25 @@ int mlx4_en_start_port(struct net_device else priv->rx_csum = 0; + err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); + if (err) { + en_err(priv, "Failed to get WoL info, unable to modify\n"); + goto wol_err; + } + if (dev->if_capenable & IFCAP_WOL_MAGIC) { + config |= MLX4_EN_WOL_DO_MODIFY | MLX4_EN_WOL_ENABLED | + MLX4_EN_WOL_MAGIC; + } else { + config &= ~(MLX4_EN_WOL_ENABLED | MLX4_EN_WOL_MAGIC); + config |= MLX4_EN_WOL_DO_MODIFY; + } + + err = mlx4_wol_write(priv->mdev->dev, config, priv->port); + if (err) { + en_err(priv, "Failed to set WoL information\n"); + goto wol_err; + } + priv->port_up = true; /* Populate multicast list */ @@ -676,6 +696,10 @@ int mlx4_en_start_port(struct net_device return 0; +wol_err: + /* close port*/ + mlx4_CLOSE_PORT(mdev->dev, priv->port); + mac_err: mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index); tx_err: @@ -1095,6 +1119,8 @@ static int mlx4_en_ioctl(struct ifnet *d dev->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) dev->if_capenable ^= IFCAP_VLAN_HWFILTER; + if (mask & IFCAP_WOL_MAGIC) + dev->if_capenable ^= IFCAP_WOL_MAGIC; if (dev->if_drv_flags & IFF_DRV_RUNNING) mlx4_en_init(priv); VLAN_CAPABILITIES(dev); @@ -1534,14 +1560,23 @@ int mlx4_en_init_netdev(struct mlx4_en_d dev->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; dev->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER; dev->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU; -#if 0 /* Not yet */ - dev->if_capabilities |= IFCAP_WOL; -#endif if (mdev->LSO_support) dev->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO; if (mdev->profile.num_lro) dev->if_capabilities |= IFCAP_LRO; dev->if_capenable = dev->if_capabilities; + /* +* Setup wake-on-lan. 
+*/ + if (priv->mdev->dev->caps.wol) { + u64 config; + if (mlx4_wol_read(priv->mdev->dev, &config, priv->port) == 0) { + if (config & MLX4_EN_WOL_MAGIC) + dev->if_capabilities |= IFCAP_WOL_MAGIC; + if (config & MLX4_EN_WOL_ENABLED) + dev->if_capenable |= IFCAP_WOL_MAGIC; + } + } /* Register for VLAN events */ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, Modified: head/sys/ofed/drivers/net/mlx4/fw.c == --- head/sys/ofed/drivers/net/mlx4/fw.c Sat Mar 26 00:34:35 2011 (r220015) +++ head/sys/ofed/drivers/net/mlx4/fw.c Sat Mar 26 00:54:01 2011 (r220016) @@ -289,6 +289,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev * dev_cap->udp_rss = field & 0x1; MLX4_GET(field, outbox, QUERY_DEV_CAP_ETH_UC_
svn commit: r220282 - head/sys/ufs/ffs
Author: jeff Date: Sat Apr 2 21:52:58 2011 New Revision: 220282 URL: http://svn.freebsd.org/changeset/base/220282 Log: Fix problems that manifested from filesystem full conditions: - In softdep_revert_mkdir() find the dotaddref before we attempt to cancel the jaddref so we can make assumptions about where the dotaddref is on the list. cancel_jaddref() does not always remove items from the list anymore. - Always set GOINGAWAY on an inode in softdep_freefile() if DEPCOMPLETE was never set. This ensures that dependencies will continue to be processed on the inowait/bufwait list and is more an artifact of the structure of the code than a pure ordering problem. - Always set DEPCOMPLETE on canceled jaddrefs so that they can be freed appropriately. This normally occurs when the refs are added to the journal but if they are canceled before this point the state would never be set and the dependency could never be freed. Reported by: pho Tested by:pho Modified: head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Sat Apr 2 16:02:25 2011 (r220281) +++ head/sys/ufs/ffs/ffs_softdep.c Sat Apr 2 21:52:58 2011 (r220282) @@ -3501,10 +3501,14 @@ cancel_jaddref(jaddref, inodedep, wkhd) * us so that it is consistent with the in-memory reference. This * ensures that inode nlink rollbacks always have the correct link. */ - if (needsj == 0) + if (needsj == 0) { for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref; - inoref = TAILQ_NEXT(inoref, if_deps)) + inoref = TAILQ_NEXT(inoref, if_deps)) { + if (inoref->if_state & GOINGAWAY) + break; inoref->if_nlink--; + } + } jsegdep = inoref_jseg(&jaddref->ja_ref); if (jaddref->ja_state & NEWBLOCK) move_newblock_dep(jaddref, inodedep); @@ -3522,6 +3526,7 @@ cancel_jaddref(jaddref, inodedep, wkhd) if (jaddref->ja_state & DEPCOMPLETE) remove_from_journal(&jaddref->ja_list); } + jaddref->ja_state |= (GOINGAWAY | DEPCOMPLETE); /* * Leave NEWBLOCK jaddrefs on the inodedep so handle_workitem_remove * can arrange for them to be freed with the bitmap. Otherwise we @@ -3535,7 +3540,6 @@ cancel_jaddref(jaddref, inodedep, wkhd) free_jaddref(jaddref); return (needsj); } - jaddref->ja_state |= GOINGAWAY; /* * Leave the head of the list for jsegdeps for fast merging. */ @@ -4071,6 +4075,7 @@ softdep_revert_mkdir(dp, ip) { struct inodedep *inodedep; struct jaddref *jaddref; + struct jaddref *dotaddref; struct vnode *dvp; dvp = ITOV(dp); @@ -4090,12 +4095,12 @@ softdep_revert_mkdir(dp, ip) inoreflst); KASSERT(jaddref->ja_parent == dp->i_number, ("softdep_revert_mkdir: addref parent mismatch")); + dotaddref = (struct jaddref *)TAILQ_PREV(&jaddref->ja_ref, + inoreflst, if_deps); cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); - jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, - inoreflst); - KASSERT(jaddref->ja_parent == ip->i_number, + KASSERT(dotaddref->ja_parent == ip->i_number, ("softdep_revert_mkdir: dot addref parent mismatch")); - cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + cancel_jaddref(dotaddref, inodedep, &inodedep->id_inowait); } FREE_LOCK(&lk); } @@ -5734,14 +5739,14 @@ softdep_freefile(pvp, ino, mode) clear_unlinked_inodedep(inodedep); /* Re-acquire inodedep as we've dropped lk. 
*/ inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); - if (inodedep && (inodedep->id_state & DEPCOMPLETE) == 0) - inodedep->id_state |= GOINGAWAY; } if (inodedep == NULL || check_inode_unwritten(inodedep)) { FREE_LOCK(&lk); handle_workitem_freefile(freefile); return; } + if (inodedep && (inodedep->id_state & DEPCOMPLETE) == 0) + inodedep->id_state |= GOINGAWAY; WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list); FREE_LOCK(&lk); if (ip->i_number == ino) ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
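The first item above is a list-ordering subtlety: because cancel_jaddref() no longer reliably unlinks the entry, the ".." reference's neighbour has to be captured with TAILQ_PREV() before the first cancel rather than re-read with TAILQ_LAST() afterwards. A minimal userland sketch of that pattern, using FreeBSD's <sys/queue.h>; the struct names here are made up and this is only an illustration of the idiom, not the softdep code:

#include <sys/queue.h>
#include <stdio.h>

struct ref {
    TAILQ_ENTRY(ref) link;
    int id;
};
TAILQ_HEAD(reflist, ref);

int
main(void)
{
    struct reflist head = TAILQ_HEAD_INITIALIZER(head);
    struct ref dot = { .id = 1 }, dotdot = { .id = 2 };
    struct ref *last, *prev;

    TAILQ_INSERT_TAIL(&head, &dot, link);
    TAILQ_INSERT_TAIL(&head, &dotdot, link);

    last = TAILQ_LAST(&head, reflist);       /* the ".." reference */
    prev = TAILQ_PREV(last, reflist, link);  /* save "." before cancelling */
    /* cancel(last) may or may not unlink it; prev stays valid either way. */
    printf("last=%d prev=%d\n", last->id, prev->id);
    return (0);
}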
svn commit: r220406 - head/sys/ufs/ffs
Author: jeff Date: Thu Apr 7 03:19:10 2011 New Revision: 220406 URL: http://svn.freebsd.org/changeset/base/220406 Log: - Don't invalidate jnewblks immediately upon discovering that the block will be removed. Permit the journal to proceed so that we don't leave a rollback in a cg for a very long time as this can cause terrible perf problems in low memory situations. Tested by: pho Modified: head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/softdep.h Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Thu Apr 7 00:30:32 2011 (r220405) +++ head/sys/ufs/ffs/ffs_softdep.c Thu Apr 7 03:19:10 2011 (r220406) @@ -766,7 +766,8 @@ static inline void inoref_write(struct i struct jrefrec *); static void handle_allocdirect_partdone(struct allocdirect *, struct workhead *); -static void cancel_newblk(struct newblk *, struct workhead *); +static struct jnewblk *cancel_newblk(struct newblk *, struct worklist *, + struct workhead *); static void indirdep_complete(struct indirdep *); static void handle_allocindir_partdone(struct allocindir *); static void initiate_write_filepage(struct pagedep *, struct buf *); @@ -826,6 +827,8 @@ static void handle_complete_freeblocks(s static void handle_workitem_indirblk(struct freework *); static void handle_written_freework(struct freework *); static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *); +static struct worklist *jnewblk_merge(struct worklist *, struct worklist *, + struct workhead *); static void setup_allocindir_phase2(struct buf *, struct inode *, struct inodedep *, struct allocindir *, ufs_lbn_t); static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t, @@ -3125,33 +3128,72 @@ handle_written_jaddref(jaddref) /* * Called once a jnewblk journal is written. The allocdirect or allocindir - * is placed in the bmsafemap to await notification of a written bitmap. + * is placed in the bmsafemap to await notification of a written bitmap. If + * the operation was canceled we add the segdep to the appropriate + * dependency to free the journal space once the canceling operation + * completes. */ static void handle_written_jnewblk(jnewblk) struct jnewblk *jnewblk; { struct bmsafemap *bmsafemap; + struct freefrag *freefrag; struct jsegdep *jsegdep; struct newblk *newblk; + struct freework *freework; + struct indirdep *indirdep; /* Grab the jsegdep. */ jsegdep = jnewblk->jn_jsegdep; jnewblk->jn_jsegdep = NULL; - /* -* Add the written block to the bmsafemap so it can be notified when -* the bitmap is on disk. -*/ - newblk = jnewblk->jn_newblk; - jnewblk->jn_newblk = NULL; - if (newblk == NULL) + if (jnewblk->jn_dep == NULL) panic("handle_written_jnewblk: No dependency for the segdep."); - - newblk->nb_jnewblk = NULL; - bmsafemap = newblk->nb_bmsafemap; - WORKLIST_INSERT(&newblk->nb_jwork, &jsegdep->jd_list); - newblk->nb_state |= ONDEPLIST; - LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); + switch (jnewblk->jn_dep->wk_type) { + case D_NEWBLK: + case D_ALLOCDIRECT: + case D_ALLOCINDIR: + /* +* Add the written block to the bmsafemap so it can +* be notified when the bitmap is on disk. +*/ + newblk = WK_NEWBLK(jnewblk->jn_dep); + newblk->nb_jnewblk = NULL; + bmsafemap = newblk->nb_bmsafemap; + newblk->nb_state |= ONDEPLIST; + LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); + WORKLIST_INSERT(&newblk->nb_jwork, &jsegdep->jd_list); + break; + case D_FREEFRAG: + /* +* A newblock being removed by a freefrag when replaced by +* frag extension. 
+*/ + freefrag = WK_FREEFRAG(jnewblk->jn_dep); + freefrag->ff_jdep = NULL; + WORKLIST_INSERT(&freefrag->ff_jwork, &jsegdep->jd_list); + break; + case D_FREEWORK: + /* +* A direct block was removed by truncate. +*/ + freework = WK_FREEWORK(jnewblk->jn_dep); + freework->fw_jnewblk = NULL; + WORKLIST_INSERT(&freework->fw_jwork, &jsegdep->jd_list); + break; + case D_INDIRDEP: + /* +* An indirect block was removed by truncate. +*/ + indirdep = WK_INDIRDEP(jnewblk->jn_dep); + LIST_REMOVE(jnewblk, jn_indirdeps); + WORKLIST_INSERT(&indirdep->ir_jwork, &jsegdep->jd_list); +
svn commit: r220511 - head/sys/ufs/ffs
Author: jeff Date: Sun Apr 10 03:49:53 2011 New Revision: 220511 URL: http://svn.freebsd.org/changeset/base/220511 Log: Fix a long standing SUJ performance problem: - Keep a hash of indirect blocks that have recently been freed and are still referenced in the journal. - Lookup blocks in this hash before forcing a new block write to wait on the journal entry to hit the disk. This is only necessary to avoid confusion between old identities as indirects and new identities as file blocks. - Don't free jseg structures until the journal has written a record that invalidates it. This keeps the indirect block information around for as long as is required to be safe. - Force an empty journal block write when required to flush out stale journal data that is simply waiting for the oldest valid sequence number to advance beyond it. Modified: head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/softdep.h Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Sun Apr 10 01:54:42 2011 (r220510) +++ head/sys/ufs/ffs/ffs_softdep.c Sun Apr 10 03:49:53 2011 (r220511) @@ -753,8 +753,7 @@ static void handle_written_jnewblk(struc static void handle_written_jfreeblk(struct jfreeblk *); static void handle_written_jfreefrag(struct jfreefrag *); static void complete_jseg(struct jseg *); -static void jseg_write(struct ufsmount *ump, struct jblocks *, struct jseg *, - uint8_t *); +static void jseg_write(struct ufsmount *ump, struct jseg *, uint8_t *); static void jaddref_write(struct jaddref *, struct jseg *, uint8_t *); static void jremref_write(struct jremref *, struct jseg *, uint8_t *); static void jmvref_write(struct jmvref *, struct jseg *, uint8_t *); @@ -769,6 +768,7 @@ static void handle_allocdirect_partdone( static struct jnewblk *cancel_newblk(struct newblk *, struct worklist *, struct workhead *); static void indirdep_complete(struct indirdep *); +static int indirblk_inseg(struct mount *, ufs2_daddr_t); static void handle_allocindir_partdone(struct allocindir *); static void initiate_write_filepage(struct pagedep *, struct buf *); static void initiate_write_indirdep(struct indirdep*, struct buf *); @@ -802,7 +802,9 @@ static void free_newdirblk(struct newdir static void free_jremref(struct jremref *); static void free_jaddref(struct jaddref *); static void free_jsegdep(struct jsegdep *); -static void free_jseg(struct jseg *); +static void free_jsegs(struct jblocks *); +static void rele_jseg(struct jseg *); +static void free_jseg(struct jseg *, struct jblocks *); static void free_jnewblk(struct jnewblk *); static void free_jfreeblk(struct jfreeblk *); static void free_jfreefrag(struct jfreefrag *); @@ -872,7 +874,7 @@ static int journal_unsuspend(struct ufsm static void softdep_prelink(struct vnode *, struct vnode *); static void add_to_journal(struct worklist *); static void remove_from_journal(struct worklist *); -static void softdep_process_journal(struct mount *, int); +static void softdep_process_journal(struct mount *, struct worklist *, int); static struct jremref *newjremref(struct dirrem *, struct inode *, struct inode *ip, off_t, nlink_t); static struct jaddref *newjaddref(struct inode *, ino_t, off_t, int16_t, @@ -1376,7 +1378,7 @@ softdep_process_worklist(mp, full) ump = VFSTOUFS(mp); ACQUIRE_LOCK(&lk); starttime = time_second; - softdep_process_journal(mp, full?MNT_WAIT:0); + softdep_process_journal(mp, NULL, full?MNT_WAIT:0); while (ump->softdep_on_worklist > 0) { if ((cnt = process_worklist_item(mp, LK_NOWAIT)) == -1) break; @@ -1999,6 +2001,37 @@ newblk_lookup(mp, 
newblkno, flags, newbl } /* + * Structures and routines associated with indir caching. + */ +struct workhead *indir_hashtbl; +u_long indir_hash; /* size of hash table - 1 */ +#defineINDIR_HASH(mp, blkno) \ + (&indir_hashtbl[register_t)(mp)) >> 13) + (blkno)) & indir_hash]) + +static int +indirblk_inseg(mp, blkno) + struct mount *mp; + ufs2_daddr_t blkno; +{ + struct freework *freework; + struct workhead *wkhd; + struct worklist *wk; + + wkhd = INDIR_HASH(mp, blkno); + LIST_FOREACH(wk, wkhd, wk_list) { + freework = WK_FREEWORK(wk); + if (freework->fw_blkno == blkno && + freework->fw_list.wk_mp == mp) { + LIST_REMOVE(freework, fw_next); + WORKLIST_REMOVE(&freework->fw_list); + WORKITEM_FREE(freework, D_FREEWORK); + return (1); + } + } + return (0); +} + +/* * Executed during filesystem system initialization before * mounting any filesystems. */ @@ -2012,6 +2045,7 @@ softd
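The heart of this change is the small hash of recently freed indirect block numbers that allocation consults before forcing a wait on the journal. A rough userland model of that lookup follows; the names, the bucket count, and the hash function are illustrative only, not the kernel's:

#include <sys/queue.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NBUCKETS 64                 /* power of two, like indir_hash + 1 */

struct freedblk {
    LIST_ENTRY(freedblk) link;
    uint64_t blkno;
};
LIST_HEAD(blkhead, freedblk);
static struct blkhead hashtbl[NBUCKETS];

static struct blkhead *
hashchain(uint64_t blkno)
{
    return (&hashtbl[blkno & (NBUCKETS - 1)]);
}

static void
record_freed_indir(uint64_t blkno)
{
    struct freedblk *fb = malloc(sizeof(*fb));

    if (fb == NULL)
        return;
    fb->blkno = blkno;
    LIST_INSERT_HEAD(hashchain(blkno), fb, link);
}

/* True (and the entry is consumed) if blkno was recently an indirect. */
static bool
was_recent_indir(uint64_t blkno)
{
    struct freedblk *fb;

    LIST_FOREACH(fb, hashchain(blkno), link) {
        if (fb->blkno != blkno)
            continue;
        LIST_REMOVE(fb, link);
        free(fb);
        return (true);
    }
    return (false);
}

int
main(void)
{
    record_freed_indir(12345);
    printf("%d %d\n", was_recent_indir(12345), was_recent_indir(12345));
    return (0);
}

A hit means the block's old identity as an indirect is already recorded in the journal, so the new write does not need to stall; in the kernel the hit also retires the stale freework, which is why the lookup removes the entry.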
Re: svn commit: r220511 - head/sys/ufs/ffs
On Sun, 10 Apr 2011, Jeff Roberson wrote: Author: jeff Date: Sun Apr 10 03:49:53 2011 New Revision: 220511 URL: http://svn.freebsd.org/changeset/base/220511 Log: Fix a long standing SUJ performance problem: This brought my dbench performance to within 10-15% of softupdates on a real disk depending on concurrency. There are cases where it outperforms softupdates as well. Over time I can eliminate the extra blocking IO waits that cause the remaining degradation on this test. For now I'm going to focus on the mksnap bug that has been reported in several forms. Thanks, Jeff - Keep a hash of indirect blocks that have recently been freed and are still referenced in the journal. - Lookup blocks in this hash before forcing a new block write to wait on the journal entry to hit the disk. This is only necessary to avoid confusion between old identities as indirects and new identities as file blocks. - Don't free jseg structures until the journal has written a record that invalidates it. This keeps the indirect block information around for as long as is required to be safe. - Force an empty journal block write when required to flush out stale journal data that is simply waiting for the oldest valid sequence number to advance beyond it. Modified: head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/softdep.h Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Sun Apr 10 01:54:42 2011 (r220510) +++ head/sys/ufs/ffs/ffs_softdep.c Sun Apr 10 03:49:53 2011 (r220511) @@ -753,8 +753,7 @@ static void handle_written_jnewblk(struc static void handle_written_jfreeblk(struct jfreeblk *); static void handle_written_jfreefrag(struct jfreefrag *); static void complete_jseg(struct jseg *); -static void jseg_write(struct ufsmount *ump, struct jblocks *, struct jseg *, - uint8_t *); +static void jseg_write(struct ufsmount *ump, struct jseg *, uint8_t *); static void jaddref_write(struct jaddref *, struct jseg *, uint8_t *); static void jremref_write(struct jremref *, struct jseg *, uint8_t *); static void jmvref_write(struct jmvref *, struct jseg *, uint8_t *); @@ -769,6 +768,7 @@ static void handle_allocdirect_partdone( static struct jnewblk *cancel_newblk(struct newblk *, struct worklist *, struct workhead *); static void indirdep_complete(struct indirdep *); +static int indirblk_inseg(struct mount *, ufs2_daddr_t); static void handle_allocindir_partdone(struct allocindir *); static void initiate_write_filepage(struct pagedep *, struct buf *); static void initiate_write_indirdep(struct indirdep*, struct buf *); @@ -802,7 +802,9 @@ static void free_newdirblk(struct newdir static void free_jremref(struct jremref *); static void free_jaddref(struct jaddref *); static void free_jsegdep(struct jsegdep *); -static void free_jseg(struct jseg *); +static void free_jsegs(struct jblocks *); +static void rele_jseg(struct jseg *); +static void free_jseg(struct jseg *, struct jblocks *); static void free_jnewblk(struct jnewblk *); static void free_jfreeblk(struct jfreeblk *); static void free_jfreefrag(struct jfreefrag *); @@ -872,7 +874,7 @@ static int journal_unsuspend(struct ufsm static void softdep_prelink(struct vnode *, struct vnode *); static void add_to_journal(struct worklist *); static void remove_from_journal(struct worklist *); -static void softdep_process_journal(struct mount *, int); +static void softdep_process_journal(struct mount *, struct worklist *, int); static struct jremref *newjremref(struct dirrem *, struct inode *, struct inode *ip, off_t, nlink_t); static struct jaddref 
*newjaddref(struct inode *, ino_t, off_t, int16_t, @@ -1376,7 +1378,7 @@ softdep_process_worklist(mp, full) ump = VFSTOUFS(mp); ACQUIRE_LOCK(&lk); starttime = time_second; - softdep_process_journal(mp, full?MNT_WAIT:0); + softdep_process_journal(mp, NULL, full?MNT_WAIT:0); while (ump->softdep_on_worklist > 0) { if ((cnt = process_worklist_item(mp, LK_NOWAIT)) == -1) break; @@ -1999,6 +2001,37 @@ newblk_lookup(mp, newblkno, flags, newbl } /* + * Structures and routines associated with indir caching. + */ +struct workhead *indir_hashtbl; +u_long indir_hash; /* size of hash table - 1 */ +#defineINDIR_HASH(mp, blkno) \ + (&indir_hashtbl[register_t)(mp)) >> 13) + (blkno)) & indir_hash]) + +static int +indirblk_inseg(mp, blkno) + struct mount *mp; + ufs2_daddr_t blkno; +{ + struct freework *freework; + struct workhead *wkhd; + struct worklist *wk; + + wkhd = INDIR_HASH(mp, blkno); + LIST_FOREACH(wk, wkhd, wk_list) { + freework = WK_FREEWORK(wk); + if (freework->fw_blkno ==
svn commit: r220532 - head/sys/ufs/ffs
Author: jeff Date: Mon Apr 11 01:43:59 2011 New Revision: 220532 URL: http://svn.freebsd.org/changeset/base/220532 Log: - Refactor softdep_setup_freeblocks() into a set of functions to prepare for a new journal specific partial truncate routine. - Use dep_current[] in place of specific dependency counts. This is automatically maintained when workitems are allocated and has less risk of becoming incorrect. Modified: head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Mon Apr 11 01:19:02 2011 (r220531) +++ head/sys/ufs/ffs/ffs_softdep.c Mon Apr 11 01:43:59 2011 (r220532) @@ -815,9 +815,19 @@ static void cancel_jnewblk(struct jnewbl static int cancel_jaddref(struct jaddref *, struct inodedep *, struct workhead *); static void cancel_jfreefrag(struct jfreefrag *); +static inline void setup_freedirect(struct freeblks *, struct inode *, + int, int); +static inline void setup_freeext(struct freeblks *, struct inode *, int, int); +static inline void setup_freeindir(struct freeblks *, struct inode *, int i, + ufs_lbn_t, int); +static inline struct freeblks *newfreeblks(struct mount *, struct inode *); static void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t); -static int deallocate_dependencies(struct buf *, struct inodedep *, +static void softdep_trunc_deps(struct vnode *, struct freeblks *, ufs_lbn_t, + int, int); +static int cancel_pagedep(struct pagedep *, struct inodedep *, struct freeblks *); +static int deallocate_dependencies(struct buf *, struct inodedep *, + struct freeblks *, int off); static void free_newblk(struct newblk *); static void cancel_allocdirect(struct allocdirectlst *, struct allocdirect *, struct freeblks *, int); @@ -1114,7 +1124,6 @@ static struct callout softdep_callout; static int req_pending; static int req_clear_inodedeps;/* syncer process flush some inodedeps */ static int req_clear_remove; /* syncer process flush some freeblks */ -static long num_freeblkdep;/* number of freeblks workitems allocated */ /* * runtime statistics @@ -1832,7 +1841,6 @@ pagedep_lookup(mp, ino, lbn, flags, page */ LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl; static u_long inodedep_hash; /* size of hash table - 1 */ -static longnum_inodedep; /* number of inodedep allocated */ #defineINODEDEP_HASH(fs, inum) \ (&inodedep_hashtbl[register_t)(fs)) >> 13) + (inum)) & inodedep_hash]) @@ -1884,7 +1892,7 @@ inodedep_lookup(mp, inum, flags, inodede /* * If we are over our limit, try to improve the situation. */ - if (num_inodedep > max_softdeps && (flags & NODELAY) == 0) + if (dep_current[D_INODEDEP] > max_softdeps && (flags & NODELAY) == 0) request_cleanup(mp, FLUSH_INODES); FREE_LOCK(&lk); inodedep = malloc(sizeof(struct inodedep), @@ -1895,7 +1903,6 @@ inodedep_lookup(mp, inum, flags, inodede WORKITEM_FREE(inodedep, D_INODEDEP); return (1); } - num_inodedep += 1; inodedep->id_fs = fs; inodedep->id_ino = inum; inodedep->id_state = ALLCOMPLETE; @@ -2472,7 +2479,7 @@ journal_space(ump, thresh) * We use a tighter restriction here to prevent request_cleanup() * running in threads from running into locks we currently hold. 
*/ - if (num_inodedep > (max_softdeps / 10) * 9) + if (dep_current[D_INODEDEP] > (max_softdeps / 10) * 9) return (0); if (thresh) thresh = jblocks->jb_min; @@ -5340,6 +5347,83 @@ allocindir_merge(aip, oldaip) return (freefrag); } +static inline void +setup_freedirect(freeblks, ip, i, needj) + struct freeblks *freeblks; + struct inode *ip; + int i; + int needj; +{ + ufs2_daddr_t blkno; + int frags; + + blkno = DIP(ip, i_db[i]); + if (blkno == 0) + return; + DIP_SET(ip, i_db[i], 0); + frags = sblksize(ip->i_fs, ip->i_size, i); + frags = numfrags(ip->i_fs, frags); + newfreework(ip->i_ump, freeblks, NULL, i, blkno, frags, needj); +} + +static inline void +setup_freeext(freeblks, ip, i, needj) + struct freeblks *freeblks; + struct inode *ip; + int i; + int needj; +{ + ufs2_daddr_t blkno; + int frags; + + blkno = ip->i_din2->di_extb[i]; + if (blkno == 0) + return; + ip->i_din2->di_extb[i] = 0; + frags = sblksize(ip->i_fs, ip->i_din2->di_extsize, i); + frags = numfrags(ip->i_fs, frags); + newfreework(ip->i_ump, freeblks, NULL, -1 - i, blkno, frags, needj); +} + +static inline void +setup_freeindir(freeblks, ip, i, lbn, needj) + struct freeblks *freeblks;
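The dep_current[] part of this change is simple bookkeeping: instead of separate hand-maintained counters such as num_inodedep and num_freeblkdep, the limits are checked against the per-type counts that the workitem allocator already keeps. A tiny model of that idea, with the type list and function names chosen only for illustration:

#include <stdlib.h>

enum { D_PAGEDEP, D_INODEDEP, D_FREEBLKS, D_LAST };   /* illustrative subset */
static long dep_current[D_LAST];

void *
workitem_alloc(size_t size, int type)
{
    dep_current[type]++;        /* every dependency type is counted here */
    return (calloc(1, size));
}

void
workitem_free(void *item, int type)
{
    dep_current[type]--;
    free(item);
}

Because the counters live at the single allocation/free choke point, they cannot drift out of sync the way per-call-site counters can.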
svn commit: r221055 - in head/sys: kern ofed/include/linux
Author: jeff Date: Tue Apr 26 07:30:52 2011 New Revision: 221055 URL: http://svn.freebsd.org/changeset/base/221055 Log: - Catch up to falloc() changes. - PHOLD() before using a task structure on the stack. - Fix a LOR between the sleepq lock and thread lock in _intr_drain(). Modified: head/sys/kern/kern_intr.c head/sys/ofed/include/linux/file.h head/sys/ofed/include/linux/workqueue.h Modified: head/sys/kern/kern_intr.c == --- head/sys/kern/kern_intr.c Tue Apr 26 04:52:35 2011(r221054) +++ head/sys/kern/kern_intr.c Tue Apr 26 07:30:52 2011(r221055) @@ -746,7 +746,6 @@ intr_handler_source(void *cookie) void _intr_drain(int irq) { - struct mtx *mtx; struct intr_event *ie; struct intr_thread *ithd; struct thread *td; @@ -758,13 +757,21 @@ _intr_drain(int irq) return; ithd = ie->ie_thread; td = ithd->it_thread; + /* +* We set the flag and wait for it to be cleared to avoid +* long delays with potentially busy interrupt handlers +* were we to only sample TD_AWAITING_INTR() every tick. +*/ thread_lock(td); - mtx = td->td_lock; if (!TD_AWAITING_INTR(td)) { ithd->it_flags |= IT_WAIT; - msleep_spin(ithd, mtx, "isync", 0); + while (ithd->it_flags & IT_WAIT) { + thread_unlock(td); + pause("idrain", 1); + thread_lock(td); + } } - mtx_unlock_spin(mtx); + thread_unlock(td); return; } Modified: head/sys/ofed/include/linux/file.h == --- head/sys/ofed/include/linux/file.h Tue Apr 26 04:52:35 2011 (r221054) +++ head/sys/ofed/include/linux/file.h Tue Apr 26 07:30:52 2011 (r221055) @@ -92,7 +92,7 @@ get_unused_fd(void) int error; int fd; - error = falloc(curthread, &file, &fd); + error = falloc(curthread, &file, &fd, 0); if (error) return -error; return fd; Modified: head/sys/ofed/include/linux/workqueue.h == --- head/sys/ofed/include/linux/workqueue.h Tue Apr 26 04:52:35 2011 (r221054) +++ head/sys/ofed/include/linux/workqueue.h Tue Apr 26 07:30:52 2011 (r221055) @@ -160,9 +160,11 @@ flush_taskqueue(struct taskqueue *tq) { struct task flushtask; + PHOLD(curproc); TASK_INIT(&flushtask, 0, _flush_fn, NULL); taskqueue_enqueue(tq, &flushtask); taskqueue_drain(tq, &flushtask); + PRELE(curproc); } static inline int ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
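The _intr_drain() part trades a sleep on the target thread's spin lock (the source of the lock-order reversal) for a polling loop that drops the lock around each short pause. A userland analogue of the pattern, with a pthread mutex standing in for the thread lock; this is only a sketch of the idiom, not the kernel code:

#include <pthread.h>
#include <unistd.h>

static pthread_mutex_t thread_lock = PTHREAD_MUTEX_INITIALIZER;
static int it_wait;             /* set by the drainer, cleared by the handler */

void
drain(void)
{
    pthread_mutex_lock(&thread_lock);
    it_wait = 1;
    while (it_wait) {
        pthread_mutex_unlock(&thread_lock);
        usleep(1000);           /* roughly pause("idrain", 1) */
        pthread_mutex_lock(&thread_lock);
    }
    pthread_mutex_unlock(&thread_lock);
}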
svn commit: r190570 - stable/7/sys/kern
Author: jeff Date: Mon Mar 30 19:20:56 2009 New Revision: 190570 URL: http://svn.freebsd.org/changeset/base/190570 Log: MFC SVN rev 189787. - Fix steal_thresh calculation with odd numbers of cpus and sched_affinity() for threads on runqueues. Approved by: re Modified: stable/7/sys/kern/sched_ule.c Modified: stable/7/sys/kern/sched_ule.c == --- stable/7/sys/kern/sched_ule.c Mon Mar 30 18:47:13 2009 (r190569) +++ stable/7/sys/kern/sched_ule.c Mon Mar 30 19:20:56 2009 (r190570) @@ -1395,11 +1395,11 @@ sched_initticks(void *dummy) */ balance_interval = realstathz; /* -* Set steal thresh to log2(mp_ncpu) but no greater than 4. This -* prevents excess thrashing on large machines and excess idle on -* smaller machines. +* Set steal thresh to roughly log2(mp_ncpu) but no greater than 4. +* This prevents excess thrashing on large machines and excess idle +* on smaller machines. */ - steal_thresh = min(ffs(mp_ncpus) - 1, 4); + steal_thresh = min(fls(mp_ncpus) - 1, 3); affinity = SCHED_AFFINITY_DEFAULT; #endif } @@ -2549,6 +2549,11 @@ sched_affinity(struct thread *td) ts = td->td_sched; if (THREAD_CAN_SCHED(td, ts->ts_cpu)) return; + if (TD_ON_RUNQ(td)) { + sched_rem(td); + sched_add(td, SRQ_BORING); + return; + } if (!TD_IS_RUNNING(td)) return; td->td_flags |= TDF_NEEDRESCHED; ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
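The ffs()-to-fls() switch is the "odd numbers of cpus" half of the fix: for a CPU count that is not a power of two, ffs() returns the position of the lowest set bit, which is nowhere near log2. A quick standalone check, with fls written out by hand so the example does not rely on the BSD libc extension:

#include <stdio.h>
#include <strings.h>            /* ffs() */

static int
fls_(unsigned int v)
{
    int bit;

    for (bit = 0; v != 0; v >>= 1)
        bit++;
    return (bit);
}

int
main(void)
{
    unsigned int ncpus = 6;     /* any non power of two shows the difference */

    /* old threshold: ffs(6) - 1 == 1; new: fls(6) - 1 == 2, roughly log2(6) */
    printf("ffs-based %d, fls-based %d\n", ffs(ncpus) - 1, fls_(ncpus) - 1);
    return (0);
}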
svn commit: r191643 - in head/sys: kern sys
Author: jeff Date: Wed Apr 29 03:15:43 2009 New Revision: 191643 URL: http://svn.freebsd.org/changeset/base/191643 Log: - Remove the bogus idle thread state code. This may have a race in it and it only optimized out an ipi or mwait in very few cases. - Skip the adaptive idle code when running on SMT or HTT cores. This just wastes cpu time that could be used on a busy thread on the same core. - Rename CG_FLAG_THREAD to CG_FLAG_SMT to be more descriptive. Re-use CG_FLAG_THREAD to mean SMT or HTT. Sponsored by: Nokia Modified: head/sys/kern/sched_ule.c head/sys/kern/subr_smp.c head/sys/sys/smp.h Modified: head/sys/kern/sched_ule.c == --- head/sys/kern/sched_ule.c Tue Apr 28 23:36:29 2009(r191642) +++ head/sys/kern/sched_ule.c Wed Apr 29 03:15:43 2009(r191643) @@ -36,7 +36,7 @@ */ #include -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD$); #include "opt_hwpmc_hooks.h" #include "opt_kdtrace.h" @@ -213,7 +213,6 @@ struct tdq { volatile inttdq_load; /* Aggregate load. */ int tdq_sysload;/* For loadavg, !ITHD load. */ int tdq_transferable; /* Transferable thread count. */ - volatile inttdq_idlestate; /* State of the idle thread. */ short tdq_switchcnt; /* Switches this tick. */ short tdq_oldswitchcnt; /* Switches last tick. */ u_char tdq_lowpri; /* Lowest priority thread. */ @@ -360,7 +359,6 @@ tdq_print(int cpu) printf("\tload: %d\n", tdq->tdq_load); printf("\tswitch cnt: %d\n", tdq->tdq_switchcnt); printf("\told switch cnt: %d\n", tdq->tdq_oldswitchcnt); - printf("\tidle state: %d\n", tdq->tdq_idlestate); printf("\ttimeshare idx: %d\n", tdq->tdq_idx); printf("\ttimeshare ridx: %d\n", tdq->tdq_ridx); printf("\tload transferable: %d\n", tdq->tdq_transferable); @@ -913,7 +911,7 @@ tdq_idled(struct tdq *tdq) /* We don't want to be preempted while we're iterating. */ spinlock_enter(); for (cg = tdq->tdq_cg; cg != NULL; ) { - if ((cg->cg_flags & (CG_FLAG_HTT | CG_FLAG_THREAD)) == 0) + if ((cg->cg_flags & CG_FLAG_THREAD) == 0) thresh = steal_thresh; else thresh = 1; @@ -969,13 +967,6 @@ tdq_notify(struct tdq *tdq, struct threa return; if (TD_IS_IDLETHREAD(ctd)) { /* -* If the idle thread is still 'running' it's probably -* waiting on us to release the tdq spinlock already. No -* need to ipi. -*/ - if (tdq->tdq_idlestate == TDQ_RUNNING) - return; - /* * If the MD code has an idle wakeup routine try that before * falling back to IPI. */ @@ -2536,12 +2527,10 @@ sched_idletd(void *dummy) int switchcnt; int i; + mtx_assert(&Giant, MA_NOTOWNED); td = curthread; tdq = TDQ_SELF(); - mtx_assert(&Giant, MA_NOTOWNED); - /* ULE relies on preemption for idle interruption. */ for (;;) { - tdq->tdq_idlestate = TDQ_RUNNING; #ifdef SMP if (tdq_idled(tdq) == 0) continue; @@ -2550,26 +2539,21 @@ sched_idletd(void *dummy) /* * If we're switching very frequently, spin while checking * for load rather than entering a low power state that -* requires an IPI. +* may require an IPI. However, don't do any busy +* loops while on SMT machines as this simply steals +* cycles from cores doing useful work. */ - if (switchcnt > sched_idlespinthresh) { + if ((tdq->tdq_cg->cg_flags & CG_FLAG_THREAD) == 0 && + switchcnt > sched_idlespinthresh) { for (i = 0; i < sched_idlespins; i++) { if (tdq->tdq_load) break; cpu_spinwait(); } } - /* -* We must set our state to IDLE before checking -* tdq_load for the last time to avoid a race with -* tdq_notify(). 
-*/ - if (tdq->tdq_load == 0) { - switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt; - tdq->tdq_idlestate = TDQ_IDLE; - if (tdq->tdq_load == 0) - cpu_idle(switchcnt > 1); - } + switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldsw
svn commit: r191645 - head/sys/kern
Author: jeff Date: Wed Apr 29 03:26:30 2009 New Revision: 191645 URL: http://svn.freebsd.org/changeset/base/191645 Log: - Fix the FBSDID line. Modified: head/sys/kern/sched_ule.c Modified: head/sys/kern/sched_ule.c == --- head/sys/kern/sched_ule.c Wed Apr 29 03:21:53 2009(r191644) +++ head/sys/kern/sched_ule.c Wed Apr 29 03:26:30 2009(r191645) @@ -36,7 +36,7 @@ */ #include -__FBSDID("$FreeBSD$); +__FBSDID("$FreeBSD$"); #include "opt_hwpmc_hooks.h" #include "opt_kdtrace.h" ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r191648 - in head/sys: amd64/amd64 amd64/include i386/i386 i386/include
Author: jeff Date: Wed Apr 29 06:54:40 2009 New Revision: 191648 URL: http://svn.freebsd.org/changeset/base/191648 Log: - Add support for cpuid leaf 0xb. This allows us to determine the topology of nehalem/corei7 based systems. - Remove the cpu_cores/cpu_logical detection from identcpu. - Describe the layout of the system in cpu_mp_announce(). Sponsored by: Nokia Modified: head/sys/amd64/amd64/identcpu.c head/sys/amd64/amd64/mp_machdep.c head/sys/amd64/include/smp.h head/sys/amd64/include/specialreg.h head/sys/i386/i386/identcpu.c head/sys/i386/i386/mp_machdep.c head/sys/i386/include/smp.h head/sys/i386/include/specialreg.h Modified: head/sys/amd64/amd64/identcpu.c == --- head/sys/amd64/amd64/identcpu.c Wed Apr 29 06:52:04 2009 (r191647) +++ head/sys/amd64/amd64/identcpu.c Wed Apr 29 06:54:40 2009 (r191648) @@ -106,9 +106,6 @@ static struct { { CENTAUR_VENDOR_ID,CPU_VENDOR_CENTAUR }, /* CentaurHauls */ }; -int cpu_cores; -int cpu_logical; - extern int pq_l2size; extern int pq_l2nways; @@ -195,7 +192,6 @@ printcpuinfo(void) cpu_vendor_id == CPU_VENDOR_CENTAUR) { printf(" Stepping = %u", cpu_id & 0xf); if (cpu_high > 0) { - u_int cmp = 1, htt = 1; /* * Here we should probably set up flags indicating @@ -400,28 +396,6 @@ printcpuinfo(void) if (tsc_is_invariant) printf("\n TSC: P-state invariant"); - /* -* If this CPU supports HTT or CMP then mention the -* number of physical/logical cores it contains. -*/ - if (cpu_feature & CPUID_HTT) - htt = (cpu_procinfo & CPUID_HTT_CORES) >> 16; - if (cpu_vendor_id == CPU_VENDOR_AMD && - (amd_feature2 & AMDID2_CMP)) - cmp = (cpu_procinfo2 & AMDID_CMP_CORES) + 1; - else if (cpu_vendor_id == CPU_VENDOR_INTEL && - (cpu_high >= 4)) { - cpuid_count(4, 0, regs); - if ((regs[0] & 0x1f) != 0) - cmp = ((regs[0] >> 26) & 0x3f) + 1; - } - cpu_cores = cmp; - cpu_logical = htt / cmp; - if (cmp > 1) - printf("\n Cores per package: %d", cmp); - if ((htt / cmp) > 1) - printf("\n Logical CPUs per core: %d", - cpu_logical); } } /* Avoid ugly blank lines: only print newline when we have to. */ Modified: head/sys/amd64/amd64/mp_machdep.c == --- head/sys/amd64/amd64/mp_machdep.c Wed Apr 29 06:52:04 2009 (r191647) +++ head/sys/amd64/amd64/mp_machdep.c Wed Apr 29 06:54:40 2009 (r191648) @@ -160,6 +160,8 @@ int apic_cpuids[MAX_APIC_ID + 1]; static volatile u_int cpu_ipi_pending[MAXCPU]; static u_int boot_address; +static int cpu_logical; +static int cpu_cores; static voidassign_cpu_ids(void); static voidset_interrupt_apic_ids(void); @@ -181,13 +183,142 @@ mem_range_AP_init(void) mem_range_softc.mr_op->initAP(&mem_range_softc); } -struct cpu_group * -cpu_topo(void) +static void +topo_probe_0xb(void) +{ + int logical; + int p[4]; + int bits; + int type; + int cnt; + int i; + int x; + + /* We only support two levels for now. */ + for (i = 0; i < 3; i++) { + cpuid_count(0x0B, i, p); + bits = p[0] & 0x1f; + logical = p[1] &= 0x; + type = (p[2] >> 8) & 0xff; + if (type == 0 || logical == 0) + break; + for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) { + if (!cpu_info[x].cpu_present || + cpu_info[x].cpu_disabled) + continue; + if (x >> bits == boot_cpu_id >> bits) + cnt++; + } + if (type == CPUID_TYPE_SMT) + cpu_logical = cnt; + else if (type == CPUID_TYPE_CORE) + cpu_cores = cnt; + } + if (cpu_logical == 0) + cpu_logical = 1; + cpu_cores /= cpu_logical; +} + +static void +topo_probe_0x4(void) +{ + u_int threads_per_cache, p[4]; + u_int htt, cmp; + int i; + + htt = cmp = 1; + /* +* If this CPU supports HTT o
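For reference, leaf 0xB can be probed the same way from userland. A minimal sketch, assuming a GCC/Clang toolchain for <cpuid.h> (__get_cpuid_count is a compiler builtin wrapper, not part of this commit) and a CPU that implements the leaf; the field layout mirrors topo_probe_0xb() above, with ECX[15:8] giving the level type (1 = SMT, 2 = core):

#include <cpuid.h>
#include <stdio.h>

int
main(void)
{
    unsigned int eax, ebx, ecx, edx;
    unsigned int bits, logical, type, level;

    for (level = 0; level < 3; level++) {
        if (!__get_cpuid_count(0x0b, level, &eax, &ebx, &ecx, &edx))
            break;
        bits = eax & 0x1f;          /* APIC id shift width at this level */
        logical = ebx & 0xffff;     /* logical CPUs sharing this level */
        type = (ecx >> 8) & 0xff;   /* 1 = SMT, 2 = core */
        if (type == 0 || logical == 0)
            break;
        printf("level %u: type %u shift %u logical %u\n",
            level, type, bits, logical);
    }
    return (0);
}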
svn commit: r191676 - head/sys/kern
Author: jeff Date: Wed Apr 29 23:04:31 2009 New Revision: 191676 URL: http://svn.freebsd.org/changeset/base/191676 Log: - Fix non-SMP build by encapsulating idle spin logic in a macro. Pointy hat to:me Modified: head/sys/kern/sched_ule.c Modified: head/sys/kern/sched_ule.c == --- head/sys/kern/sched_ule.c Wed Apr 29 21:50:13 2009(r191675) +++ head/sys/kern/sched_ule.c Wed Apr 29 23:04:31 2009(r191676) @@ -2516,6 +2516,13 @@ sched_sizeof_thread(void) return (sizeof(struct thread) + sizeof(struct td_sched)); } +#ifdef SMP +#defineTDQ_IDLESPIN(tdq) \ +((tdq)->tdq_cg != NULL && ((tdq)->tdq_cg->cg_flags & CG_FLAG_THREAD) == 0) +#else +#defineTDQ_IDLESPIN(tdq) 1 +#endif + /* * The actual idle process. */ @@ -2543,8 +2550,7 @@ sched_idletd(void *dummy) * loops while on SMT machines as this simply steals * cycles from cores doing useful work. */ - if ((tdq->tdq_cg->cg_flags & CG_FLAG_THREAD) == 0 && - switchcnt > sched_idlespinthresh) { + if (TDQ_IDLESPIN(tdq) && switchcnt > sched_idlespinthresh) { for (i = 0; i < sched_idlespins; i++) { if (tdq->tdq_load) break; ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
Re: svn commit: r191643 - in head/sys: kern sys
On Wed, 29 Apr 2009, Kostik Belousov wrote: On Wed, Apr 29, 2009 at 03:15:44AM +, Jeff Roberson wrote: Author: jeff Date: Wed Apr 29 03:15:43 2009 New Revision: 191643 URL: http://svn.freebsd.org/changeset/base/191643 Log: - Remove the bogus idle thread state code. This may have a race in it and it only optimized out an ipi or mwait in very few cases. - Skip the adaptive idle code when running on SMT or HTT cores. This just wastes cpu time that could be used on a busy thread on the same core. - Rename CG_FLAG_THREAD to CG_FLAG_SMT to be more descriptive. Re-use CG_FLAG_THREAD to mean SMT or HTT. Sponsored by: Nokia Modified: head/sys/kern/sched_ule.c head/sys/kern/subr_smp.c head/sys/sys/smp.h Now I see a reason why it is better #ifdef SMP the code that uses CG_FLAG_*. Also, we should check for tdq_cg != NULL in one more place. See the patch below, instead of exposing CG_FLAG_* for !SMP configs. Thank you kan. I did something slightly different so we can retain the adaptive idling on UP. Thanks, Jeff diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index 680572d..fe3a119 100644 --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -891,6 +891,7 @@ tdq_move(struct tdq *from, struct tdq *to) return (1); } +#ifdef SMP /* * This tdq has idled. Try to steal a thread from another cpu and switch * to it. @@ -947,6 +948,7 @@ tdq_idled(struct tdq *tdq) spinlock_exit(); return (1); } +#endif /* * Notify a remote cpu of new work. Sends an IPI if criteria are met. @@ -2525,7 +2527,9 @@ sched_idletd(void *dummy) struct thread *td; struct tdq *tdq; int switchcnt; +#ifdef SMP int i; +#endif mtx_assert(&Giant, MA_NOTOWNED); td = curthread; @@ -2543,7 +2547,9 @@ sched_idletd(void *dummy) * loops while on SMT machines as this simply steals * cycles from cores doing useful work. */ - if ((tdq->tdq_cg->cg_flags & CG_FLAG_THREAD) == 0 && +#ifdef SMP + if (tdq->tdq_cg != NULL && + (tdq->tdq_cg->cg_flags & CG_FLAG_THREAD) == 0 && switchcnt > sched_idlespinthresh) { for (i = 0; i < sched_idlespins; i++) { if (tdq->tdq_load) @@ -2551,6 +2557,7 @@ sched_idletd(void *dummy) cpu_spinwait(); } } +#endif switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt; if (tdq->tdq_load == 0) cpu_idle(switchcnt > 1); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r251826 - head/sys/vm
Author: jeff Date: Mon Jun 17 03:43:47 2013 New Revision: 251826 URL: http://svnweb.freebsd.org/changeset/base/251826 Log: - Add a new UMA API: uma_zcache_create(). This makes a zone without any backing memory that is only a container for per-cpu caches of arbitrary pointer items. These zones have no kegs. - Convert the regular keg based allocator to use the new import/release functions. - Move some stats to be atomics since they would require excessive zone locking/unlocking with the new import/release paradigm. Make zone_free_item simpler now that callers can manage more stats. - Check for these cache-only zones in the public APIs and debugging code by checking zone_first_keg() against NULL. Sponsored by: EMC / Isilong Storage Division Modified: head/sys/vm/uma.h head/sys/vm/uma_core.c head/sys/vm/uma_int.h Modified: head/sys/vm/uma.h == --- head/sys/vm/uma.h Mon Jun 17 03:32:27 2013(r251825) +++ head/sys/vm/uma.h Mon Jun 17 03:43:47 2013(r251826) @@ -124,6 +124,16 @@ typedef int (*uma_init)(void *mem, int s typedef void (*uma_fini)(void *mem, int size); /* + * Import new memory into a cache zone. + */ +typedef int (*uma_import)(void *arg, void **store, int count, int flags); + +/* + * Free memory from a cache zone. + */ +typedef void (*uma_release)(void *arg, void **store, int count); + +/* * What's the difference between initializing and constructing? * * The item is initialized when it is cached, and this is the state that the @@ -216,6 +226,19 @@ uma_zone_t uma_zsecond_create(char *name int uma_zsecond_add(uma_zone_t zone, uma_zone_t master); /* + * Create cache-only zones. + * + * This allows uma's per-cpu cache facilities to handle arbitrary + * pointers. Consumers must specify the import and release functions to + * fill and destroy caches. UMA does not allocate any memory for these + * zones. The 'arg' parameter is passed to import/release and is caller + * specific. + */ +uma_zone_t uma_zcache_create(char *name, uma_ctor ctor, uma_dtor dtor, + uma_init zinit, uma_fini zfini, uma_import zimport, + uma_release zrelease, void *arg, int flags); + +/* * Definitions for uma_zcreate flags * * These flags share space with UMA_ZFLAGs in uma_int.h. Be careful not to Modified: head/sys/vm/uma_core.c == --- head/sys/vm/uma_core.c Mon Jun 17 03:32:27 2013(r251825) +++ head/sys/vm/uma_core.c Mon Jun 17 03:43:47 2013(r251826) @@ -131,14 +131,14 @@ static int bucketdisable = 1; static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs); /* This mutex protects the keg list */ -static struct mtx uma_mtx; +static struct mtx_padalign uma_mtx; /* Linked list of boot time pages */ static LIST_HEAD(,uma_slab) uma_boot_pages = LIST_HEAD_INITIALIZER(uma_boot_pages); /* This mutex protects the boot time pages list */ -static struct mtx uma_boot_pages_mtx; +static struct mtx_padalign uma_boot_pages_mtx; /* Is the VM done starting up? */ static int booted = 0; @@ -172,6 +172,9 @@ struct uma_zctor_args { uma_dtor dtor; uma_init uminit; uma_fini fini; + uma_import import; + uma_release release; + void *arg; uma_keg_t keg; int align; uint32_t flags; @@ -216,9 +219,6 @@ static uint8_t bucket_size[BUCKET_ZONES] */ enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI }; -#defineZFREE_STATFAIL 0x0001 /* Update zone failure statistic. */ -#defineZFREE_STATFREE 0x0002 /* Update zone free statistic. */ - /* Prototypes.. 
*/ static void *noobj_alloc(uma_zone_t, int, uint8_t *, int); @@ -244,8 +244,7 @@ static void hash_free(struct uma_hash *h static void uma_timeout(void *); static void uma_startup3(void); static void *zone_alloc_item(uma_zone_t, void *, int); -static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip, -int); +static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip); static void bucket_enable(void); static void bucket_init(void); static uma_bucket_t bucket_alloc(int, int); @@ -254,11 +253,14 @@ static void bucket_zone_drain(void); static int zone_alloc_bucket(uma_zone_t zone, int flags); static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags); static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags); -static void *slab_alloc_item(uma_zone_t zone, uma_slab_t slab); +static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab); +static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item); static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, int align, uint32_t flags); static inline void zone_relock(uma_zone_t zone, uma_keg_t
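The import/release split is easiest to see in miniature. The sketch below is an illustrative userland model of a cache-only zone, not the UMA implementation: the cache layer only moves pointers in and out of a bucket-sized array, and everything about where the items come from lives behind the caller's two callbacks. The struct, function names and the bucket size of 32 are made up for the example.

#include <stddef.h>

typedef int  (*import_fn)(void *arg, void **store, int count);
typedef void (*release_fn)(void *arg, void **store, int count);

struct pcache {
    void       *items[32];      /* stand-in for one per-cpu bucket */
    int         cnt;
    import_fn   import;         /* fills items[] from the backing store */
    release_fn  release;        /* drains items[] back to the backing store */
    void       *arg;
};

void *
pcache_alloc(struct pcache *pc)
{
    if (pc->cnt == 0)
        pc->cnt = pc->import(pc->arg, pc->items, 32);
    if (pc->cnt == 0)
        return (NULL);
    return (pc->items[--pc->cnt]);
}

void
pcache_free(struct pcache *pc, void *item)
{
    if (pc->cnt == 32) {
        pc->release(pc->arg, pc->items, pc->cnt);
        pc->cnt = 0;
    }
    pc->items[pc->cnt++] = item;
}

uma_zcache_create() wires exactly this kind of callback pair, together with the usual ctor/dtor/init/fini hooks, onto the existing per-cpu bucket machinery, which is why such zones need no keg at all.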
svn commit: r251894 - in head: lib/libmemstat sys/vm
Author: jeff Date: Tue Jun 18 04:50:20 2013 New Revision: 251894 URL: http://svnweb.freebsd.org/changeset/base/251894 Log: Refine UMA bucket allocation to reduce space consumption and improve performance. - Always free to the alloc bucket if there is space. This gives LIFO allocation order to improve hot-cache performance. This also allows for zones with a single bucket per-cpu rather than a pair if the entire working set fits in one bucket. - Enable per-cpu caches of buckets. To prevent recursive bucket allocation one bucket zone still has per-cpu caches disabled. - Pick the initial bucket size based on a table driven maximum size per-bucket rather than the number of items per-page. This gives more sane initial sizes. - Only grow the bucket size when we face contention on the zone lock, this causes bucket sizes to grow more slowly. - Adjust the number of items per-bucket to account for the header space. This packs the buckets more efficiently per-page while making them not quite powers of two. - Eliminate the per-zone free bucket list. Always return buckets back to the bucket zone. This ensures that as zones grow into larger bucket sizes they eventually discard the smaller sizes. It persists fewer buckets in the system. The locking is slightly trickier. - Only switch buckets in zalloc, not zfree, this eliminates pathological cases where we ping-pong between two buckets. - Ensure that the thread that fills a new bucket gets to allocate from it to give a better upper bound on allocation time. Sponsored by: EMC / Isilon Storage Division Modified: head/lib/libmemstat/memstat_uma.c head/sys/vm/uma_core.c head/sys/vm/uma_int.h Modified: head/lib/libmemstat/memstat_uma.c == --- head/lib/libmemstat/memstat_uma.c Tue Jun 18 04:11:16 2013 (r251893) +++ head/lib/libmemstat/memstat_uma.c Tue Jun 18 04:50:20 2013 (r251894) @@ -446,7 +446,7 @@ skip_percpu: kz.uk_ipers; mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size; mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; - for (ubp = LIST_FIRST(&uz.uz_full_bucket); ubp != + for (ubp = LIST_FIRST(&uz.uz_buckets); ubp != NULL; ubp = LIST_NEXT(&ub, ub_link)) { ret = kread(kvm, ubp, &ub, sizeof(ub), 0); mtp->mt_zonefree += ub.ub_cnt; Modified: head/sys/vm/uma_core.c == --- head/sys/vm/uma_core.c Tue Jun 18 04:11:16 2013(r251893) +++ head/sys/vm/uma_core.c Tue Jun 18 04:50:20 2013(r251894) @@ -192,27 +192,26 @@ struct uma_kctor_args { struct uma_bucket_zone { uma_zone_t ubz_zone; char*ubz_name; - int ubz_entries; + int ubz_entries;/* Number of items it can hold. */ + int ubz_maxsize;/* Maximum allocation size per-item. */ }; -#defineBUCKET_MAX 128 +/* + * Compute the actual number of bucket entries to pack them in power + * of two sizes for more efficient space utilization. + */ +#defineBUCKET_SIZE(n) \ +(((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *)) + +#defineBUCKET_MAX BUCKET_SIZE(128) struct uma_bucket_zone bucket_zones[] = { - { NULL, "16 Bucket", 16 }, - { NULL, "32 Bucket", 32 }, - { NULL, "64 Bucket", 64 }, - { NULL, "128 Bucket", 128 }, + { NULL, "32 Bucket", BUCKET_SIZE(32), 512 }, + { NULL, "64 Bucket", BUCKET_SIZE(64), 256 }, + { NULL, "128 Bucket", BUCKET_SIZE(128), 128 }, { NULL, NULL, 0} }; - -#defineBUCKET_SHIFT4 -#defineBUCKET_ZONES((BUCKET_MAX >> BUCKET_SHIFT) + 1) - -/* - * bucket_size[] maps requested bucket sizes to zones that allocate a bucket - * of approximately the right size. 
- */ -static uint8_t bucket_size[BUCKET_ZONES]; +static uma_zone_t largebucket; /* * Flags and enumerations to be passed to internal functions. @@ -250,7 +249,7 @@ static void bucket_init(void); static uma_bucket_t bucket_alloc(int, int); static void bucket_free(uma_bucket_t); static void bucket_zone_drain(void); -static int zone_alloc_bucket(uma_zone_t zone, int flags); +static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, int flags); static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags); static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags); static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab); @@ -283,7 +282,6 @@ SYSCTL_INT(_vm, OID_AUTO, zone_warnings, /* * This routine checks to see
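The BUCKET_SIZE() macro is easier to read with numbers plugged in. Below is a worked example for LP64, assuming a 16-byte struct uma_bucket header purely for illustration (the real header size may differ): the "128 bucket" consumes 128 pointer slots of backing store but can hold only 126 items once the header is accounted for.

#include <stdio.h>

#define PTRSZ           8       /* sizeof(void *) on LP64 */
#define HDRSZ           16      /* assumed bucket header size */
#define BUCKET_SIZE(n)  (((PTRSZ * (n)) - HDRSZ) / PTRSZ)

int
main(void)
{
    printf("%d %d %d\n", BUCKET_SIZE(32), BUCKET_SIZE(64), BUCKET_SIZE(128));
    /* prints "30 62 126" under the assumptions above */
    return (0);
}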
Re: svn commit: r251894 - in head: lib/libmemstat sys/vm
On Tue, 18 Jun 2013, Alfred Perlstein wrote: On 6/18/13 4:37 AM, Gleb Smirnoff wrote: On Tue, Jun 18, 2013 at 10:25:08AM +0200, Andre Oppermann wrote: A> There used to be a problem with per CPU caches accumulating large amounts A> of items without freeing back to the global (or socket) pool. A> A> Do these updates to UMA change this situation and/or do you have further A> improvements coming up? This is especially a problem with ZFS, which utilizes UMA extensively. IMHO, we need a flag for uma_zcreate() that would disable per CPU caches, so that certain zones (ZFS at least) would have them off. It might be a good idea to force this flag on every zone that has allocation >= then the page size. What about people running with 256GB+ ram? Do they also want the per cpu caches off? If you look at the new system there is a static threshold for the initial item size required for different sized per-cpu buckets. What might make sense is to tune this size based on available memory. For what it's worth I looked at solaris settings and they cache roughly 4x as much on a per-cpu basis. The new system should tend to cache less of large and infrequent allocations vs the old system. I can't say yet whether it is still a problem. I have an implementation of vmem to replace using vm_maps for kmem_map, buffer_map, etc. which may resolve the zfs allocation problems. I hope to get this in over the next few weeks. Thanks, Jeff -- Alfred Perlstein VP Software Engineering, iXsystems ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r251983 - head/sys/vm
Author: jeff Date: Wed Jun 19 02:30:32 2013 New Revision: 251983 URL: http://svnweb.freebsd.org/changeset/base/251983 Log: - Persist the caller's flags in the bucket allocation flags so we don't lose a M_NOVM when we recurse into a bucket allocation. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/uma_core.c Modified: head/sys/vm/uma_core.c == --- head/sys/vm/uma_core.c Wed Jun 19 02:16:04 2013(r251982) +++ head/sys/vm/uma_core.c Wed Jun 19 02:30:32 2013(r251983) @@ -2418,7 +2418,7 @@ zone_alloc_bucket(uma_zone_t zone, int f int max; max = zone->uz_count; - bflags = M_NOWAIT; + bflags = (flags & ~M_WAITOK) | M_NOWAIT; if (zone->uz_flags & UMA_ZFLAG_CACHEONLY) bflags |= M_NOVM; bucket = bucket_alloc(zone->uz_count, bflags); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r252040 - head/sys/vm
Author: jeff Date: Thu Jun 20 19:08:12 2013 New Revision: 252040 URL: http://svnweb.freebsd.org/changeset/base/252040 Log: - Add a per-zone lock for zones without kegs. - Be more explicit about zone vs keg locking. This functionally changes almost nothing. - Add a size parameter to uma_zcache_create() so we can size the buckets. - Pass the zone to bucket_alloc() so it can modify allocation flags as appropriate. - Fix a bug in zone_alloc_bucket() where I missed an address of operator in a failure case. (Found by pho) Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/uma.h head/sys/vm/uma_core.c head/sys/vm/uma_dbg.c head/sys/vm/uma_int.h Modified: head/sys/vm/uma.h == --- head/sys/vm/uma.h Thu Jun 20 18:25:10 2013(r252039) +++ head/sys/vm/uma.h Thu Jun 20 19:08:12 2013(r252040) @@ -234,7 +234,7 @@ int uma_zsecond_add(uma_zone_t zone, uma * zones. The 'arg' parameter is passed to import/release and is caller * specific. */ -uma_zone_t uma_zcache_create(char *name, uma_ctor ctor, uma_dtor dtor, +uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor, uma_init zinit, uma_fini zfini, uma_import zimport, uma_release zrelease, void *arg, int flags); Modified: head/sys/vm/uma_core.c == --- head/sys/vm/uma_core.c Thu Jun 20 18:25:10 2013(r252039) +++ head/sys/vm/uma_core.c Thu Jun 20 19:08:12 2013(r252040) @@ -246,8 +246,8 @@ static void *zone_alloc_item(uma_zone_t, static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip); static void bucket_enable(void); static void bucket_init(void); -static uma_bucket_t bucket_alloc(int, int); -static void bucket_free(uma_bucket_t); +static uma_bucket_t bucket_alloc(uma_zone_t zone, int); +static void bucket_free(uma_zone_t zone, uma_bucket_t); static void bucket_zone_drain(void); static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, int flags); static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags); @@ -256,8 +256,6 @@ static void *slab_alloc_item(uma_keg_t k static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item); static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, int align, uint32_t flags); -static inline void zone_relock(uma_zone_t zone, uma_keg_t keg); -static inline void keg_relock(uma_keg_t keg, uma_zone_t zone); static int zone_import(uma_zone_t zone, void **bucket, int max, int flags); static void zone_release(uma_zone_t zone, void **bucket, int cnt); @@ -352,7 +350,7 @@ bucket_select(int size) } static uma_bucket_t -bucket_alloc(int entries, int bflags) +bucket_alloc(uma_zone_t zone, int flags) { struct uma_bucket_zone *ubz; uma_bucket_t bucket; @@ -366,8 +364,10 @@ bucket_alloc(int entries, int bflags) if (bucketdisable) return (NULL); - ubz = bucket_zone_lookup(entries); - bucket = uma_zalloc(ubz->ubz_zone, bflags); + if (zone->uz_flags & UMA_ZFLAG_CACHEONLY) + flags |= M_NOVM; + ubz = bucket_zone_lookup(zone->uz_count); + bucket = uma_zalloc(ubz->ubz_zone, flags); if (bucket) { #ifdef INVARIANTS bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries); @@ -380,7 +380,7 @@ bucket_alloc(int entries, int bflags) } static void -bucket_free(uma_bucket_t bucket) +bucket_free(uma_zone_t zone, uma_bucket_t bucket) { struct uma_bucket_zone *ubz; @@ -662,9 +662,9 @@ cache_drain(uma_zone_t zone) bucket_drain(zone, cache->uc_allocbucket); bucket_drain(zone, cache->uc_freebucket); if (cache->uc_allocbucket != NULL) - bucket_free(cache->uc_allocbucket); + bucket_free(zone, cache->uc_allocbucket); if (cache->uc_freebucket != 
NULL) - bucket_free(cache->uc_freebucket); + bucket_free(zone, cache->uc_freebucket); cache->uc_allocbucket = cache->uc_freebucket = NULL; } ZONE_LOCK(zone); @@ -688,7 +688,7 @@ bucket_cache_drain(uma_zone_t zone) LIST_REMOVE(bucket, ub_link); ZONE_UNLOCK(zone); bucket_drain(zone, bucket); - bucket_free(bucket); + bucket_free(zone, bucket); ZONE_LOCK(zone); } } @@ -801,7 +801,7 @@ zone_drain_wait(uma_zone_t zone, int wai if (waitok == M_NOWAIT) goto out; mtx_unlock(&uma_mtx); - msleep(zone, zone->uz_lock, PVM, "zonedrain", 1); + msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1); mtx_lock(&uma_mtx); }
svn commit: r252226 - head/sys/vm
Author: jeff Date: Wed Jun 26 00:57:38 2013 New Revision: 252226 URL: http://svnweb.freebsd.org/changeset/base/252226 Log: - Resolve bucket recursion issues by passing a cookie with zone flags through bucket_alloc() to uma_zalloc_arg() and uma_zfree_arg(). - Make some smaller buckets for large zones to further reduce memory waste. - Implement uma_zone_reserve(). This holds aside a number of items only for callers who specify M_USE_RESERVE. buckets will never be filled from reserve allocations. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/uma.h head/sys/vm/uma_core.c head/sys/vm/uma_int.h Modified: head/sys/vm/uma.h == --- head/sys/vm/uma.h Wed Jun 26 00:42:45 2013(r252225) +++ head/sys/vm/uma.h Wed Jun 26 00:57:38 2013(r252226) @@ -459,6 +459,12 @@ void uma_reclaim(void); void uma_set_align(int align); /* + * Set a reserved number of items to hold for M_USE_RESERVE allocations. All + * other requests must allocate new backing pages. + */ +void uma_zone_reserve(uma_zone_t zone, int nitems); + +/* * Reserves the maximum KVA space required by the zone and configures the zone * to use a VM_ALLOC_NOOBJ-based backend allocator. * Modified: head/sys/vm/uma_core.c == --- head/sys/vm/uma_core.c Wed Jun 26 00:42:45 2013(r252225) +++ head/sys/vm/uma_core.c Wed Jun 26 00:57:38 2013(r252226) @@ -206,12 +206,14 @@ struct uma_bucket_zone { #defineBUCKET_MAX BUCKET_SIZE(128) struct uma_bucket_zone bucket_zones[] = { + { NULL, "4 Bucket", BUCKET_SIZE(4), 4096 }, + { NULL, "8 Bucket", BUCKET_SIZE(8), 2048 }, + { NULL, "16 Bucket", BUCKET_SIZE(16), 1024 }, { NULL, "32 Bucket", BUCKET_SIZE(32), 512 }, { NULL, "64 Bucket", BUCKET_SIZE(64), 256 }, { NULL, "128 Bucket", BUCKET_SIZE(128), 128 }, { NULL, NULL, 0} }; -static uma_zone_t largebucket; /* * Flags and enumerations to be passed to internal functions. @@ -246,10 +248,10 @@ static void *zone_alloc_item(uma_zone_t, static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip); static void bucket_enable(void); static void bucket_init(void); -static uma_bucket_t bucket_alloc(uma_zone_t zone, int); -static void bucket_free(uma_zone_t zone, uma_bucket_t); +static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int); +static void bucket_free(uma_zone_t zone, uma_bucket_t, void *); static void bucket_zone_drain(void); -static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, int flags); +static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags); static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags); static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags); static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab); @@ -304,17 +306,8 @@ bucket_init(void) size += sizeof(void *) * ubz->ubz_entries; ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, - UMA_ZONE_MAXBUCKET | UMA_ZONE_MTXCLASS); + UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET); } - /* -* To avoid recursive bucket allocation loops we disable buckets -* on the smallest bucket zone and use it for the largest zone. -* The remainder of the zones all use the largest zone. 
-*/ - ubz--; - ubz->ubz_zone->uz_count = bucket_zones[0].ubz_entries; - bucket_zones[0].ubz_zone->uz_count = 0; - largebucket = ubz->ubz_zone; } /* @@ -350,7 +343,7 @@ bucket_select(int size) } static uma_bucket_t -bucket_alloc(uma_zone_t zone, int flags) +bucket_alloc(uma_zone_t zone, void *udata, int flags) { struct uma_bucket_zone *ubz; uma_bucket_t bucket; @@ -363,11 +356,26 @@ bucket_alloc(uma_zone_t zone, int flags) */ if (bucketdisable) return (NULL); - - if (zone->uz_flags & UMA_ZFLAG_CACHEONLY) + /* +* To limit bucket recursion we store the original zone flags +* in a cookie passed via zalloc_arg/zfree_arg. This allows the +* NOVM flag to persist even through deep recursions. We also +* store ZFLAG_BUCKET once we have recursed attempting to allocate +* a bucket for a bucket zone so we do not allow infinite bucket +* recursion. This cookie will even persist to frees of unused +* buckets via the allocation path or bucket allocations in the +* free path. +*/ + if ((uintptr_t)udata & UMA_ZFLAG_BUCKET) + return (NULL); + if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0) + udata = (void *)(uintptr_t)zone->uz_flags; + else + udata = (void *)((uintptr
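The recursion guard works by smuggling a few flag bits through the opaque udata argument that already flows down the allocation path, so the state survives however deep the bucket-for-a-bucket recursion goes. A small sketch of that cookie trick, with made-up flag values rather than the UMA definitions:

#include <stdint.h>
#include <stdio.h>

#define FLAG_NOVM   0x01
#define FLAG_BUCKET 0x02        /* "already allocating a bucket" marker */

void *
make_cookie(unsigned int flags)
{
    return ((void *)(uintptr_t)flags);
}

unsigned int
cookie_flags(void *udata)
{
    return ((unsigned int)(uintptr_t)udata);
}

int
main(void)
{
    void *udata = make_cookie(FLAG_NOVM);

    /* A nested allocation tags the cookie before recursing. */
    udata = make_cookie(cookie_flags(udata) | FLAG_BUCKET);
    if (cookie_flags(udata) & FLAG_BUCKET)
        printf("recursion detected, fail the bucket allocation\n");
    return (0);
}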
svn commit: r252330 - in head/sys: conf geom kern sys vm
Author: jeff Date: Fri Jun 28 03:51:20 2013 New Revision: 252330 URL: http://svnweb.freebsd.org/changeset/base/252330 Log: - Add a general purpose resource allocator, vmem, from NetBSD. It was originally inspired by the Solaris vmem detailed in the proceedings of usenix 2001. The NetBSD version was heavily refactored for bugs and simplicity. - Use this resource allocator to allocate the buffer and transient maps. Buffer cache defrags are reduced by 25% when used by filesystems with mixed block sizes. Ultimately this may permit dynamic buffer cache sizing on low KVA machines. Discussed with: alc, kib, attilio Tested by:pho Sponsored by: EMC / Isilon Storage Division Added: head/sys/kern/subr_vmem.c (contents, props changed) head/sys/sys/vmem.h (contents, props changed) Modified: head/sys/conf/files head/sys/geom/geom_io.c head/sys/kern/vfs_bio.c head/sys/sys/malloc.h head/sys/vm/vm.h head/sys/vm/vm_init.c head/sys/vm/vm_kern.c head/sys/vm/vm_kern.h head/sys/vm/vm_object.c head/sys/vm/vm_pager.c head/sys/vm/vm_pager.h Modified: head/sys/conf/files == --- head/sys/conf/files Fri Jun 28 03:41:23 2013(r252329) +++ head/sys/conf/files Fri Jun 28 03:51:20 2013(r252330) @@ -2797,6 +2797,7 @@ kern/subr_trap.c standard kern/subr_turnstile.c standard kern/subr_uio.cstandard kern/subr_unit.c standard +kern/subr_vmem.c standard kern/subr_witness.coptional witness kern/sys_capability.c standard kern/sys_generic.c standard Modified: head/sys/geom/geom_io.c == --- head/sys/geom/geom_io.c Fri Jun 28 03:41:23 2013(r252329) +++ head/sys/geom/geom_io.c Fri Jun 28 03:51:20 2013(r252330) @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -626,7 +627,6 @@ g_io_transient_map_bio(struct bio *bp) vm_offset_t addr; long size; u_int retried; - int rv; KASSERT(unmapped_buf_allowed, ("unmapped disabled")); @@ -636,10 +636,7 @@ g_io_transient_map_bio(struct bio *bp) retried = 0; atomic_add_long(&transient_maps, 1); retry: - vm_map_lock(bio_transient_map); - if (vm_map_findspace(bio_transient_map, vm_map_min(bio_transient_map), - size, &addr)) { - vm_map_unlock(bio_transient_map); + if (vmem_alloc(transient_arena, size, M_BESTFIT | M_NOWAIT, &addr)) { if (transient_map_retries != 0 && retried >= transient_map_retries) { g_io_deliver(bp, EDEADLK/* XXXKIB */); @@ -651,7 +648,7 @@ retry: /* * Naive attempt to quisce the I/O to get more * in-flight requests completed and defragment -* the bio_transient_map. +* the transient_arena. */ CTR3(KTR_GEOM, "g_down retrymap bp %p provider %s r %d", bp, bp->bio_to->name, retried); @@ -661,12 +658,6 @@ retry: goto retry; } } - rv = vm_map_insert(bio_transient_map, NULL, 0, addr, addr + size, - VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT); - KASSERT(rv == KERN_SUCCESS, - ("vm_map_insert(bio_transient_map) rv %d %jx %lx", - rv, (uintmax_t)addr, size)); - vm_map_unlock(bio_transient_map); atomic_add_int(&inflight_transient_maps, 1); pmap_qenter((vm_offset_t)addr, bp->bio_ma, OFF_TO_IDX(size)); bp->bio_data = (caddr_t)addr + bp->bio_ma_offset; Added: head/sys/kern/subr_vmem.c == --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/kern/subr_vmem.c Fri Jun 28 03:51:20 2013(r252330) @@ -0,0 +1,1372 @@ +/*- + * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi, + * Copyright (c) 2013 EMC Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + *notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *notice, this list of conditions and the following disclaimer in the + *documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMP
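For readers new to the interface, here is a minimal sketch of how a vmem arena is created and used, mirroring the transient-map change above. This is illustrative only and not code from the commit; the arena name and the 1 MB span are invented values.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/vmem.h>

static vmem_t *example_arena;

static void
example_arena_init(void)
{

        /* Manage the range [0, 1 MB) in PAGE_SIZE quanta. */
        example_arena = vmem_create("example arena", 0, 1024 * 1024,
            PAGE_SIZE, 0, M_WAITOK);
}

static int
example_arena_take(vmem_size_t size, vmem_addr_t *addrp)
{

        /* Best-fit keeps fragmentation down; fail rather than sleep. */
        return (vmem_alloc(example_arena, size, M_BESTFIT | M_NOWAIT, addrp));
}

static void
example_arena_give(vmem_addr_t addr, vmem_size_t size)
{

        vmem_free(example_arena, addr, size);
}

Callers that can tolerate failure, like g_io_transient_map_bio() above, retry or defer the request when vmem_alloc() returns non-zero.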
Re: svn commit: r252330 - in head/sys: conf geom kern sys vm
On Fri, 28 Jun 2013, Adrian Chadd wrote: Hi, Do we really need another allocator / resource manager just for this?

No; however, I have a follow-up patch to replace kmem with this, and then we will use it for NUMA allocations in the kernel. After that it is likely that we could replace several other, less efficient allocators with this. Solaris uses it for pids, tids, device unit numbers, etc. We could easily do the same. The existing allocators have failure modes, big-O cost, and allocation requirements that are not tolerable for use in the VM. This also has a very nice feature that works with UMA to provide per-CPU caches of arbitrary number ranges, so it is more scalable as well as providing for less fragmentation.

Thanks,
Jeff
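As a concrete illustration of the "number range" use mentioned above, an arena with a quantum of 1 behaves like a unit-number allocator; a non-zero qcache_max would additionally give it the UMA-backed per-CPU caches described. Nothing like this is in the tree as of this commit; the names and the 1..65535 range are invented.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/vmem.h>

static vmem_t *example_id_arena;

static void
example_id_init(void)
{

        /* Hand out integer IDs in [1, 65535]; quantum 1, no quantum cache. */
        example_id_arena = vmem_create("example ids", 1, 65535, 1, 0,
            M_WAITOK);
}

static int
example_id_alloc(u_int *idp)
{
        vmem_addr_t id;
        int error;

        error = vmem_alloc(example_id_arena, 1, M_BESTFIT | M_NOWAIT, &id);
        if (error == 0)
                *idp = (u_int)id;
        return (error);
}

static void
example_id_free(u_int id)
{

        vmem_free(example_id_arena, id, 1);
}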
svn commit: r253583 - head/sys/vm
Author: jeff Date: Tue Jul 23 22:52:38 2013 New Revision: 253583 URL: http://svnweb.freebsd.org/changeset/base/253583 Log: - Correct a stale comment. We don't have vclean() anymore. The work is done by vgonel() and destroy_vobject() should only be called once from VOP_INACTIVE(). Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/vnode_pager.c Modified: head/sys/vm/vnode_pager.c == --- head/sys/vm/vnode_pager.c Tue Jul 23 22:17:00 2013(r253582) +++ head/sys/vm/vnode_pager.c Tue Jul 23 22:52:38 2013(r253583) @@ -158,11 +158,6 @@ vnode_destroy_vobject(struct vnode *vp) VM_OBJECT_WLOCK(obj); if (obj->ref_count == 0) { /* -* vclean() may be called twice. The first time -* removes the primary reference to the object, -* the second time goes one further and is a -* special-case to terminate the object. -* * don't double-terminate the object */ if ((obj->flags & OBJ_DEAD) == 0) ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r253587 - head/sys/vm
Author: jeff Date: Wed Jul 24 01:25:56 2013 New Revision: 253587 URL: http://svnweb.freebsd.org/changeset/base/253587 Log: - Remove the long obsolete 'vm_pageout_algorithm' experiment. Discussed with: alc Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/vm_pageout.c Modified: head/sys/vm/vm_pageout.c == --- head/sys/vm/vm_pageout.cWed Jul 24 01:08:45 2013(r253586) +++ head/sys/vm/vm_pageout.cWed Jul 24 01:25:56 2013(r253587) @@ -157,7 +157,6 @@ static int vm_pageout_stats; static int vm_pageout_stats_interval; static int vm_pageout_full_stats; static int vm_pageout_full_stats_interval; -static int vm_pageout_algorithm; static int defer_swap_pageouts; static int disable_swap_pageouts; @@ -169,9 +168,6 @@ static int vm_swap_enabled = 1; static int vm_swap_idle_enabled = 0; #endif -SYSCTL_INT(_vm, VM_PAGEOUT_ALGORITHM, pageout_algorithm, - CTLFLAG_RW, &vm_pageout_algorithm, 0, "LRU page mgmt"); - SYSCTL_INT(_vm, OID_AUTO, max_launder, CTLFLAG_RW, &vm_max_launder, 0, "Limit dirty flushes in pageout"); @@ -756,9 +752,7 @@ vm_pageout_object_deactivate_pages(pmap_ if (actcount == 0) { p->act_count -= min(p->act_count, ACT_DECLINE); - if (!remove_mode && - (vm_pageout_algorithm || - p->act_count == 0)) { + if (!remove_mode && p->act_count == 0) { pmap_remove_all(p); vm_page_deactivate(p); } else @@ -1356,8 +1350,7 @@ relock_queues: vm_page_requeue_locked(m); else { m->act_count -= min(m->act_count, ACT_DECLINE); - if (vm_pageout_algorithm || - object->ref_count == 0 || + if (object->ref_count == 0 || m->act_count == 0) { page_shortage--; /* Dequeue to avoid later lock recursion. */ ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r253685 - in head/sys: amd64/amd64 i386/i386
Author: jeff Date: Fri Jul 26 19:06:14 2013 New Revision: 253685 URL: http://svnweb.freebsd.org/changeset/base/253685 Log: - Use kmem_malloc rather than kmem_alloc() for GDT/LDT/tss allocations etc. This eliminates some unusual uses of that API in favor of more typical uses of kmem_malloc(). Discussed with: kib/alc Tested by:pho Sponsored by: EMC / Isilon Storage Division Modified: head/sys/amd64/amd64/sys_machdep.c head/sys/i386/i386/sys_machdep.c Modified: head/sys/amd64/amd64/sys_machdep.c == --- head/sys/amd64/amd64/sys_machdep.c Fri Jul 26 19:02:17 2013 (r253684) +++ head/sys/amd64/amd64/sys_machdep.c Fri Jul 26 19:06:14 2013 (r253685) @@ -356,8 +356,8 @@ amd64_set_ioperm(td, uap) */ pcb = td->td_pcb; if (pcb->pcb_tssp == NULL) { - tssp = (struct amd64tss *)kmem_alloc(kernel_map, - ctob(IOPAGES+1)); + tssp = (struct amd64tss *)kmem_malloc(kernel_map, + ctob(IOPAGES+1), M_WAITOK); if (tssp == NULL) return (ENOMEM); iomap = (char *)&tssp[1]; @@ -463,8 +463,9 @@ user_ldt_alloc(struct proc *p, int force return (mdp->md_ldt); mtx_unlock(&dt_lock); new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK); - new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map, -max_ldt_segment * sizeof(struct user_segment_descriptor)); + new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_map, +max_ldt_segment * sizeof(struct user_segment_descriptor), +M_WAITOK); if (new_ldt->ldt_base == NULL) { FREE(new_ldt, M_SUBPROC); mtx_lock(&dt_lock); Modified: head/sys/i386/i386/sys_machdep.c == --- head/sys/i386/i386/sys_machdep.cFri Jul 26 19:02:17 2013 (r253684) +++ head/sys/i386/i386/sys_machdep.cFri Jul 26 19:06:14 2013 (r253685) @@ -164,8 +164,9 @@ sysarch(td, uap) break; case I386_SET_LDT: if (kargs.largs.descs != NULL) { - lp = (union descriptor *)kmem_alloc(kernel_map, - kargs.largs.num * sizeof(union descriptor)); + lp = (union descriptor *)kmem_malloc(kernel_map, + kargs.largs.num * sizeof(union descriptor), + M_WAITOK); if (lp == NULL) { error = ENOMEM; break; @@ -298,7 +299,8 @@ i386_extend_pcb(struct thread *td) 0 /* granularity */ }; - ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1)); + ext = (struct pcb_ext *)kmem_malloc(kernel_map, ctob(IOPAGES+1), + M_WAITOK); if (ext == 0) return (ENOMEM); bzero(ext, sizeof(struct pcb_ext)); @@ -471,8 +473,8 @@ user_ldt_alloc(struct mdproc *mdp, int l M_SUBPROC, M_WAITOK); new_ldt->ldt_len = len = NEW_MAX_LD(len); -new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map, -round_page(len * sizeof(union descriptor))); +new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_map, +round_page(len * sizeof(union descriptor)), M_WAITOK); if (new_ldt->ldt_base == NULL) { free(new_ldt, M_SUBPROC); mtx_lock_spin(&dt_lock); @@ -511,8 +513,8 @@ user_ldt_alloc(struct mdproc *mdp, int l M_SUBPROC, M_WAITOK); new_ldt->ldt_len = len = NEW_MAX_LD(len); - new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map, - len * sizeof(union descriptor)); + new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_map, + len * sizeof(union descriptor), M_WAITOK); if (new_ldt->ldt_base == NULL) { free(new_ldt, M_SUBPROC); mtx_lock_spin(&dt_lock); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
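For reference, a hedged sketch of the call pattern the machdep code now uses; the wrapper below is invented for illustration, and at this revision kmem_malloc() still takes a vm_map_t.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>

/*
 * Illustrative only: allocate a wired, kernel-mapped buffer the way the
 * sys_machdep.c changes above now do, and check for failure the same way.
 */
static void *
example_wired_buf(vm_size_t size)
{
        vm_offset_t va;

        va = kmem_malloc(kernel_map, round_page(size), M_WAITOK);
        if (va == 0)
                return (NULL);
        bzero((void *)va, size);
        return ((void *)va);
}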
svn commit: r253697 - head/sys/vm
Author: jeff Date: Fri Jul 26 23:22:05 2013 New Revision: 253697 URL: http://svnweb.freebsd.org/changeset/base/253697 Log: Improve page LRU quality and simplify the logic. - Don't short-circuit aging tests for unmapped objects. This biases against unmapped file pages and transient mappings. - Always honor PGA_REFERENCED. We can now use this after soft busying to lazily restart the LRU. - Don't transition directly from active to cached bypassing the inactive queue. This frees recently used data much too early. - Rename actcount to act_delta to be more consistent with use and meaning. Reviewed by: kib, alc Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/vm_pageout.c Modified: head/sys/vm/vm_pageout.c == --- head/sys/vm/vm_pageout.cFri Jul 26 22:53:17 2013(r253696) +++ head/sys/vm/vm_pageout.cFri Jul 26 23:22:05 2013(r253697) @@ -708,7 +708,7 @@ vm_pageout_object_deactivate_pages(pmap_ { vm_object_t backing_object, object; vm_page_t p; - int actcount, remove_mode; + int act_delta, remove_mode; VM_OBJECT_ASSERT_LOCKED(first_object); if ((first_object->flags & OBJ_FICTITIOUS) != 0) @@ -739,17 +739,17 @@ vm_pageout_object_deactivate_pages(pmap_ vm_page_unlock(p); continue; } - actcount = pmap_ts_referenced(p); + act_delta = pmap_ts_referenced(p); if ((p->aflags & PGA_REFERENCED) != 0) { - if (actcount == 0) - actcount = 1; + if (act_delta == 0) + act_delta = 1; vm_page_aflag_clear(p, PGA_REFERENCED); } - if (p->queue != PQ_ACTIVE && actcount != 0) { + if (p->queue != PQ_ACTIVE && act_delta != 0) { vm_page_activate(p); - p->act_count += actcount; + p->act_count += act_delta; } else if (p->queue == PQ_ACTIVE) { - if (actcount == 0) { + if (act_delta == 0) { p->act_count -= min(p->act_count, ACT_DECLINE); if (!remove_mode && p->act_count == 0) { @@ -869,7 +869,7 @@ vm_pageout_scan(int pass) int page_shortage, maxscan, pcount; int addl_page_shortage; vm_object_t object; - int actcount; + int act_delta; int vnodes_skipped = 0; int maxlaunder; boolean_t queues_locked; @@ -989,44 +989,40 @@ vm_pageout_scan(int pass) queues_locked = FALSE; /* -* If the object is not being used, we ignore previous +* We bump the activation count if the page has been +* referenced while in the inactive queue. This makes +* it less likely that the page will be added back to the +* inactive queue prematurely again. Here we check the +* page tables (or emulated bits, if any), given the upper +* level VM system not knowing anything about existing * references. */ - if (object->ref_count == 0) { + act_delta = 0; + if ((m->aflags & PGA_REFERENCED) != 0) { vm_page_aflag_clear(m, PGA_REFERENCED); + act_delta = 1; + } + if (object->ref_count != 0) { + act_delta += pmap_ts_referenced(m); + } else { KASSERT(!pmap_page_is_mapped(m), ("vm_pageout_scan: page %p is mapped", m)); - - /* -* Otherwise, if the page has been referenced while in the -* inactive queue, we bump the "activation count" upwards, -* making it less likely that the page will be added back to -* the inactive queue prematurely again. Here we check the -* page tables (or emulated bits, if any), given the upper -* level VM system not knowing anything about existing -* references. -*/ - } else if ((m->aflags & PGA_REFERENCED) == 0 && - (actcount = pmap_ts_referenced(m)) != 0) { - vm_page_activate(m); - VM_OBJECT_WUNLOCK(object); - m->act_count += actcount + ACT_ADVANCE; - vm_page_unlock(m); -
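The heart of the new policy is how a page's recent use is sampled. Below is a small sketch, an illustrative helper rather than the committed code, of the combined test now applied during the inactive scan; the caller is assumed to hold the page lock, as the scan does.

#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>

/*
 * Illustrative only: the PGA_REFERENCED flag and the pmap referenced
 * bits both feed a single act_delta value; the pmap is consulted only
 * while the object still has references.
 */
static int
example_act_delta(vm_page_t m)
{
        int act_delta;

        act_delta = 0;
        if ((m->aflags & PGA_REFERENCED) != 0) {
                vm_page_aflag_clear(m, PGA_REFERENCED);
                act_delta = 1;
        }
        if (m->object->ref_count != 0)
                act_delta += pmap_ts_referenced(m);
        return (act_delta);
}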
svn commit: r249218 - in head/sys: fs/ext2fs kern ufs/ffs vm
Author: jeff Date: Sat Apr 6 22:21:23 2013 New Revision: 249218 URL: http://svnweb.freebsd.org/changeset/base/249218 Log: Prepare to replace the buf splay with a trie: - Don't insert BKGRDMARKER bufs into the splay or dirty/clean buf lists. No consumers need to find them there and it complicates the tree. These flags are all FFS specific and could be moved out of the buf cache. - Use pbgetvp() and pbrelvp() to associate the background and journal bufs with the vp. Not only is this much cheaper it makes more sense for these transient bufs. - Fix the assertions in pbget* and pbrel*. It's not safe to check list pointers which were never initialized. Use the BX flags instead. We also check B_PAGING in reassignbuf() so this should cover all cases. Discussed with: kib, mckusick, attilio Sponsored by: EMC / Isilon Storage Division Modified: head/sys/fs/ext2fs/ext2_alloc.c head/sys/kern/vfs_subr.c head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/ffs_vfsops.c head/sys/vm/vm_pager.c Modified: head/sys/fs/ext2fs/ext2_alloc.c == --- head/sys/fs/ext2fs/ext2_alloc.c Sat Apr 6 21:56:54 2013 (r249217) +++ head/sys/fs/ext2fs/ext2_alloc.c Sat Apr 6 22:21:23 2013 (r249218) @@ -794,8 +794,6 @@ ext2_clusteralloc(struct inode *ip, int goto fail_lock; bbp = (char *)bp->b_data; - bp->b_xflags |= BX_BKGRDWRITE; - EXT2_LOCK(ump); /* * Check to see if a cluster of the needed size (or bigger) is Modified: head/sys/kern/vfs_subr.c == --- head/sys/kern/vfs_subr.cSat Apr 6 21:56:54 2013(r249217) +++ head/sys/kern/vfs_subr.cSat Apr 6 22:21:23 2013(r249218) @@ -1312,8 +1312,7 @@ flushbuflist(struct bufv *bufv, int flag xflags = 0; if (nbp != NULL) { lblkno = nbp->b_lblkno; - xflags = nbp->b_xflags & - (BX_BKGRDMARKER | BX_VNDIRTY | BX_VNCLEAN); + xflags = nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN); } retval = EAGAIN; error = BUF_TIMELOCK(bp, @@ -1357,8 +1356,7 @@ flushbuflist(struct bufv *bufv, int flag if (nbp != NULL && (nbp->b_bufobj != bo || nbp->b_lblkno != lblkno || -(nbp->b_xflags & - (BX_BKGRDMARKER | BX_VNDIRTY | BX_VNCLEAN)) != xflags)) +(nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) != xflags)) break; /* nbp invalid */ } return (retval); @@ -1501,9 +1499,7 @@ buf_splay(daddr_t lblkno, b_xflags_t xfl return (NULL); lefttreemax = righttreemin = &dummy; for (;;) { - if (lblkno < root->b_lblkno || - (lblkno == root->b_lblkno && - (xflags & BX_BKGRDMARKER) < (root->b_xflags & BX_BKGRDMARKER))) { + if (lblkno < root->b_lblkno) { if ((y = root->b_left) == NULL) break; if (lblkno < y->b_lblkno) { @@ -1517,9 +1513,7 @@ buf_splay(daddr_t lblkno, b_xflags_t xfl /* Link into the new root's right tree. 
*/ righttreemin->b_left = root; righttreemin = root; - } else if (lblkno > root->b_lblkno || - (lblkno == root->b_lblkno && - (xflags & BX_BKGRDMARKER) > (root->b_xflags & BX_BKGRDMARKER))) { + } else if (lblkno > root->b_lblkno) { if ((y = root->b_right) == NULL) break; if (lblkno > y->b_lblkno) { @@ -1603,9 +1597,7 @@ buf_vlist_add(struct buf *bp, struct buf bp->b_left = NULL; bp->b_right = NULL; TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs); - } else if (bp->b_lblkno < root->b_lblkno || - (bp->b_lblkno == root->b_lblkno && - (bp->b_xflags & BX_BKGRDMARKER) < (root->b_xflags & BX_BKGRDMARKER))) { + } else if (bp->b_lblkno < root->b_lblkno) { bp->b_left = root->b_left; bp->b_right = root; root->b_left = NULL; @@ -1638,20 +1630,18 @@ gbincore(struct bufobj *bo, daddr_t lblk struct buf *bp; ASSERT_BO_LOCKED(bo); - if ((bp = bo->bo_clean.bv_root) != NULL && - bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER)) + if ((bp = bo->bo_clean.bv_root) != NULL && bp->b_lblkno == lblkno) return (bp); - if ((bp = bo->bo_dirty.bv_root) != NULL && - bp->b_lblkno == lblkno && !(bp->b_xflags &
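With the marker bufs no longer linked into the clean and dirty lists, lookups key purely on the logical block number. An illustrative wrapper (the name is invented) around the real gbincore() interface shown in the diff:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/bufobj.h>

/*
 * Illustrative only: find an in-core buffer for (bo, lblkno).  The
 * bufobj lock must be held; after this change the comparison is on
 * b_lblkno alone, since BKGRDMARKER bufs never appear on these lists.
 */
static struct buf *
example_incore(struct bufobj *bo, daddr_t lblkno)
{

        ASSERT_BO_LOCKED(bo);
        return (gbincore(bo, lblkno));
}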
svn commit: r262812 - head/sys/ufs/ufs
Author: jeff Date: Thu Mar 6 00:10:07 2014 New Revision: 262812 URL: http://svnweb.freebsd.org/changeset/base/262812 Log: - Gracefully handle truncation failures when trying to shrink directories. This could cause dirhash panics since the dirhash state would be successfully truncated while the directory was not. Reported by: pho Discussed with: mckusick Sponsored by: EMC / Isilon Storage Division MFC after:2 weeks Modified: head/sys/ufs/ufs/ufs_lookup.c Modified: head/sys/ufs/ufs/ufs_lookup.c == --- head/sys/ufs/ufs/ufs_lookup.c Wed Mar 5 23:37:25 2014 (r262811) +++ head/sys/ufs/ufs/ufs_lookup.c Thu Mar 6 00:10:07 2014 (r262812) @@ -1130,12 +1130,15 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdir dp->i_endoff && dp->i_endoff < dp->i_size) { if (tvp != NULL) VOP_UNLOCK(tvp, 0); + error = UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, + IO_NORMAL | IO_SYNC, cr); + if (error != 0) + vprint("ufs_direnter: failted to truncate", dvp); #ifdef UFS_DIRHASH - if (dp->i_dirhash != NULL) + if (error == 0 && dp->i_dirhash != NULL) ufsdirhash_dirtrunc(dp, dp->i_endoff); #endif - (void) UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, - IO_NORMAL | IO_SYNC, cr); + error = 0; if (tvp != NULL) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); } ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r262814 - head/sys/ufs/ffs
Author: jeff Date: Thu Mar 6 00:13:21 2014 New Revision: 262814 URL: http://svnweb.freebsd.org/changeset/base/262814 Log: - If we fail to do a non-blocking acquire of a buf lock while doing a waiting sync pass we need to do a blocking acquire and restart. Another thread, typically the buf daemon, may have this buf locked and if we don't wait we can fail to sync the file. This led to a great variety of softdep panics because we rely on all dependencies being flushed before proceeding in several cases. Reported by: pho Discussed with: mckusick Sponsored by: EMC / Isilon Storage Division MFC after:2 weeks Modified: head/sys/ufs/ffs/ffs_vnops.c Modified: head/sys/ufs/ffs/ffs_vnops.c == --- head/sys/ufs/ffs/ffs_vnops.cThu Mar 6 00:11:47 2014 (r262813) +++ head/sys/ufs/ffs/ffs_vnops.cThu Mar 6 00:13:21 2014 (r262814) @@ -259,9 +259,17 @@ loop: continue; if (bp->b_lblkno > lbn) panic("ffs_syncvnode: syncing truncated data."); - if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) + if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) { + BO_UNLOCK(bo); + } else if (wait != 0) { + if (BUF_LOCK(bp, + LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, + BO_LOCKPTR(bo)) != 0) { + bp->b_vflags &= ~BV_SCANNED; + goto next; + } + } else continue; - BO_UNLOCK(bo); if ((bp->b_flags & B_DELWRI) == 0) panic("ffs_fsync: not dirty"); /* ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
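The locking pattern above, a non-blocking try followed by a blocking acquire that atomically releases the bufobj interlock, generalizes beyond ffs_syncvnode(). A hedged sketch follows; the helper and its return convention are invented for illustration.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/lockmgr.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/bufobj.h>

/*
 * Illustrative only.  Try a non-blocking acquire first; if that fails
 * and the caller can wait, sleep with LK_SLEEPFAIL so a changed buf
 * identity is noticed, and pass the bufobj lock as the interlock so it
 * is released atomically with the sleep.  Returns 0 with the buf locked
 * and the bufobj unlocked; a non-zero return means we either could not
 * wait or we slept (LK_SLEEPFAIL) and the caller must rescan.  The
 * bufobj lock is released on every path.
 */
static int
example_lock_buf(struct bufobj *bo, struct buf *bp, int canwait)
{

        ASSERT_BO_LOCKED(bo);
        if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) {
                BO_UNLOCK(bo);
                return (0);
        }
        if (!canwait) {
                BO_UNLOCK(bo);
                return (EBUSY);
        }
        return (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
            BO_LOCKPTR(bo)));
}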
svn commit: r262917 - head/sys/kern
Author: jeff Date: Sat Mar 8 00:35:06 2014 New Revision: 262917 URL: http://svnweb.freebsd.org/changeset/base/262917 Log: - Make runq_steal_from more aggressive. Previously it would examine only a single priority queue. If that queue had a thread or threads which could not be migrated we would fail to steal load. This could cause starvation in situations where cores are idle. Submitted by: Doug Kilpatrick Tested by:pho Reviewed by: mav Sponsored by: EMC / Isilon Storage Division Modified: head/sys/kern/sched_ule.c Modified: head/sys/kern/sched_ule.c == --- head/sys/kern/sched_ule.c Sat Mar 8 00:14:40 2014(r262916) +++ head/sys/kern/sched_ule.c Sat Mar 8 00:35:06 2014(r262917) @@ -1057,32 +1057,27 @@ runq_steal_from(struct runq *rq, int cpu struct rqhead *rqh; struct thread *td, *first; int bit; - int pri; int i; rqb = &rq->rq_status; bit = start & (RQB_BPW -1); - pri = 0; first = NULL; again: for (i = RQB_WORD(start); i < RQB_LEN; bit = 0, i++) { if (rqb->rqb_bits[i] == 0) continue; - if (bit != 0) { - for (pri = bit; pri < RQB_BPW; pri++) - if (rqb->rqb_bits[i] & (1ul << pri)) - break; - if (pri >= RQB_BPW) + if (bit == 0) + bit = RQB_FFS(rqb->rqb_bits[i]); + for (; bit < RQB_BPW; bit++) { + if ((rqb->rqb_bits[i] & (1ul << bit)) == 0) continue; - } else - pri = RQB_FFS(rqb->rqb_bits[i]); - pri += (i << RQB_L2BPW); - rqh = &rq->rq_queues[pri]; - TAILQ_FOREACH(td, rqh, td_runq) { - if (first && THREAD_CAN_MIGRATE(td) && - THREAD_CAN_SCHED(td, cpu)) - return (td); - first = td; + rqh = &rq->rq_queues[bit + (i << RQB_L2BPW)]; + TAILQ_FOREACH(td, rqh, td_runq) { + if (first && THREAD_CAN_MIGRATE(td) && + THREAD_CAN_SCHED(td, cpu)) + return (td); + first = td; + } } } if (start != 0) { ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
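The change above boils down to visiting every set bit in each status word rather than stopping after the first. A small freestanding sketch of that bit-iteration pattern (plain C with invented names, not the scheduler's actual types):

#include <limits.h>
#include <stdio.h>
#include <strings.h>            /* ffsl() */

#define BPW     ((int)(sizeof(unsigned long) * CHAR_BIT))

/*
 * Visit every set bit in 'words', skipping bits below 'start' in the
 * first word, the way runq_steal_from() now scans the run-queue bitmap.
 */
static void
visit_bits(unsigned long *words, int nwords, int start)
{
        int i, bit;

        bit = start % BPW;
        for (i = start / BPW; i < nwords; bit = 0, i++) {
                if (words[i] == 0)
                        continue;
                if (bit == 0)
                        bit = ffsl((long)words[i]) - 1;
                for (; bit < BPW; bit++) {
                        if ((words[i] & (1UL << bit)) == 0)
                                continue;
                        printf("queue %d has runnable threads\n",
                            i * BPW + bit);
                }
        }
}

int
main(void)
{
        unsigned long words[2] = { 0x11, 0x4 };

        visit_bits(words, 2, 3);        /* skips bit 0 of word 0 */
        return (0);
}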
svn commit: r253949 - in head/sys: amd64/amd64 i386/i386
Author: jeff Date: Mon Aug 5 00:28:03 2013 New Revision: 253949 URL: http://svnweb.freebsd.org/changeset/base/253949 Log: - Introduce a specific function, pmap_remove_kernel_pde, for removing huge pages in the kernel's address space. This works around several asserts from pmap_demote_pde_locked that did not apply and gave false warnings. Discovered by:pho Reviewed by: alc Sponsored by: EMC / Isilon Storage Division Modified: head/sys/amd64/amd64/pmap.c head/sys/i386/i386/pmap.c Modified: head/sys/amd64/amd64/pmap.c == --- head/sys/amd64/amd64/pmap.c Sun Aug 4 23:45:04 2013(r253948) +++ head/sys/amd64/amd64/pmap.c Mon Aug 5 00:28:03 2013(r253949) @@ -2795,6 +2795,44 @@ pmap_demote_pde_locked(pmap_t pmap, pd_e } /* + * pmap_remove_kernel_pde: Remove a kernel superpage mapping. + */ +static void +pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) +{ + pd_entry_t newpde; + vm_paddr_t mptepa; + vm_page_t mpte; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mpte = pmap_lookup_pt_page(pmap, va); + if (mpte == NULL) + panic("pmap_remove_kernel_pde: Missing pt page."); + + pmap_remove_pt_page(pmap, mpte); + mptepa = VM_PAGE_TO_PHYS(mpte); + newpde = mptepa | PG_M | PG_A | PG_RW | PG_V; + + /* +* Initialize the page table page. +*/ + pagezero((void *)PHYS_TO_DMAP(mptepa)); + + /* +* Demote the mapping. +*/ + if (workaround_erratum383) + pmap_update_pde(pmap, va, pde, newpde); + else + pde_store(pde, newpde); + + /* +* Invalidate a stale recursive mapping of the page table page. +*/ + pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); +} + +/* * pmap_remove_pde: do the things to unmap a superpage in a process */ static int @@ -2837,8 +2875,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t } } if (pmap == kernel_pmap) { - if (!pmap_demote_pde_locked(pmap, pdq, sva, lockp)) - panic("pmap_remove_pde: failed demotion"); + pmap_remove_kernel_pde(pmap, pdq, sva); } else { mpte = pmap_lookup_pt_page(pmap, sva); if (mpte != NULL) { Modified: head/sys/i386/i386/pmap.c == --- head/sys/i386/i386/pmap.c Sun Aug 4 23:45:04 2013(r253948) +++ head/sys/i386/i386/pmap.c Mon Aug 5 00:28:03 2013(r253949) @@ -2773,6 +2773,44 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t } /* + * Removes a 2- or 4MB page mapping from the kernel pmap. + */ +static void +pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) +{ + pd_entry_t newpde; + vm_paddr_t mptepa; + vm_page_t mpte; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mpte = pmap_lookup_pt_page(pmap, va); + if (mpte == NULL) + panic("pmap_remove_kernel_pde: Missing pt page."); + + pmap_remove_pt_page(pmap, mpte); + mptepa = VM_PAGE_TO_PHYS(mpte); + newpde = mptepa | PG_M | PG_A | PG_RW | PG_V; + + /* +* Initialize the page table page. +*/ + pagezero((void *)&KPTmap[i386_btop(trunc_4mpage(va))]); + + /* +* Remove the mapping. +*/ + if (workaround_erratum383) + pmap_update_pde(pmap, va, pde, newpde); + else + pmap_kenter_pde(va, newpde); + + /* +* Invalidate the recursive mapping of the page table page. 
+*/ + pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); +} + +/* * pmap_remove_pde: do the things to unmap a superpage in a process */ static void @@ -2814,8 +2852,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t } } if (pmap == kernel_pmap) { - if (!pmap_demote_pde(pmap, pdq, sva)) - panic("pmap_remove_pde: failed demotion"); + pmap_remove_kernel_pde(pmap, pdq, sva); } else { mpte = pmap_lookup_pt_page(pmap, sva); if (mpte != NULL) { ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r254025 - in head/sys: amd64/amd64 arm/arm arm/at91 arm/mv/armadaxp arm/s3c2xx0 arm/xscale/i80321 arm/xscale/i8134x arm/xscale/ixp425 cddl/compat/opensolaris/kern cddl/compat/opensolari...
Author: jeff Date: Wed Aug 7 06:21:20 2013 New Revision: 254025 URL: http://svnweb.freebsd.org/changeset/base/254025 Log: Replace kernel virtual address space allocation with vmem. This provides transparent layering and better fragmentation. - Normalize functions that allocate memory to use kmem_* - Those that allocate address space are named kva_* - Those that operate on maps are named kmap_* - Implement recursive allocation handling for kmem_arena in vmem. Reviewed by: alc Tested by:pho Sponsored by: EMC / Isilon Storage Division Modified: head/sys/amd64/amd64/mp_machdep.c head/sys/amd64/amd64/pmap.c head/sys/amd64/amd64/sys_machdep.c head/sys/amd64/amd64/vm_machdep.c head/sys/arm/arm/bus_space_generic.c head/sys/arm/arm/busdma_machdep-v6.c head/sys/arm/arm/busdma_machdep.c head/sys/arm/arm/mp_machdep.c head/sys/arm/arm/pmap-v6.c head/sys/arm/arm/pmap.c head/sys/arm/arm/vm_machdep.c head/sys/arm/at91/at91.c head/sys/arm/mv/armadaxp/armadaxp_mp.c head/sys/arm/s3c2xx0/s3c2xx0_space.c head/sys/arm/xscale/i80321/i80321_space.c head/sys/arm/xscale/i8134x/i81342_space.c head/sys/arm/xscale/ixp425/ixp425_pci_space.c head/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c head/sys/cddl/compat/opensolaris/sys/kmem.h head/sys/compat/linux/linux_misc.c head/sys/compat/ndis/subr_ntoskrnl.c head/sys/dev/bktr/bktr_core.c head/sys/dev/drm/drm_scatter.c head/sys/dev/drm2/drm_scatter.c head/sys/dev/drm2/i915/intel_ringbuffer.c head/sys/dev/drm2/ttm/ttm_bo_util.c head/sys/dev/xen/blkback/blkback.c head/sys/dev/xen/netback/netback.c head/sys/dev/xen/xenpci/xenpci.c head/sys/i386/i386/machdep.c head/sys/i386/i386/mp_machdep.c head/sys/i386/i386/pmap.c head/sys/i386/i386/sys_machdep.c head/sys/i386/i386/vm_machdep.c head/sys/i386/ibcs2/imgact_coff.c head/sys/i386/pci/pci_cfgreg.c head/sys/i386/xen/mp_machdep.c head/sys/i386/xen/pmap.c head/sys/ia64/ia64/mp_machdep.c head/sys/kern/imgact_gzip.c head/sys/kern/init_main.c head/sys/kern/kern_exec.c head/sys/kern/kern_malloc.c head/sys/kern/kern_mbuf.c head/sys/kern/kern_sharedpage.c head/sys/kern/subr_busdma_bufalloc.c head/sys/kern/subr_vmem.c head/sys/kern/vfs_bio.c head/sys/mips/mips/mp_machdep.c head/sys/mips/mips/pmap.c head/sys/mips/mips/vm_machdep.c head/sys/mips/sibyte/sb_zbpci.c head/sys/ofed/include/linux/dma-mapping.h head/sys/ofed/include/linux/gfp.h head/sys/ofed/include/linux/linux_compat.c head/sys/pc98/pc98/machdep.c head/sys/powerpc/aim/mmu_oea.c head/sys/powerpc/aim/mmu_oea64.c head/sys/powerpc/aim/vm_machdep.c head/sys/powerpc/booke/pmap.c head/sys/powerpc/booke/vm_machdep.c head/sys/powerpc/powerpc/busdma_machdep.c head/sys/powerpc/powerpc/mp_machdep.c head/sys/sparc64/sparc64/bus_machdep.c head/sys/sparc64/sparc64/mem.c head/sys/sparc64/sparc64/mp_machdep.c head/sys/sparc64/sparc64/pmap.c head/sys/sparc64/sparc64/vm_machdep.c head/sys/vm/memguard.c head/sys/vm/memguard.h head/sys/vm/pmap.h head/sys/vm/uma_core.c head/sys/vm/vm_extern.h head/sys/vm/vm_glue.c head/sys/vm/vm_init.c head/sys/vm/vm_kern.c head/sys/vm/vm_kern.h head/sys/vm/vm_map.c head/sys/vm/vm_map.h head/sys/vm/vm_object.c head/sys/x86/x86/busdma_machdep.c head/sys/xen/gnttab.c Modified: head/sys/amd64/amd64/mp_machdep.c == --- head/sys/amd64/amd64/mp_machdep.c Wed Aug 7 06:05:57 2013 (r254024) +++ head/sys/amd64/amd64/mp_machdep.c Wed Aug 7 06:21:20 2013 (r254025) @@ -938,10 +938,14 @@ start_all_aps(void) apic_id = cpu_apic_ids[cpu]; /* allocate and set up an idle stack data page */ - bootstacks[cpu] = (void *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); - 
doublefault_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE); - nmi_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE); - dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE); + bootstacks[cpu] = (void *)kmem_malloc(kernel_arena, + KSTACK_PAGES * PAGE_SIZE, M_WAITOK | M_ZERO); + doublefault_stack = (char *)kmem_malloc(kernel_arena, + PAGE_SIZE, M_WAITOK | M_ZERO); + nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, + M_WAITOK | M_ZERO); + dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, + M_WAITOK | M_ZERO); bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8; bootAP = cpu; Modified: head/sys/amd64/amd64/pmap.c == --- head/sys/amd64/amd64/pmap.c Wed Aug 7 06:05:57 2013(r254024) +++ head/sys/amd64/amd64/pmap.c Wed Aug 7 06:21:20 2013(r254025) @@ -860,7 +860,8 @@ pmap_init(void)
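A hedged before/after sketch of the API split described in the log; this fragment is illustrative and not taken from the diff, and error handling is omitted. Address-space-only allocations move to kva_alloc()/kva_free(), while backed allocations go through kmem_malloc()/kmem_free() against an arena.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>

/*
 * Illustrative only: one backed, wired allocation from kernel_arena
 * and one bare KVA range of the same size.
 */
static void
example_kva_and_kmem(vm_size_t size)
{
        vm_offset_t kva, mem;

        size = round_page(size);

        /* Backed, wired memory: previously kmem_alloc(kernel_map, size). */
        mem = kmem_malloc(kernel_arena, size, M_WAITOK | M_ZERO);

        /* Address space only, no pages backing it. */
        kva = kva_alloc(size);

        /* ... use them ... */

        kva_free(kva, size);
        kmem_free(kernel_arena, mem, size);
}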
Re: svn commit: r254025 - in head/sys: amd64/amd64 arm/arm arm/at91 arm/mv/armadaxp arm/s3c2xx0 arm/xscale/i80321 arm/xscale/i8134x arm/xscale/ixp425 cddl/compat/opensolaris/kern cddl/compat/opensolar
On Wed, 7 Aug 2013, Zbyszek Bodek wrote: On 07.08.2013 08:21, Jeff Roberson wrote: Author: jeff Date: Wed Aug 7 06:21:20 2013 New Revision: 254025 URL: http://svnweb.freebsd.org/changeset/base/254025 Log: Replace kernel virtual address space allocation with vmem. This provides transparent layering and better fragmentation. - Normalize functions that allocate memory to use kmem_* - Those that allocate address space are named kva_* - Those that operate on maps are named kmap_* - Implement recursive allocation handling for kmem_arena in vmem. Reviewed by: alc Tested by:pho Sponsored by: EMC / Isilon Storage Division Hello Jeff, I'm having some trouble on my ARM platform staring from this commit. Kernel panics on assertion very early. Please check out log below (as you can see bt doesn't look helpful but assertion message is visible. I can send you which functions are in bt if it is necessary). It would be very helpful to know which function is passing the unaligned value. I will resolve this later today if you can get me that information. Thanks, Jeff Best regards Zbyszek Bodek - ## Starting application at 0x00F0 ... GDB: no debug ports present KDB: debugger backends: ddb KDB: current backend: ddb Copyright (c) 1992-2013 The FreeBSD Project. Copyright (c) 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992, 1993, 1994 The Regents of the University of California. All rights reserved. FreeBSD is a registered trademark of The FreeBSD Foundation. FreeBSD 10.0-CURRENT #155 7ddb89a-dirty: Wed Aug 7 12:12:39 CEST 2013 zbb@:/home/zbb/projects/armsp/obj_gcc/arm.arm/home/zbb/projects/armsp/freebsd-arm-superpages/sys/ARMADAXP arm gcc version 4.2.1 20070831 patched [FreeBSD] WARNING: DIAGNOSTIC option enabled, expect reduced performance. panic: Assertion (size & vm->vm_quantum_mask) == 0 failed at /home/zbb/projects/armsp/freebsd-arm-superpages/sys/kern/subr_vmem.c:341 KDB: stack backtrace: (null)() at 0xc11f6874 pc = 0xc11f6874 lr = 0xc0f2dc00 (0xc0f2dc00) sp = 0xc1361c98 fp = 0xc1340288 (null)() at 0xc0f2dc00 pc = 0xc0f2dc00 lr = 0xc108dd14 (0xc108dd14) sp = 0xc1361db0 fp = 0xc1340288 r4 = 0xc133d654 (null)() at 0xc108dd14 pc = 0xc108dd14 lr = 0xc105a6f0 (0xc105a6f0) sp = 0xc1361db8 fp = 0xc1340288 r4 = 0xc132f940 (null)() at 0xc105a6f0 pc = 0xc105a6f0 lr = 0xc105a7dc (0xc105a7dc) sp = 0xc1361dd0 fp = 0xc1340288 r4 = 0xc124c6fc r5 = 0x1333 r6 = 0xc1340240 r7 = 0xc147d150 r8 = 0x0010 (null)() at 0xc105a7dc pc = 0xc105a7dc lr = 0xc10a2ef8 (0xc10a2ef8) sp = 0xc1361e08 fp = 0xc1340288 r0 = 0xc124c6fc r1 = 0xc12662b8 r2 = 0xc1266230 r3 = 0x0155 r4 = 0x0001 (null)() at 0xc10a2ef8 pc = 0xc10a2ef8 lr = 0xc10a37e4 (0xc10a37e4) sp = 0xc1361e20 fp = 0xc1340288 r4 = 0xc147d150 r5 = 0xc147d16c r6 = 0xc1340240 r7 = 0x1333 r8 = 0xc57b1000 (null)() at 0xc10a37e4 pc = 0xc10a37e4 lr = 0xc10a39d8 (0xc10a39d8) sp = 0xc1361e38 fp = 0xc1340288 r4 = 0xc1340240 r5 = 0x r6 = 0xc57b1000 r7 = 0x1333 r8 = 0x0010 (null)() at 0xc10a39d8 pc = 0xc10a39d8 lr = 0xc10a4f8c (0xc10a4f8c) sp = 0xc1361e50 fp = 0xc1340288 r4 = 0xc13402a4 r5 = 0x r6 = 0x0001 r7 = 0xc1340240 (null)() at 0xc10a4f8c pc = 0xc10a4f8c lr = 0xc1044398 (0xc1044398) sp = 0xc1361e98 fp = 0x r4 = 0x1333 r5 = 0xc1340240 r6 = 0xc1307574 r7 = 0x00f0004c r8 = 0x7f9ea674 r9 = 0x0001 r10 = 0x7ff1449c (null)() at 0xc1044398 pc = 0xc1044398 lr = 0xc1044408 (0xc1044408) sp = 0xc1361eb8 fp = 0x r4 = 0xc1291584 r5 = 0x00f00058 r6 = 0x00f0 (null)() at 0xc1044408 pc = 0xc1044408 lr = 0xc1010800 (0xc1010800) sp = 0xc1361ee8 fp = 0x r4 = 0xc1291584 r5 = 0x00f00058 r6 = 0x00f0 r7 = 
0x00f0004c r8 = 0x7f9ea674 r9 = 0x0001 r10 = 0x7ff1449c (null)() at 0xc1010800 pc = 0xc1010800 lr = 0xc0f00124 (0xc0f00124) sp = 0xc1361ef8 fp = 0x r4 = 0x00f00164 r5 = 0x00f00058 (null)() at 0xc0f00124 pc = 0xc0f00124 lr = 0xc0f00124 (0xc0f00124) sp = 0xc1361ef8 fp = 0x Unable to unwind further KDB: enter: panic [ thread pid 0 tid 0 ] Stopped at 0xc108dba8: ldrbr15, [r15, r15, ror r15]! db> ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
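The assertion in question fires when a size that is not a multiple of the arena's quantum reaches vmem, here very early in kernel arena setup on this ARM platform; the 0x1333 value visible in the backtrace registers is not page-aligned, which appears consistent with that. Whatever the entry point turns out to be, the caller-side rule is to round to the arena quantum first. An illustrative sketch (the helper is invented; kernel_arena and kmem_arena use PAGE_SIZE quanta):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/vmem.h>

/*
 * Illustrative only: sizes handed to a page-quantum arena must be
 * page-rounded, or the (size & vm_quantum_mask) == 0 assertion above
 * will trip.
 */
static int
example_add_span(vmem_t *arena, vmem_addr_t base, vmem_size_t size)
{

        /* A size like 0x1333 would trip the assert; round it up first. */
        size = round_page(size);
        return (vmem_add(arena, base, size, M_NOWAIT));
}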
svn commit: r254120 - head/sys/ofed/include/linux
Author: jeff Date: Fri Aug 9 03:24:12 2013 New Revision: 254120 URL: http://svnweb.freebsd.org/changeset/base/254120 Log: - Use the correct type in the linux bitops emulation. Submitted by: Maxim Ignatenko Modified: head/sys/ofed/include/linux/bitops.h Modified: head/sys/ofed/include/linux/bitops.h == --- head/sys/ofed/include/linux/bitops.hFri Aug 9 01:27:05 2013 (r254119) +++ head/sys/ofed/include/linux/bitops.hFri Aug 9 03:24:12 2013 (r254120) @@ -272,22 +272,25 @@ bitmap_empty(unsigned long *addr, int si return (1); } -#defineNBINT (NBBY * sizeof(int)) +#defineNBLONG (NBBY * sizeof(long)) #defineset_bit(i, a) \ -atomic_set_int(&((volatile int *)(a))[(i)/NBINT], 1 << (i) % NBINT) +atomic_set_long(&((volatile long *)(a))[(i)/NBLONG], 1 << (i) % NBLONG) #defineclear_bit(i, a) \ -atomic_clear_int(&((volatile int *)(a))[(i)/NBINT], 1 << (i) % NBINT) +atomic_clear_long(&((volatile long *)(a))[(i)/NBLONG], 1 << (i) % NBLONG) #definetest_bit(i, a) \ -!!(atomic_load_acq_int(&((volatile int *)(a))[(i)/NBINT]) & 1 << ((i) % NBINT)) +!!(atomic_load_acq_long(&((volatile long *)(a))[(i)/NBLONG]) & \ +1 << ((i) % NBLONG)) static inline long test_and_clear_bit(long bit, long *var) { long val; + var += bit / (sizeof(long) * NBBY); + bit %= sizeof(long) * NBBY; bit = 1 << bit; do { val = *(volatile long *)var; @@ -301,6 +304,8 @@ test_and_set_bit(long bit, long *var) { long val; + var += bit / (sizeof(long) * NBBY); + bit %= sizeof(long) * NBBY; bit = 1 << bit; do { val = *(volatile long *)var; ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
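A worked illustration of the indexing the fix adds, in plain C with a 64-bit long assumed: bit 70 lives in word 70 / 64 = 1 at position 70 % 64 = 6. The old code never adjusted 'var', so any bit number at or beyond one word's worth of bits was handled incorrectly.

#include <limits.h>
#include <stdio.h>

int
main(void)
{
        long bitmap[2] = { 0, 0 };
        long bit = 70;
        long *word;
        long mask;

        /* What test_and_set_bit() now does before touching memory: */
        word = bitmap + bit / (long)(sizeof(long) * CHAR_BIT);  /* word 1 */
        mask = 1L << (bit % (long)(sizeof(long) * CHAR_BIT));   /* bit 6 */
        *word |= mask;

        printf("bitmap[1] = %#lx\n", bitmap[1]);        /* prints 0x40 */
        return (0);
}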
svn commit: r254121 - head/sys/ofed/include/linux
Author: jeff Date: Fri Aug 9 03:24:48 2013 New Revision: 254121 URL: http://svnweb.freebsd.org/changeset/base/254121 Log: - Correctly handle various edge cases in sysfs emulation. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/ofed/include/linux/sysfs.h Modified: head/sys/ofed/include/linux/sysfs.h == --- head/sys/ofed/include/linux/sysfs.h Fri Aug 9 03:24:12 2013 (r254120) +++ head/sys/ofed/include/linux/sysfs.h Fri Aug 9 03:24:48 2013 (r254121) @@ -97,11 +97,14 @@ sysctl_handle_attr(SYSCTL_HANDLER_ARGS) error = -len; if (error != EIO) goto out; + buf[0] = '\0'; + } else if (len) { + len--; + if (len >= PAGE_SIZE) + len = PAGE_SIZE - 1; + /* Trim trailing newline. */ + buf[len] = '\0'; } - - /* Trim trailing newline. */ - len--; - buf[len] = '\0'; } /* Leave one trailing byte to append a newline. */ ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r254122 - in head/sys: ofed/include/rdma sys
Author: jeff Date: Fri Aug 9 03:26:17 2013 New Revision: 254122 URL: http://svnweb.freebsd.org/changeset/base/254122 Log: - Reserve a special AF for SDP. The one we were incorrectly using before was taken by another AF. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/ofed/include/rdma/sdp_socket.h head/sys/sys/socket.h Modified: head/sys/ofed/include/rdma/sdp_socket.h == --- head/sys/ofed/include/rdma/sdp_socket.h Fri Aug 9 03:24:48 2013 (r254121) +++ head/sys/ofed/include/rdma/sdp_socket.h Fri Aug 9 03:26:17 2013 (r254122) @@ -3,10 +3,12 @@ #ifndef SDP_SOCKET_H #define SDP_SOCKET_H +#ifndef __FreeBSD__ #ifndef AF_INET_SDP #define AF_INET_SDP 27 #define PF_INET_SDP AF_INET_SDP #endif +#endif #ifndef SDP_ZCOPY_THRESH #define SDP_ZCOPY_THRESH 80 Modified: head/sys/sys/socket.h == --- head/sys/sys/socket.h Fri Aug 9 03:24:48 2013(r254121) +++ head/sys/sys/socket.h Fri Aug 9 03:26:17 2013(r254122) @@ -230,7 +230,9 @@ struct accept_filter_arg { #defineAF_ARP 35 #defineAF_BLUETOOTH36 /* Bluetooth sockets */ #defineAF_IEEE8021137 /* IEEE 802.11 protocol */ -#defineAF_MAX 38 +#defineAF_INET_SDP 40 /* OFED Socket Direct Protocol ipv4 */ +#defineAF_INET6_SDP42 /* OFED Socket Direct Protocol ipv6 */ +#defineAF_MAX 42 /* * When allocating a new AF_ constant, please only allocate * even numbered constants for FreeBSD until 134 as odd numbered AF_ @@ -353,6 +355,8 @@ struct sockproto { #definePF_ARP AF_ARP #definePF_BLUETOOTHAF_BLUETOOTH #definePF_IEEE80211AF_IEEE80211 +#definePF_INET_SDP AF_INET_SDP +#definePF_INET6_SDPAF_INET6_SDP #definePF_MAX AF_MAX ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r254123 - in head/contrib/ofed: libsdp/src/linux management/infiniband-diags/src management/opensm/opensm
Author: jeff Date: Fri Aug 9 03:29:46 2013 New Revision: 254123 URL: http://svnweb.freebsd.org/changeset/base/254123 Log: - Fix compile errors from the clang conversion - Grab AF_SDP_INET from sys/socket.h Submitted by: Garrett Cooper Sponsored by: EMC / Isilon Storage Division Modified: head/contrib/ofed/libsdp/src/linux/sdp_inet.h head/contrib/ofed/management/infiniband-diags/src/sminfo.c head/contrib/ofed/management/opensm/opensm/osm_console.c head/contrib/ofed/management/opensm/opensm/osm_subnet.c Modified: head/contrib/ofed/libsdp/src/linux/sdp_inet.h == --- head/contrib/ofed/libsdp/src/linux/sdp_inet.h Fri Aug 9 03:26:17 2013(r254122) +++ head/contrib/ofed/libsdp/src/linux/sdp_inet.h Fri Aug 9 03:29:46 2013(r254123) @@ -29,8 +29,12 @@ */ #ifndef SOLARIS_BUILD +#ifdef __FreeBSD__ +#include +#else #define AF_INET_SDP 27 /* SDP socket protocol family */ #define AF_INET6_SDP 28 /* SDP socket protocol family */ +#endif #else #define AF_INET_SDP 31 /* This is an invalid family on native solaris * and will only work using QuickTransit */ Modified: head/contrib/ofed/management/infiniband-diags/src/sminfo.c == --- head/contrib/ofed/management/infiniband-diags/src/sminfo.c Fri Aug 9 03:26:17 2013(r254122) +++ head/contrib/ofed/management/infiniband-diags/src/sminfo.c Fri Aug 9 03:29:46 2013(r254123) @@ -72,10 +72,10 @@ enum { }; char *statestr[] = { - [SMINFO_NOTACT] "SMINFO_NOTACT", - [SMINFO_DISCOVER] "SMINFO_DISCOVER", - [SMINFO_STANDBY] "SMINFO_STANDBY", - [SMINFO_MASTER] "SMINFO_MASTER", + [SMINFO_NOTACT] = "SMINFO_NOTACT", + [SMINFO_DISCOVER] = "SMINFO_DISCOVER", + [SMINFO_STANDBY] = "SMINFO_STANDBY", + [SMINFO_MASTER] = "SMINFO_MASTER", }; #define STATESTR(s)(((unsigned)(s)) < SMINFO_STATE_LAST ? statestr[s] : "???") Modified: head/contrib/ofed/management/opensm/opensm/osm_console.c == --- head/contrib/ofed/management/opensm/opensm/osm_console.cFri Aug 9 03:26:17 2013(r254122) +++ head/contrib/ofed/management/opensm/opensm/osm_console.cFri Aug 9 03:29:46 2013(r254123) @@ -67,7 +67,10 @@ static struct { time_t previous; void (*loop_function) (osm_opensm_t * p_osm, FILE * out); } loop_command = { -on: 0, delay_s: 2, loop_function:NULL}; + .on = 0, + .delay_s = 2, + .loop_function = NULL, +}; static const struct command console_cmds[]; Modified: head/contrib/ofed/management/opensm/opensm/osm_subnet.c == --- head/contrib/ofed/management/opensm/opensm/osm_subnet.c Fri Aug 9 03:26:17 2013(r254122) +++ head/contrib/ofed/management/opensm/opensm/osm_subnet.c Fri Aug 9 03:29:46 2013(r254123) @@ -482,7 +482,7 @@ static void log_report(const char *fmt, va_start(args, fmt); vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); - printf(buf); + printf("%s", buf); cl_log_event("OpenSM", CL_LOG_INFO, buf, NULL, 0); } @@ -500,7 +500,7 @@ static void log_config_value(char *name, n = sizeof(buf); snprintf(buf + n, sizeof(buf) - n, "\n"); va_end(args); - printf(buf); + printf("%s", buf); cl_log_event("OpenSM", CL_LOG_INFO, buf, NULL, 0); } ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
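The sminfo.c and osm_console.c hunks replace GCC-only initializer syntax ("[index] value" for arrays, "field: value" for structs) with standard C99 designated initializers that clang compiles cleanly. A minimal illustration with an invented struct and values:

#include <stdio.h>

struct loop_cmd {
        int     on;
        int     delay_s;
        void    (*fn)(void);
};

/*
 * Obsolete GCC extension the old code used:
 *      static struct loop_cmd cmd = { on: 0, delay_s: 2, fn: NULL };
 * Standard C99 designated initializers:
 */
static struct loop_cmd cmd = {
        .on = 0,
        .delay_s = 2,
        .fn = NULL,
};

int
main(void)
{

        printf("delay %d\n", cmd.delay_s);
        return (0);
}

The osm_subnet.c hunks are a separate fix: printf(buf) treats logged data as a format string, so it becomes printf("%s", buf).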
svn commit: r254304 - in head/sys: sys vm
Author: jeff Date: Tue Aug 13 21:56:16 2013 New Revision: 254304 URL: http://svnweb.freebsd.org/changeset/base/254304 Log: Improve pageout flow control to wakeup more frequently and do less work while maintaining better LRU of active pages. - Change v_free_target to include the quantity previously represented by v_cache_min so we don't need to add them together everywhere we use them. - Add a pageout_wakeup_thresh that sets the free page count trigger for waking the page daemon. Set this 10% above v_free_min so we wakeup before any phase transitions in vm users. - Adjust down v_free_target now that we're willing to accept more pagedaemon wakeups. This means we process fewer pages in one iteration as well, leading to shorter lock hold times and less overall disruption. - Eliminate vm_pageout_page_stats(). This was a minor variation on the PQ_ACTIVE segment of the normal pageout daemon. Instead we now process 1 / vm_pageout_update_period pages every second. This causes us to visit the whole active list every 60 seconds. Previously we would only maintain the active LRU when we were short on pages which would mean it could be woefully out of date. Reviewed by: alc (slight variant of this) Discussed with: alc, kib, jhb Sponsored by: EMC / Isilon Storage Division Modified: head/sys/sys/vmmeter.h head/sys/vm/vm_page.c head/sys/vm/vm_page.h head/sys/vm/vm_pageout.c Modified: head/sys/sys/vmmeter.h == --- head/sys/sys/vmmeter.h Tue Aug 13 21:49:32 2013(r254303) +++ head/sys/sys/vmmeter.h Tue Aug 13 21:56:16 2013(r254304) @@ -98,7 +98,7 @@ struct vmmeter { u_int v_inactive_count; /* (q) pages inactive */ u_int v_cache_count;/* (f) pages on cache queue */ u_int v_cache_min; /* (c) min pages desired on cache queue */ - u_int v_cache_max; /* (c) max pages in cached obj */ + u_int v_cache_max; /* (c) max pages in cached obj (unused) */ u_int v_pageout_free_min; /* (c) min pages reserved for kernel */ u_int v_interrupt_free_min; /* (c) reserved pages for int code */ u_int v_free_severe;/* (c) severe page depletion point */ @@ -118,6 +118,8 @@ struct vmmeter { extern struct vmmeter cnt; +extern int vm_pageout_wakeup_thresh; + /* * Return TRUE if we are under our severe low-free-pages threshold * @@ -170,10 +172,7 @@ static __inline int vm_paging_target(void) { -return ( - (cnt.v_free_target + cnt.v_cache_min) - - (cnt.v_free_count + cnt.v_cache_count) -); +return (cnt.v_free_target - (cnt.v_free_count + cnt.v_cache_count)); } /* @@ -184,10 +183,7 @@ static __inline int vm_paging_needed(void) { -return ( - (cnt.v_free_reserved + cnt.v_cache_min) > - (cnt.v_free_count + cnt.v_cache_count) -); +return (cnt.v_free_count + cnt.v_cache_count < vm_pageout_wakeup_thresh); } #endif Modified: head/sys/vm/vm_page.c == --- head/sys/vm/vm_page.c Tue Aug 13 21:49:32 2013(r254303) +++ head/sys/vm/vm_page.c Tue Aug 13 21:56:16 2013(r254304) @@ -259,7 +259,6 @@ vm_page_domain_init(struct vm_domain *vm "vm active pagequeue"; *__DECONST(int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) = &cnt.v_active_count; - vmd->vmd_fullintervalcount = 0; vmd->vmd_page_count = 0; vmd->vmd_free_count = 0; vmd->vmd_segs = 0; Modified: head/sys/vm/vm_page.h == --- head/sys/vm/vm_page.h Tue Aug 13 21:49:32 2013(r254303) +++ head/sys/vm/vm_page.h Tue Aug 13 21:56:16 2013(r254304) @@ -223,7 +223,6 @@ struct vm_pagequeue { struct vm_domain { struct vm_pagequeue vmd_pagequeues[PQ_COUNT]; - int vmd_fullintervalcount; u_int vmd_page_count; u_int vmd_free_count; long vmd_segs; /* bitmask of the segments */ Modified: head/sys/vm/vm_pageout.c == --- 
head/sys/vm/vm_pageout.cTue Aug 13 21:49:32 2013(r254303) +++ head/sys/vm/vm_pageout.cTue Aug 13 21:56:16 2013(r254304) @@ -146,6 +146,7 @@ SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ int vm_pages_needed; /* Event on which pageout daemon sleeps */ int vm_pageout_deficit;/* Estimated number of pages deficit */ int vm_pageout_pages_needed; /* flag saying that the pageout daemon needs pages */ +int vm_pageout_wakeup_thresh; #if !defined(NO_SWAPPING) static int vm_pageout_req_swapout; /* XXX */ @@ -155,11 +156,7 @@ static struct mtx vm_daemon_mtx; MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF); #endif static in
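To put illustrative numbers on the new behavior (these figures are examples, not values from the commit): on a machine whose v_free_min works out to 25,000 pages, pageout_wakeup_thresh lands around 27,500 pages, so the page daemon is woken while a healthy reserve remains rather than at the old, lower trigger. With vm_pageout_update_period at its default of 60 seconds, an active queue holding 600,000 pages is scanned at roughly 10,000 pages per second, so every active page still has its act_count examined about once a minute without the old vm_pageout_page_stats() full-queue sweeps.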
svn commit: r254307 - in head/sys: kern vm
Author: jeff Date: Tue Aug 13 22:40:43 2013 New Revision: 254307 URL: http://svnweb.freebsd.org/changeset/base/254307 Log: - Add a statically allocated memguard arena since it is needed very early on. - Pass the appropriate flags to vmem_xalloc() when allocating space for the arena from kmem_arena. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/kern/subr_vmem.c head/sys/vm/memguard.c head/sys/vm/vm_kern.h Modified: head/sys/kern/subr_vmem.c == --- head/sys/kern/subr_vmem.c Tue Aug 13 22:05:50 2013(r254306) +++ head/sys/kern/subr_vmem.c Tue Aug 13 22:40:43 2013(r254307) @@ -57,6 +57,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include "opt_vm.h" + #include #include #include @@ -223,6 +225,11 @@ vmem_t *kmem_arena = &kmem_arena_storage vmem_t *buffer_arena = &buffer_arena_storage; vmem_t *transient_arena = &transient_arena_storage; +#ifdef DEBUG_MEMGUARD +static struct vmem memguard_arena_storage; +vmem_t *memguard_arena = &memguard_arena_storage; +#endif + /* * Fill the vmem's boundary tag cache. We guarantee that boundary tag * allocation will not fail once bt_fill() passes. To do so we cache Modified: head/sys/vm/memguard.c == --- head/sys/vm/memguard.c Tue Aug 13 22:05:50 2013(r254306) +++ head/sys/vm/memguard.c Tue Aug 13 22:40:43 2013(r254307) @@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -100,7 +101,6 @@ SYSCTL_PROC(_vm_memguard, OID_AUTO, desc CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, memguard_sysctl_desc, "A", "Short description of memory type to monitor"); -static vmem_t *memguard_map = NULL; static vm_offset_t memguard_cursor; static vm_offset_t memguard_base; static vm_size_t memguard_mapsize; @@ -206,8 +206,8 @@ memguard_init(vmem_t *parent) { vm_offset_t base; - vmem_alloc(parent, memguard_mapsize, M_WAITOK, &base); - memguard_map = vmem_create("memguard arena", base, memguard_mapsize, + vmem_alloc(parent, memguard_mapsize, M_BESTFIT | M_WAITOK, &base); + vmem_init(memguard_arena, "memguard arena", base, memguard_mapsize, PAGE_SIZE, 0, M_WAITOK); memguard_cursor = base; memguard_base = base; @@ -311,7 +311,7 @@ memguard_alloc(unsigned long req_size, i * of physical memory whether we allocate or hand off to * uma_large_alloc(), so keep those. */ - if (vmem_size(memguard_map, VMEM_ALLOC) >= memguard_physlimit && + if (vmem_size(memguard_arena, VMEM_ALLOC) >= memguard_physlimit && req_size < PAGE_SIZE) { addr = (vm_offset_t)NULL; memguard_fail_pgs++; @@ -328,8 +328,9 @@ memguard_alloc(unsigned long req_size, i * map, unless vm_map_findspace() is tweaked. */ for (;;) { - if (vmem_xalloc(memguard_map, size_v, 0, 0, 0, memguard_cursor, - VMEM_ADDR_MAX, M_BESTFIT | M_NOWAIT, &addr) == 0) + if (vmem_xalloc(memguard_arena, size_v, 0, 0, 0, + memguard_cursor, VMEM_ADDR_MAX, + M_BESTFIT | M_NOWAIT, &addr) == 0) break; /* * The map has no space. 
This may be due to @@ -348,7 +349,7 @@ memguard_alloc(unsigned long req_size, i addr += PAGE_SIZE; rv = kmem_back(kmem_object, addr, size_p, flags); if (rv != KERN_SUCCESS) { - vmem_xfree(memguard_map, addr, size_v); + vmem_xfree(memguard_arena, addr, size_v); memguard_fail_pgs++; addr = (vm_offset_t)NULL; goto out; @@ -419,7 +420,7 @@ memguard_free(void *ptr) kmem_unback(kmem_object, addr, size); if (sizev > size) addr -= PAGE_SIZE; - vmem_xfree(memguard_map, addr, sizev); + vmem_xfree(memguard_arena, addr, sizev); if (req_size < PAGE_SIZE) memguard_wasted -= (PAGE_SIZE - req_size); } Modified: head/sys/vm/vm_kern.h == --- head/sys/vm/vm_kern.h Tue Aug 13 22:05:50 2013(r254306) +++ head/sys/vm/vm_kern.h Tue Aug 13 22:40:43 2013(r254307) @@ -71,6 +71,7 @@ extern struct vmem *kernel_arena; extern struct vmem *kmem_arena; extern struct vmem *buffer_arena; extern struct vmem *transient_arena; +extern struct vmem *memguard_arena; extern vm_offset_t swapbkva; extern u_long vm_kmem_size; ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r254308 - head/sys/kern
Author: jeff Date: Tue Aug 13 22:41:24 2013 New Revision: 254308 URL: http://svnweb.freebsd.org/changeset/base/254308 Log: - Disable quantum caches on the kmem_arena. This can make fragmentation worse on small KVA systems. I had intended to only enable it for debugging. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/kern/kern_malloc.c Modified: head/sys/kern/kern_malloc.c == --- head/sys/kern/kern_malloc.c Tue Aug 13 22:40:43 2013(r254307) +++ head/sys/kern/kern_malloc.c Tue Aug 13 22:41:24 2013(r254308) @@ -747,7 +747,7 @@ kmeminit(void) tmp = vm_kmem_size; #endif vmem_init(kmem_arena, "kmem arena", kva_alloc(tmp), tmp, PAGE_SIZE, - PAGE_SIZE * 16, 0); + 0, 0); vmem_set_reclaim(kmem_arena, kmem_reclaim); #ifdef DEBUG_MEMGUARD ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r254387 - head/sys/vm
Author: jeff Date: Thu Aug 15 22:29:49 2013 New Revision: 254387 URL: http://svnweb.freebsd.org/changeset/base/254387 Log: - Fix bug in r254304. Use the ACTIVE pq count for the active list processing, not inactive. This was the result of a bad merge. Reported by: pho Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/vm_pageout.c Modified: head/sys/vm/vm_pageout.c == --- head/sys/vm/vm_pageout.cThu Aug 15 21:48:29 2013(r254386) +++ head/sys/vm/vm_pageout.cThu Aug 15 22:29:49 2013(r254387) @@ -1286,6 +1286,8 @@ relock_queues: * Compute the number of pages we want to try to move from the * active queue to the inactive queue. */ + pq = &vmd->vmd_pagequeues[PQ_ACTIVE]; + vm_pagequeue_lock(pq); pcount = pq->pq_cnt; page_shortage = vm_paging_target() + cnt.v_inactive_target - cnt.v_inactive_count; @@ -1304,8 +1306,6 @@ relock_queues: * track the per-page activity counter and use it to locate * deactivation candidates. */ - pq = &vmd->vmd_pagequeues[PQ_ACTIVE]; - vm_pagequeue_lock(pq); m = TAILQ_FIRST(&pq->pq_pl); while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) { ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r254543 - in head/sys: kern vm
Author: jeff Date: Mon Aug 19 23:02:39 2013 New Revision: 254543 URL: http://svnweb.freebsd.org/changeset/base/254543 Log: - Use an arbitrary but reasonably large import size for kva on architectures that don't support superpages. This keeps the number of spans and internal fragmentation lower. - When the user asks for alignment from vmem_xalloc adjust the imported size by 2*align to be certain we can satisfy the allocation. This comes at the expense of potential failures when the backend can't supply enough memory but could supply the requested size and alignment. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/kern/subr_vmem.c head/sys/vm/vm_init.c Modified: head/sys/kern/subr_vmem.c == --- head/sys/kern/subr_vmem.c Mon Aug 19 22:25:36 2013(r254542) +++ head/sys/kern/subr_vmem.c Mon Aug 19 23:02:39 2013(r254543) @@ -758,6 +758,7 @@ vmem_add1(vmem_t *vm, vmem_addr_t addr, bt_t *btfree; MPASS(type == BT_TYPE_SPAN || type == BT_TYPE_SPAN_STATIC); + MPASS((size & vm->vm_quantum_mask) == 0); btspan = bt_alloc(vm); btspan->bt_type = type; @@ -805,7 +806,7 @@ vmem_destroy1(vmem_t *vm) } static int -vmem_import(vmem_t *vm, vmem_size_t size, int flags) +vmem_import(vmem_t *vm, vmem_size_t size, vmem_size_t align, int flags) { vmem_addr_t addr; int error; @@ -813,6 +814,12 @@ vmem_import(vmem_t *vm, vmem_size_t size if (vm->vm_importfn == NULL) return EINVAL; + /* +* To make sure we get a span that meets the alignment we double it +* and add the size to the tail. This slightly overestimates. +*/ + if (align != vm->vm_quantum_mask + 1) + size = (align * 2) + size; size = roundup(size, vm->vm_import_quantum); /* @@ -1157,7 +1164,7 @@ vmem_xalloc(vmem_t *vm, const vmem_size_ * imported region. It is up to the user to specify the * import quantum such that it can satisfy any allocation. */ - if (vmem_import(vm, size, flags) == 0) + if (vmem_import(vm, size, align, flags) == 0) continue; /* Modified: head/sys/vm/vm_init.c == --- head/sys/vm/vm_init.c Mon Aug 19 22:25:36 2013(r254542) +++ head/sys/vm/vm_init.c Mon Aug 19 23:02:39 2013(r254543) @@ -156,7 +156,8 @@ vm_mem_init(dummy) #if VM_NRESERVLEVEL > 0 1 << (VM_LEVEL_0_ORDER + PAGE_SHIFT)); #else - PAGE_SIZE); + /* On non-superpage architectures want large import sizes. */ + PAGE_SIZE * 1024); #endif kmem_init_zero_region(); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
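A worked illustration of the alignment padding, with example numbers: suppose a caller wants 64 KB aligned to 64 KB and the arena would otherwise import exactly 64 KB. The imported span can begin at any address, say 16 KB past an alignment boundary, and then contains no 64 KB-aligned run of 64 KB at all. Importing align * 2 + size = 192 KB guarantees that some 64 KB boundary inside the span still has 64 KB of room after it; the minimal guarantee would be closer to size + align, which is why the code comment calls this a slight overestimate, and why the import can now fail in cases where an exactly sized, already aligned span from the backend would have sufficed.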
svn commit: r254544 - head/sys/vm
Author: jeff Date: Mon Aug 19 23:54:24 2013 New Revision: 254544 URL: http://svnweb.freebsd.org/changeset/base/254544 Log: - Increase the active lru refresh interval to 10 minutes. This has been shown to negatively impact some workloads and the goal is only to eliminate worst case behaviors for very long periods of paging inactivity. Eventually we should determine a more complex scaling factor for this feature. - Rate limit low memory callback handlers to limit thrashing. Set the default to 10 seconds. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/vm_pageout.c Modified: head/sys/vm/vm_pageout.c == --- head/sys/vm/vm_pageout.cMon Aug 19 23:02:39 2013(r254543) +++ head/sys/vm/vm_pageout.cMon Aug 19 23:54:24 2013(r254544) @@ -159,6 +159,8 @@ static int vm_max_launder = 32; static int vm_pageout_update_period; static int defer_swap_pageouts; static int disable_swap_pageouts; +static int lowmem_period = 10; +static int lowmem_ticks; #if defined(NO_SWAPPING) static int vm_swap_enabled = 0; @@ -179,6 +181,9 @@ SYSCTL_INT(_vm, OID_AUTO, pageout_update CTLFLAG_RW, &vm_pageout_update_period, 0, "Maximum active LRU update period"); +SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RW, &lowmem_period, 0, + "Low memory callback period"); + #if defined(NO_SWAPPING) SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RD, &vm_swap_enabled, 0, "Enable entire process swapout"); @@ -901,9 +906,10 @@ vm_pageout_scan(struct vm_domain *vmd, i /* * If we need to reclaim memory ask kernel caches to return -* some. +* some. We rate limit to avoid thrashing. */ - if (pass > 0) { + if (vmd == &vm_dom[0] && pass > 0 && + lowmem_ticks + (lowmem_period * hz) < ticks) { /* * Decrease registered cache sizes. */ @@ -913,6 +919,7 @@ vm_pageout_scan(struct vm_domain *vmd, i * drained above. */ uma_reclaim(); + lowmem_ticks = ticks; } /* @@ -1680,10 +1687,11 @@ vm_pageout(void) /* * Set interval in seconds for active scan. We want to visit each -* page at least once a minute. +* page at least once every ten minutes. This is to prevent worst +* case paging behaviors with stale active LRU. */ if (vm_pageout_update_period == 0) - vm_pageout_update_period = 60; + vm_pageout_update_period = 600; /* XXX does not really belong here */ if (vm_page_max_wired == 0) ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r254622 - head/sys/vm
Author: jeff Date: Wed Aug 21 22:39:19 2013 New Revision: 254622 URL: http://svnweb.freebsd.org/changeset/base/254622 Log: - Eliminate the vm object lock from the active queue scan. It is not necessary since we do not free or cache the page from active anymore. Document the one possible race that is harmless. Sponsored by: EMC / Isilon Storage Division Discussed with: alc Modified: head/sys/vm/vm_pageout.c Modified: head/sys/vm/vm_pageout.c == --- head/sys/vm/vm_pageout.cWed Aug 21 22:37:15 2013(r254621) +++ head/sys/vm/vm_pageout.cWed Aug 21 22:39:19 2013(r254622) @@ -1333,25 +1333,6 @@ relock_queues: m = next; continue; } - object = m->object; - if (!VM_OBJECT_TRYWLOCK(object) && - !vm_pageout_fallback_object_lock(m, &next)) { - VM_OBJECT_WUNLOCK(object); - vm_page_unlock(m); - m = next; - continue; - } - - /* -* Don't deactivate pages that are busy. -*/ - if (vm_page_busied(m) || m->hold_count != 0) { - vm_page_unlock(m); - VM_OBJECT_WUNLOCK(object); - vm_page_requeue_locked(m); - m = next; - continue; - } /* * The count for pagedaemon pages is done after checking the @@ -1367,7 +1348,15 @@ relock_queues: vm_page_aflag_clear(m, PGA_REFERENCED); act_delta += 1; } - if (object->ref_count != 0) + /* +* Unlocked object ref count check. Two races are possible. +* 1) The ref was transitioning to zero and we saw non-zero, +*the pmap bits will be checked unnecessarily. +* 2) The ref was transitioning to one and we saw zero. +*The page lock prevents a new reference to this page so +*we need not check the reference bits. +*/ + if (m->object->ref_count != 0) act_delta += pmap_ts_referenced(m); /* @@ -1387,9 +1376,6 @@ relock_queues: * queue depending on usage. */ if (act_delta == 0) { - KASSERT(object->ref_count != 0 || - !pmap_page_is_mapped(m), - ("vm_pageout_scan: page %p is mapped", m)); /* Dequeue to avoid later lock recursion. */ vm_page_dequeue_locked(m); vm_page_deactivate(m); @@ -1397,7 +1383,6 @@ relock_queues: } else vm_page_requeue_locked(m); vm_page_unlock(m); - VM_OBJECT_WUNLOCK(object); m = next; } vm_pagequeue_unlock(pq); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
Re: svn commit: r250411 - in head/sys: conf kern sys
On Thu, 9 May 2013, Marcel Moolenaar wrote: Author: marcel Date: Thu May 9 16:28:18 2013 New Revision: 250411 URL: http://svnweb.freebsd.org/changeset/base/250411 Log: Add option WITNESS_NO_VNODE to suppress printing LORs between VNODE locks. To support this, VNODE locks are created with the LK_IS_VNODE flag. This flag is propagated down using the LO_IS_VNODE flag. Note that WITNESS still records the LOR. Only the printing and the optional entering into the kernel debugger is bypassed with the WITNESS_NO_VNODE option. I'm replying to the original commit because the resulting thread got way out of hand. We need to all take a deep breath and take a pragmatic approach to solving the problem at hand. Let me first say I understand the utility here as this is also coming up in my organization. Test, and users, do not want to see erroneous warning messages. I understand that. Let's find a solution. Secondly, I think this project has grown too far for us to commit changes like this without some focused discussion. We need to be more mindful of the size of the impact and the number of people who are interested in a particular area. I'm not picking on you Marcel because this sort of thing has been coming up lately and we have all been guilty of it from time to time. There are more companies and individuals than ever trying to push work into the repository and we're having some growing pains. I am intimately familiar with the problems that lead to these erroneous witness messages as I have tracked down many of them and am even responsible for the code that generates them in some cases. Let me first outline a handful of generic problems. The root cause is that witness can not determine the real order between two locks due to relationships too complex to describe with a pair of strings. One example, which has been brought up, is the hierarchical nature of vnode locks. This impacts vnodes within one filesystem but it also involves vnodes between two different filesystems as you cross mount points. We can construct perfectly valid and deadlock free chains of mount points that have two different filesystem types in different orders which will LOR at the boundaries. We already skip duplicates to avoid this problem within each filesystem. We need to skip cross-filesystem duplicates, most desirably at the few specific places where this happens. This problem comes up especially for devfs because we lock devvps while file vnodes are locked but we lock devfs directories after the rootfs lock when crossing mountpoints in lookup. A second example, is locks of a fundamentally different type that have a complex ordering relationship. For example, a vnode lock may be acquired after a buf lock belonging to the parent's directory block. A cg buf lock may be acquired after any file buf lock. Here we want to ignore interactions between these two specific types at this particular location but not others as they may be unsafe. The third example, is a complex locking pattern with shared locks as presented by dirhash. We are seeing a similar pattern develop in the vm where we are going to use an exclusive object lock to protect pages or a shared object lock + a page lock. The semantics only get more complex as we push for more scalability. I expect to see more of these patterns develop. None of these problems can be solved with names alone. So far we've just lived with the warnings and we're no longer willing to accept that. 
What we need is a solution that blesses the specific instances and the specific lock classes involved without silencing legitimate warnings that may only occur after new code is added. For example, it may be safe to add a sx lock around some vnode code but you may not notice that you LOR if you silence all witness warnings related to the vnode lock site. I believe that the perfect solution would be a mechanism that could teach witness about and enforce these specific relationships. However, that may be computationally prohibitive and too complex to code. A more reasonable option would be to bless the specific relationships at the specific call sites. Turning all witness off at particular sites or with particular types renders important infrastructure useless for very large functional areas. It's also important to distinguish between squelching the error message from eliminating the other state that is saved at lock sites. We already have lock names and types. What I would propose we do is make the type 'vnode' for all vnodes and 'buf' for all bufs with the names used for the specific filesystems. Then you could specify a DUPOK that automatically blesses any filesystem to filesystem related LORs. In this way witness still records the call sites and unrelated LORs or panics still have the acquisition information. You could eventually unwind this to only DUPOK at the specific currently known places that we ant
svn commit: r250551 - in head/sys: conf kern sys
Author: jeff Date: Sun May 12 04:05:01 2013 New Revision: 250551 URL: http://svnweb.freebsd.org/changeset/base/250551 Log: - Add a new general purpose path-compressed radix trie which can be used with any structure containing a uint64_t index. The tree code auto-generates type safe wrappers. - Eliminate the buf splay and replace it with pctrie. This is not only significantly faster with large files but also allows for the possibility of shared locking. Reviewed by:alc, attilio Sponsored by: EMC / Isilon Storage Division Added: head/sys/kern/subr_pctrie.c (contents, props changed) head/sys/sys/_pctrie.h - copied, changed from r249323, head/sys/vm/_vm_radix.h head/sys/sys/pctrie.h (contents, props changed) Modified: head/sys/conf/files head/sys/kern/vfs_subr.c head/sys/sys/buf.h head/sys/sys/bufobj.h Modified: head/sys/conf/files == --- head/sys/conf/files Sun May 12 03:36:28 2013(r250550) +++ head/sys/conf/files Sun May 12 04:05:01 2013(r250551) @@ -2760,6 +2760,7 @@ kern/subr_module.cstandard kern/subr_msgbuf.c standard kern/subr_param.c standard kern/subr_pcpu.c standard +kern/subr_pctrie.c standard kern/subr_power.c standard kern/subr_prf.cstandard kern/subr_prof.c standard Added: head/sys/kern/subr_pctrie.c == --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/kern/subr_pctrie.c Sun May 12 04:05:01 2013(r250551) @@ -0,0 +1,705 @@ +/* + * Copyright (c) 2013 EMC Corp. + * Copyright (c) 2011 Jeffrey Roberson + * Copyright (c) 2008 Mayur Shardul + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *notice, this list of conditions and the following disclaimer in the + *documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + * Path-compressed radix trie implementation. + * + * The implementation takes into account the following rationale: + * - Size of the nodes should be as small as possible but still big enough + * to avoid a large maximum depth for the trie. This is a balance + * between the necessity to not wire too much physical memory for the nodes + * and the necessity to avoid too much cache pollution during the trie + * operations. + * - There is not a huge bias toward the number of lookup operations over + * the number of insert and remove operations. This basically implies + * that optimizations supposedly helping one operation but hurting the + * other might be carefully evaluated. 
+ * - On average not many nodes are expected to be fully populated, hence + * level compression may just complicate things. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_ddb.h" + +#include +#include +#include +#include + +#ifdef DDB +#include +#endif + +/* + * These widths should allow the pointers to a node's children to fit within + * a single cache line. The extra levels from a narrow width should not be + * a problem thanks to path compression. + */ +#ifdef __LP64__ +#definePCTRIE_WIDTH4 +#else +#definePCTRIE_WIDTH3 +#endif + +#definePCTRIE_COUNT(1 << PCTRIE_WIDTH) +#definePCTRIE_MASK (PCTRIE_COUNT - 1) +#definePCTRIE_LIMIT(howmany((sizeof(uint64_t) * NBBY), PCTRIE_WIDTH) - 1) + +/* Flag bits stored in node pointers. */ +#definePCTRIE_ISLEAF 0x1 +#definePCTRIE_FLAGS0x1 +#definePCTRIE_PAD PCTRIE_FLAGS + +/* Returns one unit associated with specified level. */ +#definePCTRIE_UNITLEVEL(lev) \ +
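Since the new subr_pctrie.c is cut off above, a minimal sketch of how the key is consumed may be useful (illustrative only, using the PCTRIE_WIDTH/PCTRIE_MASK constants shown; the committed helpers may differ in detail):

    /* Slot within a node for a given level of the trie. */
    static __inline int
    pctrie_slot_sketch(uint64_t index, int level)
    {

        return ((index >> (level * PCTRIE_WIDTH)) & PCTRIE_MASK);
    }

With PCTRIE_WIDTH of 4 on LP64 each node has 16 children and a 64-bit key needs at most 16 levels; path compression skips the levels where only a single child would be populated, which is what keeps lookups on sparse indices short.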
svn commit: r250578 - head/sys/sys
Author: jeff Date: Sun May 12 20:44:28 2013 New Revision: 250578 URL: http://svnweb.freebsd.org/changeset/base/250578 Log: - pctrie really only requires two byte alignment so that there is a single bit available for a flag in the pointer. However, it felt more correct to enforce natural alignment of the key pointer. Unfortunately on 32bit architectures 64bit integers are not always naturally aligned. Change the assert to enforce only 32bit alignment of the 64bit key for now to fix the build. A more correct fix would be to properly sort the struct buf fields which definitely suffer from bloat due to padding. Modified: head/sys/sys/pctrie.h Modified: head/sys/sys/pctrie.h == --- head/sys/sys/pctrie.h Sun May 12 16:50:18 2013(r250577) +++ head/sys/sys/pctrie.h Sun May 12 20:44:28 2013(r250578) @@ -38,7 +38,11 @@ #definePCTRIE_DEFINE(name, type, field, allocfn, freefn) \ \ CTASSERT(sizeof(((struct type *)0)->field) == sizeof(uint64_t)); \ -CTASSERT((__offsetof(struct type, field) & (sizeof(uint64_t) - 1)) == 0); \ +/* \ + * XXX This assert protects flag bits, it does not enforce natural \ + * alignment. 32bit architectures do not naturally align 64bit fields. \ + */\ +CTASSERT((__offsetof(struct type, field) & (sizeof(uint32_t) - 1)) == 0); \ \ static __inline struct type * \ name##_PCTRIE_VAL2PTR(uint64_t *val) \ ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
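The two-byte alignment requirement exists because the trie tags each stored pointer with a flag in bit 0. A minimal sketch of the tag/untag step (illustrative, reusing the PCTRIE_ISLEAF and PCTRIE_FLAGS values from r250551):

    /* Tag: safe because any 2-byte-aligned pointer has bit 0 clear. */
    node = (struct pctrie_node *)((uintptr_t)val | PCTRIE_ISLEAF);

    /* Untag: mask the flag bits off to recover the key pointer. */
    val = (uint64_t *)((uintptr_t)node & ~(uintptr_t)PCTRIE_FLAGS);

The CTASSERT only needs to guarantee that low bit, so relaxing it to 32-bit alignment is sufficient for correctness even though it no longer proves natural alignment of the 64-bit key.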
svn commit: r251171 - in head/sys: fs/ext2fs fs/nandfs fs/nfsclient fs/nfsserver kern nfsclient nfsserver sys ufs/ffs
Author: jeff Date: Fri May 31 00:43:41 2013 New Revision: 251171 URL: http://svnweb.freebsd.org/changeset/base/251171 Log: - Convert the bufobj lock to rwlock. - Use a shared bufobj lock in getblk() and inmem(). - Convert softdep's lk to rwlock to match the bufobj lock. - Move INFREECNT to b_flags and protect it with the buf lock. - Remove unnecessary locking around bremfree() and BKGRDINPROG. Sponsored by: EMC / Isilon Storage Division Discussed with: mckusick, kib, mdf Modified: head/sys/fs/ext2fs/ext2_inode.c head/sys/fs/nandfs/nandfs_segment.c head/sys/fs/nandfs/nandfs_vnops.c head/sys/fs/nfsclient/nfs_clvnops.c head/sys/fs/nfsserver/nfs_nfsdport.c head/sys/kern/vfs_bio.c head/sys/kern/vfs_cluster.c head/sys/kern/vfs_default.c head/sys/kern/vfs_subr.c head/sys/nfsclient/nfs_subs.c head/sys/nfsclient/nfs_vnops.c head/sys/nfsserver/nfs_serv.c head/sys/sys/buf.h head/sys/sys/bufobj.h head/sys/ufs/ffs/ffs_inode.c head/sys/ufs/ffs/ffs_snapshot.c head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/ffs_vfsops.c Modified: head/sys/fs/ext2fs/ext2_inode.c == --- head/sys/fs/ext2fs/ext2_inode.c Fri May 31 00:31:45 2013 (r251170) +++ head/sys/fs/ext2fs/ext2_inode.c Fri May 31 00:43:41 2013 (r251171) @@ -43,6 +43,7 @@ #include #include #include +#include #include #include Modified: head/sys/fs/nandfs/nandfs_segment.c == --- head/sys/fs/nandfs/nandfs_segment.c Fri May 31 00:31:45 2013 (r251170) +++ head/sys/fs/nandfs/nandfs_segment.c Fri May 31 00:43:41 2013 (r251171) @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -702,7 +703,7 @@ nandfs_save_buf(struct buf *bp, uint64_t if (bp->b_bufobj != bo) { BO_LOCK(bp->b_bufobj); BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, - BO_MTX(bp->b_bufobj)); + BO_LOCKPTR(bp->b_bufobj)); KASSERT(BUF_ISLOCKED(bp), ("Problem with locking buffer")); } Modified: head/sys/fs/nandfs/nandfs_vnops.c == --- head/sys/fs/nandfs/nandfs_vnops.c Fri May 31 00:31:45 2013 (r251170) +++ head/sys/fs/nandfs/nandfs_vnops.c Fri May 31 00:43:41 2013 (r251171) @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -556,7 +557,7 @@ restart_locked: continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, - BO_MTX(bo)) == ENOLCK) + BO_LOCKPTR(bo)) == ENOLCK) goto restart; bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~(B_ASYNC | B_MANAGED); Modified: head/sys/fs/nfsclient/nfs_clvnops.c == --- head/sys/fs/nfsclient/nfs_clvnops.c Fri May 31 00:31:45 2013 (r251170) +++ head/sys/fs/nfsclient/nfs_clvnops.c Fri May 31 00:43:41 2013 (r251171) @@ -2852,7 +2852,7 @@ loop: error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, - BO_MTX(bo), "nfsfsync", slpflag, slptimeo); + BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo); if (error == 0) { BUF_UNLOCK(bp); goto loop; Modified: head/sys/fs/nfsserver/nfs_nfsdport.c == --- head/sys/fs/nfsserver/nfs_nfsdport.cFri May 31 00:31:45 2013 (r251170) +++ head/sys/fs/nfsserver/nfs_nfsdport.cFri May 31 00:43:41 2013 (r251171) @@ -1321,7 +1321,7 @@ nfsvno_fsync(struct vnode *vp, u_int64_t */ if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | - LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) { + LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { BO_LOCK(bo); continue; /* retry */ } Modified: head/sys/kern/vfs_bio.c == --- head/sys/kern/vfs_bio.c Fri May 31 00:31:45 2013(r251170) +++ head/sys/kern/vfs_bio.c Fri May 31 00:43:41 2013(r251171) @@ -418,11 +418,9 @@ bufcountwakeup(struct buf *bp) { int old; 
- KASSERT((bp->b_v
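The payoff of the rwlock conversion is that lookup-only paths can take the bufobj lock shared. A minimal sketch of that fast path (illustrative; BO_LOCKPTR() comes from the diff, and the committed getblk()/inmem() additionally keep the lock across the LK_INTERLOCK handoff to BUF_LOCK()):

    rw_rlock(BO_LOCKPTR(bo));        /* shared is enough for a lookup */
    bp = gbincore(bo, lblkno);
    rw_runlock(BO_LOCKPTR(bo));

Paths that add or remove buffers from the bufobj still take the lock exclusive with rw_wlock().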
svn commit: r251446 - head/sys/kern
Author: jeff Date: Wed Jun 5 23:53:00 2013 New Revision: 251446 URL: http://svnweb.freebsd.org/changeset/base/251446 Log: - Consolidate duplicate code into support functions. - Split the bqlock into bqclean and bqdirty locks. - Only acquire the wakeup synchronization locks when we cross a threshold requiring them. - Restructure the way flushbufqueues() targets work so they are more smp friendly and sane. Reviewed by: kib Discussed with: mckusick, attilio Sponsored by: EMC / Isilon Storage Division Mvfs_bio.c Modified: head/sys/kern/vfs_bio.c Modified: head/sys/kern/vfs_bio.c == --- head/sys/kern/vfs_bio.c Wed Jun 5 23:28:29 2013(r251445) +++ head/sys/kern/vfs_bio.c Wed Jun 5 23:53:00 2013(r251446) @@ -113,10 +113,11 @@ static void vfs_setdirty_locked_object(s static void vfs_vmio_release(struct buf *bp); static int vfs_bio_clcheck(struct vnode *vp, int size, daddr_t lblkno, daddr_t blkno); -static int buf_do_flush(struct vnode *vp); +static int buf_flush(struct vnode *vp, int); static int flushbufqueues(struct vnode *, int, int); static void buf_daemon(void); static void bremfreel(struct buf *bp); +static __inline void bd_wakeup(void); #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) static int sysctl_bufspace(SYSCTL_HANDLER_ARGS); @@ -217,8 +218,8 @@ SYSCTL_INT(_vfs, OID_AUTO, mappingrestar static int flushbufqtarget = 100; SYSCTL_INT(_vfs, OID_AUTO, flushbufqtarget, CTLFLAG_RW, &flushbufqtarget, 0, "Amount of work to do in flushbufqueues when helping bufdaemon"); -static long notbufdflashes; -SYSCTL_LONG(_vfs, OID_AUTO, notbufdflashes, CTLFLAG_RD, ¬bufdflashes, 0, +static long notbufdflushes; +SYSCTL_LONG(_vfs, OID_AUTO, notbufdflushes, CTLFLAG_RD, ¬bufdflushes, 0, "Number of dirty buffer flushes done by the bufdaemon helpers"); static long barrierwrites; SYSCTL_LONG(_vfs, OID_AUTO, barrierwrites, CTLFLAG_RW, &barrierwrites, 0, @@ -228,6 +229,37 @@ SYSCTL_INT(_vfs, OID_AUTO, unmapped_buf_ "Permit the use of the unmapped i/o"); /* + * Lock for the non-dirty bufqueues + */ +static struct mtx_padalign bqclean; + +/* + * Lock for the dirty queue. + */ +static struct mtx_padalign bqdirty; + +/* + * This lock synchronizes access to bd_request. + */ +static struct mtx_padalign bdlock; + +/* + * This lock protects the runningbufreq and synchronizes runningbufwakeup and + * waitrunningbufspace(). + */ +static struct mtx_padalign rbreqlock; + +/* + * Lock that protects needsbuffer and the sleeps/wakeups surrounding it. + */ +static struct mtx_padalign nblock; + +/* + * Lock that protects bdirtywait. + */ +static struct mtx_padalign bdirtylock; + +/* * Wakeup point for bufdaemon, as well as indicator of whether it is already * active. Set to 1 when the bufdaemon is already "on" the queue, 0 when it * is idling. @@ -243,11 +275,6 @@ static int bd_request; static int bd_speedupreq; /* - * This lock synchronizes access to bd_request. - */ -static struct mtx bdlock; - -/* * bogus page -- for I/O to/from partially complete buffers * this is a temporary solution to the problem, but it is not * really that bad. it would be better to split the buffer @@ -263,25 +290,19 @@ vm_page_t bogus_page; */ static int runningbufreq; -/* - * This lock protects the runningbufreq and synchronizes runningbufwakeup and - * waitrunningbufspace(). - */ -static struct mtx rbreqlock; - /* * Synchronization (sleep/wakeup) variable for buffer requests. * Can contain the VFS_BIO_NEED flags defined below; setting/clearing is done * by and/or. 
- * Used in numdirtywakeup(), bufspacewakeup(), bufcountwakeup(), bwillwrite(), + * Used in numdirtywakeup(), bufspacewakeup(), bufcountadd(), bwillwrite(), * getnewbuf(), and getblk(). */ static int needsbuffer; /* - * Lock that protects needsbuffer and the sleeps/wakeups surrounding it. + * Synchronization for bwillwrite() waiters. */ -static struct mtx nblock; +static int bdirtywait; /* * Definitions for the buffer free lists. @@ -301,9 +322,6 @@ static TAILQ_HEAD(bqueues, buf) bufqueue static int bq_len[BUFFER_QUEUES]; #endif -/* Lock for the bufqueues */ -static struct mtx bqlock; - /* * Single global constant for BUF_WMESG, to avoid getting multiple references. * buf_wmesg is referred from macros. @@ -311,7 +329,6 @@ static struct mtx bqlock; const char *buf_wmesg = BUF_WMESG; #define VFS_BIO_NEED_ANY 0x01/* any freeable buffer */ -#define VFS_BIO_NEED_DIRTYFLUSH0x02/* waiting for dirty buffer flush */ #define VFS_BIO_NEED_FREE 0x04/* wait for free bufs, hi hysteresis */ #define VFS_BIO_NEED_BUFSPACE 0x08/* wait for buf space, lo hysteresis */ @@ -337,25 +354,69 @@ sysctl_bufspace(SYSCTL_HANDLER_ARGS) #ifdef DIRECTIO extern void ffs_raw
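A minimal sketch of what the bqclean/bqdirty split buys (illustrative only; it assumes the pre-existing QUEUE_DIRTY index and queue layout, not the committed helpers): dirty-queue traffic from bufdaemon and flushbufqueues() now serializes on its own cache-line-padded mutex, so clean-queue consumers such as getnewbuf() no longer contend with it.

    /* Illustrative only: dirty-queue insertion under its own lock. */
    mtx_lock(&bqdirty);
    TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY], bp, b_freelist);
    mtx_unlock(&bqdirty);

    /* Clean-queue operations take bqclean instead. */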
svn commit: r251703 - in head/sys: amd64/amd64 i386/i386 i386/xen kern mips/mips sparc64/sparc64 sys
Author: jeff Date: Thu Jun 13 20:46:03 2013 New Revision: 251703 URL: http://svnweb.freebsd.org/changeset/base/251703 Log: - Add a BIT_FFS() macro and use it to replace cpusetffs_obj() Discussed with: attilio Sponsored by: EMC / Isilon Storage Division Modified: head/sys/amd64/amd64/mp_machdep.c head/sys/i386/i386/mp_machdep.c head/sys/i386/i386/pmap.c head/sys/i386/xen/mp_machdep.c head/sys/i386/xen/pmap.c head/sys/kern/kern_cpuset.c head/sys/mips/mips/mp_machdep.c head/sys/sparc64/sparc64/mp_machdep.c head/sys/sys/bitset.h head/sys/sys/cpuset.h Modified: head/sys/amd64/amd64/mp_machdep.c == --- head/sys/amd64/amd64/mp_machdep.c Thu Jun 13 20:41:09 2013 (r251702) +++ head/sys/amd64/amd64/mp_machdep.c Thu Jun 13 20:46:03 2013 (r251703) @@ -1150,7 +1150,7 @@ smp_targeted_tlb_shootdown(cpuset_t mask ipi_all_but_self(vector); } else { ncpu = 0; - while ((cpu = cpusetobj_ffs(&mask)) != 0) { + while ((cpu = CPU_FFS(&mask)) != 0) { cpu--; CPU_CLR(cpu, &mask); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, @@ -1299,7 +1299,7 @@ ipi_selected(cpuset_t cpus, u_int ipi) if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - while ((cpu = cpusetobj_ffs(&cpus)) != 0) { + while ((cpu = CPU_FFS(&cpus)) != 0) { cpu--; CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); Modified: head/sys/i386/i386/mp_machdep.c == --- head/sys/i386/i386/mp_machdep.c Thu Jun 13 20:41:09 2013 (r251702) +++ head/sys/i386/i386/mp_machdep.c Thu Jun 13 20:46:03 2013 (r251703) @@ -1249,7 +1249,7 @@ smp_targeted_tlb_shootdown(cpuset_t mask ipi_all_but_self(vector); } else { ncpu = 0; - while ((cpu = cpusetobj_ffs(&mask)) != 0) { + while ((cpu = CPU_FFS(&mask)) != 0) { cpu--; CPU_CLR(cpu, &mask); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, @@ -1398,7 +1398,7 @@ ipi_selected(cpuset_t cpus, u_int ipi) if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - while ((cpu = cpusetobj_ffs(&cpus)) != 0) { + while ((cpu = CPU_FFS(&cpus)) != 0) { cpu--; CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); Modified: head/sys/i386/i386/pmap.c == --- head/sys/i386/i386/pmap.c Thu Jun 13 20:41:09 2013(r251702) +++ head/sys/i386/i386/pmap.c Thu Jun 13 20:46:03 2013(r251703) @@ -1957,7 +1957,7 @@ pmap_lazyfix(pmap_t pmap) spins = 5000; /* Find least significant set bit. */ - lsb = cpusetobj_ffs(&mask); + lsb = CPU_FFS(&mask); MPASS(lsb != 0); lsb--; CPU_SETOF(lsb, &mask); Modified: head/sys/i386/xen/mp_machdep.c == --- head/sys/i386/xen/mp_machdep.c Thu Jun 13 20:41:09 2013 (r251702) +++ head/sys/i386/xen/mp_machdep.c Thu Jun 13 20:46:03 2013 (r251703) @@ -1039,7 +1039,7 @@ smp_targeted_tlb_shootdown(cpuset_t mask ipi_all_but_self(vector); } else { ncpu = 0; - while ((cpu = cpusetobj_ffs(&mask)) != 0) { + while ((cpu = CPU_FFS(&mask)) != 0) { cpu--; CPU_CLR(cpu, &mask); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, @@ -1132,7 +1132,7 @@ ipi_selected(cpuset_t cpus, u_int ipi) if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - while ((cpu = cpusetobj_ffs(&cpus)) != 0) { + while ((cpu = CPU_FFS(&cpus)) != 0) { cpu--; CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); Modified: head/sys/i386/xen/pmap.c == --- head/sys/i386/xen/pmap.cThu Jun 13 20:41:09 2013(r251702) +++ head/sys/i386/xen/pmap.cThu Jun 13 20:46:03 2013(r251703) @@ -1707,7 +1707,7 @@ pmap_lazyfix(pmap_t pmap) spins = 5000; /* Find least significant set bit. 
*/ - lsb = cpusetobj_ffs(&mask); + lsb = CPU_FFS(&mask); MPASS(lsb != 0); lsb--; CPU_SETOF(lsb, &m
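For reference, consumers use the new macro the same way everywhere: BIT_FFS()/CPU_FFS() return a 1-based bit number, or 0 for an empty set, so iteration is find, decrement, clear. A minimal sketch (the IPI call is just an example body):

    while ((cpu = CPU_FFS(&cpus)) != 0) {
        cpu--;                      /* convert to a 0-based CPU id */
        CPU_CLR(cpu, &cpus);
        ipi_cpu(cpu, ipi);          /* per-CPU work goes here */
    }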
svn commit: r251709 - head/sys/vm
Author: jeff Date: Thu Jun 13 21:05:38 2013 New Revision: 251709 URL: http://svnweb.freebsd.org/changeset/base/251709 Log: - Convert the slab free item list from a linked array of indices to a bitmap using sys/bitset. This is much simpler, has lower space overhead and is cheaper in most cases. - Use a second bitmap for invariants asserts and improve the quality of the asserts as well as the number of erroneous conditions that we will catch. - Drastically simplify sizing code. Special case refcnt zones since they will be going away. - Update stale comments. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/uma_core.c head/sys/vm/uma_dbg.c head/sys/vm/uma_int.h Modified: head/sys/vm/uma_core.c == --- head/sys/vm/uma_core.c Thu Jun 13 21:03:23 2013(r251708) +++ head/sys/vm/uma_core.c Thu Jun 13 21:05:38 2013(r251709) @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2002-2005, 2009 Jeffrey Roberson + * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson * Copyright (c) 2004, 2005 Bosko Milekic * Copyright (c) 2004-2006 Robert N. M. Watson * All rights reserved. @@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -145,8 +146,13 @@ static int booted = 0; #defineUMA_STARTUP22 /* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */ -static u_int uma_max_ipers; -static u_int uma_max_ipers_ref; +static const u_int uma_max_ipers = SLAB_SETSIZE; + +/* + * Only mbuf clusters use ref zones. Just provide enough references + * to support the one user. New code should not use the ref facility. + */ +static const u_int uma_max_ipers_ref = PAGE_SIZE / MCLBYTES; /* * This is the handle used to schedule events that need to happen @@ -208,7 +214,7 @@ static uint8_t bucket_size[BUCKET_ZONES] /* * Flags and enumerations to be passed to internal functions. */ -enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI }; +enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI }; #defineZFREE_STATFAIL 0x0001 /* Update zone failure statistic. */ #defineZFREE_STATFREE 0x0002 /* Update zone free statistic. */ @@ -885,18 +891,15 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t slab->us_keg = keg; slab->us_data = mem; slab->us_freecount = keg->uk_ipers; - slab->us_firstfree = 0; slab->us_flags = flags; - + BIT_FILL(SLAB_SETSIZE, &slab->us_free); +#ifdef INVARIANTS + BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree); +#endif if (keg->uk_flags & UMA_ZONE_REFCNT) { slabref = (uma_slabrefcnt_t)slab; - for (i = 0; i < keg->uk_ipers; i++) { - slabref->us_freelist[i].us_refcnt = 0; - slabref->us_freelist[i].us_item = i+1; - } - } else { for (i = 0; i < keg->uk_ipers; i++) - slab->us_freelist[i].us_item = i+1; + slabref->us_refcnt[i] = 0; } if (keg->uk_init != NULL) { @@ -1148,31 +1151,32 @@ keg_small_init(uma_keg_t keg) keg->uk_ppera = 1; } + /* +* Calculate the size of each allocation (rsize) according to +* alignment. If the requested size is smaller than we have +* allocation bits for we round it up. 
+*/ rsize = keg->uk_size; - + if (rsize < keg->uk_slabsize / SLAB_SETSIZE) + rsize = keg->uk_slabsize / SLAB_SETSIZE; if (rsize & keg->uk_align) rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1); - if (rsize < keg->uk_slabsize / 256) - rsize = keg->uk_slabsize / 256; - keg->uk_rsize = rsize; KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 || keg->uk_rsize < sizeof(struct pcpu), ("%s: size %u too large", __func__, keg->uk_rsize)); - if (keg->uk_flags & UMA_ZONE_OFFPAGE) { + if (keg->uk_flags & UMA_ZONE_REFCNT) + rsize += sizeof(uint32_t); + + if (keg->uk_flags & UMA_ZONE_OFFPAGE) shsize = 0; - } else if (keg->uk_flags & UMA_ZONE_REFCNT) { - rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */ - shsize = sizeof(struct uma_slab_refcnt); - } else { - rsize += UMA_FRITM_SZ; /* Account for linkage */ + else shsize = sizeof(struct uma_slab); - } keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize; - KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= 256, + KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE, ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers)); memused = keg->uk_ipers * rsize + shsize; @@ -1189,10 +1193,18 @@ keg_small_init(uma_keg_t keg) (keg->uk_fl
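A minimal sketch of what item allocation looks like with the bitset-backed free list (illustrative only, not the exact committed allocator):

    /* First free item: BIT_FFS() is 1-based, 0 means the slab is full. */
    freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
    BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
    item = slab->us_data + (keg->uk_rsize * freei);
    slab->us_freecount--;

Freeing is the mirror image, a BIT_SET() on the item's index, which is also what makes the INVARIANTS double-free checks against us_debugfree cheap.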
svn commit: r244444 - head/sys/kern
Author: jeff Date: Wed Dec 19 20:08:06 2012 New Revision: 244444 URL: http://svnweb.freebsd.org/changeset/base/244444 Log: - Correctly handle EWOULDBLOCK in quiesce_cpus Discussed with: mav Modified: head/sys/kern/subr_smp.c Modified: head/sys/kern/subr_smp.c == --- head/sys/kern/subr_smp.c Wed Dec 19 18:51:35 2012 (r244443) +++ head/sys/kern/subr_smp.c Wed Dec 19 20:08:06 2012 (r244444) @@ -766,8 +766,9 @@ quiesce_cpus(cpuset_t map, const char *w thread_unlock(curthread); while (gen[cpu] == pcpu->pc_idlethread->td_generation) { error = tsleep(quiesce_cpus, prio, wmesg, 1); - if (error) + if (error != EWOULDBLOCK) goto out; + error = 0; } } out: ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
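The fix reflects a general idiom: when a timed tsleep() is used purely to poll, EWOULDBLOCK only means the tick expired and the condition should be rechecked, while any other return is a real reason to bail out. A minimal sketch of the pattern, detached from quiesce_cpus() (condition_met() and chan are placeholders):

    for (;;) {
        if (condition_met())
            break;
        error = tsleep(&chan, prio, wmesg, 1);   /* sleep for one tick */
        if (error != EWOULDBLOCK)
            return (error);                      /* e.g. interrupted under PCATCH */
        error = 0;
    }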
svn commit: r244445 - in head/sys: kern sys tools
Author: jeff Date: Wed Dec 19 20:10:00 2012 New Revision: 25 URL: http://svnweb.freebsd.org/changeset/base/25 Log: - Add new machine parsable KTR macros for timing events. - Use this new format to automatically handle syscalls and VOPs. This changes the earlier format but is still human readable. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/kern/subr_syscall.c head/sys/sys/ktr.h head/sys/tools/vnode_if.awk Modified: head/sys/kern/subr_syscall.c == --- head/sys/kern/subr_syscall.cWed Dec 19 20:08:06 2012 (r24) +++ head/sys/kern/subr_syscall.cWed Dec 19 20:10:00 2012 (r25) @@ -77,13 +77,12 @@ syscallenter(struct thread *td, struct s if (KTRPOINT(td, KTR_SYSCALL)) ktrsyscall(sa->code, sa->narg, sa->args); #endif - - CTR6(KTR_SYSC, -"syscall: td=%p pid %d %s (%#lx, %#lx, %#lx)", - td, td->td_proc->p_pid, syscallname(p, sa->code), - sa->args[0], sa->args[1], sa->args[2]); + KTR_START4(KTR_SYSC, "syscall", syscallname(p, sa->code), + td, "pid:%d", td->td_proc->p_pid, "arg0:%p", sa->args[0], + "arg1:%p", sa->args[1], "arg2:%p", sa->args[2]); if (error == 0) { + STOPEVENT(p, S_SCE, sa->narg); if (p->p_flag & P_TRACED && p->p_stops & S_PT_SCE) { PROC_LOCK(p); @@ -150,10 +149,12 @@ syscallenter(struct thread *td, struct s sa->callp, NULL, (error) ? -1 : td->td_retval[0]); #endif syscall_thread_exit(td, sa->callp); - CTR4(KTR_SYSC, "syscall: p=%p error=%d return %#lx %#lx", - p, error, td->td_retval[0], td->td_retval[1]); } retval: + KTR_STOP4(KTR_SYSC, "syscall", syscallname(p, sa->code), + td, "pid:%d", td->td_proc->p_pid, "error:%d", error, + "retval0:%#lx", td->td_retval[0], "retval1:%#lx", + td->td_retval[1]); if (traced) { PROC_LOCK(p); td->td_dbgflags &= ~TDB_SCE; @@ -176,9 +177,6 @@ syscallret(struct thread *td, int error, */ userret(td, td->td_frame); - CTR4(KTR_SYSC, "syscall %s exit thread %p pid %d proc %s", - syscallname(p, sa->code), td, td->td_proc->p_pid, td->td_name); - #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) { ktrsysret(sa->code, (td->td_pflags & TDP_NERRNO) == 0 ? Modified: head/sys/sys/ktr.h == --- head/sys/sys/ktr.h Wed Dec 19 20:08:06 2012(r24) +++ head/sys/sys/ktr.h Wed Dec 19 20:10:00 2012(r25) @@ -244,6 +244,50 @@ void ktr_tracepoint(u_int mask, const ch point, a0, (v0), a1, (v1), a2, (v2), a3, (v3)) /* + * Start functions denote the start of a region of code or operation + * and should be paired with stop functions for timing of nested + * sequences. + * + * Specifying extra attributes with the name "key" will result in + * multi-part keys. For example a block device and offset pair + * might be used to describe a buf undergoing I/O. + */ +#defineKTR_START0(m, egroup, ident, key) \ + KTR_EVENT0(m, egroup, ident, "start:0x%jX", (uintmax_t)key) +#defineKTR_START1(m, egroup, ident, key, a0, v0) \ + KTR_EVENT1(m, egroup, ident, "start:0x%jX", (uintmax_t)key, a0, (v0)) +#defineKTR_START2(m, egroup, ident, key, a0, v0, a1, v1) \ + KTR_EVENT2(m, egroup, ident, "start:0x%jX", (uintmax_t)key, \ + a0, (v0), a1, (v1)) +#defineKTR_START3(m, egroup, ident, key, a0, v0, a1, v1, a2, v2)\ + KTR_EVENT3(m, egroup, ident, "start:0x%jX", (uintmax_t)key, \ + a0, (v0), a1, (v1), a2, (v2)) +#defineKTR_START4(m, egroup, ident, key, \ + a0, v0, a1, v1, a2, v2, a3, v3) \ + KTR_EVENT4(m, egroup, ident, "start:0x%jX", (uintmax_t)key, \ + a0, (v0), a1, (v1), a2, (v2), a3, (v3)) + +/* + * Stop functions denote the end of a region of code or operation + * and should be paired with start functions for timing of nested + * sequences. 
+ */ +#defineKTR_STOP0(m, egroup, ident, key) \ + KTR_EVENT0(m, egroup, ident, "stop:0x%jX", (uintmax_t)key) +#defineKTR_STOP1(m, egroup, ident, key, a0, v0) \ + KTR_EVENT1(m, egroup, ident, "stop:0x%jX", (uintmax_t)key, a0, (v0)) +#defineKTR_STOP2(m, egroup, ident, key, a0, v0, a1, v1) \ + KTR_EVENT2(m, egroup, ident, "stop:0x%jX", (uintmax_t)key, \ + a0, (v0), a1, (v1)) +#defineKTR_STOP3(m, egroup, ident, key, a0, v0, a1, v1, a2, v2)\ + KTR_EVENT3(m, egroup
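A minimal usage sketch based on the macro signatures added above (the event class and the extra attributes here are made up for illustration):

    KTR_START1(KTR_SPARE2, "bufwait", "biowait", bp, "vp:%p", bp->b_vp);
    /* ... the operation being timed ... */
    KTR_STOP1(KTR_SPARE2, "bufwait", "biowait", bp, "error:%d", error);

Both records carry the same key (the buf pointer), so a post-processor can pair starts with stops and compute per-object latencies, which is the point of the machine-parsable format.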
Re: svn commit: r242014 - head/sys/kern
On Wed, 24 Oct 2012, Attilio Rao wrote: On Wed, Oct 24, 2012 at 8:16 PM, Andre Oppermann wrote: On 24.10.2012 20:56, Jim Harris wrote: On Wed, Oct 24, 2012 at 11:41 AM, Adrian Chadd wrote: On 24 October 2012 11:36, Jim Harris wrote: Pad tdq_lock to avoid false sharing with tdq_load and tdq_cpu_idle. Ok, but.. struct mtx tdq_lock; /* run queue lock. */ + charpad[64 - sizeof(struct mtx)]; .. don't we have an existing compile time macro for the cache line size, which can be used here? Yes, but I didn't use it for a couple of reasons: 1) struct tdq itself is currently using __aligned(64), so I wanted to keep it consistent. 2) CACHE_LINE_SIZE is currently defined as 128 on x86, due to NetBurst-based processors having 128-byte cache sectors a while back. I had planned to start a separate thread on arch@ about this today on whether this was still appropriate. See also the discussion on svn-src-all regarding global struct mtx alignment. Thank you for proving my point. ;) Let's go back and see how we can do this the sanest way. These are the options I see at the moment: 1. sprinkle __aligned(CACHE_LINE_SIZE) all over the place This is wrong because it doesn't give padding. 2. use a macro like MTX_ALIGN that can be SMP/UP aware and in the future possibly change to a different compiler dependent align attribute What is this macro supposed to do? I don't understand that from your description. 3. embed __aligned(CACHE_LINE_SIZE) into struct mtx itself so it automatically gets aligned in all cases, even when dynamically allocated. This works but I think it is overkill for structures including sleep mutexes which are the vast majority. So I wouldn't certainly be in favor of such a patch. I agree. For locks with little contention we probably want smaller structures. For example, you wouldn't want to put a huge lock in every file descriptor. It would be nice to have an automatic way to pad every global lock though. I think it should be done as needed. Jeff Attilio -- Peace can only be achieved by understanding - A. Einstein ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
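For concreteness, a minimal sketch of the padding variant being debated (illustrative only; the structure must itself be cache-line aligned or the trailing pad does not isolate anything):

    struct padded_example {
        struct mtx      lock;
        /* Keep the frequently-read counter off the lock's cache line. */
        char            pad[CACHE_LINE_SIZE - sizeof(struct mtx)];
        volatile int    hot_counter;
    } __aligned(CACHE_LINE_SIZE);

Whether to write CACHE_LINE_SIZE or a literal 64 here is exactly the open question: with CACHE_LINE_SIZE still defined as 128 on x86, the pad doubles in size on machines whose real line is 64 bytes.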
svn commit: r242492 - head/sys/ufs/ffs
Author: jeff Date: Fri Nov 2 21:04:06 2012 New Revision: 242492 URL: http://svn.freebsd.org/changeset/base/242492 Log: - In cancel_mkdir_dotdot don't panic if the inodedep is not available. If the previous diradd had already finished it could have been reclaimed already. This would only happen under heavy dependency pressure. Reported by: Andrey Zonov Discussed with: mckusick MFC after:1 week Modified: head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Fri Nov 2 20:36:41 2012 (r242491) +++ head/sys/ufs/ffs/ffs_softdep.c Fri Nov 2 21:04:06 2012 (r242492) @@ -8579,7 +8579,7 @@ cancel_mkdir_dotdot(ip, dirrem, jremref) if (inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, 0, &inodedep) == 0) - panic("cancel_mkdir_dotdot: Lost inodedep"); + return (jremref); dap = inodedep->id_mkdiradd; if (dap == NULL || (dap->da_state & MKDIR_PARENT) == 0) return (jremref); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r242734 - head/sys/ufs/ffs
Author: jeff Date: Thu Nov 8 01:41:04 2012 New Revision: 242734 URL: http://svnweb.freebsd.org/changeset/base/242734 Log: - Implement BIO_FLUSH support around journal entries. This will not 100% solve power loss problems with dishonest write caches. However, it should improve the situation and force a full fsck when it is unable to resolve with the journal. - Resolve a case where the journal could wrap in an unsafe way causing us to prematurely lose journal entries in very specific scenarios. Discussed with: mckusick MFC after:1 month Modified: head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Thu Nov 8 01:38:30 2012 (r242733) +++ head/sys/ufs/ffs/ffs_softdep.c Thu Nov 8 01:41:04 2012 (r242734) @@ -88,6 +88,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include + #include #ifndef SOFTUPDATES @@ -802,6 +804,7 @@ static void handle_written_jnewblk(struc static void handle_written_jblkdep(struct jblkdep *); static void handle_written_jfreefrag(struct jfreefrag *); static void complete_jseg(struct jseg *); +static void complete_jsegs(struct jseg *); static void jseg_write(struct ufsmount *ump, struct jseg *, uint8_t *); static void jaddref_write(struct jaddref *, struct jseg *, uint8_t *); static void jremref_write(struct jremref *, struct jseg *, uint8_t *); @@ -1227,6 +1230,7 @@ static struct callout softdep_callout; static int req_pending; static int req_clear_inodedeps;/* syncer process flush some inodedeps */ static int req_clear_remove; /* syncer process flush some freeblks */ +static int softdep_flushcache = 0; /* Should we do BIO_FLUSH? */ /* * runtime statistics @@ -1310,6 +1314,8 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, cle &stat_cleanup_retries, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_failures, CTLFLAG_RW, &stat_cleanup_failures, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, flushcache, CTLFLAG_RW, +&softdep_flushcache, 0, ""); SYSCTL_DECL(_vfs_ffs); @@ -3078,6 +3084,67 @@ softdep_flushjournal(mp) FREE_LOCK(&lk); } +static void softdep_synchronize_completed(struct bio *); +static void softdep_synchronize(struct bio *, struct ufsmount *, void *); + +static void +softdep_synchronize_completed(bp) +struct bio *bp; +{ + struct jseg *oldest; + struct jseg *jseg; + + /* +* caller1 marks the last segment written before we issued the +* synchronize cache. +*/ + jseg = bp->bio_caller1; + oldest = NULL; + ACQUIRE_LOCK(&lk); + /* +* Mark all the journal entries waiting on the synchronize cache +* as completed so they may continue on. +*/ + while (jseg != NULL && (jseg->js_state & COMPLETE) == 0) { + jseg->js_state |= COMPLETE; + oldest = jseg; + jseg = TAILQ_PREV(jseg, jseglst, js_next); + } + /* +* Restart deferred journal entry processing from the oldest +* completed jseg. +*/ + if (oldest) + complete_jsegs(oldest); + + FREE_LOCK(&lk); + g_destroy_bio(bp); +} + +/* + * Send BIO_FLUSH/SYNCHRONIZE CACHE to the device to enforce write ordering + * barriers. The journal must be written prior to any blocks that depend + * on it and the journal can not be released until the blocks have be + * written. This code handles both barriers simultaneously. 
+ */ +static void +softdep_synchronize(bp, ump, caller1) + struct bio *bp; + struct ufsmount *ump; + void *caller1; +{ + + bp->bio_cmd = BIO_FLUSH; + bp->bio_flags |= BIO_ORDERED; + bp->bio_data = NULL; + bp->bio_offset = ump->um_cp->provider->mediasize; + bp->bio_length = 0; + bp->bio_done = softdep_synchronize_completed; + bp->bio_caller1 = caller1; + g_io_request(bp, + (struct g_consumer *)ump->um_devvp->v_bufobj.bo_private); +} + /* * Flush some journal records to disk. */ @@ -3092,8 +3159,10 @@ softdep_process_journal(mp, needwk, flag struct worklist *wk; struct jseg *jseg; struct buf *bp; + struct bio *bio; uint8_t *data; struct fs *fs; + int shouldflush; int segwritten; int jrecmin;/* Minimum records per block. */ int jrecmax;/* Maximum records per block. */ @@ -3104,6 +3173,9 @@ softdep_process_journal(mp, needwk, flag if (MOUNTEDSUJ(mp) == 0) return; + shouldflush = softdep_flushcache; + bio = NULL; + jseg = NULL; ump = VFSTOUFS(mp); fs = ump->um_fs; jblocks = ump->softdep_jblocks; @@ -3152,6 +3224,10 @@ softdep_process_journal(mp, needwk, flag LIST_INIT(&jseg->js_entries); LIST_INIT(&jseg->js_
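One practical note: as committed the new behaviour is opt-in (softdep_flushcache defaults to 0), so on drives with volatile write caches the extra ordering barrier only takes effect after enabling it, e.g. with `sysctl debug.softdep.flushcache=1`.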
svn commit: r242736 - head/sys/kern
Author: jeff Date: Thu Nov 8 01:46:47 2012 New Revision: 242736 URL: http://svnweb.freebsd.org/changeset/base/242736 Log: - Change ULE to use dynamic slice sizes for the timeshare queue in order to further reduce latency for threads in this queue. This should help as threads transition from realtime to timeshare. The latency is bound to a max of sched_slice until we have more than sched_slice / 6 threads runnable. Then the min slice is allotted to all threads and latency becomes (nthreads - 1) * min_slice. Discussed with: mav Modified: head/sys/kern/sched_ule.c Modified: head/sys/kern/sched_ule.c == --- head/sys/kern/sched_ule.c Thu Nov 8 01:42:54 2012(r242735) +++ head/sys/kern/sched_ule.c Thu Nov 8 01:46:47 2012(r242736) @@ -189,6 +189,12 @@ static struct td_sched td_sched0; #defineSCHED_INTERACT_HALF (SCHED_INTERACT_MAX / 2) #defineSCHED_INTERACT_THRESH (30) +/* + * These parameters determine the slice behavior for batch work. + */ +#defineSCHED_SLICE_DEFAULT_DIVISOR 10 /* ~94 ms, 12 stathz ticks. */ +#defineSCHED_SLICE_MIN_DIVISOR 6 /* DEFAULT/MIN = ~16 ms. */ + /* Flags kept in td_flags. */ #defineTDF_SLICEENDTDF_SCHED2 /* Thread time slice is over. */ @@ -201,9 +207,10 @@ static struct td_sched td_sched0; * preempt_thresh: Priority threshold for preemption and remote IPIs. */ static int sched_interact = SCHED_INTERACT_THRESH; -static int realstathz = 127; static int tickincr = 8 << SCHED_TICK_SHIFT; -static int sched_slice = 12; +static int realstathz = 127; /* reset during boot. */ +static int sched_slice = 10; /* reset during boot. */ +static int sched_slice_min = 1;/* reset during boot. */ #ifdef PREEMPTION #ifdef FULL_PREEMPTION static int preempt_thresh = PRI_MAX_IDLE; @@ -559,6 +566,30 @@ tdq_load_rem(struct tdq *tdq, struct thr } /* + * Bound timeshare latency by decreasing slice size as load increases. We + * consider the maximum latency as the sum of the threads waiting to run + * aside from curthread and target no more than sched_slice latency but + * no less than sched_slice_min runtime. + */ +static inline int +tdq_slice(struct tdq *tdq) +{ + int load; + + /* +* It is safe to use sys_load here because this is called from +* contexts where timeshare threads are running and so there +* cannot be higher priority load in the system. +*/ + load = tdq->tdq_sysload - 1; + if (load >= SCHED_SLICE_MIN_DIVISOR) + return (sched_slice_min); + if (load <= 1) + return (sched_slice); + return (sched_slice / load); +} + +/* * Set lowpri to its exact value by searching the run-queue and * evaluating curthread. curthread may be passed as an optimization. */ @@ -1384,7 +1415,8 @@ sched_initticks(void *dummy) int incr; realstathz = stathz ? stathz : hz; - sched_slice = realstathz / 10; /* ~100ms */ + sched_slice = realstathz / SCHED_SLICE_DEFAULT_DIVISOR; + sched_slice_min = sched_slice / SCHED_SLICE_MIN_DIVISOR; hogticks = imax(1, (2 * hz * sched_slice + realstathz / 2) / realstathz); @@ -1585,7 +1617,7 @@ schedinit(void) thread0.td_sched = &td_sched0; td_sched0.ts_ltick = ticks; td_sched0.ts_ftick = ticks; - td_sched0.ts_slice = sched_slice; + td_sched0.ts_slice = 0; } /* @@ -2003,8 +2035,10 @@ sched_wakeup(struct thread *td) sched_interact_update(td); sched_pctcpu_update(ts, 0); } - /* Reset the slice value after we sleep. */ - ts->ts_slice = sched_slice; + /* +* Reset the slice value since we slept and advanced the round-robin. 
+*/ + ts->ts_slice = 0; sched_add(td, SRQ_BORING); } @@ -2036,14 +2070,16 @@ sched_fork_thread(struct thread *td, str { struct td_sched *ts; struct td_sched *ts2; + struct tdq *tdq; + tdq = TDQ_SELF(); THREAD_LOCK_ASSERT(td, MA_OWNED); /* * Initialize child. */ ts = td->td_sched; ts2 = child->td_sched; - child->td_lock = TDQ_LOCKPTR(TDQ_SELF()); + child->td_lock = TDQ_LOCKPTR(tdq); child->td_cpuset = cpuset_ref(td->td_cpuset); ts2->ts_cpu = ts->ts_cpu; ts2->ts_flags = 0; @@ -2062,7 +2098,8 @@ sched_fork_thread(struct thread *td, str */ ts2->ts_slptime = ts->ts_slptime; ts2->ts_runtime = ts->ts_runtime; - ts2->ts_slice = 1; /* Attempt to quickly learn interactivity. */ + /* Attempt to quickly learn interactivity. */ + ts2->ts_slice = tdq_slice(tdq) - sched_slice_min; #ifdef KTR bzero(ts2->ts_name, sizeof(ts2->ts_name)); #endif @@ -2227,8 +2264,8 @@ sched_clock(struct th
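Worked through with the defaults: realstathz is 127, so sched_slice becomes 127 / 10 = 12 stathz ticks (about 94 ms) and sched_slice_min becomes 12 / 6 = 2 ticks (about 16 ms). tdq_slice() then scales between the two: with a load term of 3 each timeshare thread gets 12 / 3 = 4 ticks, and once the load term reaches SCHED_SLICE_MIN_DIVISOR (6) every thread gets the 2-tick minimum, bounding the worst-case timeshare latency at roughly (nthreads - 1) * 16 ms as the log describes.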
svn commit: r242815 - head/sys/ufs/ffs
Author: jeff Date: Fri Nov 9 04:04:25 2012 New Revision: 242815 URL: http://svnweb.freebsd.org/changeset/base/242815 Log: - Correct rev 242734, segments can sometimes get stuck. Be a bit more defensive with segment state. Reported by: b. f. Modified: head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Fri Nov 9 01:51:06 2012 (r242814) +++ head/sys/ufs/ffs/ffs_softdep.c Fri Nov 9 04:04:25 2012 (r242815) @@ -4291,13 +4291,16 @@ free_jsegs(jblocks) jblocks->jb_oldestseg = jseg; return; } + if ((jseg->js_state & ALLCOMPLETE) != ALLCOMPLETE) + break; if (jseg->js_seq > jblocks->jb_oldestwrseq) break; /* * We can free jsegs that didn't write entries when * oldestwrseq == js_seq. */ - if (jseg->js_cnt != 0) + if (jseg->js_seq == jblocks->jb_oldestwrseq && + jseg->js_cnt != 0) break; free_jseg(jseg, jblocks); } ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r242924 - head/sys/ufs/ffs
Author: jeff Date: Mon Nov 12 19:53:55 2012 New Revision: 242924 URL: http://svnweb.freebsd.org/changeset/base/242924 Log: - Fix a bug that has existed since the original softdep implementation. When a background copy of a cg is written we complete any work associated with that bmsafemap. If new work has been added to the non-background copy of the buffer it will be completed before the next write happens. The solution is to do the rollbacks when we make the copy so only those dependencies that were present at the time of writing will be completed when the background write completes. This would've resulted in various bitmap related corruptions and panics. It also would've expired journal entries early causing journal replay to miss some records. MFC after:2 weeks Modified: head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Mon Nov 12 18:38:54 2012 (r242923) +++ head/sys/ufs/ffs/ffs_softdep.c Mon Nov 12 19:53:55 2012 (r242924) @@ -977,7 +977,7 @@ static struct freework *newfreework(stru struct freework *, ufs_lbn_t, ufs2_daddr_t, int, int, int); static int jwait(struct worklist *, int); static struct inodedep *inodedep_lookup_ip(struct inode *); -static int bmsafemap_rollbacks(struct bmsafemap *); +static int bmsafemap_backgroundwrite(struct bmsafemap *, struct buf *); static struct freefile *handle_bufwait(struct inodedep *, struct workhead *); static void handle_jwork(struct workhead *); static struct mkdir *setup_newdir(struct diradd *, ino_t, ino_t, struct buf *, @@ -1795,7 +1795,7 @@ softdep_move_dependencies(oldbp, newbp) while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) { LIST_REMOVE(wk, wk_list); if (wk->wk_type == D_BMSAFEMAP && - bmsafemap_rollbacks(WK_BMSAFEMAP(wk))) + bmsafemap_backgroundwrite(WK_BMSAFEMAP(wk), newbp)) dirty = 1; if (wktail == 0) LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list); @@ -5173,9 +5173,15 @@ jnewblk_merge(new, old, wkhd) return (new); /* Replace a jfreefrag with a jnewblk. */ if (new->wk_type == D_JFREEFRAG) { + if (WK_JNEWBLK(old)->jn_blkno != WK_JFREEFRAG(new)->fr_blkno) + panic("jnewblk_merge: blkno mismatch: %p, %p", + old, new); cancel_jfreefrag(WK_JFREEFRAG(new)); return (old); } + if (old->wk_type != D_JNEWBLK || new->wk_type != D_JNEWBLK) + panic("jnewblk_merge: Bad type: old %d new %d\n", + old->wk_type, new->wk_type); /* * Handle merging of two jnewblk records that describe * different sets of fragments in the same block. @@ -10504,7 +10510,7 @@ initiate_write_bmsafemap(bmsafemap, bp) ino_t ino; if (bmsafemap->sm_state & IOSTARTED) - panic("initiate_write_bmsafemap: Already started\n"); + return; bmsafemap->sm_state |= IOSTARTED; /* * Clear any inode allocations which are pending journal writes. @@ -10515,10 +10521,6 @@ initiate_write_bmsafemap(bmsafemap, bp) inosused = cg_inosused(cgp); LIST_FOREACH(jaddref, &bmsafemap->sm_jaddrefhd, ja_bmdeps) { ino = jaddref->ja_ino % fs->fs_ipg; - /* -* If this is a background copy the inode may not -* be marked used yet. 
-*/ if (isset(inosused, ino)) { if ((jaddref->ja_mode & IFMT) == IFDIR) cgp->cg_cs.cs_ndir--; @@ -10527,7 +10529,7 @@ initiate_write_bmsafemap(bmsafemap, bp) jaddref->ja_state &= ~ATTACHED; jaddref->ja_state |= UNDONE; stat_jaddref++; - } else if ((bp->b_xflags & BX_BKGRDMARKER) == 0) + } else panic("initiate_write_bmsafemap: inode %ju " "marked free", (uintmax_t)jaddref->ja_ino); } @@ -10542,9 +10544,8 @@ initiate_write_bmsafemap(bmsafemap, bp) LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) { if (jnewblk_rollback(jnewblk, fs, cgp, blksfree)) continue; - if ((bp->b_xflags & BX_BKGRDMARKER) == 0) - panic("initiate_write_bmsafemap: block %jd " - "marked free", jnewblk->jn_blkno); + panic("initiate_write_bmsafemap: block %jd
svn commit: r243017 - head/sbin/fsck_ffs
Author: jeff Date: Wed Nov 14 06:31:47 2012 New Revision: 243017 URL: http://svnweb.freebsd.org/changeset/base/243017 Log: - blk_equals() is too strict. If the journal entry defines more frags than we're claiming it should still be considered an exact match. This would previously leak frags that had been extended. - If there is a sequence number problem in the journal print the sequence numbers we've seen so far for debugging. - Clean up the block mask related debuging printfs. Some are redundant. MFC after:1 week Modified: head/sbin/fsck_ffs/suj.c Modified: head/sbin/fsck_ffs/suj.c == --- head/sbin/fsck_ffs/suj.cWed Nov 14 06:23:32 2012(r243016) +++ head/sbin/fsck_ffs/suj.cWed Nov 14 06:31:47 2012(r243017) @@ -504,7 +504,7 @@ blk_equals(struct jblkrec *brec, ino_t i return (0); if (brec->jb_blkno + brec->jb_oldfrags != start) return (0); - if (brec->jb_frags != frags) + if (brec->jb_frags < frags) return (0); return (1); } @@ -551,7 +551,6 @@ blk_freemask(ufs2_daddr_t blk, ino_t ino brec = (struct jblkrec *)srec->sr_rec; /* * If the block overlaps but does not match -* exactly it's a new allocation. If it matches * exactly this record refers to the current * location. */ @@ -648,7 +647,8 @@ blk_free(ufs2_daddr_t bno, int mask, int uint8_t *blksfree; if (debug) - printf("Freeing %d frags at blk %jd\n", frags, bno); + printf("Freeing %d frags at blk %jd mask 0x%x\n", + frags, bno, mask); cg = dtog(fs, bno); sc = cg_lookup(cg); cgp = sc->sc_cgp; @@ -1143,12 +1143,8 @@ ino_adjblks(struct suj_ino *sino) static void blk_free_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { - int mask; - mask = blk_freemask(blk, ino, lbn, frags); - if (debug) - printf("blk %jd freemask 0x%X\n", blk, mask); - blk_free(blk, mask, frags); + blk_free(blk, blk_freemask(blk, ino, lbn, frags), frags); } /* @@ -1163,8 +1159,6 @@ blk_free_lbn(ufs2_daddr_t blk, ino_t ino int mask; mask = blk_freemask(blk, ino, lbn, frags); - if (debug) - printf("blk %jd freemask 0x%X\n", blk, mask); resid = 0; if (lbn <= -NDADDR && follow && mask == 0) indir_visit(ino, lbn, blk, &resid, blk_free_visit, VISIT_INDIR); @@ -2334,6 +2328,10 @@ suj_prune(void) } if (newseq != oldseq) { + TAILQ_FOREACH(seg, &allsegs, ss_next) { + printf("%jd, ", seg->ss_rec.jsr_seq); + } + printf("\n"); err_suj("Journal file sequence mismatch %jd != %jd\n", newseq, oldseq); } ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
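A concrete case of the first fix: if a journal record describes a 4-fragment allocation of a block and fsck later asks whether the first two fragments of that block are covered, the old `jb_frags != frags` comparison rejected the match even though the record plainly covers the claim, so fragments that had been extended were leaked; with `jb_frags < frags` such a record is accepted as an exact match and the fragments are accounted for.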
svn commit: r243018 - head/sys/ufs/ffs
Author: jeff Date: Wed Nov 14 06:37:43 2012 New Revision: 243018 URL: http://svnweb.freebsd.org/changeset/base/243018 Log: - Fix a truncation bug with softdep journaling that could leak blocks on crash. When truncating a file that never made it to disk we use the canceled allocation dependencies to hold the journal records until the truncation completes. Previously allocdirect dependencies on the id_bufwait list were not considered and their journal space could expire before the bitmaps were written. Cancel them and attach them to the freeblks as we do for other allocdirects. - Add KTR traces that were used to debug this problem. - When adding jsegdeps, always use jwork_insert() so we don't have more than one segdep on a given jwork list. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Wed Nov 14 06:31:47 2012 (r243017) +++ head/sys/ufs/ffs/ffs_softdep.c Wed Nov 14 06:37:43 2012 (r243018) @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -92,6 +93,8 @@ __FBSDID("$FreeBSD$"); #include +#defineKTR_SUJ 0 /* Define to KTR_SPARE. */ + #ifndef SOFTUPDATES int @@ -770,6 +773,34 @@ struct pagedep_hashhead; struct bmsafemap_hashhead; /* + * Private journaling structures. + */ +struct jblocks { + struct jseglst jb_segs;/* TAILQ of current segments. */ + struct jseg *jb_writeseg; /* Next write to complete. */ + struct jseg *jb_oldestseg; /* Oldest segment with valid entries. */ + struct jextent *jb_extent; /* Extent array. */ + uint64_tjb_nextseq; /* Next sequence number. */ + uint64_tjb_oldestwrseq; /* Oldest written sequence number. */ + uint8_t jb_needseg; /* Need a forced segment. */ + uint8_t jb_suspended; /* Did journal suspend writes? */ + int jb_avail; /* Available extents. */ + int jb_used;/* Last used extent. */ + int jb_head;/* Allocator head. */ + int jb_off; /* Allocator extent offset. */ + int jb_blocks; /* Total disk blocks covered. */ + int jb_free;/* Total disk blocks free. */ + int jb_min; /* Minimum free space. */ + int jb_low; /* Low on space. */ + int jb_age; /* Insertion time of oldest rec. */ +}; + +struct jextent { + ufs2_daddr_tje_daddr; /* Disk block address. */ + int je_blocks; /* Disk block count. */ +}; + +/* * Internal function prototypes. */ static void softdep_error(char *, int); @@ -2268,19 +2299,15 @@ static void indirblk_insert(freework) struct freework *freework; { - struct freeblks *freeblks; - struct jsegdep *jsegdep; - struct worklist *wk; + struct jblocks *jblocks; + struct jseg *jseg; - freeblks = freework->fw_freeblks; - LIST_FOREACH(wk, &freeblks->fb_jwork, wk_list) - if (wk->wk_type == D_JSEGDEP) - break; - if (wk == NULL) + jblocks = VFSTOUFS(freework->fw_list.wk_mp)->softdep_jblocks; + jseg = TAILQ_LAST(&jblocks->jb_segs, jseglst); + if (jseg == NULL) return; - jsegdep = WK_JSEGDEP(wk); - LIST_INSERT_HEAD(&jsegdep->jd_seg->js_indirs, freework, fw_segs); + LIST_INSERT_HEAD(&jseg->js_indirs, freework, fw_segs); TAILQ_INSERT_HEAD(INDIR_HASH(freework->fw_list.wk_mp, freework->fw_blkno), freework, fw_next); freework->fw_state &= ~DEPCOMPLETE; @@ -2433,31 +2460,6 @@ softdep_unmount(mp) journal_unmount(mp); } -struct jblocks { - struct jseglst jb_segs;/* TAILQ of current segments. */ - struct jseg *jb_writeseg; /* Next write to complete. */ - struct jseg *jb_oldestseg; /* Oldest segment with valid entries. */ - struct jextent *jb_extent; /* Extent array. 
*/ - uint64_tjb_nextseq; /* Next sequence number. */ - uint64_tjb_oldestwrseq; /* Oldest written sequence number. */ - uint8_t jb_needseg; /* Need a forced segment. */ - uint8_t jb_suspended; /* Did journal suspend writes? */ - int jb_avail; /* Available extents. */ - int jb_used;/* Last used extent. */ - int jb_head;/* Allocator head. */ - int jb_off; /* Allocator extent offset. */ - int jb_blocks; /* Total disk blocks covered. */ - int
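The new KTR_SUJ define above is the standard KTR idiom for leaving debugging trace points in the source at zero cost: the class defaults to 0 so the CTR calls compile away, and redefining it to KTR_SPARE turns every trace point back on without touching the call sites. A minimal userland sketch of that compile-time gating (the CTR1 macro here is a simplified stand-in for the kernel's, which also checks the run-time ktr_mask and logs into ktr_buf rather than stdout):

#include <stdio.h>

#define KTR_SPARE       0x200   /* pretend spare event-class bit */
#define KTR_SUJ         0       /* flip to KTR_SPARE to enable the traces */
#define KTR_COMPILE     (KTR_SPARE)

/* Simplified stand-in for the kernel's CTR1(). */
#define CTR1(m, fmt, arg) do {                                          \
        if ((m) & KTR_COMPILE)                                          \
                printf(fmt "\n", (arg));                                \
} while (0)

int
main(void)
{
        /*
         * With KTR_SUJ defined to 0 the branch is constant-false and the
         * call disappears at compile time; redefining KTR_SUJ to
         * KTR_SPARE re-enables every trace point in one place.
         */
        CTR1(KTR_SUJ, "freeblks at lbn %d", 12);
        return (0);
}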
svn commit: r243046 - in head: sys/kern sys/sparc64/include sys/sys usr.bin/ktrdump
Author: jeff Date: Thu Nov 15 00:51:57 2012 New Revision: 243046 URL: http://svnweb.freebsd.org/changeset/base/243046 Log: - Implement run-time expansion of the KTR buffer via sysctl. - Implement a function to ensure that all preempted threads have switched back out at least once. Use this to make sure there are no stale references to the old ktr_buf or the lock profiling buffers before updating them. Reviewed by: marius (sparc64 parts), attilio (earlier patch) Sponsored by: EMC / Isilon Storage Division Modified: head/sys/kern/kern_ktr.c head/sys/kern/subr_lock.c head/sys/kern/subr_smp.c head/sys/sparc64/include/ktr.h head/sys/sys/ktr.h head/sys/sys/smp.h head/usr.bin/ktrdump/ktrdump.c Modified: head/sys/kern/kern_ktr.c == --- head/sys/kern/kern_ktr.cWed Nov 14 22:21:03 2012(r243045) +++ head/sys/kern/kern_ktr.cThu Nov 15 00:51:57 2012(r243046) @@ -47,7 +47,11 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include +#include #include +#include #include #include #include @@ -66,6 +70,9 @@ __FBSDID("$FreeBSD$"); #defineKTR_ENTRIES 1024 #endif +/* Limit the allocations to something manageable. */ +#defineKTR_ENTRIES_MAX (8 * 1024 * 1024) + #ifndef KTR_MASK #defineKTR_MASK(0) #endif @@ -82,30 +89,31 @@ __FBSDID("$FreeBSD$"); #defineKTR_CPU PCPU_GET(cpuid) #endif -FEATURE(ktr, "Kernel support for KTR kernel tracing facility"); +static MALLOC_DEFINE(M_KTR, "KTR", "KTR"); -static SYSCTL_NODE(_debug, OID_AUTO, ktr, CTLFLAG_RD, 0, "KTR options"); +FEATURE(ktr, "Kernel support for KTR kernel tracing facility"); +volatile int ktr_idx = 0; intktr_mask = KTR_MASK; +intktr_compile = KTR_COMPILE; +intktr_entries = KTR_ENTRIES; +intktr_version = KTR_VERSION; +struct ktr_entry ktr_buf_init[KTR_ENTRIES]; +struct ktr_entry *ktr_buf = ktr_buf_init; +cpuset_t ktr_cpumask = CPUSET_T_INITIALIZER(KTR_CPUMASK); +static char ktr_cpumask_str[CPUSETBUFSIZ]; + TUNABLE_INT("debug.ktr.mask", &ktr_mask); -SYSCTL_INT(_debug_ktr, OID_AUTO, mask, CTLFLAG_RW, -&ktr_mask, 0, "Bitmask of KTR event classes for which logging is enabled"); -intktr_compile = KTR_COMPILE; -SYSCTL_INT(_debug_ktr, OID_AUTO, compile, CTLFLAG_RD, -&ktr_compile, 0, "Bitmask of KTR event classes compiled into the kernel"); +TUNABLE_STR("debug.ktr.cpumask", ktr_cpumask_str, sizeof(ktr_cpumask_str)); -intktr_entries = KTR_ENTRIES; -SYSCTL_INT(_debug_ktr, OID_AUTO, entries, CTLFLAG_RD, -&ktr_entries, 0, "Number of entries in the KTR buffer"); +static SYSCTL_NODE(_debug, OID_AUTO, ktr, CTLFLAG_RD, 0, "KTR options"); -intktr_version = KTR_VERSION; SYSCTL_INT(_debug_ktr, OID_AUTO, version, CTLFLAG_RD, &ktr_version, 0, "Version of the KTR interface"); -cpuset_t ktr_cpumask = CPUSET_T_INITIALIZER(KTR_CPUMASK); -static char ktr_cpumask_str[CPUSETBUFSIZ]; -TUNABLE_STR("debug.ktr.cpumask", ktr_cpumask_str, sizeof(ktr_cpumask_str)); +SYSCTL_INT(_debug_ktr, OID_AUTO, compile, CTLFLAG_RD, +&ktr_compile, 0, "Bitmask of KTR event classes compiled into the kernel"); static void ktr_cpumask_initializer(void *dummy __unused) @@ -145,9 +153,6 @@ SYSCTL_PROC(_debug_ktr, OID_AUTO, cpumas sysctl_debug_ktr_cpumask, "S", "Bitmask of CPUs on which KTR logging is enabled"); -volatile int ktr_idx = 0; -struct ktr_entry ktr_buf[KTR_ENTRIES]; - static int sysctl_debug_ktr_clear(SYSCTL_HANDLER_ARGS) { @@ -159,7 +164,7 @@ sysctl_debug_ktr_clear(SYSCTL_HANDLER_AR return (error); if (clear) { - bzero(ktr_buf, sizeof(ktr_buf)); + bzero(ktr_buf, sizeof(*ktr_buf) * ktr_entries); ktr_idx = 0; } @@ -168,6 +173,67 @@ sysctl_debug_ktr_clear(SYSCTL_HANDLER_AR 
SYSCTL_PROC(_debug_ktr, OID_AUTO, clear, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_debug_ktr_clear, "I", "Clear KTR Buffer"); +/* + * This is a sysctl proc so that it is serialized as !MPSAFE along with + * the other ktr sysctl procs. + */ +static int +sysctl_debug_ktr_mask(SYSCTL_HANDLER_ARGS) +{ + int mask, error; + + mask = ktr_mask; + error = sysctl_handle_int(oidp, &mask, 0, req); + if (error || !req->newptr) + return (error); + ktr_mask = mask; + return (error); +} + +SYSCTL_PROC(_debug_ktr, OID_AUTO, mask, CTLTYPE_INT|CTLFLAG_RW, 0, 0, +sysctl_debug_ktr_mask, "I", +"Bitmask of KTR event classes for which logging is enabled"); + +static int +sysctl_debug_ktr_entries(SYSCTL_HANDLER_ARGS) +{ + int entries, error, mask; + struct ktr_entry *buf, *oldbuf; + + entries = ktr_entries; + error = sysctl_handle_int(oidp, &entries, 0, req); + if (error || !req->newptr) + return (error); + if (entries > KTR_ENTRIES_MAX) + return (ERANGE
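One detail worth noting in the clear handler above: once ktr_buf becomes a pointer that may be retargeted at an allocated buffer, sizeof(ktr_buf) measures the pointer rather than the buffer, which is why the bzero() is now sized from the element size and the live entry count. A small runnable illustration (struct entry is a stand-in for struct ktr_entry):

#include <stdio.h>
#include <string.h>

struct entry {                  /* stand-in for struct ktr_entry */
        int     a;
        long    b;
};

#define NENTRIES        1024

static struct entry     buf_init[NENTRIES];     /* old style: fixed array */
static struct entry     *buf = buf_init;        /* new style: resizable */
static int              nentries = NENTRIES;

int
main(void)
{
        /* sizeof on the array names the whole buffer... */
        printf("sizeof(buf_init) = %zu\n", sizeof(buf_init));
        /* ...but on the pointer it names only the pointer itself. */
        printf("sizeof(buf)      = %zu\n", sizeof(buf));
        /* So the clear must be sized explicitly, as the new bzero() is. */
        memset(buf, 0, sizeof(*buf) * nentries);
        return (0);
}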
svn commit: r188904 - in head/sys: amd64/amd64 i386/i386
Author: jeff Date: Sat Feb 21 23:15:34 2009 New Revision: 188904 URL: http://svn.freebsd.org/changeset/base/188904 Log: - Resolve an issue where we may clear an idt while an interrupt on a different cpu is still assigned to that vector by never clearing idt entries. This was only provided as a debugging feature and the bugs are caught by other means. - Drop the sched lock when rebinding to reassign an interrupt vector to a new cpu so that pending interrupts have a chance to be delivered before removing the old vector. Discussed with: tegge, jhb Modified: head/sys/amd64/amd64/local_apic.c head/sys/i386/i386/local_apic.c Modified: head/sys/amd64/amd64/local_apic.c == --- head/sys/amd64/amd64/local_apic.c Sat Feb 21 22:57:26 2009 (r188903) +++ head/sys/amd64/amd64/local_apic.c Sat Feb 21 23:15:34 2009 (r188904) @@ -900,7 +900,13 @@ apic_disable_vector(u_int apic_id, u_int KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); +#ifdef notyet + /* +* We can not currently clear the idt entry because other cpus +* may have a valid vector at this offset. +*/ setidt(vector, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); +#endif } /* Release an APIC vector when it's no longer in use. */ @@ -924,9 +930,11 @@ apic_free_vector(u_int apic_id, u_int ve if (sched_is_bound(td)) panic("apic_free_vector: Thread already bound.\n"); sched_bind(td, apic_cpuid(apic_id)); + thread_unlock(td); mtx_lock_spin(&icu_lock); lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = 0; mtx_unlock_spin(&icu_lock); + thread_lock(td); sched_unbind(td); thread_unlock(td); Modified: head/sys/i386/i386/local_apic.c == --- head/sys/i386/i386/local_apic.c Sat Feb 21 22:57:26 2009 (r188903) +++ head/sys/i386/i386/local_apic.c Sat Feb 21 23:15:34 2009 (r188904) @@ -903,8 +903,14 @@ apic_disable_vector(u_int apic_id, u_int KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); +#ifdef notyet + /* +* We can not currently clear the idt entry because other cpus +* may have a valid vector at this offset. +*/ setidt(vector, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#endif } /* Release an APIC vector when it's no longer in use. */ @@ -928,9 +934,11 @@ apic_free_vector(u_int apic_id, u_int ve if (sched_is_bound(td)) panic("apic_free_vector: Thread already bound.\n"); sched_bind(td, apic_cpuid(apic_id)); + thread_unlock(td); mtx_lock_spin(&icu_lock); lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = 0; mtx_unlock_spin(&icu_lock); + thread_lock(td); sched_unbind(td); thread_unlock(td); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r189787 - head/sys/kern
Author: jeff Date: Sat Mar 14 11:41:36 2009 New Revision: 189787 URL: http://svn.freebsd.org/changeset/base/189787 Log: - Fix an error that occurs when mp_ncpu is an odd number. steal_thresh is calculated as 0 which causes errors elsewhere. Submitted by: KOIE Hidetaka - When sched_affinity() is called with a thread that is not curthread we need to handle the ON_RUNQ() case by adding the thread to the correct run queue. Submitted by: Justin Teller MFC after:1 Week Modified: head/sys/kern/sched_ule.c Modified: head/sys/kern/sched_ule.c == --- head/sys/kern/sched_ule.c Sat Mar 14 08:34:45 2009(r189786) +++ head/sys/kern/sched_ule.c Sat Mar 14 11:41:36 2009(r189787) @@ -1337,11 +1337,11 @@ sched_initticks(void *dummy) */ balance_interval = realstathz; /* -* Set steal thresh to log2(mp_ncpu) but no greater than 4. This -* prevents excess thrashing on large machines and excess idle on -* smaller machines. +* Set steal thresh to roughly log2(mp_ncpu) but no greater than 4. +* This prevents excess thrashing on large machines and excess idle +* on smaller machines. */ - steal_thresh = min(ffs(mp_ncpus) - 1, 3); + steal_thresh = min(fls(mp_ncpus) - 1, 3); affinity = SCHED_AFFINITY_DEFAULT; #endif } @@ -2417,6 +2417,11 @@ sched_affinity(struct thread *td) ts = td->td_sched; if (THREAD_CAN_SCHED(td, ts->ts_cpu)) return; + if (TD_ON_RUNQ(td)) { + sched_rem(td); + sched_add(td, SRQ_BORING); + return; + } if (!TD_IS_RUNNING(td)) return; td->td_flags |= TDF_NEEDRESCHED; ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
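The ffs()-to-fls() change is easiest to see with concrete CPU counts: ffs() returns the position of the lowest set bit, so any odd mp_ncpus yields ffs(n) - 1 == 0 and a steal_thresh of 0, while fls() returns the highest set bit and so approximates log2(n). A small runnable comparison (local helpers stand in for libkern's ffs()/fls()):

#include <stdio.h>

/* Local stand-ins for libkern's ffs()/fls(). */
static int
my_ffs(int v)
{
        int bit;

        if (v == 0)
                return (0);
        for (bit = 1; (v & 1) == 0; bit++)
                v >>= 1;
        return (bit);
}

static int
my_fls(int v)
{
        int bit;

        for (bit = 0; v != 0; bit++)
                v >>= 1;
        return (bit);
}

#define MIN(a, b)       ((a) < (b) ? (a) : (b))

int
main(void)
{
        int ncpus[] = { 1, 2, 3, 4, 6, 8, 16, 32 };
        unsigned i;

        for (i = 0; i < sizeof(ncpus) / sizeof(ncpus[0]); i++)
                printf("mp_ncpus=%2d  old=%d  new=%d\n", ncpus[i],
                    MIN(my_ffs(ncpus[i]) - 1, 3),       /* before r189787 */
                    MIN(my_fls(ncpus[i]) - 1, 3));      /* after r189787 */
        return (0);
}

For every odd count the old column prints 0, which is the bug the log describes; the new column tracks log2(mp_ncpus) capped at 3.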
svn commit: r189788 - head/sys/kern
Author: jeff Date: Sat Mar 14 11:43:02 2009 New Revision: 189788 URL: http://svn.freebsd.org/changeset/base/189788 Log: - Call lock_profile_release when we're transitioning a lock to be owned by LK_KERNPROC. Discussed with: attilio Modified: head/sys/kern/kern_lock.c Modified: head/sys/kern/kern_lock.c == --- head/sys/kern/kern_lock.c Sat Mar 14 11:41:36 2009(r189787) +++ head/sys/kern/kern_lock.c Sat Mar 14 11:43:02 2009(r189788) @@ -686,7 +686,8 @@ __lockmgr_args(struct lock *lk, u_int fl lk->lk_recurse--; break; } - lock_profile_release_lock(&lk->lock_object); + if (tid != LK_KERNPROC) + lock_profile_release_lock(&lk->lock_object); if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) @@ -874,6 +875,7 @@ _lockmgr_disown(struct lock *lk, const c */ if (LK_HOLDER(lk->lk_lock) != tid) return; + lock_profile_release_lock(&lk->lock_object); LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line); WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_DEC(curthread); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r189789 - head/sys/kern
Author: jeff Date: Sat Mar 14 11:43:38 2009 New Revision: 189789 URL: http://svn.freebsd.org/changeset/base/189789 Log: - When a mutex is destroyed while locked we need to inform lock profiling that it has been released. Modified: head/sys/kern/kern_mutex.c Modified: head/sys/kern/kern_mutex.c == --- head/sys/kern/kern_mutex.c Sat Mar 14 11:43:02 2009(r189788) +++ head/sys/kern/kern_mutex.c Sat Mar 14 11:43:38 2009(r189789) @@ -765,6 +765,7 @@ mtx_destroy(struct mtx *m) else curthread->td_locks--; + lock_profile_release_lock(&m->lock_object); /* Tell witness this isn't locked to make it happy. */ WITNESS_UNLOCK(&m->lock_object, LOP_EXCLUSIVE, __FILE__, __LINE__); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r189845 - in head/sys: kern sys
Author: jeff Date: Sun Mar 15 06:41:47 2009 New Revision: 189845 URL: http://svn.freebsd.org/changeset/base/189845 Log: - Implement a new mechanism for resetting lock profiling. We now guarantee that all cpus have acknowledged the cleared enable int by scheduling the resetting thread on each cpu in succession. Since all lock profiling happens within a critical section this guarantees that all cpus have left lock profiling before we clear the datastructures. - Assert that the per-thread queue of locks lock profiling is aware of is clear on thread exit. There were several cases where this was not true that slows lock profiling and leaks information. - Remove all objects from all lists before clearing any per-cpu information in reset. Lock profiling objects can migrate between per-cpu caches and previously these migrated objects could be zero'd before they'd been removed Discussed with: attilio Sponsored by: Nokia Modified: head/sys/kern/kern_thread.c head/sys/kern/subr_lock.c head/sys/sys/lock_profile.h Modified: head/sys/kern/kern_thread.c == --- head/sys/kern/kern_thread.c Sun Mar 15 06:40:57 2009(r189844) +++ head/sys/kern/kern_thread.c Sun Mar 15 06:41:47 2009(r189845) @@ -306,6 +306,8 @@ thread_alloc(void) void thread_free(struct thread *td) { + + lock_profile_thread_exit(td); if (td->td_cpuset) cpuset_rel(td->td_cpuset); td->td_cpuset = NULL; @@ -439,6 +441,7 @@ thread_wait(struct proc *p) /* Wait for any remaining threads to exit cpu_throw(). */ while (p->p_exitthreads) sched_relinquish(curthread); + lock_profile_thread_exit(td); cpuset_rel(td->td_cpuset); td->td_cpuset = NULL; cpu_thread_clean(td); Modified: head/sys/kern/subr_lock.c == --- head/sys/kern/subr_lock.c Sun Mar 15 06:40:57 2009(r189844) +++ head/sys/kern/subr_lock.c Sun Mar 15 06:41:47 2009(r189845) @@ -46,9 +46,11 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include +#include #include #include @@ -186,7 +188,8 @@ struct lock_prof_cpu { struct lock_prof_cpu *lp_cpu[MAXCPU]; -int lock_prof_enable = 0; +volatile int lock_prof_enable = 0; +static volatile int lock_prof_resetting; /* SWAG: sbuf size = avg stat. line size * number of locks */ #define LPROF_SBUF_SIZE256 * 400 @@ -239,25 +242,77 @@ lock_prof_init(void *arg) } SYSINIT(lockprof, SI_SUB_SMP, SI_ORDER_ANY, lock_prof_init, NULL); +/* + * To be certain that lock profiling has idled on all cpus before we + * reset, we schedule the resetting thread on all active cpus. Since + * all operations happen within critical sections we can be sure that + * it is safe to zero the profiling structures. + */ +static void +lock_prof_idle(void) +{ + struct thread *td; + int cpu; + + td = curthread; + thread_lock(td); + for (cpu = 0; cpu <= mp_maxid; cpu++) { + if (CPU_ABSENT(cpu)) + continue; + sched_bind(td, cpu); + } + sched_unbind(td); + thread_unlock(td); +} + +static void +lock_prof_reset_wait(void) +{ + + /* +* Spin relinquishing our cpu so that lock_prof_idle may +* run on it. +*/ + while (lock_prof_resetting) + sched_relinquish(curthread); +} + static void lock_prof_reset(void) { struct lock_prof_cpu *lpc; int enabled, i, cpu; + /* +* We not only race with acquiring and releasing locks but also +* thread exit. To be certain that threads exit without valid head +* pointers they must see resetting set before enabled is cleared. +* Otherwise a lock may not be removed from a per-thread list due +* to disabled being set but not wait for reset() to remove it below. 
+*/ + atomic_store_rel_int(&lock_prof_resetting, 1); enabled = lock_prof_enable; lock_prof_enable = 0; - pause("lpreset", hz / 10); + lock_prof_idle(); + /* +* Some objects may have migrated between CPUs. Clear all links +* before we zero the structures. Some items may still be linked +* into per-thread lists as well. +*/ for (cpu = 0; cpu <= mp_maxid; cpu++) { lpc = lp_cpu[cpu]; for (i = 0; i < LPROF_CACHE_SIZE; i++) { LIST_REMOVE(&lpc->lpc_types[0].lpt_objs[i], lpo_link); LIST_REMOVE(&lpc->lpc_types[1].lpt_objs[i], lpo_link); } + } + for (cpu = 0; cpu <= mp_maxid; cpu++) { + lpc = lp_cpu[cpu]; bzero(lpc, sizeof(*lpc)); lock_prof_init
svn commit: r189846 - head/sys/kern
Author: jeff Date: Sun Mar 15 08:03:54 2009 New Revision: 189846 URL: http://svn.freebsd.org/changeset/base/189846 Log: - Wrap lock profiling state variables in #ifdef LOCK_PROFILING blocks. Modified: head/sys/kern/kern_lock.c head/sys/kern/kern_mutex.c head/sys/kern/kern_rwlock.c head/sys/kern/kern_sx.c Modified: head/sys/kern/kern_lock.c == --- head/sys/kern/kern_lock.c Sun Mar 15 06:41:47 2009(r189845) +++ head/sys/kern/kern_lock.c Sun Mar 15 08:03:54 2009(r189846) @@ -333,16 +333,17 @@ __lockmgr_args(struct lock *lk, u_int fl const char *wmesg, int pri, int timo, const char *file, int line) { GIANT_DECLARE; - uint64_t waittime; struct lock_class *class; const char *iwmesg; uintptr_t tid, v, x; u_int op; - int contested, error, ipri, itimo, queue, wakeup_swapper; + int error, ipri, itimo, queue, wakeup_swapper; +#ifdef LOCK_PROFILING + uint64_t waittime = 0; + int contested = 0; +#endif - contested = 0; error = 0; - waittime = 0; tid = (uintptr_t)curthread; op = (flags & LK_TYPE_MASK); iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg; Modified: head/sys/kern/kern_mutex.c == --- head/sys/kern/kern_mutex.c Sun Mar 15 06:41:47 2009(r189845) +++ head/sys/kern/kern_mutex.c Sun Mar 15 08:03:54 2009(r189846) @@ -254,8 +254,11 @@ _mtx_unlock_spin_flags(struct mtx *m, in int _mtx_trylock(struct mtx *m, int opts, const char *file, int line) { - int rval, contested = 0; +#ifdef LOCK_PROFILING uint64_t waittime = 0; + int contested = 0; +#endif + int rval; MPASS(curthread != NULL); KASSERT(m->mtx_lock != MTX_DESTROYED, @@ -296,15 +299,17 @@ _mtx_lock_sleep(struct mtx *m, uintptr_t int line) { struct turnstile *ts; + uintptr_t v; #ifdef ADAPTIVE_MUTEXES volatile struct thread *owner; #endif #ifdef KTR int cont_logged = 0; #endif +#ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; - uintptr_t v; +#endif if (mtx_owned(m)) { KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0, @@ -448,8 +453,11 @@ void _mtx_lock_spin(struct mtx *m, uintptr_t tid, int opts, const char *file, int line) { - int i = 0, contested = 0; + int i = 0; +#ifdef LOCK_PROFILING + int contested = 0; uint64_t waittime = 0; +#endif if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m); @@ -486,11 +494,13 @@ _thread_lock_flags(struct thread *td, in { struct mtx *m; uintptr_t tid; - int i, contested; - uint64_t waittime; + int i; +#ifdef LOCK_PROFILING + int contested = 0; + uint64_t waittime = 0; +#endif - contested = i = 0; - waittime = 0; + i = 0; tid = (uintptr_t)curthread; for (;;) { retry: Modified: head/sys/kern/kern_rwlock.c == --- head/sys/kern/kern_rwlock.c Sun Mar 15 06:41:47 2009(r189845) +++ head/sys/kern/kern_rwlock.c Sun Mar 15 08:03:54 2009(r189846) @@ -282,8 +282,10 @@ _rw_rlock(struct rwlock *rw, const char int spintries = 0; int i; #endif +#ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; +#endif uintptr_t v; KASSERT(rw->rw_lock != RW_DESTROYED, @@ -584,9 +586,11 @@ _rw_wlock_hard(struct rwlock *rw, uintpt int spintries = 0; int i; #endif - uint64_t waittime = 0; uintptr_t v, x; +#ifdef LOCK_PROFILING + uint64_t waittime = 0; int contested = 0; +#endif if (rw_wlocked(rw)) { KASSERT(rw->lock_object.lo_flags & RW_RECURSE, Modified: head/sys/kern/kern_sx.c == --- head/sys/kern/kern_sx.c Sun Mar 15 06:41:47 2009(r189845) +++ head/sys/kern/kern_sx.c Sun Mar 15 08:03:54 2009(r189846) @@ -431,9 +431,12 @@ _sx_xlock_hard(struct sx *sx, uintptr_t #ifdef ADAPTIVE_SX volatile struct thread *owner; #endif - uint64_t waittime = 0; uintptr_t x; 
- int contested = 0, error = 0; +#ifdef LOCK_PROFILING + uint64_t waittime = 0; + int contested = 0; +#endif + int error = 0; /* If we already hold an exclusive lock, then recurse. */ if (sx_xlocked(sx)) { @@ -652,8 +655,10 @@ _sx_slock_hard(struct sx *sx, int opts, #ifdef ADAPTIVE_SX volatile struct thread *owner; #endif +#ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; +#endif uintptr_t x; in
Re: svn commit: r189846 - head/sys/kern
Sorry for the temporary build breakage; I meant to commit these two patches together.

Jeff

On Sun, 15 Mar 2009, Jeff Roberson wrote:
Author: jeff Date: Sun Mar 15 08:03:54 2009 New Revision: 189846 URL: http://svn.freebsd.org/changeset/base/189846 Log: - Wrap lock profiling state variables in #ifdef LOCK_PROFILING blocks. Modified: head/sys/kern/kern_lock.c head/sys/kern/kern_mutex.c head/sys/kern/kern_rwlock.c head/sys/kern/kern_sx.c
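The wrapping in r189846 follows a common idiom: the contested/waittime bookkeeping only exists in LOCK_PROFILING kernels, so declaring and initializing the variables under the same #ifdef keeps dead stores out of the hot lock paths and avoids set-but-unused warnings. A minimal sketch of the idiom outside the kernel (build with -DLOCK_PROFILING to include the counters; the function name is made up for illustration):

#include <stdio.h>

static int
acquire_thing(void)
{
#ifdef LOCK_PROFILING
        unsigned long long waittime = 0;
        int contested = 0;
#endif
        int error = 0;

        /* ... a contended path would bump contested/waittime here ... */
#ifdef LOCK_PROFILING
        printf("contested=%d waittime=%llu\n", contested, waittime);
#endif
        return (error);
}

int
main(void)
{
        return (acquire_thing());
}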
svn commit: r208241 - head/sbin/tunefs
Author: jeff Date: Tue May 18 01:45:28 2010 New Revision: 208241 URL: http://svn.freebsd.org/changeset/base/208241 Log: - Round up the journal size to the block size so we don't confuse fsck. Reported by: Mikolaj Golub - Only require 256k of blocks per-cg when trying to allocate contiguous journal blocks. The storage may not actually be contiguous but is at least within one cg. - When disabling SUJ leave SU enabled and report this to the user. It is expected that users will upgrade SU filesystems to SUJ and want a similar downgrade path. Modified: head/sbin/tunefs/tunefs.c Modified: head/sbin/tunefs/tunefs.c == --- head/sbin/tunefs/tunefs.c Tue May 18 00:46:15 2010(r208240) +++ head/sbin/tunefs/tunefs.c Tue May 18 01:45:28 2010(r208241) @@ -358,10 +358,12 @@ main(int argc, char *argv[]) warnx("%s remains unchanged as disabled", name); } else { journal_clear(); - sblock.fs_flags &= ~(FS_DOSOFTDEP | FS_SUJ); + sblock.fs_flags &= ~FS_SUJ; sblock.fs_sujfree = 0; - warnx("%s cleared, " - "remove .sujournal to reclaim space", name); + warnx("%s cleared but soft updates still set.", + name); + + warnx("remove .sujournal to reclaim space"); } } } @@ -546,7 +548,7 @@ journal_balloc(void) * Try to minimize fragmentation by requiring a minimum * number of blocks present. */ - if (cgp->cg_cs.cs_nbfree > 128 * 1024 * 1024) + if (cgp->cg_cs.cs_nbfree > 256 * 1024) break; if (contig == 0 && cgp->cg_cs.cs_nbfree) break; @@ -906,6 +908,8 @@ journal_alloc(int64_t size) if (size / sblock.fs_fsize > sblock.fs_fpg) size = sblock.fs_fpg * sblock.fs_fsize; size = MAX(SUJ_MIN, size); + /* fsck does not support fragments in journal files. */ + size = roundup(size, sblock.fs_bsize); } resid = blocks = size / sblock.fs_bsize; if (sblock.fs_cstotal.cs_nbfree < blocks) { ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
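The roundup() added above matters because a journal sized from the filesystem can otherwise end on a fragment boundary, which the SUJ code in fsck does not handle. roundup() is the usual sys/param.h macro; a quick runnable check of the arithmetic with an assumed 32 KB block size:

#include <stdio.h>
#include <stdint.h>

/* Same definition as sys/param.h's roundup(). */
#define roundup(x, y)   ((((x) + ((y) - 1)) / (y)) * (y))

int
main(void)
{
        int64_t bsize = 32768;          /* fs_bsize on a typical UFS2 fs */
        int64_t sizes[] = { 4194304, 4200000, 33554431 };
        unsigned i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                printf("%jd -> %jd\n", (intmax_t)sizes[i],
                    (intmax_t)roundup(sizes[i], bsize));
        return (0);
}

An already block-aligned size is unchanged; anything else is bumped to the next multiple of fs_bsize, so the journal file never ends in a fragment.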
svn commit: r208287 - head/sys/ufs/ffs
Author: jeff Date: Wed May 19 06:18:01 2010 New Revision: 208287 URL: http://svn.freebsd.org/changeset/base/208287 Log: - Don't immediately re-run softdepflush if we didn't make any progress on the last iteration. This can lead to a deadlock when we have worklist items that cannot be immediately satisfied. Reported by: uqs, Dimitry Andric - Remove some unnecessary debugging code and place some other under SUJ_DEBUG. - Examine the journal state in softdep_slowdown(). - Re-format some comments so I may more easily add flag descriptions. Modified: head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/softdep.h Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Wed May 19 04:00:42 2010 (r208286) +++ head/sys/ufs/ffs/ffs_softdep.c Wed May 19 06:18:01 2010 (r208287) @@ -51,7 +51,6 @@ __FBSDID("$FreeBSD$"); #ifndef DEBUG #define DEBUG #endif -#defineSUJ_DEBUG #include #include @@ -1200,6 +1199,7 @@ softdep_flush(void) struct ufsmount *ump; struct thread *td; int remaining; + int progress; int vfslocked; td = curthread; @@ -1224,7 +1224,7 @@ softdep_flush(void) } FREE_LOCK(&lk); VFS_UNLOCK_GIANT(vfslocked); - remaining = 0; + remaining = progress = 0; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { nmp = TAILQ_NEXT(mp, mnt_list); @@ -1233,7 +1233,7 @@ softdep_flush(void) if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) continue; vfslocked = VFS_LOCK_GIANT(mp); - softdep_process_worklist(mp, 0); + progress += softdep_process_worklist(mp, 0); ump = VFSTOUFS(mp); remaining += ump->softdep_on_worklist - ump->softdep_on_worklist_inprogress; @@ -1243,7 +1243,7 @@ softdep_flush(void) vfs_unbusy(mp); } mtx_unlock(&mountlist_mtx); - if (remaining) + if (remaining && progress) continue; ACQUIRE_LOCK(&lk); if (!req_pending) @@ -1449,7 +1449,7 @@ process_worklist_item(mp, flags) struct mount *mp; int flags; { - struct worklist *wk, *wkXXX; + struct worklist *wk; struct ufsmount *ump; struct vnode *vp; int matchcnt = 0; @@ -1472,11 +1472,8 @@ process_worklist_item(mp, flags) vp = NULL; ump = VFSTOUFS(mp); LIST_FOREACH(wk, &ump->softdep_workitem_pending, wk_list) { - if (wk->wk_state & INPROGRESS) { - wkXXX = wk; + if (wk->wk_state & INPROGRESS) continue; - } - wkXXX = wk; /* Record the last valid wk pointer. */ if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM) break; wk->wk_state |= INPROGRESS; @@ -2364,7 +2361,7 @@ remove_from_journal(wk) mtx_assert(&lk, MA_OWNED); ump = VFSTOUFS(wk->wk_mp); -#ifdef DEBUG /* XXX Expensive, temporary. */ +#ifdef SUJ_DEBUG { struct worklist *wkn; @@ -2401,16 +2398,15 @@ journal_space(ump, thresh) struct jblocks *jblocks; int avail; + jblocks = ump->softdep_jblocks; + if (jblocks == NULL) + return (1); /* * We use a tighter restriction here to prevent request_cleanup() * running in threads from running into locks we currently hold. */ if (num_inodedep > (max_softdeps / 10) * 9) return (0); - - jblocks = ump->softdep_jblocks; - if (jblocks == NULL) - return (1); if (thresh) thresh = jblocks->jb_min; else @@ -2727,7 +2723,7 @@ softdep_process_journal(mp, flags) break; printf("softdep: Out of journal space!\n"); softdep_speedup(); - msleep(jblocks, &lk, PRIBIO, "jblocks", 1); + msleep(jblocks, &lk, PRIBIO, "jblocks", hz); } FREE_LOCK(&lk); jseg = malloc(sizeof(*jseg), M_JSEG, M_SOFTDEP_FLAGS); @@ -10870,18 +10866,29 @@ int softdep_slowdown(vp) struct vnode *vp; { + struct ufsmount *ump; + int jlow; int max_softdeps_hard; ACQUIRE_LOCK(&lk); + jlow = 0; + /* +* Check for journal space if needed. 
+*/ + if (DOINGSUJ(vp)) { + ump = VFSTOUFS(vp->v_mount); + if
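The remaining/progress pair above encodes a simple rule: loop again immediately only while there is outstanding work and the last pass actually completed something; otherwise go back to sleep rather than spinning on items that cannot yet be satisfied. A stripped-down, runnable sketch of that loop shape (the fake worklist below is purely illustrative):

#include <stdio.h>

static int pending = 5;         /* work items; only some can complete now */

/* Pretend worklist pass: completes one item on every other call. */
static int
process_worklist(void)
{
        static int parity;

        if ((parity++ & 1) == 0 && pending > 0) {
                pending--;
                return (1);
        }
        return (0);
}

int
main(void)
{
        int passes = 0;

        for (;;) {
                int progress, remaining;

                progress = process_worklist();
                remaining = pending;
                passes++;
                /*
                 * The old logic looped whenever remaining != 0, which can
                 * spin forever if nothing is able to complete yet.  The
                 * r208287 form loops only while progress is also being
                 * made, and otherwise sleeps (here: simply stops).
                 */
                if (remaining && progress)
                        continue;
                break;
        }
        printf("stopped after %d passes, %d items still pending\n",
            passes, pending);
        return (0);
}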
svn commit: r209716 - head/sbin/fsck_ffs
Author: jeff Date: Tue Jul 6 07:07:29 2010 New Revision: 209716 URL: http://svn.freebsd.org/changeset/base/209716 Log: - Permit zero length directories as a handled inconsistency. This allows directory truncation to proceed before the link has been cleared. This is accomplished by detecting a directory with no . or .. links and clearing the named directory entry in the parent. - Add a new function ino_remref() which handles the details of removing a reference to an inode as a result of a lost directory. There were some minor errors in various subcases of this routine. Modified: head/sbin/fsck_ffs/suj.c Modified: head/sbin/fsck_ffs/suj.c == --- head/sbin/fsck_ffs/suj.cTue Jul 6 03:48:46 2010(r209715) +++ head/sbin/fsck_ffs/suj.cTue Jul 6 07:07:29 2010(r209716) @@ -808,6 +808,44 @@ blk_isat(ino_t ino, ufs_lbn_t lbn, ufs2_ } /* + * Clear the directory entry at diroff that should point to child. Minimal + * checking is done and it is assumed that this path was verified with isat. + */ +static void +ino_clrat(ino_t parent, off_t diroff, ino_t child) +{ + union dinode *dip; + struct direct *dp; + ufs2_daddr_t blk; + uint8_t *block; + ufs_lbn_t lbn; + int blksize; + int frags; + int doff; + + if (debug) + printf("Clearing inode %d from parent %d at offset %jd\n", + child, parent, diroff); + + lbn = lblkno(fs, diroff); + doff = blkoff(fs, diroff); + dip = ino_read(parent); + blk = ino_blkatoff(dip, parent, lbn, &frags); + blksize = sblksize(fs, DIP(dip, di_size), lbn); + block = dblk_read(blk, blksize); + dp = (struct direct *)&block[doff]; + if (dp->d_ino != child) + errx(1, "Inode %d does not exist in %d at %jd", + child, parent, diroff); + dp->d_ino = 0; + dblk_dirty(blk); + /* +* The actual .. reference count will already have been removed +* from the parent by the .. remref record. +*/ +} + +/* * Determines whether a pointer to an inode exists within a directory * at a specified offset. Returns the mode of the found entry. */ @@ -1134,6 +1172,57 @@ ino_setskip(struct suj_ino *sino, ino_t sino->si_skipparent = 1; } +static void +ino_remref(ino_t parent, ino_t child, uint64_t diroff, int isdotdot) +{ + struct suj_ino *sino; + struct suj_rec *srec; + struct jrefrec *rrec; + + /* +* Lookup this inode to see if we have a record for it. +*/ + sino = ino_lookup(child, 0); + /* +* Tell any child directories we've already removed their +* parent link cnt. Don't try to adjust our link down again. +*/ + if (sino != NULL && isdotdot == 0) + ino_setskip(sino, parent); + /* +* No valid record for this inode. Just drop the on-disk +* link by one. +*/ + if (sino == NULL || sino->si_hasrecs == 0) { + ino_decr(child); + return; + } + /* +* Use ino_adjust() if ino_check() has already processed this +* child. If we lose the last non-dot reference to a +* directory it will be discarded. +*/ + if (sino->si_linkadj) { + sino->si_nlink--; + if (isdotdot) + sino->si_dotlinks--; + ino_adjust(sino); + return; + } + /* +* If we haven't yet processed this inode we need to make +* sure we will successfully discover the lost path. If not +* use nlinkadj to remember. +*/ + TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { + rrec = (struct jrefrec *)srec->sr_rec; + if (rrec->jr_parent == parent && + rrec->jr_diroff == diroff) + return; + } + sino->si_nlinkadj++; +} + /* * Free the children of a directory when the directory is discarded. 
*/ @@ -1141,13 +1230,11 @@ static void ino_free_children(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { struct suj_ino *sino; - struct suj_rec *srec; - struct jrefrec *rrec; struct direct *dp; off_t diroff; uint8_t *block; int skipparent; - int isparent; + int isdotdot; int dpoff; int size; @@ -1165,53 +1252,15 @@ ino_free_children(ino_t ino, ufs_lbn_t l continue; if (dp->d_namlen == 1 && dp->d_name[0] == '.') continue; - isparent = dp->d_namlen == 2 && dp->d_name[0] == '.' && + isdotdot = dp->d_namlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.'; - if (isparent && skipparent == 1) +
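ino_clrat() above splits the 64-bit directory offset into a logical block number and an offset within that block, which for UFS is plain divide/modulo arithmetic on the block size. A tiny runnable example with an assumed 16 KB block size (simplified macros; the real lblkno()/blkoff() shift and mask via fs_bshift/fs_qbmask):

#include <sys/types.h>
#include <stdio.h>
#include <stdint.h>

#define BSIZE           16384
#define lblkno(loc)     ((loc) / BSIZE)
#define blkoff(loc)     ((loc) % BSIZE)

int
main(void)
{
        off_t diroff = 40000;   /* byte offset of a directory entry */

        /* 40000 bytes into the directory: block 2, 7232 bytes in. */
        printf("lbn = %jd, doff = %jd\n",
            (intmax_t)lblkno(diroff), (intmax_t)blkoff(diroff));
        return (0);
}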
svn commit: r209717 - in head/sys/ufs: ffs ufs
Author: jeff Date: Tue Jul 6 07:11:04 2010 New Revision: 209717 URL: http://svn.freebsd.org/changeset/base/209717 Log: - Handle the truncation of an inode with an effective link count of 0 in the context of the process that reduced the effective count. Previously all truncation as a result of unlink happened in the softdep flush thread. This had the effect of being impossible to rate limit properly with the journal code. Now the process issuing unlinks is suspended when the journal files. This has a side-effect of improving rm performance by allowing more concurrent work. - Handle two cases in inactive, one for effnlink == 0 and another when nlink finally reaches 0. - Eliminate the SPACECOUNTED related code since the truncation is no longer delayed. Discussed with: mckusick Modified: head/sys/ufs/ffs/ffs_alloc.c head/sys/ufs/ffs/ffs_inode.c head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/ffs_vnops.c head/sys/ufs/ffs/softdep.h head/sys/ufs/ufs/inode.h head/sys/ufs/ufs/ufs_inode.c head/sys/ufs/ufs/ufs_lookup.c head/sys/ufs/ufs/ufs_vnops.c Modified: head/sys/ufs/ffs/ffs_alloc.c == --- head/sys/ufs/ffs/ffs_alloc.cTue Jul 6 07:07:29 2010 (r209716) +++ head/sys/ufs/ffs/ffs_alloc.cTue Jul 6 07:11:04 2010 (r209717) @@ -191,11 +191,6 @@ retry: bno = ffs_hashalloc(ip, cg, bpref, size, size, ffs_alloccg); if (bno > 0) { delta = btodb(size); - if (ip->i_flag & IN_SPACECOUNTED) { - UFS_LOCK(ump); - fs->fs_pendingblocks += delta; - UFS_UNLOCK(ump); - } DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta); if (flags & IO_EXT) ip->i_flag |= IN_CHANGE; @@ -321,11 +316,6 @@ retry: if (bp->b_blkno != fsbtodb(fs, bno)) panic("ffs_realloccg: bad blockno"); delta = btodb(nsize - osize); - if (ip->i_flag & IN_SPACECOUNTED) { - UFS_LOCK(ump); - fs->fs_pendingblocks += delta; - UFS_UNLOCK(ump); - } DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta); if (flags & IO_EXT) ip->i_flag |= IN_CHANGE; @@ -394,11 +384,6 @@ retry: ffs_blkfree(ump, fs, ip->i_devvp, bprev, (long)osize, ip->i_number, NULL); delta = btodb(nsize - osize); - if (ip->i_flag & IN_SPACECOUNTED) { - UFS_LOCK(ump); - fs->fs_pendingblocks += delta; - UFS_UNLOCK(ump); - } DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta); if (flags & IO_EXT) ip->i_flag |= IN_CHANGE; @@ -2422,11 +2407,6 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) break; ip = VTOI(vp); - if (ip->i_flag & IN_SPACECOUNTED) { - UFS_LOCK(ump); - fs->fs_pendingblocks += cmd.size; - UFS_UNLOCK(ump); - } DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + cmd.size); ip->i_flag |= IN_CHANGE; vput(vp); Modified: head/sys/ufs/ffs/ffs_inode.c == --- head/sys/ufs/ffs/ffs_inode.cTue Jul 6 07:07:29 2010 (r209716) +++ head/sys/ufs/ffs/ffs_inode.cTue Jul 6 07:11:04 2010 (r209717) @@ -180,6 +180,8 @@ ffs_truncate(vp, length, flags, cred, td */ if ((flags & (IO_EXT | IO_NORMAL)) == 0) flags |= IO_NORMAL; + if (!DOINGSOFTDEP(vp) && !DOINGASYNC(vp)) + flags |= IO_SYNC; /* * If we are truncating the extended-attributes, and cannot * do it with soft updates, then do it slowly here. If we are @@ -310,10 +312,6 @@ ffs_truncate(vp, length, flags, cred, td */ if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) goto out; - UFS_LOCK(ump); - if (ip->i_flag & IN_SPACECOUNTED) - fs->fs_pendingblocks -= datablocks; - UFS_UNLOCK(ump); /* * We have to journal the truncation before we change * any blocks so we don't leave the file partially Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_sof
svn commit: r187357 - in head/sys: kern sys
Author: jeff Date: Sat Jan 17 07:17:57 2009 New Revision: 187357 URL: http://svn.freebsd.org/changeset/base/187357 Log: - Implement generic macros for producing KTR records that are compatible with src/tools/sched/schedgraph.py. This allows developers to quickly create a graphical view of ktr data for any resource in the system. - Add sched_tdname() and the pcpu field 'name' for quickly and uniformly identifying records associated with a thread or cpu. - Reimplement the KTR_SCHED traces using the new generic facility. Obtained from:attilio Discussed with: jhb Sponsored by: Nokia Modified: head/sys/kern/kern_clock.c head/sys/kern/kern_synch.c head/sys/kern/sched_4bsd.c head/sys/kern/sched_ule.c head/sys/kern/subr_pcpu.c head/sys/sys/ktr.h head/sys/sys/pcpu.h head/sys/sys/sched.h Modified: head/sys/kern/kern_clock.c == --- head/sys/kern/kern_clock.c Sat Jan 17 06:55:28 2009(r187356) +++ head/sys/kern/kern_clock.c Sat Jan 17 07:17:57 2009(r187357) @@ -498,8 +498,8 @@ statclock(int usermode) rss = pgtok(vmspace_resident_count(vm)); if (ru->ru_maxrss < rss) ru->ru_maxrss = rss; - CTR4(KTR_SCHED, "statclock: %p(%s) prio %d stathz %d", - td, td->td_name, td->td_priority, (stathz)?stathz:hz); + KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock", + "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz); thread_lock_flags(td, MTX_QUIET); sched_clock(td); thread_unlock(td); Modified: head/sys/kern/kern_synch.c == --- head/sys/kern/kern_synch.c Sat Jan 17 06:55:28 2009(r187356) +++ head/sys/kern/kern_synch.c Sat Jan 17 07:17:57 2009(r187357) @@ -71,6 +71,13 @@ __FBSDID("$FreeBSD$"); #include #endif +#defineKTDSTATE(td) \ + (((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \ + ((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \ + ((td)->td_inhibitors & TDI_SWAPPED) != 0 ? "swapped" : \ + ((td)->td_inhibitors & TDI_LOCK) != 0 ? "blocked" : \ + ((td)->td_inhibitors & TDI_IWAIT) != 0 ? 
"iwait" : "yielding") + static void synch_setup(void *dummy); SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup, NULL); @@ -425,25 +432,19 @@ mi_switch(int flags, struct thread *newt td->td_tid, td->td_sched, p->p_pid, td->td_name); #if (KTR_COMPILE & KTR_SCHED) != 0 if (TD_IS_IDLETHREAD(td)) - CTR3(KTR_SCHED, "mi_switch: %p(%s) prio %d idle", - td, td->td_name, td->td_priority); - else if (newtd != NULL) - CTR5(KTR_SCHED, - "mi_switch: %p(%s) prio %d preempted by %p(%s)", - td, td->td_name, td->td_priority, newtd, - newtd->td_name); + KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "idle", + "prio:%d", td->td_priority); else - CTR6(KTR_SCHED, - "mi_switch: %p(%s) prio %d inhibit %d wmesg %s lock %s", - td, td->td_name, td->td_priority, - td->td_inhibitors, td->td_wmesg, td->td_lockname); + KTR_STATE3(KTR_SCHED, "thread", sched_tdname(td), KTDSTATE(td), + "prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg, + "lockname:\"%s\"", td->td_lockname); #endif #ifdef XEN PT_UPDATES_FLUSH(); #endif sched_switch(td, newtd, flags); - CTR3(KTR_SCHED, "mi_switch: running %p(%s) prio %d", - td, td->td_name, td->td_priority); + KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running", + "prio:%d", td->td_priority); CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)", td->td_tid, td->td_sched, p->p_pid, td->td_name); Modified: head/sys/kern/sched_4bsd.c == --- head/sys/kern/sched_4bsd.c Sat Jan 17 06:55:28 2009(r187356) +++ head/sys/kern/sched_4bsd.c Sat Jan 17 07:17:57 2009(r187357) @@ -82,6 +82,8 @@ dtrace_vtime_switch_func_tdtrace_vtime_ #endif #defineNICE_WEIGHT 1 /* Priorities per nice level. */ +#defineTS_NAME_LEN (MAXCOMLEN + sizeof(" td ") + sizeof(__STRING(UINT_MAX))) + /* * The schedulable entity that runs a context. * This is an extension to the thread structure and is tailored to @@ -93,6 +95,9 @@ struct td_sched { int ts_slptime; /* (j) Seconds !RUNNING. */ int ts_flags; struct runq *ts_runq; /* runq the thread is currently on */ +#ifdef K
svn commit: r187358 - head/tools/sched
Author: jeff Date: Sat Jan 17 07:24:25 2009 New Revision: 187358 URL: http://svn.freebsd.org/changeset/base/187358 Log: - Rewrite the parser to support the new generic schedgraph interface. This no longer requires any custom classes or parsers to support new event types. - Add an optional command line argument for specifying the clock frequency in ghz. This is useful for traces that do not include KTR_SCHED. Sponsored by: Nokia - Add support for sorting rows by clicking and dragging them to their new position. - Add support for configuring the cpu background colors. - Improve the scaling so a better center is maintained as you zoom. This is not perfect due to precision loss with floats used in the window views. - Add new colors and a random assignment for unknown event types. A table is used for known event types. This is the only event specific information. Modified: head/tools/sched/schedgraph.py Modified: head/tools/sched/schedgraph.py == --- head/tools/sched/schedgraph.py Sat Jan 17 07:17:57 2009 (r187357) +++ head/tools/sched/schedgraph.py Sat Jan 17 07:24:25 2009 (r187358) @@ -28,6 +28,7 @@ import sys import re +import random from Tkinter import * # To use: @@ -53,30 +54,96 @@ from Tkinter import * # while the workload is still running is to avoid wasting log entries on # "idle" time at the end. # - Dump the trace to a file: 'ktrdump -ct > ktr.out' -# - Run the python script: 'python schedgraph.py ktr.out' +# - Run the python script: 'python schedgraph.py ktr.out' optionally provide +# your cpu frequency in ghz: 'python schedgraph.py ktr.out 2.4' # # To do: -# 1) Add a per-thread summary display -# 2) Add bounding box style zoom. -# 3) Click to center. -# 4) Implement some sorting mechanism. -# 5) Widget to display variable-range data (e.g. q length) -# 6) Reorder rows, hide rows, etc. -# 7) "Vertical rule" to help relate data in different rows -# 8) Mouse-over popup of full thread/event/row lable (currently truncated) -# 9) More visible anchors for popup event windows +# Add a per-source summary display +# Click to move. 
+# Hide rows +# "Vertical rule" to help relate data in different rows +# Mouse-over popup of full thread/event/row label (currently truncated) +# More visible anchors for popup event windows # # BUGS: 1) Only 8 CPUs are supported, more CPUs require more choices of # colours to represent them ;-) -# 2) Extremely short traces may cause a crash because the code -# assumes there is always at least one stathz entry logged, and -# the number of such events is used as a denominator + +eventcolors = [ + ("count", "red"), + ("running", "green"), + ("idle","grey"), + ("yielding","yellow"), + ("swapped", "violet"), + ("suspended", "purple"), + ("iwait", "grey"), + ("sleep", "blue"), + ("blocked", "dark red"), + ("runq add","yellow"), + ("runq rem","yellow"), + ("thread exit", "grey"), + ("proc exit", "grey"), + ("callwheel idle", "grey"), + ("callout running", "green"), + ("lock acquire", "blue"), + ("lock contest", "purple"), + ("failed lock try", "red"), + ("lock release", "grey"), + ("tick","black"), + ("prio","black"), + ("lend prio", "black"), + ("wokeup", "black") +] + +cpucolors = [ + ("CPU 0", "light grey"), + ("CPU 1", "dark grey"), + ("CPU 2", "light blue"), + ("CPU 3", "light pink"), + ("CPU 4", "blanched almond"), + ("CPU 5", "slate grey"), + ("CPU 6", "tan"), + ("CPU 7", "thistle"), + ("CPU 8", "white") +] + +colors = [ + "white", "thistle", "blanched almond", "tan", "chartreuse", + "dark red", "red", "pale violet red", "pink", "light pink", + "dark orange", "orange", "coral", "light coral", + "goldenrod", "gold", "yellow", "light yellow", + "dark green", "green", "light green", "light sea green", + "dark blue", "blue", "light blue", "steel blue", "light slate blue", + "dark violet", "violet", "purple", "blue violet", + "dark grey", "slate grey", "light grey", + "black", +] +colors.sort() ticksps = None status = None -configtypes = [] +colormap = None +ktrfile = None +clockfreq = None +sources = [] lineno = -1 +class Colormap: + def __init__(self, table): + self.table = table + self.map = {} + for entry in table: + self.map[entry[0]] = entry[1] + + def lookup(self, name): + try: + color = self.map[name] + except: + color =
svn commit: r187359 - head/tools/sched
Author: jeff Date: Sat Jan 17 11:19:15 2009 New Revision: 187359 URL: http://svn.freebsd.org/changeset/base/187359 Log: - Add a new source configuration menu option that allows hiding and displaying sources. - Add functions to the main SchedGraph to facilitate source hiding. The source is simply moved off screen and all other sources are moved to compensate. Modified: head/tools/sched/schedgraph.py Modified: head/tools/sched/schedgraph.py == --- head/tools/sched/schedgraph.py Sat Jan 17 07:24:25 2009 (r187358) +++ head/tools/sched/schedgraph.py Sat Jan 17 11:19:15 2009 (r187359) @@ -59,8 +59,6 @@ from Tkinter import * # # To do: # Add a per-source summary display -# Click to move. -# Hide rows # "Vertical rule" to help relate data in different rows # Mouse-over popup of full thread/event/row label (currently truncated) # More visible anchors for popup event windows @@ -294,6 +292,118 @@ class ColorConfigure(Toplevel): for item in self.types: item.revert() +class SourceConf(Frame): + def __init__(self, master, source): + Frame.__init__(self, master) + if (source.hidden == 1): + enabled = 0 + else: + enabled = 1 + self.source = source + self.name = source.name + self.enabled = IntVar() + self.enabled_default = enabled + self.enabled_current = enabled + self.enabled.set(enabled) + self.draw() + + def draw(self): + self.label = Label(self, text=self.name, anchor=W) + self.checkbox = Checkbutton(self, text="enabled", + variable=self.enabled) + self.label.grid(row=0, column=0, sticky=E+W) + self.checkbox.grid(row=0, column=1) + self.columnconfigure(0, weight=1) + + def apply(self): + echange = 0 + if (self.enabled_current != self.enabled.get()): + echange = 1 + self.enabled_current = self.enabled.get() + if (echange != 0): + if (self.enabled_current): + graph.sourceshow(self.source) + else: + graph.sourcehide(self.source) + return + + def revert(self): + self.enabled.set(self.enabled_default) + + def check(self): + self.enabled.set(1) + + def uncheck(self): + self.enabled.set(0) + +class SourceConfigure(Toplevel): + def __init__(self): + Toplevel.__init__(self) + self.resizable(0, 0) + self.title("Source Configuration") + self.items = [] + self.iframe = Frame(self) + self.iframe.grid(row=0, column=0, sticky=E+W) + f = LabelFrame(self.iframe, bd=4, text="Sources") + self.items.append(f) + self.buttons = Frame(self) + self.items[0].grid(row=0, column=0, sticky=E+W) + self.columnconfigure(0, weight=1) + self.sconfig = [] + self.irow = 0 + self.icol = 0 + for source in sources: + self.addsource(source) + self.drawbuttons() + self.buttons.grid(row=1, column=0, sticky=W) + + def addsource(self, source): + if (self.irow > 30): + self.icol += 1 + self.irow = 0 + c = self.icol + f = LabelFrame(self.iframe, bd=4, text="Sources") + f.grid(row=0, column=c, sticky=N+E+W) + self.items.append(f) + item = SourceConf(self.items[self.icol], source) + self.sconfig.append(item) + item.grid(row=self.irow, column=0, sticky=E+W) + self.irow += 1 + + def drawbuttons(self): + self.apply = Button(self.buttons, text="Apply", + command=self.apress) + self.default = Button(self.buttons, text="Revert", + command=self.rpress) + self.checkall = Button(self.buttons, text="Check All", + command=self.cpress) + self.uncheckall = Button(self.buttons, text="Uncheck All", + command=self.upress) + self.checkall.grid(row=0, column=0, sticky=W) + self.uncheckall.grid(row=0, column=1, sticky=W) + self.apply.grid(row=0, column=2, sticky=W) + self.default.grid(row=0, column=3, sticky=W) + self.buttons.columnconfigure(0, weight=1) + 
self.buttons.columnconfigure(
svn commit: r187376 - head/tools/sched
Author: jeff Date: Sun Jan 18 04:49:01 2009 New Revision: 187376 URL: http://svn.freebsd.org/changeset/base/187376 Log: - Significantly speedup hiding and displaying multiple rows by writing an optimized single pass function for each. This reduces the number of tkinter calls required to the minimum. - Add a right-click context menu for sources. Supported commands hide the source, hide the whole group the source is in, and bring up a stat window. - Add a source stat frame that gives an event frequency table as well as the total duration for each event type that has a duration. This can be used to see, for example, the total time a thread spent running or blocked by a wchan or lock. Modified: head/tools/sched/schedgraph.py Modified: head/tools/sched/schedgraph.py == --- head/tools/sched/schedgraph.py Sun Jan 18 04:29:42 2009 (r187375) +++ head/tools/sched/schedgraph.py Sun Jan 18 04:49:01 2009 (r187376) @@ -315,17 +315,13 @@ class SourceConf(Frame): self.checkbox.grid(row=0, column=1) self.columnconfigure(0, weight=1) - def apply(self): - echange = 0 + def changed(self): if (self.enabled_current != self.enabled.get()): - echange = 1 + return 1 + return 0 + + def apply(self): self.enabled_current = self.enabled.get() - if (echange != 0): - if (self.enabled_current): - graph.sourceshow(self.source) - else: - graph.sourcehide(self.source) - return def revert(self): self.enabled.set(self.enabled_default) @@ -389,6 +385,21 @@ class SourceConfigure(Toplevel): self.buttons.columnconfigure(3, weight=1) def apress(self): + disable_sources = [] + enable_sources = [] + for item in self.sconfig: + if (item.changed() == 0): + continue + if (item.enabled.get() == 1): + enable_sources.append(item.source) + else: + disable_sources.append(item.source) + + if (len(disable_sources)): + graph.sourcehidelist(disable_sources) + if (len(enable_sources)): + graph.sourceshowlist(enable_sources) + for item in self.sconfig: item.apply() @@ -404,6 +415,77 @@ class SourceConfigure(Toplevel): for item in self.sconfig: item.uncheck() +# Reverse compare of second member of the tuple +def cmp_counts(x, y): + return y[1] - x[1] + +class SourceStats(Toplevel): + def __init__(self, source): + self.source = source + Toplevel.__init__(self) + self.resizable(0, 0) + self.title(source.name + " statistics") + self.evframe = LabelFrame(self, + text="Event Frequency and Duration") + self.evframe.grid(row=0, column=0, sticky=E+W) + eventtypes={} + for event in self.source.events: + if (event.type == "pad"): + continue + duration = event.duration + if (eventtypes.has_key(event.name)): + (c, d) = eventtypes[event.name] + c += 1 + d += duration + eventtypes[event.name] = (c, d) + else: + eventtypes[event.name] = (1, duration) + events = [] + for k, v in eventtypes.iteritems(): + (c, d) = v + events.append((k, c, d)) + events.sort(cmp=cmp_counts) + + ypos = 0 + for event in events: + (name, c, d) = event + l = Label(self.evframe, text=name, bd=1, + relief=SUNKEN, anchor=W, width=30) + m = Label(self.evframe, text=str(c), bd=1, + relief=SUNKEN, anchor=W, width=10) + r = Label(self.evframe, text=ticks2sec(d), + bd=1, relief=SUNKEN, width=10) + l.grid(row=ypos, column=0, sticky=E+W) + m.grid(row=ypos, column=1, sticky=E+W) + r.grid(row=ypos, column=2, sticky=E+W) + ypos += 1 + + +class SourceContext(Menu): + def __init__(self, event, source): +
svn commit: r187379 - head/tools/sched
Author: jeff Date: Sun Jan 18 05:44:31 2009 New Revision: 187379 URL: http://svn.freebsd.org/changeset/base/187379 Log: - Add summary information to the title once the file is parsed rather than printing it to the terminal. Now only parse errors go to the terminal. - Speedup drawing by raising and lowering tags only once everything has been drawn. Surprisingly, it now takes a little longer to parse than it does to draw. - Parameterize the layout with X_ and Y_ defines that determine the sizes of various things. - Remove unnecessary tags. Modified: head/tools/sched/schedgraph.py Modified: head/tools/sched/schedgraph.py == --- head/tools/sched/schedgraph.py Sun Jan 18 05:35:58 2009 (r187378) +++ head/tools/sched/schedgraph.py Sun Jan 18 05:44:31 2009 (r187379) @@ -86,7 +86,7 @@ eventcolors = [ ("lock contest", "purple"), ("failed lock try", "red"), ("lock release", "grey"), - ("tick","black"), + ("statclock", "black"), ("prio","black"), ("lend prio", "black"), ("wokeup", "black") @@ -125,6 +125,12 @@ clockfreq = None sources = [] lineno = -1 +Y_BORDER = 10 +X_BORDER = 10 +Y_COUNTER = 80 +Y_EVENTSOURCE = 10 +XY_POINT = 4 + class Colormap: def __init__(self, table): self.table = table @@ -674,9 +680,10 @@ class PointEvent(Event): def draw(self, canvas, xpos, ypos): color = colormap.lookup(self.name) - l = canvas.create_oval(xpos - 6, ypos + 1, xpos + 6, ypos - 11, + l = canvas.create_oval(xpos - XY_POINT, ypos, + xpos + XY_POINT, ypos - (XY_POINT * 2), fill=color, width=0, - tags=("all", "point", "event", self.name, self.source.tag)) + tags=("event", self.type, self.name, self.source.tag)) Event.draw(self, canvas, xpos, ypos, l) return xpos @@ -701,7 +708,7 @@ class StateEvent(Event): delta = duration / canvas.ratio l = canvas.create_rectangle(xpos, ypos, xpos + delta, ypos - 10, fill=color, width=0, - tags=("all", "state", "event", self.name, self.source.tag)) + tags=("event", self.type, self.name, self.source.tag)) Event.draw(self, canvas, xpos, ypos, l) return (xpos + delta) @@ -725,7 +732,7 @@ class CountEvent(Event): yhight = self.source.yscale() * self.count l = canvas.create_rectangle(xpos, ypos - yhight, xpos + delta, ypos, fill=color, width=0, - tags=("all", "count", "event", self.name, self.source.tag)) + tags=("event", self.type, self.name, self.source.tag)) Event.draw(self, canvas, xpos, ypos, l) return (xpos + delta) @@ -797,7 +804,8 @@ class EventSource: def drawname(self, canvas, ypos): self.y = ypos ypos = ypos - (self.ysize() / 2) - self.item = canvas.create_text(10, ypos, anchor="w", text=self.name) + self.item = canvas.create_text(X_BORDER, ypos, anchor="w", + text=self.name) return (self.item) def drawcpu(self, canvas, cpu, fromx, tox, ypos): @@ -807,7 +815,7 @@ class EventSource: l = canvas.create_rectangle(fromx, ypos - self.ysize() - canvas.bdheight, tox, ypos + canvas.bdheight, fill=color, width=0, - tags=("all", "cpuinfo", cpu, self.tag), state="hidden") + tags=("cpubg", cpu, self.tag), state="hidden") self.cpuitems.append(l) def move(self, canvas, xpos, ypos): @@ -818,7 +826,7 @@ class EventSource: canvas.move(self.item, xpos, ypos) def ysize(self): - return (10) + return (Y_EVENTSOURCE) def eventat(self, i): if (i >= len(self.events)): @@ -858,7 +866,7 @@ class Counter(EventSource): return (Counter.groups[self.group]) def ysize(self): - return (80) + return (Y_COUNTER) def yscale(self): return (self.ysize() / self.ymax()) @@ -873,16 +881,22 @@ class KTRFile: self.load = {} self.crit = {} self.stathz = 0 + self.eventcnt = 0 self.parse(file) self.fixup() global ticksps 
ticksps = self.ticksps() - timespan = self.timespan() - print "first tick", self.timestamp_f, - print "last tick", self.timestamp_l - print "Ticks per second", ticksps - print "time span", times
svn commit: r187471 - head/tools/sched
Author: jeff Date: Tue Jan 20 12:33:04 2009 New Revision: 187471 URL: http://svn.freebsd.org/changeset/base/187471 Log: - Permit timestamps to be as far as 2048 ticks apart before we complain about invalid timestamps. Nehalem CPUs seem to be synchronized but only within a fraction of a microsecond. - Make the Counter code more flexible to poor timestamps. In general we now complain a lot but render as much as we can. - Change the scaler behavior so it works better with very long and very short traces. We now set the maximum scale such that it properly displays the entire file by default and doesn't permit zooming out beyond the file. This improves other awkward navigation behavior. The interval is now set very small which can't be achieved by simply dragging the mouse. Clicking to the left of or right of the scaler bar will produce increments of a single, very small, interval now. Sponsored by: Nokia Modified: head/tools/sched/schedgraph.py Modified: head/tools/sched/schedgraph.py == --- head/tools/sched/schedgraph.py Tue Jan 20 12:07:49 2009 (r187470) +++ head/tools/sched/schedgraph.py Tue Jan 20 12:33:04 2009 (r187471) @@ -162,15 +162,12 @@ def ticks2sec(ticks): class Scaler(Frame): def __init__(self, master, target): Frame.__init__(self, master) - self.scale = Scale(self, command=self.scaleset, - from_=1000, to_=1000, orient=HORIZONTAL, - resolution=1000) + self.scale = None + self.target = target self.label = Label(self, text="Ticks per pixel") self.label.pack(side=LEFT) - self.scale.pack(fill="both", expand=1) - self.target = target - self.scale.set(target.scaleget()) - self.initialized = 1 + self.resolution = 100 + self.setmax(1) def scaleset(self, value): self.target.scaleset(int(value)) @@ -178,6 +175,20 @@ class Scaler(Frame): def set(self, value): self.scale.set(value) + def setmax(self, value): + # + # We can't reconfigure the to_ value so we delete the old + # window and make a new one when we resize. 
+ # + if (self.scale != None): + self.scale.pack_forget() + self.scale.destroy() + self.scale = Scale(self, command=self.scaleset, + from_=100, to_=value, orient=HORIZONTAL, + resolution=self.resolution) + self.scale.pack(fill="both", expand=1) + self.scale.set(self.target.scaleget()) + class Status(Frame): def __init__(self, master): Frame.__init__(self, master) @@ -726,6 +737,11 @@ class CountEvent(Event): return (xpos) color = colormap.lookup("count") self.duration = duration = next.timestamp - self.timestamp + if (duration < 0): + duration = 0 + print "Unsynchronized timestamp" + print self.cpu, self.timestamp + print next.cpu, next.timestamp self.attrs.insert(0, ("count", self.count)) self.attrs.insert(1, ("duration", ticks2sec(duration))) delta = duration / canvas.ratio @@ -882,6 +898,7 @@ class KTRFile: self.crit = {} self.stathz = 0 self.eventcnt = 0 + self.taghash = {} self.parse(file) self.fixup() @@ -956,7 +973,8 @@ class KTRFile: if (dat == None): dat = dat1 if (self.checkstamp(timestamp) == 0): - print "Bad timestamp at", lineno, ":", line, + print "Bad timestamp at", lineno, ":", + print cpu, timestamp continue # # Build the table of optional attributes @@ -1021,20 +1039,22 @@ class KTRFile: timestamp = int(timestamp) if (self.timestamp_f == None): self.timestamp_f = timestamp; - if (self.timestamp_l != None and timestamp > self.timestamp_l): + if (self.timestamp_l != None and + timestamp -2048> self.timestamp_l): return (0) self.timestamp_l = timestamp; return (1) def makeid(self, group, id, type): - for source in sources: - if (source.name == id and source.group == group): -
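The core of the timestamp change in r187471 is a fixed skew allowance before a record is declared out of order. A minimal standalone sketch of that check follows (assumed class and variable names; this is not the schedgraph.py parser, which also reports the offending CPU and line): successive stamps may exceed the previous one by up to 2048 ticks of cross-CPU drift before being rejected.

# Illustrative sketch only -- not the committed checkstamp() code.
TIMESTAMP_SKEW = 2048	# ticks of cross-CPU drift tolerated before complaining

class StampChecker:
	def __init__(self):
		self.first = None
		self.last = None

	def check(self, timestamp):
		if self.first is None:
			self.first = timestamp
		# Stamps are expected to be non-increasing; allow the next one
		# to exceed the previous by up to TIMESTAMP_SKEW ticks before
		# declaring it invalid, mirroring the check in the diff above.
		if self.last is not None and timestamp - TIMESTAMP_SKEW > self.last:
			return False
		self.last = timestamp
		return True

checker = StampChecker()
for stamp in (100000, 99000, 99500, 200000):
	if not checker.check(stamp):
		print "Bad timestamp:", stamp	# only 200000 trips the check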
svn commit: r187580 - head/tools/sched
Author: jeff Date: Thu Jan 22 06:21:30 2009 New Revision: 187580 URL: http://svn.freebsd.org/changeset/base/187580 Log: - Update my copyright. - Print human readable time as a float with two digits of precision. Use ns now as well since clock periods are well into the hundreds of picoseconds now. - Show the average duration in the stats frame. This is often more useful than total duration. Modified: head/tools/sched/schedgraph.py Modified: head/tools/sched/schedgraph.py == --- head/tools/sched/schedgraph.py Thu Jan 22 05:05:56 2009 (r187579) +++ head/tools/sched/schedgraph.py Thu Jan 22 06:21:30 2009 (r187580) @@ -1,6 +1,6 @@ #!/usr/local/bin/python -# Copyright (c) 2002-2003, Jeffrey Roberson +# Copyright (c) 2002-2003, 2009, Jeffrey Roberson # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -149,15 +149,19 @@ class Colormap: return (color) def ticks2sec(ticks): - us = ticksps / 1000000 - ticks /= us + ticks = float(ticks) + ns = float(ticksps) / 1000000000 + ticks /= ns if (ticks < 1000): - return (str(ticks) + "us") + return ("%.2fns" % ticks) ticks /= 1000 if (ticks < 1000): - return (str(ticks) + "ms") + return ("%.2fus" % ticks) ticks /= 1000 - return (str(ticks) + "s") + if (ticks < 1000): + return ("%.2fms" % ticks) + ticks /= 1000 + return ("%.2fs" % ticks) class Scaler(Frame): def __init__(self, master, target): @@ -443,7 +447,7 @@ class SourceStats(Toplevel): self.resizable(0, 0) self.title(source.name + " statistics") self.evframe = LabelFrame(self, - text="Event Frequency and Duration") + text="Event Count, Duration, Avg Duration") self.evframe.grid(row=0, column=0, sticky=E+W) eventtypes={} for event in self.source.events: @@ -466,15 +470,22 @@ class SourceStats(Toplevel): ypos = 0 for event in events: (name, c, d) = event - l = Label(self.evframe, text=name, bd=1, - relief=SUNKEN, anchor=W, width=30) - m = Label(self.evframe, text=str(c), bd=1, - relief=SUNKEN, anchor=W, width=10) - r = Label(self.evframe, text=ticks2sec(d), - bd=1, relief=SUNKEN, width=10) - l.grid(row=ypos, column=0, sticky=E+W) - m.grid(row=ypos, column=1, sticky=E+W) - r.grid(row=ypos, column=2, sticky=E+W) + Label(self.evframe, text=name, bd=1, + relief=SUNKEN, anchor=W, width=30).grid( + row=ypos, column=0, sticky=W+E) + Label(self.evframe, text=str(c), bd=1, + relief=SUNKEN, anchor=W, width=10).grid( + row=ypos, column=1, sticky=W+E) + Label(self.evframe, text=ticks2sec(d), + bd=1, relief=SUNKEN, width=10).grid( + row=ypos, column=2, sticky=W+E) + if (d and c): + d /= c + else: + d = 0 + Label(self.evframe, text=ticks2sec(d), + bd=1, relief=SUNKEN, width=10).grid( + row=ypos, column=3, sticky=W+E) ypos += 1 ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
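For reference, the unit conversion r187580 introduces is a cascade from nanoseconds upward with two digits of precision, and the new column in the stats frame is simply total duration over event count. A small standalone sketch of the idea (the ticksps value and the count/total pair are made-up numbers, and this is a simplified rewrite, not the committed ticks2sec()):

# Illustrative sketch only -- simplified from the idea in the commit.
ticksps = 2400000000		# assumed 2.4 GHz timestamp counter

def ticks2human(ticks):
	ticks = float(ticks)
	ticks /= float(ticksps) / 1000000000	# convert ticks to nanoseconds
	for unit in ("ns", "us", "ms"):
		if ticks < 1000:
			return "%.2f%s" % (ticks, unit)
		ticks /= 1000
	return "%.2fs" % ticks

count, total = 12, 480000	# event count and total duration in ticks
avg = total / count if count else 0
print "total:", ticks2human(total), "avg:", ticks2human(avg)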
Re: svn commit: r187576 - in head/sys/dev: ppbus ppc
On Wed, 21 Jan 2009, John Baldwin wrote: Author: jhb Date: Wed Jan 21 23:10:06 2009 New Revision: 187576 URL: http://svn.freebsd.org/changeset/base/187576 Log: Add locking to ppc and ppbus and mark the whole lot MPSAFE: Looks like there might be some kinks still: ppc0: port 0x378-0x37f,0x778-0x77f irq 7 drq 3 on acpi0 ppc0: SMC-like chipset (ECP/EPP/PS2/NIBBLE) in COMPATIBLE mode ppc0: FIFO with 16/16/9 bytes threshold ppc0: [ITHREAD] ppbus0: on ppc0 panic: mutex ppc0 not owned at ../../../dev/ppc/ppc.c:1983 cpuid = 0 KDB: enter: panic [thread pid 0 tid 10 ] Stopped at kdb_enter+0x3d: movq$0,0x652ea8(%rip) _mtx_assert() at _mtx_assert+0xdc ppc_write_ivar() at ppc_write_ivar+0x6e ppbus_attach() at ppbus_attach+0x14b Thanks, Jeff - To avoid having a bunch of locks that end up always getting acquired as a group, give each ppc(4) device a mutex which it shares with all the child devices including ppbus(4), lpt(4), plip(4), etc. This mutex is then used for all the locking. - Rework the interrupt handling stuff yet again. Now ppbus drivers setup their interrupt handler during attach and tear it down during detach like most other drivers. ppbus(4) only invokes the interrupt handler of the device that currently owns the bus (if any) when an interrupt occurs, however. Also, interrupt handlers in general now accept their softc pointers as their argument rather than the device_t. Another feature of the ppbus interrupt handlers is that they are called with the parent ppc device's lock already held. This minimizes the number of lock operations during an interrupt. - Mark plip(4), lpt(4), pcfclock(4), ppi(4), vpo(4) MPSAFE. - lpbb(4) uses the ppc lock instead of Giant. - Other plip(4) changes: - Add a mutex to protect the global tables in plip(4) and free them on module unload. - Add a detach routine. - Split out the init/stop code from the ioctl routine into separate functions. - Other lpt(4) changes: - Use device_printf(). - Use a dedicated callout for the lptout timer. - Allocate the I/O buffers at attach and detach rather than during open and close as this simplifies the locking at the cost of 1024+32 bytes when the driver is attached. - Other ppi(4) changes: - Use an sx lock to serialize open and close. - Remove unused HADBUS flag. - Add a detach routine. - Use a malloc'd buffer for each read and write to avoid races with concurrent read/write. - Other pps(4) changes: - Use a callout rather than a callout handle with timeout(). - Conform to the new ppbus requirements (regular mutex, non-filter interrupt handler). pps(4) is probably going to have to become a standalone driver that doesn't use ppbus(4) to satisfy it's requirements for low latency as a result. - Use an sx lock to serialize open and close. - Other vpo(4) changes: - Use the parent ppc device's lock to create the CAM sim instead of Giant. - Other ppc(4) changes: - Fix ppc_isa's detach method to detach instead of calling attach. 
Tested by: no one :-( Modified: head/sys/dev/ppbus/if_plip.c head/sys/dev/ppbus/immio.c head/sys/dev/ppbus/lpbb.c head/sys/dev/ppbus/lpt.c head/sys/dev/ppbus/pcfclock.c head/sys/dev/ppbus/ppb_1284.c head/sys/dev/ppbus/ppb_base.c head/sys/dev/ppbus/ppb_msq.c head/sys/dev/ppbus/ppbconf.c head/sys/dev/ppbus/ppbconf.h head/sys/dev/ppbus/ppi.c head/sys/dev/ppbus/pps.c head/sys/dev/ppbus/vpo.c head/sys/dev/ppbus/vpoio.c head/sys/dev/ppc/ppc.c head/sys/dev/ppc/ppc_acpi.c head/sys/dev/ppc/ppc_isa.c head/sys/dev/ppc/ppc_pci.c head/sys/dev/ppc/ppc_puc.c head/sys/dev/ppc/ppcreg.h head/sys/dev/ppc/ppcvar.h Modified: head/sys/dev/ppbus/if_plip.c == --- head/sys/dev/ppbus/if_plip.cWed Jan 21 21:48:46 2009 (r187575) +++ head/sys/dev/ppbus/if_plip.cWed Jan 21 23:10:06 2009 (r187576) @@ -152,8 +152,12 @@ struct lp_data { int sc_iferrs; struct resource *res_irq; + void*sc_intr_cookie; }; +static struct mtx lp_tables_lock; +MTX_SYSINIT(lp_tables, &lp_tables_lock, "plip tables", MTX_DEF); + /* Tables for the lp# interface */ static u_char *txmith; #define txmitl (txmith + (1 * LPIPTBLSIZE)) @@ -170,13 +174,41 @@ static int lpinittables(void); static int lpioctl(struct ifnet *, u_long, caddr_t); static int lpoutput(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); +static void lpstop(struct lp_data *); static void lp_intr(void *); +static int lp_module_handler(module_t, int, void *); #define DEVTOSOFTC(dev) \ ((struct lp_data *)device_get_softc(dev)) static devclass_t lp_devclass; +static int +lp_module_handler(module_t mod, int what, void *arg) +{ + + switch (what) { + case MOD_UNLOAD: + mtx_lock(&lp_tables_lock); + if (txmith