svn commit: r222953 - head/sys/kern
Author: jeff
Date: Fri Jun 10 22:15:36 2011
New Revision: 222953
URL: http://svn.freebsd.org/changeset/base/222953

Log:
  - When printing bufs with show buf the lblkno is often more useful than
    the blkno.  Print them both.

Modified:
  head/sys/kern/vfs_bio.c

Modified: head/sys/kern/vfs_bio.c
==============================================================================
--- head/sys/kern/vfs_bio.c	Fri Jun 10 20:51:41 2011	(r222952)
+++ head/sys/kern/vfs_bio.c	Fri Jun 10 22:15:36 2011	(r222953)
@@ -3999,10 +3999,11 @@ DB_SHOW_COMMAND(buffer, db_show_buffer)
 	db_printf("b_flags = 0x%b\n", (u_int)bp->b_flags, PRINT_BUF_FLAGS);
 	db_printf(
 	    "b_error = %d, b_bufsize = %ld, b_bcount = %ld, b_resid = %ld\n"
-	    "b_bufobj = (%p), b_data = %p, b_blkno = %jd, b_dep = %p\n",
+	    "b_bufobj = (%p), b_data = %p, b_blkno = %jd, b_lblkno = %jd, "
+	    "b_dep = %p\n",
 	    bp->b_error, bp->b_bufsize, bp->b_bcount, bp->b_resid,
 	    bp->b_bufobj, bp->b_data, (intmax_t)bp->b_blkno,
-	    bp->b_dep.lh_first);
+	    (intmax_t)bp->b_lblkno, bp->b_dep.lh_first);
 	if (bp->b_npages) {
 		int i;
 		db_printf("b_npages = %d, pages(OBJ, IDX, PA): ", bp->b_npages);
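A minimal sketch of how the two fields relate, assuming only the standard
VOP_BMAP() interface (this fragment is illustrative and is not part of the
commit): b_lblkno is the file-relative logical block number, while b_blkno
is the device-relative address that bmap produces for it.

#include <sys/param.h>
#include <sys/vnode.h>

/*
 * Illustrative only: translate a logical (file-relative) block number
 * into the device-relative block number, the same mapping that
 * distinguishes b_lblkno from b_blkno in the output above.
 */
static int
lblk_to_devblk(struct vnode *vp, daddr_t lblkno, daddr_t *blknop)
{

	return (VOP_BMAP(vp, lblkno, NULL, blknop, NULL, NULL));
}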
svn commit: r222954 - head/sys/ufs/ufs
Author: jeff
Date: Fri Jun 10 22:18:25 2011
New Revision: 222954
URL: http://svn.freebsd.org/changeset/base/222954

Log:
  - If the fsync in ufs_direnter fails SUJ can later panic because we have
    partially added a name.  Allow ufs_direnter() to continue in the hopes
    that it is a transient error.  If it is not, the directory is corrupted
    already from IO errors and writing this new block is not likely to make
    things worse.

Modified:
  head/sys/ufs/ufs/ufs_lookup.c

Modified: head/sys/ufs/ufs/ufs_lookup.c
==============================================================================
--- head/sys/ufs/ufs/ufs_lookup.c	Fri Jun 10 22:15:36 2011	(r222953)
+++ head/sys/ufs/ufs/ufs_lookup.c	Fri Jun 10 22:18:25 2011	(r222954)
@@ -967,7 +967,7 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdir
 		return (0);
 	if (tvp != NULL)
 		VOP_UNLOCK(tvp, 0);
-	error = VOP_FSYNC(dvp, MNT_WAIT, td);
+	(void) VOP_FSYNC(dvp, MNT_WAIT, td);
 	if (tvp != NULL)
 		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
 	return (error);
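A minimal sketch of the pattern adopted here, with invented names rather
than the real ufs_direnter() body: the fsync status is deliberately
discarded so that a transient write error does not abort an operation
whose directory entry is already partially on disk.

#include <sys/param.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/vnode.h>

/* Illustrative helper, not the committed code. */
static int
flush_dir_best_effort(struct vnode *dvp, struct thread *td, int error)
{

	/*
	 * Best effort: if the flush fails the block will be retried by
	 * the syncer, and failing here would leave a half-added name
	 * that the journal cannot describe.
	 */
	(void)VOP_FSYNC(dvp, MNT_WAIT, td);
	return (error);
}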
svn commit: r222955 - head/sys/ufs/ufs
Author: jeff Date: Fri Jun 10 22:19:44 2011 New Revision: 222955 URL: http://svn.freebsd.org/changeset/base/222955 Log: - Add support for referencing quota structures without needing the inode pointer for softupdates. Submitted by: mckusick Modified: head/sys/ufs/ufs/quota.h head/sys/ufs/ufs/ufs_quota.c Modified: head/sys/ufs/ufs/quota.h == --- head/sys/ufs/ufs/quota.hFri Jun 10 22:18:25 2011(r222954) +++ head/sys/ufs/ufs/quota.hFri Jun 10 22:19:44 2011(r222955) @@ -239,6 +239,12 @@ intsetuse(struct thread *, struct mount intgetquotasize(struct thread *, struct mount *, u_long, int, void *); vfs_quotactl_t ufs_quotactl; +#ifdef SOFTUPDATES +intquotaref(struct vnode *, struct dquot **); +void quotarele(struct dquot **); +void quotaadj(struct dquot **, struct ufsmount *, int64_t); +#endif /* SOFTUPDATES */ + #else /* !_KERNEL */ #include Modified: head/sys/ufs/ufs/ufs_quota.c == --- head/sys/ufs/ufs/ufs_quota.cFri Jun 10 22:18:25 2011 (r222954) +++ head/sys/ufs/ufs/ufs_quota.cFri Jun 10 22:19:44 2011 (r222955) @@ -1613,6 +1613,101 @@ dqflush(struct vnode *vp) } /* + * The following three functions are provided for the adjustment of + * quotas by the soft updates code. + */ +#ifdef SOFTUPDATES +/* + * Acquire a reference to the quota structures associated with a vnode. + * Return count of number of quota structures found. + */ +int +quotaref(vp, qrp) + struct vnode *vp; + struct dquot **qrp; +{ + struct inode *ip; + struct dquot *dq; + int i, found; + + for (i = 0; i < MAXQUOTAS; i++) + qrp[i] = NODQUOT; + /* +* Disk quotas must be turned off for system files. Currently +* snapshot and quota files. +*/ + if ((vp->v_vflag & VV_SYSTEM) != 0) + return (0); + /* +* Iterate through and copy active quotas. +*/ + found = 0; + ip = VTOI(vp); + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = ip->i_dquot[i]) == NODQUOT) + continue; + DQREF(dq); + qrp[i] = dq; + found++; + } + return (found); +} + +/* + * Release a set of quota structures obtained from a vnode. + */ +void +quotarele(qrp) + struct dquot **qrp; +{ + struct dquot *dq; + int i; + + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = qrp[i]) == NODQUOT) + continue; + dqrele(NULL, dq); + } +} + +/* + * Adjust the number of blocks associated with a quota. + * Positive numbers when adding blocks; negative numbers when freeing blocks. + */ +void +quotaadj(qrp, ump, blkcount) + struct dquot **qrp; + struct ufsmount *ump; + int64_t blkcount; +{ + struct dquot *dq; + ufs2_daddr_t ncurblocks; + int i; + + if (blkcount == 0) + return; + for (i = 0; i < MAXQUOTAS; i++) { + if ((dq = qrp[i]) == NODQUOT) + continue; + DQI_LOCK(dq); + DQI_WAIT(dq, PINOD+1, "adjqta"); + ncurblocks = dq->dq_curblocks + blkcount; + if (ncurblocks >= 0) + dq->dq_curblocks = ncurblocks; + else + dq->dq_curblocks = 0; + if (blkcount < 0) + dq->dq_flags &= ~DQ_BLKS; + else if (dq->dq_curblocks + blkcount >= dq->dq_bsoftlimit && +dq->dq_curblocks < dq->dq_bsoftlimit) + dq->dq_btime = time_second + ump->um_btime[i]; + dq->dq_flags |= DQ_MOD; + DQI_UNLOCK(dq); + } +} +#endif /* SOFTUPDATES */ + +/* * 32-bit / 64-bit conversion functions. * * 32-bit quota records are stored in native byte order. Attention must ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
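A sketch of how a caller might use the new interface; the function and
variable names below are invented for illustration and are not part of
the commit.  The point of the API is that quota references can be taken
while the vnode is still available and the adjustment applied much later,
when the freed blocks are finally released.

#include <sys/param.h>
#include <sys/vnode.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>

#if defined(_KERNEL) && defined(SOFTUPDATES)
static void
example_charge_freed_blocks(struct vnode *vp, struct ufsmount *ump,
    int64_t freedblocks)
{
	struct dquot *quota[MAXQUOTAS];

	if (quotaref(vp, quota) == 0)
		return;				/* No active quotas. */
	/* ...possibly much later, after the inode is gone... */
	quotaadj(quota, ump, -freedblocks);	/* Negative when freeing. */
	quotarele(quota);
}
#endif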
svn commit: r222956 - head/sys/conf
Author: jeff Date: Fri Jun 10 22:38:31 2011 New Revision: 222956 URL: http://svn.freebsd.org/changeset/base/222956 Log: - Eliminate an incorrect include path from the mthca build. Modified: head/sys/conf/files Modified: head/sys/conf/files == --- head/sys/conf/files Fri Jun 10 22:19:44 2011(r222955) +++ head/sys/conf/files Fri Jun 10 22:38:31 2011(r222956) @@ -3152,41 +3152,41 @@ ofed/drivers/net/mlx4/en_tx.c optional compile-with "${OFED_C_NOIMP} -I$S/ofed/drivers/net/mlx4/" ofed/drivers/infiniband/hw/mthca/mthca_allocator.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_av.coptional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_catas.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_cmd.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_cq.coptional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_eq.coptional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mad.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_main.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mcg.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_memfree.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_mr.coptional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_pd.coptional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_profile.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_provider.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_qp.coptional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_reset.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_srq.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + no-depend compile-with "${OFED_C}" ofed/drivers/infiniband/hw/mthca/mthca_uar.c optional mthca \ - no-depend compile-with "${OFED_C} -I$S/ofed/drivers/infiniband/mthca/" + 
	no-depend compile-with "${OFED_C}"
# crypto support
opencrypto/cast.c	optional crypto | ipsec
svn commit: r222958 - in head: sbin/fsck_ffs sys/sys sys/ufs/ffs sys/ufs/ufs
Author: jeff Date: Fri Jun 10 22:48:35 2011 New Revision: 222958 URL: http://svn.freebsd.org/changeset/base/222958 Log: Implement fully asynchronous partial truncation with softupdates journaling to resolve errors which can cause corruption on recovery with the old synchronous mechanism. - Append partial truncation freework structures to indirdeps while truncation is proceeding. These prevent new block pointers from becoming valid until truncation completes and serialize truncations. - On completion of a partial truncate journal work waits for zeroed pointers to hit indirects. - softdep_journal_freeblocks() handles last frag allocation and last block zeroing. - vtruncbuf/ffs_page_remove moved into softdep_*_freeblocks() so it is only implemented in one place. - Block allocation failure handling moved up one level so it does not proceed with buf locks held. This permits us to do more extensive reclaims when filesystem space is exhausted. - softdep_sync_metadata() is broken into two parts, the first executes once at the start of ffs_syncvnode() and flushes truncations and inode dependencies. The second is called on each locked buf. This eliminates excessive looping and rollbacks. - Improve the mechanism in process_worklist_item() that handles acquiring vnode locks for handle_workitem_remove() so that it works more generally and does not loop excessively over the same worklist items on each call. - Don't corrupt directories by zeroing the tail in fsck. This is only done for regular files. - Push a fsync complete record for files that need it so the checker knows a truncation in the journal is no longer valid. Discussed with: mckusick, kib (ffs_pages_remove and ffs_truncate parts) Tested by:pho Modified: head/sbin/fsck_ffs/suj.c head/sys/sys/vnode.h head/sys/ufs/ffs/ffs_alloc.c head/sys/ufs/ffs/ffs_balloc.c head/sys/ufs/ffs/ffs_extern.h head/sys/ufs/ffs/ffs_inode.c head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/ffs_vfsops.c head/sys/ufs/ffs/ffs_vnops.c head/sys/ufs/ffs/fs.h head/sys/ufs/ffs/softdep.h head/sys/ufs/ufs/inode.h head/sys/ufs/ufs/ufsmount.h Modified: head/sbin/fsck_ffs/suj.c == --- head/sbin/fsck_ffs/suj.cFri Jun 10 22:42:00 2011(r222957) +++ head/sbin/fsck_ffs/suj.cFri Jun 10 22:48:35 2011(r222958) @@ -1604,7 +1604,7 @@ ino_trunc(ino_t ino, off_t size) * uninitialized space later. */ off = blkoff(fs, size); - if (off) { + if (off && DIP(ip, di_mode) != IFDIR) { uint8_t *buf; long clrsize; @@ -1775,13 +1775,18 @@ cg_trunc(struct suj_cg *sc) struct suj_ino *sino; int i; - for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) + for (i = 0; i < SUJ_HASHSIZE; i++) { + LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) { if (sino->si_trunc) { ino_trunc(sino->si_ino, sino->si_trunc->jt_size); + sino->si_blkadj = 0; sino->si_trunc = NULL; } + if (sino->si_blkadj) + ino_adjblks(sino); + } + } } /* @@ -1791,7 +1796,6 @@ cg_trunc(struct suj_cg *sc) static void cg_check_blk(struct suj_cg *sc) { - struct suj_ino *sino; struct suj_blk *sblk; int i; @@ -1799,15 +1803,6 @@ cg_check_blk(struct suj_cg *sc) for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next) blk_check(sblk); - /* -* Now that we've freed blocks which are not referenced we -* make a second pass over all inodes to adjust their block -* counts. 
-*/ - for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) - if (sino->si_blkadj) - ino_adjblks(sino); } /* @@ -1961,14 +1956,7 @@ ino_append(union jrec *rec) "parent %d, diroff %jd\n", refrec->jr_op, refrec->jr_ino, refrec->jr_nlink, refrec->jr_parent, refrec->jr_diroff); - /* -* Lookup the ino and clear truncate if one is found. Partial -* truncates are always done synchronously so if we discover -* an operation that requires a lock the truncation has completed -* and can be discarded. -*/ sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1); - sino->si_trunc = NULL; sino->si_hasrecs = 1; srec = errmalloc(sizeof(*srec)); srec->sr_rec = rec; @@ -2174,9 +2162,7
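One small but easily missed piece of the diff above is the fsck change
that stops zeroing the tail of a directory's last partial block.  The
fragment below restates that rule in isolation (simplified; the real
ino_trunc() buffer handling is omitted and the helper name is invented):

#include <sys/param.h>
#include <sys/types.h>
#include <ufs/ufs/dinode.h>
#include <ufs/ffs/fs.h>

/*
 * Only regular files may have the uninitialized tail of a partial last
 * block cleared during recovery; clearing a directory block would
 * destroy live entries.
 */
static int
may_zero_partial_tail(struct fs *fs, int mode, off_t size)
{

	return (blkoff(fs, size) != 0 && (mode & IFMT) != IFDIR);
}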
Re: svn commit: r222958 - in head: sbin/fsck_ffs sys/sys sys/ufs/ffs sys/ufs/ufs
On Fri, 10 Jun 2011, Jeff Roberson wrote: Author: jeff Date: Fri Jun 10 22:48:35 2011 New Revision: 222958 URL: http://svn.freebsd.org/changeset/base/222958 Log: Implement fully asynchronous partial truncation with softupdates journaling to resolve errors which can cause corruption on recovery with the old synchronous mechanism. This diff is enormous and took months of work. I'm sorry to get it in so close to 9.0, I had no idea it would take so long. pho has tested multiple versions of the patch with and without journaling for days of test time and it has probably racked up a week of machine time for me but there may be problems given that it is so huge. There is still a snapshot problem with SUJ that mckusick and I are working on. Expect to see some checkins for that soon. Thanks, Jeff - Append partial truncation freework structures to indirdeps while truncation is proceeding. These prevent new block pointers from becoming valid until truncation completes and serialize truncations. - On completion of a partial truncate journal work waits for zeroed pointers to hit indirects. - softdep_journal_freeblocks() handles last frag allocation and last block zeroing. - vtruncbuf/ffs_page_remove moved into softdep_*_freeblocks() so it is only implemented in one place. - Block allocation failure handling moved up one level so it does not proceed with buf locks held. This permits us to do more extensive reclaims when filesystem space is exhausted. - softdep_sync_metadata() is broken into two parts, the first executes once at the start of ffs_syncvnode() and flushes truncations and inode dependencies. The second is called on each locked buf. This eliminates excessive looping and rollbacks. - Improve the mechanism in process_worklist_item() that handles acquiring vnode locks for handle_workitem_remove() so that it works more generally and does not loop excessively over the same worklist items on each call. - Don't corrupt directories by zeroing the tail in fsck. This is only done for regular files. - Push a fsync complete record for files that need it so the checker knows a truncation in the journal is no longer valid. Discussed with:mckusick, kib (ffs_pages_remove and ffs_truncate parts) Tested by: pho Modified: head/sbin/fsck_ffs/suj.c head/sys/sys/vnode.h head/sys/ufs/ffs/ffs_alloc.c head/sys/ufs/ffs/ffs_balloc.c head/sys/ufs/ffs/ffs_extern.h head/sys/ufs/ffs/ffs_inode.c head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/ffs_vfsops.c head/sys/ufs/ffs/ffs_vnops.c head/sys/ufs/ffs/fs.h head/sys/ufs/ffs/softdep.h head/sys/ufs/ufs/inode.h head/sys/ufs/ufs/ufsmount.h Modified: head/sbin/fsck_ffs/suj.c == --- head/sbin/fsck_ffs/suj.cFri Jun 10 22:42:00 2011(r222957) +++ head/sbin/fsck_ffs/suj.cFri Jun 10 22:48:35 2011(r222958) @@ -1604,7 +1604,7 @@ ino_trunc(ino_t ino, off_t size) * uninitialized space later. 
*/ off = blkoff(fs, size); - if (off) { + if (off && DIP(ip, di_mode) != IFDIR) { uint8_t *buf; long clrsize; @@ -1775,13 +1775,18 @@ cg_trunc(struct suj_cg *sc) struct suj_ino *sino; int i; - for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) + for (i = 0; i < SUJ_HASHSIZE; i++) { + LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) { if (sino->si_trunc) { ino_trunc(sino->si_ino, sino->si_trunc->jt_size); + sino->si_blkadj = 0; sino->si_trunc = NULL; } + if (sino->si_blkadj) + ino_adjblks(sino); + } + } } /* @@ -1791,7 +1796,6 @@ cg_trunc(struct suj_cg *sc) static void cg_check_blk(struct suj_cg *sc) { - struct suj_ino *sino; struct suj_blk *sblk; int i; @@ -1799,15 +1803,6 @@ cg_check_blk(struct suj_cg *sc) for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next) blk_check(sblk); - /* -* Now that we've freed blocks which are not referenced we -* make a second pass over all inodes to adjust their block -* counts. -*/ - for (i = 0; i < SUJ_HASHSIZE; i++) - LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) - if (sino->si_blkadj) - ino_adjblks(sino); } /* @@ -1961,14 +1956,7 @@ ino_append(union jrec *rec) "parent %d, diroff %jd\n", refrec-&
svn commit: r223325 - head/sys/ufs/ffs
Author: jeff Date: Mon Jun 20 03:25:09 2011 New Revision: 223325 URL: http://svn.freebsd.org/changeset/base/223325 Log: - Fix directory count rollbacks by passing the mode to the journal dep earlier. - Add rollback/forward code for frag and cluster accounting. - Handle the FREEDEP case in softdep_sync_buf(). (submitted by pho) Modified: head/sys/ufs/ffs/ffs_alloc.c head/sys/ufs/ffs/ffs_extern.h head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_alloc.c == --- head/sys/ufs/ffs/ffs_alloc.cMon Jun 20 02:17:34 2011 (r223324) +++ head/sys/ufs/ffs/ffs_alloc.cMon Jun 20 03:25:09 2011 (r223325) @@ -1829,7 +1829,7 @@ gotit: } UFS_UNLOCK(ump); if (DOINGSOFTDEP(ITOV(ip))) - softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref); + softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref, mode); bdwrite(bp); if (ibp != NULL) bawrite(ibp); Modified: head/sys/ufs/ffs/ffs_extern.h == --- head/sys/ufs/ffs/ffs_extern.h Mon Jun 20 02:17:34 2011 (r223324) +++ head/sys/ufs/ffs/ffs_extern.h Mon Jun 20 03:25:09 2011 (r223325) @@ -130,7 +130,7 @@ voidsoftdep_freefile(struct vnode *, in intsoftdep_request_cleanup(struct fs *, struct vnode *, struct ucred *, int); void softdep_setup_freeblocks(struct inode *, off_t, int); -void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t); +void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t, int); void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t, int, int); void softdep_setup_allocdirect(struct inode *, ufs_lbn_t, ufs2_daddr_t, Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Mon Jun 20 02:17:34 2011 (r223324) +++ head/sys/ufs/ffs/ffs_softdep.c Mon Jun 20 03:25:09 2011 (r223325) @@ -142,10 +142,11 @@ softdep_setup_sbupdate(ump, fs, bp) } void -softdep_setup_inomapdep(bp, ip, newinum) +softdep_setup_inomapdep(bp, ip, newinum, mode) struct buf *bp; struct inode *ip; ino_t newinum; + int mode; { panic("softdep_setup_inomapdep called"); @@ -789,6 +790,8 @@ static void diradd_inode_written(struct static int handle_written_indirdep(struct indirdep *, struct buf *, struct buf**); static int handle_written_inodeblock(struct inodedep *, struct buf *); +static int jnewblk_rollforward(struct jnewblk *, struct fs *, struct cg *, + uint8_t *); static int handle_written_bmsafemap(struct bmsafemap *, struct buf *); static void handle_written_jaddref(struct jaddref *); static void handle_written_jremref(struct jremref *); @@ -820,6 +823,8 @@ static void handle_allocindir_partdone(s static void initiate_write_filepage(struct pagedep *, struct buf *); static void initiate_write_indirdep(struct indirdep*, struct buf *); static void handle_written_mkdir(struct mkdir *, int); +static int jnewblk_rollback(struct jnewblk *, struct fs *, struct cg *, + uint8_t *); static void initiate_write_bmsafemap(struct bmsafemap *, struct buf *); static void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *); static void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *); @@ -935,6 +940,7 @@ static void wake_worklist(struct worklis static void wait_worklist(struct worklist *, char *); static void remove_from_worklist(struct worklist *); static void softdep_flush(void); +static void softdep_flushjournal(struct mount *); static int softdep_speedup(void); static void worklist_speedup(void); static int journal_mount(struct mount *, struct fs *, struct ucred *); @@ -3046,6 +3052,25 @@ jfsync_write(jfsync, jseg, data) rec->jt_extsize = jfsync->jfs_extsize; } +static void +softdep_flushjournal(mp) + 
struct mount *mp; +{ + struct jblocks *jblocks; + struct ufsmount *ump; + + if ((mp->mnt_kern_flag & MNTK_SUJ) == 0) + return; + ump = VFSTOUFS(mp); + jblocks = ump->softdep_jblocks; + ACQUIRE_LOCK(&lk); + while (ump->softdep_on_journal) { + jblocks->jb_needseg = 1; + softdep_process_journal(mp, NULL, MNT_WAIT); + } + FREE_LOCK(&lk); +} + /* * Flush some journal records to disk. */ @@ -4310,7 +4335,6 @@ softdep_setup_create(dp, ip) inoreflst); KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number, ("softdep_setup_create: No addref structure present.")); - jaddref->ja_mode = ip->i_mode; } softdep_prelink(dvp, NULL); FREE_LOCK(&lk); @@ -4417,7
svn commit: r223689 - head/sbin/fsck_ffs
Author: jeff
Date: Thu Jun 30 05:28:10 2011
New Revision: 223689
URL: http://svn.freebsd.org/changeset/base/223689

Log:
  - Handle the JOP_SYNC case as appropriate.

  Reported by:	pho

Modified:
  head/sbin/fsck_ffs/suj.c

Modified: head/sbin/fsck_ffs/suj.c
==============================================================================
--- head/sbin/fsck_ffs/suj.c	Thu Jun 30 05:20:02 2011	(r223688)
+++ head/sbin/fsck_ffs/suj.c	Thu Jun 30 05:28:10 2011	(r223689)
@@ -2261,6 +2261,7 @@ suj_build(void)
 			blk_build((struct jblkrec *)rec);
 			break;
 		case JOP_TRUNC:
+		case JOP_SYNC:
 			ino_build_trunc((struct jtrncrec *)rec);
 			break;
 		default:
svn commit: r223769 - head/sys/ufs/ufs
Author: jeff
Date: Mon Jul 4 20:52:23 2011
New Revision: 223769
URL: http://svn.freebsd.org/changeset/base/223769

Log:
  - Fix an inode quota leak.  We need to decrement the quota once and
    only once.

  Tested by:	pho
  Reviewed by:	mckusick

Modified:
  head/sys/ufs/ufs/ufs_inode.c

Modified: head/sys/ufs/ufs/ufs_inode.c
==============================================================================
--- head/sys/ufs/ufs/ufs_inode.c	Mon Jul 4 20:50:09 2011	(r223768)
+++ head/sys/ufs/ufs/ufs_inode.c	Mon Jul 4 20:52:23 2011	(r223769)
@@ -120,15 +120,14 @@ ufs_inactive(ap)
 	isize = ip->i_size;
 	if (ip->i_ump->um_fstype == UFS2)
 		isize += ip->i_din2->di_extsize;
-	if (ip->i_effnlink <= 0 && isize && !UFS_RDONLY(ip)) {
+	if (ip->i_effnlink <= 0 && isize && !UFS_RDONLY(ip))
+		error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL,
+		    NOCRED, td);
+	if (ip->i_nlink <= 0 && ip->i_mode && !UFS_RDONLY(ip)) {
 #ifdef QUOTA
 		if (!getinoquota(ip))
 			(void)chkiq(ip, -1, NOCRED, FORCE);
 #endif
-		error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL,
-		    NOCRED, td);
-	}
-	if (ip->i_nlink <= 0 && ip->i_mode && !UFS_RDONLY(ip)) {
 #ifdef UFS_EXTATTR
 		ufs_extattr_vnode_inactive(vp, td);
 #endif
svn commit: r223770 - head/sys/ufs/ffs
Author: jeff Date: Mon Jul 4 20:53:55 2011 New Revision: 223770 URL: http://svn.freebsd.org/changeset/base/223770 Log: - It is impossible to run request_cleanup() while doing a copyonwrite. This will most likely cause new block allocations which can recurse into request cleanup. - While here optimize the ufs locking slightly. We need only acquire and drop once. - process_removes() and process_truncates() also is only needed once. - Attempt to flush each item on the worklist once but do not loop forever if some can not be completed. Discussed with: mckusick Modified: head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Mon Jul 4 20:52:23 2011 (r223769) +++ head/sys/ufs/ffs/ffs_softdep.c Mon Jul 4 20:53:55 2011 (r223770) @@ -12510,33 +12510,36 @@ softdep_request_cleanup(fs, vp, cred, re int error; mp = vp->v_mount; - ump = VTOI(vp)->i_ump; + ump = VFSTOUFS(mp); mtx_assert(UFS_MTX(ump), MA_OWNED); if (resource == FLUSH_BLOCKS_WAIT) stat_cleanup_blkrequests += 1; else stat_cleanup_inorequests += 1; + /* * If we are being called because of a process doing a -* copy-on-write, then it is not safe to update the vnode -* as we may recurse into the copy-on-write routine. +* copy-on-write, then it is not safe to process any +* worklist items as we will recurse into the copyonwrite +* routine. This will result in an incoherent snapshot. */ - if (!(curthread->td_pflags & TDP_COWINPROGRESS)) { - UFS_UNLOCK(ump); - error = ffs_update(vp, 1); + if (curthread->td_pflags & TDP_COWINPROGRESS) + return (0); + UFS_UNLOCK(ump); + error = ffs_update(vp, 1); + if (error != 0) { UFS_LOCK(ump); - if (error != 0) - return (0); + return (0); } /* * If we are in need of resources, consider pausing for * tickdelay to give ourselves some breathing room. */ - UFS_UNLOCK(ump); ACQUIRE_LOCK(&lk); + process_removes(vp); + process_truncates(vp); request_cleanup(UFSTOVFS(ump), resource); FREE_LOCK(&lk); - UFS_LOCK(ump); /* * Now clean up at least as many resources as we will need. 
* @@ -12568,29 +12571,23 @@ softdep_request_cleanup(fs, vp, cred, re roundup((fs->fs_dsize * fs->fs_minfree / 100) - fs->fs_cstotal.cs_nffree, fs->fs_frag)); } else { + UFS_LOCK(ump); printf("softdep_request_cleanup: Unknown resource type %d\n", resource); return (0); } starttime = time_second; retry: - while ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 && - fs->fs_cstotal.cs_nbfree <= needed) || - (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 && - fs->fs_cstotal.cs_nifree <= needed)) { - UFS_UNLOCK(ump); + if ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 && + fs->fs_cstotal.cs_nbfree <= needed) || + (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 && + fs->fs_cstotal.cs_nifree <= needed)) { ACQUIRE_LOCK(&lk); - process_removes(vp); - process_truncates(vp); if (ump->softdep_on_worklist > 0 && - process_worklist_item(UFSTOVFS(ump), 1, LK_NOWAIT) != 0) { + process_worklist_item(UFSTOVFS(ump), + ump->softdep_on_worklist, LK_NOWAIT) != 0) stat_worklist_push += 1; - FREE_LOCK(&lk); - UFS_LOCK(ump); - continue; - } FREE_LOCK(&lk); - UFS_LOCK(ump); } /* * If we still need resources and there are no more worklist @@ -12604,7 +12601,6 @@ retry: fs->fs_cstotal.cs_nbfree <= needed) || (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 && fs->fs_cstotal.cs_nifree <= needed)) { - UFS_UNLOCK(ump); MNT_ILOCK(mp); MNT_VNODE_FOREACH(lvp, mp, mvp) { VI_LOCK(lvp); @@ -12633,7 +12629,6 @@ retry: VOP_FSYNC(lvp, MNT_NOWAIT, curthread); VOP_UNLOCK(lvp, 0); } - UFS_LOCK(ump); if (ump->softdep_on_worklist > 0) { stat_cleanup_retries += 1; goto retry; @@ -12642,6 +1
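The "flush each item once" behaviour can be illustrated with a toy model
(the structure and function names below are invented and stand in for the
softdep worklist machinery): by snapshotting the queue depth before the
pass, every item gets one attempt and the loop cannot spin forever on
work that is not yet able to complete.

#include <stdio.h>

struct toy_worklist {
	int	count;			/* items currently queued */
};

static int
toy_process_one(struct toy_worklist *wl)
{

	if (wl->count == 0)
		return (0);
	wl->count--;			/* pretend the item completed */
	return (1);
}

static int
toy_flush_once(struct toy_worklist *wl)
{
	int target, pushed;

	/* Snapshot: items queued after this point wait for the next pass. */
	target = wl->count;
	for (pushed = 0; pushed < target; pushed++)
		if (toy_process_one(wl) == 0)
			break;
	return (pushed);
}

int
main(void)
{
	struct toy_worklist wl = { .count = 5 };

	printf("flushed %d items, %d remain\n", toy_flush_once(&wl), wl.count);
	return (0);
}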
svn commit: r223771 - head/sys/ufs/ffs
Author: jeff
Date: Mon Jul 4 21:04:25 2011
New Revision: 223771
URL: http://svn.freebsd.org/changeset/base/223771

Log:
  - Handle D_JSEGDEP in the softdep_sync_buf() switch.  These can now find
    themselves on snapshot vnodes.

  Reported by:	pho

Modified:
  head/sys/ufs/ffs/ffs_softdep.c

Modified: head/sys/ufs/ffs/ffs_softdep.c
==============================================================================
--- head/sys/ufs/ffs/ffs_softdep.c	Mon Jul 4 20:53:55 2011	(r223770)
+++ head/sys/ufs/ffs/ffs_softdep.c	Mon Jul 4 21:04:25 2011	(r223771)
@@ -12082,6 +12082,7 @@ top:
 
 		case D_FREEWORK:
 		case D_FREEDEP:
+		case D_JSEGDEP:
 			continue;
 
 		default:
svn commit: r223772 - head/sys/ufs/ffs
Author: jeff Date: Mon Jul 4 22:08:04 2011 New Revision: 223772 URL: http://svn.freebsd.org/changeset/base/223772 Log: - Speed up pendingblock processing again. Having too much delay between ffs_blkfree() and the pending adjustment causes all kinds of space related problems. Modified: head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/softdep.h Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Mon Jul 4 21:04:25 2011 (r223771) +++ head/sys/ufs/ffs/ffs_softdep.c Mon Jul 4 22:08:04 2011 (r223772) @@ -880,6 +880,7 @@ static inline void setup_freeext(struct static inline void setup_freeindir(struct freeblks *, struct inode *, int, ufs_lbn_t, int); static inline struct freeblks *newfreeblks(struct mount *, struct inode *); +static void freeblks_free(struct ufsmount *, struct freeblks *, int); static void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t); ufs2_daddr_t blkcount(struct fs *, ufs2_daddr_t, off_t); static int trunc_check_buf(struct buf *, int *, ufs_lbn_t, int, int); @@ -5751,7 +5752,6 @@ newfreeblks(mp, ip) freeblks->fb_modrev = DIP(ip, i_modrev); freeblks->fb_devvp = ip->i_devvp; freeblks->fb_chkcnt = 0; - freeblks->fb_freecnt = 0; freeblks->fb_len = 0; return (freeblks); @@ -6199,7 +6199,7 @@ softdep_journal_freeblocks(ip, cred, len quotaref(vp, freeblks->fb_quota); (void) chkdq(ip, -datablocks, NOCRED, 0); #endif - freeblks->fb_chkcnt = datablocks; + freeblks->fb_chkcnt = -datablocks; UFS_LOCK(ip->i_ump); fs->fs_pendingblocks += datablocks; UFS_UNLOCK(ip->i_ump); @@ -6429,7 +6429,7 @@ softdep_setup_freeblocks(ip, length, fla quotaref(vp, freeblks->fb_quota); (void) chkdq(ip, -datablocks, NOCRED, 0); #endif - freeblks->fb_chkcnt = datablocks; + freeblks->fb_chkcnt = -datablocks; UFS_LOCK(ip->i_ump); fs->fs_pendingblocks += datablocks; UFS_UNLOCK(ip->i_ump); @@ -7284,8 +7284,8 @@ freework_freeblock(freework) freeblks->fb_cgwait++; WORKLIST_INSERT(&wkhd, &freework->fw_list); } - freeblks->fb_freecnt += btodb(bsize); FREE_LOCK(&lk); + freeblks_free(ump, freeblks, btodb(bsize)); ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, bsize, freeblks->fb_inum, freeblks->fb_vtype, &wkhd); ACQUIRE_LOCK(&lk); @@ -7459,6 +7459,33 @@ handle_workitem_freeblocks(freeblks, fla } /* + * Handle completion of block free via truncate. This allows fs_pending + * to track the actual free block count more closely than if we only updated + * it at the end. We must be careful to handle cases where the block count + * on free was incorrect. + */ +static void +freeblks_free(ump, freeblks, blocks) + struct ufsmount *ump; + struct freeblks *freeblks; + int blocks; +{ + struct fs *fs; + ufs2_daddr_t remain; + + UFS_LOCK(ump); + remain = -freeblks->fb_chkcnt; + freeblks->fb_chkcnt += blocks; + if (remain > 0) { + if (remain < blocks) + blocks = remain; + fs = ump->um_fs; + fs->fs_pendingblocks -= blocks; + } + UFS_UNLOCK(ump); +} + +/* * Once all of the freework workitems are complete we can retire the * freeblocks dependency and any journal work awaiting completion. This * can not be called until all other dependencies are stable on disk. 
@@ -7478,7 +7505,7 @@ handle_complete_freeblocks(freeblks, fla ump = VFSTOUFS(freeblks->fb_list.wk_mp); fs = ump->um_fs; flags = LK_EXCLUSIVE | flags; - spare = freeblks->fb_freecnt - freeblks->fb_chkcnt; + spare = freeblks->fb_chkcnt; /* * If we did not release the expected number of blocks we may have @@ -7501,9 +7528,9 @@ handle_complete_freeblocks(freeblks, fla } vput(vp); } - if (freeblks->fb_chkcnt) { + if (spare < 0) { UFS_LOCK(ump); - fs->fs_pendingblocks -= freeblks->fb_chkcnt; + fs->fs_pendingblocks += spare; UFS_UNLOCK(ump); } #ifdef QUOTA @@ -7559,7 +7586,7 @@ indir_trunc(freework, dbn, lbn) ufs2_daddr_t nb, nnb, *bap2 = 0; ufs_lbn_t lbnadd, nlbn; int i, nblocks, ufs1fmt; - int fs_pendingblocks; + int freedblocks; int goingaway; int freedeps; int needj; @@ -7701,16 +7728,18 @@ indir_trunc(freework, dbn, lbn) bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); } - fs_pendingblocks = 0; + freedblocks = 0; if (level == 0) - fs_pendingblocks = (nblocks * cnt); + freedblocks = (nblocks * cnt); + if (ne
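The sign flip on fb_chkcnt and the new freeblks_free() are easier to
follow with a worked example.  The toy model below (userland, invented
names, illustrative only) mirrors the arithmetic: the counter starts at
-datablocks, each completed free credits it toward zero, and the
filesystem's pending count is trimmed as soon as blocks are really free
rather than only at the end of the truncation.

#include <stdio.h>
#include <stdint.h>

struct toy_fs {
	int64_t	pendingblocks;		/* fs_pendingblocks stand-in */
	int64_t	chkcnt;			/* fb_chkcnt stand-in */
};

static void
toy_freeblks_free(struct toy_fs *fs, int64_t blocks)
{
	int64_t remain;

	remain = -fs->chkcnt;		/* blocks still expected to free */
	fs->chkcnt += blocks;
	if (remain > 0)
		fs->pendingblocks -= (blocks < remain) ? blocks : remain;
}

int
main(void)
{
	/* A 100-block truncation: pending += 100, chkcnt = -100. */
	struct toy_fs fs = { .pendingblocks = 100, .chkcnt = -100 };

	toy_freeblks_free(&fs, 60);	/* pending drops to 40 */
	toy_freeblks_free(&fs, 40);	/* pending drops to 0 */
	printf("pending=%jd chkcnt=%jd\n",
	    (intmax_t)fs.pendingblocks, (intmax_t)fs.chkcnt);
	return (0);
}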
svn commit: r219819 - in head: sys/amd64/include sys/conf sys/dev/hptmv sys/i386/include sys/kern sys/net sys/netinet sys/netinet6 sys/sys sys/vm usr.sbin/config usr.sbin/ndp
Author: jeff Date: Mon Mar 21 09:40:01 2011 New Revision: 219819 URL: http://svn.freebsd.org/changeset/base/219819 Log: - Merge changes to the base system to support OFED. These include a wider arg2 for sysctl, updates to vlan code, IFT_INFINIBAND, and other miscellaneous small features. Modified: head/sys/amd64/include/endian.h head/sys/conf/files head/sys/conf/kern.pre.mk head/sys/conf/options head/sys/dev/hptmv/hptproc.c head/sys/i386/include/endian.h head/sys/kern/kern_intr.c head/sys/kern/kern_jail.c head/sys/kern/kern_sx.c head/sys/kern/kern_sysctl.c head/sys/kern/subr_bus.c head/sys/net/if.c head/sys/net/if_arp.h head/sys/net/if_llatbl.h head/sys/net/if_types.h head/sys/net/if_var.h head/sys/net/if_vlan.c head/sys/net/if_vlan_var.h head/sys/netinet/if_ether.c head/sys/netinet6/in6.c head/sys/netinet6/nd6.c head/sys/netinet6/nd6_nbr.c head/sys/sys/bus.h head/sys/sys/file.h head/sys/sys/interrupt.h head/sys/sys/jail.h head/sys/sys/sx.h head/sys/sys/sysctl.h head/sys/vm/uma_core.c head/sys/vm/vm_map.c head/sys/vm/vm_map.h head/usr.sbin/config/config.h head/usr.sbin/config/mkmakefile.c head/usr.sbin/ndp/ndp.c Modified: head/sys/amd64/include/endian.h == --- head/sys/amd64/include/endian.h Mon Mar 21 08:54:59 2011 (r219818) +++ head/sys/amd64/include/endian.h Mon Mar 21 09:40:01 2011 (r219819) @@ -69,73 +69,59 @@ extern "C" { #if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE_BUILTIN_CONSTANT_P) -#define __byte_swap_int_var(x) \ -__extension__ ({ register __uint32_t __X = (x); \ - __asm ("bswap %0" : "+r" (__X)); \ - __X; }) +#define__bswap64_const(_x) \ + (((_x) >> 56) | \ + (((_x) >> 40) & (0xffUL << 8)) |\ + (((_x) >> 24) & (0xffUL << 16)) | \ + (((_x) >> 8) & (0xffUL << 24)) |\ + (((_x) << 8) & (0xffUL << 32)) |\ + (((_x) << 24) & (0xffUL << 40)) | \ + (((_x) << 40) & (0xffUL << 48)) | \ + ((_x) << 56)) + +#define__bswap32_const(_x) \ + (((_x) >> 24) | \ + (((_x) & (0xff << 16)) >> 8) | \ + (((_x) & (0xff << 8)) << 8) | \ + ((_x) << 24)) -#ifdef __OPTIMIZE__ - -#define__byte_swap_int_const(x) \ - x) & 0xff00) >> 24) | \ -(((x) & 0x00ff) >> 8) | \ -(((x) & 0xff00) << 8) | \ -(((x) & 0x00ff) << 24)) -#define__byte_swap_int(x) (__builtin_constant_p(x) ? \ - __byte_swap_int_const(x) : __byte_swap_int_var(x)) - -#else /* __OPTIMIZE__ */ - -#define__byte_swap_int(x) __byte_swap_int_var(x) - -#endif /* __OPTIMIZE__ */ - -#define __byte_swap_long_var(x) \ -__extension__ ({ register __uint64_t __X = (x); \ - __asm ("bswap %0" : "+r" (__X)); \ - __X; }) - -#ifdef __OPTIMIZE__ - -#define__byte_swap_long_const(x) \ - (((x >> 56) | \ -((x >> 40) & 0xff00) | \ -((x >> 24) & 0xff) | \ -((x >> 8) & 0xff00) | \ -((x << 8) & (0xfful << 32)) | \ -((x << 24) & (0xfful << 40)) | \ -((x << 40) & (0xfful << 48)) | \ -((x << 56 - -#define__byte_swap_long(x) (__builtin_constant_p(x) ? 
\ - __byte_swap_long_const(x) : __byte_swap_long_var(x)) - -#else /* __OPTIMIZE__ */ - -#define__byte_swap_long(x) __byte_swap_long_var(x) - -#endif /* __OPTIMIZE__ */ +#define __bswap16_const(_x)(__uint16_t)((_x) << 8 | (_x) >> 8) static __inline __uint64_t -__bswap64(__uint64_t _x) +__bswap64_var(__uint64_t _x) { - return (__byte_swap_long(_x)); + __asm ("bswap %0" : "+r" (_x)); + return (_x); } static __inline __uint32_t -__bswap32(__uint32_t _x) +__bswap32_var(__uint32_t _x) { - return (__byte_swap_int(_x)); + __asm ("bswap %0" : "+r" (_x)); + return (_x); } static __inline __uint16_t -__bswap16(__uint16_t _x) +__bswap16_var(__uint16_t _x) { - return (_x << 8 | _x >> 8); + + return (__bswap16_const(_x)); } +#define__bswap64(_x) \ + (__builtin_constant_p(_x) ? \ + __bswap64_const((__uint64_t)(_x)) : __bswap64_var(_x)) + +#define__bswap32(_x) \ + (__builtin_constant_p(_x) ? \ + __bswap32_const((__uint32_t)(_x)) : __bswap32_var(_x)) + +#define__bswap16(_x) \ + (__builtin_constant_p(_x) ? \ + __bswap16_const((__uint16_t)(_x)) : __bswap16_var(_x)) + #define__htonl(x) __bswap32(x) #define__htons(x) __bswap16(x) #define__ntohl(x) __bswap32(x) Modified: head/sys/conf/files
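The practical effect of the reworked macros is that a byte swap of a
compile-time constant folds away entirely while a run-time value still
uses the bswap instruction.  A small userland-style usage sketch (not
part of the commit; the userland spellings lack the double-underscore
prefix) is below.

#include <sys/endian.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t v = 0x11223344;

	/* The first call folds at compile time, the second swaps at run time. */
	printf("%#x %#x\n", bswap32(0x11223344), bswap32(v));
	/* Both print 0x44332211. */
	return (0);
}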
Re: svn commit: r219667 - head/usr.sbin/bsdinstall/partedit
On Sun, 20 Mar 2011, Kirk McKusick wrote:

Date: Sun, 20 Mar 2011 13:25:20 -0700
From: Doug Barton
To: Marius Strobl
CC: Kirk McKusick, Nathan Whitehorn, svn-src-h...@freebsd.org,
    Jeff Roberson, Gavin Atkinson, svn-src-all@FreeBSD.org,
    src-committ...@freebsd.org, kved...@kvedulv.de
Subject: Re: svn commit: r219667 - head/usr.sbin/bsdinstall/partedit

On 03/20/2011 09:22, Marius Strobl wrote:

I fear it's still a bit premature to enable SU+J by default.  Rather
recently I was told about an SU+J filesystem lost after a panic that
happened after snapshotting it (report CC'ed, maybe he can provide some
more details), and I'm pretty sure I've seen the problem described in
PR 149022 also after the potential fix mentioned in its feedback.

+1

I tried enabling SU+J on my /var (after backing up of course) and after a
panic random files were missing entirely.  Not the last updates to those
files, the whole file, and many of them had not been written to in
days/weeks/months.

With all due respect to the hard work that went into the code, I would be
very uncomfortable with enabling it by default at this point.

Doug

With all due respect, how can we fix things that nobody reports?  If you
have a problem, let us know about it.  And of course, we need something
more specific than the above.

I have not been following current but I read any emails sent directly to
me without a mailing list in the cc.  I also was not aware of this.  I had
not heard of any filesystem corruption problems at all.  If there are any,
I also am not comfortable with enabling it by default.  I want to fix that
first.

I have blocked off next week to work on this.  I already sent an email out
to current@ requesting bug reports.  Please, if you have anything else, let
me know immediately so I can prioritize it and start investigating.

Thanks,
Jeff

Kirk McKusick
Re: svn commit: r219667 - head/usr.sbin/bsdinstall/partedit
On Sun, 20 Mar 2011, Doug Barton wrote:

On 03/20/2011 09:22, Marius Strobl wrote:

I fear it's still a bit premature to enable SU+J by default.  Rather
recently I was told about an SU+J filesystem lost after a panic that
happened after snapshotting it (report CC'ed, maybe he can provide some
more details), and I'm pretty sure I've seen the problem described in
PR 149022 also after the potential fix mentioned in its feedback.

+1

I tried enabling SU+J on my /var (after backing up of course) and after a
panic random files were missing entirely.  Not the last updates to those
files, the whole file, and many of them had not been written to in
days/weeks/months.

So you're saying the directory entry was missing?  Can you tell me how big
the directory was?  Number of files?  Approximate directory size when you
consider file names?  When you fsck'd, were inodes recovered and linked
into lost+found?  What was the actual path?

I'm trying to wrap my head around how this would be possible, where the
error could be, and whether it could be caused by SUJ.  The number of
interactions with disk writes is minimal.  Corruption, if it occurs, would
most likely be caused by a bad journal recovery.

Thanks,
Jeff

With all due respect to the hard work that went into the code, I would be
very uncomfortable with enabling it by default at this point.

Doug

--
Nothin' ever doesn't change, but nothin' changes much.
		-- OK Go

Breadth of IT experience, and depth of knowledge in the DNS.
Yours for the right price.  :)  http://SupersetSolutions.com/
Re: svn commit: r219667 - head/usr.sbin/bsdinstall/partedit
On Mon, 21 Mar 2011, Michael Moll wrote:

Hi All,

On Sun, Mar 20, 2011 at 05:22:12PM +0100, Marius Strobl wrote:

I fear it's still a bit premature to enable SU+J by default.  Rather
recently I was told about an SU+J filesystem lost after a panic that
happened after snapshotting it (report CC'ed, maybe he can provide some
more details), and I'm pretty sure I've seen the problem described in
PR 149022 also after the potential fix mentioned in its feedback.

Sorry, no details available, as I didn't record the panic and problems
back then.  However, this was not the first panic which I attribute
(maybe wrongly) to SUJ, and as a consequence all my UFS filesystems now
have SUJ turned off again.  If SUJ really is going to be the default I
would expect quite some fallout from this, judging by my experiences.

How long ago was this?  We fixed quite a number of bugs a few months ago.

Thanks,
Jeff

Kind Regards
--
Michael Moll
svn commit: r219849 - head/sys/modules
Author: jeff Date: Mon Mar 21 21:35:19 2011 New Revision: 219849 URL: http://svn.freebsd.org/changeset/base/219849 Log: - For now, disable ofed module build unless MK_OFED is set. Modified: head/sys/modules/Makefile Modified: head/sys/modules/Makefile == --- head/sys/modules/Makefile Mon Mar 21 21:34:12 2011(r219848) +++ head/sys/modules/Makefile Mon Mar 21 21:35:19 2011(r219849) @@ -185,9 +185,9 @@ SUBDIR= ${_3dfx} \ mfi \ mii \ mlx \ - mlx4 \ - mlx4ib \ - mlxen \ + ${_mlx4} \ + ${_mlx4ib} \ + ${_mlxen} \ ${_mly} \ mmc \ mmcsd \ @@ -198,7 +198,7 @@ SUBDIR= ${_3dfx} \ msdosfs_iconv \ ${_mse} \ msk \ - mthca \ + ${_mthca} \ mvs \ mwl \ mwlfw \ @@ -673,6 +673,13 @@ _zfs= zfs .endif .endif +.if ${MK_OFED} != "no" || defined(ALL_MODULES) +_mthca=mthca +_mlx4= mlx4 +_mlx4ib= mlx4ib +_mlxen=mlxen +.endif + .if defined(MODULES_OVERRIDE) && !defined(ALL_MODULES) SUBDIR=${MODULES_OVERRIDE} .endif @@ -690,5 +697,6 @@ afterinstall: kldxref ${DESTDIR}${KMODDIR}; \ fi .endif +#endif .include ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r219859 - head/sys/ofed/drivers/net/mlx4
Author: jeff Date: Tue Mar 22 04:50:47 2011 New Revision: 219859 URL: http://svn.freebsd.org/changeset/base/219859 Log: - Don't use a separate set of rx queues for UDP, hash them into the same set as TCP. - Eliminate the fully linear non-scatter/gather rx path, there is no harm in using arrays of clusters for both TCP and UDP. - Implement support for enabling/disabling per-vlan priority pause and queues via sysctl. Modified: head/sys/ofed/drivers/net/mlx4/en_main.c head/sys/ofed/drivers/net/mlx4/en_netdev.c head/sys/ofed/drivers/net/mlx4/en_rx.c head/sys/ofed/drivers/net/mlx4/mlx4_en.h Modified: head/sys/ofed/drivers/net/mlx4/en_main.c == --- head/sys/ofed/drivers/net/mlx4/en_main.cTue Mar 22 04:31:35 2011 (r219858) +++ head/sys/ofed/drivers/net/mlx4/en_main.cTue Mar 22 04:50:47 2011 (r219859) @@ -236,9 +236,8 @@ static void *mlx4_en_add(struct mlx4_dev mlx4_info(mdev, "Using %d tx rings for port:%d\n", mdev->profile.prof[i].tx_ring_num, i); mdev->profile.prof[i].rx_ring_num = rounddown_pow_of_two( - min_t(int, dev->caps.num_comp_vectors, MAX_RX_RINGS/2)) + - (mdev->profile.udp_rss ? rounddown_pow_of_two( - min_t(int, dev->caps.num_comp_vectors, MAX_RX_RINGS/2)) : 1); + min_t(int, dev->caps.num_comp_vectors, MAX_RX_RINGS)); + mlx4_info(mdev, "Defaulting to %d rx rings for port:%d\n", mdev->profile.prof[i].rx_ring_num, i); } Modified: head/sys/ofed/drivers/net/mlx4/en_netdev.c == --- head/sys/ofed/drivers/net/mlx4/en_netdev.c Tue Mar 22 04:31:35 2011 (r219858) +++ head/sys/ofed/drivers/net/mlx4/en_netdev.c Tue Mar 22 04:50:47 2011 (r219859) @@ -277,10 +277,7 @@ static void mlx4_en_netpoll(struct net_d cq = &priv->rx_cq[i]; spin_lock_irqsave(&cq->lock, flags); napi_synchronize(&cq->napi); - if (priv->rx_ring[i].use_frags) - mlx4_en_process_rx_cq(dev, cq, 0); - else - mlx4_en_process_rx_cq_mb(dev, cq, 0); + mlx4_en_process_rx_cq(dev, cq, 0); spin_unlock_irqrestore(&cq->lock, flags); } } @@ -866,10 +863,6 @@ int mlx4_en_alloc_resources(struct mlx4_ prof->rx_ring_size, i, RX)) goto err; - if (i > priv->rx_ring_num - priv->udp_rings - 1) - priv->rx_ring[i].use_frags = 0; - else - priv->rx_ring[i].use_frags = 1; if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i], prof->rx_ring_size)) goto err; @@ -880,7 +873,7 @@ int mlx4_en_alloc_resources(struct mlx4_ /* Populate Tx priority mappings */ mlx4_en_set_prio_map(priv, priv->tx_prio_map, -prof->tx_ring_num - MLX4_EN_NUM_HASH_RINGS); +priv->tx_ring_num - MLX4_EN_NUM_HASH_RINGS); return 0; @@ -1193,6 +1186,83 @@ static int mlx4_en_set_tx_ring_size(SYSC return (error); } +static int mlx4_en_set_tx_ppp(SYSCTL_HANDLER_ARGS) +{ + struct mlx4_en_priv *priv; + int ppp; + int error; + + priv = arg1; + ppp = priv->prof->tx_ppp; + error = sysctl_handle_int(oidp, &ppp, 0, req); + if (error || !req->newptr) + return (error); + if (ppp > 0xff || ppp < 0) + return (-EINVAL); + priv->prof->tx_ppp = ppp; + error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, + priv->rx_mb_size + ETHER_CRC_LEN, + priv->prof->tx_pause, + priv->prof->tx_ppp, + priv->prof->rx_pause, + priv->prof->rx_ppp); + + return (error); +} + +static int mlx4_en_set_rx_ppp(SYSCTL_HANDLER_ARGS) +{ + struct mlx4_en_priv *priv; + struct mlx4_en_dev *mdev; + int tx_ring_num; + int ppp; + int error; + int port_up; + + port_up = 0; + priv = arg1; + mdev = priv->mdev; + ppp = priv->prof->rx_ppp; + error = sysctl_handle_int(oidp, &ppp, 0, req); + if (error || !req->newptr) + return (error); + if (ppp > 0xff || ppp < 0) + return (-EINVAL); + /* See if we have to change the number of tx queues. 
*/ + if (!ppp != !priv->prof->rx_ppp) { + tx_ring_num = MLX4_EN_NUM_HASH_RINGS + 1 + + (!!ppp) * MLX4_EN_NUM_PPP_RINGS; +
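The new tx_ppp/rx_ppp controls follow the usual FreeBSD SYSCTL_PROC
pattern; a minimal generic sketch of that pattern, with invented names,
is shown below for readers who have not written a sysctl handler before:
read out the current value, let sysctl_handle_int() do the copyin/copyout,
and only commit the new value after validating it.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

static int example_value;

/* Illustrative handler, not the driver code. */
static int
example_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	int error, val;

	val = example_value;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (val < 0 || val > 0xff)
		return (EINVAL);
	example_value = val;
	return (0);
}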
svn commit: r219893 - head/sys/ofed/drivers/net/mlx4
Author: jeff Date: Wed Mar 23 02:47:04 2011 New Revision: 219893 URL: http://svn.freebsd.org/changeset/base/219893 Log: - Correct the vlan filter programming. The device filter is built in reverse order. - Name the cq taskqueues according to whether they handle rx or tx. - Default LRO to on. Modified: head/sys/ofed/drivers/net/mlx4/en_cq.c head/sys/ofed/drivers/net/mlx4/en_netdev.c head/sys/ofed/drivers/net/mlx4/en_port.c Modified: head/sys/ofed/drivers/net/mlx4/en_cq.c == --- head/sys/ofed/drivers/net/mlx4/en_cq.c Wed Mar 23 01:26:21 2011 (r219892) +++ head/sys/ofed/drivers/net/mlx4/en_cq.c Wed Mar 23 02:47:04 2011 (r219893) @@ -51,21 +51,23 @@ int mlx4_en_create_cq(struct mlx4_en_pri int err; cq->size = entries; + cq->tq = taskqueue_create_fast("mlx4_en_que", M_NOWAIT, + taskqueue_thread_enqueue, &cq->tq); if (mode == RX) { cq->buf_size = cq->size * sizeof(struct mlx4_cqe); cq->vector = (ring + priv->port) % mdev->dev->caps.num_comp_vectors; TASK_INIT(&cq->cq_task, 0, mlx4_en_rx_que, cq); + taskqueue_start_threads(&cq->tq, 1, PI_NET, "%s rx cq", + if_name(priv->dev)); } else { cq->buf_size = sizeof(struct mlx4_cqe); cq->vector = MLX4_LEAST_ATTACHED_VECTOR; TASK_INIT(&cq->cq_task, 0, mlx4_en_tx_que, cq); + taskqueue_start_threads(&cq->tq, 1, PI_NET, "%s tx cq", + if_name(priv->dev)); } - cq->tq = taskqueue_create_fast("mlx4_en_que", M_NOWAIT, - taskqueue_thread_enqueue, &cq->tq); - taskqueue_start_threads(&cq->tq, 1, PI_NET, "%s cq", - if_name(priv->dev)); cq->ring = ring; cq->is_tx = mode; mtx_init(&cq->lock.m, "mlx4 cq", NULL, MTX_DEF); Modified: head/sys/ofed/drivers/net/mlx4/en_netdev.c == --- head/sys/ofed/drivers/net/mlx4/en_netdev.c Wed Mar 23 01:26:21 2011 (r219892) +++ head/sys/ofed/drivers/net/mlx4/en_netdev.c Wed Mar 23 02:47:04 2011 (r219893) @@ -53,13 +53,11 @@ static void mlx4_en_vlan_rx_add_vid(void if ((vid == 0) || (vid > 4095))/* Invalid */ return; - en_dbg(HW, priv, "adding VLAN:%d\n", vid); - - spin_lock(&priv->vlan_lock); - priv->vlgrp_modified = true; idx = vid >> 5; field = 1 << (vid & 0x1f); + spin_lock(&priv->vlan_lock); + priv->vlgrp_modified = true; if (priv->vlan_unregister[idx] & field) priv->vlan_unregister[idx] &= ~field; else @@ -77,10 +75,10 @@ static void mlx4_en_vlan_rx_kill_vid(voi if ((vid == 0) || (vid > 4095))/* Invalid */ return; en_dbg(HW, priv, "Killing VID:%d\n", vid); - spin_lock(&priv->vlan_lock); - priv->vlgrp_modified = true; idx = vid >> 5; field = 1 << (vid & 0x1f); + spin_lock(&priv->vlan_lock); + priv->vlgrp_modified = true; if (priv->vlan_register[idx] & field) priv->vlan_register[idx] &= ~field; else @@ -1541,12 +1539,9 @@ int mlx4_en_init_netdev(struct mlx4_en_d #endif if (mdev->LSO_support) dev->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO; - - /* Don't enable LOR unless the user requests. 
*/ - dev->if_capenable = dev->if_capabilities; - if (mdev->profile.num_lro) dev->if_capabilities |= IFCAP_LRO; + dev->if_capenable = dev->if_capabilities; /* Register for VLAN events */ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, Modified: head/sys/ofed/drivers/net/mlx4/en_port.c == --- head/sys/ofed/drivers/net/mlx4/en_port.cWed Mar 23 01:26:21 2011 (r219892) +++ head/sys/ofed/drivers/net/mlx4/en_port.cWed Mar 23 02:47:04 2011 (r219893) @@ -51,7 +51,7 @@ int mlx4_SET_VLAN_FLTR(struct mlx4_dev * { struct mlx4_cmd_mailbox *mailbox; struct mlx4_set_vlan_fltr_mbox *filter; - int i; + int i, j; int err = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); @@ -61,8 +61,9 @@ int mlx4_SET_VLAN_FLTR(struct mlx4_dev * filter = mailbox->buf; memset(filter, 0, sizeof *filter); if (vlans) - for (i = 0; i < VLAN_FLTR_SIZE; i ++) - filter->entry[i] = cpu_to_be32(vlans[i]); + for (i = 0, j = VLAN_FLTR_SIZE - 1; i < VLAN_FLTR_SIZE; + i++, j--) + filter->entry[j] = cpu_to_be32(vlans[i]); err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_VLAN_FLTR, MLX4_CMD_TIME_CLA
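The filter reversal is just an index mapping.  The toy program below
(invented names, assuming a filter table of 128 32-bit words covering
4096 VLAN IDs) shows where a given VLAN ID lands in the host table and in
the reversed mailbox copy handed to the hardware.

#include <stdio.h>

#define	TOY_VLAN_FLTR_SIZE	128	/* 4096 VLAN IDs / 32 bits per word */

int
main(void)
{
	unsigned vid = 100;
	unsigned host_entry = vid >> 5;			/* word index */
	unsigned host_bit = vid & 0x1f;			/* bit within word */
	unsigned mbox_entry = TOY_VLAN_FLTR_SIZE - 1 - host_entry;

	printf("vid %u: host word %u bit %u -> mailbox word %u\n",
	    vid, host_entry, host_bit, mbox_entry);
	return (0);
}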
svn commit: r219898 - head/sys/modules
Author: jeff Date: Wed Mar 23 08:27:57 2011 New Revision: 219898 URL: http://svn.freebsd.org/changeset/base/219898 Log: - Move ofed modules into the i386 and amd64 specific sections to fix universe on other architectures. Modified: head/sys/modules/Makefile Modified: head/sys/modules/Makefile == --- head/sys/modules/Makefile Wed Mar 23 06:31:45 2011(r219897) +++ head/sys/modules/Makefile Wed Mar 23 08:27:57 2011(r219898) @@ -418,6 +418,12 @@ _linprocfs=linprocfs _linsysfs= linsysfs _linux=linux _mse= mse +.if ${MK_OFED} != "no" || defined(ALL_MODULES) +_mlx4= mlx4 +_mlx4ib= mlx4ib +_mlxen=mlxen +_mthca=mthca +.endif .if ${MK_NCP} != "no" _ncp= ncp .endif @@ -566,6 +572,12 @@ _linprocfs=linprocfs _linsysfs= linsysfs _linux=linux _mly= mly +.if ${MK_OFED} != "no" || defined(ALL_MODULES) +_mlx4= mlx4 +_mlx4ib= mlx4ib +_mlxen=mlxen +_mthca=mthca +.endif _ndis= ndis _nfe= nfe _nve= nve @@ -673,13 +685,6 @@ _zfs= zfs .endif .endif -.if ${MK_OFED} != "no" || defined(ALL_MODULES) -_mthca=mthca -_mlx4= mlx4 -_mlx4ib= mlx4ib -_mlxen=mlxen -.endif - .if defined(MODULES_OVERRIDE) && !defined(ALL_MODULES) SUBDIR=${MODULES_OVERRIDE} .endif @@ -697,6 +702,5 @@ afterinstall: kldxref ${DESTDIR}${KMODDIR}; \ fi .endif -#endif .include ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r220016 - in head/sys/ofed: drivers/net/mlx4 include/linux/mlx4
Author: jeff Date: Sat Mar 26 00:54:01 2011 New Revision: 220016 URL: http://svn.freebsd.org/changeset/base/220016 Log: - Implement wake-on-lan support in mlxen. Modified: head/sys/ofed/drivers/net/mlx4/en_ethtool.c head/sys/ofed/drivers/net/mlx4/en_netdev.c head/sys/ofed/drivers/net/mlx4/fw.c head/sys/ofed/drivers/net/mlx4/fw.h head/sys/ofed/drivers/net/mlx4/main.c head/sys/ofed/drivers/net/mlx4/mlx4_en.h head/sys/ofed/include/linux/mlx4/device.h Modified: head/sys/ofed/drivers/net/mlx4/en_ethtool.c == --- head/sys/ofed/drivers/net/mlx4/en_ethtool.c Sat Mar 26 00:34:35 2011 (r220015) +++ head/sys/ofed/drivers/net/mlx4/en_ethtool.c Sat Mar 26 00:54:01 2011 (r220016) @@ -494,6 +494,7 @@ const struct ethtool_ops mlx4_en_ethtool .get_ethtool_stats = mlx4_en_get_ethtool_stats, .self_test = mlx4_en_self_test, .get_wol = mlx4_en_get_wol, + .set_wol = mlx4_en_set_wol, .get_msglevel = mlx4_en_get_msglevel, .set_msglevel = mlx4_en_set_msglevel, .get_coalesce = mlx4_en_get_coalesce, Modified: head/sys/ofed/drivers/net/mlx4/en_netdev.c == --- head/sys/ofed/drivers/net/mlx4/en_netdev.c Sat Mar 26 00:34:35 2011 (r220015) +++ head/sys/ofed/drivers/net/mlx4/en_netdev.c Sat Mar 26 00:54:01 2011 (r220016) @@ -532,6 +532,7 @@ int mlx4_en_start_port(struct net_device struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_cq *cq; struct mlx4_en_tx_ring *tx_ring; + u64 config; int rx_index = 0; int tx_index = 0; int err = 0; @@ -662,6 +663,25 @@ int mlx4_en_start_port(struct net_device else priv->rx_csum = 0; + err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); + if (err) { + en_err(priv, "Failed to get WoL info, unable to modify\n"); + goto wol_err; + } + if (dev->if_capenable & IFCAP_WOL_MAGIC) { + config |= MLX4_EN_WOL_DO_MODIFY | MLX4_EN_WOL_ENABLED | + MLX4_EN_WOL_MAGIC; + } else { + config &= ~(MLX4_EN_WOL_ENABLED | MLX4_EN_WOL_MAGIC); + config |= MLX4_EN_WOL_DO_MODIFY; + } + + err = mlx4_wol_write(priv->mdev->dev, config, priv->port); + if (err) { + en_err(priv, "Failed to set WoL information\n"); + goto wol_err; + } + priv->port_up = true; /* Populate multicast list */ @@ -676,6 +696,10 @@ int mlx4_en_start_port(struct net_device return 0; +wol_err: + /* close port*/ + mlx4_CLOSE_PORT(mdev->dev, priv->port); + mac_err: mlx4_unregister_mac(mdev->dev, priv->port, priv->mac_index); tx_err: @@ -1095,6 +1119,8 @@ static int mlx4_en_ioctl(struct ifnet *d dev->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) dev->if_capenable ^= IFCAP_VLAN_HWFILTER; + if (mask & IFCAP_WOL_MAGIC) + dev->if_capenable ^= IFCAP_WOL_MAGIC; if (dev->if_drv_flags & IFF_DRV_RUNNING) mlx4_en_init(priv); VLAN_CAPABILITIES(dev); @@ -1534,14 +1560,23 @@ int mlx4_en_init_netdev(struct mlx4_en_d dev->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; dev->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER; dev->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU; -#if 0 /* Not yet */ - dev->if_capabilities |= IFCAP_WOL; -#endif if (mdev->LSO_support) dev->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO; if (mdev->profile.num_lro) dev->if_capabilities |= IFCAP_LRO; dev->if_capenable = dev->if_capabilities; + /* +* Setup wake-on-lan. 
+*/ + if (priv->mdev->dev->caps.wol) { + u64 config; + if (mlx4_wol_read(priv->mdev->dev, &config, priv->port) == 0) { + if (config & MLX4_EN_WOL_MAGIC) + dev->if_capabilities |= IFCAP_WOL_MAGIC; + if (config & MLX4_EN_WOL_ENABLED) + dev->if_capenable |= IFCAP_WOL_MAGIC; + } + } /* Register for VLAN events */ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, Modified: head/sys/ofed/drivers/net/mlx4/fw.c == --- head/sys/ofed/drivers/net/mlx4/fw.c Sat Mar 26 00:34:35 2011 (r220015) +++ head/sys/ofed/drivers/net/mlx4/fw.c Sat Mar 26 00:54:01 2011 (r220016) @@ -289,6 +289,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev * dev_cap->udp_rss = field & 0x1; MLX4_GET(field, outbox, QUERY_DEV_CAP_ETH_UC_
svn commit: r220282 - head/sys/ufs/ffs
Author: jeff Date: Sat Apr 2 21:52:58 2011 New Revision: 220282 URL: http://svn.freebsd.org/changeset/base/220282 Log: Fix problems that manifested from filesystem full conditions: - In softdep_revert_mkdir() find the dotaddref before we attempt to cancel the jaddref so we can make assumptions about where the dotaddref is on the list. cancel_jaddref() does not always remove items from the list anymore. - Always set GOINGAWAY on an inode in softdep_freefile() if DEPCOMPLETE was never set. This ensures that dependencies will continue to be processed on the inowait/bufwait list and is more an artifact of the structure of the code than a pure ordering problem. - Always set DEPCOMPLETE on canceled jaddrefs so that they can be freed appropriately. This normally occurs when the refs are added to the journal but if they are canceled before this point the state would never be set and the dependency could never be freed. Reported by: pho Tested by:pho Modified: head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Sat Apr 2 16:02:25 2011 (r220281) +++ head/sys/ufs/ffs/ffs_softdep.c Sat Apr 2 21:52:58 2011 (r220282) @@ -3501,10 +3501,14 @@ cancel_jaddref(jaddref, inodedep, wkhd) * us so that it is consistent with the in-memory reference. This * ensures that inode nlink rollbacks always have the correct link. */ - if (needsj == 0) + if (needsj == 0) { for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref; - inoref = TAILQ_NEXT(inoref, if_deps)) + inoref = TAILQ_NEXT(inoref, if_deps)) { + if (inoref->if_state & GOINGAWAY) + break; inoref->if_nlink--; + } + } jsegdep = inoref_jseg(&jaddref->ja_ref); if (jaddref->ja_state & NEWBLOCK) move_newblock_dep(jaddref, inodedep); @@ -3522,6 +3526,7 @@ cancel_jaddref(jaddref, inodedep, wkhd) if (jaddref->ja_state & DEPCOMPLETE) remove_from_journal(&jaddref->ja_list); } + jaddref->ja_state |= (GOINGAWAY | DEPCOMPLETE); /* * Leave NEWBLOCK jaddrefs on the inodedep so handle_workitem_remove * can arrange for them to be freed with the bitmap. Otherwise we @@ -3535,7 +3540,6 @@ cancel_jaddref(jaddref, inodedep, wkhd) free_jaddref(jaddref); return (needsj); } - jaddref->ja_state |= GOINGAWAY; /* * Leave the head of the list for jsegdeps for fast merging. */ @@ -4071,6 +4075,7 @@ softdep_revert_mkdir(dp, ip) { struct inodedep *inodedep; struct jaddref *jaddref; + struct jaddref *dotaddref; struct vnode *dvp; dvp = ITOV(dp); @@ -4090,12 +4095,12 @@ softdep_revert_mkdir(dp, ip) inoreflst); KASSERT(jaddref->ja_parent == dp->i_number, ("softdep_revert_mkdir: addref parent mismatch")); + dotaddref = (struct jaddref *)TAILQ_PREV(&jaddref->ja_ref, + inoreflst, if_deps); cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); - jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst, - inoreflst); - KASSERT(jaddref->ja_parent == ip->i_number, + KASSERT(dotaddref->ja_parent == ip->i_number, ("softdep_revert_mkdir: dot addref parent mismatch")); - cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait); + cancel_jaddref(dotaddref, inodedep, &inodedep->id_inowait); } FREE_LOCK(&lk); } @@ -5734,14 +5739,14 @@ softdep_freefile(pvp, ino, mode) clear_unlinked_inodedep(inodedep); /* Re-acquire inodedep as we've dropped lk. 
*/ inodedep_lookup(pvp->v_mount, ino, 0, &inodedep); - if (inodedep && (inodedep->id_state & DEPCOMPLETE) == 0) - inodedep->id_state |= GOINGAWAY; } if (inodedep == NULL || check_inode_unwritten(inodedep)) { FREE_LOCK(&lk); handle_workitem_freefile(freefile); return; } + if (inodedep && (inodedep->id_state & DEPCOMPLETE) == 0) + inodedep->id_state |= GOINGAWAY; WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list); FREE_LOCK(&lk); if (ip->i_number == ino) ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
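The first item above is a list-ordering subtlety: because cancel_jaddref() no longer reliably unlinks the entry, the ".." reference's neighbour has to be captured with TAILQ_PREV() before the first cancel rather than re-read with TAILQ_LAST() afterwards. A minimal userland sketch of that pattern, using FreeBSD's <sys/queue.h>; the struct names here are made up and this is only an illustration of the idiom, not the softdep code:

#include <sys/queue.h>
#include <stdio.h>

struct ref {
    TAILQ_ENTRY(ref) link;
    int id;
};
TAILQ_HEAD(reflist, ref);

int
main(void)
{
    struct reflist head = TAILQ_HEAD_INITIALIZER(head);
    struct ref dot = { .id = 1 }, dotdot = { .id = 2 };
    struct ref *last, *prev;

    TAILQ_INSERT_TAIL(&head, &dot, link);
    TAILQ_INSERT_TAIL(&head, &dotdot, link);

    last = TAILQ_LAST(&head, reflist);       /* the ".." reference */
    prev = TAILQ_PREV(last, reflist, link);  /* save "." before cancelling */
    /* cancel(last) may or may not unlink it; prev stays valid either way. */
    printf("last=%d prev=%d\n", last->id, prev->id);
    return (0);
}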
svn commit: r220406 - head/sys/ufs/ffs
Author: jeff Date: Thu Apr 7 03:19:10 2011 New Revision: 220406 URL: http://svn.freebsd.org/changeset/base/220406 Log: - Don't invalidate jnewblks immediately upon discovering that the block will be removed. Permit the journal to proceed so that we don't leave a rollback in a cg for a very long time as this can cause terrible perf problems in low memory situations. Tested by: pho Modified: head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/softdep.h Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Thu Apr 7 00:30:32 2011 (r220405) +++ head/sys/ufs/ffs/ffs_softdep.c Thu Apr 7 03:19:10 2011 (r220406) @@ -766,7 +766,8 @@ static inline void inoref_write(struct i struct jrefrec *); static void handle_allocdirect_partdone(struct allocdirect *, struct workhead *); -static void cancel_newblk(struct newblk *, struct workhead *); +static struct jnewblk *cancel_newblk(struct newblk *, struct worklist *, + struct workhead *); static void indirdep_complete(struct indirdep *); static void handle_allocindir_partdone(struct allocindir *); static void initiate_write_filepage(struct pagedep *, struct buf *); @@ -826,6 +827,8 @@ static void handle_complete_freeblocks(s static void handle_workitem_indirblk(struct freework *); static void handle_written_freework(struct freework *); static void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *); +static struct worklist *jnewblk_merge(struct worklist *, struct worklist *, + struct workhead *); static void setup_allocindir_phase2(struct buf *, struct inode *, struct inodedep *, struct allocindir *, ufs_lbn_t); static struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t, @@ -3125,33 +3128,72 @@ handle_written_jaddref(jaddref) /* * Called once a jnewblk journal is written. The allocdirect or allocindir - * is placed in the bmsafemap to await notification of a written bitmap. + * is placed in the bmsafemap to await notification of a written bitmap. If + * the operation was canceled we add the segdep to the appropriate + * dependency to free the journal space once the canceling operation + * completes. */ static void handle_written_jnewblk(jnewblk) struct jnewblk *jnewblk; { struct bmsafemap *bmsafemap; + struct freefrag *freefrag; struct jsegdep *jsegdep; struct newblk *newblk; + struct freework *freework; + struct indirdep *indirdep; /* Grab the jsegdep. */ jsegdep = jnewblk->jn_jsegdep; jnewblk->jn_jsegdep = NULL; - /* -* Add the written block to the bmsafemap so it can be notified when -* the bitmap is on disk. -*/ - newblk = jnewblk->jn_newblk; - jnewblk->jn_newblk = NULL; - if (newblk == NULL) + if (jnewblk->jn_dep == NULL) panic("handle_written_jnewblk: No dependency for the segdep."); - - newblk->nb_jnewblk = NULL; - bmsafemap = newblk->nb_bmsafemap; - WORKLIST_INSERT(&newblk->nb_jwork, &jsegdep->jd_list); - newblk->nb_state |= ONDEPLIST; - LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); + switch (jnewblk->jn_dep->wk_type) { + case D_NEWBLK: + case D_ALLOCDIRECT: + case D_ALLOCINDIR: + /* +* Add the written block to the bmsafemap so it can +* be notified when the bitmap is on disk. +*/ + newblk = WK_NEWBLK(jnewblk->jn_dep); + newblk->nb_jnewblk = NULL; + bmsafemap = newblk->nb_bmsafemap; + newblk->nb_state |= ONDEPLIST; + LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); + WORKLIST_INSERT(&newblk->nb_jwork, &jsegdep->jd_list); + break; + case D_FREEFRAG: + /* +* A newblock being removed by a freefrag when replaced by +* frag extension. 
+*/ + freefrag = WK_FREEFRAG(jnewblk->jn_dep); + freefrag->ff_jdep = NULL; + WORKLIST_INSERT(&freefrag->ff_jwork, &jsegdep->jd_list); + break; + case D_FREEWORK: + /* +* A direct block was removed by truncate. +*/ + freework = WK_FREEWORK(jnewblk->jn_dep); + freework->fw_jnewblk = NULL; + WORKLIST_INSERT(&freework->fw_jwork, &jsegdep->jd_list); + break; + case D_INDIRDEP: + /* +* An indirect block was removed by truncate. +*/ + indirdep = WK_INDIRDEP(jnewblk->jn_dep); + LIST_REMOVE(jnewblk, jn_indirdeps); + WORKLIST_INSERT(&indirdep->ir_jwork, &jsegdep->jd_list); +
svn commit: r220511 - head/sys/ufs/ffs
Author: jeff Date: Sun Apr 10 03:49:53 2011 New Revision: 220511 URL: http://svn.freebsd.org/changeset/base/220511 Log: Fix a long standing SUJ performance problem: - Keep a hash of indirect blocks that have recently been freed and are still referenced in the journal. - Lookup blocks in this hash before forcing a new block write to wait on the journal entry to hit the disk. This is only necessary to avoid confusion between old identities as indirects and new identities as file blocks. - Don't free jseg structures until the journal has written a record that invalidates it. This keeps the indirect block information around for as long as is required to be safe. - Force an empty journal block write when required to flush out stale journal data that is simply waiting for the oldest valid sequence number to advance beyond it. Modified: head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/softdep.h Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Sun Apr 10 01:54:42 2011 (r220510) +++ head/sys/ufs/ffs/ffs_softdep.c Sun Apr 10 03:49:53 2011 (r220511) @@ -753,8 +753,7 @@ static void handle_written_jnewblk(struc static void handle_written_jfreeblk(struct jfreeblk *); static void handle_written_jfreefrag(struct jfreefrag *); static void complete_jseg(struct jseg *); -static void jseg_write(struct ufsmount *ump, struct jblocks *, struct jseg *, - uint8_t *); +static void jseg_write(struct ufsmount *ump, struct jseg *, uint8_t *); static void jaddref_write(struct jaddref *, struct jseg *, uint8_t *); static void jremref_write(struct jremref *, struct jseg *, uint8_t *); static void jmvref_write(struct jmvref *, struct jseg *, uint8_t *); @@ -769,6 +768,7 @@ static void handle_allocdirect_partdone( static struct jnewblk *cancel_newblk(struct newblk *, struct worklist *, struct workhead *); static void indirdep_complete(struct indirdep *); +static int indirblk_inseg(struct mount *, ufs2_daddr_t); static void handle_allocindir_partdone(struct allocindir *); static void initiate_write_filepage(struct pagedep *, struct buf *); static void initiate_write_indirdep(struct indirdep*, struct buf *); @@ -802,7 +802,9 @@ static void free_newdirblk(struct newdir static void free_jremref(struct jremref *); static void free_jaddref(struct jaddref *); static void free_jsegdep(struct jsegdep *); -static void free_jseg(struct jseg *); +static void free_jsegs(struct jblocks *); +static void rele_jseg(struct jseg *); +static void free_jseg(struct jseg *, struct jblocks *); static void free_jnewblk(struct jnewblk *); static void free_jfreeblk(struct jfreeblk *); static void free_jfreefrag(struct jfreefrag *); @@ -872,7 +874,7 @@ static int journal_unsuspend(struct ufsm static void softdep_prelink(struct vnode *, struct vnode *); static void add_to_journal(struct worklist *); static void remove_from_journal(struct worklist *); -static void softdep_process_journal(struct mount *, int); +static void softdep_process_journal(struct mount *, struct worklist *, int); static struct jremref *newjremref(struct dirrem *, struct inode *, struct inode *ip, off_t, nlink_t); static struct jaddref *newjaddref(struct inode *, ino_t, off_t, int16_t, @@ -1376,7 +1378,7 @@ softdep_process_worklist(mp, full) ump = VFSTOUFS(mp); ACQUIRE_LOCK(&lk); starttime = time_second; - softdep_process_journal(mp, full?MNT_WAIT:0); + softdep_process_journal(mp, NULL, full?MNT_WAIT:0); while (ump->softdep_on_worklist > 0) { if ((cnt = process_worklist_item(mp, LK_NOWAIT)) == -1) break; @@ -1999,6 +2001,37 @@ newblk_lookup(mp, 
newblkno, flags, newbl } /* + * Structures and routines associated with indir caching. + */ +struct workhead *indir_hashtbl; +u_long indir_hash; /* size of hash table - 1 */ +#defineINDIR_HASH(mp, blkno) \ + (&indir_hashtbl[register_t)(mp)) >> 13) + (blkno)) & indir_hash]) + +static int +indirblk_inseg(mp, blkno) + struct mount *mp; + ufs2_daddr_t blkno; +{ + struct freework *freework; + struct workhead *wkhd; + struct worklist *wk; + + wkhd = INDIR_HASH(mp, blkno); + LIST_FOREACH(wk, wkhd, wk_list) { + freework = WK_FREEWORK(wk); + if (freework->fw_blkno == blkno && + freework->fw_list.wk_mp == mp) { + LIST_REMOVE(freework, fw_next); + WORKLIST_REMOVE(&freework->fw_list); + WORKITEM_FREE(freework, D_FREEWORK); + return (1); + } + } + return (0); +} + +/* * Executed during filesystem system initialization before * mounting any filesystems. */ @@ -2012,6 +2045,7 @@ softd
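The heart of this change is the small hash of recently freed indirect block numbers that allocation consults before forcing a wait on the journal. A rough userland model of that lookup follows; the names, the bucket count, and the hash function are illustrative only, not the kernel's:

#include <sys/queue.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NBUCKETS 64                 /* power of two, like indir_hash + 1 */

struct freedblk {
    LIST_ENTRY(freedblk) link;
    uint64_t blkno;
};
LIST_HEAD(blkhead, freedblk);
static struct blkhead hashtbl[NBUCKETS];

static struct blkhead *
hashchain(uint64_t blkno)
{
    return (&hashtbl[blkno & (NBUCKETS - 1)]);
}

static void
record_freed_indir(uint64_t blkno)
{
    struct freedblk *fb = malloc(sizeof(*fb));

    if (fb == NULL)
        return;
    fb->blkno = blkno;
    LIST_INSERT_HEAD(hashchain(blkno), fb, link);
}

/* True (and the entry is consumed) if blkno was recently an indirect. */
static bool
was_recent_indir(uint64_t blkno)
{
    struct freedblk *fb;

    LIST_FOREACH(fb, hashchain(blkno), link) {
        if (fb->blkno != blkno)
            continue;
        LIST_REMOVE(fb, link);
        free(fb);
        return (true);
    }
    return (false);
}

int
main(void)
{
    record_freed_indir(12345);
    printf("%d %d\n", was_recent_indir(12345), was_recent_indir(12345));
    return (0);
}

A hit means the block's old identity as an indirect is already recorded in the journal, so the new write does not need to stall; in the kernel the hit also retires the stale freework, which is why the lookup removes the entry.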
Re: svn commit: r220511 - head/sys/ufs/ffs
On Sun, 10 Apr 2011, Jeff Roberson wrote: Author: jeff Date: Sun Apr 10 03:49:53 2011 New Revision: 220511 URL: http://svn.freebsd.org/changeset/base/220511 Log: Fix a long standing SUJ performance problem: This brought my dbench performance to within 10-15% of softupdates on a real disk depending on concurrency. There are cases where it outperforms softupdates as well. Over time I can eliminate the extra blocking IO waits that cause the remaining degradation on this test. For now I'm going to focus on the mksnap bug that has been reported in several forms. Thanks, Jeff - Keep a hash of indirect blocks that have recently been freed and are still referenced in the journal. - Lookup blocks in this hash before forcing a new block write to wait on the journal entry to hit the disk. This is only necessary to avoid confusion between old identities as indirects and new identities as file blocks. - Don't free jseg structures until the journal has written a record that invalidates it. This keeps the indirect block information around for as long as is required to be safe. - Force an empty journal block write when required to flush out stale journal data that is simply waiting for the oldest valid sequence number to advance beyond it. Modified: head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/softdep.h Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Sun Apr 10 01:54:42 2011 (r220510) +++ head/sys/ufs/ffs/ffs_softdep.c Sun Apr 10 03:49:53 2011 (r220511) @@ -753,8 +753,7 @@ static void handle_written_jnewblk(struc static void handle_written_jfreeblk(struct jfreeblk *); static void handle_written_jfreefrag(struct jfreefrag *); static void complete_jseg(struct jseg *); -static void jseg_write(struct ufsmount *ump, struct jblocks *, struct jseg *, - uint8_t *); +static void jseg_write(struct ufsmount *ump, struct jseg *, uint8_t *); static void jaddref_write(struct jaddref *, struct jseg *, uint8_t *); static void jremref_write(struct jremref *, struct jseg *, uint8_t *); static void jmvref_write(struct jmvref *, struct jseg *, uint8_t *); @@ -769,6 +768,7 @@ static void handle_allocdirect_partdone( static struct jnewblk *cancel_newblk(struct newblk *, struct worklist *, struct workhead *); static void indirdep_complete(struct indirdep *); +static int indirblk_inseg(struct mount *, ufs2_daddr_t); static void handle_allocindir_partdone(struct allocindir *); static void initiate_write_filepage(struct pagedep *, struct buf *); static void initiate_write_indirdep(struct indirdep*, struct buf *); @@ -802,7 +802,9 @@ static void free_newdirblk(struct newdir static void free_jremref(struct jremref *); static void free_jaddref(struct jaddref *); static void free_jsegdep(struct jsegdep *); -static void free_jseg(struct jseg *); +static void free_jsegs(struct jblocks *); +static void rele_jseg(struct jseg *); +static void free_jseg(struct jseg *, struct jblocks *); static void free_jnewblk(struct jnewblk *); static void free_jfreeblk(struct jfreeblk *); static void free_jfreefrag(struct jfreefrag *); @@ -872,7 +874,7 @@ static int journal_unsuspend(struct ufsm static void softdep_prelink(struct vnode *, struct vnode *); static void add_to_journal(struct worklist *); static void remove_from_journal(struct worklist *); -static void softdep_process_journal(struct mount *, int); +static void softdep_process_journal(struct mount *, struct worklist *, int); static struct jremref *newjremref(struct dirrem *, struct inode *, struct inode *ip, off_t, nlink_t); static struct jaddref 
*newjaddref(struct inode *, ino_t, off_t, int16_t, @@ -1376,7 +1378,7 @@ softdep_process_worklist(mp, full) ump = VFSTOUFS(mp); ACQUIRE_LOCK(&lk); starttime = time_second; - softdep_process_journal(mp, full?MNT_WAIT:0); + softdep_process_journal(mp, NULL, full?MNT_WAIT:0); while (ump->softdep_on_worklist > 0) { if ((cnt = process_worklist_item(mp, LK_NOWAIT)) == -1) break; @@ -1999,6 +2001,37 @@ newblk_lookup(mp, newblkno, flags, newbl } /* + * Structures and routines associated with indir caching. + */ +struct workhead *indir_hashtbl; +u_long indir_hash; /* size of hash table - 1 */ +#defineINDIR_HASH(mp, blkno) \ + (&indir_hashtbl[register_t)(mp)) >> 13) + (blkno)) & indir_hash]) + +static int +indirblk_inseg(mp, blkno) + struct mount *mp; + ufs2_daddr_t blkno; +{ + struct freework *freework; + struct workhead *wkhd; + struct worklist *wk; + + wkhd = INDIR_HASH(mp, blkno); + LIST_FOREACH(wk, wkhd, wk_list) { + freework = WK_FREEWORK(wk); + if (freework->fw_blkno ==
svn commit: r220532 - head/sys/ufs/ffs
Author: jeff Date: Mon Apr 11 01:43:59 2011 New Revision: 220532 URL: http://svn.freebsd.org/changeset/base/220532 Log: - Refactor softdep_setup_freeblocks() into a set of functions to prepare for a new journal specific partial truncate routine. - Use dep_current[] in place of specific dependency counts. This is automatically maintained when workitems are allocated and has less risk of becoming incorrect. Modified: head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Mon Apr 11 01:19:02 2011 (r220531) +++ head/sys/ufs/ffs/ffs_softdep.c Mon Apr 11 01:43:59 2011 (r220532) @@ -815,9 +815,19 @@ static void cancel_jnewblk(struct jnewbl static int cancel_jaddref(struct jaddref *, struct inodedep *, struct workhead *); static void cancel_jfreefrag(struct jfreefrag *); +static inline void setup_freedirect(struct freeblks *, struct inode *, + int, int); +static inline void setup_freeext(struct freeblks *, struct inode *, int, int); +static inline void setup_freeindir(struct freeblks *, struct inode *, int i, + ufs_lbn_t, int); +static inline struct freeblks *newfreeblks(struct mount *, struct inode *); static void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t); -static int deallocate_dependencies(struct buf *, struct inodedep *, +static void softdep_trunc_deps(struct vnode *, struct freeblks *, ufs_lbn_t, + int, int); +static int cancel_pagedep(struct pagedep *, struct inodedep *, struct freeblks *); +static int deallocate_dependencies(struct buf *, struct inodedep *, + struct freeblks *, int off); static void free_newblk(struct newblk *); static void cancel_allocdirect(struct allocdirectlst *, struct allocdirect *, struct freeblks *, int); @@ -1114,7 +1124,6 @@ static struct callout softdep_callout; static int req_pending; static int req_clear_inodedeps;/* syncer process flush some inodedeps */ static int req_clear_remove; /* syncer process flush some freeblks */ -static long num_freeblkdep;/* number of freeblks workitems allocated */ /* * runtime statistics @@ -1832,7 +1841,6 @@ pagedep_lookup(mp, ino, lbn, flags, page */ LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl; static u_long inodedep_hash; /* size of hash table - 1 */ -static longnum_inodedep; /* number of inodedep allocated */ #defineINODEDEP_HASH(fs, inum) \ (&inodedep_hashtbl[register_t)(fs)) >> 13) + (inum)) & inodedep_hash]) @@ -1884,7 +1892,7 @@ inodedep_lookup(mp, inum, flags, inodede /* * If we are over our limit, try to improve the situation. */ - if (num_inodedep > max_softdeps && (flags & NODELAY) == 0) + if (dep_current[D_INODEDEP] > max_softdeps && (flags & NODELAY) == 0) request_cleanup(mp, FLUSH_INODES); FREE_LOCK(&lk); inodedep = malloc(sizeof(struct inodedep), @@ -1895,7 +1903,6 @@ inodedep_lookup(mp, inum, flags, inodede WORKITEM_FREE(inodedep, D_INODEDEP); return (1); } - num_inodedep += 1; inodedep->id_fs = fs; inodedep->id_ino = inum; inodedep->id_state = ALLCOMPLETE; @@ -2472,7 +2479,7 @@ journal_space(ump, thresh) * We use a tighter restriction here to prevent request_cleanup() * running in threads from running into locks we currently hold. 
*/ - if (num_inodedep > (max_softdeps / 10) * 9) + if (dep_current[D_INODEDEP] > (max_softdeps / 10) * 9) return (0); if (thresh) thresh = jblocks->jb_min; @@ -5340,6 +5347,83 @@ allocindir_merge(aip, oldaip) return (freefrag); } +static inline void +setup_freedirect(freeblks, ip, i, needj) + struct freeblks *freeblks; + struct inode *ip; + int i; + int needj; +{ + ufs2_daddr_t blkno; + int frags; + + blkno = DIP(ip, i_db[i]); + if (blkno == 0) + return; + DIP_SET(ip, i_db[i], 0); + frags = sblksize(ip->i_fs, ip->i_size, i); + frags = numfrags(ip->i_fs, frags); + newfreework(ip->i_ump, freeblks, NULL, i, blkno, frags, needj); +} + +static inline void +setup_freeext(freeblks, ip, i, needj) + struct freeblks *freeblks; + struct inode *ip; + int i; + int needj; +{ + ufs2_daddr_t blkno; + int frags; + + blkno = ip->i_din2->di_extb[i]; + if (blkno == 0) + return; + ip->i_din2->di_extb[i] = 0; + frags = sblksize(ip->i_fs, ip->i_din2->di_extsize, i); + frags = numfrags(ip->i_fs, frags); + newfreework(ip->i_ump, freeblks, NULL, -1 - i, blkno, frags, needj); +} + +static inline void +setup_freeindir(freeblks, ip, i, lbn, needj) + struct freeblks *freeblks;
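The dep_current[] part of this change is simple bookkeeping: instead of separate hand-maintained counters such as num_inodedep and num_freeblkdep, the limits are checked against the per-type counts that the workitem allocator already keeps. A tiny model of that idea, with the type list and function names chosen only for illustration:

#include <stdlib.h>

enum { D_PAGEDEP, D_INODEDEP, D_FREEBLKS, D_LAST };   /* illustrative subset */
static long dep_current[D_LAST];

void *
workitem_alloc(size_t size, int type)
{
    dep_current[type]++;        /* every dependency type is counted here */
    return (calloc(1, size));
}

void
workitem_free(void *item, int type)
{
    dep_current[type]--;
    free(item);
}

Because the counters live at the single allocation/free choke point, they cannot drift out of sync the way per-call-site counters can.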
svn commit: r221055 - in head/sys: kern ofed/include/linux
Author: jeff Date: Tue Apr 26 07:30:52 2011 New Revision: 221055 URL: http://svn.freebsd.org/changeset/base/221055 Log: - Catch up to falloc() changes. - PHOLD() before using a task structure on the stack. - Fix a LOR between the sleepq lock and thread lock in _intr_drain(). Modified: head/sys/kern/kern_intr.c head/sys/ofed/include/linux/file.h head/sys/ofed/include/linux/workqueue.h Modified: head/sys/kern/kern_intr.c == --- head/sys/kern/kern_intr.c Tue Apr 26 04:52:35 2011(r221054) +++ head/sys/kern/kern_intr.c Tue Apr 26 07:30:52 2011(r221055) @@ -746,7 +746,6 @@ intr_handler_source(void *cookie) void _intr_drain(int irq) { - struct mtx *mtx; struct intr_event *ie; struct intr_thread *ithd; struct thread *td; @@ -758,13 +757,21 @@ _intr_drain(int irq) return; ithd = ie->ie_thread; td = ithd->it_thread; + /* +* We set the flag and wait for it to be cleared to avoid +* long delays with potentially busy interrupt handlers +* were we to only sample TD_AWAITING_INTR() every tick. +*/ thread_lock(td); - mtx = td->td_lock; if (!TD_AWAITING_INTR(td)) { ithd->it_flags |= IT_WAIT; - msleep_spin(ithd, mtx, "isync", 0); + while (ithd->it_flags & IT_WAIT) { + thread_unlock(td); + pause("idrain", 1); + thread_lock(td); + } } - mtx_unlock_spin(mtx); + thread_unlock(td); return; } Modified: head/sys/ofed/include/linux/file.h == --- head/sys/ofed/include/linux/file.h Tue Apr 26 04:52:35 2011 (r221054) +++ head/sys/ofed/include/linux/file.h Tue Apr 26 07:30:52 2011 (r221055) @@ -92,7 +92,7 @@ get_unused_fd(void) int error; int fd; - error = falloc(curthread, &file, &fd); + error = falloc(curthread, &file, &fd, 0); if (error) return -error; return fd; Modified: head/sys/ofed/include/linux/workqueue.h == --- head/sys/ofed/include/linux/workqueue.h Tue Apr 26 04:52:35 2011 (r221054) +++ head/sys/ofed/include/linux/workqueue.h Tue Apr 26 07:30:52 2011 (r221055) @@ -160,9 +160,11 @@ flush_taskqueue(struct taskqueue *tq) { struct task flushtask; + PHOLD(curproc); TASK_INIT(&flushtask, 0, _flush_fn, NULL); taskqueue_enqueue(tq, &flushtask); taskqueue_drain(tq, &flushtask); + PRELE(curproc); } static inline int ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
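The _intr_drain() part trades a sleep on the target thread's spin lock (the source of the lock-order reversal) for a polling loop that drops the lock around each short pause. A userland analogue of the pattern, with a pthread mutex standing in for the thread lock; this is only a sketch of the idiom, not the kernel code:

#include <pthread.h>
#include <unistd.h>

static pthread_mutex_t thread_lock = PTHREAD_MUTEX_INITIALIZER;
static int it_wait;             /* set by the drainer, cleared by the handler */

void
drain(void)
{
    pthread_mutex_lock(&thread_lock);
    it_wait = 1;
    while (it_wait) {
        pthread_mutex_unlock(&thread_lock);
        usleep(1000);           /* roughly pause("idrain", 1) */
        pthread_mutex_lock(&thread_lock);
    }
    pthread_mutex_unlock(&thread_lock);
}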
svn commit: r190570 - stable/7/sys/kern
Author: jeff Date: Mon Mar 30 19:20:56 2009 New Revision: 190570 URL: http://svn.freebsd.org/changeset/base/190570 Log: MFC SVN rev 189787. - Fix steal_thresh calculation with odd numbers of cpus and sched_affinity() for threads on runqueues. Approved by: re Modified: stable/7/sys/kern/sched_ule.c Modified: stable/7/sys/kern/sched_ule.c == --- stable/7/sys/kern/sched_ule.c Mon Mar 30 18:47:13 2009 (r190569) +++ stable/7/sys/kern/sched_ule.c Mon Mar 30 19:20:56 2009 (r190570) @@ -1395,11 +1395,11 @@ sched_initticks(void *dummy) */ balance_interval = realstathz; /* -* Set steal thresh to log2(mp_ncpu) but no greater than 4. This -* prevents excess thrashing on large machines and excess idle on -* smaller machines. +* Set steal thresh to roughly log2(mp_ncpu) but no greater than 4. +* This prevents excess thrashing on large machines and excess idle +* on smaller machines. */ - steal_thresh = min(ffs(mp_ncpus) - 1, 4); + steal_thresh = min(fls(mp_ncpus) - 1, 3); affinity = SCHED_AFFINITY_DEFAULT; #endif } @@ -2549,6 +2549,11 @@ sched_affinity(struct thread *td) ts = td->td_sched; if (THREAD_CAN_SCHED(td, ts->ts_cpu)) return; + if (TD_ON_RUNQ(td)) { + sched_rem(td); + sched_add(td, SRQ_BORING); + return; + } if (!TD_IS_RUNNING(td)) return; td->td_flags |= TDF_NEEDRESCHED; ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
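The ffs()-to-fls() switch is the "odd numbers of cpus" half of the fix: for a CPU count that is not a power of two, ffs() returns the position of the lowest set bit, which is nowhere near log2. A quick standalone check, with fls written out by hand so the example does not rely on the BSD libc extension:

#include <stdio.h>
#include <strings.h>            /* ffs() */

static int
fls_(unsigned int v)
{
    int bit;

    for (bit = 0; v != 0; v >>= 1)
        bit++;
    return (bit);
}

int
main(void)
{
    unsigned int ncpus = 6;     /* any non power of two shows the difference */

    /* old threshold: ffs(6) - 1 == 1; new: fls(6) - 1 == 2, roughly log2(6) */
    printf("ffs-based %d, fls-based %d\n", ffs(ncpus) - 1, fls_(ncpus) - 1);
    return (0);
}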
svn commit: r191643 - in head/sys: kern sys
Author: jeff Date: Wed Apr 29 03:15:43 2009 New Revision: 191643 URL: http://svn.freebsd.org/changeset/base/191643 Log: - Remove the bogus idle thread state code. This may have a race in it and it only optimized out an ipi or mwait in very few cases. - Skip the adaptive idle code when running on SMT or HTT cores. This just wastes cpu time that could be used on a busy thread on the same core. - Rename CG_FLAG_THREAD to CG_FLAG_SMT to be more descriptive. Re-use CG_FLAG_THREAD to mean SMT or HTT. Sponsored by: Nokia Modified: head/sys/kern/sched_ule.c head/sys/kern/subr_smp.c head/sys/sys/smp.h Modified: head/sys/kern/sched_ule.c == --- head/sys/kern/sched_ule.c Tue Apr 28 23:36:29 2009(r191642) +++ head/sys/kern/sched_ule.c Wed Apr 29 03:15:43 2009(r191643) @@ -36,7 +36,7 @@ */ #include -__FBSDID("$FreeBSD$"); +__FBSDID("$FreeBSD$); #include "opt_hwpmc_hooks.h" #include "opt_kdtrace.h" @@ -213,7 +213,6 @@ struct tdq { volatile inttdq_load; /* Aggregate load. */ int tdq_sysload;/* For loadavg, !ITHD load. */ int tdq_transferable; /* Transferable thread count. */ - volatile inttdq_idlestate; /* State of the idle thread. */ short tdq_switchcnt; /* Switches this tick. */ short tdq_oldswitchcnt; /* Switches last tick. */ u_char tdq_lowpri; /* Lowest priority thread. */ @@ -360,7 +359,6 @@ tdq_print(int cpu) printf("\tload: %d\n", tdq->tdq_load); printf("\tswitch cnt: %d\n", tdq->tdq_switchcnt); printf("\told switch cnt: %d\n", tdq->tdq_oldswitchcnt); - printf("\tidle state: %d\n", tdq->tdq_idlestate); printf("\ttimeshare idx: %d\n", tdq->tdq_idx); printf("\ttimeshare ridx: %d\n", tdq->tdq_ridx); printf("\tload transferable: %d\n", tdq->tdq_transferable); @@ -913,7 +911,7 @@ tdq_idled(struct tdq *tdq) /* We don't want to be preempted while we're iterating. */ spinlock_enter(); for (cg = tdq->tdq_cg; cg != NULL; ) { - if ((cg->cg_flags & (CG_FLAG_HTT | CG_FLAG_THREAD)) == 0) + if ((cg->cg_flags & CG_FLAG_THREAD) == 0) thresh = steal_thresh; else thresh = 1; @@ -969,13 +967,6 @@ tdq_notify(struct tdq *tdq, struct threa return; if (TD_IS_IDLETHREAD(ctd)) { /* -* If the idle thread is still 'running' it's probably -* waiting on us to release the tdq spinlock already. No -* need to ipi. -*/ - if (tdq->tdq_idlestate == TDQ_RUNNING) - return; - /* * If the MD code has an idle wakeup routine try that before * falling back to IPI. */ @@ -2536,12 +2527,10 @@ sched_idletd(void *dummy) int switchcnt; int i; + mtx_assert(&Giant, MA_NOTOWNED); td = curthread; tdq = TDQ_SELF(); - mtx_assert(&Giant, MA_NOTOWNED); - /* ULE relies on preemption for idle interruption. */ for (;;) { - tdq->tdq_idlestate = TDQ_RUNNING; #ifdef SMP if (tdq_idled(tdq) == 0) continue; @@ -2550,26 +2539,21 @@ sched_idletd(void *dummy) /* * If we're switching very frequently, spin while checking * for load rather than entering a low power state that -* requires an IPI. +* may require an IPI. However, don't do any busy +* loops while on SMT machines as this simply steals +* cycles from cores doing useful work. */ - if (switchcnt > sched_idlespinthresh) { + if ((tdq->tdq_cg->cg_flags & CG_FLAG_THREAD) == 0 && + switchcnt > sched_idlespinthresh) { for (i = 0; i < sched_idlespins; i++) { if (tdq->tdq_load) break; cpu_spinwait(); } } - /* -* We must set our state to IDLE before checking -* tdq_load for the last time to avoid a race with -* tdq_notify(). 
-*/ - if (tdq->tdq_load == 0) { - switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt; - tdq->tdq_idlestate = TDQ_IDLE; - if (tdq->tdq_load == 0) - cpu_idle(switchcnt > 1); - } + switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldsw
svn commit: r191645 - head/sys/kern
Author: jeff Date: Wed Apr 29 03:26:30 2009 New Revision: 191645 URL: http://svn.freebsd.org/changeset/base/191645 Log: - Fix the FBSDID line. Modified: head/sys/kern/sched_ule.c Modified: head/sys/kern/sched_ule.c == --- head/sys/kern/sched_ule.c Wed Apr 29 03:21:53 2009(r191644) +++ head/sys/kern/sched_ule.c Wed Apr 29 03:26:30 2009(r191645) @@ -36,7 +36,7 @@ */ #include -__FBSDID("$FreeBSD$); +__FBSDID("$FreeBSD$"); #include "opt_hwpmc_hooks.h" #include "opt_kdtrace.h" ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r191648 - in head/sys: amd64/amd64 amd64/include i386/i386 i386/include
Author: jeff Date: Wed Apr 29 06:54:40 2009 New Revision: 191648 URL: http://svn.freebsd.org/changeset/base/191648 Log: - Add support for cpuid leaf 0xb. This allows us to determine the topology of nehalem/corei7 based systems. - Remove the cpu_cores/cpu_logical detection from identcpu. - Describe the layout of the system in cpu_mp_announce(). Sponsored by: Nokia Modified: head/sys/amd64/amd64/identcpu.c head/sys/amd64/amd64/mp_machdep.c head/sys/amd64/include/smp.h head/sys/amd64/include/specialreg.h head/sys/i386/i386/identcpu.c head/sys/i386/i386/mp_machdep.c head/sys/i386/include/smp.h head/sys/i386/include/specialreg.h Modified: head/sys/amd64/amd64/identcpu.c == --- head/sys/amd64/amd64/identcpu.c Wed Apr 29 06:52:04 2009 (r191647) +++ head/sys/amd64/amd64/identcpu.c Wed Apr 29 06:54:40 2009 (r191648) @@ -106,9 +106,6 @@ static struct { { CENTAUR_VENDOR_ID,CPU_VENDOR_CENTAUR }, /* CentaurHauls */ }; -int cpu_cores; -int cpu_logical; - extern int pq_l2size; extern int pq_l2nways; @@ -195,7 +192,6 @@ printcpuinfo(void) cpu_vendor_id == CPU_VENDOR_CENTAUR) { printf(" Stepping = %u", cpu_id & 0xf); if (cpu_high > 0) { - u_int cmp = 1, htt = 1; /* * Here we should probably set up flags indicating @@ -400,28 +396,6 @@ printcpuinfo(void) if (tsc_is_invariant) printf("\n TSC: P-state invariant"); - /* -* If this CPU supports HTT or CMP then mention the -* number of physical/logical cores it contains. -*/ - if (cpu_feature & CPUID_HTT) - htt = (cpu_procinfo & CPUID_HTT_CORES) >> 16; - if (cpu_vendor_id == CPU_VENDOR_AMD && - (amd_feature2 & AMDID2_CMP)) - cmp = (cpu_procinfo2 & AMDID_CMP_CORES) + 1; - else if (cpu_vendor_id == CPU_VENDOR_INTEL && - (cpu_high >= 4)) { - cpuid_count(4, 0, regs); - if ((regs[0] & 0x1f) != 0) - cmp = ((regs[0] >> 26) & 0x3f) + 1; - } - cpu_cores = cmp; - cpu_logical = htt / cmp; - if (cmp > 1) - printf("\n Cores per package: %d", cmp); - if ((htt / cmp) > 1) - printf("\n Logical CPUs per core: %d", - cpu_logical); } } /* Avoid ugly blank lines: only print newline when we have to. */ Modified: head/sys/amd64/amd64/mp_machdep.c == --- head/sys/amd64/amd64/mp_machdep.c Wed Apr 29 06:52:04 2009 (r191647) +++ head/sys/amd64/amd64/mp_machdep.c Wed Apr 29 06:54:40 2009 (r191648) @@ -160,6 +160,8 @@ int apic_cpuids[MAX_APIC_ID + 1]; static volatile u_int cpu_ipi_pending[MAXCPU]; static u_int boot_address; +static int cpu_logical; +static int cpu_cores; static voidassign_cpu_ids(void); static voidset_interrupt_apic_ids(void); @@ -181,13 +183,142 @@ mem_range_AP_init(void) mem_range_softc.mr_op->initAP(&mem_range_softc); } -struct cpu_group * -cpu_topo(void) +static void +topo_probe_0xb(void) +{ + int logical; + int p[4]; + int bits; + int type; + int cnt; + int i; + int x; + + /* We only support two levels for now. */ + for (i = 0; i < 3; i++) { + cpuid_count(0x0B, i, p); + bits = p[0] & 0x1f; + logical = p[1] &= 0x; + type = (p[2] >> 8) & 0xff; + if (type == 0 || logical == 0) + break; + for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) { + if (!cpu_info[x].cpu_present || + cpu_info[x].cpu_disabled) + continue; + if (x >> bits == boot_cpu_id >> bits) + cnt++; + } + if (type == CPUID_TYPE_SMT) + cpu_logical = cnt; + else if (type == CPUID_TYPE_CORE) + cpu_cores = cnt; + } + if (cpu_logical == 0) + cpu_logical = 1; + cpu_cores /= cpu_logical; +} + +static void +topo_probe_0x4(void) +{ + u_int threads_per_cache, p[4]; + u_int htt, cmp; + int i; + + htt = cmp = 1; + /* +* If this CPU supports HTT o
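For reference, leaf 0xB can be probed the same way from userland. A minimal sketch, assuming a GCC/Clang toolchain for <cpuid.h> (__get_cpuid_count is a compiler builtin wrapper, not part of this commit) and a CPU that implements the leaf; the field layout mirrors topo_probe_0xb() above, with ECX[15:8] giving the level type (1 = SMT, 2 = core):

#include <cpuid.h>
#include <stdio.h>

int
main(void)
{
    unsigned int eax, ebx, ecx, edx;
    unsigned int bits, logical, type, level;

    for (level = 0; level < 3; level++) {
        if (!__get_cpuid_count(0x0b, level, &eax, &ebx, &ecx, &edx))
            break;
        bits = eax & 0x1f;          /* APIC id shift width at this level */
        logical = ebx & 0xffff;     /* logical CPUs sharing this level */
        type = (ecx >> 8) & 0xff;   /* 1 = SMT, 2 = core */
        if (type == 0 || logical == 0)
            break;
        printf("level %u: type %u shift %u logical %u\n",
            level, type, bits, logical);
    }
    return (0);
}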
svn commit: r191676 - head/sys/kern
Author: jeff Date: Wed Apr 29 23:04:31 2009 New Revision: 191676 URL: http://svn.freebsd.org/changeset/base/191676 Log: - Fix non-SMP build by encapsulating idle spin logic in a macro. Pointy hat to:me Modified: head/sys/kern/sched_ule.c Modified: head/sys/kern/sched_ule.c == --- head/sys/kern/sched_ule.c Wed Apr 29 21:50:13 2009(r191675) +++ head/sys/kern/sched_ule.c Wed Apr 29 23:04:31 2009(r191676) @@ -2516,6 +2516,13 @@ sched_sizeof_thread(void) return (sizeof(struct thread) + sizeof(struct td_sched)); } +#ifdef SMP +#defineTDQ_IDLESPIN(tdq) \ +((tdq)->tdq_cg != NULL && ((tdq)->tdq_cg->cg_flags & CG_FLAG_THREAD) == 0) +#else +#defineTDQ_IDLESPIN(tdq) 1 +#endif + /* * The actual idle process. */ @@ -2543,8 +2550,7 @@ sched_idletd(void *dummy) * loops while on SMT machines as this simply steals * cycles from cores doing useful work. */ - if ((tdq->tdq_cg->cg_flags & CG_FLAG_THREAD) == 0 && - switchcnt > sched_idlespinthresh) { + if (TDQ_IDLESPIN(tdq) && switchcnt > sched_idlespinthresh) { for (i = 0; i < sched_idlespins; i++) { if (tdq->tdq_load) break; ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
Re: svn commit: r191643 - in head/sys: kern sys
On Wed, 29 Apr 2009, Kostik Belousov wrote: On Wed, Apr 29, 2009 at 03:15:44AM +, Jeff Roberson wrote: Author: jeff Date: Wed Apr 29 03:15:43 2009 New Revision: 191643 URL: http://svn.freebsd.org/changeset/base/191643 Log: - Remove the bogus idle thread state code. This may have a race in it and it only optimized out an ipi or mwait in very few cases. - Skip the adaptive idle code when running on SMT or HTT cores. This just wastes cpu time that could be used on a busy thread on the same core. - Rename CG_FLAG_THREAD to CG_FLAG_SMT to be more descriptive. Re-use CG_FLAG_THREAD to mean SMT or HTT. Sponsored by: Nokia Modified: head/sys/kern/sched_ule.c head/sys/kern/subr_smp.c head/sys/sys/smp.h Now I see a reason why it is better #ifdef SMP the code that uses CG_FLAG_*. Also, we should check for tdq_cg != NULL in one more place. See the patch below, instead of exposing CG_FLAG_* for !SMP configs. Thank you kan. I did something slightly different so we can retain the adaptive idling on UP. Thanks, Jeff diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index 680572d..fe3a119 100644 --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -891,6 +891,7 @@ tdq_move(struct tdq *from, struct tdq *to) return (1); } +#ifdef SMP /* * This tdq has idled. Try to steal a thread from another cpu and switch * to it. @@ -947,6 +948,7 @@ tdq_idled(struct tdq *tdq) spinlock_exit(); return (1); } +#endif /* * Notify a remote cpu of new work. Sends an IPI if criteria are met. @@ -2525,7 +2527,9 @@ sched_idletd(void *dummy) struct thread *td; struct tdq *tdq; int switchcnt; +#ifdef SMP int i; +#endif mtx_assert(&Giant, MA_NOTOWNED); td = curthread; @@ -2543,7 +2547,9 @@ sched_idletd(void *dummy) * loops while on SMT machines as this simply steals * cycles from cores doing useful work. */ - if ((tdq->tdq_cg->cg_flags & CG_FLAG_THREAD) == 0 && +#ifdef SMP + if (tdq->tdq_cg != NULL && + (tdq->tdq_cg->cg_flags & CG_FLAG_THREAD) == 0 && switchcnt > sched_idlespinthresh) { for (i = 0; i < sched_idlespins; i++) { if (tdq->tdq_load) @@ -2551,6 +2557,7 @@ sched_idletd(void *dummy) cpu_spinwait(); } } +#endif switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt; if (tdq->tdq_load == 0) cpu_idle(switchcnt > 1); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r251826 - head/sys/vm
Author: jeff Date: Mon Jun 17 03:43:47 2013 New Revision: 251826 URL: http://svnweb.freebsd.org/changeset/base/251826 Log: - Add a new UMA API: uma_zcache_create(). This makes a zone without any backing memory that is only a container for per-cpu caches of arbitrary pointer items. These zones have no kegs. - Convert the regular keg based allocator to use the new import/release functions. - Move some stats to be atomics since they would require excessive zone locking/unlocking with the new import/release paradigm. Make zone_free_item simpler now that callers can manage more stats. - Check for these cache-only zones in the public APIs and debugging code by checking zone_first_keg() against NULL. Sponsored by: EMC / Isilong Storage Division Modified: head/sys/vm/uma.h head/sys/vm/uma_core.c head/sys/vm/uma_int.h Modified: head/sys/vm/uma.h == --- head/sys/vm/uma.h Mon Jun 17 03:32:27 2013(r251825) +++ head/sys/vm/uma.h Mon Jun 17 03:43:47 2013(r251826) @@ -124,6 +124,16 @@ typedef int (*uma_init)(void *mem, int s typedef void (*uma_fini)(void *mem, int size); /* + * Import new memory into a cache zone. + */ +typedef int (*uma_import)(void *arg, void **store, int count, int flags); + +/* + * Free memory from a cache zone. + */ +typedef void (*uma_release)(void *arg, void **store, int count); + +/* * What's the difference between initializing and constructing? * * The item is initialized when it is cached, and this is the state that the @@ -216,6 +226,19 @@ uma_zone_t uma_zsecond_create(char *name int uma_zsecond_add(uma_zone_t zone, uma_zone_t master); /* + * Create cache-only zones. + * + * This allows uma's per-cpu cache facilities to handle arbitrary + * pointers. Consumers must specify the import and release functions to + * fill and destroy caches. UMA does not allocate any memory for these + * zones. The 'arg' parameter is passed to import/release and is caller + * specific. + */ +uma_zone_t uma_zcache_create(char *name, uma_ctor ctor, uma_dtor dtor, + uma_init zinit, uma_fini zfini, uma_import zimport, + uma_release zrelease, void *arg, int flags); + +/* * Definitions for uma_zcreate flags * * These flags share space with UMA_ZFLAGs in uma_int.h. Be careful not to Modified: head/sys/vm/uma_core.c == --- head/sys/vm/uma_core.c Mon Jun 17 03:32:27 2013(r251825) +++ head/sys/vm/uma_core.c Mon Jun 17 03:43:47 2013(r251826) @@ -131,14 +131,14 @@ static int bucketdisable = 1; static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs); /* This mutex protects the keg list */ -static struct mtx uma_mtx; +static struct mtx_padalign uma_mtx; /* Linked list of boot time pages */ static LIST_HEAD(,uma_slab) uma_boot_pages = LIST_HEAD_INITIALIZER(uma_boot_pages); /* This mutex protects the boot time pages list */ -static struct mtx uma_boot_pages_mtx; +static struct mtx_padalign uma_boot_pages_mtx; /* Is the VM done starting up? */ static int booted = 0; @@ -172,6 +172,9 @@ struct uma_zctor_args { uma_dtor dtor; uma_init uminit; uma_fini fini; + uma_import import; + uma_release release; + void *arg; uma_keg_t keg; int align; uint32_t flags; @@ -216,9 +219,6 @@ static uint8_t bucket_size[BUCKET_ZONES] */ enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI }; -#defineZFREE_STATFAIL 0x0001 /* Update zone failure statistic. */ -#defineZFREE_STATFREE 0x0002 /* Update zone free statistic. */ - /* Prototypes.. 
*/ static void *noobj_alloc(uma_zone_t, int, uint8_t *, int); @@ -244,8 +244,7 @@ static void hash_free(struct uma_hash *h static void uma_timeout(void *); static void uma_startup3(void); static void *zone_alloc_item(uma_zone_t, void *, int); -static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip, -int); +static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip); static void bucket_enable(void); static void bucket_init(void); static uma_bucket_t bucket_alloc(int, int); @@ -254,11 +253,14 @@ static void bucket_zone_drain(void); static int zone_alloc_bucket(uma_zone_t zone, int flags); static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags); static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags); -static void *slab_alloc_item(uma_zone_t zone, uma_slab_t slab); +static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab); +static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item); static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, int align, uint32_t flags); static inline void zone_relock(uma_zone_t zone, uma_keg_t
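The import/release split is easiest to see in miniature. The sketch below is an illustrative userland model of a cache-only zone, not the UMA implementation: the cache layer only moves pointers in and out of a bucket-sized array, and everything about where the items come from lives behind the caller's two callbacks. The struct, function names and the bucket size of 32 are made up for the example.

#include <stddef.h>

typedef int  (*import_fn)(void *arg, void **store, int count);
typedef void (*release_fn)(void *arg, void **store, int count);

struct pcache {
    void       *items[32];      /* stand-in for one per-cpu bucket */
    int         cnt;
    import_fn   import;         /* fills items[] from the backing store */
    release_fn  release;        /* drains items[] back to the backing store */
    void       *arg;
};

void *
pcache_alloc(struct pcache *pc)
{
    if (pc->cnt == 0)
        pc->cnt = pc->import(pc->arg, pc->items, 32);
    if (pc->cnt == 0)
        return (NULL);
    return (pc->items[--pc->cnt]);
}

void
pcache_free(struct pcache *pc, void *item)
{
    if (pc->cnt == 32) {
        pc->release(pc->arg, pc->items, pc->cnt);
        pc->cnt = 0;
    }
    pc->items[pc->cnt++] = item;
}

uma_zcache_create() wires exactly this kind of callback pair, together with the usual ctor/dtor/init/fini hooks, onto the existing per-cpu bucket machinery, which is why such zones need no keg at all.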
svn commit: r251894 - in head: lib/libmemstat sys/vm
Author: jeff Date: Tue Jun 18 04:50:20 2013 New Revision: 251894 URL: http://svnweb.freebsd.org/changeset/base/251894 Log: Refine UMA bucket allocation to reduce space consumption and improve performance. - Always free to the alloc bucket if there is space. This gives LIFO allocation order to improve hot-cache performance. This also allows for zones with a single bucket per-cpu rather than a pair if the entire working set fits in one bucket. - Enable per-cpu caches of buckets. To prevent recursive bucket allocation one bucket zone still has per-cpu caches disabled. - Pick the initial bucket size based on a table driven maximum size per-bucket rather than the number of items per-page. This gives more sane initial sizes. - Only grow the bucket size when we face contention on the zone lock, this causes bucket sizes to grow more slowly. - Adjust the number of items per-bucket to account for the header space. This packs the buckets more efficiently per-page while making them not quite powers of two. - Eliminate the per-zone free bucket list. Always return buckets back to the bucket zone. This ensures that as zones grow into larger bucket sizes they eventually discard the smaller sizes. It persists fewer buckets in the system. The locking is slightly trickier. - Only switch buckets in zalloc, not zfree, this eliminates pathological cases where we ping-pong between two buckets. - Ensure that the thread that fills a new bucket gets to allocate from it to give a better upper bound on allocation time. Sponsored by: EMC / Isilon Storage Division Modified: head/lib/libmemstat/memstat_uma.c head/sys/vm/uma_core.c head/sys/vm/uma_int.h Modified: head/lib/libmemstat/memstat_uma.c == --- head/lib/libmemstat/memstat_uma.c Tue Jun 18 04:11:16 2013 (r251893) +++ head/lib/libmemstat/memstat_uma.c Tue Jun 18 04:50:20 2013 (r251894) @@ -446,7 +446,7 @@ skip_percpu: kz.uk_ipers; mtp->mt_byteslimit = mtp->mt_countlimit * mtp->mt_size; mtp->mt_count = mtp->mt_numallocs - mtp->mt_numfrees; - for (ubp = LIST_FIRST(&uz.uz_full_bucket); ubp != + for (ubp = LIST_FIRST(&uz.uz_buckets); ubp != NULL; ubp = LIST_NEXT(&ub, ub_link)) { ret = kread(kvm, ubp, &ub, sizeof(ub), 0); mtp->mt_zonefree += ub.ub_cnt; Modified: head/sys/vm/uma_core.c == --- head/sys/vm/uma_core.c Tue Jun 18 04:11:16 2013(r251893) +++ head/sys/vm/uma_core.c Tue Jun 18 04:50:20 2013(r251894) @@ -192,27 +192,26 @@ struct uma_kctor_args { struct uma_bucket_zone { uma_zone_t ubz_zone; char*ubz_name; - int ubz_entries; + int ubz_entries;/* Number of items it can hold. */ + int ubz_maxsize;/* Maximum allocation size per-item. */ }; -#defineBUCKET_MAX 128 +/* + * Compute the actual number of bucket entries to pack them in power + * of two sizes for more efficient space utilization. + */ +#defineBUCKET_SIZE(n) \ +(((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *)) + +#defineBUCKET_MAX BUCKET_SIZE(128) struct uma_bucket_zone bucket_zones[] = { - { NULL, "16 Bucket", 16 }, - { NULL, "32 Bucket", 32 }, - { NULL, "64 Bucket", 64 }, - { NULL, "128 Bucket", 128 }, + { NULL, "32 Bucket", BUCKET_SIZE(32), 512 }, + { NULL, "64 Bucket", BUCKET_SIZE(64), 256 }, + { NULL, "128 Bucket", BUCKET_SIZE(128), 128 }, { NULL, NULL, 0} }; - -#defineBUCKET_SHIFT4 -#defineBUCKET_ZONES((BUCKET_MAX >> BUCKET_SHIFT) + 1) - -/* - * bucket_size[] maps requested bucket sizes to zones that allocate a bucket - * of approximately the right size. 
- */ -static uint8_t bucket_size[BUCKET_ZONES]; +static uma_zone_t largebucket; /* * Flags and enumerations to be passed to internal functions. @@ -250,7 +249,7 @@ static void bucket_init(void); static uma_bucket_t bucket_alloc(int, int); static void bucket_free(uma_bucket_t); static void bucket_zone_drain(void); -static int zone_alloc_bucket(uma_zone_t zone, int flags); +static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, int flags); static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags); static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags); static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab); @@ -283,7 +282,6 @@ SYSCTL_INT(_vm, OID_AUTO, zone_warnings, /* * This routine checks to see
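The BUCKET_SIZE() macro is easier to read with numbers plugged in. Below is a worked example for LP64, assuming a 16-byte struct uma_bucket header purely for illustration (the real header size may differ): the "128 bucket" consumes 128 pointer slots of backing store but can hold only 126 items once the header is accounted for.

#include <stdio.h>

#define PTRSZ           8       /* sizeof(void *) on LP64 */
#define HDRSZ           16      /* assumed bucket header size */
#define BUCKET_SIZE(n)  (((PTRSZ * (n)) - HDRSZ) / PTRSZ)

int
main(void)
{
    printf("%d %d %d\n", BUCKET_SIZE(32), BUCKET_SIZE(64), BUCKET_SIZE(128));
    /* prints "30 62 126" under the assumptions above */
    return (0);
}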
Re: svn commit: r251894 - in head: lib/libmemstat sys/vm
On Tue, 18 Jun 2013, Alfred Perlstein wrote: On 6/18/13 4:37 AM, Gleb Smirnoff wrote: On Tue, Jun 18, 2013 at 10:25:08AM +0200, Andre Oppermann wrote: A> There used to be a problem with per CPU caches accumulating large amounts A> of items without freeing back to the global (or socket) pool. A> A> Do these updates to UMA change this situation and/or do you have further A> improvements coming up? This is especially a problem with ZFS, which utilizes UMA extensively. IMHO, we need a flag for uma_zcreate() that would disable per CPU caches, so that certain zones (ZFS at least) would have them off. It might be a good idea to force this flag on every zone that has allocation >= then the page size. What about people running with 256GB+ ram? Do they also want the per cpu caches off? If you look at the new system there is a static threshold for the initial item size required for different sized per-cpu buckets. What might make sense is to tune this size based on available memory. For what it's worth I looked at solaris settings and they cache roughly 4x as much on a per-cpu basis. The new system should tend to cache less of large and infrequent allocations vs the old system. I can't say yet whether it is still a problem. I have an implementation of vmem to replace using vm_maps for kmem_map, buffer_map, etc. which may resolve the zfs allocation problems. I hope to get this in over the next few weeks. Thanks, Jeff -- Alfred Perlstein VP Software Engineering, iXsystems ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r251983 - head/sys/vm
Author: jeff Date: Wed Jun 19 02:30:32 2013 New Revision: 251983 URL: http://svnweb.freebsd.org/changeset/base/251983 Log: - Persist the caller's flags in the bucket allocation flags so we don't lose a M_NOVM when we recurse into a bucket allocation. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/uma_core.c Modified: head/sys/vm/uma_core.c == --- head/sys/vm/uma_core.c Wed Jun 19 02:16:04 2013(r251982) +++ head/sys/vm/uma_core.c Wed Jun 19 02:30:32 2013(r251983) @@ -2418,7 +2418,7 @@ zone_alloc_bucket(uma_zone_t zone, int f int max; max = zone->uz_count; - bflags = M_NOWAIT; + bflags = (flags & ~M_WAITOK) | M_NOWAIT; if (zone->uz_flags & UMA_ZFLAG_CACHEONLY) bflags |= M_NOVM; bucket = bucket_alloc(zone->uz_count, bflags); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r252040 - head/sys/vm
Author: jeff Date: Thu Jun 20 19:08:12 2013 New Revision: 252040 URL: http://svnweb.freebsd.org/changeset/base/252040 Log: - Add a per-zone lock for zones without kegs. - Be more explicit about zone vs keg locking. This functionally changes almost nothing. - Add a size parameter to uma_zcache_create() so we can size the buckets. - Pass the zone to bucket_alloc() so it can modify allocation flags as appropriate. - Fix a bug in zone_alloc_bucket() where I missed an address of operator in a failure case. (Found by pho) Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/uma.h head/sys/vm/uma_core.c head/sys/vm/uma_dbg.c head/sys/vm/uma_int.h Modified: head/sys/vm/uma.h == --- head/sys/vm/uma.h Thu Jun 20 18:25:10 2013(r252039) +++ head/sys/vm/uma.h Thu Jun 20 19:08:12 2013(r252040) @@ -234,7 +234,7 @@ int uma_zsecond_add(uma_zone_t zone, uma * zones. The 'arg' parameter is passed to import/release and is caller * specific. */ -uma_zone_t uma_zcache_create(char *name, uma_ctor ctor, uma_dtor dtor, +uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor, uma_init zinit, uma_fini zfini, uma_import zimport, uma_release zrelease, void *arg, int flags); Modified: head/sys/vm/uma_core.c == --- head/sys/vm/uma_core.c Thu Jun 20 18:25:10 2013(r252039) +++ head/sys/vm/uma_core.c Thu Jun 20 19:08:12 2013(r252040) @@ -246,8 +246,8 @@ static void *zone_alloc_item(uma_zone_t, static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip); static void bucket_enable(void); static void bucket_init(void); -static uma_bucket_t bucket_alloc(int, int); -static void bucket_free(uma_bucket_t); +static uma_bucket_t bucket_alloc(uma_zone_t zone, int); +static void bucket_free(uma_zone_t zone, uma_bucket_t); static void bucket_zone_drain(void); static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, int flags); static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags); @@ -256,8 +256,6 @@ static void *slab_alloc_item(uma_keg_t k static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item); static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini, int align, uint32_t flags); -static inline void zone_relock(uma_zone_t zone, uma_keg_t keg); -static inline void keg_relock(uma_keg_t keg, uma_zone_t zone); static int zone_import(uma_zone_t zone, void **bucket, int max, int flags); static void zone_release(uma_zone_t zone, void **bucket, int cnt); @@ -352,7 +350,7 @@ bucket_select(int size) } static uma_bucket_t -bucket_alloc(int entries, int bflags) +bucket_alloc(uma_zone_t zone, int flags) { struct uma_bucket_zone *ubz; uma_bucket_t bucket; @@ -366,8 +364,10 @@ bucket_alloc(int entries, int bflags) if (bucketdisable) return (NULL); - ubz = bucket_zone_lookup(entries); - bucket = uma_zalloc(ubz->ubz_zone, bflags); + if (zone->uz_flags & UMA_ZFLAG_CACHEONLY) + flags |= M_NOVM; + ubz = bucket_zone_lookup(zone->uz_count); + bucket = uma_zalloc(ubz->ubz_zone, flags); if (bucket) { #ifdef INVARIANTS bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries); @@ -380,7 +380,7 @@ bucket_alloc(int entries, int bflags) } static void -bucket_free(uma_bucket_t bucket) +bucket_free(uma_zone_t zone, uma_bucket_t bucket) { struct uma_bucket_zone *ubz; @@ -662,9 +662,9 @@ cache_drain(uma_zone_t zone) bucket_drain(zone, cache->uc_allocbucket); bucket_drain(zone, cache->uc_freebucket); if (cache->uc_allocbucket != NULL) - bucket_free(cache->uc_allocbucket); + bucket_free(zone, cache->uc_allocbucket); if (cache->uc_freebucket != 
NULL) - bucket_free(cache->uc_freebucket); + bucket_free(zone, cache->uc_freebucket); cache->uc_allocbucket = cache->uc_freebucket = NULL; } ZONE_LOCK(zone); @@ -688,7 +688,7 @@ bucket_cache_drain(uma_zone_t zone) LIST_REMOVE(bucket, ub_link); ZONE_UNLOCK(zone); bucket_drain(zone, bucket); - bucket_free(bucket); + bucket_free(zone, bucket); ZONE_LOCK(zone); } } @@ -801,7 +801,7 @@ zone_drain_wait(uma_zone_t zone, int wai if (waitok == M_NOWAIT) goto out; mtx_unlock(&uma_mtx); - msleep(zone, zone->uz_lock, PVM, "zonedrain", 1); + msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1); mtx_lock(&uma_mtx); }
svn commit: r252226 - head/sys/vm
Author: jeff Date: Wed Jun 26 00:57:38 2013 New Revision: 252226 URL: http://svnweb.freebsd.org/changeset/base/252226 Log: - Resolve bucket recursion issues by passing a cookie with zone flags through bucket_alloc() to uma_zalloc_arg() and uma_zfree_arg(). - Make some smaller buckets for large zones to further reduce memory waste. - Implement uma_zone_reserve(). This holds aside a number of items only for callers who specify M_USE_RESERVE. buckets will never be filled from reserve allocations. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/uma.h head/sys/vm/uma_core.c head/sys/vm/uma_int.h Modified: head/sys/vm/uma.h == --- head/sys/vm/uma.h Wed Jun 26 00:42:45 2013(r252225) +++ head/sys/vm/uma.h Wed Jun 26 00:57:38 2013(r252226) @@ -459,6 +459,12 @@ void uma_reclaim(void); void uma_set_align(int align); /* + * Set a reserved number of items to hold for M_USE_RESERVE allocations. All + * other requests must allocate new backing pages. + */ +void uma_zone_reserve(uma_zone_t zone, int nitems); + +/* * Reserves the maximum KVA space required by the zone and configures the zone * to use a VM_ALLOC_NOOBJ-based backend allocator. * Modified: head/sys/vm/uma_core.c == --- head/sys/vm/uma_core.c Wed Jun 26 00:42:45 2013(r252225) +++ head/sys/vm/uma_core.c Wed Jun 26 00:57:38 2013(r252226) @@ -206,12 +206,14 @@ struct uma_bucket_zone { #defineBUCKET_MAX BUCKET_SIZE(128) struct uma_bucket_zone bucket_zones[] = { + { NULL, "4 Bucket", BUCKET_SIZE(4), 4096 }, + { NULL, "8 Bucket", BUCKET_SIZE(8), 2048 }, + { NULL, "16 Bucket", BUCKET_SIZE(16), 1024 }, { NULL, "32 Bucket", BUCKET_SIZE(32), 512 }, { NULL, "64 Bucket", BUCKET_SIZE(64), 256 }, { NULL, "128 Bucket", BUCKET_SIZE(128), 128 }, { NULL, NULL, 0} }; -static uma_zone_t largebucket; /* * Flags and enumerations to be passed to internal functions. @@ -246,10 +248,10 @@ static void *zone_alloc_item(uma_zone_t, static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip); static void bucket_enable(void); static void bucket_init(void); -static uma_bucket_t bucket_alloc(uma_zone_t zone, int); -static void bucket_free(uma_zone_t zone, uma_bucket_t); +static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int); +static void bucket_free(uma_zone_t zone, uma_bucket_t, void *); static void bucket_zone_drain(void); -static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, int flags); +static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags); static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags); static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags); static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab); @@ -304,17 +306,8 @@ bucket_init(void) size += sizeof(void *) * ubz->ubz_entries; ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, - UMA_ZONE_MAXBUCKET | UMA_ZONE_MTXCLASS); + UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET); } - /* -* To avoid recursive bucket allocation loops we disable buckets -* on the smallest bucket zone and use it for the largest zone. -* The remainder of the zones all use the largest zone. 
-*/ - ubz--; - ubz->ubz_zone->uz_count = bucket_zones[0].ubz_entries; - bucket_zones[0].ubz_zone->uz_count = 0; - largebucket = ubz->ubz_zone; } /* @@ -350,7 +343,7 @@ bucket_select(int size) } static uma_bucket_t -bucket_alloc(uma_zone_t zone, int flags) +bucket_alloc(uma_zone_t zone, void *udata, int flags) { struct uma_bucket_zone *ubz; uma_bucket_t bucket; @@ -363,11 +356,26 @@ bucket_alloc(uma_zone_t zone, int flags) */ if (bucketdisable) return (NULL); - - if (zone->uz_flags & UMA_ZFLAG_CACHEONLY) + /* +* To limit bucket recursion we store the original zone flags +* in a cookie passed via zalloc_arg/zfree_arg. This allows the +* NOVM flag to persist even through deep recursions. We also +* store ZFLAG_BUCKET once we have recursed attempting to allocate +* a bucket for a bucket zone so we do not allow infinite bucket +* recursion. This cookie will even persist to frees of unused +* buckets via the allocation path or bucket allocations in the +* free path. +*/ + if ((uintptr_t)udata & UMA_ZFLAG_BUCKET) + return (NULL); + if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0) + udata = (void *)(uintptr_t)zone->uz_flags; + else + udata = (void *)((uintptr
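The recursion guard works by smuggling a few flag bits through the opaque udata argument that already flows down the allocation path, so the state survives however deep the bucket-for-a-bucket recursion goes. A small sketch of that cookie trick, with made-up flag values rather than the UMA definitions:

#include <stdint.h>
#include <stdio.h>

#define FLAG_NOVM   0x01
#define FLAG_BUCKET 0x02        /* "already allocating a bucket" marker */

void *
make_cookie(unsigned int flags)
{
    return ((void *)(uintptr_t)flags);
}

unsigned int
cookie_flags(void *udata)
{
    return ((unsigned int)(uintptr_t)udata);
}

int
main(void)
{
    void *udata = make_cookie(FLAG_NOVM);

    /* A nested allocation tags the cookie before recursing. */
    udata = make_cookie(cookie_flags(udata) | FLAG_BUCKET);
    if (cookie_flags(udata) & FLAG_BUCKET)
        printf("recursion detected, fail the bucket allocation\n");
    return (0);
}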
svn commit: r252330 - in head/sys: conf geom kern sys vm
Author: jeff Date: Fri Jun 28 03:51:20 2013 New Revision: 252330 URL: http://svnweb.freebsd.org/changeset/base/252330 Log: - Add a general purpose resource allocator, vmem, from NetBSD. It was originally inspired by the Solaris vmem detailed in the proceedings of usenix 2001. The NetBSD version was heavily refactored for bugs and simplicity. - Use this resource allocator to allocate the buffer and transient maps. Buffer cache defrags are reduced by 25% when used by filesystems with mixed block sizes. Ultimately this may permit dynamic buffer cache sizing on low KVA machines. Discussed with: alc, kib, attilio Tested by:pho Sponsored by: EMC / Isilon Storage Division Added: head/sys/kern/subr_vmem.c (contents, props changed) head/sys/sys/vmem.h (contents, props changed) Modified: head/sys/conf/files head/sys/geom/geom_io.c head/sys/kern/vfs_bio.c head/sys/sys/malloc.h head/sys/vm/vm.h head/sys/vm/vm_init.c head/sys/vm/vm_kern.c head/sys/vm/vm_kern.h head/sys/vm/vm_object.c head/sys/vm/vm_pager.c head/sys/vm/vm_pager.h Modified: head/sys/conf/files == --- head/sys/conf/files Fri Jun 28 03:41:23 2013(r252329) +++ head/sys/conf/files Fri Jun 28 03:51:20 2013(r252330) @@ -2797,6 +2797,7 @@ kern/subr_trap.c standard kern/subr_turnstile.c standard kern/subr_uio.cstandard kern/subr_unit.c standard +kern/subr_vmem.c standard kern/subr_witness.coptional witness kern/sys_capability.c standard kern/sys_generic.c standard Modified: head/sys/geom/geom_io.c == --- head/sys/geom/geom_io.c Fri Jun 28 03:41:23 2013(r252329) +++ head/sys/geom/geom_io.c Fri Jun 28 03:51:20 2013(r252330) @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -626,7 +627,6 @@ g_io_transient_map_bio(struct bio *bp) vm_offset_t addr; long size; u_int retried; - int rv; KASSERT(unmapped_buf_allowed, ("unmapped disabled")); @@ -636,10 +636,7 @@ g_io_transient_map_bio(struct bio *bp) retried = 0; atomic_add_long(&transient_maps, 1); retry: - vm_map_lock(bio_transient_map); - if (vm_map_findspace(bio_transient_map, vm_map_min(bio_transient_map), - size, &addr)) { - vm_map_unlock(bio_transient_map); + if (vmem_alloc(transient_arena, size, M_BESTFIT | M_NOWAIT, &addr)) { if (transient_map_retries != 0 && retried >= transient_map_retries) { g_io_deliver(bp, EDEADLK/* XXXKIB */); @@ -651,7 +648,7 @@ retry: /* * Naive attempt to quisce the I/O to get more * in-flight requests completed and defragment -* the bio_transient_map. +* the transient_arena. */ CTR3(KTR_GEOM, "g_down retrymap bp %p provider %s r %d", bp, bp->bio_to->name, retried); @@ -661,12 +658,6 @@ retry: goto retry; } } - rv = vm_map_insert(bio_transient_map, NULL, 0, addr, addr + size, - VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT); - KASSERT(rv == KERN_SUCCESS, - ("vm_map_insert(bio_transient_map) rv %d %jx %lx", - rv, (uintmax_t)addr, size)); - vm_map_unlock(bio_transient_map); atomic_add_int(&inflight_transient_maps, 1); pmap_qenter((vm_offset_t)addr, bp->bio_ma, OFF_TO_IDX(size)); bp->bio_data = (caddr_t)addr + bp->bio_ma_offset; Added: head/sys/kern/subr_vmem.c == --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/kern/subr_vmem.c Fri Jun 28 03:51:20 2013(r252330) @@ -0,0 +1,1372 @@ +/*- + * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi, + * Copyright (c) 2013 EMC Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + *notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *notice, this list of conditions and the following disclaimer in the + *documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMP
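For readers new to the interface, here is a minimal sketch of how a vmem arena is created and used, mirroring the transient-map change above. This is illustrative only and not code from the commit; the arena name and the 1 MB span are invented values.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/vmem.h>

static vmem_t *example_arena;

static void
example_arena_init(void)
{

        /* Manage the range [0, 1 MB) in PAGE_SIZE quanta. */
        example_arena = vmem_create("example arena", 0, 1024 * 1024,
            PAGE_SIZE, 0, M_WAITOK);
}

static int
example_arena_take(vmem_size_t size, vmem_addr_t *addrp)
{

        /* Best-fit keeps fragmentation down; fail rather than sleep. */
        return (vmem_alloc(example_arena, size, M_BESTFIT | M_NOWAIT, addrp));
}

static void
example_arena_give(vmem_addr_t addr, vmem_size_t size)
{

        vmem_free(example_arena, addr, size);
}

Callers that can tolerate failure, like g_io_transient_map_bio() above, retry or defer the request when vmem_alloc() returns non-zero.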
Re: svn commit: r252330 - in head/sys: conf geom kern sys vm
On Fri, 28 Jun 2013, Adrian Chadd wrote: Hi, Do we really need another allocator / resource manager just for this?

No; however, I have a follow-up patch to replace kmem with this, and then we will use it for NUMA allocations in the kernel. After that it is likely that we could replace several other, less efficient allocators with this. Solaris uses it for pids, tids, device unit numbers, etc. We could easily do the same. The existing allocators have failure modes, big-O cost, and allocation requirements that are not tolerable for use in the VM. This also has a very nice feature that works with UMA to provide per-CPU caches of arbitrary number ranges, so it is more scalable as well as providing for less fragmentation.

Thanks,
Jeff
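As a concrete illustration of the "number range" use mentioned above, an arena with a quantum of 1 behaves like a unit-number allocator; a non-zero qcache_max would additionally give it the UMA-backed per-CPU caches described. Nothing like this is in the tree as of this commit; the names and the 1..65535 range are invented.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/vmem.h>

static vmem_t *example_id_arena;

static void
example_id_init(void)
{

        /* Hand out integer IDs in [1, 65535]; quantum 1, no quantum cache. */
        example_id_arena = vmem_create("example ids", 1, 65535, 1, 0,
            M_WAITOK);
}

static int
example_id_alloc(u_int *idp)
{
        vmem_addr_t id;
        int error;

        error = vmem_alloc(example_id_arena, 1, M_BESTFIT | M_NOWAIT, &id);
        if (error == 0)
                *idp = (u_int)id;
        return (error);
}

static void
example_id_free(u_int id)
{

        vmem_free(example_id_arena, id, 1);
}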
svn commit: r253583 - head/sys/vm
Author: jeff Date: Tue Jul 23 22:52:38 2013 New Revision: 253583 URL: http://svnweb.freebsd.org/changeset/base/253583 Log: - Correct a stale comment. We don't have vclean() anymore. The work is done by vgonel() and destroy_vobject() should only be called once from VOP_INACTIVE(). Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/vnode_pager.c Modified: head/sys/vm/vnode_pager.c == --- head/sys/vm/vnode_pager.c Tue Jul 23 22:17:00 2013(r253582) +++ head/sys/vm/vnode_pager.c Tue Jul 23 22:52:38 2013(r253583) @@ -158,11 +158,6 @@ vnode_destroy_vobject(struct vnode *vp) VM_OBJECT_WLOCK(obj); if (obj->ref_count == 0) { /* -* vclean() may be called twice. The first time -* removes the primary reference to the object, -* the second time goes one further and is a -* special-case to terminate the object. -* * don't double-terminate the object */ if ((obj->flags & OBJ_DEAD) == 0) ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r253587 - head/sys/vm
Author: jeff Date: Wed Jul 24 01:25:56 2013 New Revision: 253587 URL: http://svnweb.freebsd.org/changeset/base/253587 Log: - Remove the long obsolete 'vm_pageout_algorithm' experiment. Discussed with: alc Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/vm_pageout.c Modified: head/sys/vm/vm_pageout.c == --- head/sys/vm/vm_pageout.cWed Jul 24 01:08:45 2013(r253586) +++ head/sys/vm/vm_pageout.cWed Jul 24 01:25:56 2013(r253587) @@ -157,7 +157,6 @@ static int vm_pageout_stats; static int vm_pageout_stats_interval; static int vm_pageout_full_stats; static int vm_pageout_full_stats_interval; -static int vm_pageout_algorithm; static int defer_swap_pageouts; static int disable_swap_pageouts; @@ -169,9 +168,6 @@ static int vm_swap_enabled = 1; static int vm_swap_idle_enabled = 0; #endif -SYSCTL_INT(_vm, VM_PAGEOUT_ALGORITHM, pageout_algorithm, - CTLFLAG_RW, &vm_pageout_algorithm, 0, "LRU page mgmt"); - SYSCTL_INT(_vm, OID_AUTO, max_launder, CTLFLAG_RW, &vm_max_launder, 0, "Limit dirty flushes in pageout"); @@ -756,9 +752,7 @@ vm_pageout_object_deactivate_pages(pmap_ if (actcount == 0) { p->act_count -= min(p->act_count, ACT_DECLINE); - if (!remove_mode && - (vm_pageout_algorithm || - p->act_count == 0)) { + if (!remove_mode && p->act_count == 0) { pmap_remove_all(p); vm_page_deactivate(p); } else @@ -1356,8 +1350,7 @@ relock_queues: vm_page_requeue_locked(m); else { m->act_count -= min(m->act_count, ACT_DECLINE); - if (vm_pageout_algorithm || - object->ref_count == 0 || + if (object->ref_count == 0 || m->act_count == 0) { page_shortage--; /* Dequeue to avoid later lock recursion. */ ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r253685 - in head/sys: amd64/amd64 i386/i386
Author: jeff Date: Fri Jul 26 19:06:14 2013 New Revision: 253685 URL: http://svnweb.freebsd.org/changeset/base/253685 Log: - Use kmem_malloc rather than kmem_alloc() for GDT/LDT/tss allocations etc. This eliminates some unusual uses of that API in favor of more typical uses of kmem_malloc(). Discussed with: kib/alc Tested by:pho Sponsored by: EMC / Isilon Storage Division Modified: head/sys/amd64/amd64/sys_machdep.c head/sys/i386/i386/sys_machdep.c Modified: head/sys/amd64/amd64/sys_machdep.c == --- head/sys/amd64/amd64/sys_machdep.c Fri Jul 26 19:02:17 2013 (r253684) +++ head/sys/amd64/amd64/sys_machdep.c Fri Jul 26 19:06:14 2013 (r253685) @@ -356,8 +356,8 @@ amd64_set_ioperm(td, uap) */ pcb = td->td_pcb; if (pcb->pcb_tssp == NULL) { - tssp = (struct amd64tss *)kmem_alloc(kernel_map, - ctob(IOPAGES+1)); + tssp = (struct amd64tss *)kmem_malloc(kernel_map, + ctob(IOPAGES+1), M_WAITOK); if (tssp == NULL) return (ENOMEM); iomap = (char *)&tssp[1]; @@ -463,8 +463,9 @@ user_ldt_alloc(struct proc *p, int force return (mdp->md_ldt); mtx_unlock(&dt_lock); new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK); - new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map, -max_ldt_segment * sizeof(struct user_segment_descriptor)); + new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_map, +max_ldt_segment * sizeof(struct user_segment_descriptor), +M_WAITOK); if (new_ldt->ldt_base == NULL) { FREE(new_ldt, M_SUBPROC); mtx_lock(&dt_lock); Modified: head/sys/i386/i386/sys_machdep.c == --- head/sys/i386/i386/sys_machdep.cFri Jul 26 19:02:17 2013 (r253684) +++ head/sys/i386/i386/sys_machdep.cFri Jul 26 19:06:14 2013 (r253685) @@ -164,8 +164,9 @@ sysarch(td, uap) break; case I386_SET_LDT: if (kargs.largs.descs != NULL) { - lp = (union descriptor *)kmem_alloc(kernel_map, - kargs.largs.num * sizeof(union descriptor)); + lp = (union descriptor *)kmem_malloc(kernel_map, + kargs.largs.num * sizeof(union descriptor), + M_WAITOK); if (lp == NULL) { error = ENOMEM; break; @@ -298,7 +299,8 @@ i386_extend_pcb(struct thread *td) 0 /* granularity */ }; - ext = (struct pcb_ext *)kmem_alloc(kernel_map, ctob(IOPAGES+1)); + ext = (struct pcb_ext *)kmem_malloc(kernel_map, ctob(IOPAGES+1), + M_WAITOK); if (ext == 0) return (ENOMEM); bzero(ext, sizeof(struct pcb_ext)); @@ -471,8 +473,8 @@ user_ldt_alloc(struct mdproc *mdp, int l M_SUBPROC, M_WAITOK); new_ldt->ldt_len = len = NEW_MAX_LD(len); -new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map, -round_page(len * sizeof(union descriptor))); +new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_map, +round_page(len * sizeof(union descriptor)), M_WAITOK); if (new_ldt->ldt_base == NULL) { free(new_ldt, M_SUBPROC); mtx_lock_spin(&dt_lock); @@ -511,8 +513,8 @@ user_ldt_alloc(struct mdproc *mdp, int l M_SUBPROC, M_WAITOK); new_ldt->ldt_len = len = NEW_MAX_LD(len); - new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map, - len * sizeof(union descriptor)); + new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_map, + len * sizeof(union descriptor), M_WAITOK); if (new_ldt->ldt_base == NULL) { free(new_ldt, M_SUBPROC); mtx_lock_spin(&dt_lock); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
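For reference, a hedged sketch of the call pattern the machdep code now uses; the wrapper below is invented for illustration, and at this revision kmem_malloc() still takes a vm_map_t.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>

/*
 * Illustrative only: allocate a wired, kernel-mapped buffer the way the
 * sys_machdep.c changes above now do, and check for failure the same way.
 */
static void *
example_wired_buf(vm_size_t size)
{
        vm_offset_t va;

        va = kmem_malloc(kernel_map, round_page(size), M_WAITOK);
        if (va == 0)
                return (NULL);
        bzero((void *)va, size);
        return ((void *)va);
}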
svn commit: r253697 - head/sys/vm
Author: jeff Date: Fri Jul 26 23:22:05 2013 New Revision: 253697 URL: http://svnweb.freebsd.org/changeset/base/253697 Log: Improve page LRU quality and simplify the logic. - Don't short-circuit aging tests for unmapped objects. This biases against unmapped file pages and transient mappings. - Always honor PGA_REFERENCED. We can now use this after soft busying to lazily restart the LRU. - Don't transition directly from active to cached bypassing the inactive queue. This frees recently used data much too early. - Rename actcount to act_delta to be more consistent with use and meaning. Reviewed by: kib, alc Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/vm_pageout.c Modified: head/sys/vm/vm_pageout.c == --- head/sys/vm/vm_pageout.cFri Jul 26 22:53:17 2013(r253696) +++ head/sys/vm/vm_pageout.cFri Jul 26 23:22:05 2013(r253697) @@ -708,7 +708,7 @@ vm_pageout_object_deactivate_pages(pmap_ { vm_object_t backing_object, object; vm_page_t p; - int actcount, remove_mode; + int act_delta, remove_mode; VM_OBJECT_ASSERT_LOCKED(first_object); if ((first_object->flags & OBJ_FICTITIOUS) != 0) @@ -739,17 +739,17 @@ vm_pageout_object_deactivate_pages(pmap_ vm_page_unlock(p); continue; } - actcount = pmap_ts_referenced(p); + act_delta = pmap_ts_referenced(p); if ((p->aflags & PGA_REFERENCED) != 0) { - if (actcount == 0) - actcount = 1; + if (act_delta == 0) + act_delta = 1; vm_page_aflag_clear(p, PGA_REFERENCED); } - if (p->queue != PQ_ACTIVE && actcount != 0) { + if (p->queue != PQ_ACTIVE && act_delta != 0) { vm_page_activate(p); - p->act_count += actcount; + p->act_count += act_delta; } else if (p->queue == PQ_ACTIVE) { - if (actcount == 0) { + if (act_delta == 0) { p->act_count -= min(p->act_count, ACT_DECLINE); if (!remove_mode && p->act_count == 0) { @@ -869,7 +869,7 @@ vm_pageout_scan(int pass) int page_shortage, maxscan, pcount; int addl_page_shortage; vm_object_t object; - int actcount; + int act_delta; int vnodes_skipped = 0; int maxlaunder; boolean_t queues_locked; @@ -989,44 +989,40 @@ vm_pageout_scan(int pass) queues_locked = FALSE; /* -* If the object is not being used, we ignore previous +* We bump the activation count if the page has been +* referenced while in the inactive queue. This makes +* it less likely that the page will be added back to the +* inactive queue prematurely again. Here we check the +* page tables (or emulated bits, if any), given the upper +* level VM system not knowing anything about existing * references. */ - if (object->ref_count == 0) { + act_delta = 0; + if ((m->aflags & PGA_REFERENCED) != 0) { vm_page_aflag_clear(m, PGA_REFERENCED); + act_delta = 1; + } + if (object->ref_count != 0) { + act_delta += pmap_ts_referenced(m); + } else { KASSERT(!pmap_page_is_mapped(m), ("vm_pageout_scan: page %p is mapped", m)); - - /* -* Otherwise, if the page has been referenced while in the -* inactive queue, we bump the "activation count" upwards, -* making it less likely that the page will be added back to -* the inactive queue prematurely again. Here we check the -* page tables (or emulated bits, if any), given the upper -* level VM system not knowing anything about existing -* references. -*/ - } else if ((m->aflags & PGA_REFERENCED) == 0 && - (actcount = pmap_ts_referenced(m)) != 0) { - vm_page_activate(m); - VM_OBJECT_WUNLOCK(object); - m->act_count += actcount + ACT_ADVANCE; - vm_page_unlock(m); -
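The heart of the new policy is how a page's recent use is sampled. Below is a small sketch, an illustrative helper rather than the committed code, of the combined test now applied during the inactive scan; the caller is assumed to hold the page lock, as the scan does.

#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>

/*
 * Illustrative only: the PGA_REFERENCED flag and the pmap referenced
 * bits both feed a single act_delta value; the pmap is consulted only
 * while the object still has references.
 */
static int
example_act_delta(vm_page_t m)
{
        int act_delta;

        act_delta = 0;
        if ((m->aflags & PGA_REFERENCED) != 0) {
                vm_page_aflag_clear(m, PGA_REFERENCED);
                act_delta = 1;
        }
        if (m->object->ref_count != 0)
                act_delta += pmap_ts_referenced(m);
        return (act_delta);
}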
svn commit: r249218 - in head/sys: fs/ext2fs kern ufs/ffs vm
Author: jeff Date: Sat Apr 6 22:21:23 2013 New Revision: 249218 URL: http://svnweb.freebsd.org/changeset/base/249218 Log: Prepare to replace the buf splay with a trie: - Don't insert BKGRDMARKER bufs into the splay or dirty/clean buf lists. No consumers need to find them there and it complicates the tree. These flags are all FFS specific and could be moved out of the buf cache. - Use pbgetvp() and pbrelvp() to associate the background and journal bufs with the vp. Not only is this much cheaper it makes more sense for these transient bufs. - Fix the assertions in pbget* and pbrel*. It's not safe to check list pointers which were never initialized. Use the BX flags instead. We also check B_PAGING in reassignbuf() so this should cover all cases. Discussed with: kib, mckusick, attilio Sponsored by: EMC / Isilon Storage Division Modified: head/sys/fs/ext2fs/ext2_alloc.c head/sys/kern/vfs_subr.c head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/ffs_vfsops.c head/sys/vm/vm_pager.c Modified: head/sys/fs/ext2fs/ext2_alloc.c == --- head/sys/fs/ext2fs/ext2_alloc.c Sat Apr 6 21:56:54 2013 (r249217) +++ head/sys/fs/ext2fs/ext2_alloc.c Sat Apr 6 22:21:23 2013 (r249218) @@ -794,8 +794,6 @@ ext2_clusteralloc(struct inode *ip, int goto fail_lock; bbp = (char *)bp->b_data; - bp->b_xflags |= BX_BKGRDWRITE; - EXT2_LOCK(ump); /* * Check to see if a cluster of the needed size (or bigger) is Modified: head/sys/kern/vfs_subr.c == --- head/sys/kern/vfs_subr.cSat Apr 6 21:56:54 2013(r249217) +++ head/sys/kern/vfs_subr.cSat Apr 6 22:21:23 2013(r249218) @@ -1312,8 +1312,7 @@ flushbuflist(struct bufv *bufv, int flag xflags = 0; if (nbp != NULL) { lblkno = nbp->b_lblkno; - xflags = nbp->b_xflags & - (BX_BKGRDMARKER | BX_VNDIRTY | BX_VNCLEAN); + xflags = nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN); } retval = EAGAIN; error = BUF_TIMELOCK(bp, @@ -1357,8 +1356,7 @@ flushbuflist(struct bufv *bufv, int flag if (nbp != NULL && (nbp->b_bufobj != bo || nbp->b_lblkno != lblkno || -(nbp->b_xflags & - (BX_BKGRDMARKER | BX_VNDIRTY | BX_VNCLEAN)) != xflags)) +(nbp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) != xflags)) break; /* nbp invalid */ } return (retval); @@ -1501,9 +1499,7 @@ buf_splay(daddr_t lblkno, b_xflags_t xfl return (NULL); lefttreemax = righttreemin = &dummy; for (;;) { - if (lblkno < root->b_lblkno || - (lblkno == root->b_lblkno && - (xflags & BX_BKGRDMARKER) < (root->b_xflags & BX_BKGRDMARKER))) { + if (lblkno < root->b_lblkno) { if ((y = root->b_left) == NULL) break; if (lblkno < y->b_lblkno) { @@ -1517,9 +1513,7 @@ buf_splay(daddr_t lblkno, b_xflags_t xfl /* Link into the new root's right tree. 
*/ righttreemin->b_left = root; righttreemin = root; - } else if (lblkno > root->b_lblkno || - (lblkno == root->b_lblkno && - (xflags & BX_BKGRDMARKER) > (root->b_xflags & BX_BKGRDMARKER))) { + } else if (lblkno > root->b_lblkno) { if ((y = root->b_right) == NULL) break; if (lblkno > y->b_lblkno) { @@ -1603,9 +1597,7 @@ buf_vlist_add(struct buf *bp, struct buf bp->b_left = NULL; bp->b_right = NULL; TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs); - } else if (bp->b_lblkno < root->b_lblkno || - (bp->b_lblkno == root->b_lblkno && - (bp->b_xflags & BX_BKGRDMARKER) < (root->b_xflags & BX_BKGRDMARKER))) { + } else if (bp->b_lblkno < root->b_lblkno) { bp->b_left = root->b_left; bp->b_right = root; root->b_left = NULL; @@ -1638,20 +1630,18 @@ gbincore(struct bufobj *bo, daddr_t lblk struct buf *bp; ASSERT_BO_LOCKED(bo); - if ((bp = bo->bo_clean.bv_root) != NULL && - bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER)) + if ((bp = bo->bo_clean.bv_root) != NULL && bp->b_lblkno == lblkno) return (bp); - if ((bp = bo->bo_dirty.bv_root) != NULL && - bp->b_lblkno == lblkno && !(bp->b_xflags &
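With the marker bufs no longer linked into the clean and dirty lists, lookups key purely on the logical block number. An illustrative wrapper (the name is invented) around the real gbincore() interface shown in the diff:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/bufobj.h>

/*
 * Illustrative only: find an in-core buffer for (bo, lblkno).  The
 * bufobj lock must be held; after this change the comparison is on
 * b_lblkno alone, since BKGRDMARKER bufs never appear on these lists.
 */
static struct buf *
example_incore(struct bufobj *bo, daddr_t lblkno)
{

        ASSERT_BO_LOCKED(bo);
        return (gbincore(bo, lblkno));
}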
svn commit: r262812 - head/sys/ufs/ufs
Author: jeff Date: Thu Mar 6 00:10:07 2014 New Revision: 262812 URL: http://svnweb.freebsd.org/changeset/base/262812 Log: - Gracefully handle truncation failures when trying to shrink directories. This could cause dirhash panics since the dirhash state would be successfully truncated while the directory was not. Reported by: pho Discussed with: mckusick Sponsored by: EMC / Isilon Storage Division MFC after:2 weeks Modified: head/sys/ufs/ufs/ufs_lookup.c Modified: head/sys/ufs/ufs/ufs_lookup.c == --- head/sys/ufs/ufs/ufs_lookup.c Wed Mar 5 23:37:25 2014 (r262811) +++ head/sys/ufs/ufs/ufs_lookup.c Thu Mar 6 00:10:07 2014 (r262812) @@ -1130,12 +1130,15 @@ ufs_direnter(dvp, tvp, dirp, cnp, newdir dp->i_endoff && dp->i_endoff < dp->i_size) { if (tvp != NULL) VOP_UNLOCK(tvp, 0); + error = UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, + IO_NORMAL | IO_SYNC, cr); + if (error != 0) + vprint("ufs_direnter: failted to truncate", dvp); #ifdef UFS_DIRHASH - if (dp->i_dirhash != NULL) + if (error == 0 && dp->i_dirhash != NULL) ufsdirhash_dirtrunc(dp, dp->i_endoff); #endif - (void) UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, - IO_NORMAL | IO_SYNC, cr); + error = 0; if (tvp != NULL) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); } ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r262814 - head/sys/ufs/ffs
Author: jeff Date: Thu Mar 6 00:13:21 2014 New Revision: 262814 URL: http://svnweb.freebsd.org/changeset/base/262814 Log: - If we fail to do a non-blocking acquire of a buf lock while doing a waiting sync pass we need to do a blocking acquire and restart. Another thread, typically the buf daemon, may have this buf locked and if we don't wait we can fail to sync the file. This led to a great variety of softdep panics because we rely on all dependencies being flushed before proceeding in several cases. Reported by: pho Discussed with: mckusick Sponsored by: EMC / Isilon Storage Division MFC after:2 weeks Modified: head/sys/ufs/ffs/ffs_vnops.c Modified: head/sys/ufs/ffs/ffs_vnops.c == --- head/sys/ufs/ffs/ffs_vnops.cThu Mar 6 00:11:47 2014 (r262813) +++ head/sys/ufs/ffs/ffs_vnops.cThu Mar 6 00:13:21 2014 (r262814) @@ -259,9 +259,17 @@ loop: continue; if (bp->b_lblkno > lbn) panic("ffs_syncvnode: syncing truncated data."); - if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) + if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) { + BO_UNLOCK(bo); + } else if (wait != 0) { + if (BUF_LOCK(bp, + LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, + BO_LOCKPTR(bo)) != 0) { + bp->b_vflags &= ~BV_SCANNED; + goto next; + } + } else continue; - BO_UNLOCK(bo); if ((bp->b_flags & B_DELWRI) == 0) panic("ffs_fsync: not dirty"); /* ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
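The locking pattern above, a non-blocking try followed by a blocking acquire that atomically releases the bufobj interlock, generalizes beyond ffs_syncvnode(). A hedged sketch follows; the helper and its return convention are invented for illustration.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/lockmgr.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/bufobj.h>

/*
 * Illustrative only.  Try a non-blocking acquire first; if that fails
 * and the caller can wait, sleep with LK_SLEEPFAIL so a changed buf
 * identity is noticed, and pass the bufobj lock as the interlock so it
 * is released atomically with the sleep.  Returns 0 with the buf locked
 * and the bufobj unlocked; a non-zero return means we either could not
 * wait or we slept (LK_SLEEPFAIL) and the caller must rescan.  The
 * bufobj lock is released on every path.
 */
static int
example_lock_buf(struct bufobj *bo, struct buf *bp, int canwait)
{

        ASSERT_BO_LOCKED(bo);
        if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0) {
                BO_UNLOCK(bo);
                return (0);
        }
        if (!canwait) {
                BO_UNLOCK(bo);
                return (EBUSY);
        }
        return (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
            BO_LOCKPTR(bo)));
}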
svn commit: r262917 - head/sys/kern
Author: jeff Date: Sat Mar 8 00:35:06 2014 New Revision: 262917 URL: http://svnweb.freebsd.org/changeset/base/262917 Log: - Make runq_steal_from more aggressive. Previously it would examine only a single priority queue. If that queue had a thread or threads which could not be migrated we would fail to steal load. This could cause starvation in situations where cores are idle. Submitted by: Doug Kilpatrick Tested by:pho Reviewed by: mav Sponsored by: EMC / Isilon Storage Division Modified: head/sys/kern/sched_ule.c Modified: head/sys/kern/sched_ule.c == --- head/sys/kern/sched_ule.c Sat Mar 8 00:14:40 2014(r262916) +++ head/sys/kern/sched_ule.c Sat Mar 8 00:35:06 2014(r262917) @@ -1057,32 +1057,27 @@ runq_steal_from(struct runq *rq, int cpu struct rqhead *rqh; struct thread *td, *first; int bit; - int pri; int i; rqb = &rq->rq_status; bit = start & (RQB_BPW -1); - pri = 0; first = NULL; again: for (i = RQB_WORD(start); i < RQB_LEN; bit = 0, i++) { if (rqb->rqb_bits[i] == 0) continue; - if (bit != 0) { - for (pri = bit; pri < RQB_BPW; pri++) - if (rqb->rqb_bits[i] & (1ul << pri)) - break; - if (pri >= RQB_BPW) + if (bit == 0) + bit = RQB_FFS(rqb->rqb_bits[i]); + for (; bit < RQB_BPW; bit++) { + if ((rqb->rqb_bits[i] & (1ul << bit)) == 0) continue; - } else - pri = RQB_FFS(rqb->rqb_bits[i]); - pri += (i << RQB_L2BPW); - rqh = &rq->rq_queues[pri]; - TAILQ_FOREACH(td, rqh, td_runq) { - if (first && THREAD_CAN_MIGRATE(td) && - THREAD_CAN_SCHED(td, cpu)) - return (td); - first = td; + rqh = &rq->rq_queues[bit + (i << RQB_L2BPW)]; + TAILQ_FOREACH(td, rqh, td_runq) { + if (first && THREAD_CAN_MIGRATE(td) && + THREAD_CAN_SCHED(td, cpu)) + return (td); + first = td; + } } } if (start != 0) { ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
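The change above boils down to visiting every set bit in each status word rather than stopping after the first. A small freestanding sketch of that bit-iteration pattern (plain C with invented names, not the scheduler's actual types):

#include <limits.h>
#include <stdio.h>
#include <strings.h>            /* ffsl() */

#define BPW     ((int)(sizeof(unsigned long) * CHAR_BIT))

/*
 * Visit every set bit in 'words', skipping bits below 'start' in the
 * first word, the way runq_steal_from() now scans the run-queue bitmap.
 */
static void
visit_bits(unsigned long *words, int nwords, int start)
{
        int i, bit;

        bit = start % BPW;
        for (i = start / BPW; i < nwords; bit = 0, i++) {
                if (words[i] == 0)
                        continue;
                if (bit == 0)
                        bit = ffsl((long)words[i]) - 1;
                for (; bit < BPW; bit++) {
                        if ((words[i] & (1UL << bit)) == 0)
                                continue;
                        printf("queue %d has runnable threads\n",
                            i * BPW + bit);
                }
        }
}

int
main(void)
{
        unsigned long words[2] = { 0x11, 0x4 };

        visit_bits(words, 2, 3);        /* skips bit 0 of word 0 */
        return (0);
}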
svn commit: r253949 - in head/sys: amd64/amd64 i386/i386
Author: jeff Date: Mon Aug 5 00:28:03 2013 New Revision: 253949 URL: http://svnweb.freebsd.org/changeset/base/253949 Log: - Introduce a specific function, pmap_remove_kernel_pde, for removing huge pages in the kernel's address space. This works around several asserts from pmap_demote_pde_locked that did not apply and gave false warnings. Discovered by:pho Reviewed by: alc Sponsored by: EMC / Isilon Storage Division Modified: head/sys/amd64/amd64/pmap.c head/sys/i386/i386/pmap.c Modified: head/sys/amd64/amd64/pmap.c == --- head/sys/amd64/amd64/pmap.c Sun Aug 4 23:45:04 2013(r253948) +++ head/sys/amd64/amd64/pmap.c Mon Aug 5 00:28:03 2013(r253949) @@ -2795,6 +2795,44 @@ pmap_demote_pde_locked(pmap_t pmap, pd_e } /* + * pmap_remove_kernel_pde: Remove a kernel superpage mapping. + */ +static void +pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) +{ + pd_entry_t newpde; + vm_paddr_t mptepa; + vm_page_t mpte; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mpte = pmap_lookup_pt_page(pmap, va); + if (mpte == NULL) + panic("pmap_remove_kernel_pde: Missing pt page."); + + pmap_remove_pt_page(pmap, mpte); + mptepa = VM_PAGE_TO_PHYS(mpte); + newpde = mptepa | PG_M | PG_A | PG_RW | PG_V; + + /* +* Initialize the page table page. +*/ + pagezero((void *)PHYS_TO_DMAP(mptepa)); + + /* +* Demote the mapping. +*/ + if (workaround_erratum383) + pmap_update_pde(pmap, va, pde, newpde); + else + pde_store(pde, newpde); + + /* +* Invalidate a stale recursive mapping of the page table page. +*/ + pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); +} + +/* * pmap_remove_pde: do the things to unmap a superpage in a process */ static int @@ -2837,8 +2875,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t } } if (pmap == kernel_pmap) { - if (!pmap_demote_pde_locked(pmap, pdq, sva, lockp)) - panic("pmap_remove_pde: failed demotion"); + pmap_remove_kernel_pde(pmap, pdq, sva); } else { mpte = pmap_lookup_pt_page(pmap, sva); if (mpte != NULL) { Modified: head/sys/i386/i386/pmap.c == --- head/sys/i386/i386/pmap.c Sun Aug 4 23:45:04 2013(r253948) +++ head/sys/i386/i386/pmap.c Mon Aug 5 00:28:03 2013(r253949) @@ -2773,6 +2773,44 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t } /* + * Removes a 2- or 4MB page mapping from the kernel pmap. + */ +static void +pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) +{ + pd_entry_t newpde; + vm_paddr_t mptepa; + vm_page_t mpte; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mpte = pmap_lookup_pt_page(pmap, va); + if (mpte == NULL) + panic("pmap_remove_kernel_pde: Missing pt page."); + + pmap_remove_pt_page(pmap, mpte); + mptepa = VM_PAGE_TO_PHYS(mpte); + newpde = mptepa | PG_M | PG_A | PG_RW | PG_V; + + /* +* Initialize the page table page. +*/ + pagezero((void *)&KPTmap[i386_btop(trunc_4mpage(va))]); + + /* +* Remove the mapping. +*/ + if (workaround_erratum383) + pmap_update_pde(pmap, va, pde, newpde); + else + pmap_kenter_pde(va, newpde); + + /* +* Invalidate the recursive mapping of the page table page. 
+*/ + pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); +} + +/* * pmap_remove_pde: do the things to unmap a superpage in a process */ static void @@ -2814,8 +2852,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t } } if (pmap == kernel_pmap) { - if (!pmap_demote_pde(pmap, pdq, sva)) - panic("pmap_remove_pde: failed demotion"); + pmap_remove_kernel_pde(pmap, pdq, sva); } else { mpte = pmap_lookup_pt_page(pmap, sva); if (mpte != NULL) { ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r254025 - in head/sys: amd64/amd64 arm/arm arm/at91 arm/mv/armadaxp arm/s3c2xx0 arm/xscale/i80321 arm/xscale/i8134x arm/xscale/ixp425 cddl/compat/opensolaris/kern cddl/compat/opensolari...
Author: jeff Date: Wed Aug 7 06:21:20 2013 New Revision: 254025 URL: http://svnweb.freebsd.org/changeset/base/254025 Log: Replace kernel virtual address space allocation with vmem. This provides transparent layering and better fragmentation. - Normalize functions that allocate memory to use kmem_* - Those that allocate address space are named kva_* - Those that operate on maps are named kmap_* - Implement recursive allocation handling for kmem_arena in vmem. Reviewed by: alc Tested by:pho Sponsored by: EMC / Isilon Storage Division Modified: head/sys/amd64/amd64/mp_machdep.c head/sys/amd64/amd64/pmap.c head/sys/amd64/amd64/sys_machdep.c head/sys/amd64/amd64/vm_machdep.c head/sys/arm/arm/bus_space_generic.c head/sys/arm/arm/busdma_machdep-v6.c head/sys/arm/arm/busdma_machdep.c head/sys/arm/arm/mp_machdep.c head/sys/arm/arm/pmap-v6.c head/sys/arm/arm/pmap.c head/sys/arm/arm/vm_machdep.c head/sys/arm/at91/at91.c head/sys/arm/mv/armadaxp/armadaxp_mp.c head/sys/arm/s3c2xx0/s3c2xx0_space.c head/sys/arm/xscale/i80321/i80321_space.c head/sys/arm/xscale/i8134x/i81342_space.c head/sys/arm/xscale/ixp425/ixp425_pci_space.c head/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c head/sys/cddl/compat/opensolaris/sys/kmem.h head/sys/compat/linux/linux_misc.c head/sys/compat/ndis/subr_ntoskrnl.c head/sys/dev/bktr/bktr_core.c head/sys/dev/drm/drm_scatter.c head/sys/dev/drm2/drm_scatter.c head/sys/dev/drm2/i915/intel_ringbuffer.c head/sys/dev/drm2/ttm/ttm_bo_util.c head/sys/dev/xen/blkback/blkback.c head/sys/dev/xen/netback/netback.c head/sys/dev/xen/xenpci/xenpci.c head/sys/i386/i386/machdep.c head/sys/i386/i386/mp_machdep.c head/sys/i386/i386/pmap.c head/sys/i386/i386/sys_machdep.c head/sys/i386/i386/vm_machdep.c head/sys/i386/ibcs2/imgact_coff.c head/sys/i386/pci/pci_cfgreg.c head/sys/i386/xen/mp_machdep.c head/sys/i386/xen/pmap.c head/sys/ia64/ia64/mp_machdep.c head/sys/kern/imgact_gzip.c head/sys/kern/init_main.c head/sys/kern/kern_exec.c head/sys/kern/kern_malloc.c head/sys/kern/kern_mbuf.c head/sys/kern/kern_sharedpage.c head/sys/kern/subr_busdma_bufalloc.c head/sys/kern/subr_vmem.c head/sys/kern/vfs_bio.c head/sys/mips/mips/mp_machdep.c head/sys/mips/mips/pmap.c head/sys/mips/mips/vm_machdep.c head/sys/mips/sibyte/sb_zbpci.c head/sys/ofed/include/linux/dma-mapping.h head/sys/ofed/include/linux/gfp.h head/sys/ofed/include/linux/linux_compat.c head/sys/pc98/pc98/machdep.c head/sys/powerpc/aim/mmu_oea.c head/sys/powerpc/aim/mmu_oea64.c head/sys/powerpc/aim/vm_machdep.c head/sys/powerpc/booke/pmap.c head/sys/powerpc/booke/vm_machdep.c head/sys/powerpc/powerpc/busdma_machdep.c head/sys/powerpc/powerpc/mp_machdep.c head/sys/sparc64/sparc64/bus_machdep.c head/sys/sparc64/sparc64/mem.c head/sys/sparc64/sparc64/mp_machdep.c head/sys/sparc64/sparc64/pmap.c head/sys/sparc64/sparc64/vm_machdep.c head/sys/vm/memguard.c head/sys/vm/memguard.h head/sys/vm/pmap.h head/sys/vm/uma_core.c head/sys/vm/vm_extern.h head/sys/vm/vm_glue.c head/sys/vm/vm_init.c head/sys/vm/vm_kern.c head/sys/vm/vm_kern.h head/sys/vm/vm_map.c head/sys/vm/vm_map.h head/sys/vm/vm_object.c head/sys/x86/x86/busdma_machdep.c head/sys/xen/gnttab.c Modified: head/sys/amd64/amd64/mp_machdep.c == --- head/sys/amd64/amd64/mp_machdep.c Wed Aug 7 06:05:57 2013 (r254024) +++ head/sys/amd64/amd64/mp_machdep.c Wed Aug 7 06:21:20 2013 (r254025) @@ -938,10 +938,14 @@ start_all_aps(void) apic_id = cpu_apic_ids[cpu]; /* allocate and set up an idle stack data page */ - bootstacks[cpu] = (void *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); - 
doublefault_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE); - nmi_stack = (char *)kmem_alloc(kernel_map, PAGE_SIZE); - dpcpu = (void *)kmem_alloc(kernel_map, DPCPU_SIZE); + bootstacks[cpu] = (void *)kmem_malloc(kernel_arena, + KSTACK_PAGES * PAGE_SIZE, M_WAITOK | M_ZERO); + doublefault_stack = (char *)kmem_malloc(kernel_arena, + PAGE_SIZE, M_WAITOK | M_ZERO); + nmi_stack = (char *)kmem_malloc(kernel_arena, PAGE_SIZE, + M_WAITOK | M_ZERO); + dpcpu = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE, + M_WAITOK | M_ZERO); bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 8; bootAP = cpu; Modified: head/sys/amd64/amd64/pmap.c == --- head/sys/amd64/amd64/pmap.c Wed Aug 7 06:05:57 2013(r254024) +++ head/sys/amd64/amd64/pmap.c Wed Aug 7 06:21:20 2013(r254025) @@ -860,7 +860,8 @@ pmap_init(void)
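A hedged before/after sketch of the API split described in the log; this fragment is illustrative and not taken from the diff, and error handling is omitted. Address-space-only allocations move to kva_alloc()/kva_free(), while backed allocations go through kmem_malloc()/kmem_free() against an arena.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>

/*
 * Illustrative only: one backed, wired allocation from kernel_arena
 * and one bare KVA range of the same size.
 */
static void
example_kva_and_kmem(vm_size_t size)
{
        vm_offset_t kva, mem;

        size = round_page(size);

        /* Backed, wired memory: previously kmem_alloc(kernel_map, size). */
        mem = kmem_malloc(kernel_arena, size, M_WAITOK | M_ZERO);

        /* Address space only, no pages backing it. */
        kva = kva_alloc(size);

        /* ... use them ... */

        kva_free(kva, size);
        kmem_free(kernel_arena, mem, size);
}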
Re: svn commit: r254025 - in head/sys: amd64/amd64 arm/arm arm/at91 arm/mv/armadaxp arm/s3c2xx0 arm/xscale/i80321 arm/xscale/i8134x arm/xscale/ixp425 cddl/compat/opensolaris/kern cddl/compat/opensolar
On Wed, 7 Aug 2013, Zbyszek Bodek wrote: On 07.08.2013 08:21, Jeff Roberson wrote: Author: jeff Date: Wed Aug 7 06:21:20 2013 New Revision: 254025 URL: http://svnweb.freebsd.org/changeset/base/254025 Log: Replace kernel virtual address space allocation with vmem. This provides transparent layering and better fragmentation. - Normalize functions that allocate memory to use kmem_* - Those that allocate address space are named kva_* - Those that operate on maps are named kmap_* - Implement recursive allocation handling for kmem_arena in vmem. Reviewed by: alc Tested by:pho Sponsored by: EMC / Isilon Storage Division Hello Jeff, I'm having some trouble on my ARM platform staring from this commit. Kernel panics on assertion very early. Please check out log below (as you can see bt doesn't look helpful but assertion message is visible. I can send you which functions are in bt if it is necessary). It would be very helpful to know which function is passing the unaligned value. I will resolve this later today if you can get me that information. Thanks, Jeff Best regards Zbyszek Bodek - ## Starting application at 0x00F0 ... GDB: no debug ports present KDB: debugger backends: ddb KDB: current backend: ddb Copyright (c) 1992-2013 The FreeBSD Project. Copyright (c) 1979, 1980, 1983, 1986, 1988, 1989, 1991, 1992, 1993, 1994 The Regents of the University of California. All rights reserved. FreeBSD is a registered trademark of The FreeBSD Foundation. FreeBSD 10.0-CURRENT #155 7ddb89a-dirty: Wed Aug 7 12:12:39 CEST 2013 zbb@:/home/zbb/projects/armsp/obj_gcc/arm.arm/home/zbb/projects/armsp/freebsd-arm-superpages/sys/ARMADAXP arm gcc version 4.2.1 20070831 patched [FreeBSD] WARNING: DIAGNOSTIC option enabled, expect reduced performance. panic: Assertion (size & vm->vm_quantum_mask) == 0 failed at /home/zbb/projects/armsp/freebsd-arm-superpages/sys/kern/subr_vmem.c:341 KDB: stack backtrace: (null)() at 0xc11f6874 pc = 0xc11f6874 lr = 0xc0f2dc00 (0xc0f2dc00) sp = 0xc1361c98 fp = 0xc1340288 (null)() at 0xc0f2dc00 pc = 0xc0f2dc00 lr = 0xc108dd14 (0xc108dd14) sp = 0xc1361db0 fp = 0xc1340288 r4 = 0xc133d654 (null)() at 0xc108dd14 pc = 0xc108dd14 lr = 0xc105a6f0 (0xc105a6f0) sp = 0xc1361db8 fp = 0xc1340288 r4 = 0xc132f940 (null)() at 0xc105a6f0 pc = 0xc105a6f0 lr = 0xc105a7dc (0xc105a7dc) sp = 0xc1361dd0 fp = 0xc1340288 r4 = 0xc124c6fc r5 = 0x1333 r6 = 0xc1340240 r7 = 0xc147d150 r8 = 0x0010 (null)() at 0xc105a7dc pc = 0xc105a7dc lr = 0xc10a2ef8 (0xc10a2ef8) sp = 0xc1361e08 fp = 0xc1340288 r0 = 0xc124c6fc r1 = 0xc12662b8 r2 = 0xc1266230 r3 = 0x0155 r4 = 0x0001 (null)() at 0xc10a2ef8 pc = 0xc10a2ef8 lr = 0xc10a37e4 (0xc10a37e4) sp = 0xc1361e20 fp = 0xc1340288 r4 = 0xc147d150 r5 = 0xc147d16c r6 = 0xc1340240 r7 = 0x1333 r8 = 0xc57b1000 (null)() at 0xc10a37e4 pc = 0xc10a37e4 lr = 0xc10a39d8 (0xc10a39d8) sp = 0xc1361e38 fp = 0xc1340288 r4 = 0xc1340240 r5 = 0x r6 = 0xc57b1000 r7 = 0x1333 r8 = 0x0010 (null)() at 0xc10a39d8 pc = 0xc10a39d8 lr = 0xc10a4f8c (0xc10a4f8c) sp = 0xc1361e50 fp = 0xc1340288 r4 = 0xc13402a4 r5 = 0x r6 = 0x0001 r7 = 0xc1340240 (null)() at 0xc10a4f8c pc = 0xc10a4f8c lr = 0xc1044398 (0xc1044398) sp = 0xc1361e98 fp = 0x r4 = 0x1333 r5 = 0xc1340240 r6 = 0xc1307574 r7 = 0x00f0004c r8 = 0x7f9ea674 r9 = 0x0001 r10 = 0x7ff1449c (null)() at 0xc1044398 pc = 0xc1044398 lr = 0xc1044408 (0xc1044408) sp = 0xc1361eb8 fp = 0x r4 = 0xc1291584 r5 = 0x00f00058 r6 = 0x00f0 (null)() at 0xc1044408 pc = 0xc1044408 lr = 0xc1010800 (0xc1010800) sp = 0xc1361ee8 fp = 0x r4 = 0xc1291584 r5 = 0x00f00058 r6 = 0x00f0 r7 = 
0x00f0004c r8 = 0x7f9ea674 r9 = 0x0001 r10 = 0x7ff1449c (null)() at 0xc1010800 pc = 0xc1010800 lr = 0xc0f00124 (0xc0f00124) sp = 0xc1361ef8 fp = 0x r4 = 0x00f00164 r5 = 0x00f00058 (null)() at 0xc0f00124 pc = 0xc0f00124 lr = 0xc0f00124 (0xc0f00124) sp = 0xc1361ef8 fp = 0x Unable to unwind further KDB: enter: panic [ thread pid 0 tid 0 ] Stopped at 0xc108dba8: ldrbr15, [r15, r15, ror r15]! db> ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
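The assertion in question fires when a size that is not a multiple of the arena's quantum reaches vmem, here very early in kernel arena setup on this ARM platform; the 0x1333 value visible in the backtrace registers is not page-aligned, which appears consistent with that. Whatever the entry point turns out to be, the caller-side rule is to round to the arena quantum first. An illustrative sketch (the helper is invented; kernel_arena and kmem_arena use PAGE_SIZE quanta):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/vmem.h>

/*
 * Illustrative only: sizes handed to a page-quantum arena must be
 * page-rounded, or the (size & vm_quantum_mask) == 0 assertion above
 * will trip.
 */
static int
example_add_span(vmem_t *arena, vmem_addr_t base, vmem_size_t size)
{

        /* A size like 0x1333 would trip the assert; round it up first. */
        size = round_page(size);
        return (vmem_add(arena, base, size, M_NOWAIT));
}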
svn commit: r254120 - head/sys/ofed/include/linux
Author: jeff Date: Fri Aug 9 03:24:12 2013 New Revision: 254120 URL: http://svnweb.freebsd.org/changeset/base/254120 Log: - Use the correct type in the linux bitops emulation. Submitted by: Maxim Ignatenko Modified: head/sys/ofed/include/linux/bitops.h Modified: head/sys/ofed/include/linux/bitops.h == --- head/sys/ofed/include/linux/bitops.hFri Aug 9 01:27:05 2013 (r254119) +++ head/sys/ofed/include/linux/bitops.hFri Aug 9 03:24:12 2013 (r254120) @@ -272,22 +272,25 @@ bitmap_empty(unsigned long *addr, int si return (1); } -#defineNBINT (NBBY * sizeof(int)) +#defineNBLONG (NBBY * sizeof(long)) #defineset_bit(i, a) \ -atomic_set_int(&((volatile int *)(a))[(i)/NBINT], 1 << (i) % NBINT) +atomic_set_long(&((volatile long *)(a))[(i)/NBLONG], 1 << (i) % NBLONG) #defineclear_bit(i, a) \ -atomic_clear_int(&((volatile int *)(a))[(i)/NBINT], 1 << (i) % NBINT) +atomic_clear_long(&((volatile long *)(a))[(i)/NBLONG], 1 << (i) % NBLONG) #definetest_bit(i, a) \ -!!(atomic_load_acq_int(&((volatile int *)(a))[(i)/NBINT]) & 1 << ((i) % NBINT)) +!!(atomic_load_acq_long(&((volatile long *)(a))[(i)/NBLONG]) & \ +1 << ((i) % NBLONG)) static inline long test_and_clear_bit(long bit, long *var) { long val; + var += bit / (sizeof(long) * NBBY); + bit %= sizeof(long) * NBBY; bit = 1 << bit; do { val = *(volatile long *)var; @@ -301,6 +304,8 @@ test_and_set_bit(long bit, long *var) { long val; + var += bit / (sizeof(long) * NBBY); + bit %= sizeof(long) * NBBY; bit = 1 << bit; do { val = *(volatile long *)var; ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
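A worked illustration of the indexing the fix adds, in plain C with a 64-bit long assumed: bit 70 lives in word 70 / 64 = 1 at position 70 % 64 = 6. The old code never adjusted 'var', so any bit number at or beyond one word's worth of bits was handled incorrectly.

#include <limits.h>
#include <stdio.h>

int
main(void)
{
        long bitmap[2] = { 0, 0 };
        long bit = 70;
        long *word;
        long mask;

        /* What test_and_set_bit() now does before touching memory: */
        word = bitmap + bit / (long)(sizeof(long) * CHAR_BIT);  /* word 1 */
        mask = 1L << (bit % (long)(sizeof(long) * CHAR_BIT));   /* bit 6 */
        *word |= mask;

        printf("bitmap[1] = %#lx\n", bitmap[1]);        /* prints 0x40 */
        return (0);
}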
svn commit: r254121 - head/sys/ofed/include/linux
Author: jeff Date: Fri Aug 9 03:24:48 2013 New Revision: 254121 URL: http://svnweb.freebsd.org/changeset/base/254121 Log: - Correctly handle various edge cases in sysfs emulation. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/ofed/include/linux/sysfs.h Modified: head/sys/ofed/include/linux/sysfs.h == --- head/sys/ofed/include/linux/sysfs.h Fri Aug 9 03:24:12 2013 (r254120) +++ head/sys/ofed/include/linux/sysfs.h Fri Aug 9 03:24:48 2013 (r254121) @@ -97,11 +97,14 @@ sysctl_handle_attr(SYSCTL_HANDLER_ARGS) error = -len; if (error != EIO) goto out; + buf[0] = '\0'; + } else if (len) { + len--; + if (len >= PAGE_SIZE) + len = PAGE_SIZE - 1; + /* Trim trailing newline. */ + buf[len] = '\0'; } - - /* Trim trailing newline. */ - len--; - buf[len] = '\0'; } /* Leave one trailing byte to append a newline. */ ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r254122 - in head/sys: ofed/include/rdma sys
Author: jeff Date: Fri Aug 9 03:26:17 2013 New Revision: 254122 URL: http://svnweb.freebsd.org/changeset/base/254122 Log: - Reserve a special AF for SDP. The one we were incorrectly using before was taken by another AF. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/ofed/include/rdma/sdp_socket.h head/sys/sys/socket.h Modified: head/sys/ofed/include/rdma/sdp_socket.h == --- head/sys/ofed/include/rdma/sdp_socket.h Fri Aug 9 03:24:48 2013 (r254121) +++ head/sys/ofed/include/rdma/sdp_socket.h Fri Aug 9 03:26:17 2013 (r254122) @@ -3,10 +3,12 @@ #ifndef SDP_SOCKET_H #define SDP_SOCKET_H +#ifndef __FreeBSD__ #ifndef AF_INET_SDP #define AF_INET_SDP 27 #define PF_INET_SDP AF_INET_SDP #endif +#endif #ifndef SDP_ZCOPY_THRESH #define SDP_ZCOPY_THRESH 80 Modified: head/sys/sys/socket.h == --- head/sys/sys/socket.h Fri Aug 9 03:24:48 2013(r254121) +++ head/sys/sys/socket.h Fri Aug 9 03:26:17 2013(r254122) @@ -230,7 +230,9 @@ struct accept_filter_arg { #defineAF_ARP 35 #defineAF_BLUETOOTH36 /* Bluetooth sockets */ #defineAF_IEEE8021137 /* IEEE 802.11 protocol */ -#defineAF_MAX 38 +#defineAF_INET_SDP 40 /* OFED Socket Direct Protocol ipv4 */ +#defineAF_INET6_SDP42 /* OFED Socket Direct Protocol ipv6 */ +#defineAF_MAX 42 /* * When allocating a new AF_ constant, please only allocate * even numbered constants for FreeBSD until 134 as odd numbered AF_ @@ -353,6 +355,8 @@ struct sockproto { #definePF_ARP AF_ARP #definePF_BLUETOOTHAF_BLUETOOTH #definePF_IEEE80211AF_IEEE80211 +#definePF_INET_SDP AF_INET_SDP +#definePF_INET6_SDPAF_INET6_SDP #definePF_MAX AF_MAX ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r254123 - in head/contrib/ofed: libsdp/src/linux management/infiniband-diags/src management/opensm/opensm
Author: jeff Date: Fri Aug 9 03:29:46 2013 New Revision: 254123 URL: http://svnweb.freebsd.org/changeset/base/254123 Log: - Fix compile errors from the clang conversion - Grab AF_SDP_INET from sys/socket.h Submitted by: Garrett Cooper Sponsored by: EMC / Isilon Storage Division Modified: head/contrib/ofed/libsdp/src/linux/sdp_inet.h head/contrib/ofed/management/infiniband-diags/src/sminfo.c head/contrib/ofed/management/opensm/opensm/osm_console.c head/contrib/ofed/management/opensm/opensm/osm_subnet.c Modified: head/contrib/ofed/libsdp/src/linux/sdp_inet.h == --- head/contrib/ofed/libsdp/src/linux/sdp_inet.h Fri Aug 9 03:26:17 2013(r254122) +++ head/contrib/ofed/libsdp/src/linux/sdp_inet.h Fri Aug 9 03:29:46 2013(r254123) @@ -29,8 +29,12 @@ */ #ifndef SOLARIS_BUILD +#ifdef __FreeBSD__ +#include +#else #define AF_INET_SDP 27 /* SDP socket protocol family */ #define AF_INET6_SDP 28 /* SDP socket protocol family */ +#endif #else #define AF_INET_SDP 31 /* This is an invalid family on native solaris * and will only work using QuickTransit */ Modified: head/contrib/ofed/management/infiniband-diags/src/sminfo.c == --- head/contrib/ofed/management/infiniband-diags/src/sminfo.c Fri Aug 9 03:26:17 2013(r254122) +++ head/contrib/ofed/management/infiniband-diags/src/sminfo.c Fri Aug 9 03:29:46 2013(r254123) @@ -72,10 +72,10 @@ enum { }; char *statestr[] = { - [SMINFO_NOTACT] "SMINFO_NOTACT", - [SMINFO_DISCOVER] "SMINFO_DISCOVER", - [SMINFO_STANDBY] "SMINFO_STANDBY", - [SMINFO_MASTER] "SMINFO_MASTER", + [SMINFO_NOTACT] = "SMINFO_NOTACT", + [SMINFO_DISCOVER] = "SMINFO_DISCOVER", + [SMINFO_STANDBY] = "SMINFO_STANDBY", + [SMINFO_MASTER] = "SMINFO_MASTER", }; #define STATESTR(s)(((unsigned)(s)) < SMINFO_STATE_LAST ? statestr[s] : "???") Modified: head/contrib/ofed/management/opensm/opensm/osm_console.c == --- head/contrib/ofed/management/opensm/opensm/osm_console.cFri Aug 9 03:26:17 2013(r254122) +++ head/contrib/ofed/management/opensm/opensm/osm_console.cFri Aug 9 03:29:46 2013(r254123) @@ -67,7 +67,10 @@ static struct { time_t previous; void (*loop_function) (osm_opensm_t * p_osm, FILE * out); } loop_command = { -on: 0, delay_s: 2, loop_function:NULL}; + .on = 0, + .delay_s = 2, + .loop_function = NULL, +}; static const struct command console_cmds[]; Modified: head/contrib/ofed/management/opensm/opensm/osm_subnet.c == --- head/contrib/ofed/management/opensm/opensm/osm_subnet.c Fri Aug 9 03:26:17 2013(r254122) +++ head/contrib/ofed/management/opensm/opensm/osm_subnet.c Fri Aug 9 03:29:46 2013(r254123) @@ -482,7 +482,7 @@ static void log_report(const char *fmt, va_start(args, fmt); vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); - printf(buf); + printf("%s", buf); cl_log_event("OpenSM", CL_LOG_INFO, buf, NULL, 0); } @@ -500,7 +500,7 @@ static void log_config_value(char *name, n = sizeof(buf); snprintf(buf + n, sizeof(buf) - n, "\n"); va_end(args); - printf(buf); + printf("%s", buf); cl_log_event("OpenSM", CL_LOG_INFO, buf, NULL, 0); } ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
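The sminfo.c and osm_console.c hunks replace GCC-only initializer syntax ("[index] value" for arrays, "field: value" for structs) with standard C99 designated initializers that clang compiles cleanly. A minimal illustration with an invented struct and values:

#include <stdio.h>

struct loop_cmd {
        int     on;
        int     delay_s;
        void    (*fn)(void);
};

/*
 * Obsolete GCC extension the old code used:
 *      static struct loop_cmd cmd = { on: 0, delay_s: 2, fn: NULL };
 * Standard C99 designated initializers:
 */
static struct loop_cmd cmd = {
        .on = 0,
        .delay_s = 2,
        .fn = NULL,
};

int
main(void)
{

        printf("delay %d\n", cmd.delay_s);
        return (0);
}

The osm_subnet.c hunks are a separate fix: printf(buf) treats logged data as a format string, so it becomes printf("%s", buf).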
svn commit: r254304 - in head/sys: sys vm
Author: jeff Date: Tue Aug 13 21:56:16 2013 New Revision: 254304 URL: http://svnweb.freebsd.org/changeset/base/254304 Log: Improve pageout flow control to wakeup more frequently and do less work while maintaining better LRU of active pages. - Change v_free_target to include the quantity previously represented by v_cache_min so we don't need to add them together everywhere we use them. - Add a pageout_wakeup_thresh that sets the free page count trigger for waking the page daemon. Set this 10% above v_free_min so we wakeup before any phase transitions in vm users. - Adjust down v_free_target now that we're willing to accept more pagedaemon wakeups. This means we process fewer pages in one iteration as well, leading to shorter lock hold times and less overall disruption. - Eliminate vm_pageout_page_stats(). This was a minor variation on the PQ_ACTIVE segment of the normal pageout daemon. Instead we now process 1 / vm_pageout_update_period pages every second. This causes us to visit the whole active list every 60 seconds. Previously we would only maintain the active LRU when we were short on pages which would mean it could be woefully out of date. Reviewed by: alc (slight variant of this) Discussed with: alc, kib, jhb Sponsored by: EMC / Isilon Storage Division Modified: head/sys/sys/vmmeter.h head/sys/vm/vm_page.c head/sys/vm/vm_page.h head/sys/vm/vm_pageout.c Modified: head/sys/sys/vmmeter.h == --- head/sys/sys/vmmeter.h Tue Aug 13 21:49:32 2013(r254303) +++ head/sys/sys/vmmeter.h Tue Aug 13 21:56:16 2013(r254304) @@ -98,7 +98,7 @@ struct vmmeter { u_int v_inactive_count; /* (q) pages inactive */ u_int v_cache_count;/* (f) pages on cache queue */ u_int v_cache_min; /* (c) min pages desired on cache queue */ - u_int v_cache_max; /* (c) max pages in cached obj */ + u_int v_cache_max; /* (c) max pages in cached obj (unused) */ u_int v_pageout_free_min; /* (c) min pages reserved for kernel */ u_int v_interrupt_free_min; /* (c) reserved pages for int code */ u_int v_free_severe;/* (c) severe page depletion point */ @@ -118,6 +118,8 @@ struct vmmeter { extern struct vmmeter cnt; +extern int vm_pageout_wakeup_thresh; + /* * Return TRUE if we are under our severe low-free-pages threshold * @@ -170,10 +172,7 @@ static __inline int vm_paging_target(void) { -return ( - (cnt.v_free_target + cnt.v_cache_min) - - (cnt.v_free_count + cnt.v_cache_count) -); +return (cnt.v_free_target - (cnt.v_free_count + cnt.v_cache_count)); } /* @@ -184,10 +183,7 @@ static __inline int vm_paging_needed(void) { -return ( - (cnt.v_free_reserved + cnt.v_cache_min) > - (cnt.v_free_count + cnt.v_cache_count) -); +return (cnt.v_free_count + cnt.v_cache_count < vm_pageout_wakeup_thresh); } #endif Modified: head/sys/vm/vm_page.c == --- head/sys/vm/vm_page.c Tue Aug 13 21:49:32 2013(r254303) +++ head/sys/vm/vm_page.c Tue Aug 13 21:56:16 2013(r254304) @@ -259,7 +259,6 @@ vm_page_domain_init(struct vm_domain *vm "vm active pagequeue"; *__DECONST(int **, &vmd->vmd_pagequeues[PQ_ACTIVE].pq_vcnt) = &cnt.v_active_count; - vmd->vmd_fullintervalcount = 0; vmd->vmd_page_count = 0; vmd->vmd_free_count = 0; vmd->vmd_segs = 0; Modified: head/sys/vm/vm_page.h == --- head/sys/vm/vm_page.h Tue Aug 13 21:49:32 2013(r254303) +++ head/sys/vm/vm_page.h Tue Aug 13 21:56:16 2013(r254304) @@ -223,7 +223,6 @@ struct vm_pagequeue { struct vm_domain { struct vm_pagequeue vmd_pagequeues[PQ_COUNT]; - int vmd_fullintervalcount; u_int vmd_page_count; u_int vmd_free_count; long vmd_segs; /* bitmask of the segments */ Modified: head/sys/vm/vm_pageout.c == --- 
head/sys/vm/vm_pageout.cTue Aug 13 21:49:32 2013(r254303) +++ head/sys/vm/vm_pageout.cTue Aug 13 21:56:16 2013(r254304) @@ -146,6 +146,7 @@ SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ int vm_pages_needed; /* Event on which pageout daemon sleeps */ int vm_pageout_deficit;/* Estimated number of pages deficit */ int vm_pageout_pages_needed; /* flag saying that the pageout daemon needs pages */ +int vm_pageout_wakeup_thresh; #if !defined(NO_SWAPPING) static int vm_pageout_req_swapout; /* XXX */ @@ -155,11 +156,7 @@ static struct mtx vm_daemon_mtx; MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF); #endif static in
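To put illustrative numbers on the new behavior (these figures are examples, not values from the commit): on a machine whose v_free_min works out to 25,000 pages, pageout_wakeup_thresh lands around 27,500 pages, so the page daemon is woken while a healthy reserve remains rather than at the old, lower trigger. With vm_pageout_update_period at its default of 60 seconds, an active queue holding 600,000 pages is scanned at roughly 10,000 pages per second, so every active page still has its act_count examined about once a minute without the old vm_pageout_page_stats() full-queue sweeps.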
svn commit: r254307 - in head/sys: kern vm
Author: jeff Date: Tue Aug 13 22:40:43 2013 New Revision: 254307 URL: http://svnweb.freebsd.org/changeset/base/254307 Log: - Add a statically allocated memguard arena since it is needed very early on. - Pass the appropriate flags to vmem_xalloc() when allocating space for the arena from kmem_arena. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/kern/subr_vmem.c head/sys/vm/memguard.c head/sys/vm/vm_kern.h Modified: head/sys/kern/subr_vmem.c == --- head/sys/kern/subr_vmem.c Tue Aug 13 22:05:50 2013(r254306) +++ head/sys/kern/subr_vmem.c Tue Aug 13 22:40:43 2013(r254307) @@ -57,6 +57,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include "opt_vm.h" + #include #include #include @@ -223,6 +225,11 @@ vmem_t *kmem_arena = &kmem_arena_storage vmem_t *buffer_arena = &buffer_arena_storage; vmem_t *transient_arena = &transient_arena_storage; +#ifdef DEBUG_MEMGUARD +static struct vmem memguard_arena_storage; +vmem_t *memguard_arena = &memguard_arena_storage; +#endif + /* * Fill the vmem's boundary tag cache. We guarantee that boundary tag * allocation will not fail once bt_fill() passes. To do so we cache Modified: head/sys/vm/memguard.c == --- head/sys/vm/memguard.c Tue Aug 13 22:05:50 2013(r254306) +++ head/sys/vm/memguard.c Tue Aug 13 22:40:43 2013(r254307) @@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -100,7 +101,6 @@ SYSCTL_PROC(_vm_memguard, OID_AUTO, desc CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, memguard_sysctl_desc, "A", "Short description of memory type to monitor"); -static vmem_t *memguard_map = NULL; static vm_offset_t memguard_cursor; static vm_offset_t memguard_base; static vm_size_t memguard_mapsize; @@ -206,8 +206,8 @@ memguard_init(vmem_t *parent) { vm_offset_t base; - vmem_alloc(parent, memguard_mapsize, M_WAITOK, &base); - memguard_map = vmem_create("memguard arena", base, memguard_mapsize, + vmem_alloc(parent, memguard_mapsize, M_BESTFIT | M_WAITOK, &base); + vmem_init(memguard_arena, "memguard arena", base, memguard_mapsize, PAGE_SIZE, 0, M_WAITOK); memguard_cursor = base; memguard_base = base; @@ -311,7 +311,7 @@ memguard_alloc(unsigned long req_size, i * of physical memory whether we allocate or hand off to * uma_large_alloc(), so keep those. */ - if (vmem_size(memguard_map, VMEM_ALLOC) >= memguard_physlimit && + if (vmem_size(memguard_arena, VMEM_ALLOC) >= memguard_physlimit && req_size < PAGE_SIZE) { addr = (vm_offset_t)NULL; memguard_fail_pgs++; @@ -328,8 +328,9 @@ memguard_alloc(unsigned long req_size, i * map, unless vm_map_findspace() is tweaked. */ for (;;) { - if (vmem_xalloc(memguard_map, size_v, 0, 0, 0, memguard_cursor, - VMEM_ADDR_MAX, M_BESTFIT | M_NOWAIT, &addr) == 0) + if (vmem_xalloc(memguard_arena, size_v, 0, 0, 0, + memguard_cursor, VMEM_ADDR_MAX, + M_BESTFIT | M_NOWAIT, &addr) == 0) break; /* * The map has no space. 
This may be due to @@ -348,7 +349,7 @@ memguard_alloc(unsigned long req_size, i addr += PAGE_SIZE; rv = kmem_back(kmem_object, addr, size_p, flags); if (rv != KERN_SUCCESS) { - vmem_xfree(memguard_map, addr, size_v); + vmem_xfree(memguard_arena, addr, size_v); memguard_fail_pgs++; addr = (vm_offset_t)NULL; goto out; @@ -419,7 +420,7 @@ memguard_free(void *ptr) kmem_unback(kmem_object, addr, size); if (sizev > size) addr -= PAGE_SIZE; - vmem_xfree(memguard_map, addr, sizev); + vmem_xfree(memguard_arena, addr, sizev); if (req_size < PAGE_SIZE) memguard_wasted -= (PAGE_SIZE - req_size); } Modified: head/sys/vm/vm_kern.h == --- head/sys/vm/vm_kern.h Tue Aug 13 22:05:50 2013(r254306) +++ head/sys/vm/vm_kern.h Tue Aug 13 22:40:43 2013(r254307) @@ -71,6 +71,7 @@ extern struct vmem *kernel_arena; extern struct vmem *kmem_arena; extern struct vmem *buffer_arena; extern struct vmem *transient_arena; +extern struct vmem *memguard_arena; extern vm_offset_t swapbkva; extern u_long vm_kmem_size; ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r254308 - head/sys/kern
Author: jeff Date: Tue Aug 13 22:41:24 2013 New Revision: 254308 URL: http://svnweb.freebsd.org/changeset/base/254308 Log: - Disable quantum caches on the kmem_arena. This can make fragmentation worse on small KVA systems. I had intended to only enable it for debugging. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/kern/kern_malloc.c Modified: head/sys/kern/kern_malloc.c == --- head/sys/kern/kern_malloc.c Tue Aug 13 22:40:43 2013(r254307) +++ head/sys/kern/kern_malloc.c Tue Aug 13 22:41:24 2013(r254308) @@ -747,7 +747,7 @@ kmeminit(void) tmp = vm_kmem_size; #endif vmem_init(kmem_arena, "kmem arena", kva_alloc(tmp), tmp, PAGE_SIZE, - PAGE_SIZE * 16, 0); + 0, 0); vmem_set_reclaim(kmem_arena, kmem_reclaim); #ifdef DEBUG_MEMGUARD ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r254387 - head/sys/vm
Author: jeff Date: Thu Aug 15 22:29:49 2013 New Revision: 254387 URL: http://svnweb.freebsd.org/changeset/base/254387 Log: - Fix bug in r254304. Use the ACTIVE pq count for the active list processing, not inactive. This was the result of a bad merge. Reported by: pho Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/vm_pageout.c Modified: head/sys/vm/vm_pageout.c == --- head/sys/vm/vm_pageout.cThu Aug 15 21:48:29 2013(r254386) +++ head/sys/vm/vm_pageout.cThu Aug 15 22:29:49 2013(r254387) @@ -1286,6 +1286,8 @@ relock_queues: * Compute the number of pages we want to try to move from the * active queue to the inactive queue. */ + pq = &vmd->vmd_pagequeues[PQ_ACTIVE]; + vm_pagequeue_lock(pq); pcount = pq->pq_cnt; page_shortage = vm_paging_target() + cnt.v_inactive_target - cnt.v_inactive_count; @@ -1304,8 +1306,6 @@ relock_queues: * track the per-page activity counter and use it to locate * deactivation candidates. */ - pq = &vmd->vmd_pagequeues[PQ_ACTIVE]; - vm_pagequeue_lock(pq); m = TAILQ_FIRST(&pq->pq_pl); while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) { ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r254543 - in head/sys: kern vm
Author: jeff Date: Mon Aug 19 23:02:39 2013 New Revision: 254543 URL: http://svnweb.freebsd.org/changeset/base/254543 Log: - Use an arbitrary but reasonably large import size for kva on architectures that don't support superpages. This keeps the number of spans and internal fragmentation lower. - When the user asks for alignment from vmem_xalloc adjust the imported size by 2*align to be certain we can satisfy the allocation. This comes at the expense of potential failures when the backend can't supply enough memory but could supply the requested size and alignment. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/kern/subr_vmem.c head/sys/vm/vm_init.c Modified: head/sys/kern/subr_vmem.c == --- head/sys/kern/subr_vmem.c Mon Aug 19 22:25:36 2013(r254542) +++ head/sys/kern/subr_vmem.c Mon Aug 19 23:02:39 2013(r254543) @@ -758,6 +758,7 @@ vmem_add1(vmem_t *vm, vmem_addr_t addr, bt_t *btfree; MPASS(type == BT_TYPE_SPAN || type == BT_TYPE_SPAN_STATIC); + MPASS((size & vm->vm_quantum_mask) == 0); btspan = bt_alloc(vm); btspan->bt_type = type; @@ -805,7 +806,7 @@ vmem_destroy1(vmem_t *vm) } static int -vmem_import(vmem_t *vm, vmem_size_t size, int flags) +vmem_import(vmem_t *vm, vmem_size_t size, vmem_size_t align, int flags) { vmem_addr_t addr; int error; @@ -813,6 +814,12 @@ vmem_import(vmem_t *vm, vmem_size_t size if (vm->vm_importfn == NULL) return EINVAL; + /* +* To make sure we get a span that meets the alignment we double it +* and add the size to the tail. This slightly overestimates. +*/ + if (align != vm->vm_quantum_mask + 1) + size = (align * 2) + size; size = roundup(size, vm->vm_import_quantum); /* @@ -1157,7 +1164,7 @@ vmem_xalloc(vmem_t *vm, const vmem_size_ * imported region. It is up to the user to specify the * import quantum such that it can satisfy any allocation. */ - if (vmem_import(vm, size, flags) == 0) + if (vmem_import(vm, size, align, flags) == 0) continue; /* Modified: head/sys/vm/vm_init.c == --- head/sys/vm/vm_init.c Mon Aug 19 22:25:36 2013(r254542) +++ head/sys/vm/vm_init.c Mon Aug 19 23:02:39 2013(r254543) @@ -156,7 +156,8 @@ vm_mem_init(dummy) #if VM_NRESERVLEVEL > 0 1 << (VM_LEVEL_0_ORDER + PAGE_SHIFT)); #else - PAGE_SIZE); + /* On non-superpage architectures want large import sizes. */ + PAGE_SIZE * 1024); #endif kmem_init_zero_region(); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
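A worked illustration of the alignment padding, with example numbers: suppose a caller wants 64 KB aligned to 64 KB and the arena would otherwise import exactly 64 KB. The imported span can begin at any address, say 16 KB past an alignment boundary, and then contains no 64 KB-aligned run of 64 KB at all. Importing align * 2 + size = 192 KB guarantees that some 64 KB boundary inside the span still has 64 KB of room after it; the minimal guarantee would be closer to size + align, which is why the code comment calls this a slight overestimate, and why the import can now fail in cases where an exactly sized, already aligned span from the backend would have sufficed.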
svn commit: r254544 - head/sys/vm
Author: jeff Date: Mon Aug 19 23:54:24 2013 New Revision: 254544 URL: http://svnweb.freebsd.org/changeset/base/254544 Log: - Increase the active lru refresh interval to 10 minutes. This has been shown to negatively impact some workloads and the goal is only to eliminate worst case behaviors for very long periods of paging inactivity. Eventually we should determine a more complex scaling factor for this feature. - Rate limit low memory callback handlers to limit thrashing. Set the default to 10 seconds. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/vm_pageout.c Modified: head/sys/vm/vm_pageout.c == --- head/sys/vm/vm_pageout.cMon Aug 19 23:02:39 2013(r254543) +++ head/sys/vm/vm_pageout.cMon Aug 19 23:54:24 2013(r254544) @@ -159,6 +159,8 @@ static int vm_max_launder = 32; static int vm_pageout_update_period; static int defer_swap_pageouts; static int disable_swap_pageouts; +static int lowmem_period = 10; +static int lowmem_ticks; #if defined(NO_SWAPPING) static int vm_swap_enabled = 0; @@ -179,6 +181,9 @@ SYSCTL_INT(_vm, OID_AUTO, pageout_update CTLFLAG_RW, &vm_pageout_update_period, 0, "Maximum active LRU update period"); +SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RW, &lowmem_period, 0, + "Low memory callback period"); + #if defined(NO_SWAPPING) SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RD, &vm_swap_enabled, 0, "Enable entire process swapout"); @@ -901,9 +906,10 @@ vm_pageout_scan(struct vm_domain *vmd, i /* * If we need to reclaim memory ask kernel caches to return -* some. +* some. We rate limit to avoid thrashing. */ - if (pass > 0) { + if (vmd == &vm_dom[0] && pass > 0 && + lowmem_ticks + (lowmem_period * hz) < ticks) { /* * Decrease registered cache sizes. */ @@ -913,6 +919,7 @@ vm_pageout_scan(struct vm_domain *vmd, i * drained above. */ uma_reclaim(); + lowmem_ticks = ticks; } /* @@ -1680,10 +1687,11 @@ vm_pageout(void) /* * Set interval in seconds for active scan. We want to visit each -* page at least once a minute. +* page at least once every ten minutes. This is to prevent worst +* case paging behaviors with stale active LRU. */ if (vm_pageout_update_period == 0) - vm_pageout_update_period = 60; + vm_pageout_update_period = 600; /* XXX does not really belong here */ if (vm_page_max_wired == 0) ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r254622 - head/sys/vm
Author: jeff Date: Wed Aug 21 22:39:19 2013 New Revision: 254622 URL: http://svnweb.freebsd.org/changeset/base/254622 Log: - Eliminate the vm object lock from the active queue scan. It is not necessary since we do not free or cache the page from active anymore. Document the one possible race that is harmless. Sponsored by: EMC / Isilon Storage Division Discussed with: alc Modified: head/sys/vm/vm_pageout.c Modified: head/sys/vm/vm_pageout.c == --- head/sys/vm/vm_pageout.cWed Aug 21 22:37:15 2013(r254621) +++ head/sys/vm/vm_pageout.cWed Aug 21 22:39:19 2013(r254622) @@ -1333,25 +1333,6 @@ relock_queues: m = next; continue; } - object = m->object; - if (!VM_OBJECT_TRYWLOCK(object) && - !vm_pageout_fallback_object_lock(m, &next)) { - VM_OBJECT_WUNLOCK(object); - vm_page_unlock(m); - m = next; - continue; - } - - /* -* Don't deactivate pages that are busy. -*/ - if (vm_page_busied(m) || m->hold_count != 0) { - vm_page_unlock(m); - VM_OBJECT_WUNLOCK(object); - vm_page_requeue_locked(m); - m = next; - continue; - } /* * The count for pagedaemon pages is done after checking the @@ -1367,7 +1348,15 @@ relock_queues: vm_page_aflag_clear(m, PGA_REFERENCED); act_delta += 1; } - if (object->ref_count != 0) + /* +* Unlocked object ref count check. Two races are possible. +* 1) The ref was transitioning to zero and we saw non-zero, +*the pmap bits will be checked unnecessarily. +* 2) The ref was transitioning to one and we saw zero. +*The page lock prevents a new reference to this page so +*we need not check the reference bits. +*/ + if (m->object->ref_count != 0) act_delta += pmap_ts_referenced(m); /* @@ -1387,9 +1376,6 @@ relock_queues: * queue depending on usage. */ if (act_delta == 0) { - KASSERT(object->ref_count != 0 || - !pmap_page_is_mapped(m), - ("vm_pageout_scan: page %p is mapped", m)); /* Dequeue to avoid later lock recursion. */ vm_page_dequeue_locked(m); vm_page_deactivate(m); @@ -1397,7 +1383,6 @@ relock_queues: } else vm_page_requeue_locked(m); vm_page_unlock(m); - VM_OBJECT_WUNLOCK(object); m = next; } vm_pagequeue_unlock(pq); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
Re: svn commit: r250411 - in head/sys: conf kern sys
On Thu, 9 May 2013, Marcel Moolenaar wrote: Author: marcel Date: Thu May 9 16:28:18 2013 New Revision: 250411 URL: http://svnweb.freebsd.org/changeset/base/250411 Log: Add option WITNESS_NO_VNODE to suppress printing LORs between VNODE locks. To support this, VNODE locks are created with the LK_IS_VNODE flag. This flag is propagated down using the LO_IS_VNODE flag. Note that WITNESS still records the LOR. Only the printing and the optional entering into the kernel debugger is bypassed with the WITNESS_NO_VNODE option. I'm replying to the original commit because the resulting thread got way out of hand. We need to all take a deep breath and take a pragmatic approach to solving the problem at hand. Let me first say I understand the utility here as this is also coming up in my organization. Test, and users, do not want to see erroneous warning messages. I understand that. Let's find a solution. Secondly, I think this project has grown too far for us to commit changes like this without some focused discussion. We need to be more mindful of the size of the impact and the number of people who are interested in a particular area. I'm not picking on you Marcel because this sort of thing has been coming up lately and we have all been guilty of it from time to time. There are more companies and individuals than ever trying to push work into the repository and we're having some growing pains. I am intimately familiar with the problems that lead to these erroneous witness messages as I have tracked down many of them and am even responsible for the code that generates them in some cases. Let me first outline a handful of generic problems. The root cause is that witness can not determine the real order between two locks due to relationships too complex to describe with a pair of strings. One example, which has been brought up, is the hierarchical nature of vnode locks. This impacts vnodes within one filesystem but it also involves vnodes between two different filesystems as you cross mount points. We can construct perfectly valid and deadlock free chains of mount points that have two different filesystem types in different orders which will LOR at the boundaries. We already skip duplicates to avoid this problem within each filesystem. We need to skip cross-filesystem duplicates, most desirably at the few specific places where this happens. This problem comes up especially for devfs because we lock devvps while file vnodes are locked but we lock devfs directories after the rootfs lock when crossing mountpoints in lookup. A second example, is locks of a fundamentally different type that have a complex ordering relationship. For example, a vnode lock may be acquired after a buf lock belonging to the parent's directory block. A cg buf lock may be acquired after any file buf lock. Here we want to ignore interactions between these two specific types at this particular location but not others as they may be unsafe. The third example, is a complex locking pattern with shared locks as presented by dirhash. We are seeing a similar pattern develop in the vm where we are going to use an exclusive object lock to protect pages or a shared object lock + a page lock. The semantics only get more complex as we push for more scalability. I expect to see more of these patterns develop. None of these problems can be solved with names alone. So far we've just lived with the warnings and we're no longer willing to accept that. 
What we need is a solution that blesses the specific instances and the specific lock classes involved without silencing legitimate warnings that may only occur after new code is added. For example, it may be safe to add a sx lock around some vnode code but you may not notice that you LOR if you silence all witness warnings related to the vnode lock site. I believe that the perfect solution would be a mechanism that could teach witness about and enforce these specific relationships. However, that may be computationally prohibitive and too complex to code. A more reasonable option would be to bless the specific relationships at the specific call sites. Turning all witness off at particular sites or with particular types renders important infrastructure useless for very large functional areas. It's also important to distinguish between squelching the error message from eliminating the other state that is saved at lock sites. We already have lock names and types. What I would propose we do is make the type 'vnode' for all vnodes and 'buf' for all bufs with the names used for the specific filesystems. Then you could specify a DUPOK that automatically blesses any filesystem to filesystem related LORs. In this way witness still records the call sites and unrelated LORs or panics still have the acquisition information. You could eventually unwind this to only DUPOK at the specific currently known places that we ant
svn commit: r250551 - in head/sys: conf kern sys
Author: jeff Date: Sun May 12 04:05:01 2013 New Revision: 250551 URL: http://svnweb.freebsd.org/changeset/base/250551 Log: - Add a new general purpose path-compressed radix trie which can be used with any structure containing a uint64_t index. The tree code auto-generates type safe wrappers. - Eliminate the buf splay and replace it with pctrie. This is not only significantly faster with large files but also allows for the possibility of shared locking. Reviewed by:alc, attilio Sponsored by: EMC / Isilon Storage Division Added: head/sys/kern/subr_pctrie.c (contents, props changed) head/sys/sys/_pctrie.h - copied, changed from r249323, head/sys/vm/_vm_radix.h head/sys/sys/pctrie.h (contents, props changed) Modified: head/sys/conf/files head/sys/kern/vfs_subr.c head/sys/sys/buf.h head/sys/sys/bufobj.h Modified: head/sys/conf/files == --- head/sys/conf/files Sun May 12 03:36:28 2013(r250550) +++ head/sys/conf/files Sun May 12 04:05:01 2013(r250551) @@ -2760,6 +2760,7 @@ kern/subr_module.cstandard kern/subr_msgbuf.c standard kern/subr_param.c standard kern/subr_pcpu.c standard +kern/subr_pctrie.c standard kern/subr_power.c standard kern/subr_prf.cstandard kern/subr_prof.c standard Added: head/sys/kern/subr_pctrie.c == --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/kern/subr_pctrie.c Sun May 12 04:05:01 2013(r250551) @@ -0,0 +1,705 @@ +/* + * Copyright (c) 2013 EMC Corp. + * Copyright (c) 2011 Jeffrey Roberson + * Copyright (c) 2008 Mayur Shardul + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *notice, this list of conditions and the following disclaimer in the + *documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + * Path-compressed radix trie implementation. + * + * The implementation takes into account the following rationale: + * - Size of the nodes should be as small as possible but still big enough + * to avoid a large maximum depth for the trie. This is a balance + * between the necessity to not wire too much physical memory for the nodes + * and the necessity to avoid too much cache pollution during the trie + * operations. + * - There is not a huge bias toward the number of lookup operations over + * the number of insert and remove operations. This basically implies + * that optimizations supposedly helping one operation but hurting the + * other might be carefully evaluated. 
+ * - On average not many nodes are expected to be fully populated, hence + * level compression may just complicate things. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_ddb.h" + +#include +#include +#include +#include + +#ifdef DDB +#include +#endif + +/* + * These widths should allow the pointers to a node's children to fit within + * a single cache line. The extra levels from a narrow width should not be + * a problem thanks to path compression. + */ +#ifdef __LP64__ +#definePCTRIE_WIDTH4 +#else +#definePCTRIE_WIDTH3 +#endif + +#definePCTRIE_COUNT(1 << PCTRIE_WIDTH) +#definePCTRIE_MASK (PCTRIE_COUNT - 1) +#definePCTRIE_LIMIT(howmany((sizeof(uint64_t) * NBBY), PCTRIE_WIDTH) - 1) + +/* Flag bits stored in node pointers. */ +#definePCTRIE_ISLEAF 0x1 +#definePCTRIE_FLAGS0x1 +#definePCTRIE_PAD PCTRIE_FLAGS + +/* Returns one unit associated with specified level. */ +#definePCTRIE_UNITLEVEL(lev) \ +
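Since the new subr_pctrie.c is cut off above, a minimal sketch of how the key is consumed may be useful (illustrative only, using the PCTRIE_WIDTH/PCTRIE_MASK constants shown; the committed helpers may differ in detail):

    /* Slot within a node for a given level of the trie. */
    static __inline int
    pctrie_slot_sketch(uint64_t index, int level)
    {

        return ((index >> (level * PCTRIE_WIDTH)) & PCTRIE_MASK);
    }

With PCTRIE_WIDTH of 4 on LP64 each node has 16 children and a 64-bit key needs at most 16 levels; path compression skips the levels where only a single child would be populated, which is what keeps lookups on sparse indices short.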
svn commit: r250578 - head/sys/sys
Author: jeff Date: Sun May 12 20:44:28 2013 New Revision: 250578 URL: http://svnweb.freebsd.org/changeset/base/250578 Log: - pctrie really only requires two byte alignment so that there is a single bit available for a flag in the pointer. However, it felt more correct to enforce natural alignment of the key pointer. Unfortunately on 32bit architectures 64bit integers are not always naturally aligned. Change the assert to enforce only 32bit alignment of the 64bit key for now to fix the build. A more correct fix would be to properly sort the struct buf fields which definitely suffer from bloat due to padding. Modified: head/sys/sys/pctrie.h Modified: head/sys/sys/pctrie.h == --- head/sys/sys/pctrie.h Sun May 12 16:50:18 2013(r250577) +++ head/sys/sys/pctrie.h Sun May 12 20:44:28 2013(r250578) @@ -38,7 +38,11 @@ #definePCTRIE_DEFINE(name, type, field, allocfn, freefn) \ \ CTASSERT(sizeof(((struct type *)0)->field) == sizeof(uint64_t)); \ -CTASSERT((__offsetof(struct type, field) & (sizeof(uint64_t) - 1)) == 0); \ +/* \ + * XXX This assert protects flag bits, it does not enforce natural \ + * alignment. 32bit architectures do not naturally align 64bit fields. \ + */\ +CTASSERT((__offsetof(struct type, field) & (sizeof(uint32_t) - 1)) == 0); \ \ static __inline struct type * \ name##_PCTRIE_VAL2PTR(uint64_t *val) \ ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
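The two-byte alignment requirement exists because the trie tags each stored pointer with a flag in bit 0. A minimal sketch of the tag/untag step (illustrative, reusing the PCTRIE_ISLEAF and PCTRIE_FLAGS values from r250551):

    /* Tag: safe because any 2-byte-aligned pointer has bit 0 clear. */
    node = (struct pctrie_node *)((uintptr_t)val | PCTRIE_ISLEAF);

    /* Untag: mask the flag bits off to recover the key pointer. */
    val = (uint64_t *)((uintptr_t)node & ~(uintptr_t)PCTRIE_FLAGS);

The CTASSERT only needs to guarantee that low bit, so relaxing it to 32-bit alignment is sufficient for correctness even though it no longer proves natural alignment of the 64-bit key.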
svn commit: r251171 - in head/sys: fs/ext2fs fs/nandfs fs/nfsclient fs/nfsserver kern nfsclient nfsserver sys ufs/ffs
Author: jeff Date: Fri May 31 00:43:41 2013 New Revision: 251171 URL: http://svnweb.freebsd.org/changeset/base/251171 Log: - Convert the bufobj lock to rwlock. - Use a shared bufobj lock in getblk() and inmem(). - Convert softdep's lk to rwlock to match the bufobj lock. - Move INFREECNT to b_flags and protect it with the buf lock. - Remove unnecessary locking around bremfree() and BKGRDINPROG. Sponsored by: EMC / Isilon Storage Division Discussed with: mckusick, kib, mdf Modified: head/sys/fs/ext2fs/ext2_inode.c head/sys/fs/nandfs/nandfs_segment.c head/sys/fs/nandfs/nandfs_vnops.c head/sys/fs/nfsclient/nfs_clvnops.c head/sys/fs/nfsserver/nfs_nfsdport.c head/sys/kern/vfs_bio.c head/sys/kern/vfs_cluster.c head/sys/kern/vfs_default.c head/sys/kern/vfs_subr.c head/sys/nfsclient/nfs_subs.c head/sys/nfsclient/nfs_vnops.c head/sys/nfsserver/nfs_serv.c head/sys/sys/buf.h head/sys/sys/bufobj.h head/sys/ufs/ffs/ffs_inode.c head/sys/ufs/ffs/ffs_snapshot.c head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/ffs_vfsops.c Modified: head/sys/fs/ext2fs/ext2_inode.c == --- head/sys/fs/ext2fs/ext2_inode.c Fri May 31 00:31:45 2013 (r251170) +++ head/sys/fs/ext2fs/ext2_inode.c Fri May 31 00:43:41 2013 (r251171) @@ -43,6 +43,7 @@ #include #include #include +#include #include #include Modified: head/sys/fs/nandfs/nandfs_segment.c == --- head/sys/fs/nandfs/nandfs_segment.c Fri May 31 00:31:45 2013 (r251170) +++ head/sys/fs/nandfs/nandfs_segment.c Fri May 31 00:43:41 2013 (r251171) @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -702,7 +703,7 @@ nandfs_save_buf(struct buf *bp, uint64_t if (bp->b_bufobj != bo) { BO_LOCK(bp->b_bufobj); BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, - BO_MTX(bp->b_bufobj)); + BO_LOCKPTR(bp->b_bufobj)); KASSERT(BUF_ISLOCKED(bp), ("Problem with locking buffer")); } Modified: head/sys/fs/nandfs/nandfs_vnops.c == --- head/sys/fs/nandfs/nandfs_vnops.c Fri May 31 00:31:45 2013 (r251170) +++ head/sys/fs/nandfs/nandfs_vnops.c Fri May 31 00:43:41 2013 (r251171) @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -556,7 +557,7 @@ restart_locked: continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, - BO_MTX(bo)) == ENOLCK) + BO_LOCKPTR(bo)) == ENOLCK) goto restart; bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~(B_ASYNC | B_MANAGED); Modified: head/sys/fs/nfsclient/nfs_clvnops.c == --- head/sys/fs/nfsclient/nfs_clvnops.c Fri May 31 00:31:45 2013 (r251170) +++ head/sys/fs/nfsclient/nfs_clvnops.c Fri May 31 00:43:41 2013 (r251171) @@ -2852,7 +2852,7 @@ loop: error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, - BO_MTX(bo), "nfsfsync", slpflag, slptimeo); + BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo); if (error == 0) { BUF_UNLOCK(bp); goto loop; Modified: head/sys/fs/nfsserver/nfs_nfsdport.c == --- head/sys/fs/nfsserver/nfs_nfsdport.cFri May 31 00:31:45 2013 (r251170) +++ head/sys/fs/nfsserver/nfs_nfsdport.cFri May 31 00:43:41 2013 (r251171) @@ -1321,7 +1321,7 @@ nfsvno_fsync(struct vnode *vp, u_int64_t */ if ((bp = gbincore(&vp->v_bufobj, lblkno)) != NULL) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | - LK_INTERLOCK, BO_MTX(bo)) == ENOLCK) { + LK_INTERLOCK, BO_LOCKPTR(bo)) == ENOLCK) { BO_LOCK(bo); continue; /* retry */ } Modified: head/sys/kern/vfs_bio.c == --- head/sys/kern/vfs_bio.c Fri May 31 00:31:45 2013(r251170) +++ head/sys/kern/vfs_bio.c Fri May 31 00:43:41 2013(r251171) @@ -418,11 +418,9 @@ bufcountwakeup(struct buf *bp) { int old; 
- KASSERT((bp->b_v
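The payoff of the rwlock conversion is that lookup-only paths can take the bufobj lock shared. A minimal sketch of that fast path (illustrative; BO_LOCKPTR() comes from the diff, and the committed getblk()/inmem() additionally keep the lock across the LK_INTERLOCK handoff to BUF_LOCK()):

    rw_rlock(BO_LOCKPTR(bo));        /* shared is enough for a lookup */
    bp = gbincore(bo, lblkno);
    rw_runlock(BO_LOCKPTR(bo));

Paths that add or remove buffers from the bufobj still take the lock exclusive with rw_wlock().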
svn commit: r251446 - head/sys/kern
Author: jeff Date: Wed Jun 5 23:53:00 2013 New Revision: 251446 URL: http://svnweb.freebsd.org/changeset/base/251446 Log: - Consolidate duplicate code into support functions. - Split the bqlock into bqclean and bqdirty locks. - Only acquire the wakeup synchronization locks when we cross a threshold requiring them. - Restructure the way flushbufqueues() targets work so they are more smp friendly and sane. Reviewed by: kib Discussed with: mckusick, attilio Sponsored by: EMC / Isilon Storage Division Mvfs_bio.c Modified: head/sys/kern/vfs_bio.c Modified: head/sys/kern/vfs_bio.c == --- head/sys/kern/vfs_bio.c Wed Jun 5 23:28:29 2013(r251445) +++ head/sys/kern/vfs_bio.c Wed Jun 5 23:53:00 2013(r251446) @@ -113,10 +113,11 @@ static void vfs_setdirty_locked_object(s static void vfs_vmio_release(struct buf *bp); static int vfs_bio_clcheck(struct vnode *vp, int size, daddr_t lblkno, daddr_t blkno); -static int buf_do_flush(struct vnode *vp); +static int buf_flush(struct vnode *vp, int); static int flushbufqueues(struct vnode *, int, int); static void buf_daemon(void); static void bremfreel(struct buf *bp); +static __inline void bd_wakeup(void); #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) static int sysctl_bufspace(SYSCTL_HANDLER_ARGS); @@ -217,8 +218,8 @@ SYSCTL_INT(_vfs, OID_AUTO, mappingrestar static int flushbufqtarget = 100; SYSCTL_INT(_vfs, OID_AUTO, flushbufqtarget, CTLFLAG_RW, &flushbufqtarget, 0, "Amount of work to do in flushbufqueues when helping bufdaemon"); -static long notbufdflashes; -SYSCTL_LONG(_vfs, OID_AUTO, notbufdflashes, CTLFLAG_RD, ¬bufdflashes, 0, +static long notbufdflushes; +SYSCTL_LONG(_vfs, OID_AUTO, notbufdflushes, CTLFLAG_RD, ¬bufdflushes, 0, "Number of dirty buffer flushes done by the bufdaemon helpers"); static long barrierwrites; SYSCTL_LONG(_vfs, OID_AUTO, barrierwrites, CTLFLAG_RW, &barrierwrites, 0, @@ -228,6 +229,37 @@ SYSCTL_INT(_vfs, OID_AUTO, unmapped_buf_ "Permit the use of the unmapped i/o"); /* + * Lock for the non-dirty bufqueues + */ +static struct mtx_padalign bqclean; + +/* + * Lock for the dirty queue. + */ +static struct mtx_padalign bqdirty; + +/* + * This lock synchronizes access to bd_request. + */ +static struct mtx_padalign bdlock; + +/* + * This lock protects the runningbufreq and synchronizes runningbufwakeup and + * waitrunningbufspace(). + */ +static struct mtx_padalign rbreqlock; + +/* + * Lock that protects needsbuffer and the sleeps/wakeups surrounding it. + */ +static struct mtx_padalign nblock; + +/* + * Lock that protects bdirtywait. + */ +static struct mtx_padalign bdirtylock; + +/* * Wakeup point for bufdaemon, as well as indicator of whether it is already * active. Set to 1 when the bufdaemon is already "on" the queue, 0 when it * is idling. @@ -243,11 +275,6 @@ static int bd_request; static int bd_speedupreq; /* - * This lock synchronizes access to bd_request. - */ -static struct mtx bdlock; - -/* * bogus page -- for I/O to/from partially complete buffers * this is a temporary solution to the problem, but it is not * really that bad. it would be better to split the buffer @@ -263,25 +290,19 @@ vm_page_t bogus_page; */ static int runningbufreq; -/* - * This lock protects the runningbufreq and synchronizes runningbufwakeup and - * waitrunningbufspace(). - */ -static struct mtx rbreqlock; - /* * Synchronization (sleep/wakeup) variable for buffer requests. * Can contain the VFS_BIO_NEED flags defined below; setting/clearing is done * by and/or. 
- * Used in numdirtywakeup(), bufspacewakeup(), bufcountwakeup(), bwillwrite(), + * Used in numdirtywakeup(), bufspacewakeup(), bufcountadd(), bwillwrite(), * getnewbuf(), and getblk(). */ static int needsbuffer; /* - * Lock that protects needsbuffer and the sleeps/wakeups surrounding it. + * Synchronization for bwillwrite() waiters. */ -static struct mtx nblock; +static int bdirtywait; /* * Definitions for the buffer free lists. @@ -301,9 +322,6 @@ static TAILQ_HEAD(bqueues, buf) bufqueue static int bq_len[BUFFER_QUEUES]; #endif -/* Lock for the bufqueues */ -static struct mtx bqlock; - /* * Single global constant for BUF_WMESG, to avoid getting multiple references. * buf_wmesg is referred from macros. @@ -311,7 +329,6 @@ static struct mtx bqlock; const char *buf_wmesg = BUF_WMESG; #define VFS_BIO_NEED_ANY 0x01/* any freeable buffer */ -#define VFS_BIO_NEED_DIRTYFLUSH0x02/* waiting for dirty buffer flush */ #define VFS_BIO_NEED_FREE 0x04/* wait for free bufs, hi hysteresis */ #define VFS_BIO_NEED_BUFSPACE 0x08/* wait for buf space, lo hysteresis */ @@ -337,25 +354,69 @@ sysctl_bufspace(SYSCTL_HANDLER_ARGS) #ifdef DIRECTIO extern void ffs_raw
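A minimal sketch of what the bqclean/bqdirty split buys (illustrative only; it assumes the pre-existing QUEUE_DIRTY index and queue layout, not the committed helpers): dirty-queue traffic from bufdaemon and flushbufqueues() now serializes on its own cache-line-padded mutex, so clean-queue consumers such as getnewbuf() no longer contend with it.

    /* Illustrative only: dirty-queue insertion under its own lock. */
    mtx_lock(&bqdirty);
    TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY], bp, b_freelist);
    mtx_unlock(&bqdirty);

    /* Clean-queue operations take bqclean instead. */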
svn commit: r251703 - in head/sys: amd64/amd64 i386/i386 i386/xen kern mips/mips sparc64/sparc64 sys
Author: jeff Date: Thu Jun 13 20:46:03 2013 New Revision: 251703 URL: http://svnweb.freebsd.org/changeset/base/251703 Log: - Add a BIT_FFS() macro and use it to replace cpusetffs_obj() Discussed with: attilio Sponsored by: EMC / Isilon Storage Division Modified: head/sys/amd64/amd64/mp_machdep.c head/sys/i386/i386/mp_machdep.c head/sys/i386/i386/pmap.c head/sys/i386/xen/mp_machdep.c head/sys/i386/xen/pmap.c head/sys/kern/kern_cpuset.c head/sys/mips/mips/mp_machdep.c head/sys/sparc64/sparc64/mp_machdep.c head/sys/sys/bitset.h head/sys/sys/cpuset.h Modified: head/sys/amd64/amd64/mp_machdep.c == --- head/sys/amd64/amd64/mp_machdep.c Thu Jun 13 20:41:09 2013 (r251702) +++ head/sys/amd64/amd64/mp_machdep.c Thu Jun 13 20:46:03 2013 (r251703) @@ -1150,7 +1150,7 @@ smp_targeted_tlb_shootdown(cpuset_t mask ipi_all_but_self(vector); } else { ncpu = 0; - while ((cpu = cpusetobj_ffs(&mask)) != 0) { + while ((cpu = CPU_FFS(&mask)) != 0) { cpu--; CPU_CLR(cpu, &mask); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, @@ -1299,7 +1299,7 @@ ipi_selected(cpuset_t cpus, u_int ipi) if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - while ((cpu = cpusetobj_ffs(&cpus)) != 0) { + while ((cpu = CPU_FFS(&cpus)) != 0) { cpu--; CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); Modified: head/sys/i386/i386/mp_machdep.c == --- head/sys/i386/i386/mp_machdep.c Thu Jun 13 20:41:09 2013 (r251702) +++ head/sys/i386/i386/mp_machdep.c Thu Jun 13 20:46:03 2013 (r251703) @@ -1249,7 +1249,7 @@ smp_targeted_tlb_shootdown(cpuset_t mask ipi_all_but_self(vector); } else { ncpu = 0; - while ((cpu = cpusetobj_ffs(&mask)) != 0) { + while ((cpu = CPU_FFS(&mask)) != 0) { cpu--; CPU_CLR(cpu, &mask); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, @@ -1398,7 +1398,7 @@ ipi_selected(cpuset_t cpus, u_int ipi) if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - while ((cpu = cpusetobj_ffs(&cpus)) != 0) { + while ((cpu = CPU_FFS(&cpus)) != 0) { cpu--; CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); Modified: head/sys/i386/i386/pmap.c == --- head/sys/i386/i386/pmap.c Thu Jun 13 20:41:09 2013(r251702) +++ head/sys/i386/i386/pmap.c Thu Jun 13 20:46:03 2013(r251703) @@ -1957,7 +1957,7 @@ pmap_lazyfix(pmap_t pmap) spins = 5000; /* Find least significant set bit. */ - lsb = cpusetobj_ffs(&mask); + lsb = CPU_FFS(&mask); MPASS(lsb != 0); lsb--; CPU_SETOF(lsb, &mask); Modified: head/sys/i386/xen/mp_machdep.c == --- head/sys/i386/xen/mp_machdep.c Thu Jun 13 20:41:09 2013 (r251702) +++ head/sys/i386/xen/mp_machdep.c Thu Jun 13 20:46:03 2013 (r251703) @@ -1039,7 +1039,7 @@ smp_targeted_tlb_shootdown(cpuset_t mask ipi_all_but_self(vector); } else { ncpu = 0; - while ((cpu = cpusetobj_ffs(&mask)) != 0) { + while ((cpu = CPU_FFS(&mask)) != 0) { cpu--; CPU_CLR(cpu, &mask); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, @@ -1132,7 +1132,7 @@ ipi_selected(cpuset_t cpus, u_int ipi) if (ipi == IPI_STOP_HARD) CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - while ((cpu = cpusetobj_ffs(&cpus)) != 0) { + while ((cpu = CPU_FFS(&cpus)) != 0) { cpu--; CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); Modified: head/sys/i386/xen/pmap.c == --- head/sys/i386/xen/pmap.cThu Jun 13 20:41:09 2013(r251702) +++ head/sys/i386/xen/pmap.cThu Jun 13 20:46:03 2013(r251703) @@ -1707,7 +1707,7 @@ pmap_lazyfix(pmap_t pmap) spins = 5000; /* Find least significant set bit. 
*/ - lsb = cpusetobj_ffs(&mask); + lsb = CPU_FFS(&mask); MPASS(lsb != 0); lsb--; CPU_SETOF(lsb, &m
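For reference, consumers use the new macro the same way everywhere: BIT_FFS()/CPU_FFS() return a 1-based bit number, or 0 for an empty set, so iteration is find, decrement, clear. A minimal sketch (the IPI call is just an example body):

    while ((cpu = CPU_FFS(&cpus)) != 0) {
        cpu--;                      /* convert to a 0-based CPU id */
        CPU_CLR(cpu, &cpus);
        ipi_cpu(cpu, ipi);          /* per-CPU work goes here */
    }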
svn commit: r251709 - head/sys/vm
Author: jeff Date: Thu Jun 13 21:05:38 2013 New Revision: 251709 URL: http://svnweb.freebsd.org/changeset/base/251709 Log: - Convert the slab free item list from a linked array of indices to a bitmap using sys/bitset. This is much simpler, has lower space overhead and is cheaper in most cases. - Use a second bitmap for invariants asserts and improve the quality of the asserts as well as the number of erroneous conditions that we will catch. - Drastically simplify sizing code. Special case refcnt zones since they will be going away. - Update stale comments. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/vm/uma_core.c head/sys/vm/uma_dbg.c head/sys/vm/uma_int.h Modified: head/sys/vm/uma_core.c == --- head/sys/vm/uma_core.c Thu Jun 13 21:03:23 2013(r251708) +++ head/sys/vm/uma_core.c Thu Jun 13 21:05:38 2013(r251709) @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2002-2005, 2009 Jeffrey Roberson + * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson * Copyright (c) 2004, 2005 Bosko Milekic * Copyright (c) 2004-2006 Robert N. M. Watson * All rights reserved. @@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -145,8 +146,13 @@ static int booted = 0; #defineUMA_STARTUP22 /* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */ -static u_int uma_max_ipers; -static u_int uma_max_ipers_ref; +static const u_int uma_max_ipers = SLAB_SETSIZE; + +/* + * Only mbuf clusters use ref zones. Just provide enough references + * to support the one user. New code should not use the ref facility. + */ +static const u_int uma_max_ipers_ref = PAGE_SIZE / MCLBYTES; /* * This is the handle used to schedule events that need to happen @@ -208,7 +214,7 @@ static uint8_t bucket_size[BUCKET_ZONES] /* * Flags and enumerations to be passed to internal functions. */ -enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI }; +enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI }; #defineZFREE_STATFAIL 0x0001 /* Update zone failure statistic. */ #defineZFREE_STATFREE 0x0002 /* Update zone free statistic. */ @@ -885,18 +891,15 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t slab->us_keg = keg; slab->us_data = mem; slab->us_freecount = keg->uk_ipers; - slab->us_firstfree = 0; slab->us_flags = flags; - + BIT_FILL(SLAB_SETSIZE, &slab->us_free); +#ifdef INVARIANTS + BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree); +#endif if (keg->uk_flags & UMA_ZONE_REFCNT) { slabref = (uma_slabrefcnt_t)slab; - for (i = 0; i < keg->uk_ipers; i++) { - slabref->us_freelist[i].us_refcnt = 0; - slabref->us_freelist[i].us_item = i+1; - } - } else { for (i = 0; i < keg->uk_ipers; i++) - slab->us_freelist[i].us_item = i+1; + slabref->us_refcnt[i] = 0; } if (keg->uk_init != NULL) { @@ -1148,31 +1151,32 @@ keg_small_init(uma_keg_t keg) keg->uk_ppera = 1; } + /* +* Calculate the size of each allocation (rsize) according to +* alignment. If the requested size is smaller than we have +* allocation bits for we round it up. 
+*/ rsize = keg->uk_size; - + if (rsize < keg->uk_slabsize / SLAB_SETSIZE) + rsize = keg->uk_slabsize / SLAB_SETSIZE; if (rsize & keg->uk_align) rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1); - if (rsize < keg->uk_slabsize / 256) - rsize = keg->uk_slabsize / 256; - keg->uk_rsize = rsize; KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 || keg->uk_rsize < sizeof(struct pcpu), ("%s: size %u too large", __func__, keg->uk_rsize)); - if (keg->uk_flags & UMA_ZONE_OFFPAGE) { + if (keg->uk_flags & UMA_ZONE_REFCNT) + rsize += sizeof(uint32_t); + + if (keg->uk_flags & UMA_ZONE_OFFPAGE) shsize = 0; - } else if (keg->uk_flags & UMA_ZONE_REFCNT) { - rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */ - shsize = sizeof(struct uma_slab_refcnt); - } else { - rsize += UMA_FRITM_SZ; /* Account for linkage */ + else shsize = sizeof(struct uma_slab); - } keg->uk_ipers = (keg->uk_slabsize - shsize) / rsize; - KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= 256, + KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE, ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers)); memused = keg->uk_ipers * rsize + shsize; @@ -1189,10 +1193,18 @@ keg_small_init(uma_keg_t keg) (keg->uk_fl
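A minimal sketch of what item allocation looks like with the bitset-backed free list (illustrative only, not the exact committed allocator):

    /* First free item: BIT_FFS() is 1-based, 0 means the slab is full. */
    freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
    BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
    item = slab->us_data + (keg->uk_rsize * freei);
    slab->us_freecount--;

Freeing is the mirror image, a BIT_SET() on the item's index, which is also what makes the INVARIANTS double-free checks against us_debugfree cheap.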
svn commit: r244444 - head/sys/kern
Author: jeff Date: Wed Dec 19 20:08:06 2012 New Revision: 244444 URL: http://svnweb.freebsd.org/changeset/base/244444 Log: - Correctly handle EWOULDBLOCK in quiesce_cpus Discussed with: mav Modified: head/sys/kern/subr_smp.c Modified: head/sys/kern/subr_smp.c == --- head/sys/kern/subr_smp.c Wed Dec 19 18:51:35 2012 (r244443) +++ head/sys/kern/subr_smp.c Wed Dec 19 20:08:06 2012 (r244444) @@ -766,8 +766,9 @@ quiesce_cpus(cpuset_t map, const char *w thread_unlock(curthread); while (gen[cpu] == pcpu->pc_idlethread->td_generation) { error = tsleep(quiesce_cpus, prio, wmesg, 1); - if (error) + if (error != EWOULDBLOCK) goto out; + error = 0; } } out: ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
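The fix reflects a general idiom: when a timed tsleep() is used purely to poll, EWOULDBLOCK only means the tick expired and the condition should be rechecked, while any other return is a real reason to bail out. A minimal sketch of the pattern, detached from quiesce_cpus() (condition_met() and chan are placeholders):

    for (;;) {
        if (condition_met())
            break;
        error = tsleep(&chan, prio, wmesg, 1);   /* sleep for one tick */
        if (error != EWOULDBLOCK)
            return (error);                      /* e.g. interrupted under PCATCH */
        error = 0;
    }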
svn commit: r244445 - in head/sys: kern sys tools
Author: jeff Date: Wed Dec 19 20:10:00 2012 New Revision: 25 URL: http://svnweb.freebsd.org/changeset/base/25 Log: - Add new machine parsable KTR macros for timing events. - Use this new format to automatically handle syscalls and VOPs. This changes the earlier format but is still human readable. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/kern/subr_syscall.c head/sys/sys/ktr.h head/sys/tools/vnode_if.awk Modified: head/sys/kern/subr_syscall.c == --- head/sys/kern/subr_syscall.cWed Dec 19 20:08:06 2012 (r24) +++ head/sys/kern/subr_syscall.cWed Dec 19 20:10:00 2012 (r25) @@ -77,13 +77,12 @@ syscallenter(struct thread *td, struct s if (KTRPOINT(td, KTR_SYSCALL)) ktrsyscall(sa->code, sa->narg, sa->args); #endif - - CTR6(KTR_SYSC, -"syscall: td=%p pid %d %s (%#lx, %#lx, %#lx)", - td, td->td_proc->p_pid, syscallname(p, sa->code), - sa->args[0], sa->args[1], sa->args[2]); + KTR_START4(KTR_SYSC, "syscall", syscallname(p, sa->code), + td, "pid:%d", td->td_proc->p_pid, "arg0:%p", sa->args[0], + "arg1:%p", sa->args[1], "arg2:%p", sa->args[2]); if (error == 0) { + STOPEVENT(p, S_SCE, sa->narg); if (p->p_flag & P_TRACED && p->p_stops & S_PT_SCE) { PROC_LOCK(p); @@ -150,10 +149,12 @@ syscallenter(struct thread *td, struct s sa->callp, NULL, (error) ? -1 : td->td_retval[0]); #endif syscall_thread_exit(td, sa->callp); - CTR4(KTR_SYSC, "syscall: p=%p error=%d return %#lx %#lx", - p, error, td->td_retval[0], td->td_retval[1]); } retval: + KTR_STOP4(KTR_SYSC, "syscall", syscallname(p, sa->code), + td, "pid:%d", td->td_proc->p_pid, "error:%d", error, + "retval0:%#lx", td->td_retval[0], "retval1:%#lx", + td->td_retval[1]); if (traced) { PROC_LOCK(p); td->td_dbgflags &= ~TDB_SCE; @@ -176,9 +177,6 @@ syscallret(struct thread *td, int error, */ userret(td, td->td_frame); - CTR4(KTR_SYSC, "syscall %s exit thread %p pid %d proc %s", - syscallname(p, sa->code), td, td->td_proc->p_pid, td->td_name); - #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) { ktrsysret(sa->code, (td->td_pflags & TDP_NERRNO) == 0 ? Modified: head/sys/sys/ktr.h == --- head/sys/sys/ktr.h Wed Dec 19 20:08:06 2012(r24) +++ head/sys/sys/ktr.h Wed Dec 19 20:10:00 2012(r25) @@ -244,6 +244,50 @@ void ktr_tracepoint(u_int mask, const ch point, a0, (v0), a1, (v1), a2, (v2), a3, (v3)) /* + * Start functions denote the start of a region of code or operation + * and should be paired with stop functions for timing of nested + * sequences. + * + * Specifying extra attributes with the name "key" will result in + * multi-part keys. For example a block device and offset pair + * might be used to describe a buf undergoing I/O. + */ +#defineKTR_START0(m, egroup, ident, key) \ + KTR_EVENT0(m, egroup, ident, "start:0x%jX", (uintmax_t)key) +#defineKTR_START1(m, egroup, ident, key, a0, v0) \ + KTR_EVENT1(m, egroup, ident, "start:0x%jX", (uintmax_t)key, a0, (v0)) +#defineKTR_START2(m, egroup, ident, key, a0, v0, a1, v1) \ + KTR_EVENT2(m, egroup, ident, "start:0x%jX", (uintmax_t)key, \ + a0, (v0), a1, (v1)) +#defineKTR_START3(m, egroup, ident, key, a0, v0, a1, v1, a2, v2)\ + KTR_EVENT3(m, egroup, ident, "start:0x%jX", (uintmax_t)key, \ + a0, (v0), a1, (v1), a2, (v2)) +#defineKTR_START4(m, egroup, ident, key, \ + a0, v0, a1, v1, a2, v2, a3, v3) \ + KTR_EVENT4(m, egroup, ident, "start:0x%jX", (uintmax_t)key, \ + a0, (v0), a1, (v1), a2, (v2), a3, (v3)) + +/* + * Stop functions denote the end of a region of code or operation + * and should be paired with start functions for timing of nested + * sequences. 
+ */ +#defineKTR_STOP0(m, egroup, ident, key) \ + KTR_EVENT0(m, egroup, ident, "stop:0x%jX", (uintmax_t)key) +#defineKTR_STOP1(m, egroup, ident, key, a0, v0) \ + KTR_EVENT1(m, egroup, ident, "stop:0x%jX", (uintmax_t)key, a0, (v0)) +#defineKTR_STOP2(m, egroup, ident, key, a0, v0, a1, v1) \ + KTR_EVENT2(m, egroup, ident, "stop:0x%jX", (uintmax_t)key, \ + a0, (v0), a1, (v1)) +#defineKTR_STOP3(m, egroup, ident, key, a0, v0, a1, v1, a2, v2)\ + KTR_EVENT3(m, egroup
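A minimal usage sketch based on the macro signatures added above (the event class and the extra attributes here are made up for illustration):

    KTR_START1(KTR_SPARE2, "bufwait", "biowait", bp, "vp:%p", bp->b_vp);
    /* ... the operation being timed ... */
    KTR_STOP1(KTR_SPARE2, "bufwait", "biowait", bp, "error:%d", error);

Both records carry the same key (the buf pointer), so a post-processor can pair starts with stops and compute per-object latencies, which is the point of the machine-parsable format.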
Re: svn commit: r242014 - head/sys/kern
On Wed, 24 Oct 2012, Attilio Rao wrote: On Wed, Oct 24, 2012 at 8:16 PM, Andre Oppermann wrote: On 24.10.2012 20:56, Jim Harris wrote: On Wed, Oct 24, 2012 at 11:41 AM, Adrian Chadd wrote: On 24 October 2012 11:36, Jim Harris wrote: Pad tdq_lock to avoid false sharing with tdq_load and tdq_cpu_idle. Ok, but.. struct mtx tdq_lock; /* run queue lock. */ + charpad[64 - sizeof(struct mtx)]; .. don't we have an existing compile time macro for the cache line size, which can be used here? Yes, but I didn't use it for a couple of reasons: 1) struct tdq itself is currently using __aligned(64), so I wanted to keep it consistent. 2) CACHE_LINE_SIZE is currently defined as 128 on x86, due to NetBurst-based processors having 128-byte cache sectors a while back. I had planned to start a separate thread on arch@ about this today on whether this was still appropriate. See also the discussion on svn-src-all regarding global struct mtx alignment. Thank you for proving my point. ;) Let's go back and see how we can do this the sanest way. These are the options I see at the moment: 1. sprinkle __aligned(CACHE_LINE_SIZE) all over the place This is wrong because it doesn't give padding. 2. use a macro like MTX_ALIGN that can be SMP/UP aware and in the future possibly change to a different compiler dependent align attribute What is this macro supposed to do? I don't understand that from your description. 3. embed __aligned(CACHE_LINE_SIZE) into struct mtx itself so it automatically gets aligned in all cases, even when dynamically allocated. This works but I think it is overkill for structures including sleep mutexes which are the vast majority. So I wouldn't certainly be in favor of such a patch. I agree. For locks with little contention we probably want smaller structures. For example, you wouldn't want to put a huge lock in every file descriptor. It would be nice to have an automatic way to pad every global lock though. I think it should be done as needed. Jeff Attilio -- Peace can only be achieved by understanding - A. Einstein ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
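For concreteness, a minimal sketch of the padding variant being debated (illustrative only; the structure must itself be cache-line aligned or the trailing pad does not isolate anything):

    struct padded_example {
        struct mtx      lock;
        /* Keep the frequently-read counter off the lock's cache line. */
        char            pad[CACHE_LINE_SIZE - sizeof(struct mtx)];
        volatile int    hot_counter;
    } __aligned(CACHE_LINE_SIZE);

Whether to write CACHE_LINE_SIZE or a literal 64 here is exactly the open question: with CACHE_LINE_SIZE still defined as 128 on x86, the pad doubles in size on machines whose real line is 64 bytes.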
svn commit: r242492 - head/sys/ufs/ffs
Author: jeff Date: Fri Nov 2 21:04:06 2012 New Revision: 242492 URL: http://svn.freebsd.org/changeset/base/242492 Log: - In cancel_mkdir_dotdot don't panic if the inodedep is not available. If the previous diradd had already finished it could have been reclaimed already. This would only happen under heavy dependency pressure. Reported by: Andrey Zonov Discussed with: mckusick MFC after:1 week Modified: head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Fri Nov 2 20:36:41 2012 (r242491) +++ head/sys/ufs/ffs/ffs_softdep.c Fri Nov 2 21:04:06 2012 (r242492) @@ -8579,7 +8579,7 @@ cancel_mkdir_dotdot(ip, dirrem, jremref) if (inodedep_lookup(UFSTOVFS(ip->i_ump), ip->i_number, 0, &inodedep) == 0) - panic("cancel_mkdir_dotdot: Lost inodedep"); + return (jremref); dap = inodedep->id_mkdiradd; if (dap == NULL || (dap->da_state & MKDIR_PARENT) == 0) return (jremref); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r242734 - head/sys/ufs/ffs
Author: jeff Date: Thu Nov 8 01:41:04 2012 New Revision: 242734 URL: http://svnweb.freebsd.org/changeset/base/242734 Log: - Implement BIO_FLUSH support around journal entries. This will not 100% solve power loss problems with dishonest write caches. However, it should improve the situation and force a full fsck when it is unable to resolve with the journal. - Resolve a case where the journal could wrap in an unsafe way causing us to prematurely lose journal entries in very specific scenarios. Discussed with: mckusick MFC after:1 month Modified: head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Thu Nov 8 01:38:30 2012 (r242733) +++ head/sys/ufs/ffs/ffs_softdep.c Thu Nov 8 01:41:04 2012 (r242734) @@ -88,6 +88,8 @@ __FBSDID("$FreeBSD$"); #include #include +#include + #include #ifndef SOFTUPDATES @@ -802,6 +804,7 @@ static void handle_written_jnewblk(struc static void handle_written_jblkdep(struct jblkdep *); static void handle_written_jfreefrag(struct jfreefrag *); static void complete_jseg(struct jseg *); +static void complete_jsegs(struct jseg *); static void jseg_write(struct ufsmount *ump, struct jseg *, uint8_t *); static void jaddref_write(struct jaddref *, struct jseg *, uint8_t *); static void jremref_write(struct jremref *, struct jseg *, uint8_t *); @@ -1227,6 +1230,7 @@ static struct callout softdep_callout; static int req_pending; static int req_clear_inodedeps;/* syncer process flush some inodedeps */ static int req_clear_remove; /* syncer process flush some freeblks */ +static int softdep_flushcache = 0; /* Should we do BIO_FLUSH? */ /* * runtime statistics @@ -1310,6 +1314,8 @@ SYSCTL_INT(_debug_softdep, OID_AUTO, cle &stat_cleanup_retries, 0, ""); SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_failures, CTLFLAG_RW, &stat_cleanup_failures, 0, ""); +SYSCTL_INT(_debug_softdep, OID_AUTO, flushcache, CTLFLAG_RW, +&softdep_flushcache, 0, ""); SYSCTL_DECL(_vfs_ffs); @@ -3078,6 +3084,67 @@ softdep_flushjournal(mp) FREE_LOCK(&lk); } +static void softdep_synchronize_completed(struct bio *); +static void softdep_synchronize(struct bio *, struct ufsmount *, void *); + +static void +softdep_synchronize_completed(bp) +struct bio *bp; +{ + struct jseg *oldest; + struct jseg *jseg; + + /* +* caller1 marks the last segment written before we issued the +* synchronize cache. +*/ + jseg = bp->bio_caller1; + oldest = NULL; + ACQUIRE_LOCK(&lk); + /* +* Mark all the journal entries waiting on the synchronize cache +* as completed so they may continue on. +*/ + while (jseg != NULL && (jseg->js_state & COMPLETE) == 0) { + jseg->js_state |= COMPLETE; + oldest = jseg; + jseg = TAILQ_PREV(jseg, jseglst, js_next); + } + /* +* Restart deferred journal entry processing from the oldest +* completed jseg. +*/ + if (oldest) + complete_jsegs(oldest); + + FREE_LOCK(&lk); + g_destroy_bio(bp); +} + +/* + * Send BIO_FLUSH/SYNCHRONIZE CACHE to the device to enforce write ordering + * barriers. The journal must be written prior to any blocks that depend + * on it and the journal can not be released until the blocks have be + * written. This code handles both barriers simultaneously. 
+ */ +static void +softdep_synchronize(bp, ump, caller1) + struct bio *bp; + struct ufsmount *ump; + void *caller1; +{ + + bp->bio_cmd = BIO_FLUSH; + bp->bio_flags |= BIO_ORDERED; + bp->bio_data = NULL; + bp->bio_offset = ump->um_cp->provider->mediasize; + bp->bio_length = 0; + bp->bio_done = softdep_synchronize_completed; + bp->bio_caller1 = caller1; + g_io_request(bp, + (struct g_consumer *)ump->um_devvp->v_bufobj.bo_private); +} + /* * Flush some journal records to disk. */ @@ -3092,8 +3159,10 @@ softdep_process_journal(mp, needwk, flag struct worklist *wk; struct jseg *jseg; struct buf *bp; + struct bio *bio; uint8_t *data; struct fs *fs; + int shouldflush; int segwritten; int jrecmin;/* Minimum records per block. */ int jrecmax;/* Maximum records per block. */ @@ -3104,6 +3173,9 @@ softdep_process_journal(mp, needwk, flag if (MOUNTEDSUJ(mp) == 0) return; + shouldflush = softdep_flushcache; + bio = NULL; + jseg = NULL; ump = VFSTOUFS(mp); fs = ump->um_fs; jblocks = ump->softdep_jblocks; @@ -3152,6 +3224,10 @@ softdep_process_journal(mp, needwk, flag LIST_INIT(&jseg->js_entries); LIST_INIT(&jseg->js_
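One practical note: as committed the new behaviour is opt-in (softdep_flushcache defaults to 0), so on drives with volatile write caches the extra ordering barrier only takes effect after enabling it, e.g. with `sysctl debug.softdep.flushcache=1`.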
svn commit: r242736 - head/sys/kern
Author: jeff Date: Thu Nov 8 01:46:47 2012 New Revision: 242736 URL: http://svnweb.freebsd.org/changeset/base/242736 Log: - Change ULE to use dynamic slice sizes for the timeshare queue in order to further reduce latency for threads in this queue. This should help as threads transition from realtime to timeshare. The latency is bound to a max of sched_slice until we have more than sched_slice / 6 threads runnable. Then the min slice is allotted to all threads and latency becomes (nthreads - 1) * min_slice. Discussed with: mav Modified: head/sys/kern/sched_ule.c Modified: head/sys/kern/sched_ule.c == --- head/sys/kern/sched_ule.c Thu Nov 8 01:42:54 2012(r242735) +++ head/sys/kern/sched_ule.c Thu Nov 8 01:46:47 2012(r242736) @@ -189,6 +189,12 @@ static struct td_sched td_sched0; #defineSCHED_INTERACT_HALF (SCHED_INTERACT_MAX / 2) #defineSCHED_INTERACT_THRESH (30) +/* + * These parameters determine the slice behavior for batch work. + */ +#defineSCHED_SLICE_DEFAULT_DIVISOR 10 /* ~94 ms, 12 stathz ticks. */ +#defineSCHED_SLICE_MIN_DIVISOR 6 /* DEFAULT/MIN = ~16 ms. */ + /* Flags kept in td_flags. */ #defineTDF_SLICEENDTDF_SCHED2 /* Thread time slice is over. */ @@ -201,9 +207,10 @@ static struct td_sched td_sched0; * preempt_thresh: Priority threshold for preemption and remote IPIs. */ static int sched_interact = SCHED_INTERACT_THRESH; -static int realstathz = 127; static int tickincr = 8 << SCHED_TICK_SHIFT; -static int sched_slice = 12; +static int realstathz = 127; /* reset during boot. */ +static int sched_slice = 10; /* reset during boot. */ +static int sched_slice_min = 1;/* reset during boot. */ #ifdef PREEMPTION #ifdef FULL_PREEMPTION static int preempt_thresh = PRI_MAX_IDLE; @@ -559,6 +566,30 @@ tdq_load_rem(struct tdq *tdq, struct thr } /* + * Bound timeshare latency by decreasing slice size as load increases. We + * consider the maximum latency as the sum of the threads waiting to run + * aside from curthread and target no more than sched_slice latency but + * no less than sched_slice_min runtime. + */ +static inline int +tdq_slice(struct tdq *tdq) +{ + int load; + + /* +* It is safe to use sys_load here because this is called from +* contexts where timeshare threads are running and so there +* cannot be higher priority load in the system. +*/ + load = tdq->tdq_sysload - 1; + if (load >= SCHED_SLICE_MIN_DIVISOR) + return (sched_slice_min); + if (load <= 1) + return (sched_slice); + return (sched_slice / load); +} + +/* * Set lowpri to its exact value by searching the run-queue and * evaluating curthread. curthread may be passed as an optimization. */ @@ -1384,7 +1415,8 @@ sched_initticks(void *dummy) int incr; realstathz = stathz ? stathz : hz; - sched_slice = realstathz / 10; /* ~100ms */ + sched_slice = realstathz / SCHED_SLICE_DEFAULT_DIVISOR; + sched_slice_min = sched_slice / SCHED_SLICE_MIN_DIVISOR; hogticks = imax(1, (2 * hz * sched_slice + realstathz / 2) / realstathz); @@ -1585,7 +1617,7 @@ schedinit(void) thread0.td_sched = &td_sched0; td_sched0.ts_ltick = ticks; td_sched0.ts_ftick = ticks; - td_sched0.ts_slice = sched_slice; + td_sched0.ts_slice = 0; } /* @@ -2003,8 +2035,10 @@ sched_wakeup(struct thread *td) sched_interact_update(td); sched_pctcpu_update(ts, 0); } - /* Reset the slice value after we sleep. */ - ts->ts_slice = sched_slice; + /* +* Reset the slice value since we slept and advanced the round-robin. 
+*/ + ts->ts_slice = 0; sched_add(td, SRQ_BORING); } @@ -2036,14 +2070,16 @@ sched_fork_thread(struct thread *td, str { struct td_sched *ts; struct td_sched *ts2; + struct tdq *tdq; + tdq = TDQ_SELF(); THREAD_LOCK_ASSERT(td, MA_OWNED); /* * Initialize child. */ ts = td->td_sched; ts2 = child->td_sched; - child->td_lock = TDQ_LOCKPTR(TDQ_SELF()); + child->td_lock = TDQ_LOCKPTR(tdq); child->td_cpuset = cpuset_ref(td->td_cpuset); ts2->ts_cpu = ts->ts_cpu; ts2->ts_flags = 0; @@ -2062,7 +2098,8 @@ sched_fork_thread(struct thread *td, str */ ts2->ts_slptime = ts->ts_slptime; ts2->ts_runtime = ts->ts_runtime; - ts2->ts_slice = 1; /* Attempt to quickly learn interactivity. */ + /* Attempt to quickly learn interactivity. */ + ts2->ts_slice = tdq_slice(tdq) - sched_slice_min; #ifdef KTR bzero(ts2->ts_name, sizeof(ts2->ts_name)); #endif @@ -2227,8 +2264,8 @@ sched_clock(struct th
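Worked through with the defaults: realstathz is 127, so sched_slice becomes 127 / 10 = 12 stathz ticks (about 94 ms) and sched_slice_min becomes 12 / 6 = 2 ticks (about 16 ms). tdq_slice() then scales between the two: with a load term of 3 each timeshare thread gets 12 / 3 = 4 ticks, and once the load term reaches SCHED_SLICE_MIN_DIVISOR (6) every thread gets the 2-tick minimum, bounding the worst-case timeshare latency at roughly (nthreads - 1) * 16 ms as the log describes.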
svn commit: r242815 - head/sys/ufs/ffs
Author: jeff Date: Fri Nov 9 04:04:25 2012 New Revision: 242815 URL: http://svnweb.freebsd.org/changeset/base/242815 Log: - Correct rev 242734, segments can sometimes get stuck. Be a bit more defensive with segment state. Reported by: b. f. Modified: head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Fri Nov 9 01:51:06 2012 (r242814) +++ head/sys/ufs/ffs/ffs_softdep.c Fri Nov 9 04:04:25 2012 (r242815) @@ -4291,13 +4291,16 @@ free_jsegs(jblocks) jblocks->jb_oldestseg = jseg; return; } + if ((jseg->js_state & ALLCOMPLETE) != ALLCOMPLETE) + break; if (jseg->js_seq > jblocks->jb_oldestwrseq) break; /* * We can free jsegs that didn't write entries when * oldestwrseq == js_seq. */ - if (jseg->js_cnt != 0) + if (jseg->js_seq == jblocks->jb_oldestwrseq && + jseg->js_cnt != 0) break; free_jseg(jseg, jblocks); } ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r242924 - head/sys/ufs/ffs
Author: jeff Date: Mon Nov 12 19:53:55 2012 New Revision: 242924 URL: http://svnweb.freebsd.org/changeset/base/242924 Log: - Fix a bug that has existed since the original softdep implementation. When a background copy of a cg is written we complete any work associated with that bmsafemap. If new work has been added to the non-background copy of the buffer it will be completed before the next write happens. The solution is to do the rollbacks when we make the copy so only those dependencies that were present at the time of writing will be completed when the background write completes. This would've resulted in various bitmap related corruptions and panics. It also would've expired journal entries early causing journal replay to miss some records. MFC after:2 weeks Modified: head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Mon Nov 12 18:38:54 2012 (r242923) +++ head/sys/ufs/ffs/ffs_softdep.c Mon Nov 12 19:53:55 2012 (r242924) @@ -977,7 +977,7 @@ static struct freework *newfreework(stru struct freework *, ufs_lbn_t, ufs2_daddr_t, int, int, int); static int jwait(struct worklist *, int); static struct inodedep *inodedep_lookup_ip(struct inode *); -static int bmsafemap_rollbacks(struct bmsafemap *); +static int bmsafemap_backgroundwrite(struct bmsafemap *, struct buf *); static struct freefile *handle_bufwait(struct inodedep *, struct workhead *); static void handle_jwork(struct workhead *); static struct mkdir *setup_newdir(struct diradd *, ino_t, ino_t, struct buf *, @@ -1795,7 +1795,7 @@ softdep_move_dependencies(oldbp, newbp) while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) { LIST_REMOVE(wk, wk_list); if (wk->wk_type == D_BMSAFEMAP && - bmsafemap_rollbacks(WK_BMSAFEMAP(wk))) + bmsafemap_backgroundwrite(WK_BMSAFEMAP(wk), newbp)) dirty = 1; if (wktail == 0) LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list); @@ -5173,9 +5173,15 @@ jnewblk_merge(new, old, wkhd) return (new); /* Replace a jfreefrag with a jnewblk. */ if (new->wk_type == D_JFREEFRAG) { + if (WK_JNEWBLK(old)->jn_blkno != WK_JFREEFRAG(new)->fr_blkno) + panic("jnewblk_merge: blkno mismatch: %p, %p", + old, new); cancel_jfreefrag(WK_JFREEFRAG(new)); return (old); } + if (old->wk_type != D_JNEWBLK || new->wk_type != D_JNEWBLK) + panic("jnewblk_merge: Bad type: old %d new %d\n", + old->wk_type, new->wk_type); /* * Handle merging of two jnewblk records that describe * different sets of fragments in the same block. @@ -10504,7 +10510,7 @@ initiate_write_bmsafemap(bmsafemap, bp) ino_t ino; if (bmsafemap->sm_state & IOSTARTED) - panic("initiate_write_bmsafemap: Already started\n"); + return; bmsafemap->sm_state |= IOSTARTED; /* * Clear any inode allocations which are pending journal writes. @@ -10515,10 +10521,6 @@ initiate_write_bmsafemap(bmsafemap, bp) inosused = cg_inosused(cgp); LIST_FOREACH(jaddref, &bmsafemap->sm_jaddrefhd, ja_bmdeps) { ino = jaddref->ja_ino % fs->fs_ipg; - /* -* If this is a background copy the inode may not -* be marked used yet. 
-*/ if (isset(inosused, ino)) { if ((jaddref->ja_mode & IFMT) == IFDIR) cgp->cg_cs.cs_ndir--; @@ -10527,7 +10529,7 @@ initiate_write_bmsafemap(bmsafemap, bp) jaddref->ja_state &= ~ATTACHED; jaddref->ja_state |= UNDONE; stat_jaddref++; - } else if ((bp->b_xflags & BX_BKGRDMARKER) == 0) + } else panic("initiate_write_bmsafemap: inode %ju " "marked free", (uintmax_t)jaddref->ja_ino); } @@ -10542,9 +10544,8 @@ initiate_write_bmsafemap(bmsafemap, bp) LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) { if (jnewblk_rollback(jnewblk, fs, cgp, blksfree)) continue; - if ((bp->b_xflags & BX_BKGRDMARKER) == 0) - panic("initiate_write_bmsafemap: block %jd " - "marked free", jnewblk->jn_blkno); + panic("initiate_write_bmsafemap: block %jd
svn commit: r243017 - head/sbin/fsck_ffs
Author: jeff Date: Wed Nov 14 06:31:47 2012 New Revision: 243017 URL: http://svnweb.freebsd.org/changeset/base/243017 Log: - blk_equals() is too strict. If the journal entry defines more frags than we're claiming it should still be considered an exact match. This would previously leak frags that had been extended. - If there is a sequence number problem in the journal print the sequence numbers we've seen so far for debugging. - Clean up the block mask related debuging printfs. Some are redundant. MFC after:1 week Modified: head/sbin/fsck_ffs/suj.c Modified: head/sbin/fsck_ffs/suj.c == --- head/sbin/fsck_ffs/suj.cWed Nov 14 06:23:32 2012(r243016) +++ head/sbin/fsck_ffs/suj.cWed Nov 14 06:31:47 2012(r243017) @@ -504,7 +504,7 @@ blk_equals(struct jblkrec *brec, ino_t i return (0); if (brec->jb_blkno + brec->jb_oldfrags != start) return (0); - if (brec->jb_frags != frags) + if (brec->jb_frags < frags) return (0); return (1); } @@ -551,7 +551,6 @@ blk_freemask(ufs2_daddr_t blk, ino_t ino brec = (struct jblkrec *)srec->sr_rec; /* * If the block overlaps but does not match -* exactly it's a new allocation. If it matches * exactly this record refers to the current * location. */ @@ -648,7 +647,8 @@ blk_free(ufs2_daddr_t bno, int mask, int uint8_t *blksfree; if (debug) - printf("Freeing %d frags at blk %jd\n", frags, bno); + printf("Freeing %d frags at blk %jd mask 0x%x\n", + frags, bno, mask); cg = dtog(fs, bno); sc = cg_lookup(cg); cgp = sc->sc_cgp; @@ -1143,12 +1143,8 @@ ino_adjblks(struct suj_ino *sino) static void blk_free_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { - int mask; - mask = blk_freemask(blk, ino, lbn, frags); - if (debug) - printf("blk %jd freemask 0x%X\n", blk, mask); - blk_free(blk, mask, frags); + blk_free(blk, blk_freemask(blk, ino, lbn, frags), frags); } /* @@ -1163,8 +1159,6 @@ blk_free_lbn(ufs2_daddr_t blk, ino_t ino int mask; mask = blk_freemask(blk, ino, lbn, frags); - if (debug) - printf("blk %jd freemask 0x%X\n", blk, mask); resid = 0; if (lbn <= -NDADDR && follow && mask == 0) indir_visit(ino, lbn, blk, &resid, blk_free_visit, VISIT_INDIR); @@ -2334,6 +2328,10 @@ suj_prune(void) } if (newseq != oldseq) { + TAILQ_FOREACH(seg, &allsegs, ss_next) { + printf("%jd, ", seg->ss_rec.jsr_seq); + } + printf("\n"); err_suj("Journal file sequence mismatch %jd != %jd\n", newseq, oldseq); } ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
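A concrete case of the first fix: if a journal record describes a 4-fragment allocation of a block and fsck later asks whether the first two fragments of that block are covered, the old `jb_frags != frags` comparison rejected the match even though the record plainly covers the claim, so fragments that had been extended were leaked; with `jb_frags < frags` such a record is accepted as an exact match and the fragments are accounted for.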
svn commit: r243018 - head/sys/ufs/ffs
Author: jeff Date: Wed Nov 14 06:37:43 2012 New Revision: 243018 URL: http://svnweb.freebsd.org/changeset/base/243018 Log: - Fix a truncation bug with softdep journaling that could leak blocks on crash. When truncating a file that never made it to disk we use the canceled allocation dependencies to hold the journal records until the truncation completes. Previously allocdirect dependencies on the id_bufwait list were not considered and their journal space could expire before the bitmaps were written. Cancel them and attach them to the freeblks as we do for other allocdirects. - Add KTR traces that were used to debug this problem. - When adding jsegdeps, always use jwork_insert() so we don't have more than one segdep on a given jwork list. Sponsored by: EMC / Isilon Storage Division Modified: head/sys/ufs/ffs/ffs_softdep.c Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Wed Nov 14 06:31:47 2012 (r243017) +++ head/sys/ufs/ffs/ffs_softdep.c Wed Nov 14 06:37:43 2012 (r243018) @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -92,6 +93,8 @@ __FBSDID("$FreeBSD$"); #include +#defineKTR_SUJ 0 /* Define to KTR_SPARE. */ + #ifndef SOFTUPDATES int @@ -770,6 +773,34 @@ struct pagedep_hashhead; struct bmsafemap_hashhead; /* + * Private journaling structures. + */ +struct jblocks { + struct jseglst jb_segs;/* TAILQ of current segments. */ + struct jseg *jb_writeseg; /* Next write to complete. */ + struct jseg *jb_oldestseg; /* Oldest segment with valid entries. */ + struct jextent *jb_extent; /* Extent array. */ + uint64_tjb_nextseq; /* Next sequence number. */ + uint64_tjb_oldestwrseq; /* Oldest written sequence number. */ + uint8_t jb_needseg; /* Need a forced segment. */ + uint8_t jb_suspended; /* Did journal suspend writes? */ + int jb_avail; /* Available extents. */ + int jb_used;/* Last used extent. */ + int jb_head;/* Allocator head. */ + int jb_off; /* Allocator extent offset. */ + int jb_blocks; /* Total disk blocks covered. */ + int jb_free;/* Total disk blocks free. */ + int jb_min; /* Minimum free space. */ + int jb_low; /* Low on space. */ + int jb_age; /* Insertion time of oldest rec. */ +}; + +struct jextent { + ufs2_daddr_tje_daddr; /* Disk block address. */ + int je_blocks; /* Disk block count. */ +}; + +/* * Internal function prototypes. */ static void softdep_error(char *, int); @@ -2268,19 +2299,15 @@ static void indirblk_insert(freework) struct freework *freework; { - struct freeblks *freeblks; - struct jsegdep *jsegdep; - struct worklist *wk; + struct jblocks *jblocks; + struct jseg *jseg; - freeblks = freework->fw_freeblks; - LIST_FOREACH(wk, &freeblks->fb_jwork, wk_list) - if (wk->wk_type == D_JSEGDEP) - break; - if (wk == NULL) + jblocks = VFSTOUFS(freework->fw_list.wk_mp)->softdep_jblocks; + jseg = TAILQ_LAST(&jblocks->jb_segs, jseglst); + if (jseg == NULL) return; - jsegdep = WK_JSEGDEP(wk); - LIST_INSERT_HEAD(&jsegdep->jd_seg->js_indirs, freework, fw_segs); + LIST_INSERT_HEAD(&jseg->js_indirs, freework, fw_segs); TAILQ_INSERT_HEAD(INDIR_HASH(freework->fw_list.wk_mp, freework->fw_blkno), freework, fw_next); freework->fw_state &= ~DEPCOMPLETE; @@ -2433,31 +2460,6 @@ softdep_unmount(mp) journal_unmount(mp); } -struct jblocks { - struct jseglst jb_segs;/* TAILQ of current segments. */ - struct jseg *jb_writeseg; /* Next write to complete. */ - struct jseg *jb_oldestseg; /* Oldest segment with valid entries. */ - struct jextent *jb_extent; /* Extent array. 
*/ - uint64_tjb_nextseq; /* Next sequence number. */ - uint64_tjb_oldestwrseq; /* Oldest written sequence number. */ - uint8_t jb_needseg; /* Need a forced segment. */ - uint8_t jb_suspended; /* Did journal suspend writes? */ - int jb_avail; /* Available extents. */ - int jb_used;/* Last used extent. */ - int jb_head;/* Allocator head. */ - int jb_off; /* Allocator extent offset. */ - int jb_blocks; /* Total disk blocks covered. */ - int
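The new KTR_SUJ define above is the standard KTR idiom for leaving debugging trace points in the source at zero cost: the class defaults to 0 so the CTR calls compile away, and redefining it to KTR_SPARE turns every trace point back on without touching the call sites. A minimal userland sketch of that compile-time gating (the CTR1 macro here is a simplified stand-in for the kernel's, which also checks the run-time ktr_mask and logs into ktr_buf rather than stdout):

#include <stdio.h>

#define KTR_SPARE       0x200   /* pretend spare event-class bit */
#define KTR_SUJ         0       /* flip to KTR_SPARE to enable the traces */
#define KTR_COMPILE     (KTR_SPARE)

/* Simplified stand-in for the kernel's CTR1(). */
#define CTR1(m, fmt, arg) do {                                          \
        if ((m) & KTR_COMPILE)                                          \
                printf(fmt "\n", (arg));                                \
} while (0)

int
main(void)
{
        /*
         * With KTR_SUJ defined to 0 the branch is constant-false and the
         * call disappears at compile time; redefining KTR_SUJ to
         * KTR_SPARE re-enables every trace point in one place.
         */
        CTR1(KTR_SUJ, "freeblks at lbn %d", 12);
        return (0);
}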
svn commit: r243046 - in head: sys/kern sys/sparc64/include sys/sys usr.bin/ktrdump
Author: jeff Date: Thu Nov 15 00:51:57 2012 New Revision: 243046 URL: http://svnweb.freebsd.org/changeset/base/243046 Log: - Implement run-time expansion of the KTR buffer via sysctl. - Implement a function to ensure that all preempted threads have switched back out at least once. Use this to make sure there are no stale references to the old ktr_buf or the lock profiling buffers before updating them. Reviewed by: marius (sparc64 parts), attilio (earlier patch) Sponsored by: EMC / Isilon Storage Division Modified: head/sys/kern/kern_ktr.c head/sys/kern/subr_lock.c head/sys/kern/subr_smp.c head/sys/sparc64/include/ktr.h head/sys/sys/ktr.h head/sys/sys/smp.h head/usr.bin/ktrdump/ktrdump.c Modified: head/sys/kern/kern_ktr.c == --- head/sys/kern/kern_ktr.cWed Nov 14 22:21:03 2012(r243045) +++ head/sys/kern/kern_ktr.cThu Nov 15 00:51:57 2012(r243046) @@ -47,7 +47,11 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include +#include #include +#include #include #include #include @@ -66,6 +70,9 @@ __FBSDID("$FreeBSD$"); #defineKTR_ENTRIES 1024 #endif +/* Limit the allocations to something manageable. */ +#defineKTR_ENTRIES_MAX (8 * 1024 * 1024) + #ifndef KTR_MASK #defineKTR_MASK(0) #endif @@ -82,30 +89,31 @@ __FBSDID("$FreeBSD$"); #defineKTR_CPU PCPU_GET(cpuid) #endif -FEATURE(ktr, "Kernel support for KTR kernel tracing facility"); +static MALLOC_DEFINE(M_KTR, "KTR", "KTR"); -static SYSCTL_NODE(_debug, OID_AUTO, ktr, CTLFLAG_RD, 0, "KTR options"); +FEATURE(ktr, "Kernel support for KTR kernel tracing facility"); +volatile int ktr_idx = 0; intktr_mask = KTR_MASK; +intktr_compile = KTR_COMPILE; +intktr_entries = KTR_ENTRIES; +intktr_version = KTR_VERSION; +struct ktr_entry ktr_buf_init[KTR_ENTRIES]; +struct ktr_entry *ktr_buf = ktr_buf_init; +cpuset_t ktr_cpumask = CPUSET_T_INITIALIZER(KTR_CPUMASK); +static char ktr_cpumask_str[CPUSETBUFSIZ]; + TUNABLE_INT("debug.ktr.mask", &ktr_mask); -SYSCTL_INT(_debug_ktr, OID_AUTO, mask, CTLFLAG_RW, -&ktr_mask, 0, "Bitmask of KTR event classes for which logging is enabled"); -intktr_compile = KTR_COMPILE; -SYSCTL_INT(_debug_ktr, OID_AUTO, compile, CTLFLAG_RD, -&ktr_compile, 0, "Bitmask of KTR event classes compiled into the kernel"); +TUNABLE_STR("debug.ktr.cpumask", ktr_cpumask_str, sizeof(ktr_cpumask_str)); -intktr_entries = KTR_ENTRIES; -SYSCTL_INT(_debug_ktr, OID_AUTO, entries, CTLFLAG_RD, -&ktr_entries, 0, "Number of entries in the KTR buffer"); +static SYSCTL_NODE(_debug, OID_AUTO, ktr, CTLFLAG_RD, 0, "KTR options"); -intktr_version = KTR_VERSION; SYSCTL_INT(_debug_ktr, OID_AUTO, version, CTLFLAG_RD, &ktr_version, 0, "Version of the KTR interface"); -cpuset_t ktr_cpumask = CPUSET_T_INITIALIZER(KTR_CPUMASK); -static char ktr_cpumask_str[CPUSETBUFSIZ]; -TUNABLE_STR("debug.ktr.cpumask", ktr_cpumask_str, sizeof(ktr_cpumask_str)); +SYSCTL_INT(_debug_ktr, OID_AUTO, compile, CTLFLAG_RD, +&ktr_compile, 0, "Bitmask of KTR event classes compiled into the kernel"); static void ktr_cpumask_initializer(void *dummy __unused) @@ -145,9 +153,6 @@ SYSCTL_PROC(_debug_ktr, OID_AUTO, cpumas sysctl_debug_ktr_cpumask, "S", "Bitmask of CPUs on which KTR logging is enabled"); -volatile int ktr_idx = 0; -struct ktr_entry ktr_buf[KTR_ENTRIES]; - static int sysctl_debug_ktr_clear(SYSCTL_HANDLER_ARGS) { @@ -159,7 +164,7 @@ sysctl_debug_ktr_clear(SYSCTL_HANDLER_AR return (error); if (clear) { - bzero(ktr_buf, sizeof(ktr_buf)); + bzero(ktr_buf, sizeof(*ktr_buf) * ktr_entries); ktr_idx = 0; } @@ -168,6 +173,67 @@ sysctl_debug_ktr_clear(SYSCTL_HANDLER_AR 
SYSCTL_PROC(_debug_ktr, OID_AUTO, clear, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_debug_ktr_clear, "I", "Clear KTR Buffer"); +/* + * This is a sysctl proc so that it is serialized as !MPSAFE along with + * the other ktr sysctl procs. + */ +static int +sysctl_debug_ktr_mask(SYSCTL_HANDLER_ARGS) +{ + int mask, error; + + mask = ktr_mask; + error = sysctl_handle_int(oidp, &mask, 0, req); + if (error || !req->newptr) + return (error); + ktr_mask = mask; + return (error); +} + +SYSCTL_PROC(_debug_ktr, OID_AUTO, mask, CTLTYPE_INT|CTLFLAG_RW, 0, 0, +sysctl_debug_ktr_mask, "I", +"Bitmask of KTR event classes for which logging is enabled"); + +static int +sysctl_debug_ktr_entries(SYSCTL_HANDLER_ARGS) +{ + int entries, error, mask; + struct ktr_entry *buf, *oldbuf; + + entries = ktr_entries; + error = sysctl_handle_int(oidp, &entries, 0, req); + if (error || !req->newptr) + return (error); + if (entries > KTR_ENTRIES_MAX) + return (ERANGE
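One detail worth noting in the clear handler above: once ktr_buf becomes a pointer that may be retargeted at an allocated buffer, sizeof(ktr_buf) measures the pointer rather than the buffer, which is why the bzero() is now sized from the element size and the live entry count. A small runnable illustration (struct entry is a stand-in for struct ktr_entry):

#include <stdio.h>
#include <string.h>

struct entry {                  /* stand-in for struct ktr_entry */
        int     a;
        long    b;
};

#define NENTRIES        1024

static struct entry     buf_init[NENTRIES];     /* old style: fixed array */
static struct entry     *buf = buf_init;        /* new style: resizable */
static int              nentries = NENTRIES;

int
main(void)
{
        /* sizeof on the array names the whole buffer... */
        printf("sizeof(buf_init) = %zu\n", sizeof(buf_init));
        /* ...but on the pointer it names only the pointer itself. */
        printf("sizeof(buf)      = %zu\n", sizeof(buf));
        /* So the clear must be sized explicitly, as the new bzero() is. */
        memset(buf, 0, sizeof(*buf) * nentries);
        return (0);
}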
svn commit: r188904 - in head/sys: amd64/amd64 i386/i386
Author: jeff Date: Sat Feb 21 23:15:34 2009 New Revision: 188904 URL: http://svn.freebsd.org/changeset/base/188904 Log: - Resolve an issue where we may clear an idt while an interrupt on a different cpu is still assigned to that vector by never clearing idt entries. This was only provided as a debugging feature and the bugs are caught by other means. - Drop the sched lock when rebinding to reassign an interrupt vector to a new cpu so that pending interrupts have a chance to be delivered before removing the old vector. Discussed with: tegge, jhb Modified: head/sys/amd64/amd64/local_apic.c head/sys/i386/i386/local_apic.c Modified: head/sys/amd64/amd64/local_apic.c == --- head/sys/amd64/amd64/local_apic.c Sat Feb 21 22:57:26 2009 (r188903) +++ head/sys/amd64/amd64/local_apic.c Sat Feb 21 23:15:34 2009 (r188904) @@ -900,7 +900,13 @@ apic_disable_vector(u_int apic_id, u_int KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); +#ifdef notyet + /* +* We can not currently clear the idt entry because other cpus +* may have a valid vector at this offset. +*/ setidt(vector, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); +#endif } /* Release an APIC vector when it's no longer in use. */ @@ -924,9 +930,11 @@ apic_free_vector(u_int apic_id, u_int ve if (sched_is_bound(td)) panic("apic_free_vector: Thread already bound.\n"); sched_bind(td, apic_cpuid(apic_id)); + thread_unlock(td); mtx_lock_spin(&icu_lock); lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = 0; mtx_unlock_spin(&icu_lock); + thread_lock(td); sched_unbind(td); thread_unlock(td); Modified: head/sys/i386/i386/local_apic.c == --- head/sys/i386/i386/local_apic.c Sat Feb 21 22:57:26 2009 (r188903) +++ head/sys/i386/i386/local_apic.c Sat Feb 21 23:15:34 2009 (r188904) @@ -903,8 +903,14 @@ apic_disable_vector(u_int apic_id, u_int KASSERT(vector != IDT_SYSCALL, ("Attempt to overwrite syscall entry")); KASSERT(ioint_handlers[vector / 32] != NULL, ("No ISR handler for vector %u", vector)); +#ifdef notyet + /* +* We can not currently clear the idt entry because other cpus +* may have a valid vector at this offset. +*/ setidt(vector, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#endif } /* Release an APIC vector when it's no longer in use. */ @@ -928,9 +934,11 @@ apic_free_vector(u_int apic_id, u_int ve if (sched_is_bound(td)) panic("apic_free_vector: Thread already bound.\n"); sched_bind(td, apic_cpuid(apic_id)); + thread_unlock(td); mtx_lock_spin(&icu_lock); lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = 0; mtx_unlock_spin(&icu_lock); + thread_lock(td); sched_unbind(td); thread_unlock(td); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r189787 - head/sys/kern
Author: jeff Date: Sat Mar 14 11:41:36 2009 New Revision: 189787 URL: http://svn.freebsd.org/changeset/base/189787 Log: - Fix an error that occurs when mp_ncpu is an odd number. steal_thresh is calculated as 0 which causes errors elsewhere. Submitted by: KOIE Hidetaka - When sched_affinity() is called with a thread that is not curthread we need to handle the ON_RUNQ() case by adding the thread to the correct run queue. Submitted by: Justin Teller MFC after:1 Week Modified: head/sys/kern/sched_ule.c Modified: head/sys/kern/sched_ule.c == --- head/sys/kern/sched_ule.c Sat Mar 14 08:34:45 2009(r189786) +++ head/sys/kern/sched_ule.c Sat Mar 14 11:41:36 2009(r189787) @@ -1337,11 +1337,11 @@ sched_initticks(void *dummy) */ balance_interval = realstathz; /* -* Set steal thresh to log2(mp_ncpu) but no greater than 4. This -* prevents excess thrashing on large machines and excess idle on -* smaller machines. +* Set steal thresh to roughly log2(mp_ncpu) but no greater than 4. +* This prevents excess thrashing on large machines and excess idle +* on smaller machines. */ - steal_thresh = min(ffs(mp_ncpus) - 1, 3); + steal_thresh = min(fls(mp_ncpus) - 1, 3); affinity = SCHED_AFFINITY_DEFAULT; #endif } @@ -2417,6 +2417,11 @@ sched_affinity(struct thread *td) ts = td->td_sched; if (THREAD_CAN_SCHED(td, ts->ts_cpu)) return; + if (TD_ON_RUNQ(td)) { + sched_rem(td); + sched_add(td, SRQ_BORING); + return; + } if (!TD_IS_RUNNING(td)) return; td->td_flags |= TDF_NEEDRESCHED; ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
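The ffs()-to-fls() change is easiest to see with concrete CPU counts: ffs() returns the position of the lowest set bit, so any odd mp_ncpus yields ffs(n) - 1 == 0 and a steal_thresh of 0, while fls() returns the highest set bit and so approximates log2(n). A small runnable comparison (local helpers stand in for libkern's ffs()/fls()):

#include <stdio.h>

/* Local stand-ins for libkern's ffs()/fls(). */
static int
my_ffs(int v)
{
        int bit;

        if (v == 0)
                return (0);
        for (bit = 1; (v & 1) == 0; bit++)
                v >>= 1;
        return (bit);
}

static int
my_fls(int v)
{
        int bit;

        for (bit = 0; v != 0; bit++)
                v >>= 1;
        return (bit);
}

#define MIN(a, b)       ((a) < (b) ? (a) : (b))

int
main(void)
{
        int ncpus[] = { 1, 2, 3, 4, 6, 8, 16, 32 };
        unsigned i;

        for (i = 0; i < sizeof(ncpus) / sizeof(ncpus[0]); i++)
                printf("mp_ncpus=%2d  old=%d  new=%d\n", ncpus[i],
                    MIN(my_ffs(ncpus[i]) - 1, 3),       /* before r189787 */
                    MIN(my_fls(ncpus[i]) - 1, 3));      /* after r189787 */
        return (0);
}

For every odd count the old column prints 0, which is the bug the log describes; the new column tracks log2(mp_ncpus) capped at 3.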
svn commit: r189788 - head/sys/kern
Author: jeff Date: Sat Mar 14 11:43:02 2009 New Revision: 189788 URL: http://svn.freebsd.org/changeset/base/189788 Log: - Call lock_profile_release when we're transitioning a lock to be owned by LK_KERNPROC. Discussed with: attilio Modified: head/sys/kern/kern_lock.c Modified: head/sys/kern/kern_lock.c == --- head/sys/kern/kern_lock.c Sat Mar 14 11:41:36 2009(r189787) +++ head/sys/kern/kern_lock.c Sat Mar 14 11:43:02 2009(r189788) @@ -686,7 +686,8 @@ __lockmgr_args(struct lock *lk, u_int fl lk->lk_recurse--; break; } - lock_profile_release_lock(&lk->lock_object); + if (tid != LK_KERNPROC) + lock_profile_release_lock(&lk->lock_object); if (atomic_cmpset_rel_ptr(&lk->lk_lock, tid, LK_UNLOCKED)) @@ -874,6 +875,7 @@ _lockmgr_disown(struct lock *lk, const c */ if (LK_HOLDER(lk->lk_lock) != tid) return; + lock_profile_release_lock(&lk->lock_object); LOCK_LOG_LOCK("XDISOWN", &lk->lock_object, 0, 0, file, line); WITNESS_UNLOCK(&lk->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_DEC(curthread); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r189789 - head/sys/kern
Author: jeff Date: Sat Mar 14 11:43:38 2009 New Revision: 189789 URL: http://svn.freebsd.org/changeset/base/189789 Log: - When a mutex is destroyed while locked we need to inform lock profiling that it has been released. Modified: head/sys/kern/kern_mutex.c Modified: head/sys/kern/kern_mutex.c == --- head/sys/kern/kern_mutex.c Sat Mar 14 11:43:02 2009(r189788) +++ head/sys/kern/kern_mutex.c Sat Mar 14 11:43:38 2009(r189789) @@ -765,6 +765,7 @@ mtx_destroy(struct mtx *m) else curthread->td_locks--; + lock_profile_release_lock(&m->lock_object); /* Tell witness this isn't locked to make it happy. */ WITNESS_UNLOCK(&m->lock_object, LOP_EXCLUSIVE, __FILE__, __LINE__); ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r189845 - in head/sys: kern sys
Author: jeff Date: Sun Mar 15 06:41:47 2009 New Revision: 189845 URL: http://svn.freebsd.org/changeset/base/189845 Log: - Implement a new mechanism for resetting lock profiling. We now guarantee that all cpus have acknowledged the cleared enable int by scheduling the resetting thread on each cpu in succession. Since all lock profiling happens within a critical section this guarantees that all cpus have left lock profiling before we clear the datastructures. - Assert that the per-thread queue of locks lock profiling is aware of is clear on thread exit. There were several cases where this was not true that slows lock profiling and leaks information. - Remove all objects from all lists before clearing any per-cpu information in reset. Lock profiling objects can migrate between per-cpu caches and previously these migrated objects could be zero'd before they'd been removed Discussed with: attilio Sponsored by: Nokia Modified: head/sys/kern/kern_thread.c head/sys/kern/subr_lock.c head/sys/sys/lock_profile.h Modified: head/sys/kern/kern_thread.c == --- head/sys/kern/kern_thread.c Sun Mar 15 06:40:57 2009(r189844) +++ head/sys/kern/kern_thread.c Sun Mar 15 06:41:47 2009(r189845) @@ -306,6 +306,8 @@ thread_alloc(void) void thread_free(struct thread *td) { + + lock_profile_thread_exit(td); if (td->td_cpuset) cpuset_rel(td->td_cpuset); td->td_cpuset = NULL; @@ -439,6 +441,7 @@ thread_wait(struct proc *p) /* Wait for any remaining threads to exit cpu_throw(). */ while (p->p_exitthreads) sched_relinquish(curthread); + lock_profile_thread_exit(td); cpuset_rel(td->td_cpuset); td->td_cpuset = NULL; cpu_thread_clean(td); Modified: head/sys/kern/subr_lock.c == --- head/sys/kern/subr_lock.c Sun Mar 15 06:40:57 2009(r189844) +++ head/sys/kern/subr_lock.c Sun Mar 15 06:41:47 2009(r189845) @@ -46,9 +46,11 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include +#include #include #include @@ -186,7 +188,8 @@ struct lock_prof_cpu { struct lock_prof_cpu *lp_cpu[MAXCPU]; -int lock_prof_enable = 0; +volatile int lock_prof_enable = 0; +static volatile int lock_prof_resetting; /* SWAG: sbuf size = avg stat. line size * number of locks */ #define LPROF_SBUF_SIZE256 * 400 @@ -239,25 +242,77 @@ lock_prof_init(void *arg) } SYSINIT(lockprof, SI_SUB_SMP, SI_ORDER_ANY, lock_prof_init, NULL); +/* + * To be certain that lock profiling has idled on all cpus before we + * reset, we schedule the resetting thread on all active cpus. Since + * all operations happen within critical sections we can be sure that + * it is safe to zero the profiling structures. + */ +static void +lock_prof_idle(void) +{ + struct thread *td; + int cpu; + + td = curthread; + thread_lock(td); + for (cpu = 0; cpu <= mp_maxid; cpu++) { + if (CPU_ABSENT(cpu)) + continue; + sched_bind(td, cpu); + } + sched_unbind(td); + thread_unlock(td); +} + +static void +lock_prof_reset_wait(void) +{ + + /* +* Spin relinquishing our cpu so that lock_prof_idle may +* run on it. +*/ + while (lock_prof_resetting) + sched_relinquish(curthread); +} + static void lock_prof_reset(void) { struct lock_prof_cpu *lpc; int enabled, i, cpu; + /* +* We not only race with acquiring and releasing locks but also +* thread exit. To be certain that threads exit without valid head +* pointers they must see resetting set before enabled is cleared. +* Otherwise a lock may not be removed from a per-thread list due +* to disabled being set but not wait for reset() to remove it below. 
+*/ + atomic_store_rel_int(&lock_prof_resetting, 1); enabled = lock_prof_enable; lock_prof_enable = 0; - pause("lpreset", hz / 10); + lock_prof_idle(); + /* +* Some objects may have migrated between CPUs. Clear all links +* before we zero the structures. Some items may still be linked +* into per-thread lists as well. +*/ for (cpu = 0; cpu <= mp_maxid; cpu++) { lpc = lp_cpu[cpu]; for (i = 0; i < LPROF_CACHE_SIZE; i++) { LIST_REMOVE(&lpc->lpc_types[0].lpt_objs[i], lpo_link); LIST_REMOVE(&lpc->lpc_types[1].lpt_objs[i], lpo_link); } + } + for (cpu = 0; cpu <= mp_maxid; cpu++) { + lpc = lp_cpu[cpu]; bzero(lpc, sizeof(*lpc)); lock_prof_init
svn commit: r189846 - head/sys/kern
Author: jeff Date: Sun Mar 15 08:03:54 2009 New Revision: 189846 URL: http://svn.freebsd.org/changeset/base/189846 Log: - Wrap lock profiling state variables in #ifdef LOCK_PROFILING blocks. Modified: head/sys/kern/kern_lock.c head/sys/kern/kern_mutex.c head/sys/kern/kern_rwlock.c head/sys/kern/kern_sx.c Modified: head/sys/kern/kern_lock.c == --- head/sys/kern/kern_lock.c Sun Mar 15 06:41:47 2009(r189845) +++ head/sys/kern/kern_lock.c Sun Mar 15 08:03:54 2009(r189846) @@ -333,16 +333,17 @@ __lockmgr_args(struct lock *lk, u_int fl const char *wmesg, int pri, int timo, const char *file, int line) { GIANT_DECLARE; - uint64_t waittime; struct lock_class *class; const char *iwmesg; uintptr_t tid, v, x; u_int op; - int contested, error, ipri, itimo, queue, wakeup_swapper; + int error, ipri, itimo, queue, wakeup_swapper; +#ifdef LOCK_PROFILING + uint64_t waittime = 0; + int contested = 0; +#endif - contested = 0; error = 0; - waittime = 0; tid = (uintptr_t)curthread; op = (flags & LK_TYPE_MASK); iwmesg = (wmesg == LK_WMESG_DEFAULT) ? lk->lock_object.lo_name : wmesg; Modified: head/sys/kern/kern_mutex.c == --- head/sys/kern/kern_mutex.c Sun Mar 15 06:41:47 2009(r189845) +++ head/sys/kern/kern_mutex.c Sun Mar 15 08:03:54 2009(r189846) @@ -254,8 +254,11 @@ _mtx_unlock_spin_flags(struct mtx *m, in int _mtx_trylock(struct mtx *m, int opts, const char *file, int line) { - int rval, contested = 0; +#ifdef LOCK_PROFILING uint64_t waittime = 0; + int contested = 0; +#endif + int rval; MPASS(curthread != NULL); KASSERT(m->mtx_lock != MTX_DESTROYED, @@ -296,15 +299,17 @@ _mtx_lock_sleep(struct mtx *m, uintptr_t int line) { struct turnstile *ts; + uintptr_t v; #ifdef ADAPTIVE_MUTEXES volatile struct thread *owner; #endif #ifdef KTR int cont_logged = 0; #endif +#ifdef LOCK_PROFILING int contested = 0; uint64_t waittime = 0; - uintptr_t v; +#endif if (mtx_owned(m)) { KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0, @@ -448,8 +453,11 @@ void _mtx_lock_spin(struct mtx *m, uintptr_t tid, int opts, const char *file, int line) { - int i = 0, contested = 0; + int i = 0; +#ifdef LOCK_PROFILING + int contested = 0; uint64_t waittime = 0; +#endif if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m); @@ -486,11 +494,13 @@ _thread_lock_flags(struct thread *td, in { struct mtx *m; uintptr_t tid; - int i, contested; - uint64_t waittime; + int i; +#ifdef LOCK_PROFILING + int contested = 0; + uint64_t waittime = 0; +#endif - contested = i = 0; - waittime = 0; + i = 0; tid = (uintptr_t)curthread; for (;;) { retry: Modified: head/sys/kern/kern_rwlock.c == --- head/sys/kern/kern_rwlock.c Sun Mar 15 06:41:47 2009(r189845) +++ head/sys/kern/kern_rwlock.c Sun Mar 15 08:03:54 2009(r189846) @@ -282,8 +282,10 @@ _rw_rlock(struct rwlock *rw, const char int spintries = 0; int i; #endif +#ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; +#endif uintptr_t v; KASSERT(rw->rw_lock != RW_DESTROYED, @@ -584,9 +586,11 @@ _rw_wlock_hard(struct rwlock *rw, uintpt int spintries = 0; int i; #endif - uint64_t waittime = 0; uintptr_t v, x; +#ifdef LOCK_PROFILING + uint64_t waittime = 0; int contested = 0; +#endif if (rw_wlocked(rw)) { KASSERT(rw->lock_object.lo_flags & RW_RECURSE, Modified: head/sys/kern/kern_sx.c == --- head/sys/kern/kern_sx.c Sun Mar 15 06:41:47 2009(r189845) +++ head/sys/kern/kern_sx.c Sun Mar 15 08:03:54 2009(r189846) @@ -431,9 +431,12 @@ _sx_xlock_hard(struct sx *sx, uintptr_t #ifdef ADAPTIVE_SX volatile struct thread *owner; #endif - uint64_t waittime = 0; uintptr_t x; 
- int contested = 0, error = 0; +#ifdef LOCK_PROFILING + uint64_t waittime = 0; + int contested = 0; +#endif + int error = 0; /* If we already hold an exclusive lock, then recurse. */ if (sx_xlocked(sx)) { @@ -652,8 +655,10 @@ _sx_slock_hard(struct sx *sx, int opts, #ifdef ADAPTIVE_SX volatile struct thread *owner; #endif +#ifdef LOCK_PROFILING uint64_t waittime = 0; int contested = 0; +#endif uintptr_t x; in
Re: svn commit: r189846 - head/sys/kern
Sorry for the temporary build breakage; I meant to commit these two patches together.

Jeff

On Sun, 15 Mar 2009, Jeff Roberson wrote:
Author: jeff Date: Sun Mar 15 08:03:54 2009 New Revision: 189846 URL: http://svn.freebsd.org/changeset/base/189846 Log: - Wrap lock profiling state variables in #ifdef LOCK_PROFILING blocks. Modified: head/sys/kern/kern_lock.c head/sys/kern/kern_mutex.c head/sys/kern/kern_rwlock.c head/sys/kern/kern_sx.c
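The wrapping in r189846 follows a common idiom: the contested/waittime bookkeeping only exists in LOCK_PROFILING kernels, so declaring and initializing the variables under the same #ifdef keeps dead stores out of the hot lock paths and avoids set-but-unused warnings. A minimal sketch of the idiom outside the kernel (build with -DLOCK_PROFILING to include the counters; the function name is made up for illustration):

#include <stdio.h>

static int
acquire_thing(void)
{
#ifdef LOCK_PROFILING
        unsigned long long waittime = 0;
        int contested = 0;
#endif
        int error = 0;

        /* ... a contended path would bump contested/waittime here ... */
#ifdef LOCK_PROFILING
        printf("contested=%d waittime=%llu\n", contested, waittime);
#endif
        return (error);
}

int
main(void)
{
        return (acquire_thing());
}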
svn commit: r208241 - head/sbin/tunefs
Author: jeff Date: Tue May 18 01:45:28 2010 New Revision: 208241 URL: http://svn.freebsd.org/changeset/base/208241 Log: - Round up the journal size to the block size so we don't confuse fsck. Reported by: Mikolaj Golub - Only require 256k of blocks per-cg when trying to allocate contiguous journal blocks. The storage may not actually be contiguous but is at least within one cg. - When disabling SUJ leave SU enabled and report this to the user. It is expected that users will upgrade SU filesystems to SUJ and want a similar downgrade path. Modified: head/sbin/tunefs/tunefs.c Modified: head/sbin/tunefs/tunefs.c == --- head/sbin/tunefs/tunefs.c Tue May 18 00:46:15 2010(r208240) +++ head/sbin/tunefs/tunefs.c Tue May 18 01:45:28 2010(r208241) @@ -358,10 +358,12 @@ main(int argc, char *argv[]) warnx("%s remains unchanged as disabled", name); } else { journal_clear(); - sblock.fs_flags &= ~(FS_DOSOFTDEP | FS_SUJ); + sblock.fs_flags &= ~FS_SUJ; sblock.fs_sujfree = 0; - warnx("%s cleared, " - "remove .sujournal to reclaim space", name); + warnx("%s cleared but soft updates still set.", + name); + + warnx("remove .sujournal to reclaim space"); } } } @@ -546,7 +548,7 @@ journal_balloc(void) * Try to minimize fragmentation by requiring a minimum * number of blocks present. */ - if (cgp->cg_cs.cs_nbfree > 128 * 1024 * 1024) + if (cgp->cg_cs.cs_nbfree > 256 * 1024) break; if (contig == 0 && cgp->cg_cs.cs_nbfree) break; @@ -906,6 +908,8 @@ journal_alloc(int64_t size) if (size / sblock.fs_fsize > sblock.fs_fpg) size = sblock.fs_fpg * sblock.fs_fsize; size = MAX(SUJ_MIN, size); + /* fsck does not support fragments in journal files. */ + size = roundup(size, sblock.fs_bsize); } resid = blocks = size / sblock.fs_bsize; if (sblock.fs_cstotal.cs_nbfree < blocks) { ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
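The roundup() added above matters because a journal sized from the filesystem can otherwise end on a fragment boundary, which the SUJ code in fsck does not handle. roundup() is the usual sys/param.h macro; a quick runnable check of the arithmetic with an assumed 32 KB block size:

#include <stdio.h>
#include <stdint.h>

/* Same definition as sys/param.h's roundup(). */
#define roundup(x, y)   ((((x) + ((y) - 1)) / (y)) * (y))

int
main(void)
{
        int64_t bsize = 32768;          /* fs_bsize on a typical UFS2 fs */
        int64_t sizes[] = { 4194304, 4200000, 33554431 };
        unsigned i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                printf("%jd -> %jd\n", (intmax_t)sizes[i],
                    (intmax_t)roundup(sizes[i], bsize));
        return (0);
}

An already block-aligned size is unchanged; anything else is bumped to the next multiple of fs_bsize, so the journal file never ends in a fragment.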
svn commit: r208287 - head/sys/ufs/ffs
Author: jeff Date: Wed May 19 06:18:01 2010 New Revision: 208287 URL: http://svn.freebsd.org/changeset/base/208287 Log: - Don't immediately re-run softdepflush if we didn't make any progress on the last iteration. This can lead to a deadlock when we have worklist items that cannot be immediately satisfied. Reported by: uqs, Dimitry Andric - Remove some unnecessary debugging code and place some other under SUJ_DEBUG. - Examine the journal state in softdep_slowdown(). - Re-format some comments so I may more easily add flag descriptions. Modified: head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/softdep.h Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_softdep.c Wed May 19 04:00:42 2010 (r208286) +++ head/sys/ufs/ffs/ffs_softdep.c Wed May 19 06:18:01 2010 (r208287) @@ -51,7 +51,6 @@ __FBSDID("$FreeBSD$"); #ifndef DEBUG #define DEBUG #endif -#defineSUJ_DEBUG #include #include @@ -1200,6 +1199,7 @@ softdep_flush(void) struct ufsmount *ump; struct thread *td; int remaining; + int progress; int vfslocked; td = curthread; @@ -1224,7 +1224,7 @@ softdep_flush(void) } FREE_LOCK(&lk); VFS_UNLOCK_GIANT(vfslocked); - remaining = 0; + remaining = progress = 0; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { nmp = TAILQ_NEXT(mp, mnt_list); @@ -1233,7 +1233,7 @@ softdep_flush(void) if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) continue; vfslocked = VFS_LOCK_GIANT(mp); - softdep_process_worklist(mp, 0); + progress += softdep_process_worklist(mp, 0); ump = VFSTOUFS(mp); remaining += ump->softdep_on_worklist - ump->softdep_on_worklist_inprogress; @@ -1243,7 +1243,7 @@ softdep_flush(void) vfs_unbusy(mp); } mtx_unlock(&mountlist_mtx); - if (remaining) + if (remaining && progress) continue; ACQUIRE_LOCK(&lk); if (!req_pending) @@ -1449,7 +1449,7 @@ process_worklist_item(mp, flags) struct mount *mp; int flags; { - struct worklist *wk, *wkXXX; + struct worklist *wk; struct ufsmount *ump; struct vnode *vp; int matchcnt = 0; @@ -1472,11 +1472,8 @@ process_worklist_item(mp, flags) vp = NULL; ump = VFSTOUFS(mp); LIST_FOREACH(wk, &ump->softdep_workitem_pending, wk_list) { - if (wk->wk_state & INPROGRESS) { - wkXXX = wk; + if (wk->wk_state & INPROGRESS) continue; - } - wkXXX = wk; /* Record the last valid wk pointer. */ if ((flags & LK_NOWAIT) == 0 || wk->wk_type != D_DIRREM) break; wk->wk_state |= INPROGRESS; @@ -2364,7 +2361,7 @@ remove_from_journal(wk) mtx_assert(&lk, MA_OWNED); ump = VFSTOUFS(wk->wk_mp); -#ifdef DEBUG /* XXX Expensive, temporary. */ +#ifdef SUJ_DEBUG { struct worklist *wkn; @@ -2401,16 +2398,15 @@ journal_space(ump, thresh) struct jblocks *jblocks; int avail; + jblocks = ump->softdep_jblocks; + if (jblocks == NULL) + return (1); /* * We use a tighter restriction here to prevent request_cleanup() * running in threads from running into locks we currently hold. */ if (num_inodedep > (max_softdeps / 10) * 9) return (0); - - jblocks = ump->softdep_jblocks; - if (jblocks == NULL) - return (1); if (thresh) thresh = jblocks->jb_min; else @@ -2727,7 +2723,7 @@ softdep_process_journal(mp, flags) break; printf("softdep: Out of journal space!\n"); softdep_speedup(); - msleep(jblocks, &lk, PRIBIO, "jblocks", 1); + msleep(jblocks, &lk, PRIBIO, "jblocks", hz); } FREE_LOCK(&lk); jseg = malloc(sizeof(*jseg), M_JSEG, M_SOFTDEP_FLAGS); @@ -10870,18 +10866,29 @@ int softdep_slowdown(vp) struct vnode *vp; { + struct ufsmount *ump; + int jlow; int max_softdeps_hard; ACQUIRE_LOCK(&lk); + jlow = 0; + /* +* Check for journal space if needed. 
+*/ + if (DOINGSUJ(vp)) { + ump = VFSTOUFS(vp->v_mount); + if
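The remaining/progress pair above encodes a simple rule: loop again immediately only while there is outstanding work and the last pass actually completed something; otherwise go back to sleep rather than spinning on items that cannot yet be satisfied. A stripped-down, runnable sketch of that loop shape (the fake worklist below is purely illustrative):

#include <stdio.h>

static int pending = 5;         /* work items; only some can complete now */

/* Pretend worklist pass: completes one item on every other call. */
static int
process_worklist(void)
{
        static int parity;

        if ((parity++ & 1) == 0 && pending > 0) {
                pending--;
                return (1);
        }
        return (0);
}

int
main(void)
{
        int passes = 0;

        for (;;) {
                int progress, remaining;

                progress = process_worklist();
                remaining = pending;
                passes++;
                /*
                 * The old logic looped whenever remaining != 0, which can
                 * spin forever if nothing is able to complete yet.  The
                 * r208287 form loops only while progress is also being
                 * made, and otherwise sleeps (here: simply stops).
                 */
                if (remaining && progress)
                        continue;
                break;
        }
        printf("stopped after %d passes, %d items still pending\n",
            passes, pending);
        return (0);
}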
svn commit: r209716 - head/sbin/fsck_ffs
Author: jeff Date: Tue Jul 6 07:07:29 2010 New Revision: 209716 URL: http://svn.freebsd.org/changeset/base/209716 Log: - Permit zero length directories as a handled inconsistency. This allows directory truncation to proceed before the link has been cleared. This is accomplished by detecting a directory with no . or .. links and clearing the named directory entry in the parent. - Add a new function ino_remref() which handles the details of removing a reference to an inode as a result of a lost directory. There were some minor errors in various subcases of this routine. Modified: head/sbin/fsck_ffs/suj.c Modified: head/sbin/fsck_ffs/suj.c == --- head/sbin/fsck_ffs/suj.cTue Jul 6 03:48:46 2010(r209715) +++ head/sbin/fsck_ffs/suj.cTue Jul 6 07:07:29 2010(r209716) @@ -808,6 +808,44 @@ blk_isat(ino_t ino, ufs_lbn_t lbn, ufs2_ } /* + * Clear the directory entry at diroff that should point to child. Minimal + * checking is done and it is assumed that this path was verified with isat. + */ +static void +ino_clrat(ino_t parent, off_t diroff, ino_t child) +{ + union dinode *dip; + struct direct *dp; + ufs2_daddr_t blk; + uint8_t *block; + ufs_lbn_t lbn; + int blksize; + int frags; + int doff; + + if (debug) + printf("Clearing inode %d from parent %d at offset %jd\n", + child, parent, diroff); + + lbn = lblkno(fs, diroff); + doff = blkoff(fs, diroff); + dip = ino_read(parent); + blk = ino_blkatoff(dip, parent, lbn, &frags); + blksize = sblksize(fs, DIP(dip, di_size), lbn); + block = dblk_read(blk, blksize); + dp = (struct direct *)&block[doff]; + if (dp->d_ino != child) + errx(1, "Inode %d does not exist in %d at %jd", + child, parent, diroff); + dp->d_ino = 0; + dblk_dirty(blk); + /* +* The actual .. reference count will already have been removed +* from the parent by the .. remref record. +*/ +} + +/* * Determines whether a pointer to an inode exists within a directory * at a specified offset. Returns the mode of the found entry. */ @@ -1134,6 +1172,57 @@ ino_setskip(struct suj_ino *sino, ino_t sino->si_skipparent = 1; } +static void +ino_remref(ino_t parent, ino_t child, uint64_t diroff, int isdotdot) +{ + struct suj_ino *sino; + struct suj_rec *srec; + struct jrefrec *rrec; + + /* +* Lookup this inode to see if we have a record for it. +*/ + sino = ino_lookup(child, 0); + /* +* Tell any child directories we've already removed their +* parent link cnt. Don't try to adjust our link down again. +*/ + if (sino != NULL && isdotdot == 0) + ino_setskip(sino, parent); + /* +* No valid record for this inode. Just drop the on-disk +* link by one. +*/ + if (sino == NULL || sino->si_hasrecs == 0) { + ino_decr(child); + return; + } + /* +* Use ino_adjust() if ino_check() has already processed this +* child. If we lose the last non-dot reference to a +* directory it will be discarded. +*/ + if (sino->si_linkadj) { + sino->si_nlink--; + if (isdotdot) + sino->si_dotlinks--; + ino_adjust(sino); + return; + } + /* +* If we haven't yet processed this inode we need to make +* sure we will successfully discover the lost path. If not +* use nlinkadj to remember. +*/ + TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { + rrec = (struct jrefrec *)srec->sr_rec; + if (rrec->jr_parent == parent && + rrec->jr_diroff == diroff) + return; + } + sino->si_nlinkadj++; +} + /* * Free the children of a directory when the directory is discarded. 
*/ @@ -1141,13 +1230,11 @@ static void ino_free_children(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { struct suj_ino *sino; - struct suj_rec *srec; - struct jrefrec *rrec; struct direct *dp; off_t diroff; uint8_t *block; int skipparent; - int isparent; + int isdotdot; int dpoff; int size; @@ -1165,53 +1252,15 @@ ino_free_children(ino_t ino, ufs_lbn_t l continue; if (dp->d_namlen == 1 && dp->d_name[0] == '.') continue; - isparent = dp->d_namlen == 2 && dp->d_name[0] == '.' && + isdotdot = dp->d_namlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.'; - if (isparent && skipparent == 1) +
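ino_clrat() above splits the 64-bit directory offset into a logical block number and an offset within that block, which for UFS is plain divide/modulo arithmetic on the block size. A tiny runnable example with an assumed 16 KB block size (simplified macros; the real lblkno()/blkoff() shift and mask via fs_bshift/fs_qbmask):

#include <sys/types.h>
#include <stdio.h>
#include <stdint.h>

#define BSIZE           16384
#define lblkno(loc)     ((loc) / BSIZE)
#define blkoff(loc)     ((loc) % BSIZE)

int
main(void)
{
        off_t diroff = 40000;   /* byte offset of a directory entry */

        /* 40000 bytes into the directory: block 2, 7232 bytes in. */
        printf("lbn = %jd, doff = %jd\n",
            (intmax_t)lblkno(diroff), (intmax_t)blkoff(diroff));
        return (0);
}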
svn commit: r209717 - in head/sys/ufs: ffs ufs
Author: jeff Date: Tue Jul 6 07:11:04 2010 New Revision: 209717 URL: http://svn.freebsd.org/changeset/base/209717 Log: - Handle the truncation of an inode with an effective link count of 0 in the context of the process that reduced the effective count. Previously all truncation as a result of unlink happened in the softdep flush thread. This had the effect of being impossible to rate limit properly with the journal code. Now the process issuing unlinks is suspended when the journal files. This has a side-effect of improving rm performance by allowing more concurrent work. - Handle two cases in inactive, one for effnlink == 0 and another when nlink finally reaches 0. - Eliminate the SPACECOUNTED related code since the truncation is no longer delayed. Discussed with: mckusick Modified: head/sys/ufs/ffs/ffs_alloc.c head/sys/ufs/ffs/ffs_inode.c head/sys/ufs/ffs/ffs_softdep.c head/sys/ufs/ffs/ffs_vnops.c head/sys/ufs/ffs/softdep.h head/sys/ufs/ufs/inode.h head/sys/ufs/ufs/ufs_inode.c head/sys/ufs/ufs/ufs_lookup.c head/sys/ufs/ufs/ufs_vnops.c Modified: head/sys/ufs/ffs/ffs_alloc.c == --- head/sys/ufs/ffs/ffs_alloc.cTue Jul 6 07:07:29 2010 (r209716) +++ head/sys/ufs/ffs/ffs_alloc.cTue Jul 6 07:11:04 2010 (r209717) @@ -191,11 +191,6 @@ retry: bno = ffs_hashalloc(ip, cg, bpref, size, size, ffs_alloccg); if (bno > 0) { delta = btodb(size); - if (ip->i_flag & IN_SPACECOUNTED) { - UFS_LOCK(ump); - fs->fs_pendingblocks += delta; - UFS_UNLOCK(ump); - } DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta); if (flags & IO_EXT) ip->i_flag |= IN_CHANGE; @@ -321,11 +316,6 @@ retry: if (bp->b_blkno != fsbtodb(fs, bno)) panic("ffs_realloccg: bad blockno"); delta = btodb(nsize - osize); - if (ip->i_flag & IN_SPACECOUNTED) { - UFS_LOCK(ump); - fs->fs_pendingblocks += delta; - UFS_UNLOCK(ump); - } DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta); if (flags & IO_EXT) ip->i_flag |= IN_CHANGE; @@ -394,11 +384,6 @@ retry: ffs_blkfree(ump, fs, ip->i_devvp, bprev, (long)osize, ip->i_number, NULL); delta = btodb(nsize - osize); - if (ip->i_flag & IN_SPACECOUNTED) { - UFS_LOCK(ump); - fs->fs_pendingblocks += delta; - UFS_UNLOCK(ump); - } DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta); if (flags & IO_EXT) ip->i_flag |= IN_CHANGE; @@ -2422,11 +2407,6 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) break; ip = VTOI(vp); - if (ip->i_flag & IN_SPACECOUNTED) { - UFS_LOCK(ump); - fs->fs_pendingblocks += cmd.size; - UFS_UNLOCK(ump); - } DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + cmd.size); ip->i_flag |= IN_CHANGE; vput(vp); Modified: head/sys/ufs/ffs/ffs_inode.c == --- head/sys/ufs/ffs/ffs_inode.cTue Jul 6 07:07:29 2010 (r209716) +++ head/sys/ufs/ffs/ffs_inode.cTue Jul 6 07:11:04 2010 (r209717) @@ -180,6 +180,8 @@ ffs_truncate(vp, length, flags, cred, td */ if ((flags & (IO_EXT | IO_NORMAL)) == 0) flags |= IO_NORMAL; + if (!DOINGSOFTDEP(vp) && !DOINGASYNC(vp)) + flags |= IO_SYNC; /* * If we are truncating the extended-attributes, and cannot * do it with soft updates, then do it slowly here. If we are @@ -310,10 +312,6 @@ ffs_truncate(vp, length, flags, cred, td */ if ((error = ffs_syncvnode(vp, MNT_WAIT)) != 0) goto out; - UFS_LOCK(ump); - if (ip->i_flag & IN_SPACECOUNTED) - fs->fs_pendingblocks -= datablocks; - UFS_UNLOCK(ump); /* * We have to journal the truncation before we change * any blocks so we don't leave the file partially Modified: head/sys/ufs/ffs/ffs_softdep.c == --- head/sys/ufs/ffs/ffs_sof
svn commit: r187357 - in head/sys: kern sys
Author: jeff Date: Sat Jan 17 07:17:57 2009 New Revision: 187357 URL: http://svn.freebsd.org/changeset/base/187357 Log: - Implement generic macros for producing KTR records that are compatible with src/tools/sched/schedgraph.py. This allows developers to quickly create a graphical view of ktr data for any resource in the system. - Add sched_tdname() and the pcpu field 'name' for quickly and uniformly identifying records associated with a thread or cpu. - Reimplement the KTR_SCHED traces using the new generic facility. Obtained from:attilio Discussed with: jhb Sponsored by: Nokia Modified: head/sys/kern/kern_clock.c head/sys/kern/kern_synch.c head/sys/kern/sched_4bsd.c head/sys/kern/sched_ule.c head/sys/kern/subr_pcpu.c head/sys/sys/ktr.h head/sys/sys/pcpu.h head/sys/sys/sched.h Modified: head/sys/kern/kern_clock.c == --- head/sys/kern/kern_clock.c Sat Jan 17 06:55:28 2009(r187356) +++ head/sys/kern/kern_clock.c Sat Jan 17 07:17:57 2009(r187357) @@ -498,8 +498,8 @@ statclock(int usermode) rss = pgtok(vmspace_resident_count(vm)); if (ru->ru_maxrss < rss) ru->ru_maxrss = rss; - CTR4(KTR_SCHED, "statclock: %p(%s) prio %d stathz %d", - td, td->td_name, td->td_priority, (stathz)?stathz:hz); + KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock", + "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz); thread_lock_flags(td, MTX_QUIET); sched_clock(td); thread_unlock(td); Modified: head/sys/kern/kern_synch.c == --- head/sys/kern/kern_synch.c Sat Jan 17 06:55:28 2009(r187356) +++ head/sys/kern/kern_synch.c Sat Jan 17 07:17:57 2009(r187357) @@ -71,6 +71,13 @@ __FBSDID("$FreeBSD$"); #include #endif +#defineKTDSTATE(td) \ + (((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \ + ((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \ + ((td)->td_inhibitors & TDI_SWAPPED) != 0 ? "swapped" : \ + ((td)->td_inhibitors & TDI_LOCK) != 0 ? "blocked" : \ + ((td)->td_inhibitors & TDI_IWAIT) != 0 ? 
"iwait" : "yielding") + static void synch_setup(void *dummy); SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup, NULL); @@ -425,25 +432,19 @@ mi_switch(int flags, struct thread *newt td->td_tid, td->td_sched, p->p_pid, td->td_name); #if (KTR_COMPILE & KTR_SCHED) != 0 if (TD_IS_IDLETHREAD(td)) - CTR3(KTR_SCHED, "mi_switch: %p(%s) prio %d idle", - td, td->td_name, td->td_priority); - else if (newtd != NULL) - CTR5(KTR_SCHED, - "mi_switch: %p(%s) prio %d preempted by %p(%s)", - td, td->td_name, td->td_priority, newtd, - newtd->td_name); + KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "idle", + "prio:%d", td->td_priority); else - CTR6(KTR_SCHED, - "mi_switch: %p(%s) prio %d inhibit %d wmesg %s lock %s", - td, td->td_name, td->td_priority, - td->td_inhibitors, td->td_wmesg, td->td_lockname); + KTR_STATE3(KTR_SCHED, "thread", sched_tdname(td), KTDSTATE(td), + "prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg, + "lockname:\"%s\"", td->td_lockname); #endif #ifdef XEN PT_UPDATES_FLUSH(); #endif sched_switch(td, newtd, flags); - CTR3(KTR_SCHED, "mi_switch: running %p(%s) prio %d", - td, td->td_name, td->td_priority); + KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running", + "prio:%d", td->td_priority); CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)", td->td_tid, td->td_sched, p->p_pid, td->td_name); Modified: head/sys/kern/sched_4bsd.c == --- head/sys/kern/sched_4bsd.c Sat Jan 17 06:55:28 2009(r187356) +++ head/sys/kern/sched_4bsd.c Sat Jan 17 07:17:57 2009(r187357) @@ -82,6 +82,8 @@ dtrace_vtime_switch_func_tdtrace_vtime_ #endif #defineNICE_WEIGHT 1 /* Priorities per nice level. */ +#defineTS_NAME_LEN (MAXCOMLEN + sizeof(" td ") + sizeof(__STRING(UINT_MAX))) + /* * The schedulable entity that runs a context. * This is an extension to the thread structure and is tailored to @@ -93,6 +95,9 @@ struct td_sched { int ts_slptime; /* (j) Seconds !RUNNING. */ int ts_flags; struct runq *ts_runq; /* runq the thread is currently on */ +#ifdef K
svn commit: r187358 - head/tools/sched
Author: jeff Date: Sat Jan 17 07:24:25 2009 New Revision: 187358 URL: http://svn.freebsd.org/changeset/base/187358 Log: - Rewrite the parser to support the new generic schedgraph interface. This no longer requires any custom classes or parsers to support new event types. - Add an optional command line argument for specifying the clock frequency in ghz. This is useful for traces that do not include KTR_SCHED. Sponsored by: Nokia - Add support for sorting rows by clicking and dragging them to their new position. - Add support for configuring the cpu background colors. - Improve the scaling so a better center is maintained as you zoom. This is not perfect due to precision loss with floats used in the window views. - Add new colors and a random assignment for unknown event types. A table is used for known event types. This is the only event specific information. Modified: head/tools/sched/schedgraph.py Modified: head/tools/sched/schedgraph.py == --- head/tools/sched/schedgraph.py Sat Jan 17 07:17:57 2009 (r187357) +++ head/tools/sched/schedgraph.py Sat Jan 17 07:24:25 2009 (r187358) @@ -28,6 +28,7 @@ import sys import re +import random from Tkinter import * # To use: @@ -53,30 +54,96 @@ from Tkinter import * # while the workload is still running is to avoid wasting log entries on # "idle" time at the end. # - Dump the trace to a file: 'ktrdump -ct > ktr.out' -# - Run the python script: 'python schedgraph.py ktr.out' +# - Run the python script: 'python schedgraph.py ktr.out' optionally provide +# your cpu frequency in ghz: 'python schedgraph.py ktr.out 2.4' # # To do: -# 1) Add a per-thread summary display -# 2) Add bounding box style zoom. -# 3) Click to center. -# 4) Implement some sorting mechanism. -# 5) Widget to display variable-range data (e.g. q length) -# 6) Reorder rows, hide rows, etc. -# 7) "Vertical rule" to help relate data in different rows -# 8) Mouse-over popup of full thread/event/row lable (currently truncated) -# 9) More visible anchors for popup event windows +# Add a per-source summary display +# Click to move. 
+# Hide rows +# "Vertical rule" to help relate data in different rows +# Mouse-over popup of full thread/event/row label (currently truncated) +# More visible anchors for popup event windows # # BUGS: 1) Only 8 CPUs are supported, more CPUs require more choices of # colours to represent them ;-) -# 2) Extremely short traces may cause a crash because the code -# assumes there is always at least one stathz entry logged, and -# the number of such events is used as a denominator + +eventcolors = [ + ("count", "red"), + ("running", "green"), + ("idle","grey"), + ("yielding","yellow"), + ("swapped", "violet"), + ("suspended", "purple"), + ("iwait", "grey"), + ("sleep", "blue"), + ("blocked", "dark red"), + ("runq add","yellow"), + ("runq rem","yellow"), + ("thread exit", "grey"), + ("proc exit", "grey"), + ("callwheel idle", "grey"), + ("callout running", "green"), + ("lock acquire", "blue"), + ("lock contest", "purple"), + ("failed lock try", "red"), + ("lock release", "grey"), + ("tick","black"), + ("prio","black"), + ("lend prio", "black"), + ("wokeup", "black") +] + +cpucolors = [ + ("CPU 0", "light grey"), + ("CPU 1", "dark grey"), + ("CPU 2", "light blue"), + ("CPU 3", "light pink"), + ("CPU 4", "blanched almond"), + ("CPU 5", "slate grey"), + ("CPU 6", "tan"), + ("CPU 7", "thistle"), + ("CPU 8", "white") +] + +colors = [ + "white", "thistle", "blanched almond", "tan", "chartreuse", + "dark red", "red", "pale violet red", "pink", "light pink", + "dark orange", "orange", "coral", "light coral", + "goldenrod", "gold", "yellow", "light yellow", + "dark green", "green", "light green", "light sea green", + "dark blue", "blue", "light blue", "steel blue", "light slate blue", + "dark violet", "violet", "purple", "blue violet", + "dark grey", "slate grey", "light grey", + "black", +] +colors.sort() ticksps = None status = None -configtypes = [] +colormap = None +ktrfile = None +clockfreq = None +sources = [] lineno = -1 +class Colormap: + def __init__(self, table): + self.table = table + self.map = {} + for entry in table: + self.map[entry[0]] = entry[1] + + def lookup(self, name): + try: + color = self.map[name] + except: + color =
svn commit: r187359 - head/tools/sched
Author: jeff Date: Sat Jan 17 11:19:15 2009 New Revision: 187359 URL: http://svn.freebsd.org/changeset/base/187359 Log: - Add a new source configuration menu option that allows hiding and displaying sources. - Add functions to the main SchedGraph to facilitate source hiding. The source is simply moved off screen and all other sources are moved to compensate. Modified: head/tools/sched/schedgraph.py Modified: head/tools/sched/schedgraph.py == --- head/tools/sched/schedgraph.py Sat Jan 17 07:24:25 2009 (r187358) +++ head/tools/sched/schedgraph.py Sat Jan 17 11:19:15 2009 (r187359) @@ -59,8 +59,6 @@ from Tkinter import * # # To do: # Add a per-source summary display -# Click to move. -# Hide rows # "Vertical rule" to help relate data in different rows # Mouse-over popup of full thread/event/row label (currently truncated) # More visible anchors for popup event windows @@ -294,6 +292,118 @@ class ColorConfigure(Toplevel): for item in self.types: item.revert() +class SourceConf(Frame): + def __init__(self, master, source): + Frame.__init__(self, master) + if (source.hidden == 1): + enabled = 0 + else: + enabled = 1 + self.source = source + self.name = source.name + self.enabled = IntVar() + self.enabled_default = enabled + self.enabled_current = enabled + self.enabled.set(enabled) + self.draw() + + def draw(self): + self.label = Label(self, text=self.name, anchor=W) + self.checkbox = Checkbutton(self, text="enabled", + variable=self.enabled) + self.label.grid(row=0, column=0, sticky=E+W) + self.checkbox.grid(row=0, column=1) + self.columnconfigure(0, weight=1) + + def apply(self): + echange = 0 + if (self.enabled_current != self.enabled.get()): + echange = 1 + self.enabled_current = self.enabled.get() + if (echange != 0): + if (self.enabled_current): + graph.sourceshow(self.source) + else: + graph.sourcehide(self.source) + return + + def revert(self): + self.enabled.set(self.enabled_default) + + def check(self): + self.enabled.set(1) + + def uncheck(self): + self.enabled.set(0) + +class SourceConfigure(Toplevel): + def __init__(self): + Toplevel.__init__(self) + self.resizable(0, 0) + self.title("Source Configuration") + self.items = [] + self.iframe = Frame(self) + self.iframe.grid(row=0, column=0, sticky=E+W) + f = LabelFrame(self.iframe, bd=4, text="Sources") + self.items.append(f) + self.buttons = Frame(self) + self.items[0].grid(row=0, column=0, sticky=E+W) + self.columnconfigure(0, weight=1) + self.sconfig = [] + self.irow = 0 + self.icol = 0 + for source in sources: + self.addsource(source) + self.drawbuttons() + self.buttons.grid(row=1, column=0, sticky=W) + + def addsource(self, source): + if (self.irow > 30): + self.icol += 1 + self.irow = 0 + c = self.icol + f = LabelFrame(self.iframe, bd=4, text="Sources") + f.grid(row=0, column=c, sticky=N+E+W) + self.items.append(f) + item = SourceConf(self.items[self.icol], source) + self.sconfig.append(item) + item.grid(row=self.irow, column=0, sticky=E+W) + self.irow += 1 + + def drawbuttons(self): + self.apply = Button(self.buttons, text="Apply", + command=self.apress) + self.default = Button(self.buttons, text="Revert", + command=self.rpress) + self.checkall = Button(self.buttons, text="Check All", + command=self.cpress) + self.uncheckall = Button(self.buttons, text="Uncheck All", + command=self.upress) + self.checkall.grid(row=0, column=0, sticky=W) + self.uncheckall.grid(row=0, column=1, sticky=W) + self.apply.grid(row=0, column=2, sticky=W) + self.default.grid(row=0, column=3, sticky=W) + self.buttons.columnconfigure(0, weight=1) + 
self.buttons.columnconfigure(
svn commit: r187376 - head/tools/sched
Author: jeff Date: Sun Jan 18 04:49:01 2009 New Revision: 187376 URL: http://svn.freebsd.org/changeset/base/187376 Log: - Significantly speedup hiding and displaying multiple rows by writing an optimized single pass function for each. This reduces the number of tkinter calls required to the minimum. - Add a right-click context menu for sources. Supported commands hide the source, hide the whole group the source is in, and bring up a stat window. - Add a source stat frame that gives an event frequency table as well as the total duration for each event type that has a duration. This can be used to see, for example, the total time a thread spent running or blocked by a wchan or lock. Modified: head/tools/sched/schedgraph.py Modified: head/tools/sched/schedgraph.py == --- head/tools/sched/schedgraph.py Sun Jan 18 04:29:42 2009 (r187375) +++ head/tools/sched/schedgraph.py Sun Jan 18 04:49:01 2009 (r187376) @@ -315,17 +315,13 @@ class SourceConf(Frame): self.checkbox.grid(row=0, column=1) self.columnconfigure(0, weight=1) - def apply(self): - echange = 0 + def changed(self): if (self.enabled_current != self.enabled.get()): - echange = 1 + return 1 + return 0 + + def apply(self): self.enabled_current = self.enabled.get() - if (echange != 0): - if (self.enabled_current): - graph.sourceshow(self.source) - else: - graph.sourcehide(self.source) - return def revert(self): self.enabled.set(self.enabled_default) @@ -389,6 +385,21 @@ class SourceConfigure(Toplevel): self.buttons.columnconfigure(3, weight=1) def apress(self): + disable_sources = [] + enable_sources = [] + for item in self.sconfig: + if (item.changed() == 0): + continue + if (item.enabled.get() == 1): + enable_sources.append(item.source) + else: + disable_sources.append(item.source) + + if (len(disable_sources)): + graph.sourcehidelist(disable_sources) + if (len(enable_sources)): + graph.sourceshowlist(enable_sources) + for item in self.sconfig: item.apply() @@ -404,6 +415,77 @@ class SourceConfigure(Toplevel): for item in self.sconfig: item.uncheck() +# Reverse compare of second member of the tuple +def cmp_counts(x, y): + return y[1] - x[1] + +class SourceStats(Toplevel): + def __init__(self, source): + self.source = source + Toplevel.__init__(self) + self.resizable(0, 0) + self.title(source.name + " statistics") + self.evframe = LabelFrame(self, + text="Event Frequency and Duration") + self.evframe.grid(row=0, column=0, sticky=E+W) + eventtypes={} + for event in self.source.events: + if (event.type == "pad"): + continue + duration = event.duration + if (eventtypes.has_key(event.name)): + (c, d) = eventtypes[event.name] + c += 1 + d += duration + eventtypes[event.name] = (c, d) + else: + eventtypes[event.name] = (1, duration) + events = [] + for k, v in eventtypes.iteritems(): + (c, d) = v + events.append((k, c, d)) + events.sort(cmp=cmp_counts) + + ypos = 0 + for event in events: + (name, c, d) = event + l = Label(self.evframe, text=name, bd=1, + relief=SUNKEN, anchor=W, width=30) + m = Label(self.evframe, text=str(c), bd=1, + relief=SUNKEN, anchor=W, width=10) + r = Label(self.evframe, text=ticks2sec(d), + bd=1, relief=SUNKEN, width=10) + l.grid(row=ypos, column=0, sticky=E+W) + m.grid(row=ypos, column=1, sticky=E+W) + r.grid(row=ypos, column=2, sticky=E+W) + ypos += 1 + + +class SourceContext(Menu): + def __init__(self, event, source): +
svn commit: r187379 - head/tools/sched
Author: jeff Date: Sun Jan 18 05:44:31 2009 New Revision: 187379 URL: http://svn.freebsd.org/changeset/base/187379 Log: - Add summary information to the title once the file is parsed rather than printing it to the terminal. Now only parse errors go to the terminal. - Speedup drawing by raising and lowering tags only once everything has been drawn. Surprisingly, it now takes a little longer to parse than it does to draw. - Parameterize the layout with X_ and Y_ defines that determine the sizes of various things. - Remove unnecessary tags. Modified: head/tools/sched/schedgraph.py Modified: head/tools/sched/schedgraph.py == --- head/tools/sched/schedgraph.py Sun Jan 18 05:35:58 2009 (r187378) +++ head/tools/sched/schedgraph.py Sun Jan 18 05:44:31 2009 (r187379) @@ -86,7 +86,7 @@ eventcolors = [ ("lock contest", "purple"), ("failed lock try", "red"), ("lock release", "grey"), - ("tick","black"), + ("statclock", "black"), ("prio","black"), ("lend prio", "black"), ("wokeup", "black") @@ -125,6 +125,12 @@ clockfreq = None sources = [] lineno = -1 +Y_BORDER = 10 +X_BORDER = 10 +Y_COUNTER = 80 +Y_EVENTSOURCE = 10 +XY_POINT = 4 + class Colormap: def __init__(self, table): self.table = table @@ -674,9 +680,10 @@ class PointEvent(Event): def draw(self, canvas, xpos, ypos): color = colormap.lookup(self.name) - l = canvas.create_oval(xpos - 6, ypos + 1, xpos + 6, ypos - 11, + l = canvas.create_oval(xpos - XY_POINT, ypos, + xpos + XY_POINT, ypos - (XY_POINT * 2), fill=color, width=0, - tags=("all", "point", "event", self.name, self.source.tag)) + tags=("event", self.type, self.name, self.source.tag)) Event.draw(self, canvas, xpos, ypos, l) return xpos @@ -701,7 +708,7 @@ class StateEvent(Event): delta = duration / canvas.ratio l = canvas.create_rectangle(xpos, ypos, xpos + delta, ypos - 10, fill=color, width=0, - tags=("all", "state", "event", self.name, self.source.tag)) + tags=("event", self.type, self.name, self.source.tag)) Event.draw(self, canvas, xpos, ypos, l) return (xpos + delta) @@ -725,7 +732,7 @@ class CountEvent(Event): yhight = self.source.yscale() * self.count l = canvas.create_rectangle(xpos, ypos - yhight, xpos + delta, ypos, fill=color, width=0, - tags=("all", "count", "event", self.name, self.source.tag)) + tags=("event", self.type, self.name, self.source.tag)) Event.draw(self, canvas, xpos, ypos, l) return (xpos + delta) @@ -797,7 +804,8 @@ class EventSource: def drawname(self, canvas, ypos): self.y = ypos ypos = ypos - (self.ysize() / 2) - self.item = canvas.create_text(10, ypos, anchor="w", text=self.name) + self.item = canvas.create_text(X_BORDER, ypos, anchor="w", + text=self.name) return (self.item) def drawcpu(self, canvas, cpu, fromx, tox, ypos): @@ -807,7 +815,7 @@ class EventSource: l = canvas.create_rectangle(fromx, ypos - self.ysize() - canvas.bdheight, tox, ypos + canvas.bdheight, fill=color, width=0, - tags=("all", "cpuinfo", cpu, self.tag), state="hidden") + tags=("cpubg", cpu, self.tag), state="hidden") self.cpuitems.append(l) def move(self, canvas, xpos, ypos): @@ -818,7 +826,7 @@ class EventSource: canvas.move(self.item, xpos, ypos) def ysize(self): - return (10) + return (Y_EVENTSOURCE) def eventat(self, i): if (i >= len(self.events)): @@ -858,7 +866,7 @@ class Counter(EventSource): return (Counter.groups[self.group]) def ysize(self): - return (80) + return (Y_COUNTER) def yscale(self): return (self.ysize() / self.ymax()) @@ -873,16 +881,22 @@ class KTRFile: self.load = {} self.crit = {} self.stathz = 0 + self.eventcnt = 0 self.parse(file) self.fixup() global ticksps 
ticksps = self.ticksps() - timespan = self.timespan() - print "first tick", self.timestamp_f, - print "last tick", self.timestamp_l - print "Ticks per second", ticksps - print "time span", times
svn commit: r187471 - head/tools/sched
Author: jeff Date: Tue Jan 20 12:33:04 2009 New Revision: 187471 URL: http://svn.freebsd.org/changeset/base/187471 Log: - Permit timestamps to be as far as 2048 ticks apart before we complain about invalid timestamps. Nehalem CPUs seem to be synchronized but only within a fraction of a microsecond. - Make the Counter code more flexible to poor timestamps. In general we now complain a lot but render as much as we can. - Change the scaler behavior so it works better with very long and very short traces. We now set the maximum scale such that it properly displays the entire file by default and doesn't permit zooming out beyond the file. This improves other awkward navigation behavior. The interval is now set very small which can't be achieved by simply dragging the mouse. Clicking to the left of or right of the scaler bar will produce increments of a single, very small, interval now. Sponsored by: Nokia Modified: head/tools/sched/schedgraph.py Modified: head/tools/sched/schedgraph.py == --- head/tools/sched/schedgraph.py Tue Jan 20 12:07:49 2009 (r187470) +++ head/tools/sched/schedgraph.py Tue Jan 20 12:33:04 2009 (r187471) @@ -162,15 +162,12 @@ def ticks2sec(ticks): class Scaler(Frame): def __init__(self, master, target): Frame.__init__(self, master) - self.scale = Scale(self, command=self.scaleset, - from_=1000, to_=1000, orient=HORIZONTAL, - resolution=1000) + self.scale = None + self.target = target self.label = Label(self, text="Ticks per pixel") self.label.pack(side=LEFT) - self.scale.pack(fill="both", expand=1) - self.target = target - self.scale.set(target.scaleget()) - self.initialized = 1 + self.resolution = 100 + self.setmax(1) def scaleset(self, value): self.target.scaleset(int(value)) @@ -178,6 +175,20 @@ class Scaler(Frame): def set(self, value): self.scale.set(value) + def setmax(self, value): + # + # We can't reconfigure the to_ value so we delete the old + # window and make a new one when we resize. 
+ # + if (self.scale != None): + self.scale.pack_forget() + self.scale.destroy() + self.scale = Scale(self, command=self.scaleset, + from_=100, to_=value, orient=HORIZONTAL, + resolution=self.resolution) + self.scale.pack(fill="both", expand=1) + self.scale.set(self.target.scaleget()) + class Status(Frame): def __init__(self, master): Frame.__init__(self, master) @@ -726,6 +737,11 @@ class CountEvent(Event): return (xpos) color = colormap.lookup("count") self.duration = duration = next.timestamp - self.timestamp + if (duration < 0): + duration = 0 + print "Unsynchronized timestamp" + print self.cpu, self.timestamp + print next.cpu, next.timestamp self.attrs.insert(0, ("count", self.count)) self.attrs.insert(1, ("duration", ticks2sec(duration))) delta = duration / canvas.ratio @@ -882,6 +898,7 @@ class KTRFile: self.crit = {} self.stathz = 0 self.eventcnt = 0 + self.taghash = {} self.parse(file) self.fixup() @@ -956,7 +973,8 @@ class KTRFile: if (dat == None): dat = dat1 if (self.checkstamp(timestamp) == 0): - print "Bad timestamp at", lineno, ":", line, + print "Bad timestamp at", lineno, ":", + print cpu, timestamp continue # # Build the table of optional attributes @@ -1021,20 +1039,22 @@ class KTRFile: timestamp = int(timestamp) if (self.timestamp_f == None): self.timestamp_f = timestamp; - if (self.timestamp_l != None and timestamp > self.timestamp_l): + if (self.timestamp_l != None and + timestamp -2048> self.timestamp_l): return (0) self.timestamp_l = timestamp; return (1) def makeid(self, group, id, type): - for source in sources: - if (source.name == id and source.group == group): -
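The core of the timestamp change in r187471 is a fixed skew allowance before a record is declared out of order. A minimal standalone sketch of that check follows (assumed class and variable names; this is not the schedgraph.py parser, which also reports the offending CPU and line): successive stamps may exceed the previous one by up to 2048 ticks of cross-CPU drift before being rejected.

# Illustrative sketch only -- not the committed checkstamp() code.
TIMESTAMP_SKEW = 2048	# ticks of cross-CPU drift tolerated before complaining

class StampChecker:
	def __init__(self):
		self.first = None
		self.last = None

	def check(self, timestamp):
		if self.first is None:
			self.first = timestamp
		# Stamps are expected to be non-increasing; allow the next one
		# to exceed the previous by up to TIMESTAMP_SKEW ticks before
		# declaring it invalid, mirroring the check in the diff above.
		if self.last is not None and timestamp - TIMESTAMP_SKEW > self.last:
			return False
		self.last = timestamp
		return True

checker = StampChecker()
for stamp in (100000, 99000, 99500, 200000):
	if not checker.check(stamp):
		print "Bad timestamp:", stamp	# only 200000 trips the check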
svn commit: r187580 - head/tools/sched
Author: jeff Date: Thu Jan 22 06:21:30 2009 New Revision: 187580 URL: http://svn.freebsd.org/changeset/base/187580 Log: - Update my copyright. - Print human readable time as a float with two digits of precision. Use ns now as well since clock periods are well into the hundreds of picoseconds now. - Show the average duration in the stats frame. This is often more useful than total duration. Modified: head/tools/sched/schedgraph.py Modified: head/tools/sched/schedgraph.py == --- head/tools/sched/schedgraph.py Thu Jan 22 05:05:56 2009 (r187579) +++ head/tools/sched/schedgraph.py Thu Jan 22 06:21:30 2009 (r187580) @@ -1,6 +1,6 @@ #!/usr/local/bin/python -# Copyright (c) 2002-2003, Jeffrey Roberson +# Copyright (c) 2002-2003, 2009, Jeffrey Roberson # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -149,15 +149,19 @@ class Colormap: return (color) def ticks2sec(ticks): - us = ticksps / 1000000 - ticks /= us + ticks = float(ticks) + ns = float(ticksps) / 1000000000 + ticks /= ns if (ticks < 1000): - return (str(ticks) + "us") + return ("%.2fns" % ticks) ticks /= 1000 if (ticks < 1000): - return (str(ticks) + "ms") + return ("%.2fus" % ticks) ticks /= 1000 - return (str(ticks) + "s") + if (ticks < 1000): + return ("%.2fms" % ticks) + ticks /= 1000 + return ("%.2fs" % ticks) class Scaler(Frame): def __init__(self, master, target): @@ -443,7 +447,7 @@ class SourceStats(Toplevel): self.resizable(0, 0) self.title(source.name + " statistics") self.evframe = LabelFrame(self, - text="Event Frequency and Duration") + text="Event Count, Duration, Avg Duration") self.evframe.grid(row=0, column=0, sticky=E+W) eventtypes={} for event in self.source.events: @@ -466,15 +470,22 @@ class SourceStats(Toplevel): ypos = 0 for event in events: (name, c, d) = event - l = Label(self.evframe, text=name, bd=1, - relief=SUNKEN, anchor=W, width=30) - m = Label(self.evframe, text=str(c), bd=1, - relief=SUNKEN, anchor=W, width=10) - r = Label(self.evframe, text=ticks2sec(d), - bd=1, relief=SUNKEN, width=10) - l.grid(row=ypos, column=0, sticky=E+W) - m.grid(row=ypos, column=1, sticky=E+W) - r.grid(row=ypos, column=2, sticky=E+W) + Label(self.evframe, text=name, bd=1, + relief=SUNKEN, anchor=W, width=30).grid( + row=ypos, column=0, sticky=W+E) + Label(self.evframe, text=str(c), bd=1, + relief=SUNKEN, anchor=W, width=10).grid( + row=ypos, column=1, sticky=W+E) + Label(self.evframe, text=ticks2sec(d), + bd=1, relief=SUNKEN, width=10).grid( + row=ypos, column=2, sticky=W+E) + if (d and c): + d /= c + else: + d = 0 + Label(self.evframe, text=ticks2sec(d), + bd=1, relief=SUNKEN, width=10).grid( + row=ypos, column=3, sticky=W+E) ypos += 1 ___ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
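For reference, the unit conversion r187580 introduces is a cascade from nanoseconds upward with two digits of precision, and the new column in the stats frame is simply total duration over event count. A small standalone sketch of the idea (the ticksps value and the count/total pair are made-up numbers, and this is a simplified rewrite, not the committed ticks2sec()):

# Illustrative sketch only -- simplified from the idea in the commit.
ticksps = 2400000000		# assumed 2.4 GHz timestamp counter

def ticks2human(ticks):
	ticks = float(ticks)
	ticks /= float(ticksps) / 1000000000	# convert ticks to nanoseconds
	for unit in ("ns", "us", "ms"):
		if ticks < 1000:
			return "%.2f%s" % (ticks, unit)
		ticks /= 1000
	return "%.2fs" % ticks

count, total = 12, 480000	# event count and total duration in ticks
avg = total / count if count else 0
print "total:", ticks2human(total), "avg:", ticks2human(avg)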
Re: svn commit: r187576 - in head/sys/dev: ppbus ppc
On Wed, 21 Jan 2009, John Baldwin wrote: Author: jhb Date: Wed Jan 21 23:10:06 2009 New Revision: 187576 URL: http://svn.freebsd.org/changeset/base/187576 Log: Add locking to ppc and ppbus and mark the whole lot MPSAFE: Looks like there might be some kinks still: ppc0: port 0x378-0x37f,0x778-0x77f irq 7 drq 3 on acpi0 ppc0: SMC-like chipset (ECP/EPP/PS2/NIBBLE) in COMPATIBLE mode ppc0: FIFO with 16/16/9 bytes threshold ppc0: [ITHREAD] ppbus0: on ppc0 panic: mutex ppc0 not owned at ../../../dev/ppc/ppc.c:1983 cpuid = 0 KDB: enter: panic [thread pid 0 tid 10 ] Stopped at kdb_enter+0x3d: movq$0,0x652ea8(%rip) _mtx_assert() at _mtx_assert+0xdc ppc_write_ivar() at ppc_write_ivar+0x6e ppbus_attach() at ppbus_attach+0x14b Thanks, Jeff - To avoid having a bunch of locks that end up always getting acquired as a group, give each ppc(4) device a mutex which it shares with all the child devices including ppbus(4), lpt(4), plip(4), etc. This mutex is then used for all the locking. - Rework the interrupt handling stuff yet again. Now ppbus drivers setup their interrupt handler during attach and tear it down during detach like most other drivers. ppbus(4) only invokes the interrupt handler of the device that currently owns the bus (if any) when an interrupt occurs, however. Also, interrupt handlers in general now accept their softc pointers as their argument rather than the device_t. Another feature of the ppbus interrupt handlers is that they are called with the parent ppc device's lock already held. This minimizes the number of lock operations during an interrupt. - Mark plip(4), lpt(4), pcfclock(4), ppi(4), vpo(4) MPSAFE. - lpbb(4) uses the ppc lock instead of Giant. - Other plip(4) changes: - Add a mutex to protect the global tables in plip(4) and free them on module unload. - Add a detach routine. - Split out the init/stop code from the ioctl routine into separate functions. - Other lpt(4) changes: - Use device_printf(). - Use a dedicated callout for the lptout timer. - Allocate the I/O buffers at attach and detach rather than during open and close as this simplifies the locking at the cost of 1024+32 bytes when the driver is attached. - Other ppi(4) changes: - Use an sx lock to serialize open and close. - Remove unused HADBUS flag. - Add a detach routine. - Use a malloc'd buffer for each read and write to avoid races with concurrent read/write. - Other pps(4) changes: - Use a callout rather than a callout handle with timeout(). - Conform to the new ppbus requirements (regular mutex, non-filter interrupt handler). pps(4) is probably going to have to become a standalone driver that doesn't use ppbus(4) to satisfy it's requirements for low latency as a result. - Use an sx lock to serialize open and close. - Other vpo(4) changes: - Use the parent ppc device's lock to create the CAM sim instead of Giant. - Other ppc(4) changes: - Fix ppc_isa's detach method to detach instead of calling attach. 
Tested by: no one :-( Modified: head/sys/dev/ppbus/if_plip.c head/sys/dev/ppbus/immio.c head/sys/dev/ppbus/lpbb.c head/sys/dev/ppbus/lpt.c head/sys/dev/ppbus/pcfclock.c head/sys/dev/ppbus/ppb_1284.c head/sys/dev/ppbus/ppb_base.c head/sys/dev/ppbus/ppb_msq.c head/sys/dev/ppbus/ppbconf.c head/sys/dev/ppbus/ppbconf.h head/sys/dev/ppbus/ppi.c head/sys/dev/ppbus/pps.c head/sys/dev/ppbus/vpo.c head/sys/dev/ppbus/vpoio.c head/sys/dev/ppc/ppc.c head/sys/dev/ppc/ppc_acpi.c head/sys/dev/ppc/ppc_isa.c head/sys/dev/ppc/ppc_pci.c head/sys/dev/ppc/ppc_puc.c head/sys/dev/ppc/ppcreg.h head/sys/dev/ppc/ppcvar.h Modified: head/sys/dev/ppbus/if_plip.c == --- head/sys/dev/ppbus/if_plip.cWed Jan 21 21:48:46 2009 (r187575) +++ head/sys/dev/ppbus/if_plip.cWed Jan 21 23:10:06 2009 (r187576) @@ -152,8 +152,12 @@ struct lp_data { int sc_iferrs; struct resource *res_irq; + void*sc_intr_cookie; }; +static struct mtx lp_tables_lock; +MTX_SYSINIT(lp_tables, &lp_tables_lock, "plip tables", MTX_DEF); + /* Tables for the lp# interface */ static u_char *txmith; #define txmitl (txmith + (1 * LPIPTBLSIZE)) @@ -170,13 +174,41 @@ static int lpinittables(void); static int lpioctl(struct ifnet *, u_long, caddr_t); static int lpoutput(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); +static void lpstop(struct lp_data *); static void lp_intr(void *); +static int lp_module_handler(module_t, int, void *); #define DEVTOSOFTC(dev) \ ((struct lp_data *)device_get_softc(dev)) static devclass_t lp_devclass; +static int +lp_module_handler(module_t mod, int what, void *arg) +{ + + switch (what) { + case MOD_UNLOAD: + mtx_lock(&lp_tables_lock); + if (txmith