Author: delphij
Date: Tue Sep 10 01:46:47 2013
New Revision: 255437
URL: http://svnweb.freebsd.org/changeset/base/255437

Log:
  MFV r247844 (illumos-gate 13975:ef6409bc370f)
  
  Illumos ZFS issues:
    3582 zfs_delay() should support a variable resolution
    3584 DTrace sdt probes for ZFS txg states
  
  Provide a compatibility shim for Solaris's cv_timedwait_hires
  to help aid future porting.
  
  Approved by:  re (ZFS blanket)

Modified:
  head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
  head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
  head/sys/cddl/compat/opensolaris/sys/kcondvar.h
  head/sys/cddl/compat/opensolaris/sys/time.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c
Directory Properties:
  head/cddl/contrib/opensolaris/   (props changed)
  head/sys/cddl/contrib/opensolaris/   (props changed)

Modified: head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c  Tue Sep 10 01:38:41 2013        (r255436)
+++ head/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c  Tue Sep 10 01:46:47 2013        (r255437)
@@ -349,6 +349,63 @@ top:
        return (1);
 }
 
+/*
+ * Userland stand-in for the Solaris cv_timedwait_hires() primitive.
+ *
+ * Blocks on cv, with mp released for the duration of the wait, until the
+ * condition is signalled or tim nanoseconds have elapsed.  Only flag == 0
+ * is supported, so tim is a relative interval; this matches the in-kernel
+ * shim and txg_delay(), which both hand over a duration rather than a
+ * deadline.  res (the requested timer resolution) is ignored here.
+ *
+ * Returns -1 if the wait timed out (or tim is not positive) and 1 if the
+ * wait ended for any other reason.
+ */
+/*ARGSUSED*/
+clock_t
+cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
+    int flag)
+{
+       int error;
+       timestruc_t ts;
+
+       ASSERT(flag == 0);
+
+       if (tim <= 0)
+               return (-1);
+
+       /*
+        * pthread_cond_timedwait() takes an absolute deadline, not an
+        * interval, so anchor the requested delay to the current time.
+        * NOTE(review): assumes cv uses the default CLOCK_REALTIME clock and
+        * that <time.h> is already pulled in by this file -- confirm.
+        */
+       error = clock_gettime(CLOCK_REALTIME, &ts);
+       ASSERT(error == 0);
+       ts.tv_sec += tim / NANOSEC;
+       ts.tv_nsec += tim % NANOSEC;
+       if (ts.tv_nsec >= NANOSEC) {
+               ts.tv_sec++;
+               ts.tv_nsec -= NANOSEC;
+       }
+
+       ASSERT(mutex_owner(mp) == curthread);
+
+       /* The deadline is absolute, so an interrupted wait simply resumes. */
+       do {
+               mp->m_owner = NULL;
+               error = pthread_cond_timedwait(cv, &mp->m_lock, &ts);
+               mp->m_owner = curthread;
+       } while (error == EINTR);
+
+       if (error == ETIMEDOUT)
+               return (-1);
+
+       ASSERT(error == 0);
+
+       return (1);
+}
+
 void
 cv_signal(kcondvar_t *cv)
 {

Modified: head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h Tue Sep 10 01:38:41 2013        (r255436)
+++ head/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h Tue Sep 10 01:46:47 2013        (r255437)
@@ -313,6 +313,8 @@ extern void cv_init(kcondvar_t *cv, char
 extern void cv_destroy(kcondvar_t *cv);
 extern void cv_wait(kcondvar_t *cv, kmutex_t *mp);
 extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime);
+extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
+    hrtime_t res, int flag);
 extern void cv_signal(kcondvar_t *cv);
 extern void cv_broadcast(kcondvar_t *cv);
 

Modified: head/sys/cddl/compat/opensolaris/sys/kcondvar.h
==============================================================================
--- head/sys/cddl/compat/opensolaris/sys/kcondvar.h     Tue Sep 10 01:38:41 2013        (r255436)
+++ head/sys/cddl/compat/opensolaris/sys/kcondvar.h     Tue Sep 10 01:46:47 2013        (r255437)
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2007 Pawel Jakub Dawidek <p...@freebsd.org>
+ * Copyright (c) 2013 iXsystems, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -36,6 +37,7 @@
 
 #include <sys/mutex.h>
 #include <sys/condvar.h>
+#include <sys/time.h>
 
 typedef struct cv      kcondvar_t;
 
@@ -57,6 +59,33 @@ typedef enum {
 } while (0)
 #define        cv_init(cv, name, type, arg)    zfs_cv_init(cv, name, type, arg)
 
+/*
+ * Compatibility shim for the Solaris cv_timedwait_hires() interface.
+ *
+ * tim and res are scaled by SBT_1NS into sbintime_t values and handed to
+ * cv_timedwait_sbt() with a flags argument of 0.  The flag parameter is
+ * accepted for source compatibility only and is not consulted.
+ *
+ * The result of cv_timedwait_sbt() is returned unchanged.
+ * NOTE(review): confirm no caller depends on the Solaris return convention;
+ * txg_delay() discards the result.
+ *
+ * Declared inline so that a file which includes this header without calling
+ * the shim is not left with an unused static function.
+ */
+static __inline clock_t
+cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
+    int flag)
+{
+       sbintime_t sbt;
+       sbintime_t pr;
+
+       sbt = tim * SBT_1NS;
+       pr = res * SBT_1NS;
+
+       return (cv_timedwait_sbt(cvp, mp, sbt, pr, 0));
+}
+
 #endif /* _KERNEL */
 
 #endif /* _OPENSOLARIS_SYS_CONDVAR_H_ */

Modified: head/sys/cddl/compat/opensolaris/sys/time.h
==============================================================================
--- head/sys/cddl/compat/opensolaris/sys/time.h Tue Sep 10 01:38:41 2013        (r255436)
+++ head/sys/cddl/compat/opensolaris/sys/time.h Tue Sep 10 01:46:47 2013        (r255437)
@@ -37,6 +37,9 @@
 #define NANOSEC                1000000000
 #define TIME_MAX       LLONG_MAX
 
+#define        MSEC2NSEC(m)    ((hrtime_t)(m) * (NANOSEC / MILLISEC))
+#define        NSEC2MSEC(n)    ((n) / (NANOSEC / MILLISEC))
+
 typedef longlong_t     hrtime_t;
 
 #if defined(__i386__) || defined(__powerpc__)

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c       Tue Sep 10 01:38:41 2013        (r255436)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dir.c       Tue Sep 10 01:46:47 2013        (r255437)
@@ -744,7 +744,8 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd,
                err = dsl_pool_tempreserve_space(dd->dd_pool, asize, tx);
        } else {
                if (err == EAGAIN) {
-                       txg_delay(dd->dd_pool, tx->tx_txg, 1);
+                       txg_delay(dd->dd_pool, tx->tx_txg,
+                           MSEC2NSEC(10), MSEC2NSEC(10));
                        err = SET_ERROR(ERESTART);
                }
                dsl_pool_memory_pressure(dd->dd_pool);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c      Tue Sep 10 01:38:41 2013        (r255436)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c      Tue Sep 10 01:46:47 2013        (r255437)
@@ -85,6 +85,9 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, write_l
     &zfs_write_limit_override, 0,
     "Force a txg if dirty buffers exceed this value (bytes)");
 
+hrtime_t zfs_throttle_delay = MSEC2NSEC(10);
+hrtime_t zfs_throttle_resolution = MSEC2NSEC(10);
+
 int
 dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp)
 {
@@ -538,12 +541,13 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t t
         * Weight the throughput calculation towards the current value:
         *      thru = 3/4 old_thru + 1/4 new_thru
         *
-        * Note: write_time is in nanosecs, so write_time/MICROSEC
-        * yields millisecs
+        * Note: write_time is in nanosecs while dp_throughput is expressed in
+        * bytes per millisecond.
         */
        ASSERT(zfs_write_limit_min > 0);
-       if (data_written > zfs_write_limit_min / 8 && write_time > MICROSEC) {
-               uint64_t throughput = data_written / (write_time / MICROSEC);
+       if (data_written > zfs_write_limit_min / 8 &&
+           write_time > MSEC2NSEC(1)) {
+               uint64_t throughput = data_written / NSEC2MSEC(write_time);
 
                if (dp->dp_throughput)
                        dp->dp_throughput = throughput / 4 +
@@ -641,8 +645,10 @@ dsl_pool_tempreserve_space(dsl_pool_t *d
         * the caller 1 clock tick.  This will slow down the "fill"
         * rate until the sync process can catch up with us.
         */
-       if (reserved && reserved > (write_limit - (write_limit >> 3)))
-               txg_delay(dp, tx->tx_txg, 1);
+       if (reserved && reserved > (write_limit - (write_limit >> 3))) {
+               txg_delay(dp, tx->tx_txg, zfs_throttle_delay,
+                   zfs_throttle_resolution);
+       }
 
        return (0);
 }

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c      Tue Sep 10 01:38:41 2013        (r255436)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c      Tue Sep 10 01:46:47 2013        (r255437)
@@ -444,7 +444,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, co
            zfs_resilver_min_time_ms : zfs_scan_min_time_ms;
        elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
        if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
-           (elapsed_nanosecs / MICROSEC > mintime &&
+           (NSEC2MSEC(elapsed_nanosecs) > mintime &&
            txg_sync_waiting(scn->scn_dp)) ||
            spa_shutting_down(scn->scn_dp->dp_spa)) {
                if (zb) {
@@ -1349,7 +1349,7 @@ dsl_scan_free_should_pause(dsl_scan_t *s
 
        elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
        return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
-           (elapsed_nanosecs / MICROSEC > zfs_free_min_time_ms &&
+           (NSEC2MSEC(elapsed_nanosecs) > zfs_free_min_time_ms &&
            txg_sync_waiting(scn->scn_dp)) ||
            spa_shutting_down(scn->scn_dp->dp_spa));
 }
@@ -1473,7 +1473,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *
                            "free_bpobj/bptree txg %llu",
                            (longlong_t)scn->scn_visited_this_txg,
                            (longlong_t)
-                           (gethrtime() - scn->scn_sync_start_time) / MICROSEC,
+                           NSEC2MSEC(gethrtime() - scn->scn_sync_start_time),
                            (longlong_t)tx->tx_txg);
                        scn->scn_visited_this_txg = 0;
                        /*
@@ -1531,7 +1531,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *
 
        zfs_dbgmsg("visited %llu blocks in %llums",
            (longlong_t)scn->scn_visited_this_txg,
-           (longlong_t)(gethrtime() - scn->scn_sync_start_time) / MICROSEC);
+           (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time));
 
        if (!scn->scn_pausing) {
                scn->scn_done_txg = tx->tx_txg + 1;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c      Tue Sep 10 01:38:41 2013        (r255436)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c      Tue Sep 10 01:46:47 2013        (r255437)
@@ -534,8 +534,8 @@ spa_add(const char *name, nvlist_t *conf
        hdlr.cyh_level = CY_LOW_LEVEL;
 #endif
 
-       spa->spa_deadman_synctime = zfs_deadman_synctime *
-           zfs_txg_synctime_ms * MICROSEC;
+       spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime *
+           zfs_txg_synctime_ms);
 
 #ifdef illumos
        /*
@@ -544,7 +544,7 @@ spa_add(const char *name, nvlist_t *conf
         * an expensive operation we don't want to check too frequently.
         * Instead wait for 5 synctimes before checking again.
         */
-       when.cyt_interval = 5ULL * zfs_txg_synctime_ms * MICROSEC;
+       when.cyt_interval = MSEC2NSEC(5 * zfs_txg_synctime_ms);
        when.cyt_when = CY_INFINITY;
        mutex_enter(&cpu_lock);
        spa->spa_deadman_cycid = cyclic_add(&hdlr, &when);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h       Tue Sep 10 01:38:41 2013        (r255436)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg.h       Tue Sep 10 01:46:47 2013        (r255437)
@@ -74,13 +74,8 @@ extern void txg_rele_to_quiesce(txg_hand
 extern void txg_rele_to_sync(txg_handle_t *txghp);
 extern void txg_register_callbacks(txg_handle_t *txghp, list_t *tx_callbacks);
 
-/*
- * Delay the caller by the specified number of ticks or until
- * the txg closes (whichever comes first).  This is intended
- * to be used to throttle writers when the system nears its
- * capacity.
- */
-extern void txg_delay(struct dsl_pool *dp, uint64_t txg, int ticks);
+extern void txg_delay(struct dsl_pool *dp, uint64_t txg, hrtime_t delta,
+    hrtime_t resolution);
 
 /*
  * Wait until the given transaction group has finished syncing.

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h  Tue Sep 10 01:38:41 2013        (r255436)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/txg_impl.h  Tue Sep 10 01:46:47 2013        (r255437)
@@ -70,7 +70,7 @@ struct tx_cpu {
        kmutex_t        tc_open_lock;   /* protects tx_open_txg */
        kmutex_t        tc_lock;        /* protects the rest of this struct */
        kcondvar_t      tc_cv[TXG_SIZE];
-       uint64_t        tc_count[TXG_SIZE];
+       uint64_t        tc_count[TXG_SIZE];     /* tx hold count on each txg */
        list_t          tc_callbacks[TXG_SIZE]; /* commit cb list */
        char            tc_pad[8];              /* pad to fill 3 cache lines */
 };
@@ -87,8 +87,8 @@ struct tx_cpu {
  * every cpu (see txg_quiesce()).
  */
 typedef struct tx_state {
-       tx_cpu_t        *tx_cpu;        /* protects right to enter txg  */
-       kmutex_t        tx_sync_lock;   /* protects tx_state_t */
+       tx_cpu_t        *tx_cpu;        /* protects access to tx_open_txg */
+       kmutex_t        tx_sync_lock;   /* protects the rest of this struct */
        uint64_t        tx_open_txg;    /* currently open txg id */
        uint64_t        tx_quiesced_txg; /* quiesced txg waiting for sync */
        uint64_t        tx_syncing_txg; /* currently syncing txg id */

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c   Tue Sep 10 01:38:41 2013        (r255436)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c   Tue Sep 10 01:46:47 2013        (r255437)
@@ -241,7 +241,7 @@ txg_thread_exit(tx_state_t *tx, callb_cp
 }
 
 static void
-txg_thread_wait(tx_state_t *tx, callb_cpr_t *cpr, kcondvar_t *cv, uint64_t time)
+txg_thread_wait(tx_state_t *tx, callb_cpr_t *cpr, kcondvar_t *cv, clock_t time)
 {
        CALLB_CPR_SAFE_BEGIN(cpr);
 
@@ -370,6 +370,9 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg
        ASSERT(txg == tx->tx_open_txg);
        tx->tx_open_txg++;
 
+       DTRACE_PROBE2(txg__quiescing, dsl_pool_t *, dp, uint64_t, txg);
+       DTRACE_PROBE2(txg__opened, dsl_pool_t *, dp, uint64_t, tx->tx_open_txg);
+
        /*
         * Now that we've incremented tx_open_txg, we can let threads
         * enter the next transaction group.
@@ -501,6 +504,7 @@ txg_sync_thread(void *arg)
                txg = tx->tx_quiesced_txg;
                tx->tx_quiesced_txg = 0;
                tx->tx_syncing_txg = txg;
+               DTRACE_PROBE2(txg__syncing, dsl_pool_t *, dp, uint64_t, txg);
                cv_broadcast(&tx->tx_quiesce_more_cv);
 
                dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
@@ -514,6 +518,7 @@ txg_sync_thread(void *arg)
                mutex_enter(&tx->tx_sync_lock);
                tx->tx_synced_txg = txg;
                tx->tx_syncing_txg = 0;
+               DTRACE_PROBE2(txg__synced, dsl_pool_t *, dp, uint64_t, txg);
                cv_broadcast(&tx->tx_sync_done_cv);
 
                /*
@@ -563,21 +568,22 @@ txg_quiesce_thread(void *arg)
                 */
                dprintf("quiesce done, handing off txg %llu\n", txg);
                tx->tx_quiesced_txg = txg;
+               DTRACE_PROBE2(txg__quiesced, dsl_pool_t *, dp, uint64_t, txg);
                cv_broadcast(&tx->tx_sync_more_cv);
                cv_broadcast(&tx->tx_quiesce_done_cv);
        }
 }
 
 /*
- * Delay this thread by 'ticks' if we are still in the open transaction
- * group and there is already a waiting txg quiescing or quiesced.
- * Abort the delay if this txg stalls or enters the quiescing state.
+ * Delay this thread by delay nanoseconds if we are still in the open
+ * transaction group and there is already a waiting txg quiescing or quiesced.
+ * Abort the delay if this txg stalls or enters the quiescing state.
  */
 void
-txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks)
+txg_delay(dsl_pool_t *dp, uint64_t txg, hrtime_t delay, hrtime_t resolution)
 {
        tx_state_t *tx = &dp->dp_tx;
-       clock_t timeout = ddi_get_lbolt() + ticks;
+       hrtime_t start = gethrtime();
 
        /* don't delay if this txg could transition to quiescing immediately */
        if (tx->tx_open_txg > txg ||
@@ -590,10 +596,11 @@ txg_delay(dsl_pool_t *dp, uint64_t txg, 
                return;
        }
 
-       while (ddi_get_lbolt() < timeout &&
-           tx->tx_syncing_txg < txg-1 && !txg_stalled(dp))
-               (void) cv_timedwait(&tx->tx_quiesce_more_cv, &tx->tx_sync_lock,
-                   timeout - ddi_get_lbolt());
+       while (gethrtime() - start < delay &&
+           tx->tx_syncing_txg < txg-1 && !txg_stalled(dp)) {
+               (void) cv_timedwait_hires(&tx->tx_quiesce_more_cv,
+                   &tx->tx_sync_lock, delay, resolution, 0);
+       }
 
        mutex_exit(&tx->tx_sync_lock);
 }
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to