On Thu, Feb 14, 2013 at 11:56:54PM -0800, Alfred Perlstein wrote:
> [ added Luigi Rizzo to thread ]
> 
> 
> On 2/11/13 12:26 PM, Davide Italiano wrote:
> > [trimmed old mails]
> >
> > Here's a new version of the patch:
> > http://people.freebsd.org/~davide/patches/calloutng-11022012.diff
> >
> > Significant bits changed (after wider discussion and suggestion by phk@):
> > - Introduction of the new sbintime_t type (32.32 fixed point) with the
> > respective conversion (sbintime2bintime, sbintime2timeval etc...)
> > - switch from 64.64 (struct bintime) format to measure time to sbintime_t
> > - Use sbintime_t to represent expected sleep time instead of measuring
> > it in microseconds (cpu_idle_acpi(), cpu_idle_spin() ...).
> 
> 
> Luigi and I discussed this at BAFUG tonight and he expressed an interest 
> in shepherding the code in at this point.
> 
> Luigi, can you reiterate any points that still remain before we 
> integrate this code?

I am mostly ok with the patch in the current state.
I have few comments/suggestions below but they are mostly
on documenting/style/trivial bugfixes.

So now I would really like to see this code go in HEAD,
to give people a chance to see how it works and possibly
figure out whether the new API needs modifications.

To recall, my major concerns were the following:

- API explosion, with multiple versions of the callout routines.

  This seems to be solved now, with only one version (the *_sbt()
  functions) and macros remapping the old functions to the new ones.

- the use of struct bintime for timeouts and precision.

  This is also solved thanks to the introduction of sbintime_t values
  (fixed-point 32.32 times)

- Some measurements also indicated that the default "precision"
  could give large (absolute) errors on the timeouts, especially
  with large timeouts.

  I am not sure what is the situation with this new version, but i believe
  this a relatively minor issue because it surely simple to customize, e.g.
  having a couple of sysctl setting the default precision (percentage)
  and maximum error (absolute time). So users could e.g. set
  a 5% precision and a maximum error of 100us on a desktop,
  and 10% and 10ms on a laptop.

- Another thing that we should do (but after the patch is in) is to
  see if any existing code is converting times to ticks just to
  call the timeout routines -- right now the macros convert back
  to times, clearly it would be best to avoid the double conversion.

comments inline below, search for XXX-lr

thanks again for your work on this, and for following up with changes
after the previous discussion

cheers
luigi
  
Index: sys/dev/acpica/acpi_cpu.c
===================================================================
--- sys/dev/acpica/acpi_cpu.c   (.../head)      (revision 246685)
+++ sys/dev/acpica/acpi_cpu.c   (.../projects/calloutng)        (revision 
246685)
@@ -980,13 +980,16 @@
     }
 
     /* Find the lowest state that has small enough latency. */
+    us = sc->cpu_prev_sleep;
+    if (sbt >= 0 && us > sbt / SBT_1US)        XXX-lr can we have us2sbt() , 
ms2sbt() and the like ?
+       us = sbt / SBT_1US;
     cx_next_idx = 0;
     if (cpu_disable_deep_sleep)
        i = min(sc->cpu_cx_lowest, sc->cpu_non_c3);
     else
        i = sc->cpu_cx_lowest;
     for (; i >= 0; i--) {
-       if (sc->cpu_cx_states[i].trans_lat * 3 <= sc->cpu_prev_sleep) {
+       if (sc->cpu_cx_states[i].trans_lat * 3 <= us) {
            cx_next_idx = i;
            break;
        }
Index: sys/kern/kern_synch.c
===================================================================
--- sys/kern/kern_synch.c       (.../head)      (revision 246685)
+++ sys/kern/kern_synch.c       (.../projects/calloutng)        (revision 
246685)
@@ -146,12 +146,12 @@
  */
 int
 _sleep(void *ident, struct lock_object *lock, int priority,
-    const char *wmesg, int timo)
+    const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags)
 {
        struct thread *td;
        struct proc *p;
        struct lock_class *class;
-       int catch, flags, lock_state, pri, rval;
+       int catch, sleepq_flags, lock_state, pri, rval;         XXX-lr keep 
flags, use _sleep_flags as function argument ?
        WITNESS_SAVE_DECL(lock_witness);
 
        td = curthread;
@@ -348,28 +349,30 @@
  * to a "timo" value of one.
  */
 int
-pause(const char *wmesg, int timo)
+pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags)
 {
-       KASSERT(timo >= 0, ("pause: timo must be >= 0"));
+       int sbt_sec;                                            XXX-lr localize 
to the cold block
 
+       sbt_sec = sbintime_getsec(sbt); 
+       KASSERT(sbt_sec >= 0, ("pause: timo must be >= 0"));
+
        /* silently convert invalid timeouts */
-       if (timo < 1)
-               timo = 1;
+       if (sbt == 0)
+               sbt = tick_sbt;
 
        if (cold) {
                /*
-                * We delay one HZ at a time to avoid overflowing the
+                * We delay one second at a time to avoid overflowing the
                 * system specific DELAY() function(s):
                 */
-               while (timo >= hz) {
+               while (sbt_sec > 0) {
                        DELAY(1000000);
-                       timo -= hz;
+                       sbt_sec--;
                }
-               if (timo > 0)
-                       DELAY(timo * tick);
+               DELAY((sbt & 0xffffffff) / SBT_1US);    XXX-lr sbt2us() ?
                return (0);
        }
-       return (tsleep(&pause_wchan, 0, wmesg, timo));
+       return (_sleep(&pause_wchan, NULL, 0, wmesg, sbt, pr, flags));
 }
 
 /*
@@ -560,8 +563,9 @@
         * random variation to avoid synchronisation with processes that
         * run at regular intervals.
         */
-       callout_reset(&loadav_callout, hz * 4 + (int)(random() % (hz * 2 + 1)),
-           loadav, NULL);
+       callout_reset_sbt(&loadav_callout,                              XXX-lr 
we have better resolution than HZ here ?
+           tick_sbt * (hz * 4 + (int)(random() % (hz * 2 + 1))), 0,
+           loadav, NULL, C_DIRECT_EXEC | C_HARDCLOCK);
 }
 
 /* ARGSUSED */
Index: sys/kern/sys_generic.c
===================================================================
--- sys/kern/sys_generic.c      (.../head)      (revision 246685)
+++ sys/kern/sys_generic.c      (.../projects/calloutng)        (revision 
246685)
@@ -995,35 +996,29 @@
        if (nbufbytes != 0)
                bzero(selbits, nbufbytes / 2);
 
+       precision = 0;
        if (tvp != NULL) {
-               atv = *tvp;
-               if (itimerfix(&atv)) {
+               rtv = *tvp;
+               if (rtv.tv_sec < 0 || rtv.tv_usec < 0 ||        XXX-lr 
itimerfix or some check routine ? several places
+                   rtv.tv_usec >= 1000000) {
                        error = EINVAL;
                        goto done;
                }
-               getmicrouptime(&rtv);
-               timevaladd(&atv, &rtv);
-       } else {
-               atv.tv_sec = 0;
-               atv.tv_usec = 0;
-       }
-       timo = 0;
+               rsbt = timeval2sbintime(rtv);
+               precision = rsbt;
+               precision >>= tc_precexp;
+               if (TIMESEL(&asbt, rsbt))
+                       asbt += tc_tick_sbt;
+               asbt += rsbt; 
+       } else
+               asbt = -1;
        seltdinit(td);
        /* Iterate until the timeout expires or descriptors become ready. */
        for (;;) {
                error = selscan(td, ibits, obits, nd);
                if (error || td->td_retval[0] != 0)
                        break;
-               if (atv.tv_sec || atv.tv_usec) {
-                       getmicrouptime(&rtv);
-                       if (timevalcmp(&rtv, &atv, >=))
-                               break;
-                       ttv = atv;
-                       timevalsub(&ttv, &rtv);
-                       timo = ttv.tv_sec > 24 * 60 * 60 ?
-                           24 * 60 * 60 * hz : tvtohz(&ttv);
-               }
-               error = seltdwait(td, timo);
+               error = seltdwait(td, asbt, precision);
                if (error)
                        break;
                error = selrescan(td, ibits, obits);
Index: sys/kern/kern_tc.c
===================================================================
--- sys/kern/kern_tc.c  (.../head)      (revision 246685)
+++ sys/kern/kern_tc.c  (.../projects/calloutng)        (revision 246685)
@@ -119,6 +120,21 @@
 SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
     &timestepwarnings, 0, "Log time steps");
 
+struct bintime bt_timethreshold;               XXX-lr document these variables
+struct bintime bt_tickthreshold;
+sbintime_t sbt_timethreshold;
+sbintime_t sbt_tickthreshold;
+struct bintime tc_tick_bt;
+sbintime_t tc_tick_sbt;
+int tc_precexp;
+int tc_timepercentage = TC_DEFAULTPERC;
+TUNABLE_INT("kern.timecounter.alloweddeviation", &tc_timepercentage);
+static int sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_kern_timecounter, OID_AUTO, alloweddeviation,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
+    sysctl_kern_timecounter_adjprecision, "I",
+    "Allowed time interval deviation in percents");
+
 static void tc_windup(void);
 static void cpu_tick_calibrate(int);
 
@@ -883,6 +919,16 @@
 }
 
 void
+sbinuptime(sbintime_t sbt)     XXX-lr wrong argument ? not compile-tested ?
Index: sys/kern/kern_timeout.c
===================================================================
--- sys/kern/kern_timeout.c     (.../head)      (revision 246685)
+++ sys/kern/kern_timeout.c     (.../projects/calloutng)        (revision 
246685)
@@ -87,58 +105,62 @@
 int callwheelsize, callwheelmask;
 
 /*
- * The callout cpu migration entity represents informations necessary for
- * describing the migrating callout to the new callout cpu.
+ * The callout cpu exec entities represent informations necessary for
+ * describing the state of callouts currently running on the CPU and the ones
+ * necessary for migrating callouts to the new callout cpu. In particular,
+ * the first entry of the array cc_exec_entity holds informations for callout
+ * running in SWI thread context, while the second one holds informations
+ * for callout running directly from hardware interrupt context.
  * The cached informations are very important for deferring migration when
  * the migrating callout is already running.
  */
-struct cc_mig_ent {
+struct cc_exec {
+       struct callout          *cc_next;
+       struct callout          *cc_curr;
 #ifdef SMP
-       void    (*ce_migration_func)(void *);
-       void    *ce_migration_arg;
-       int     ce_migration_cpu;
-       int     ce_migration_ticks;
+       void                    (*ce_migration_func)(void *);
+       void                    *ce_migration_arg;
+       int                     ce_migration_cpu;
+       sbintime_t              ce_migration_time;
 #endif
+       int                     cc_cancel;
+       int                     cc_waiting;
 };
-       
+
 /*
- * There is one struct callout_cpu per cpu, holding all relevant
+ * There is one struct callou_cpu per cpu, holding all relevant
  * state for the callout processing thread on the individual CPU.
- * In particular:
- *     cc_ticks is incremented once per tick in callout_cpu().
- *     It tracks the global 'ticks' but in a way that the individual
- *     threads should not worry about races in the order in which
- *     hardclock() and hardclock_cpu() run on the various CPUs.
- *     cc_softclock is advanced in callout_cpu() to point to the
- *     first entry in cc_callwheel that may need handling. In turn,
- *     a softclock() is scheduled so it can serve the various entries i
- *     such that cc_softclock <= i <= cc_ticks .
- *     XXX maybe cc_softclock and cc_ticks should be volatile ?
- *
- *     cc_ticks is also used in callout_reset_cpu() to determine
- *     when the callout should be served.
  */
 struct callout_cpu {
        struct mtx_padalign     cc_lock;
-       struct cc_mig_ent       cc_migrating_entity;
+       struct cc_exec          cc_exec_entity[2];      XXX-lr repeat (short) 
explanation on why 2 entities
        struct callout          *cc_callout;
        struct callout_tailq    *cc_callwheel;
+       struct callout_tailq    cc_expireq;
        struct callout_list     cc_callfree;
-       struct callout          *cc_next;
-       struct callout          *cc_curr;
+       sbintime_t              cc_firstevent;
+       sbintime_t              cc_lastscan;
        void                    *cc_cookie;
-       int                     cc_ticks;
-       int                     cc_softticks;
-       int                     cc_cancel;
-       int                     cc_waiting;
-       int                     cc_firsttick;
 };
 
+#define        cc_exec_curr            cc_exec_entity[0].cc_curr
+#define        cc_exec_next            cc_exec_entity[0].cc_next
+#define        cc_exec_cancel          cc_exec_entity[0].cc_cancel
+#define        cc_exec_waiting         cc_exec_entity[0].cc_waiting
+#define        cc_exec_curr_dir        cc_exec_entity[1].cc_curr
+#define        cc_exec_next_dir        cc_exec_entity[1].cc_next
+#define        cc_exec_cancel_dir      cc_exec_entity[1].cc_cancel
+#define        cc_exec_waiting_dir     cc_exec_entity[1].cc_waiting
+
 #ifdef SMP
-#define        cc_migration_func       cc_migrating_entity.ce_migration_func
-#define        cc_migration_arg        cc_migrating_entity.ce_migration_arg
-#define        cc_migration_cpu        cc_migrating_entity.ce_migration_cpu
-#define        cc_migration_ticks      cc_migrating_entity.ce_migration_ticks
+#define        cc_migration_func       cc_exec_entity[0].ce_migration_func
+#define        cc_migration_arg        cc_exec_entity[0].ce_migration_arg
+#define        cc_migration_cpu        cc_exec_entity[0].ce_migration_cpu
+#define        cc_migration_time       cc_exec_entity[0].ce_migration_time
+#define        cc_migration_func_dir   cc_exec_entity[1].ce_migration_func
+#define        cc_migration_arg_dir    cc_exec_entity[1].ce_migration_arg
+#define        cc_migration_cpu_dir    cc_exec_entity[1].ce_migration_cpu
+#define        cc_migration_time_dir   cc_exec_entity[1].ce_migration_time
 
 struct callout_cpu cc_cpu[MAXCPU];
 #define        CPUBLOCK        MAXCPU
Index: sys/ia64/ia64/machdep.c
===================================================================
--- sys/ia64/ia64/machdep.c     (.../head)      (revision 246685)
+++ sys/ia64/ia64/machdep.c     (.../projects/calloutng)        (revision 
246685)
@@ -404,7 +405,7 @@
        if (sched_runnable())
                ia64_enable_intr();
        else if (cpu_idle_hook != NULL) {
-               (*cpu_idle_hook)();             XXX-lr style ? just use 
cpu_idle_hook(sbt);
+               (*cpu_idle_hook)(sbt);
                /* The hook must enable interrupts! */
        } else {
                ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0);
Index: sys/ofed/include/linux/timer.h
===================================================================
--- sys/ofed/include/linux/timer.h      (.../head)      (revision 246685)
+++ sys/ofed/include/linux/timer.h      (.../projects/calloutng)        
(revision 246685)
@@ -65,13 +64,16 @@
        callout_init(&(timer)->timer_callout, CALLOUT_MPSAFE);          \
 } while (0)
 
-#define        mod_timer(timer, expire)                                        
\       XXX-lr gratuitous rename
-       callout_reset(&(timer)->timer_callout, (expire) - jiffies,      \
-           _timer_fn, (timer))
+#define        mod_timer(timer, exp)                                           
\
+do {                                                                   \
+       (timer)->expires = exp;                                         \
+       callout_reset(&(timer)->timer_callout, (exp) - jiffies,         \
+           _timer_fn, (timer));                                        \
+} while (0)
 
 #define        add_timer(timer)                                                
\
        callout_reset(&(timer)->timer_callout,                          \
-           (timer)->timer_callout.c_time - jiffies, _timer_fn, (timer))
+           (timer)->expires - jiffies, _timer_fn, (timer))
 
 #define        del_timer(timer)        callout_stop(&(timer)->timer_callout)
 #define        del_timer_sync(timer)   callout_drain(&(timer)->timer_callout)
Index: sys/sys/callout.h
===================================================================
--- sys/sys/callout.h   (.../head)      (revision 246685)
+++ sys/sys/callout.h   (.../projects/calloutng)        (revision 246685)
@@ -47,7 +47,17 @@
 #define        CALLOUT_RETURNUNLOCKED  0x0010 /* handler returns with mtx 
unlocked */
 #define        CALLOUT_SHAREDLOCK      0x0020 /* callout lock held in shared 
mode */
 #define        CALLOUT_DFRMIGRATION    0x0040 /* callout in deferred migration 
mode */
+#define        CALLOUT_PROCESSED       0x0080 /* callout in wheel or 
processing list? */
+#define        CALLOUT_DIRECT          0x0100 /* allow exec from hw int 
context */
 
+#define        C_DIRECT_EXEC           0x0001 /* direct execution of callout */
+#define        C_PRELBITS              7                                       
        XXX-lr document all
+#define        C_PRELRANGE             ((1 << C_PRELBITS) - 1)
+#define        C_PREL(x)               (((x) + 1) << 1)
+#define        C_PRELGET(x)            (int)((((x) >> 1) & C_PRELRANGE) - 1)
+#define        C_HARDCLOCK             0x0100 /* align to hardclock() calls */
+#define        C_ABSOLUTE              0x0200 /* event time is absolute. */
+
 struct callout_handle {
        struct callout *callout;
 };
Index: sys/sys/time.h
===================================================================
--- sys/sys/time.h      (.../head)      (revision 246685)
+++ sys/sys/time.h      (.../projects/calloutng)        (revision 246685)
@@ -109,6 +124,45 @@
            ((a)->frac cmp (b)->frac) :                                 \
            ((a)->sec cmp (b)->sec))
 
+typedef int64_t sbintime_t;                            XXX-lr add function 
ns2sbt(), us2sbt(), ms2sbt()
+#define        SBT_1S  ((sbintime_t)1 << 32)
+#define        SBT_1M  (SBT_1S * 60)
+#define        SBT_1MS (SBT_1S / 1000)
+#define        SBT_1US (SBT_1S / 1000000)
+#define        SBT_1NS (SBT_1S / 1000000000)
+
+static __inline int
+sbintime_getsec(sbintime_t sbt)
+{
+       
+       return (int)(sbt >> 32);
+}
+
+static __inline sbintime_t
+bintime2sbintime(const struct bintime bt)
+{
+
+       return (((sbintime_t)bt.sec << 32) + (bt.frac >> 32));
+}
+
+static __inline struct bintime 
+sbintime2bintime(sbintime_t sbt)
+{
+       struct bintime bt;
+
+       bt.sec = sbt >> 32;
+       bt.frac = sbt << 32;
+       return (bt);
+
+}
+
+#ifdef _KERNEL
+
+extern struct bintime tick_bt;
+extern sbintime_t tick_sbt;
+
+#endif /* KERNEL */ 
+
 /*-
  * Background information:
  *
@@ -290,7 +381,15 @@
 extern volatile time_t time_second;
 extern volatile time_t time_uptime;
 extern struct bintime boottimebin;
+extern struct bintime tc_tick_bt;
+extern sbintime_t tc_tick_sbt;                         XXX-lr comment the new 
vars
 extern struct timeval boottime;
+extern int tc_precexp;                                 XXX-lr comment
+extern int tc_timepercentage;
+extern struct bintime bt_timethreshold;
+extern struct bintime bt_tickthreshold;
+extern sbintime_t sbt_timethreshold;
+extern sbintime_t sbt_tickthreshold;
 
 /*
  * Functions for looking at our clock: [get]{bin,nano,micro}[up]time()
@@ -337,6 +438,23 @@
 void   timevaladd(struct timeval *t1, const struct timeval *t2);
 void   timevalsub(struct timeval *t1, const struct timeval *t2);
 int    tvtohz(struct timeval *tv);
+
+#define        TC_DEFAULTPERC          5       XXX-lr comment
+
+#define        BT2FREQ(bt)                                                     
\
+       (((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) /           \
+           ((bt)->frac >> 1))
+
+#define        FREQ2BT(freq, bt)                                               
\
+{                                                                      \
+       (bt)->sec = 0;                                                  \
+       (bt)->frac = ((uint64_t)0x8000000000000000  / (freq)) << 1;     \
+}
+
+#define        TIMESEL(sbt, sbt2)                                              
\
+       (((sbt2) >= sbt_timethreshold) ?                                \
+           (getsbinuptime(sbt), 1) : (sbinuptime(sbt), 0))
+
 #else /* !_KERNEL */
 #include <time.h>
 
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c     (.../head)      (revision 246685)
+++ sys/netinet/tcp_timer.c     (.../projects/calloutng)        (revision 
246685)
@@ -719,20 +719,24 @@
 #define        ticks_to_msecs(t)       (1000*(t) / hz)
 
 void
-tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, struct 
xtcp_timer *xtimer)
+tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
+    struct xtcp_timer *xtimer)
 {
-       bzero(xtimer, sizeof(struct xtcp_timer));
+       sbintime_t now;
+
+       bzero(xtimer, sizeof(*xtimer));                         XXX-lr use 
sbd2ms()
        if (timer == NULL)
                return;
+       getsbinuptime(&now);
        if (callout_active(&timer->tt_delack))
-               xtimer->tt_delack = ticks_to_msecs(timer->tt_delack.c_time - 
ticks);
+               xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
        if (callout_active(&timer->tt_rexmt))
-               xtimer->tt_rexmt = ticks_to_msecs(timer->tt_rexmt.c_time - 
ticks);
+               xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
        if (callout_active(&timer->tt_persist))
-               xtimer->tt_persist = ticks_to_msecs(timer->tt_persist.c_time - 
ticks);
+               xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
        if (callout_active(&timer->tt_keep))
-               xtimer->tt_keep = ticks_to_msecs(timer->tt_keep.c_time - ticks);
+               xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
        if (callout_active(&timer->tt_2msl))
-               xtimer->tt_2msl = ticks_to_msecs(timer->tt_2msl.c_time - ticks);
+               xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
        xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
 }
_______________________________________________
freebsd-current@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"

Reply via email to