Author: davide
Date: Mon Mar  4 11:09:56 2013
New Revision: 247777
URL: http://svnweb.freebsd.org/changeset/base/247777

Log:
  - Make callout(9) tickless, relying on eventtimers(4) as the backend for
  precise time event generation. This greatly improves the granularity of
  callouts, which are no longer constrained to wait for the next tick to be
  scheduled.
  - Extend the callout KPI by introducing a set of callout_reset_sbt*
  functions, which take an sbintime_t as the timeout argument. The new KPI
  also offers a way for consumers to specify the precision tolerance they
  allow, so that callout can coalesce events and reduce the number of
  interrupts as well as potentially avoid scheduling a SWI thread.
  - Introduce support for dispatching callouts directly from hardware
  interrupt context by specifying an additional flag. This feature should be
  used carefully, since interrupt context has some limitations
  (e.g. no sleeping locks can be held).
  - Enhance the mechanisms for gathering information about the callwheel,
  introducing a new sysctl to obtain stats.
  
  This change breaks the KBI. The struct callout fields have been changed; in
  particular, 'int ticks' (4 bytes) has been replaced with 'sbintime_t'
  (8 bytes) and another 'sbintime_t' field was added for precision.
  
  Together with:        mav
  Reviewed by:  attilio, bde, luigi, phk
  Sponsored by: Google Summer of Code 2012, iXsystems inc.
  Tested by:    flo (amd64, sparc64), marius (sparc64), ian (arm),
                markj (amd64), mav, Fabian Keil

Modified:
  head/sys/conf/NOTES
  head/sys/conf/options
  head/sys/kern/kern_clock.c
  head/sys/kern/kern_clocksource.c
  head/sys/kern/kern_tc.c
  head/sys/kern/kern_timeout.c
  head/sys/kern/subr_param.c
  head/sys/netinet/tcp_timer.c
  head/sys/sys/_callout.h
  head/sys/sys/callout.h
  head/sys/sys/systm.h
  head/sys/sys/time.h

Modified: head/sys/conf/NOTES
==============================================================================
--- head/sys/conf/NOTES Mon Mar  4 10:41:54 2013        (r247776)
+++ head/sys/conf/NOTES Mon Mar  4 11:09:56 2013        (r247777)
@@ -259,6 +259,8 @@ options     SX_NOINLINE
 
 # SMP Debugging Options:
 #
+# CALLOUT_PROFILING enables rudimentary profiling of the callwheel data
+#        structure used as backend in callout(9).
 # PREEMPTION allows the threads that are in the kernel to be preempted by
 #        higher priority [interrupt] threads.  It helps with interactivity
 #        and allows interrupt threads to run sooner rather than waiting.
@@ -297,6 +299,9 @@ options     LOCK_PROFILING
 options        MPROF_BUFFERS="1536"
 options        MPROF_HASH_SIZE="1543"
 
+# Profiling for the callout(9) backend.
+options        CALLOUT_PROFILING
+
 # Profiling for internal hash tables.
 options        SLEEPQUEUE_PROFILING
 options        TURNSTILE_PROFILING

Modified: head/sys/conf/options
==============================================================================
--- head/sys/conf/options       Mon Mar  4 10:41:54 2013        (r247776)
+++ head/sys/conf/options       Mon Mar  4 11:09:56 2013        (r247777)
@@ -68,6 +68,7 @@ TEXTDUMP_VERBOSE      opt_ddb.h
 ADAPTIVE_LOCKMGRS
 ALQ
 AUDIT          opt_global.h
+CALLOUT_PROFILING
 CAPABILITIES   opt_capsicum.h
 CAPABILITY_MODE        opt_capsicum.h
 COMPAT_43      opt_compat.h

Modified: head/sys/kern/kern_clock.c
==============================================================================
--- head/sys/kern/kern_clock.c  Mon Mar  4 10:41:54 2013        (r247776)
+++ head/sys/kern/kern_clock.c  Mon Mar  4 11:09:56 2013        (r247777)
@@ -460,7 +460,7 @@ hardclock_cpu(int usermode)
        if (td->td_intr_frame != NULL)
                PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
 #endif
-       callout_tick();
+       callout_process(sbinuptime());
 }
 
 /*
@@ -550,7 +550,6 @@ hardclock_cnt(int cnt, int usermode)
        if (td->td_intr_frame != NULL)
                PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
 #endif
-       callout_tick();
        /* We are in charge to handle this tick duty. */
        if (newticks > 0) {
                /* Dangerous and no need to call these things concurrently. */

Modified: head/sys/kern/kern_clocksource.c
==============================================================================
--- head/sys/kern/kern_clocksource.c    Mon Mar  4 10:41:54 2013        (r247776)
+++ head/sys/kern/kern_clocksource.c    Mon Mar  4 11:09:56 2013        (r247777)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2010-2012 Alexander Motin <m...@freebsd.org>
+ * Copyright (c) 2010-2013 Alexander Motin <m...@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
+#include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/kdb.h>
 #include <sys/ktr.h>
@@ -63,17 +64,14 @@ int                 cpu_can_deep_sleep = 0; /* C3 stat
 int                    cpu_disable_deep_sleep = 0; /* Timer dies in C3. */
 
 static void            setuptimer(void);
-static void            loadtimer(struct bintime *now, int first);
+static void            loadtimer(sbintime_t now, int first);
 static int             doconfigtimer(void);
 static void            configtimer(int start);
 static int             round_freq(struct eventtimer *et, int freq);
 
-static void            getnextcpuevent(struct bintime *event, int idle);
-static void            getnextevent(struct bintime *event);
-static int             handleevents(struct bintime *now, int fake);
-#ifdef SMP
-static void            cpu_new_callout(int cpu, int ticks);
-#endif
+static sbintime_t      getnextcpuevent(int idle);
+static sbintime_t      getnextevent(void);
+static int             handleevents(sbintime_t now, int fake);
 
 static struct mtx      et_hw_mtx;
 
@@ -94,13 +92,11 @@ static struct mtx   et_hw_mtx;
        }
 
 static struct eventtimer *timer = NULL;
-static struct bintime  timerperiod;    /* Timer period for periodic mode. */
-static struct bintime  hardperiod;     /* hardclock() events period. */
-static struct bintime  statperiod;     /* statclock() events period. */
-static struct bintime  profperiod;     /* profclock() events period. */
-static struct bintime  nexttick;       /* Next global timer tick time. */
-static struct bintime  nexthard;       /* Next global hardlock() event. */
-static u_int           busy = 0;       /* Reconfiguration is in progress. */
+static sbintime_t      timerperiod;    /* Timer period for periodic mode. */
+static sbintime_t      statperiod;     /* statclock() events period. */
+static sbintime_t      profperiod;     /* profclock() events period. */
+static sbintime_t      nexttick;       /* Next global timer tick time. */
+static u_int           busy = 1;       /* Reconfiguration is in progress. */
 static int             profiling = 0;  /* Profiling events enabled. */
 
 static char            timername[32];  /* Wanted timer. */
@@ -116,11 +112,6 @@ TUNABLE_INT("kern.eventtimer.idletick", 
 SYSCTL_UINT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RW, &idletick,
     0, "Run periodic events when idle");
 
-static u_int           activetick = 1; /* Run all periodic events when active. 
*/
-TUNABLE_INT("kern.eventtimer.activetick", &activetick);
-SYSCTL_UINT(_kern_eventtimer, OID_AUTO, activetick, CTLFLAG_RW, &activetick,
-    0, "Run all periodic events when active");
-
 static int             periodic = 0;   /* Periodic or one-shot mode. */
 static int             want_periodic = 0; /* What mode to prefer. */
 TUNABLE_INT("kern.eventtimer.periodic", &want_periodic);
@@ -129,31 +120,23 @@ struct pcpu_state {
        struct mtx      et_hw_mtx;      /* Per-CPU timer mutex. */
        u_int           action;         /* Reconfiguration requests. */
        u_int           handle;         /* Immediate handle resuests. */
-       struct bintime  now;            /* Last tick time. */
-       struct bintime  nextevent;      /* Next scheduled event on this CPU. */
-       struct bintime  nexttick;       /* Next timer tick time. */
-       struct bintime  nexthard;       /* Next hardlock() event. */
-       struct bintime  nextstat;       /* Next statclock() event. */
-       struct bintime  nextprof;       /* Next profclock() event. */
+       sbintime_t      now;            /* Last tick time. */
+       sbintime_t      nextevent;      /* Next scheduled event on this CPU. */
+       sbintime_t      nexttick;       /* Next timer tick time. */
+       sbintime_t      nexthard;       /* Next hardlock() event. */
+       sbintime_t      nextstat;       /* Next statclock() event. */
+       sbintime_t      nextprof;       /* Next profclock() event. */
+       sbintime_t      nextcall;       /* Next callout event. */
+       sbintime_t      nextcallopt;    /* Next optional callout event. */
 #ifdef KDTRACE_HOOKS
-       struct bintime  nextcyc;        /* Next OpenSolaris cyclics event. */
+       sbintime_t      nextcyc;        /* Next OpenSolaris cyclics event. */
 #endif
        int             ipi;            /* This CPU needs IPI. */
        int             idle;           /* This CPU is in idle mode. */
 };
 
 static DPCPU_DEFINE(struct pcpu_state, timerstate);
-
-#define FREQ2BT(freq, bt)                                              \
-{                                                                      \
-       (bt)->sec = 0;                                                  \
-       (bt)->frac = ((uint64_t)0x8000000000000000  / (freq)) << 1;     \
-}
-#define BT2FREQ(bt)                                                    \
-       (((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) /           \
-           ((bt)->frac >> 1))
-
-#define        SBT2FREQ(sbt)   ((SBT_1S + ((sbt) >> 1)) / (sbt))
+DPCPU_DEFINE(sbintime_t, hardclocktime);
 
 /*
  * Timer broadcast IPI handler.
@@ -161,7 +144,7 @@ static DPCPU_DEFINE(struct pcpu_state, t
 int
 hardclockintr(void)
 {
-       struct bintime now;
+       sbintime_t now;
        struct pcpu_state *state;
        int done;
 
@@ -169,10 +152,9 @@ hardclockintr(void)
                return (FILTER_HANDLED);
        state = DPCPU_PTR(timerstate);
        now = state->now;
-       CTR4(KTR_SPARE2, "ipi  at %d:    now  %d.%08x%08x",
-           curcpu, now.sec, (u_int)(now.frac >> 32),
-                            (u_int)(now.frac & 0xffffffff));
-       done = handleevents(&now, 0);
+       CTR3(KTR_SPARE2, "ipi  at %d:    now  %d.%08x",
+           curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
+       done = handleevents(now, 0);
        return (done ? FILTER_HANDLED : FILTER_STRAY);
 }
 
@@ -180,48 +162,43 @@ hardclockintr(void)
  * Handle all events for specified time on this CPU
  */
 static int
-handleevents(struct bintime *now, int fake)
+handleevents(sbintime_t now, int fake)
 {
-       struct bintime t;
+       sbintime_t t, *hct;
        struct trapframe *frame;
        struct pcpu_state *state;
-       uintfptr_t pc;
        int usermode;
        int done, runs;
 
-       CTR4(KTR_SPARE2, "handle at %d:  now  %d.%08x%08x",
-           curcpu, now->sec, (u_int)(now->frac >> 32),
-                    (u_int)(now->frac & 0xffffffff));
+       CTR3(KTR_SPARE2, "handle at %d:  now  %d.%08x",
+           curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
        done = 0;
        if (fake) {
                frame = NULL;
                usermode = 0;
-               pc = 0;
        } else {
                frame = curthread->td_intr_frame;
                usermode = TRAPF_USERMODE(frame);
-               pc = TRAPF_PC(frame);
        }
 
        state = DPCPU_PTR(timerstate);
 
        runs = 0;
-       while (bintime_cmp(now, &state->nexthard, >=)) {
-               bintime_addx(&state->nexthard, hardperiod.frac);
+       while (now >= state->nexthard) {
+               state->nexthard += tick_sbt;
                runs++;
        }
        if (runs) {
-               if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 &&
-                   bintime_cmp(&state->nexthard, &nexthard, >))
-                       nexthard = state->nexthard;
+               hct = DPCPU_PTR(hardclocktime);
+               *hct = state->nexthard - tick_sbt;
                if (fake < 2) {
                        hardclock_cnt(runs, usermode);
                        done = 1;
                }
        }
        runs = 0;
-       while (bintime_cmp(now, &state->nextstat, >=)) {
-               bintime_addx(&state->nextstat, statperiod.frac);
+       while (now >= state->nextstat) {
+               state->nextstat += statperiod;
                runs++;
        }
        if (runs && fake < 2) {
@@ -230,31 +207,29 @@ handleevents(struct bintime *now, int fa
        }
        if (profiling) {
                runs = 0;
-               while (bintime_cmp(now, &state->nextprof, >=)) {
-                       bintime_addx(&state->nextprof, profperiod.frac);
+               while (now >= state->nextprof) {
+                       state->nextprof += profperiod;
                        runs++;
                }
                if (runs && !fake) {
-                       profclock_cnt(runs, usermode, pc);
+                       profclock_cnt(runs, usermode, TRAPF_PC(frame));
                        done = 1;
                }
        } else
                state->nextprof = state->nextstat;
+       if (now >= state->nextcallopt) {
+               state->nextcall = state->nextcallopt = INT64_MAX;
+               callout_process(now);
+       }
 
 #ifdef KDTRACE_HOOKS
-       if (fake == 0 && cyclic_clock_func != NULL &&
-           state->nextcyc.sec != -1 &&
-           bintime_cmp(now, &state->nextcyc, >=)) {
-               state->nextcyc.sec = -1;
+       if (fake == 0 && now >= state->nextcyc && cyclic_clock_func != NULL) {
+               state->nextcyc = INT64_MAX;
                (*cyclic_clock_func)(frame);
        }
 #endif
 
-       getnextcpuevent(&t, 0);
-       if (fake == 2) {
-               state->nextevent = t;
-               return (done);
-       }
+       t = getnextcpuevent(0);
        ET_HW_LOCK(state);
        if (!busy) {
                state->idle = 0;
@@ -268,84 +243,81 @@ handleevents(struct bintime *now, int fa
 /*
  * Schedule binuptime of the next event on current CPU.
  */
-static void
-getnextcpuevent(struct bintime *event, int idle)
+static sbintime_t
+getnextcpuevent(int idle)
 {
-       struct bintime tmp;
+       sbintime_t event;
        struct pcpu_state *state;
-       int skip;
+       u_int hardfreq;
 
        state = DPCPU_PTR(timerstate);
-       /* Handle hardclock() events. */
-       *event = state->nexthard;
-       if (idle || (!activetick && !profiling &&
-           (timer->et_flags & ET_FLAGS_PERCPU) == 0)) {
-               skip = idle ? 4 : (stathz / 2);
-               if (curcpu == CPU_FIRST() && tc_min_ticktock_freq > skip)
-                       skip = tc_min_ticktock_freq;
-               skip = callout_tickstofirst(hz / skip) - 1;
-               CTR2(KTR_SPARE2, "skip   at %d: %d", curcpu, skip);
-               tmp = hardperiod;
-               bintime_mul(&tmp, skip);
-               bintime_add(event, &tmp);
-       }
+       /* Handle hardclock() events, skipping some if CPU is idle. */
+       event = state->nexthard;
+       if (idle) {
+               hardfreq = (u_int)hz / 2;
+               if (tc_min_ticktock_freq > 2
+#ifdef SMP
+                   && curcpu == CPU_FIRST()
+#endif
+                   )
+                       hardfreq = hz / tc_min_ticktock_freq;
+               if (hardfreq > 1)
+                       event += tick_sbt * (hardfreq - 1);
+       }
+       /* Handle callout events. */
+       if (event > state->nextcall)
+               event = state->nextcall;
        if (!idle) { /* If CPU is active - handle other types of events. */
-               if (bintime_cmp(event, &state->nextstat, >))
-                       *event = state->nextstat;
-               if (profiling && bintime_cmp(event, &state->nextprof, >))
-                       *event = state->nextprof;
+               if (event > state->nextstat)
+                       event = state->nextstat;
+               if (profiling && event > state->nextprof)
+                       event = state->nextprof;
        }
 #ifdef KDTRACE_HOOKS
-       if (state->nextcyc.sec != -1 && bintime_cmp(event, &state->nextcyc, >))
-               *event = state->nextcyc;
+       if (event > state->nextcyc)
+               event = state->nextcyc;
 #endif
+       return (event);
 }
 
 /*
  * Schedule binuptime of the next event on all CPUs.
  */
-static void
-getnextevent(struct bintime *event)
+static sbintime_t
+getnextevent(void)
 {
        struct pcpu_state *state;
+       sbintime_t event;
 #ifdef SMP
        int     cpu;
 #endif
-       int     c, nonidle;
+       int     c;
 
        state = DPCPU_PTR(timerstate);
-       *event = state->nextevent;
-       c = curcpu;
-       nonidle = !state->idle;
-       if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
+       event = state->nextevent;
+       c = -1;
 #ifdef SMP
-               if (smp_started) {
-                       CPU_FOREACH(cpu) {
-                               if (curcpu == cpu)
-                                       continue;
-                               state = DPCPU_ID_PTR(cpu, timerstate);
-                               nonidle += !state->idle;
-                               if (bintime_cmp(event, &state->nextevent, >)) {
-                                       *event = state->nextevent;
-                                       c = cpu;
-                               }
+       if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
+               CPU_FOREACH(cpu) {
+                       state = DPCPU_ID_PTR(cpu, timerstate);
+                       if (event > state->nextevent) {
+                               event = state->nextevent;
+                               c = cpu;
                        }
                }
-#endif
-               if (nonidle != 0 && bintime_cmp(event, &nexthard, >))
-                       *event = nexthard;
        }
-       CTR5(KTR_SPARE2, "next at %d:    next %d.%08x%08x by %d",
-           curcpu, event->sec, (u_int)(event->frac >> 32),
-                            (u_int)(event->frac & 0xffffffff), c);
+#endif
+       CTR4(KTR_SPARE2, "next at %d:    next %d.%08x by %d",
+           curcpu, (int)(event >> 32), (u_int)(event & 0xffffffff), c);
+       return (event);
 }
 
 /* Hardware timer callback function. */
 static void
 timercb(struct eventtimer *et, void *arg)
 {
-       struct bintime now;
-       struct bintime *next;
+       sbintime_t now;
+       sbintime_t *next;
        struct pcpu_state *state;
 #ifdef SMP
        int cpu, bcast;
@@ -360,16 +332,14 @@ timercb(struct eventtimer *et, void *arg
                next = &state->nexttick;
        } else
                next = &nexttick;
-       binuptime(&now); 
-       if (periodic) { 
-               *next = now;
-               bintime_addx(next, timerperiod.frac); /* Next tick in 1 period. 
*/
-       } else
-               next->sec = -1; /* Next tick is not scheduled yet. */
+       now = sbinuptime();
+       if (periodic)
+               *next = now + timerperiod;
+       else
+               *next = -1;     /* Next tick is not scheduled yet. */
        state->now = now;
-       CTR4(KTR_SPARE2, "intr at %d:    now  %d.%08x%08x",
-           curcpu, (int)(now.sec), (u_int)(now.frac >> 32),
-                            (u_int)(now.frac & 0xffffffff));
+       CTR3(KTR_SPARE2, "intr at %d:    now  %d.%08x",
+           curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
 
 #ifdef SMP
        /* Prepare broadcasting to other CPUs for non-per-CPU timers. */
@@ -379,8 +349,8 @@ timercb(struct eventtimer *et, void *arg
                        state = DPCPU_ID_PTR(cpu, timerstate);
                        ET_HW_LOCK(state);
                        state->now = now;
-                       if (bintime_cmp(&now, &state->nextevent, >=)) {
-                               state->nextevent.sec++;
+                       if (now >= state->nextevent) {
+                               state->nextevent += SBT_1S;
                                if (curcpu != cpu) {
                                        state->ipi = 1;
                                        bcast = 1;
@@ -392,7 +362,7 @@ timercb(struct eventtimer *et, void *arg
 #endif
 
        /* Handle events for this time on this CPU. */
-       handleevents(&now, 0);
+       handleevents(now, 0);
 
 #ifdef SMP
        /* Broadcast interrupt to other CPUs for non-per-CPU timers. */
@@ -414,11 +384,11 @@ timercb(struct eventtimer *et, void *arg
  * Load new value into hardware timer.
  */
 static void
-loadtimer(struct bintime *now, int start)
+loadtimer(sbintime_t now, int start)
 {
        struct pcpu_state *state;
-       struct bintime new;
-       struct bintime *next;
+       sbintime_t new;
+       sbintime_t *next;
        uint64_t tmp;
        int eq;
 
@@ -433,30 +403,24 @@ loadtimer(struct bintime *now, int start
                         * Try to start all periodic timers aligned
                         * to period to make events synchronous.
                         */
-                       tmp = ((uint64_t)now->sec << 36) + (now->frac >> 28);
-                       tmp = (tmp % (timerperiod.frac >> 28)) << 28;
-                       new.sec = 0;
-                       new.frac = timerperiod.frac - tmp;
-                       if (new.frac < tmp)     /* Left less then passed. */
-                               bintime_addx(&new, timerperiod.frac);
+                       tmp = now % timerperiod;
+                       new = timerperiod - tmp;
+                       if (new < tmp)          /* Left less then passed. */
+                               new += timerperiod;
                        CTR5(KTR_SPARE2, "load p at %d:   now %d.%08x first in 
%d.%08x",
-                           curcpu, now->sec, (u_int)(now->frac >> 32),
-                           new.sec, (u_int)(new.frac >> 32));
-                       *next = new;
-                       bintime_add(next, now);
-                       et_start(timer, bttosbt(new), bttosbt(timerperiod));
+                           curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff),
+                           (int)(new >> 32), (u_int)(new & 0xffffffff));
+                       *next = new + now;
+                       et_start(timer, new, timerperiod);
                }
        } else {
-               getnextevent(&new);
-               eq = bintime_cmp(&new, next, ==);
-               CTR5(KTR_SPARE2, "load at %d:    next %d.%08x%08x eq %d",
-                   curcpu, new.sec, (u_int)(new.frac >> 32),
-                            (u_int)(new.frac & 0xffffffff),
-                            eq);
+               new = getnextevent();
+               eq = (new == *next);
+               CTR4(KTR_SPARE2, "load at %d:    next %d.%08x eq %d",
+                   curcpu, (int)(new >> 32), (u_int)(new & 0xffffffff), eq);
                if (!eq) {
                        *next = new;
-                       bintime_sub(&new, now);
-                       et_start(timer, bttosbt(new), 0);
+                       et_start(timer, new - now, 0);
                }
        }
 }
@@ -478,7 +442,7 @@ setuptimer(void)
        while (freq < (profiling ? profhz : stathz))
                freq += hz;
        freq = round_freq(timer, freq);
-       FREQ2BT(freq, &timerperiod);
+       timerperiod = SBT_1S / freq;
 }
 
 /*
@@ -487,15 +451,15 @@ setuptimer(void)
 static int
 doconfigtimer(void)
 {
-       struct bintime now;
+       sbintime_t now;
        struct pcpu_state *state;
 
        state = DPCPU_PTR(timerstate);
        switch (atomic_load_acq_int(&state->action)) {
        case 1:
-               binuptime(&now);
+               now = sbinuptime();
                ET_HW_LOCK(state);
-               loadtimer(&now, 1);
+               loadtimer(now, 1);
                ET_HW_UNLOCK(state);
                state->handle = 0;
                atomic_store_rel_int(&state->action, 0);
@@ -509,8 +473,8 @@ doconfigtimer(void)
                return (1);
        }
        if (atomic_readandclear_int(&state->handle) && !busy) {
-               binuptime(&now);
-               handleevents(&now, 0);
+               now = sbinuptime();
+               handleevents(now, 0);
                return (1);
        }
        return (0);
@@ -523,40 +487,45 @@ doconfigtimer(void)
 static void
 configtimer(int start)
 {
-       struct bintime now, next;
+       sbintime_t now, next;
        struct pcpu_state *state;
        int cpu;
 
        if (start) {
                setuptimer();
-               binuptime(&now);
-       }
+               now = sbinuptime();
+       } else
+               now = 0;
        critical_enter();
        ET_HW_LOCK(DPCPU_PTR(timerstate));
        if (start) {
                /* Initialize time machine parameters. */
-               next = now;
-               bintime_addx(&next, timerperiod.frac);
+               next = now + timerperiod;
                if (periodic)
                        nexttick = next;
                else
-                       nexttick.sec = -1;
+                       nexttick = -1;
                CPU_FOREACH(cpu) {
                        state = DPCPU_ID_PTR(cpu, timerstate);
                        state->now = now;
-                       state->nextevent = next;
+                       if (!smp_started && cpu != CPU_FIRST())
+                               state->nextevent = INT64_MAX;
+                       else
+                               state->nextevent = next;
                        if (periodic)
                                state->nexttick = next;
                        else
-                               state->nexttick.sec = -1;
+                               state->nexttick = -1;
                        state->nexthard = next;
                        state->nextstat = next;
                        state->nextprof = next;
+                       state->nextcall = next;
+                       state->nextcallopt = next;
                        hardclock_sync(cpu);
                }
                busy = 0;
                /* Start global timer or per-CPU timer of this CPU. */
-               loadtimer(&now, 1);
+               loadtimer(now, 1);
        } else {
                busy = 1;
                /* Stop global timer or per-CPU timer of this CPU. */
@@ -629,12 +598,11 @@ cpu_initclocks_bsp(void)
                state = DPCPU_ID_PTR(cpu, timerstate);
                mtx_init(&state->et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
 #ifdef KDTRACE_HOOKS
-               state->nextcyc.sec = -1;
+               state->nextcyc = INT64_MAX;
 #endif
+               state->nextcall = INT64_MAX;
+               state->nextcallopt = INT64_MAX;
        }
-#ifdef SMP
-       callout_new_inserted = cpu_new_callout;
-#endif
        periodic = want_periodic;
        /* Grab requested timer or the best of present. */
        if (timername[0])
@@ -698,9 +666,10 @@ cpu_initclocks_bsp(void)
                profhz = round_freq(timer, stathz * 64);
        }
        tick = 1000000 / hz;
-       FREQ2BT(hz, &hardperiod);
-       FREQ2BT(stathz, &statperiod);
-       FREQ2BT(profhz, &profperiod);
+       tick_sbt = SBT_1S / hz;
+       tick_bt = sbttobt(tick_sbt);
+       statperiod = SBT_1S / stathz;
+       profperiod = SBT_1S / profhz;
        ET_LOCK();
        configtimer(1);
        ET_UNLOCK();
@@ -712,18 +681,22 @@ cpu_initclocks_bsp(void)
 void
 cpu_initclocks_ap(void)
 {
-       struct bintime now;
+       sbintime_t now;
        struct pcpu_state *state;
+       struct thread *td;
 
        state = DPCPU_PTR(timerstate);
-       binuptime(&now);
+       now = sbinuptime();
        ET_HW_LOCK(state);
        state->now = now;
        hardclock_sync(curcpu);
-       handleevents(&state->now, 2);
-       if (timer->et_flags & ET_FLAGS_PERCPU)
-               loadtimer(&now, 1);
+       spinlock_enter();
        ET_HW_UNLOCK(state);
+       td = curthread;
+       td->td_intr_nesting_level++;
+       handleevents(state->now, 2);
+       td->td_intr_nesting_level--;
+       spinlock_exit();
 }
 
 /*
@@ -772,7 +745,7 @@ cpu_stopprofclock(void)
 sbintime_t
 cpu_idleclock(void)
 {
-       struct bintime now, t;
+       sbintime_t now, t;
        struct pcpu_state *state;
 
        if (idletick || busy ||
@@ -786,19 +759,17 @@ cpu_idleclock(void)
        if (periodic)
                now = state->now;
        else
-               binuptime(&now);
-       CTR4(KTR_SPARE2, "idle at %d:    now  %d.%08x%08x",
-           curcpu, now.sec, (u_int)(now.frac >> 32),
-                            (u_int)(now.frac & 0xffffffff));
-       getnextcpuevent(&t, 1);
+               now = sbinuptime();
+       CTR3(KTR_SPARE2, "idle at %d:    now  %d.%08x",
+           curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
+       t = getnextcpuevent(1);
        ET_HW_LOCK(state);
        state->idle = 1;
        state->nextevent = t;
        if (!periodic)
-               loadtimer(&now, 0);
+               loadtimer(now, 0);
        ET_HW_UNLOCK(state);
-       bintime_sub(&t, &now);
-       return (MAX(bttosbt(t), 0));
+       return (MAX(t - now, 0));
 }
 
 /*
@@ -807,7 +778,7 @@ cpu_idleclock(void)
 void
 cpu_activeclock(void)
 {
-       struct bintime now;
+       sbintime_t now;
        struct pcpu_state *state;
        struct thread *td;
 
@@ -817,101 +788,98 @@ cpu_activeclock(void)
        if (periodic)
                now = state->now;
        else
-               binuptime(&now);
-       CTR4(KTR_SPARE2, "active at %d:  now  %d.%08x%08x",
-           curcpu, now.sec, (u_int)(now.frac >> 32),
-                            (u_int)(now.frac & 0xffffffff));
+               now = sbinuptime();
+       CTR3(KTR_SPARE2, "active at %d:  now  %d.%08x",
+           curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
        spinlock_enter();
        td = curthread;
        td->td_intr_nesting_level++;
-       handleevents(&now, 1);
+       handleevents(now, 1);
        td->td_intr_nesting_level--;
        spinlock_exit();
 }
 
 #ifdef KDTRACE_HOOKS
 void
-clocksource_cyc_set(const struct bintime *t)
+clocksource_cyc_set(const struct bintime *bt)
 {
-       struct bintime now;
+       sbintime_t now, t;
        struct pcpu_state *state;
 
+       /* Do not touch anything if somebody is reconfiguring timers. */
+       if (busy)
+               return;
+       t = bttosbt(*bt);
        state = DPCPU_PTR(timerstate);
        if (periodic)
                now = state->now;
        else
-               binuptime(&now);
+               now = sbinuptime();
 
-       CTR4(KTR_SPARE2, "set_cyc at %d:  now  %d.%08x%08x",
-           curcpu, now.sec, (u_int)(now.frac >> 32),
-                            (u_int)(now.frac & 0xffffffff));
-       CTR4(KTR_SPARE2, "set_cyc at %d:  t  %d.%08x%08x",
-           curcpu, t->sec, (u_int)(t->frac >> 32),
-                            (u_int)(t->frac & 0xffffffff));
+       CTR5(KTR_SPARE2, "set_cyc at %d:  now  %d.%08x  t  %d.%08x",
+           curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff),
+           (int)(t >> 32), (u_int)(t & 0xffffffff));
 
        ET_HW_LOCK(state);
-       if (bintime_cmp(t, &state->nextcyc, ==)) {
-               ET_HW_UNLOCK(state);
-               return;
-       }
-       state->nextcyc = *t;
-       if (bintime_cmp(&state->nextcyc, &state->nextevent, >=)) {
-               ET_HW_UNLOCK(state);
-               return;
-       }
-       state->nextevent = state->nextcyc;
+       if (t == state->nextcyc)
+               goto done;
+       state->nextcyc = t;
+       if (t >= state->nextevent)
+               goto done;
+       state->nextevent = t;
        if (!periodic)
-               loadtimer(&now, 0);
+               loadtimer(now, 0);
+done:
        ET_HW_UNLOCK(state);
 }
 #endif
 
-#ifdef SMP
-static void
-cpu_new_callout(int cpu, int ticks)
+void
+cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt)
 {
-       struct bintime tmp;
        struct pcpu_state *state;
 
-       CTR3(KTR_SPARE2, "new co at %d:    on %d in %d",
-           curcpu, cpu, ticks);
+       /* Do not touch anything if somebody is reconfiguring timers. */
+       if (busy)
+               return;
+       CTR6(KTR_SPARE2, "new co at %d:    on %d at %d.%08x - %d.%08x",
+           curcpu, cpu, (int)(bt_opt >> 32), (u_int)(bt_opt & 0xffffffff),
+           (int)(bt >> 32), (u_int)(bt & 0xffffffff));
        state = DPCPU_ID_PTR(cpu, timerstate);
        ET_HW_LOCK(state);
-       if (state->idle == 0 || busy) {
-               ET_HW_UNLOCK(state);
-               return;
-       }
+
        /*
-        * If timer is periodic - just update next event time for target CPU.
-        * If timer is global - there is chance it is already programmed.
+        * If there is callout time already set earlier -- do nothing.
+        * This check may appear redundant because we check already in
+        * callout_process() but this double check guarantees we're safe
+        * with respect to race conditions between interrupts execution
+        * and scheduling.
         */
-       if (periodic || (timer->et_flags & ET_FLAGS_PERCPU) == 0) {
-               tmp = hardperiod;
-               bintime_mul(&tmp, ticks - 1);
-               bintime_add(&tmp, &state->nexthard);
-               if (bintime_cmp(&tmp, &state->nextevent, <))
-                       state->nextevent = tmp;
-               if (periodic ||
-                   bintime_cmp(&state->nextevent, &nexttick, >=)) {
-                       ET_HW_UNLOCK(state);
-                       return;
-               }
+       state->nextcallopt = bt_opt;
+       if (bt >= state->nextcall)
+               goto done;
+       state->nextcall = bt;
+       /* If there is some other event set earlier -- do nothing. */
+       if (bt >= state->nextevent)
+               goto done;
+       state->nextevent = bt;
+       /* If timer is periodic -- there is nothing to reprogram. */
+       if (periodic)
+               goto done;
+       /* If timer is global or of the current CPU -- reprogram it. */
+       if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || cpu == curcpu) {
+               loadtimer(sbinuptime(), 0);
+done:
+               ET_HW_UNLOCK(state);
+               return;
        }
-       /*
-        * Otherwise we have to wake that CPU up, as we can't get present
-        * bintime to reprogram global timer from here. If timer is per-CPU,
-        * we by definition can't do it from here.
-        */
+       /* Otherwise make the other CPU reprogram it. */
+       state->handle = 1;
        ET_HW_UNLOCK(state);
-       if (timer->et_flags & ET_FLAGS_PERCPU) {
-               state->handle = 1;
-               ipi_cpu(cpu, IPI_HARDCLOCK);
-       } else {
-               if (!cpu_idle_wakeup(cpu))
-                       ipi_cpu(cpu, IPI_AST);
-       }
-}
+#ifdef SMP
+       ipi_cpu(cpu, IPI_HARDCLOCK);
 #endif
+}
 
 /*
  * Report or change the active event timers hardware.

Modified: head/sys/kern/kern_tc.c
==============================================================================
--- head/sys/kern/kern_tc.c     Mon Mar  4 10:41:54 2013        (r247776)
+++ head/sys/kern/kern_tc.c     Mon Mar  4 11:09:56 2013        (r247777)
@@ -22,6 +22,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
+#include <sys/limits.h>
 #ifdef FFCLOCK
 #include <sys/lock.h>
 #include <sys/mutex.h>
@@ -119,6 +120,21 @@ static int timestepwarnings;
 SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
     &timestepwarnings, 0, "Log time steps");
 
+struct bintime bt_timethreshold;
+struct bintime bt_tickthreshold;
+sbintime_t sbt_timethreshold;
+sbintime_t sbt_tickthreshold;
+struct bintime tc_tick_bt;
+sbintime_t tc_tick_sbt;
+int tc_precexp;
+int tc_timepercentage = TC_DEFAULTPERC;
+TUNABLE_INT("kern.timecounter.alloweddeviation", &tc_timepercentage);
+static int sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_kern_timecounter, OID_AUTO, alloweddeviation,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
+    sysctl_kern_timecounter_adjprecision, "I",
+    "Allowed time interval deviation in percents");
+
 static void tc_windup(void);
 static void cpu_tick_calibrate(int);
 
@@ -1746,10 +1762,47 @@ tc_ticktock(int cnt)
        tc_windup();
 }
 
+static void __inline
+tc_adjprecision(void)
+{
+       int t;
+
+       if (tc_timepercentage > 0) {
+               t = (99 + tc_timepercentage) / tc_timepercentage;
+               tc_precexp = fls(t + (t >> 1)) - 1;
+               FREQ2BT(hz / tc_tick, &bt_timethreshold);
+               FREQ2BT(hz, &bt_tickthreshold);
+               bintime_shift(&bt_timethreshold, tc_precexp);
+               bintime_shift(&bt_tickthreshold, tc_precexp);
+       } else {
+               tc_precexp = 31;
+               bt_timethreshold.sec = INT_MAX;
+               bt_timethreshold.frac = ~(uint64_t)0;
+               bt_tickthreshold = bt_timethreshold;
+       }
+       sbt_timethreshold = bttosbt(bt_timethreshold);
+       sbt_tickthreshold = bttosbt(bt_tickthreshold);
+}
+
+static int
+sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS)
+{
+       int error, val;
+
+       val = tc_timepercentage;
+       error = sysctl_handle_int(oidp, &val, 0, req);
+       if (error != 0 || req->newptr == NULL)
+               return (error);
+       tc_timepercentage = val;
+       tc_adjprecision();
+       return (0);
+}
+
 static void
 inittimecounter(void *dummy)
 {
        u_int p;
+       int tick_rate;
 
        /*
         * Set the initial timeout to
@@ -1763,6 +1816,12 @@ inittimecounter(void *dummy)
                tc_tick = (hz + 500) / 1000;
        else
                tc_tick = 1;
+       tc_adjprecision();
+       FREQ2BT(hz, &tick_bt);
+       tick_sbt = bttosbt(tick_bt);
+       tick_rate = hz / tc_tick;
+       FREQ2BT(tick_rate, &tc_tick_bt);
+       tc_tick_sbt = bttosbt(tc_tick_bt);
        p = (tc_tick * 1000000) / hz;
        printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
 

Modified: head/sys/kern/kern_timeout.c
==============================================================================
--- head/sys/kern/kern_timeout.c        Mon Mar  4 10:41:54 2013        (r247776)
+++ head/sys/kern/kern_timeout.c        Mon Mar  4 11:09:56 2013        (r247777)
@@ -37,7 +37,11 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_callout_profiling.h"
 #include "opt_kdtrace.h"
+#if defined(__arm__)
+#include "opt_timer.h"
+#endif
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -59,6 +63,10 @@ __FBSDID("$FreeBSD$");
 #include <machine/cpu.h>
 #endif
 
+#ifndef NO_EVENTTIMERS
+DPCPU_DECLARE(sbintime_t, hardclocktime);
+#endif
+
 SDT_PROVIDER_DEFINE(callout_execute);
 SDT_PROBE_DEFINE(callout_execute, kernel, , callout_start, callout-start);
 SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_start, 0,
@@ -67,6 +75,7 @@ SDT_PROBE_DEFINE(callout_execute, kernel
 SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_end, 0,
     "struct callout *");
 
+#ifdef CALLOUT_PROFILING
 static int avg_depth;
 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to