This is the next patch in the clock interrupt reorganization series.

Now that statclock() is cleaned up we can turn to hardclock().

The goal of the next four patches is to eliminate the need for a
hardclock on secondary CPUs.  hardclock() is only used on secondary
CPUs to poll for events that rarely happen: setitimer(2) hits, dt(4),
and roundrobin().  We can break all of these out into separate clock
interrupt routines.

This patch moves the setitimer(2) code out of hardclock().  The big
idea is identical to what we did with profil(2)/profclock in the
profclock/gmonclock patch.

- Move the setitimer(2) polling code from hardclock() to a new clock
  interrupt routine, itimer_update(), in kern_time.c.  itimer_update()
  is periodic and runs at the same frequency as the hardclock.

- Each schedstate_percpu has its own itimer_update() handle, spc_itimer,
  initialized during sched_init_cpu().

- The itimer_update() on a given CPU is enabled/disabled in
  mi_switch()/sched_exit() if the running thread's process has enabled
  ITIMER_VIRTUAL/ITIMER_PROF.  A new scheduler flag, SPCF_ITIMER,
  signifies whether itimer_update() was started and needs stopping.

- A new per-process flag, PS_ITIMER, signifies whether any virtual
  interval timers are running.  The flag is updated from the helper
  routine process_reset_itimer_flag().  We use it during mi_switch()
  to decide whether to start itimer_update() without entering itimer_mtx.

- In setitimer(), call need_resched() when the process changes the
  state of ITIMER_VIRTUAL/ITIMER_PROF to force itimer_update() on/off.

regress/sys/kern/itimer passes.

ok?

Index: kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
retrieving revision 1.109
diff -u -p -r1.109 kern_clock.c
--- kern/kern_clock.c   25 Jul 2023 18:16:19 -0000      1.109
+++ kern/kern_clock.c   26 Jul 2023 14:41:02 -0000
@@ -86,6 +86,8 @@ int   ticks = INT_MAX - (15 * 60 * HZ);
 
 volatile unsigned long jiffies = ULONG_MAX - (10 * 60 * HZ);
 
+uint32_t hardclock_period;             /* [I] hardclock period (ns) */
+
 /*
  * Initialize clock frequencies and start both clocks running.
  */
@@ -97,6 +99,9 @@ initclocks(void)
         */
        cpu_initclocks();
 
+       KASSERT(hz > 0 && hz <= 1000000000);
+       hardclock_period = 1000000000 / hz;
+
        KASSERT(profhz >= stathz && profhz <= 1000000000);
        KASSERT(profhz % stathz == 0);
        profclock_period = 1000000000 / profhz;
@@ -105,41 +110,12 @@ initclocks(void)
 }
 
 /*
- * hardclock does the accounting needed for ITIMER_PROF and ITIMER_VIRTUAL.
- * We don't want to send signals with psignal from hardclock because it makes
- * MULTIPROCESSOR locking very complicated. Instead, to use an idea from
- * FreeBSD, we set a flag on the thread and when it goes to return to
- * userspace it signals itself.
- */
-
-/*
  * The real-time timer, interrupting hz times per second.
  */
 void
 hardclock(struct clockframe *frame)
 {
-       struct proc *p;
        struct cpu_info *ci = curcpu();
-
-       p = curproc;
-       if (p && ((p->p_flag & (P_SYSTEM | P_WEXIT)) == 0)) {
-               struct process *pr = p->p_p;
-
-               /*
-                * Run current process's virtual and profile time, as needed.
-                */
-               if (CLKF_USERMODE(frame) &&
-                   timespecisset(&pr->ps_timer[ITIMER_VIRTUAL].it_value) &&
-                   itimerdecr(&pr->ps_timer[ITIMER_VIRTUAL], tick_nsec) == 0) {
-                       atomic_setbits_int(&p->p_flag, P_ALRMPEND);
-                       need_proftick(p);
-               }
-               if (timespecisset(&pr->ps_timer[ITIMER_PROF].it_value) &&
-                   itimerdecr(&pr->ps_timer[ITIMER_PROF], tick_nsec) == 0) {
-                       atomic_setbits_int(&p->p_flag, P_PROFPEND);
-                       need_proftick(p);
-               }
-       }
 
        if (--ci->ci_schedstate.spc_rrticks <= 0)
                roundrobin(ci);
Index: kern/kern_time.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_time.c,v
retrieving revision 1.163
diff -u -p -r1.163 kern_time.c
--- kern/kern_time.c    15 Feb 2023 10:07:50 -0000      1.163
+++ kern/kern_time.c    26 Jul 2023 14:41:02 -0000
@@ -35,6 +35,7 @@
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
+#include <sys/clockintr.h>
 #include <sys/mutex.h>
 #include <sys/rwlock.h>
 #include <sys/proc.h>
@@ -52,6 +53,7 @@
 #include <dev/clock_subr.h>
 
 int itimerfix(struct itimerval *);
+void process_reset_itimer_flag(struct process *);
 
 /* 
  * Time of day and interval timer support.
@@ -551,6 +553,10 @@ setitimer(int which, const struct itimer
                                timeout_del(&pr->ps_realit_to);
                }
                *itimer = its;
+               if (which != ITIMER_REAL) {
+                       process_reset_itimer_flag(pr);
+                       need_resched(curcpu());
+               }
        }
 
        if (which == ITIMER_REAL)
@@ -729,47 +735,70 @@ itimerfix(struct itimerval *itv)
 }
 
 /*
- * Decrement an interval timer by the given number of nanoseconds.
+ * Decrement an interval timer by the given duration.
  * If the timer expires and it is periodic then reload it.  When reloading
  * the timer we subtract any overrun from the next period so that the timer
  * does not drift.
  */
 int
-itimerdecr(struct itimerspec *itp, long nsec)
+itimerdecr(struct itimerspec *itp, const struct timespec *decrement)
 {
-       struct timespec decrement;
-
-       NSEC_TO_TIMESPEC(nsec, &decrement);
-
-       mtx_enter(&itimer_mtx);
-
-       /*
-        * Double-check that the timer is enabled.  A different thread
-        * in setitimer(2) may have disabled it while we were entering
-        * the mutex.
-        */
-       if (!timespecisset(&itp->it_value)) {
-               mtx_leave(&itimer_mtx);
-               return (1);
-       }
-
-       /*
-        * The timer is enabled.  Update and reload it as needed.
-        */
-       timespecsub(&itp->it_value, &decrement, &itp->it_value);
-       if (itp->it_value.tv_sec >= 0 && timespecisset(&itp->it_value)) {
-               mtx_leave(&itimer_mtx);
+       timespecsub(&itp->it_value, decrement, &itp->it_value);
+       if (itp->it_value.tv_sec >= 0 && timespecisset(&itp->it_value))
                return (1);
-       }
        if (!timespecisset(&itp->it_interval)) {
                timespecclear(&itp->it_value);
-               mtx_leave(&itimer_mtx);
                return (0);
        }
        while (itp->it_value.tv_sec < 0 || !timespecisset(&itp->it_value))
                timespecadd(&itp->it_value, &itp->it_interval, &itp->it_value);
-       mtx_leave(&itimer_mtx);
        return (0);
+}
+
+void
+itimer_update(struct clockintr *cl, void *cf)
+{
+       struct timespec elapsed;
+       uint64_t nsecs;
+       struct clockframe *frame = cf;
+       struct proc *p = curproc;
+       struct process *pr;
+
+       if (p == NULL || ISSET(p->p_flag, P_SYSTEM | P_WEXIT))
+               return;
+
+       pr = p->p_p;
+       if (!ISSET(pr->ps_flags, PS_ITIMER))
+               return;
+
+       nsecs = clockintr_advance(cl, hardclock_period) * hardclock_period;
+       NSEC_TO_TIMESPEC(nsecs, &elapsed);
+
+       mtx_enter(&itimer_mtx);
+       if (CLKF_USERMODE(frame) &&
+           timespecisset(&pr->ps_timer[ITIMER_VIRTUAL].it_value) &&
+           itimerdecr(&pr->ps_timer[ITIMER_VIRTUAL], &elapsed) == 0) {
+               process_reset_itimer_flag(pr);
+               atomic_setbits_int(&p->p_flag, P_ALRMPEND);
+               need_proftick(p);
+       }
+       if (timespecisset(&pr->ps_timer[ITIMER_PROF].it_value) &&
+           itimerdecr(&pr->ps_timer[ITIMER_PROF], &elapsed) == 0) {
+               process_reset_itimer_flag(pr);
+               atomic_setbits_int(&p->p_flag, P_PROFPEND);
+               need_proftick(p);
+       }
+       mtx_leave(&itimer_mtx);
+}
+
+void
+process_reset_itimer_flag(struct process *ps)
+{
+       if (timespecisset(&ps->ps_timer[ITIMER_VIRTUAL].it_value) ||
+           timespecisset(&ps->ps_timer[ITIMER_PROF].it_value))
+               atomic_setbits_int(&ps->ps_flags, PS_ITIMER);
+       else
+               atomic_clearbits_int(&ps->ps_flags, PS_ITIMER);
 }
 
 struct mutex ratecheck_mtx = MUTEX_INITIALIZER(IPL_HIGH);
Index: kern/kern_clockintr.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clockintr.c,v
retrieving revision 1.28
diff -u -p -r1.28 kern_clockintr.c
--- kern/kern_clockintr.c       25 Jul 2023 18:16:19 -0000      1.28
+++ kern/kern_clockintr.c       26 Jul 2023 14:41:02 -0000
@@ -35,7 +35,6 @@
  *     I       Immutable after initialization.
  */
 u_int clockintr_flags;                 /* [I] global state + behavior flags */
-uint32_t hardclock_period;             /* [I] hardclock period (ns) */
 uint32_t schedclock_period;            /* [I] schedclock period (ns) */
 uint32_t statclock_avg;                        /* [I] average statclock period (ns) */
 uint32_t statclock_min;                        /* [I] minimum statclock period (ns) */
@@ -64,9 +63,6 @@ clockintr_init(u_int flags)
        KASSERT(CPU_IS_PRIMARY(curcpu()));
        KASSERT(clockintr_flags == 0);
        KASSERT(!ISSET(flags, ~CL_FLAG_MASK));
-
-       KASSERT(hz > 0 && hz <= 1000000000);
-       hardclock_period = 1000000000 / hz;
 
        KASSERT(stathz >= 1 && stathz <= 1000000000);
 
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.80
diff -u -p -r1.80 kern_sched.c
--- kern/kern_sched.c   25 Jul 2023 18:16:19 -0000      1.80
+++ kern/kern_sched.c   26 Jul 2023 14:41:02 -0000
@@ -87,6 +87,16 @@ sched_init_cpu(struct cpu_info *ci)
 
        spc->spc_idleproc = NULL;
 
+       if (spc->spc_itimer == NULL) {
+               spc->spc_itimer = clockintr_establish(&ci->ci_queue,
+                   itimer_update);
+               if (spc->spc_itimer == NULL) {
+                       panic("%s: clockintr_establish itimer_update",
+                           __func__);
+               }
+               clockintr_stagger(spc->spc_itimer, hardclock_period,
+                   CPU_INFO_UNIT(ci), MAXCPUS);
+       }
        if (spc->spc_profclock == NULL) {
                spc->spc_profclock = clockintr_establish(&ci->ci_queue,
                    profclock);
@@ -225,6 +235,10 @@ sched_exit(struct proc *p)
        timespecsub(&ts, &spc->spc_runtime, &ts);
        timespecadd(&p->p_rtime, &ts, &p->p_rtime);
 
+       if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) {
+               atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER);
+               clockintr_cancel(spc->spc_itimer);
+       }
        if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) {
                atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
                clockintr_cancel(spc->spc_profclock);
Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.78
diff -u -p -r1.78 sched_bsd.c
--- kern/sched_bsd.c    25 Jul 2023 18:16:19 -0000      1.78
+++ kern/sched_bsd.c    26 Jul 2023 14:41:02 -0000
@@ -350,7 +350,11 @@ mi_switch(void)
        /* add the time counts for this thread to the process's total */
        tuagg_unlocked(pr, p);
 
-       /* Stop the profclock if it's running. */
+       /* Stop any optional clock interrupts. */
+       if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) {
+               atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER);
+               clockintr_cancel(spc->spc_itimer);
+       }
        if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) {
                atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
                clockintr_cancel(spc->spc_profclock);
@@ -400,7 +404,13 @@ mi_switch(void)
         */
        KASSERT(p->p_cpu == curcpu());
 
-       /* Start the profclock if profil(2) is enabled. */
+       /* Start any optional clock interrupts needed by the thread. */
+       if (ISSET(p->p_p->ps_flags, PS_ITIMER)) {
+               atomic_setbits_int(&p->p_cpu->ci_schedstate.spc_schedflags,
+                   SPCF_ITIMER);
+               clockintr_advance(p->p_cpu->ci_schedstate.spc_itimer,
+                   hardclock_period);
+       }
        if (ISSET(p->p_p->ps_flags, PS_PROFIL)) {
                atomic_setbits_int(&p->p_cpu->ci_schedstate.spc_schedflags,
                    SPCF_PROFCLOCK);
Index: sys/time.h
===================================================================
RCS file: /cvs/src/sys/sys/time.h,v
retrieving revision 1.63
diff -u -p -r1.63 time.h
--- sys/time.h  13 Dec 2022 17:30:36 -0000      1.63
+++ sys/time.h  26 Jul 2023 14:41:02 -0000
@@ -330,8 +330,10 @@ uint64_t   getnsecuptime(void);
 struct proc;
 int    clock_gettime(struct proc *, clockid_t, struct timespec *);
 
+struct clockintr;
+void itimer_update(struct clockintr *, void *);
+
 void   cancel_all_itimers(void);
-int    itimerdecr(struct itimerspec *, long);
 int    settime(const struct timespec *);
 int    ratecheck(struct timeval *, const struct timeval *);
 int    ppsratecheck(struct timeval *, int *, int);
Index: sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.58
diff -u -p -r1.58 sched.h
--- sys/sched.h 25 Jul 2023 18:16:19 -0000      1.58
+++ sys/sched.h 26 Jul 2023 14:41:02 -0000
@@ -107,6 +107,7 @@ struct schedstate_percpu {
        u_char spc_curpriority;         /* usrpri of curproc */
        int spc_rrticks;                /* ticks until roundrobin() */
 
+       struct clockintr *spc_itimer;   /* [o] itimer_update handle */
        struct clockintr *spc_profclock; /* [o] profclock handle */
 
        u_int spc_nrun;                 /* procs on the run queues */
@@ -139,6 +140,7 @@ struct cpustats {
 #define SPCF_SHOULDHALT                0x0004  /* CPU should be vacated */
 #define SPCF_HALTED            0x0008  /* CPU has been halted */
 #define SPCF_PROFCLOCK         0x0010  /* profclock() was started */
+#define SPCF_ITIMER            0x0020  /* itimer_update() was started */
 
 #define        SCHED_PPQ       (128 / SCHED_NQS)       /* priorities per queue */
 #define NICE_WEIGHT 2                  /* priorities per nice level */
Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.346
diff -u -p -r1.346 proc.h
--- sys/proc.h  14 Jul 2023 07:07:08 -0000      1.346
+++ sys/proc.h  26 Jul 2023 14:41:02 -0000
@@ -282,6 +282,7 @@ struct process {
 #define        PS_ORPHAN       0x00800000      /* Process is on an orphan list */
 #define        PS_CHROOT       0x01000000      /* Process is chrooted */
 #define        PS_NOBTCFI      0x02000000      /* No Branch Target CFI */
+#define        PS_ITIMER       0x04000000      /* Virtual interval timers running */
 
 #define        PS_BITS \
     ("\20" "\01CONTROLT" "\02EXEC" "\03INEXEC" "\04EXITING" "\05SUGID" \
@@ -289,7 +290,7 @@ struct process {
      "\013WAITED" "\014COREDUMP" "\015SINGLEEXIT" "\016SINGLEUNWIND" \
      "\017NOZOMBIE" "\020STOPPED" "\021SYSTEM" "\022EMBRYO" "\023ZOMBIE" \
      "\024NOBROADCASTKILL" "\025PLEDGE" "\026WXNEEDED" "\027EXECPLEDGE" \
-     "\030ORPHAN" "\031CHROOT" "\032NOBTCFI")
+     "\030ORPHAN" "\031CHROOT" "\032NOBTCFI" "\033ITIMER")
 
 
 struct kcov_dev;
Index: sys/systm.h
===================================================================
RCS file: /cvs/src/sys/sys/systm.h,v
retrieving revision 1.163
diff -u -p -r1.163 systm.h
--- sys/systm.h 14 Jul 2023 07:07:08 -0000      1.163
+++ sys/systm.h 26 Jul 2023 14:41:02 -0000
@@ -233,6 +233,8 @@ int tvtohz(const struct timeval *);
 int    tstohz(const struct timespec *);
 void   realitexpire(void *);
 
+extern uint32_t hardclock_period;
+
 struct clockframe;
 void   hardclock(struct clockframe *);
 void   statclock(struct clockframe *);

Reply via email to