This is the next patch in the clock interrupt reorganization series. Now that statclock() is cleaned up we can turn to hardclock().
The goal of the next four patches is to eliminate the need for the hardclock on secondary CPUs, which don't actually need one: hardclock() is only used on secondary CPUs to poll for events that rarely happen: setitimer(2) hits, dt(4), and roundrobin(). We can break all of these out into separate clock interrupt routines. This patch moves the setitimer(2) code out of hardclock(). The big idea is identical to what we did with profil(2)/profclock in the profclock/gmonclock patch. - Move the setitimer(2) polling code from hardclock() to a new clock interrupt routine, itimer_update(), in kern_time.c. itimer_update() is periodic and runs at the same frequency as the hardclock. - Each schedstate_percpu has its own itimer_update() handle, spc_itimer, initialized during sched_init_cpu(). - The itimer_update() on a given CPU is enabled/disabled in mi_switch()/sched_exit() if the running thread's process has enabled ITIMER_VIRTUAL/ITIMER_PROF. A new scheduler flag, SPCF_ITIMER, signifies whether itimer_update() was started and needs stopping. - A new per-process flag, PS_ITIMER, signifies whether any virtual interval timers are running. The flag is updated from the helper routine process_reset_itimer_flag(). We use it during mi_switch() to decide whether to start itimer_update() without entering itimer_mtx. - In setitimer(), call need_resched() when the process changes the state of ITIMER_VIRTUAL/ITIMER_PROF to force itimer_update() on/off. regress/sys/kern/itimer passes. ok? 
Index: kern/kern_clock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_clock.c,v retrieving revision 1.109 diff -u -p -r1.109 kern_clock.c --- kern/kern_clock.c 25 Jul 2023 18:16:19 -0000 1.109 +++ kern/kern_clock.c 26 Jul 2023 14:41:02 -0000 @@ -86,6 +86,8 @@ int ticks = INT_MAX - (15 * 60 * HZ); volatile unsigned long jiffies = ULONG_MAX - (10 * 60 * HZ); +uint32_t hardclock_period; /* [I] hardclock period (ns) */ + /* * Initialize clock frequencies and start both clocks running. */ @@ -97,6 +99,9 @@ initclocks(void) */ cpu_initclocks(); + KASSERT(hz > 0 && hz <= 1000000000); + hardclock_period = 1000000000 / hz; + KASSERT(profhz >= stathz && profhz <= 1000000000); KASSERT(profhz % stathz == 0); profclock_period = 1000000000 / profhz; @@ -105,41 +110,12 @@ initclocks(void) } /* - * hardclock does the accounting needed for ITIMER_PROF and ITIMER_VIRTUAL. - * We don't want to send signals with psignal from hardclock because it makes - * MULTIPROCESSOR locking very complicated. Instead, to use an idea from - * FreeBSD, we set a flag on the thread and when it goes to return to - * userspace it signals itself. - */ - -/* * The real-time timer, interrupting hz times per second. */ void hardclock(struct clockframe *frame) { - struct proc *p; struct cpu_info *ci = curcpu(); - - p = curproc; - if (p && ((p->p_flag & (P_SYSTEM | P_WEXIT)) == 0)) { - struct process *pr = p->p_p; - - /* - * Run current process's virtual and profile time, as needed. 
- */ - if (CLKF_USERMODE(frame) && - timespecisset(&pr->ps_timer[ITIMER_VIRTUAL].it_value) && - itimerdecr(&pr->ps_timer[ITIMER_VIRTUAL], tick_nsec) == 0) { - atomic_setbits_int(&p->p_flag, P_ALRMPEND); - need_proftick(p); - } - if (timespecisset(&pr->ps_timer[ITIMER_PROF].it_value) && - itimerdecr(&pr->ps_timer[ITIMER_PROF], tick_nsec) == 0) { - atomic_setbits_int(&p->p_flag, P_PROFPEND); - need_proftick(p); - } - } if (--ci->ci_schedstate.spc_rrticks <= 0) roundrobin(ci); Index: kern/kern_time.c =================================================================== RCS file: /cvs/src/sys/kern/kern_time.c,v retrieving revision 1.163 diff -u -p -r1.163 kern_time.c --- kern/kern_time.c 15 Feb 2023 10:07:50 -0000 1.163 +++ kern/kern_time.c 26 Jul 2023 14:41:02 -0000 @@ -35,6 +35,7 @@ #include <sys/param.h> #include <sys/kernel.h> #include <sys/systm.h> +#include <sys/clockintr.h> #include <sys/mutex.h> #include <sys/rwlock.h> #include <sys/proc.h> @@ -52,6 +53,7 @@ #include <dev/clock_subr.h> int itimerfix(struct itimerval *); +void process_reset_itimer_flag(struct process *); /* * Time of day and interval timer support. @@ -551,6 +553,10 @@ setitimer(int which, const struct itimer timeout_del(&pr->ps_realit_to); } *itimer = its; + if (which != ITIMER_REAL) { + process_reset_itimer_flag(pr); + need_resched(curcpu()); + } } if (which == ITIMER_REAL) @@ -729,47 +735,70 @@ itimerfix(struct itimerval *itv) } /* - * Decrement an interval timer by the given number of nanoseconds. + * Decrement an interval timer by the given duration. * If the timer expires and it is periodic then reload it. When reloading * the timer we subtract any overrun from the next period so that the timer * does not drift. */ int -itimerdecr(struct itimerspec *itp, long nsec) +itimerdecr(struct itimerspec *itp, const struct timespec *decrement) { - struct timespec decrement; - - NSEC_TO_TIMESPEC(nsec, &decrement); - - mtx_enter(&itimer_mtx); - - /* - * Double-check that the timer is enabled. 
A different thread - * in setitimer(2) may have disabled it while we were entering - * the mutex. - */ - if (!timespecisset(&itp->it_value)) { - mtx_leave(&itimer_mtx); - return (1); - } - - /* - * The timer is enabled. Update and reload it as needed. - */ - timespecsub(&itp->it_value, &decrement, &itp->it_value); - if (itp->it_value.tv_sec >= 0 && timespecisset(&itp->it_value)) { - mtx_leave(&itimer_mtx); + timespecsub(&itp->it_value, decrement, &itp->it_value); + if (itp->it_value.tv_sec >= 0 && timespecisset(&itp->it_value)) return (1); - } if (!timespecisset(&itp->it_interval)) { timespecclear(&itp->it_value); - mtx_leave(&itimer_mtx); return (0); } while (itp->it_value.tv_sec < 0 || !timespecisset(&itp->it_value)) timespecadd(&itp->it_value, &itp->it_interval, &itp->it_value); - mtx_leave(&itimer_mtx); return (0); +} + +void +itimer_update(struct clockintr *cl, void *cf) +{ + struct timespec elapsed; + uint64_t nsecs; + struct clockframe *frame = cf; + struct proc *p = curproc; + struct process *pr; + + if (p == NULL || ISSET(p->p_flag, P_SYSTEM | P_WEXIT)) + return; + + pr = p->p_p; + if (!ISSET(pr->ps_flags, PS_ITIMER)) + return; + + nsecs = clockintr_advance(cl, hardclock_period) * hardclock_period; + NSEC_TO_TIMESPEC(nsecs, &elapsed); + + mtx_enter(&itimer_mtx); + if (CLKF_USERMODE(frame) && + timespecisset(&pr->ps_timer[ITIMER_VIRTUAL].it_value) && + itimerdecr(&pr->ps_timer[ITIMER_VIRTUAL], &elapsed) == 0) { + process_reset_itimer_flag(pr); + atomic_setbits_int(&p->p_flag, P_ALRMPEND); + need_proftick(p); + } + if (timespecisset(&pr->ps_timer[ITIMER_PROF].it_value) && + itimerdecr(&pr->ps_timer[ITIMER_PROF], &elapsed) == 0) { + process_reset_itimer_flag(pr); + atomic_setbits_int(&p->p_flag, P_PROFPEND); + need_proftick(p); + } + mtx_leave(&itimer_mtx); +} + +void +process_reset_itimer_flag(struct process *ps) +{ + if (timespecisset(&ps->ps_timer[ITIMER_VIRTUAL].it_value) || + timespecisset(&ps->ps_timer[ITIMER_PROF].it_value)) + 
atomic_setbits_int(&ps->ps_flags, PS_ITIMER); + else + atomic_clearbits_int(&ps->ps_flags, PS_ITIMER); } struct mutex ratecheck_mtx = MUTEX_INITIALIZER(IPL_HIGH); Index: kern/kern_clockintr.c =================================================================== RCS file: /cvs/src/sys/kern/kern_clockintr.c,v retrieving revision 1.28 diff -u -p -r1.28 kern_clockintr.c --- kern/kern_clockintr.c 25 Jul 2023 18:16:19 -0000 1.28 +++ kern/kern_clockintr.c 26 Jul 2023 14:41:02 -0000 @@ -35,7 +35,6 @@ * I Immutable after initialization. */ u_int clockintr_flags; /* [I] global state + behavior flags */ -uint32_t hardclock_period; /* [I] hardclock period (ns) */ uint32_t schedclock_period; /* [I] schedclock period (ns) */ uint32_t statclock_avg; /* [I] average statclock period (ns) */ uint32_t statclock_min; /* [I] minimum statclock period (ns) */ @@ -64,9 +63,6 @@ clockintr_init(u_int flags) KASSERT(CPU_IS_PRIMARY(curcpu())); KASSERT(clockintr_flags == 0); KASSERT(!ISSET(flags, ~CL_FLAG_MASK)); - - KASSERT(hz > 0 && hz <= 1000000000); - hardclock_period = 1000000000 / hz; KASSERT(stathz >= 1 && stathz <= 1000000000); Index: kern/kern_sched.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sched.c,v retrieving revision 1.80 diff -u -p -r1.80 kern_sched.c --- kern/kern_sched.c 25 Jul 2023 18:16:19 -0000 1.80 +++ kern/kern_sched.c 26 Jul 2023 14:41:02 -0000 @@ -87,6 +87,16 @@ sched_init_cpu(struct cpu_info *ci) spc->spc_idleproc = NULL; + if (spc->spc_itimer == NULL) { + spc->spc_itimer = clockintr_establish(&ci->ci_queue, + itimer_update); + if (spc->spc_itimer == NULL) { + panic("%s: clockintr_establish itimer_update", + __func__); + } + clockintr_stagger(spc->spc_itimer, hardclock_period, + CPU_INFO_UNIT(ci), MAXCPUS); + } if (spc->spc_profclock == NULL) { spc->spc_profclock = clockintr_establish(&ci->ci_queue, profclock); @@ -225,6 +235,10 @@ sched_exit(struct proc *p) timespecsub(&ts, &spc->spc_runtime, &ts); 
timespecadd(&p->p_rtime, &ts, &p->p_rtime); + if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) { + atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER); + clockintr_cancel(spc->spc_itimer); + } if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) { atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK); clockintr_cancel(spc->spc_profclock); Index: kern/sched_bsd.c =================================================================== RCS file: /cvs/src/sys/kern/sched_bsd.c,v retrieving revision 1.78 diff -u -p -r1.78 sched_bsd.c --- kern/sched_bsd.c 25 Jul 2023 18:16:19 -0000 1.78 +++ kern/sched_bsd.c 26 Jul 2023 14:41:02 -0000 @@ -350,7 +350,11 @@ mi_switch(void) /* add the time counts for this thread to the process's total */ tuagg_unlocked(pr, p); - /* Stop the profclock if it's running. */ + /* Stop any optional clock interrupts. */ + if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) { + atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER); + clockintr_cancel(spc->spc_itimer); + } if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) { atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK); clockintr_cancel(spc->spc_profclock); @@ -400,7 +404,13 @@ mi_switch(void) */ KASSERT(p->p_cpu == curcpu()); - /* Start the profclock if profil(2) is enabled. */ + /* Start any optional clock interrupts needed by the thread. 
*/ + if (ISSET(p->p_p->ps_flags, PS_ITIMER)) { + atomic_setbits_int(&p->p_cpu->ci_schedstate.spc_schedflags, + SPCF_ITIMER); + clockintr_advance(p->p_cpu->ci_schedstate.spc_itimer, + hardclock_period); + } if (ISSET(p->p_p->ps_flags, PS_PROFIL)) { atomic_setbits_int(&p->p_cpu->ci_schedstate.spc_schedflags, SPCF_PROFCLOCK); Index: sys/time.h =================================================================== RCS file: /cvs/src/sys/sys/time.h,v retrieving revision 1.63 diff -u -p -r1.63 time.h --- sys/time.h 13 Dec 2022 17:30:36 -0000 1.63 +++ sys/time.h 26 Jul 2023 14:41:02 -0000 @@ -330,8 +330,10 @@ uint64_t getnsecuptime(void); struct proc; int clock_gettime(struct proc *, clockid_t, struct timespec *); +struct clockintr; +void itimer_update(struct clockintr *, void *); + void cancel_all_itimers(void); -int itimerdecr(struct itimerspec *, long); int settime(const struct timespec *); int ratecheck(struct timeval *, const struct timeval *); int ppsratecheck(struct timeval *, int *, int); Index: sys/sched.h =================================================================== RCS file: /cvs/src/sys/sys/sched.h,v retrieving revision 1.58 diff -u -p -r1.58 sched.h --- sys/sched.h 25 Jul 2023 18:16:19 -0000 1.58 +++ sys/sched.h 26 Jul 2023 14:41:02 -0000 @@ -107,6 +107,7 @@ struct schedstate_percpu { u_char spc_curpriority; /* usrpri of curproc */ int spc_rrticks; /* ticks until roundrobin() */ + struct clockintr *spc_itimer; /* [o] itimer_update handle */ struct clockintr *spc_profclock; /* [o] profclock handle */ u_int spc_nrun; /* procs on the run queues */ @@ -139,6 +140,7 @@ struct cpustats { #define SPCF_SHOULDHALT 0x0004 /* CPU should be vacated */ #define SPCF_HALTED 0x0008 /* CPU has been halted */ #define SPCF_PROFCLOCK 0x0010 /* profclock() was started */ +#define SPCF_ITIMER 0x0020 /* itimer_update() was started */ #define SCHED_PPQ (128 / SCHED_NQS) /* priorities per queue */ #define NICE_WEIGHT 2 /* priorities per nice level */ Index: sys/proc.h 
=================================================================== RCS file: /cvs/src/sys/sys/proc.h,v retrieving revision 1.346 diff -u -p -r1.346 proc.h --- sys/proc.h 14 Jul 2023 07:07:08 -0000 1.346 +++ sys/proc.h 26 Jul 2023 14:41:02 -0000 @@ -282,6 +282,7 @@ struct process { #define PS_ORPHAN 0x00800000 /* Process is on an orphan list */ #define PS_CHROOT 0x01000000 /* Process is chrooted */ #define PS_NOBTCFI 0x02000000 /* No Branch Target CFI */ +#define PS_ITIMER 0x04000000 /* Virtual interval timers running */ #define PS_BITS \ ("\20" "\01CONTROLT" "\02EXEC" "\03INEXEC" "\04EXITING" "\05SUGID" \ @@ -289,7 +290,7 @@ struct process { "\013WAITED" "\014COREDUMP" "\015SINGLEEXIT" "\016SINGLEUNWIND" \ "\017NOZOMBIE" "\020STOPPED" "\021SYSTEM" "\022EMBRYO" "\023ZOMBIE" \ "\024NOBROADCASTKILL" "\025PLEDGE" "\026WXNEEDED" "\027EXECPLEDGE" \ - "\030ORPHAN" "\031CHROOT" "\032NOBTCFI") + "\030ORPHAN" "\031CHROOT" "\032NOBTCFI" "\033ITIMER") struct kcov_dev; Index: sys/systm.h =================================================================== RCS file: /cvs/src/sys/sys/systm.h,v retrieving revision 1.163 diff -u -p -r1.163 systm.h --- sys/systm.h 14 Jul 2023 07:07:08 -0000 1.163 +++ sys/systm.h 26 Jul 2023 14:41:02 -0000 @@ -233,6 +233,8 @@ int tvtohz(const struct timeval *); int tstohz(const struct timespec *); void realitexpire(void *); +extern uint32_t hardclock_period; + struct clockframe; void hardclock(struct clockframe *); void statclock(struct clockframe *);