On Thu, Jul 27, 2023 at 10:45:50PM -0500, Scott Cheloha wrote:
> On Wed, Jul 26, 2023 at 11:16:19AM -0500, Scott Cheloha wrote:
> > This is the next patch in the clock interrupt reorganization series.
> >
> > Now that statclock() is cleaned up we can turn to hardclock().
> >
> > [...]
> >
> > This patch moves the setitimer(2) code out of hardclock(). The big
> > idea is identical to what we did with profil(2)/profclock in the
> > profclock/gmonclock patch.
> >
> > - Move the setitimer(2) polling code from hardclock() to a new clock
> > interrupt routine, itimer_update(), in kern_time.c. itimer_update()
> > is periodic and runs at the same frequency as the hardclock.
> >
> > - Each schedstate_percpu has its own itimer_update() handle, spc_itimer,
> > initialized during sched_init_cpu().
> >
> > - The itimer_update() on a given CPU is enabled/disabled in
> > mi_switch()/sched_exit() if the running thread's process has enabled
> > ITIMER_VIRTUAL/ITIMER_PROF. A new scheduler flag, SPCF_ITIMER,
> > signifies whether itimer_update() was started and needs stopping.
> >
> > - A new per-process flag, PS_ITIMER, signifies whether any virtual
> > interval timers are running. The flag is updated from the helper
> > routine process_reset_itimer_flag(). We use it during mi_switch()
> > to decide whether to start itimer_update() without entering itimer_mtx.
> >
> > - In setitimer(), call need_resched() when the process changes the
> > state of ITIMER_VIRTUAL/ITIMER_PROF to force itimer_update() on/off.
> >
> > regress/sys/kern/itimer passes.
>
> Updated patch:
>
> - Rebase on kern_clockintr.c,v1.29 and kern_sched.c,v1.81
>
> - Stagger spc_itimer in clockintr_cpu_init() alongside spc_profclock
> until I can figure out where else to do it
Ping. Index: kern/kern_clock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_clock.c,v retrieving revision 1.110 diff -u -p -r1.110 kern_clock.c --- kern/kern_clock.c 1 Aug 2023 07:57:55 -0000 1.110 +++ kern/kern_clock.c 2 Aug 2023 23:32:39 -0000 @@ -106,41 +106,12 @@ initclocks(void) } /* - * hardclock does the accounting needed for ITIMER_PROF and ITIMER_VIRTUAL. - * We don't want to send signals with psignal from hardclock because it makes - * MULTIPROCESSOR locking very complicated. Instead, to use an idea from - * FreeBSD, we set a flag on the thread and when it goes to return to - * userspace it signals itself. - */ - -/* * The real-time timer, interrupting hz times per second. */ void hardclock(struct clockframe *frame) { - struct proc *p; struct cpu_info *ci = curcpu(); - - p = curproc; - if (p && ((p->p_flag & (P_SYSTEM | P_WEXIT)) == 0)) { - struct process *pr = p->p_p; - - /* - * Run current process's virtual and profile time, as needed. 
- */ - if (CLKF_USERMODE(frame) && - timespecisset(&pr->ps_timer[ITIMER_VIRTUAL].it_value) && - itimerdecr(&pr->ps_timer[ITIMER_VIRTUAL], tick_nsec) == 0) { - atomic_setbits_int(&p->p_flag, P_ALRMPEND); - need_proftick(p); - } - if (timespecisset(&pr->ps_timer[ITIMER_PROF].it_value) && - itimerdecr(&pr->ps_timer[ITIMER_PROF], tick_nsec) == 0) { - atomic_setbits_int(&p->p_flag, P_PROFPEND); - need_proftick(p); - } - } if (--ci->ci_schedstate.spc_rrticks <= 0) roundrobin(ci); Index: kern/kern_time.c =================================================================== RCS file: /cvs/src/sys/kern/kern_time.c,v retrieving revision 1.163 diff -u -p -r1.163 kern_time.c --- kern/kern_time.c 15 Feb 2023 10:07:50 -0000 1.163 +++ kern/kern_time.c 2 Aug 2023 23:32:39 -0000 @@ -35,6 +35,7 @@ #include <sys/param.h> #include <sys/kernel.h> #include <sys/systm.h> +#include <sys/clockintr.h> #include <sys/mutex.h> #include <sys/rwlock.h> #include <sys/proc.h> @@ -52,6 +53,7 @@ #include <dev/clock_subr.h> int itimerfix(struct itimerval *); +void process_reset_itimer_flag(struct process *); /* * Time of day and interval timer support. @@ -551,6 +553,10 @@ setitimer(int which, const struct itimer timeout_del(&pr->ps_realit_to); } *itimer = its; + if (which == ITIMER_VIRTUAL || which == ITIMER_PROF) { + process_reset_itimer_flag(pr); + need_resched(curcpu()); + } } if (which == ITIMER_REAL) @@ -729,47 +735,70 @@ itimerfix(struct itimerval *itv) } /* - * Decrement an interval timer by the given number of nanoseconds. + * Decrement an interval timer by the given duration. * If the timer expires and it is periodic then reload it. When reloading * the timer we subtract any overrun from the next period so that the timer * does not drift. 
*/ int -itimerdecr(struct itimerspec *itp, long nsec) +itimerdecr(struct itimerspec *itp, const struct timespec *decrement) { - struct timespec decrement; - - NSEC_TO_TIMESPEC(nsec, &decrement); - - mtx_enter(&itimer_mtx); - - /* - * Double-check that the timer is enabled. A different thread - * in setitimer(2) may have disabled it while we were entering - * the mutex. - */ - if (!timespecisset(&itp->it_value)) { - mtx_leave(&itimer_mtx); - return (1); - } - - /* - * The timer is enabled. Update and reload it as needed. - */ - timespecsub(&itp->it_value, &decrement, &itp->it_value); - if (itp->it_value.tv_sec >= 0 && timespecisset(&itp->it_value)) { - mtx_leave(&itimer_mtx); + timespecsub(&itp->it_value, decrement, &itp->it_value); + if (itp->it_value.tv_sec >= 0 && timespecisset(&itp->it_value)) return (1); - } if (!timespecisset(&itp->it_interval)) { timespecclear(&itp->it_value); - mtx_leave(&itimer_mtx); return (0); } while (itp->it_value.tv_sec < 0 || !timespecisset(&itp->it_value)) timespecadd(&itp->it_value, &itp->it_interval, &itp->it_value); - mtx_leave(&itimer_mtx); return (0); +} + +void +itimer_update(struct clockintr *cl, void *cf) +{ + struct timespec elapsed; + uint64_t nsecs; + struct clockframe *frame = cf; + struct proc *p = curproc; + struct process *pr; + + if (p == NULL || ISSET(p->p_flag, P_SYSTEM | P_WEXIT)) + return; + + pr = p->p_p; + if (!ISSET(pr->ps_flags, PS_ITIMER)) + return; + + nsecs = clockintr_advance(cl, hardclock_period) * hardclock_period; + NSEC_TO_TIMESPEC(nsecs, &elapsed); + + mtx_enter(&itimer_mtx); + if (CLKF_USERMODE(frame) && + timespecisset(&pr->ps_timer[ITIMER_VIRTUAL].it_value) && + itimerdecr(&pr->ps_timer[ITIMER_VIRTUAL], &elapsed) == 0) { + process_reset_itimer_flag(pr); + atomic_setbits_int(&p->p_flag, P_ALRMPEND); + need_proftick(p); + } + if (timespecisset(&pr->ps_timer[ITIMER_PROF].it_value) && + itimerdecr(&pr->ps_timer[ITIMER_PROF], &elapsed) == 0) { + process_reset_itimer_flag(pr); + 
atomic_setbits_int(&p->p_flag, P_PROFPEND); + need_proftick(p); + } + mtx_leave(&itimer_mtx); +} + +void +process_reset_itimer_flag(struct process *ps) +{ + if (timespecisset(&ps->ps_timer[ITIMER_VIRTUAL].it_value) || + timespecisset(&ps->ps_timer[ITIMER_PROF].it_value)) + atomic_setbits_int(&ps->ps_flags, PS_ITIMER); + else + atomic_clearbits_int(&ps->ps_flags, PS_ITIMER); } struct mutex ratecheck_mtx = MUTEX_INITIALIZER(IPL_HIGH); Index: kern/kern_clockintr.c =================================================================== RCS file: /cvs/src/sys/kern/kern_clockintr.c,v retrieving revision 1.29 diff -u -p -r1.29 kern_clockintr.c --- kern/kern_clockintr.c 27 Jul 2023 17:52:53 -0000 1.29 +++ kern/kern_clockintr.c 2 Aug 2023 23:32:40 -0000 @@ -196,6 +196,10 @@ clockintr_cpu_init(const struct intrcloc * XXX Need to find a better place to do this. We can't do it in * sched_init_cpu() because initclocks() runs after it. */ + if (spc->spc_itimer->cl_expiration == 0) { + clockintr_stagger(spc->spc_itimer, hardclock_period, + multiplier, MAXCPUS); + } if (spc->spc_profclock->cl_expiration == 0) { clockintr_stagger(spc->spc_profclock, profclock_period, multiplier, MAXCPUS); Index: kern/kern_sched.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sched.c,v retrieving revision 1.81 diff -u -p -r1.81 kern_sched.c --- kern/kern_sched.c 27 Jul 2023 17:52:53 -0000 1.81 +++ kern/kern_sched.c 2 Aug 2023 23:32:40 -0000 @@ -87,6 +87,14 @@ sched_init_cpu(struct cpu_info *ci) spc->spc_idleproc = NULL; + if (spc->spc_itimer == NULL) { + spc->spc_itimer = clockintr_establish(&ci->ci_queue, + itimer_update); + if (spc->spc_itimer == NULL) { + panic("%s: clockintr_establish itimer_update", + __func__); + } + } if (spc->spc_profclock == NULL) { spc->spc_profclock = clockintr_establish(&ci->ci_queue, profclock); @@ -223,6 +231,10 @@ sched_exit(struct proc *p) timespecsub(&ts, &spc->spc_runtime, &ts); timespecadd(&p->p_rtime, &ts, 
&p->p_rtime); + if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) { + atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER); + clockintr_cancel(spc->spc_itimer); + } if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) { atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK); clockintr_cancel(spc->spc_profclock); Index: kern/sched_bsd.c =================================================================== RCS file: /cvs/src/sys/kern/sched_bsd.c,v retrieving revision 1.78 diff -u -p -r1.78 sched_bsd.c --- kern/sched_bsd.c 25 Jul 2023 18:16:19 -0000 1.78 +++ kern/sched_bsd.c 2 Aug 2023 23:32:40 -0000 @@ -350,7 +350,11 @@ mi_switch(void) /* add the time counts for this thread to the process's total */ tuagg_unlocked(pr, p); - /* Stop the profclock if it's running. */ + /* Stop any optional clock interrupts. */ + if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) { + atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER); + clockintr_cancel(spc->spc_itimer); + } if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) { atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK); clockintr_cancel(spc->spc_profclock); @@ -400,7 +404,13 @@ mi_switch(void) */ KASSERT(p->p_cpu == curcpu()); - /* Start the profclock if profil(2) is enabled. */ + /* Start any optional clock interrupts needed by the thread. 
*/ + if (ISSET(p->p_p->ps_flags, PS_ITIMER)) { + atomic_setbits_int(&p->p_cpu->ci_schedstate.spc_schedflags, + SPCF_ITIMER); + clockintr_advance(p->p_cpu->ci_schedstate.spc_itimer, + hardclock_period); + } if (ISSET(p->p_p->ps_flags, PS_PROFIL)) { atomic_setbits_int(&p->p_cpu->ci_schedstate.spc_schedflags, SPCF_PROFCLOCK); Index: sys/time.h =================================================================== RCS file: /cvs/src/sys/sys/time.h,v retrieving revision 1.63 diff -u -p -r1.63 time.h --- sys/time.h 13 Dec 2022 17:30:36 -0000 1.63 +++ sys/time.h 2 Aug 2023 23:32:40 -0000 @@ -330,8 +330,10 @@ uint64_t getnsecuptime(void); struct proc; int clock_gettime(struct proc *, clockid_t, struct timespec *); +struct clockintr; +void itimer_update(struct clockintr *, void *); + void cancel_all_itimers(void); -int itimerdecr(struct itimerspec *, long); int settime(const struct timespec *); int ratecheck(struct timeval *, const struct timeval *); int ppsratecheck(struct timeval *, int *, int); Index: sys/sched.h =================================================================== RCS file: /cvs/src/sys/sys/sched.h,v retrieving revision 1.58 diff -u -p -r1.58 sched.h --- sys/sched.h 25 Jul 2023 18:16:19 -0000 1.58 +++ sys/sched.h 2 Aug 2023 23:32:40 -0000 @@ -107,6 +107,7 @@ struct schedstate_percpu { u_char spc_curpriority; /* usrpri of curproc */ int spc_rrticks; /* ticks until roundrobin() */ + struct clockintr *spc_itimer; /* [o] itimer_update handle */ struct clockintr *spc_profclock; /* [o] profclock handle */ u_int spc_nrun; /* procs on the run queues */ @@ -139,6 +140,7 @@ struct cpustats { #define SPCF_SHOULDHALT 0x0004 /* CPU should be vacated */ #define SPCF_HALTED 0x0008 /* CPU has been halted */ #define SPCF_PROFCLOCK 0x0010 /* profclock() was started */ +#define SPCF_ITIMER 0x0020 /* itimer_update() was started */ #define SCHED_PPQ (128 / SCHED_NQS) /* priorities per queue */ #define NICE_WEIGHT 2 /* priorities per nice level */ Index: sys/proc.h 
=================================================================== RCS file: /cvs/src/sys/sys/proc.h,v retrieving revision 1.346 diff -u -p -r1.346 proc.h --- sys/proc.h 14 Jul 2023 07:07:08 -0000 1.346 +++ sys/proc.h 2 Aug 2023 23:32:40 -0000 @@ -282,6 +282,7 @@ struct process { #define PS_ORPHAN 0x00800000 /* Process is on an orphan list */ #define PS_CHROOT 0x01000000 /* Process is chrooted */ #define PS_NOBTCFI 0x02000000 /* No Branch Target CFI */ +#define PS_ITIMER 0x04000000 /* Virtual interval timers running */ #define PS_BITS \ ("\20" "\01CONTROLT" "\02EXEC" "\03INEXEC" "\04EXITING" "\05SUGID" \ @@ -289,7 +290,7 @@ struct process { "\013WAITED" "\014COREDUMP" "\015SINGLEEXIT" "\016SINGLEUNWIND" \ "\017NOZOMBIE" "\020STOPPED" "\021SYSTEM" "\022EMBRYO" "\023ZOMBIE" \ "\024NOBROADCASTKILL" "\025PLEDGE" "\026WXNEEDED" "\027EXECPLEDGE" \ - "\030ORPHAN" "\031CHROOT" "\032NOBTCFI") + "\030ORPHAN" "\031CHROOT" "\032NOBTCFI" "\033ITIMER") struct kcov_dev; Index: sys/systm.h =================================================================== RCS file: /cvs/src/sys/sys/systm.h,v retrieving revision 1.163 diff -u -p -r1.163 systm.h --- sys/systm.h 14 Jul 2023 07:07:08 -0000 1.163 +++ sys/systm.h 2 Aug 2023 23:32:40 -0000 @@ -233,6 +233,8 @@ int tvtohz(const struct timeval *); int tstohz(const struct timespec *); void realitexpire(void *); +extern uint32_t hardclock_period; + struct clockframe; void hardclock(struct clockframe *); void statclock(struct clockframe *);