On Fri, Nov 05, 2021 at 10:04:50AM +0100, Martin Pieuchot wrote:
> The new poll/select(2) implementation converts 'struct pollfd' and 'fd_set' to
> knotes (kqueue event descriptors) then passes them to the kqueue subsystem.
> A knote is allocated, with kqueue_register(), for every read, write and
> except condition watched on a given FD. That means at most 3 allocations
> might be necessary per FD.
>
> The diff below reduces the overhead of per-syscall allocation/free of those
> descriptors by leaving those which didn't trigger on the kqueue across
> syscalls. Leaving knotes on the kqueue allows kqueue_register() to re-use
> an existing descriptor instead of re-allocating a new one.
>
> With this, knotes are now lazily removed. The mechanism uses a serial
> number which is incremented for every syscall and indicates whether a knote
> sitting in the kqueue is still valid or should be freed.
>
> Note that performance improvements might not be visible with this diff
> alone because kqueue_register() still pre-allocates a descriptor and then
> drops it.
>
> visa@ already pointed out that the lazy removal logic could be integrated
> in kqueue_scan() which would reduce the complexity of those two syscalls.
> I'm arguing for doing this in a next step in-tree.
I think it would make more sense to add the removal logic to the scan
function first as doing so would keep the code modifications more
logical and simpler. This would also avoid the need to go through
a temporary removal approach.
Index: kern/kern_event.c
===================================================================
RCS file: src/sys/kern/kern_event.c,v
retrieving revision 1.170
diff -u -p -r1.170 kern_event.c
--- kern/kern_event.c 6 Nov 2021 05:48:47 -0000 1.170
+++ kern/kern_event.c 6 Nov 2021 15:31:04 -0000
@@ -73,6 +73,7 @@ void kqueue_terminate(struct proc *p, st
void KQREF(struct kqueue *);
void KQRELE(struct kqueue *);
+void kqueue_purge(struct proc *, struct kqueue *);
int kqueue_sleep(struct kqueue *, struct timespec *);
int kqueue_read(struct file *, struct uio *, int);
@@ -806,6 +807,22 @@ kqpoll_exit(void)
}
void
+kqpoll_done(unsigned int num)
+{
+ struct proc *p = curproc;
+
+ KASSERT(p->p_kq != NULL);
+
+ if (p->p_kq_serial + num >= p->p_kq_serial) {
+ p->p_kq_serial += num;
+ } else {
+ /* Clear all knotes after serial wraparound. */
+ kqueue_purge(p, p->p_kq);
+ p->p_kq_serial = 1;
+ }
+}
+
+void
kqpoll_dequeue(struct proc *p, int all)
{
struct knote marker;
@@ -1383,6 +1400,15 @@ retry:
mtx_leave(&kq->kq_lock);
+ /* Drop spurious events. */
+ if (p->p_kq == kq &&
+ p->p_kq_serial > (unsigned long)kn->kn_udata) {
+ filter_detach(kn);
+ knote_drop(kn, p);
+ mtx_enter(&kq->kq_lock);
+ continue;
+ }
+
memset(kevp, 0, sizeof(*kevp));
if (filter_process(kn, kevp) == 0) {
mtx_enter(&kq->kq_lock);
Index: kern/sys_generic.c
===================================================================
RCS file: src/sys/kern/sys_generic.c,v
retrieving revision 1.139
diff -u -p -r1.139 sys_generic.c
--- kern/sys_generic.c 29 Oct 2021 15:52:44 -0000 1.139
+++ kern/sys_generic.c 6 Nov 2021 15:31:04 -0000
@@ -730,8 +730,7 @@ done:
if (pibits[0] != (fd_set *)&bits[0])
free(pibits[0], M_TEMP, 6 * ni);
- kqueue_purge(p, p->p_kq);
- p->p_kq_serial += nd;
+ kqpoll_done(nd);
return (error);
}
@@ -1230,8 +1229,7 @@ bad:
if (pl != pfds)
free(pl, M_TEMP, sz);
- kqueue_purge(p, p->p_kq);
- p->p_kq_serial += nfds;
+ kqpoll_done(nfds);
return (error);
}
@@ -1251,8 +1249,7 @@ ppollcollect(struct proc *p, struct keve
/*
* Lazily delete spurious events.
*
- * This should not happen as long as kqueue_purge() is called
- * at the end of every syscall. It migh be interesting to do
+ * This should not happen. It might be interesting to do
* like DragonFlyBSD and not always allocated a new knote in
* kqueue_register() with that lazy removal makes sense.
*/
Index: sys/event.h
===================================================================
RCS file: src/sys/sys/event.h,v
retrieving revision 1.57
diff -u -p -r1.57 event.h
--- sys/event.h 24 Oct 2021 07:02:47 -0000 1.57
+++ sys/event.h 6 Nov 2021 15:31:04 -0000
@@ -289,6 +289,7 @@ extern const struct klistops socket_klis
extern void kqpoll_init(void);
extern void kqpoll_exit(void);
+extern void kqpoll_done(unsigned int);
extern void knote(struct klist *list, long hint);
extern void knote_fdclose(struct proc *p, int fd);
extern void knote_processexit(struct proc *);
@@ -302,7 +303,6 @@ extern int kqueue_scan(struct kqueue_sca
struct timespec *, struct proc *, int *);
extern void kqueue_scan_setup(struct kqueue_scan_state *, struct kqueue *);
extern void kqueue_scan_finish(struct kqueue_scan_state *);
-extern void kqueue_purge(struct proc *, struct kqueue *);
extern int filt_seltrue(struct knote *kn, long hint);
extern int seltrue_kqfilter(dev_t, struct knote *);
extern void klist_init(struct klist *, const struct klistops *, void *);