On Fri, Jun 15, 2018 at 10:22:42AM +0200, Mark Kettenis wrote:
> > Date: Fri, 15 Jun 2018 09:59:47 +0200
> > From: Anton Lindqvist <[email protected]>
> >
> > On Tue, Jun 12, 2018 at 09:00:14PM +0200, Anton Lindqvist wrote:
> > > Hi,
> > > This diff moves the kqueue related members from struct filedesc to
> > > struct kqueue with the prime motivation of fixing a panic in
> > > knote_processexit() that can occur when the filedesc is gone.
> > > Since filedesc is no longer shared between kqueues belong to the same
> > > process, I added a list of the current kqueues to struct process. This
> > > list is used in knote_closefd() in order to remove all knotes using the
> > > given fd as its ident from all kqueues belonging to the given process.
> > >
> > > Similiar work has been done in both FreeBSD[1] and DragonFlyBSD[2].
> > >
> > > Testing would be much appreciated. Not asking for anything particular,
> > > just apply the diff and exercise the kernel with your daily tasks.
> > >
> > > [1]
> > > https://github.com/freebsd/freebsd/commit/bc1805c6e871c178d0b6516c3baa774ffd77224a
> > > [2]
> > > http://gitweb.dragonflybsd.org/dragonfly.git/commitdiff/ccafe911a3aa55fd5262850ecfc5765cd31a56a2
> >
> > Me and tb@ have been running this diff without problems. It also
> > survived a `make release' and upgrade. Already got an ok from visa@,
> > anyone else that can comment on the code and hopefully give another ok?
>
> The kqueue regression tests still pass isn't it?
Correct
>
> ok kettenis@
>
> > > Index: kern/kern_descrip.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/kern/kern_descrip.c,v
> > > retrieving revision 1.164
> > > diff -u -p -r1.164 kern_descrip.c
> > > --- kern/kern_descrip.c 5 Jun 2018 09:29:05 -0000 1.164
> > > +++ kern/kern_descrip.c 12 Jun 2018 17:11:44 -0000
> > > @@ -650,8 +650,7 @@ finishdup(struct proc *p, struct file *f
> > > *retval = new;
> > >
> > > if (oldfp != NULL) {
> > > - if (new < fdp->fd_knlistsize)
> > > - knote_fdclose(p, new);
> > > + knote_fdclose(p, new);
> > > closef(oldfp, p);
> > > }
> > >
> > > @@ -686,8 +685,7 @@ fdrelease(struct proc *p, int fd)
> > > FREF(fp);
> > > *fpp = NULL;
> > > fd_unused(fdp, fd);
> > > - if (fd < fdp->fd_knlistsize)
> > > - knote_fdclose(p, fd);
> > > + knote_fdclose(p, fd);
> > > return (closef(fp, p));
> > > }
> > >
> > > @@ -999,7 +997,6 @@ fdinit(void)
> > > newfdp->fd_fd.fd_nfiles = NDFILE;
> > > newfdp->fd_fd.fd_himap = newfdp->fd_dhimap;
> > > newfdp->fd_fd.fd_lomap = newfdp->fd_dlomap;
> > > - newfdp->fd_fd.fd_knlistsize = -1;
> > >
> > > newfdp->fd_fd.fd_freefile = 0;
> > > newfdp->fd_fd.fd_lastfile = 0;
> > > @@ -1129,8 +1126,6 @@ fdfree(struct proc *p)
> > > vrele(fdp->fd_cdir);
> > > if (fdp->fd_rdir)
> > > vrele(fdp->fd_rdir);
> > > - free(fdp->fd_knlist, M_TEMP, fdp->fd_knlistsize * sizeof(struct klist));
> > > - hashfree(fdp->fd_knhash, KN_HASHSIZE, M_TEMP);
> > > pool_put(&fdesc_pool, fdp);
> > > }
> > >
> > > Index: kern/kern_event.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/kern/kern_event.c,v
> > > retrieving revision 1.91
> > > diff -u -p -r1.91 kern_event.c
> > > --- kern/kern_event.c 5 Jun 2018 09:29:05 -0000 1.91
> > > +++ kern/kern_event.c 12 Jun 2018 17:11:44 -0000
> > > @@ -80,8 +80,8 @@ struct fileops kqueueops = {
> > > .fo_close = kqueue_close
> > > };
> > >
> > > -void knote_attach(struct knote *kn, struct filedesc *fdp);
> > > -void knote_drop(struct knote *kn, struct proc *p, struct filedesc
> > > *fdp);
> > > +void knote_attach(struct knote *kn);
> > > +void knote_drop(struct knote *kn, struct proc *p);
> > > void knote_enqueue(struct knote *kn);
> > > void knote_dequeue(struct knote *kn);
> > > #define knote_alloc() ((struct knote *)pool_get(&knote_pool, PR_WAITOK))
> > > @@ -152,9 +152,13 @@ KQREF(struct kqueue *kq)
> > > void
> > > KQRELE(struct kqueue *kq)
> > > {
> > > - if (--kq->kq_refs == 0) {
> > > - pool_put(&kqueue_pool, kq);
> > > - }
> > > + if (--kq->kq_refs > 0)
> > > + return;
> > > +
> > > + LIST_REMOVE(kq, kq_next);
> > > + free(kq->kq_knlist, M_TEMP, kq->kq_knlistsize * sizeof(struct klist));
> > > + hashfree(kq->kq_knhash, KN_HASHSIZE, M_TEMP);
> > > + pool_put(&kqueue_pool, kq);
> > > }
> > >
> > > void kqueue_init(void);
> > > @@ -453,10 +457,9 @@ sys_kqueue(struct proc *p, void *v, regi
> > > fp->f_data = kq;
> > > KQREF(kq);
> > > *retval = fd;
> > > - if (fdp->fd_knlistsize < 0)
> > > - fdp->fd_knlistsize = 0; /* this process has a kq */
> > > kq->kq_fdp = fdp;
> > > FILE_SET_MATURE(fp, p);
> > > + LIST_INSERT_HEAD(&p->p_p->ps_kqlist, kq, kq_next);
> > > return (0);
> > > }
> > >
> > > @@ -583,19 +586,19 @@ kqueue_register(struct kqueue *kq, struc
> > > if ((fp = fd_getfile(fdp, kev->ident)) == NULL)
> > > return (EBADF);
> > >
> > > - if (kev->ident < fdp->fd_knlistsize) {
> > > - SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link)
> > > {
> > > + if (kev->ident < kq->kq_knlistsize) {
> > > + SLIST_FOREACH(kn, &kq->kq_knlist[kev->ident], kn_link) {
> > > if (kq == kn->kn_kq &&
> > > kev->filter == kn->kn_filter)
> > > break;
> > > }
> > > }
> > > } else {
> > > - if (fdp->fd_knhashmask != 0) {
> > > + if (kq->kq_knhashmask != 0) {
> > > struct klist *list;
> > >
> > > - list = &fdp->fd_knhash[
> > > - KN_HASH((u_long)kev->ident, fdp->fd_knhashmask)];
> > > + list = &kq->kq_knhash[
> > > + KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
> > > SLIST_FOREACH(kn, list, kn_link) {
> > > if (kev->ident == kn->kn_id &&
> > > kq == kn->kn_kq &&
> > > @@ -637,9 +640,9 @@ kqueue_register(struct kqueue *kq, struc
> > > kev->data = 0;
> > > kn->kn_kevent = *kev;
> > >
> > > - knote_attach(kn, fdp);
> > > + knote_attach(kn);
> > > if ((error = fops->f_attach(kn)) != 0) {
> > > - knote_drop(kn, p, fdp);
> > > + knote_drop(kn, p);
> > > goto done;
> > > }
> > > } else {
> > > @@ -660,7 +663,7 @@ kqueue_register(struct kqueue *kq, struc
> > >
> > > } else if (kev->flags & EV_DELETE) {
> > > kn->kn_fop->f_detach(kn);
> > > - knote_drop(kn, p, p->p_fd);
> > > + knote_drop(kn, p);
> > > goto done;
> > > }
> > >
> > > @@ -796,7 +799,7 @@ start:
> > > kn->kn_status &= ~KN_QUEUED;
> > > splx(s);
> > > kn->kn_fop->f_detach(kn);
> > > - knote_drop(kn, p, p->p_fd);
> > > + knote_drop(kn, p);
> > > s = splhigh();
> > > } else if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) {
> > > if (kn->kn_flags & EV_CLEAR) {
> > > @@ -900,12 +903,11 @@ int
> > > kqueue_close(struct file *fp, struct proc *p)
> > > {
> > > struct kqueue *kq = fp->f_data;
> > > - struct filedesc *fdp = p->p_fd;
> > > struct knote **knp, *kn, *kn0;
> > > int i;
> > >
> > > - for (i = 0; i < fdp->fd_knlistsize; i++) {
> > > - knp = &SLIST_FIRST(&fdp->fd_knlist[i]);
> > > + for (i = 0; i < kq->kq_knlistsize; i++) {
> > > + knp = &SLIST_FIRST(&kq->kq_knlist[i]);
> > > kn = *knp;
> > > while (kn != NULL) {
> > > kn0 = SLIST_NEXT(kn, kn_link);
> > > @@ -920,9 +922,9 @@ kqueue_close(struct file *fp, struct pro
> > > kn = kn0;
> > > }
> > > }
> > > - if (fdp->fd_knhashmask != 0) {
> > > - for (i = 0; i < fdp->fd_knhashmask + 1; i++) {
> > > - knp = &SLIST_FIRST(&fdp->fd_knhash[i]);
> > > + if (kq->kq_knhashmask != 0) {
> > > + for (i = 0; i < kq->kq_knhashmask + 1; i++) {
> > > + knp = &SLIST_FIRST(&kq->kq_knhash[i]);
> > > kn = *knp;
> > > while (kn != NULL) {
> > > kn0 = SLIST_NEXT(kn, kn_link);
> > > @@ -994,7 +996,7 @@ knote_remove(struct proc *p, struct klis
> > >
> > > while ((kn = SLIST_FIRST(list)) != NULL) {
> > > kn->kn_fop->f_detach(kn);
> > > - knote_drop(kn, p, p->p_fd);
> > > + knote_drop(kn, p);
> > > }
> > > }
> > >
> > > @@ -1004,10 +1006,16 @@ knote_remove(struct proc *p, struct klis
> > > void
> > > knote_fdclose(struct proc *p, int fd)
> > > {
> > > - struct filedesc *fdp = p->p_fd;
> > > - struct klist *list = &fdp->fd_knlist[fd];
> > > + struct kqueue *kq;
> > > + struct klist *list;
> > >
> > > - knote_remove(p, list);
> > > + LIST_FOREACH(kq, &p->p_p->ps_kqlist, kq_next) {
> > > + if (fd >= kq->kq_knlistsize)
> > > + continue;
> > > +
> > > + list = &kq->kq_knlist[fd];
> > > + knote_remove(p, list);
> > > + }
> > > }
> > >
> > > /*
> > > @@ -1026,35 +1034,36 @@ knote_processexit(struct proc *p)
> > > }
> > >
> > > void
> > > -knote_attach(struct knote *kn, struct filedesc *fdp)
> > > +knote_attach(struct knote *kn)
> > > {
> > > + struct kqueue *kq = kn->kn_kq;
> > > struct klist *list;
> > > int size;
> > >
> > > if (!kn->kn_fop->f_isfd) {
> > > - if (fdp->fd_knhashmask == 0)
> > > - fdp->fd_knhash = hashinit(KN_HASHSIZE, M_TEMP,
> > > - M_WAITOK, &fdp->fd_knhashmask);
> > > - list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
> > > + if (kq->kq_knhashmask == 0)
> > > + kq->kq_knhash = hashinit(KN_HASHSIZE, M_TEMP,
> > > + M_WAITOK, &kq->kq_knhashmask);
> > > + list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
> > > goto done;
> > > }
> > >
> > > - if (fdp->fd_knlistsize <= kn->kn_id) {
> > > - size = fdp->fd_knlistsize;
> > > + if (kq->kq_knlistsize <= kn->kn_id) {
> > > + size = kq->kq_knlistsize;
> > > while (size <= kn->kn_id)
> > > size += KQEXTENT;
> > > list = mallocarray(size, sizeof(struct klist), M_TEMP,
> > > M_WAITOK);
> > > - memcpy(list, fdp->fd_knlist,
> > > - fdp->fd_knlistsize * sizeof(struct klist));
> > > - memset(&list[fdp->fd_knlistsize], 0,
> > > - (size - fdp->fd_knlistsize) * sizeof(struct klist));
> > > - free(fdp->fd_knlist, M_TEMP,
> > > - fdp->fd_knlistsize * sizeof(struct klist));
> > > - fdp->fd_knlistsize = size;
> > > - fdp->fd_knlist = list;
> > > + memcpy(list, kq->kq_knlist,
> > > + kq->kq_knlistsize * sizeof(struct klist));
> > > + memset(&list[kq->kq_knlistsize], 0,
> > > + (size - kq->kq_knlistsize) * sizeof(struct klist));
> > > + free(kq->kq_knlist, M_TEMP,
> > > + kq->kq_knlistsize * sizeof(struct klist));
> > > + kq->kq_knlistsize = size;
> > > + kq->kq_knlist = list;
> > > }
> > > - list = &fdp->fd_knlist[kn->kn_id];
> > > + list = &kq->kq_knlist[kn->kn_id];
> > > done:
> > > SLIST_INSERT_HEAD(list, kn, kn_link);
> > > kn->kn_status = 0;
> > > @@ -1065,14 +1074,15 @@ done:
> > > * while calling FRELE and knote_free.
> > > */
> > > void
> > > -knote_drop(struct knote *kn, struct proc *p, struct filedesc *fdp)
> > > +knote_drop(struct knote *kn, struct proc *p)
> > > {
> > > + struct kqueue *kq = kn->kn_kq;
> > > struct klist *list;
> > >
> > > if (kn->kn_fop->f_isfd)
> > > - list = &fdp->fd_knlist[kn->kn_id];
> > > + list = &kq->kq_knlist[kn->kn_id];
> > > else
> > > - list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)];
> > > + list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)];
> > >
> > > SLIST_REMOVE(list, kn, knote, kn_link);
> > > if (kn->kn_status & KN_QUEUED)
> > > Index: kern/kern_fork.c
> > > ===================================================================
> > > RCS file: /cvs/src/sys/kern/kern_fork.c,v
> > > retrieving revision 1.202
> > > diff -u -p -r1.202 kern_fork.c
> > > --- kern/kern_fork.c 30 Dec 2017 20:47:00 -0000 1.202
> > > +++ kern/kern_fork.c 12 Jun 2018 17:11:44 -0000
> > > @@ -198,6 +198,7 @@ process_initialize(struct process *pr, s
> > > KASSERT(p->p_ucred->cr_ref >= 2); /* new thread and new process */
> > >
> > > LIST_INIT(&pr->ps_children);
> > > + LIST_INIT(&pr->ps_kqlist);
> > >
> > > timeout_set(&pr->ps_realit_to, realitexpire, pr);
> > > }
> > > Index: sys/eventvar.h
> > > ===================================================================
> > > RCS file: /cvs/src/sys/sys/eventvar.h,v
> > > retrieving revision 1.4
> > > diff -u -p -r1.4 eventvar.h
> > > --- sys/eventvar.h 11 Oct 2017 08:01:10 -0000 1.4
> > > +++ sys/eventvar.h 12 Jun 2018 17:11:44 -0000
> > > @@ -40,6 +40,14 @@ struct kqueue {
> > > int kq_refs; /* number of references */
> > > struct selinfo kq_sel;
> > > struct filedesc *kq_fdp;
> > > +
> > > + LIST_ENTRY(kqueue) kq_next;
> > > +
> > > + int kq_knlistsize; /* size of knlist */
> > > + struct klist *kq_knlist; /* list of attached knotes */
> > > + u_long kq_knhashmask; /* size of knhash */
> > > + struct klist *kq_knhash; /* hash table for attached
> > > knotes */
> > > +
> > > int kq_state;
> > > #define KQ_SEL 0x01
> > > #define KQ_SLEEP 0x02
> > > Index: sys/filedesc.h
> > > ===================================================================
> > > RCS file: /cvs/src/sys/sys/filedesc.h,v
> > > retrieving revision 1.37
> > > diff -u -p -r1.37 filedesc.h
> > > --- sys/filedesc.h 5 Jun 2018 09:29:05 -0000 1.37
> > > +++ sys/filedesc.h 12 Jun 2018 17:11:44 -0000
> > > @@ -73,11 +73,6 @@ struct filedesc {
> > > /* held when writing to fd_ofiles, */
> > > /* fd_ofileflags, or fd_{hi,lo}map */
> > >
> > > - int fd_knlistsize; /* size of knlist */
> > > - struct klist *fd_knlist; /* list of attached knotes */
> > > - u_long fd_knhashmask; /* size of knhash */
> > > - struct klist *fd_knhash; /* hash table for attached knotes */
> > > -
> > > int fd_flags; /* flags on the file descriptor table */
> > > };
> > >
> > > Index: sys/proc.h
> > > ===================================================================
> > > RCS file: /cvs/src/sys/sys/proc.h,v
> > > retrieving revision 1.248
> > > diff -u -p -r1.248 proc.h
> > > --- sys/proc.h 12 Apr 2018 17:13:44 -0000 1.248
> > > +++ sys/proc.h 12 Jun 2018 17:11:44 -0000
> > > @@ -168,6 +168,8 @@ struct process {
> > > struct vmspace *ps_vmspace; /* Address space */
> > > pid_t ps_pid; /* Process identifier. */
> > >
> > > + LIST_HEAD(, kqueue) ps_kqlist; /* kqueues attached to this process */
> > > +
> > > /* The following fields are all zeroed upon creation in process_new. */
> > > #define ps_startzero ps_klist
> > > struct klist ps_klist; /* knotes attached to this process */
> >
> >