This diff makes the pipe event filters ready to run without the kernel lock.
The code pattern in the callbacks is the same as the one used for sockets.
Pipes already have a klist lock.
So far, pipe event filters have used read-locking. This diff changes
that to write-locking for clarity. This should not be a real loss,
though, because the lock is fine-grained and there is little
multiple-reader parallelism to exploit.
OK?
Index: kern/sys_pipe.c
===================================================================
RCS file: src/sys/kern/sys_pipe.c,v
retrieving revision 1.127
diff -u -p -r1.127 sys_pipe.c
--- kern/sys_pipe.c 22 Oct 2021 05:00:26 -0000 1.127
+++ kern/sys_pipe.c 22 Oct 2021 12:17:57 -0000
@@ -78,20 +78,30 @@ static const struct fileops pipeops = {
void filt_pipedetach(struct knote *kn);
int filt_piperead(struct knote *kn, long hint);
+int filt_pipereadmodify(struct kevent *kev, struct knote *kn);
+int filt_pipereadprocess(struct knote *kn, struct kevent *kev);
+int filt_piperead_common(struct knote *kn, struct pipe *rpipe);
int filt_pipewrite(struct knote *kn, long hint);
+int filt_pipewritemodify(struct kevent *kev, struct knote *kn);
+int filt_pipewriteprocess(struct knote *kn, struct kevent *kev);
+int filt_pipewrite_common(struct knote *kn, struct pipe *rpipe);
const struct filterops pipe_rfiltops = {
- .f_flags = FILTEROP_ISFD,
+ .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
.f_attach = NULL,
.f_detach = filt_pipedetach,
.f_event = filt_piperead,
+ .f_modify = filt_pipereadmodify,
+ .f_process = filt_pipereadprocess,
};
const struct filterops pipe_wfiltops = {
- .f_flags = FILTEROP_ISFD,
+ .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
.f_attach = NULL,
.f_detach = filt_pipedetach,
.f_event = filt_pipewrite,
+ .f_modify = filt_pipewritemodify,
+ .f_process = filt_pipewriteprocess,
};
/*
@@ -362,9 +372,7 @@ pipeselwakeup(struct pipe *cpipe)
cpipe->pipe_state &= ~PIPE_SEL;
selwakeup(&cpipe->pipe_sel);
} else {
- KERNEL_LOCK();
- KNOTE(&cpipe->pipe_sel.si_note, NOTE_SUBMIT);
- KERNEL_UNLOCK();
+ KNOTE(&cpipe->pipe_sel.si_note, 0);
}
if (cpipe->pipe_state & PIPE_ASYNC)
@@ -929,45 +937,76 @@ filt_pipedetach(struct knote *kn)
}
int
-filt_piperead(struct knote *kn, long hint)
+filt_piperead_common(struct knote *kn, struct pipe *rpipe)
{
- struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
- struct rwlock *lock = rpipe->pipe_lock;
+ struct pipe *wpipe;
+
+ rw_assert_wrlock(rpipe->pipe_lock);
- if ((hint & NOTE_SUBMIT) == 0)
- rw_enter_read(lock);
wpipe = pipe_peer(rpipe);
kn->kn_data = rpipe->pipe_buffer.cnt;
if ((rpipe->pipe_state & PIPE_EOF) || wpipe == NULL) {
- if ((hint & NOTE_SUBMIT) == 0)
- rw_exit_read(lock);
kn->kn_flags |= EV_EOF;
if (kn->kn_flags & __EV_POLL)
kn->kn_flags |= __EV_HUP;
return (1);
}
- if ((hint & NOTE_SUBMIT) == 0)
- rw_exit_read(lock);
-
return (kn->kn_data > 0);
}
int
-filt_pipewrite(struct knote *kn, long hint)
+filt_piperead(struct knote *kn, long hint)
{
- struct pipe *rpipe = kn->kn_fp->f_data, *wpipe;
- struct rwlock *lock = rpipe->pipe_lock;
+ struct pipe *rpipe = kn->kn_fp->f_data;
+
+ return (filt_piperead_common(kn, rpipe));
+}
+
+int
+filt_pipereadmodify(struct kevent *kev, struct knote *kn)
+{
+ struct pipe *rpipe = kn->kn_fp->f_data;
+ int active;
+
+ rw_enter_write(rpipe->pipe_lock);
+ knote_modify(kev, kn);
+ active = filt_piperead_common(kn, rpipe);
+ rw_exit_write(rpipe->pipe_lock);
+
+ return (active);
+}
+
+int
+filt_pipereadprocess(struct knote *kn, struct kevent *kev)
+{
+ struct pipe *rpipe = kn->kn_fp->f_data;
+ int active;
+
+ rw_enter_write(rpipe->pipe_lock);
+ if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
+ active = 1;
+ else
+ active = filt_piperead_common(kn, rpipe);
+ if (active)
+ knote_submit(kn, kev);
+ rw_exit_write(rpipe->pipe_lock);
+
+ return (active);
+}
+
+int
+filt_pipewrite_common(struct knote *kn, struct pipe *rpipe)
+{
+ struct pipe *wpipe;
+
+ rw_assert_wrlock(rpipe->pipe_lock);
- if ((hint & NOTE_SUBMIT) == 0)
- rw_enter_read(lock);
wpipe = pipe_peer(rpipe);
if (wpipe == NULL) {
- if ((hint & NOTE_SUBMIT) == 0)
- rw_exit_read(lock);
kn->kn_data = 0;
kn->kn_flags |= EV_EOF;
if (kn->kn_flags & __EV_POLL)
@@ -976,12 +1015,49 @@ filt_pipewrite(struct knote *kn, long hi
}
kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
- if ((hint & NOTE_SUBMIT) == 0)
- rw_exit_read(lock);
-
return (kn->kn_data >= PIPE_BUF);
}
+int
+filt_pipewrite(struct knote *kn, long hint)
+{
+ struct pipe *rpipe = kn->kn_fp->f_data;
+
+ return (filt_pipewrite_common(kn, rpipe));
+}
+
+int
+filt_pipewritemodify(struct kevent *kev, struct knote *kn)
+{
+ struct pipe *rpipe = kn->kn_fp->f_data;
+ int active;
+
+ rw_enter_write(rpipe->pipe_lock);
+ knote_modify(kev, kn);
+ active = filt_pipewrite_common(kn, rpipe);
+ rw_exit_write(rpipe->pipe_lock);
+
+ return (active);
+}
+
+int
+filt_pipewriteprocess(struct knote *kn, struct kevent *kev)
+{
+ struct pipe *rpipe = kn->kn_fp->f_data;
+ int active;
+
+ rw_enter_write(rpipe->pipe_lock);
+ if (kev != NULL && (kn->kn_flags & EV_ONESHOT))
+ active = 1;
+ else
+ active = filt_pipewrite_common(kn, rpipe);
+ if (active)
+ knote_submit(kn, kev);
+ rw_exit_write(rpipe->pipe_lock);
+
+ return (active);
+}
+
void
pipe_init(void)
{