This diff sets klist lock for sockets. Since sockets use custom lock
functions, the diff introduces socket_klistops for use with the klist
interface.
In soo_kqfilter(), the diff adds socket locking for accessing
so_options and changing the klist.
The patch replaces the use of the SB_KNOTE flag with a klist_empty()
predicate in sb_notify(). The flag is just a roundabout way of
determining whether the klist has elements. The removal of the flag
makes sb_flagsintr redundant.
As a result of this klist change, it becomes more explicit that the
socket has to be locked when calling selwakeup() on sb_sel. If the lock
is not held, the lock assert in knote() will trigger. The socket-related
f_event routines already expect the lock when NOTE_SUBMIT is set in
the hints parameter.
OK?
Index: sys/kern/uipc_socket.c
===================================================================
RCS file: src/sys/kern/uipc_socket.c,v
retrieving revision 1.252
diff -u -p -r1.252 uipc_socket.c
--- sys/kern/uipc_socket.c 25 Dec 2020 12:59:52 -0000 1.252
+++ sys/kern/uipc_socket.c 5 Jan 2021 16:46:42 -0000
@@ -150,6 +150,8 @@ socreate(int dom, struct socket **aso, i
if (prp->pr_type != type)
return (EPROTOTYPE);
so = pool_get(&socket_pool, PR_WAITOK | PR_ZERO);
+ klist_init(&so->so_rcv.sb_sel.si_note, &socket_klistops, so);
+ klist_init(&so->so_snd.sb_sel.si_note, &socket_klistops, so);
sigio_init(&so->so_sigio);
TAILQ_INIT(&so->so_q0);
TAILQ_INIT(&so->so_q);
@@ -239,6 +241,8 @@ sofree(struct socket *so, int s)
}
}
sigio_free(&so->so_sigio);
+ klist_free(&so->so_rcv.sb_sel.si_note);
+ klist_free(&so->so_snd.sb_sel.si_note);
#ifdef SOCKET_SPLICE
if (so->so_sp) {
if (issplicedback(so)) {
@@ -2017,9 +2021,9 @@ soo_kqfilter(struct file *fp, struct kno
{
struct socket *so = kn->kn_fp->f_data;
struct sockbuf *sb;
+ int s;
- KERNEL_ASSERT_LOCKED();
-
+ s = solock(so);
switch (kn->kn_filter) {
case EVFILT_READ:
if (so->so_options & SO_ACCEPTCONN)
@@ -2037,11 +2041,12 @@ soo_kqfilter(struct file *fp, struct kno
sb = &so->so_rcv;
break;
default:
+ sounlock(so, s);
return (EINVAL);
}
klist_insert_locked(&sb->sb_sel.si_note, kn);
- sb->sb_flagsintr |= SB_KNOTE;
+ sounlock(so, s);
return (0);
}
@@ -2051,11 +2056,7 @@ filt_sordetach(struct knote *kn)
{
struct socket *so = kn->kn_fp->f_data;
- KERNEL_ASSERT_LOCKED();
-
- klist_remove_locked(&so->so_rcv.sb_sel.si_note, kn);
- if (klist_empty(&so->so_rcv.sb_sel.si_note))
- so->so_rcv.sb_flagsintr &= ~SB_KNOTE;
+ klist_remove(&so->so_rcv.sb_sel.si_note, kn);
}
int
@@ -2104,11 +2105,7 @@ filt_sowdetach(struct knote *kn)
{
struct socket *so = kn->kn_fp->f_data;
- KERNEL_ASSERT_LOCKED();
-
- klist_remove_locked(&so->so_snd.sb_sel.si_note, kn);
- if (klist_empty(&so->so_snd.sb_sel.si_note))
- so->so_snd.sb_flagsintr &= ~SB_KNOTE;
+ klist_remove(&so->so_snd.sb_sel.si_note, kn);
}
int
@@ -2159,6 +2156,36 @@ filt_solisten(struct knote *kn, long hin
return (kn->kn_data != 0);
}
+static void
+klist_soassertlk(void *arg)
+{
+ struct socket *so = arg;
+
+ soassertlocked(so);
+}
+
+static int
+klist_solock(void *arg)
+{
+ struct socket *so = arg;
+
+ return (solock(so));
+}
+
+static void
+klist_sounlock(void *arg, int ls)
+{
+ struct socket *so = arg;
+
+ sounlock(so, ls);
+}
+
+const struct klistops socket_klistops = {
+ .klo_assertlk = klist_soassertlk,
+ .klo_lock = klist_solock,
+ .klo_unlock = klist_sounlock,
+};
+
#ifdef DDB
void
sobuf_print(struct sockbuf *,
@@ -2179,7 +2206,6 @@ sobuf_print(struct sockbuf *sb,
(*pr)("\tsb_mbtail: %p\n", sb->sb_mbtail);
(*pr)("\tsb_lastrecord: %p\n", sb->sb_lastrecord);
(*pr)("\tsb_sel: ...\n");
- (*pr)("\tsb_flagsintr: %d\n", sb->sb_flagsintr);
(*pr)("\tsb_flags: %i\n", sb->sb_flags);
(*pr)("\tsb_timeo_nsecs: %llu\n", sb->sb_timeo_nsecs);
}
Index: sys/kern/uipc_socket2.c
===================================================================
RCS file: src/sys/kern/uipc_socket2.c,v
retrieving revision 1.104
diff -u -p -r1.104 uipc_socket2.c
--- sys/kern/uipc_socket2.c 11 Apr 2020 14:07:06 -0000 1.104
+++ sys/kern/uipc_socket2.c 5 Jan 2021 16:46:42 -0000
@@ -186,6 +186,8 @@ sonewconn(struct socket *head, int conns
so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
so->so_rcv.sb_timeo_nsecs = head->so_rcv.sb_timeo_nsecs;
+ klist_init(&so->so_rcv.sb_sel.si_note, &socket_klistops, so);
+ klist_init(&so->so_snd.sb_sel.si_note, &socket_klistops, so);
sigio_init(&so->so_sigio);
sigio_copy(&so->so_sigio, &head->so_sigio);
@@ -193,6 +195,8 @@ sonewconn(struct socket *head, int conns
if ((*so->so_proto->pr_attach)(so, 0)) {
(void) soqremque(so, soqueue);
sigio_free(&so->so_sigio);
+ klist_free(&so->so_rcv.sb_sel.si_note);
+ klist_free(&so->so_snd.sb_sel.si_note);
pool_put(&socket_pool, so);
return (NULL);
}
Index: sys/miscfs/fifofs/fifo_vnops.c
===================================================================
RCS file: src/sys/miscfs/fifofs/fifo_vnops.c,v
retrieving revision 1.78
diff -u -p -r1.78 fifo_vnops.c
--- sys/miscfs/fifofs/fifo_vnops.c 25 Dec 2020 12:59:52 -0000 1.78
+++ sys/miscfs/fifofs/fifo_vnops.c 5 Jan 2021 16:46:42 -0000
@@ -532,8 +532,7 @@ fifo_kqfilter(void *v)
ap->a_kn->kn_hook = so;
- klist_insert_locked(&sb->sb_sel.si_note, ap->a_kn);
- sb->sb_flagsintr |= SB_KNOTE;
+ klist_insert(&sb->sb_sel.si_note, ap->a_kn);
return (0);
}
@@ -543,9 +542,7 @@ filt_fifordetach(struct knote *kn)
{
struct socket *so = (struct socket *)kn->kn_hook;
- klist_remove_locked(&so->so_rcv.sb_sel.si_note, kn);
- if (klist_empty(&so->so_rcv.sb_sel.si_note))
- so->so_rcv.sb_flagsintr &= ~SB_KNOTE;
+ klist_remove(&so->so_rcv.sb_sel.si_note, kn);
}
int
@@ -579,9 +576,7 @@ filt_fifowdetach(struct knote *kn)
{
struct socket *so = (struct socket *)kn->kn_hook;
- klist_remove_locked(&so->so_snd.sb_sel.si_note, kn);
- if (klist_empty(&so->so_snd.sb_sel.si_note))
- so->so_snd.sb_flagsintr &= ~SB_KNOTE;
+ klist_remove(&so->so_snd.sb_sel.si_note, kn);
}
int
Index: sys/sys/event.h
===================================================================
RCS file: src/sys/sys/event.h,v
retrieving revision 1.52
diff -u -p -r1.52 event.h
--- sys/sys/event.h 25 Dec 2020 12:59:53 -0000 1.52
+++ sys/sys/event.h 5 Jan 2021 16:46:42 -0000
@@ -226,6 +226,7 @@ struct timespec;
extern const struct filterops sig_filtops;
extern const struct filterops dead_filtops;
+extern const struct klistops socket_klistops;
extern void kqpoll_init(void);
extern void kqpoll_exit(void);
Index: sys/sys/socketvar.h
===================================================================
RCS file: src/sys/sys/socketvar.h,v
retrieving revision 1.91
diff -u -p -r1.91 socketvar.h
--- sys/sys/socketvar.h 15 Jan 2020 13:17:35 -0000 1.91
+++ sys/sys/socketvar.h 5 Jan 2021 16:46:42 -0000
@@ -113,7 +113,6 @@ struct socket {
short sb_flags; /* flags, see below */
/* End area that is zeroed on flush. */
#define sb_endzero sb_flags
- int sb_flagsintr; /* flags, changed atomically */
uint64_t sb_timeo_nsecs;/* timeout for read/write */
struct selinfo sb_sel; /* process selecting read/write */
} so_rcv, so_snd;
@@ -125,7 +124,6 @@ struct socket {
#define SB_ASYNC 0x10 /* ASYNC I/O, need signals */
#define SB_SPLICE 0x20 /* buffer is splice source or
drain */
#define SB_NOINTR 0x40 /* operations not interruptible
*/
-#define SB_KNOTE 0x80 /* kernel note attached */
void (*so_upcall)(struct socket *so, caddr_t arg, int waitf);
caddr_t so_upcallarg; /* Arg for above */
@@ -177,11 +175,10 @@ void soassertlocked(struct socket *);
static inline int
sb_notify(struct socket *so, struct sockbuf *sb)
{
- int flags = (sb->sb_flags | sb->sb_flagsintr);
-
KASSERT(sb == &so->so_rcv || sb == &so->so_snd);
soassertlocked(so);
- return ((flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_SPLICE|SB_KNOTE)) != 0);
+ return ((sb->sb_flags & (SB_WAIT|SB_SEL|SB_ASYNC|SB_SPLICE)) != 0 ||
+ !klist_empty(&sb->sb_sel.si_note));
}
/*
Index: usr.bin/netstat/inet.c
===================================================================
RCS file: src/usr.bin/netstat/inet.c,v
retrieving revision 1.169
diff -u -p -r1.169 inet.c
--- usr.bin/netstat/inet.c 23 Dec 2020 22:20:18 -0000 1.169
+++ usr.bin/netstat/inet.c 5 Jan 2021 16:46:42 -0000
@@ -1378,7 +1378,6 @@ sockbuf_dump(struct sockbuf *sb, const c
p("%lu", sb_mbmax, ", ");
p("%ld", sb_lowat, "\n ");
printf("%s ", name);
- p("%#.8x", sb_flagsintr, ", ");
p("%#.4x", sb_flags, ", ");
p("%llu", sb_timeo_nsecs, "\n ");
#undef p