On Wed, Jul 03, 2019 at 11:49:51PM +0200, Alexander Bluhm wrote:
> Hi,
>
> I would like to remove a useless kernel lock during socket splicing.
>
> We have a socket "so" that splices data to socket "sosp". Everytime
> when space in sosp gets available, we add a task to move data from
> so to sosp. Additionally we call sowakeup() from sowwakeup(). I
> have added this as it is possible to splice from so to sosp and
> write from userland to sosp simultaneously. In practice this does
> not make sense as the data streams from two sources would get mixed.
> Nothing uses this. So it is not neccessay to inform userland that
> it is possible to write.
>
> Note that sowakeup() takes the kernel lock. So when I do not call
> it for a spliced socket, there is no kernel lock in the TCP splicing
> path anymore.
>
> But then I have to wakeup userland for the wirting socket in
> sounsplice().
>
> ok?
I think in general this makes sense. One comment inline.
> Index: sys/kern/uipc_socket.c
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.231
> diff -u -p -r1.231 uipc_socket.c
> --- sys/kern/uipc_socket.c 17 Dec 2018 16:46:59 -0000 1.231
> +++ sys/kern/uipc_socket.c 3 Jul 2019 13:56:09 -0000
> @@ -220,9 +220,10 @@ sofree(struct socket *so, int s)
> if (so->so_sp) {
> if (issplicedback(so))
> sounsplice(so->so_sp->ssp_soback, so,
> - so->so_sp->ssp_soback != so);
> + so->so_sp->ssp_soback == so ? 3 : 2);
> if (isspliced(so))
> - sounsplice(so, so->so_sp->ssp_socket, 0);
> + sounsplice(so, so->so_sp->ssp_socket,
> + so == so->so_sp->ssp_socket ? 3 : 1);
> }
> #endif /* SOCKET_SPLICE */
> sbrelease(so, &so->so_snd);
> @@ -1148,7 +1149,7 @@ sosplice(struct socket *so, int fd, off_
> return (error);
> }
> if (so->so_sp->ssp_socket)
> - sounsplice(so, so->so_sp->ssp_socket, 1);
> + sounsplice(so, so->so_sp->ssp_socket, 0);
> sbunlock(so, &so->so_rcv);
> return (0);
> }
> @@ -1227,7 +1228,7 @@ sosplice(struct socket *so, int fd, off_
> }
>
> void
> -sounsplice(struct socket *so, struct socket *sosp, int wakeup)
> +sounsplice(struct socket *so, struct socket *sosp, int freeing)
> {
> soassertlocked(so);
>
> @@ -1236,8 +1237,11 @@ sounsplice(struct socket *so, struct soc
> sosp->so_snd.sb_flags &= ~SB_SPLICE;
> so->so_rcv.sb_flags &= ~SB_SPLICE;
> so->so_sp->ssp_socket = sosp->so_sp->ssp_soback = NULL;
> - if (wakeup && soreadable(so))
> + /* Do not wakeup a socket that is about to be freed. */
> + if ((freeing & 1) == 0 && soreadable(so))
> sorwakeup(so);
> + if ((freeing & 2) == 0 && sowriteable(sosp))
> + sowwakeup(sosp);
Would it be possible to use some #defined flags here instead of 1,2,3?
Maybe use FREAD/FWRITE or define something new.
> }
>
> void
> @@ -1249,7 +1253,7 @@ soidle(void *arg)
> s = solock(so);
> if (so->so_rcv.sb_flags & SB_SPLICE) {
> so->so_error = ETIMEDOUT;
> - sounsplice(so, so->so_sp->ssp_socket, 1);
> + sounsplice(so, so->so_sp->ssp_socket, 0);
> }
> sounlock(so, s);
> }
> @@ -1574,7 +1578,7 @@ somove(struct socket *so, int wait)
> so->so_error = error;
> if (((so->so_state & SS_CANTRCVMORE) && so->so_rcv.sb_cc == 0) ||
> (sosp->so_state & SS_CANTSENDMORE) || maxreached || error) {
> - sounsplice(so, sosp, 1);
> + sounsplice(so, sosp, 0);
> return (0);
> }
> if (timerisset(&so->so_idletv))
> @@ -1620,6 +1624,8 @@ sowwakeup(struct socket *so)
> #ifdef SOCKET_SPLICE
> if (so->so_snd.sb_flags & SB_SPLICE)
> task_add(sosplice_taskq, &so->so_sp->ssp_soback->so_splicetask);
> + if (issplicedback(so))
> + return;
> #endif
> sowakeup(so, &so->so_snd);
> }
> Index: share/man/man9/sosplice.9
> ===================================================================
> RCS file: /data/mirror/openbsd/cvs/src/share/man/man9/sosplice.9,v
> retrieving revision 1.9
> diff -u -p -r1.9 sosplice.9
> --- share/man/man9/sosplice.9 15 Aug 2018 12:10:49 -0000 1.9
> +++ share/man/man9/sosplice.9 3 Jul 2019 21:42:34 -0000
> @@ -206,10 +206,13 @@ will call
> to trigger the transfer when new data or buffer space is available.
> While socket splicing is active, any
> .Xr read 2
> -from the source socket will block and the wakeup will not be delivered
> -to the file descriptor.
> -A read event or a socket error is signaled to userland after
> -dissolving.
> +from the source socket will block.
> +Neither read nor write wakeups will be delivered to the file
> +descriptors.
> +After dissolving, a read event or a socket error is signaled to
> +userland on the source socket.
> +If space is available, a write event will be signaled on the drain
> +socket.
> .Sh RETURN VALUES
> .Fn sosplice
> returns 0 on success and otherwise the error number.
>
--
:wq Claudio