The branch main has been updated by dchagin:

URL: https://cgit.FreeBSD.org/src/commit/?id=bb66c59753836cd8abb596fe316dcdb77ea66999

commit bb66c59753836cd8abb596fe316dcdb77ea66999
Author:     James McLaughlin <james@mclgh.net>
AuthorDate: 2023-08-17 19:57:17 +0000
Commit:     Dmitry Chagin <[email protected]>
CommitDate: 2023-08-17 19:57:17 +0000

    linux(4): Add sendfile fallback for non-socket fds
    
    Before Linux 2.6.33, out_fd must refer to a socket. Since Linux 2.6.33
    it can be any file.
    The patch was originally provided by James McLaughlin and adapted by me
    for copy_file_range.
    
    PR:                     262535
    Differential revision:  https://reviews.freebsd.org/D34555
    MFC after:              1 month
---
 sys/compat/linux/linux_socket.c | 209 ++++++++++++++++++++++++++++++++++------
 1 file changed, 177 insertions(+), 32 deletions(-)

diff --git a/sys/compat/linux/linux_socket.c b/sys/compat/linux/linux_socket.c
index 45b94cb2f994..f768392be546 100644
--- a/sys/compat/linux/linux_socket.c
+++ b/sys/compat/linux/linux_socket.c
@@ -36,10 +36,12 @@
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>
+#include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysproto.h>
+#include <sys/vnode.h>
 #include <sys/un.h>
 #include <sys/unistd.h>
 
@@ -2374,57 +2376,200 @@ out:
        return (error);
 }
 
+/*
+ * Based on sendfile_getsock from kern_sendfile.c
+ * Determines whether an fd is a stream socket that can be used
+ * with FreeBSD sendfile.
+ */
+static bool
+is_stream_socket(struct file *fp)
+{
+       struct socket *so;
+
+       /*
+        * The socket must be a stream socket and connected.
+        */
+       if (fp->f_type != DTYPE_SOCKET)
+               return (false);
+       so = fp->f_data;
+       if (so->so_type != SOCK_STREAM)
+               return (false);
+       /*
+        * SCTP one-to-one style sockets currently don't work with
+        * sendfile().
+        */
+       if (so->so_proto->pr_protocol == IPPROTO_SCTP)
+               return (false);
+       return (!SOLISTENING(so));
+}
+
+static bool
+is_regular_file(struct file *fp)
+{
+
+       return (fp->f_type == DTYPE_VNODE && fp->f_vnode != NULL &&
+           fp->f_vnode->v_type == VREG);
+}
+
 static int
-linux_sendfile_common(struct thread *td, l_int out, l_int in,
-    off_t *offset, l_size_t count)
+sendfile_fallback(struct thread *td, struct file *fp, l_int out,
+    off_t *offset, l_size_t count, off_t *sbytes)
 {
-       off_t bytes_read;
-       int error;
-       l_loff_t current_offset;
-       struct file *fp;
+       off_t current_offset, out_offset, to_send;
+       l_size_t bytes_sent, n_read;
+       struct file *ofp;
+       struct iovec aiov;
+       struct uio auio;
+       bool seekable;
+       size_t bufsz;
+       void *buf;
+       int flags, error;
 
-       AUDIT_ARG_FD(in);
-       error = fget_read(td, in, &cap_pread_rights, &fp);
+       if (offset == NULL) {
+               if ((error = fo_seek(fp, 0, SEEK_CUR, td)) != 0)
+                       return (error);
+               current_offset = td->td_uretoff.tdu_off;
+       } else {
+               if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0)
+                       return (ESPIPE);
+               current_offset = *offset;
+       }
+       error = fget_write(td, out, &cap_pwrite_rights, &ofp);
        if (error != 0)
                return (error);
-
-       if (offset != NULL) {
-               current_offset = *offset;
-       } else {
-               error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
-                   fo_seek(fp, 0, SEEK_CUR, td) : ESPIPE;
-               if (error != 0)
+       seekable = (ofp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0;
+       if (seekable) {
+               if ((error = fo_seek(ofp, 0, SEEK_CUR, td)) != 0)
                        goto drop;
+               out_offset = td->td_uretoff.tdu_off;
+       } else
+               out_offset = 0;
+
+       flags = FOF_OFFSET | FOF_NOUPDATE;
+       bufsz = min(count, MAXPHYS);
+       buf = malloc(bufsz, M_LINUX, M_WAITOK);
+       bytes_sent = 0;
+       while (bytes_sent < count) {
+               to_send = min(count - bytes_sent, bufsz);
+               aiov.iov_base = buf;
+               aiov.iov_len = bufsz;
+               auio.uio_iov = &aiov;
+               auio.uio_iovcnt = 1;
+               auio.uio_segflg = UIO_SYSSPACE;
+               auio.uio_td = td;
+               auio.uio_rw = UIO_READ;
+               auio.uio_offset = current_offset;
+               auio.uio_resid = to_send;
+               error = fo_read(fp, &auio, fp->f_cred, flags, td);
+               if (error != 0)
+                       break;
+               n_read = to_send - auio.uio_resid;
+               if (n_read == 0)
+                       break;
+               aiov.iov_base = buf;
+               aiov.iov_len = bufsz;
+               auio.uio_iov = &aiov;
+               auio.uio_iovcnt = 1;
+               auio.uio_segflg = UIO_SYSSPACE;
+               auio.uio_td = td;
+               auio.uio_rw = UIO_WRITE;
+               auio.uio_offset = (seekable) ? out_offset : 0;
+               auio.uio_resid = n_read;
+               error = fo_write(ofp, &auio, ofp->f_cred, flags, td);
+               if (error != 0)
+                       break;
+               bytes_sent += n_read;
+               current_offset += n_read;
+               out_offset += n_read;
+       }
+       free(buf, M_LINUX);
+
+       if (error == 0) {
+               *sbytes = bytes_sent;
+               if (offset != NULL)
+                       *offset = current_offset;
+               else
+                       error = fo_seek(fp, current_offset, SEEK_SET, td);
+       }
+       if (error == 0 && seekable)
+               error = fo_seek(ofp, out_offset, SEEK_SET, td);
+
+drop:
+       fdrop(ofp, td);
+       return (error);
+}
+
+static int
+sendfile_sendfile(struct thread *td, struct file *fp, l_int out,
+    off_t *offset, l_size_t count, off_t *sbytes)
+{
+       off_t current_offset;
+       int error;
+
+       if (offset == NULL) {
+               if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0)
+                       return (ESPIPE);
+               if ((error = fo_seek(fp, 0, SEEK_CUR, td)) != 0)
+                       return (error);
                current_offset = td->td_uretoff.tdu_off;
+       } else
+               current_offset = *offset;
+       error = fo_sendfile(fp, out, NULL, NULL, current_offset, count,
+           sbytes, 0, td);
+       if (error == 0) {
+               current_offset += *sbytes;
+               if (offset != NULL)
+                       *offset = current_offset;
+               else
+                       error = fo_seek(fp, current_offset, SEEK_SET, td);
        }
+       return (error);
+}
 
-       bytes_read = 0;
+static int
+linux_sendfile_common(struct thread *td, l_int out, l_int in,
+    off_t *offset, l_size_t count)
+{
+       struct file *fp, *ofp;
+       off_t sbytes;
+       int error;
 
        /* Linux cannot have 0 count. */
-       if (count <= 0 || current_offset < 0) {
+       if (count <= 0 || (offset != NULL && *offset < 0))
+               return (EINVAL);
+
+       AUDIT_ARG_FD(in);
+       error = fget_read(td, in, &cap_pread_rights, &fp);
+       if (error != 0)
+               return (error);
+       if ((fp->f_type != DTYPE_SHM && fp->f_type != DTYPE_VNODE) ||
+           (fp->f_type == DTYPE_VNODE &&
+           (fp->f_vnode == NULL || fp->f_vnode->v_type != VREG))) {
                error = EINVAL;
                goto drop;
        }
-
-       error = fo_sendfile(fp, out, NULL, NULL, current_offset, count,
-           &bytes_read, 0, td);
+       error = fget_unlocked(td, out, &cap_no_rights, &ofp);
        if (error != 0)
                goto drop;
-       current_offset += bytes_read;
 
-       if (offset != NULL) {
-               *offset = current_offset;
+       if (is_regular_file(fp) && is_regular_file(ofp)) {
+               error = kern_copy_file_range(td, in, offset, out, NULL, count,
+                   0);
        } else {
-               error = fo_seek(fp, current_offset, SEEK_SET, td);
-               if (error != 0)
-                       goto drop;
+               sbytes = 0;
+               if (is_stream_socket(ofp))
+                       error = sendfile_sendfile(td, fp, out, offset, count,
+                           &sbytes);
+               else
+                       error = sendfile_fallback(td, fp, out, offset, count,
+                           &sbytes);
+               if (error == 0)
+                       td->td_retval[0] = sbytes;
        }
+       fdrop(ofp, td);
 
-       td->td_retval[0] = (ssize_t)bytes_read;
 drop:
        fdrop(fp, td);
-       if (error == ENOTSOCK)
-               error = EINVAL;
        return (error);
 }
 
@@ -2434,10 +2579,10 @@ linux_sendfile(struct thread *td, struct linux_sendfile_args *arg)
        /*
         * Differences between FreeBSD and Linux sendfile:
         * - Linux doesn't send anything when count is 0 (FreeBSD uses 0 to
-        *   mean send the whole file.)  In linux_sendfile given fds are still
-        *   checked for validity when the count is 0.
+        *   mean send the whole file).
         * - Linux can send to any fd whereas FreeBSD only supports sockets.
-        *   The same restriction follows for linux_sendfile.
+        *   We therefore use FreeBSD sendfile where possible for performance,
+        *   but fall back on a manual copy (sendfile_fallback).
         * - Linux doesn't have an equivalent for FreeBSD's flags and sf_hdtr.
         * - Linux takes an offset pointer and updates it to the read location.
         *   FreeBSD takes in an offset and a 'bytes read' parameter which is

Reply via email to