Hello, developers.

Here is simple no-way-zero-copy support for sendfile
for socket <-> socket connections.
It is quite convenient for programmer to not use
recv/sendfile, but sendfile/sendfile instead.

If this way is usefull and should be cleaned up, 
I will finish it and probably implement sendpage()
for fs, so it could be really usefull, else just FYI.

Thanks.

Here is quite simple and stupid implemetation:

diff --git a/fs/read_write.c b/fs/read_write.c
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -685,7 +685,7 @@ static ssize_t do_sendfile(int out_fd, i
        retval = -EINVAL;
        if (unlikely(pos < 0))
                goto fput_out;
-       if (unlikely(pos + count > max)) {
+       if (unlikely((unsigned long long)(pos + count) > (unsigned long 
long)max)) {
                retval = -EOVERFLOW;
                if (pos >= max)
                        goto fput_out;
diff --git a/net/socket.c b/net/socket.c
--- a/net/socket.c
+++ b/net/socket.c
@@ -44,6 +44,8 @@
  *             Tigran Aivazian :       sys_send(args) calls sys_sendto(args, 
NULL, 0)
  *             Tigran Aivazian :       Made listen(2) backlog sanity checks 
  *                                     protocol-independent
+ *             Evgeniy Polyakov:       Added recv sendfile path for 
socket<->socket
+ *                                     communications, i.e. sock_sendfile().
  *
  *
  *             This program is free software; you can redistribute it and/or
@@ -114,6 +116,7 @@ static ssize_t sock_writev(struct file *
                          unsigned long count, loff_t *ppos);
 static ssize_t sock_sendpage(struct file *file, struct page *page,
                             int offset, size_t size, loff_t *ppos, int more);
+ssize_t sock_sendfile(struct file *file, loff_t *ppos, size_t count, 
read_actor_t actor, void *target);
 
 
 /*
@@ -134,7 +137,8 @@ static struct file_operations socket_fil
        .fasync =       sock_fasync,
        .readv =        sock_readv,
        .writev =       sock_writev,
-       .sendpage =     sock_sendpage
+       .sendpage =     sock_sendpage,
+       .sendfile =     sock_sendfile,
 };
 
 /*
@@ -720,6 +724,81 @@ static ssize_t sock_aio_write(struct kio
        return __sock_sendmsg(iocb, sock, &x->async_msg, size);
 }
 
+ssize_t sock_sendfile(struct file *file, loff_t *ppos, size_t count, 
read_actor_t actor, void *target)
+{
+       struct socket *sock;
+       struct page *page;
+       int err = 0;
+       struct msghdr msg;
+       struct kvec iov;
+       size_t size;
+       read_descriptor_t desc;
+       unsigned long offset = 0;
+       size_t orig_count = count;
+
+       printk("%s: ppos=%Lx, count=%lx.\n", __func__, *ppos, count);
+       
+       if (!count)
+               return 0;
+
+       if (!ppos != 0)
+               return -ERANGE;
+       
+       desc.written = 0;
+       desc.count = count;
+       desc.arg.data = target;
+       desc.error = 0;
+
+       page = alloc_page(GFP_KERNEL);
+       if (!page)
+               return -ENOMEM;
+
+       sock = SOCKET_I(file->f_dentry->d_inode);
+       
+       while (count) {
+
+               size = min(count, PAGE_SIZE);
+               
+               sock->sk->sk_allocation |= GFP_NOIO;
+               iov.iov_base = page_address(page);
+               iov.iov_len = size;
+               msg.msg_name = NULL;
+               msg.msg_namelen = 0;
+               msg.msg_control = NULL;
+               msg.msg_controllen = 0;
+               msg.msg_namelen = 0;
+               msg.msg_flags = MSG_NOSIGNAL;
+
+               err = kernel_recvmsg(sock, &msg, &iov, 1, size, 0);
+
+               if (signal_pending(current)) {
+                       flush_signals(current);
+                       printk("Interrupted by signal\n");
+                       return -ERESTARTSYS;
+               }
+
+               if (err <= 0) {
+                       printk("Failed to receive message: size=%lu, 
err=%d.\n", iov.iov_len, err);
+
+                       if (!err)
+                               err = -EPIPE;
+                       break;
+               }
+               
+               count -= err;
+               
+               err = actor(&desc, page, offset, size);
+               *ppos += err;
+       }
+
+       __free_pages(page, 0);
+
+       if (err)
+               return err;
+
+       return orig_count;
+}
+
 ssize_t sock_sendpage(struct file *file, struct page *page,
                      int offset, size_t size, loff_t *ppos, int more)
 {


-- 
        Evgeniy Polyakov
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to