Hello, developers.

sock_sendfile() and generic_file_sendpage() are implemented
in the attached patch.
These methods allow sendfile() to be used between any pair of file
descriptors. This is especially useful in the socket -> file case,
because the data is then written without any copy_from_user().

I do not have the nice hairy beard that all VFS developers must have,
so something is probably completely incorrect, but it passed my tests
with the ext3 filesystem.

Signed-off-by: Evgeniy Polyakov <[EMAIL PROTECTED]>

diff --git a/fs/ext3/file.c b/fs/ext3/file.c
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -119,6 +119,7 @@ struct file_operations ext3_file_operati
        .release        = ext3_release_file,
        .fsync          = ext3_sync_file,
        .sendfile       = generic_file_sendfile,
+       .sendpage       = generic_file_sendpage,
 };
 
 struct inode_operations ext3_file_inode_operations = {
diff --git a/fs/read_write.c b/fs/read_write.c
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -14,6 +14,13 @@
 #include <linux/security.h>
 #include <linux/module.h>
 #include <linux/syscalls.h>
+#include <linux/mm.h>
+#include <linux/aio.h>
+#include <linux/swap.h>
+#include <linux/mman.h>
+#include <linux/pagemap.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -325,6 +332,114 @@ static inline void file_pos_write(struct
        file->f_pos = pos;
 }
 
+extern struct page *__grab_cache_page(struct address_space *, unsigned long,
+                                      struct page **, struct pagevec *);
+
+/*
+ * generic_file_sendpage - ->sendpage() for page-cache backed files.
+ * @file:    file to write to
+ * @in_page: page holding the data to write
+ * @offset:  offset of the first byte to copy inside @in_page
+ * @size:    number of bytes to copy
+ * @ppos:    position to write at; advanced by the number of bytes written
+ * @more:    not used by this implementation
+ *
+ * Copies the data into the page cache of @file, one page-cache page at a
+ * time, through ->prepare_write() and ->commit_write().
+ *
+ * Returns the number of bytes written if any were, otherwise 0 or a
+ * negative errno.
+ *
+ * NOTE(review): no inode lock is taken here, and neither timestamps nor
+ * suid bits are touched -- confirm this against the regular write path.
+ */
+ssize_t generic_file_sendpage(struct file *file, struct page *in_page,
+                              int offset, size_t size, loff_t *ppos, int more)
+{
+       struct address_space *mapping = file->f_mapping;
+       struct address_space_operations *a_ops = mapping->a_ops;
+       struct inode *inode = mapping->host;
+       struct page *out_page;
+       struct page *cached_page = NULL;
+       struct pagevec lru_pvec;
+       unsigned long index;
+       unsigned long page_offset;
+       unsigned long bytes;
+       size_t written = 0;
+       loff_t pos = *ppos;
+       char *src;
+       char *dst;
+       ssize_t err;
+
+       err = generic_write_checks(file, &pos, &size, S_ISBLK(inode->i_mode));
+       if (err)
+               return err;
+
+       pagevec_init(&lru_pvec, 0);
+
+       while (size) {
+               page_offset = pos & (PAGE_CACHE_SIZE - 1);
+               index = pos >> PAGE_CACHE_SHIFT;
+               bytes = PAGE_CACHE_SIZE - page_offset;
+               if (bytes > size)
+                       bytes = size;
+
+               out_page = __grab_cache_page(mapping, index, &cached_page,
+                                            &lru_pvec);
+               if (!out_page) {
+                       err = -ENOMEM;
+                       break;
+               }
+
+               err = a_ops->prepare_write(file, out_page, page_offset,
+                                          page_offset + bytes);
+               if (unlikely(err)) {
+                       unlock_page(out_page);
+                       page_cache_release(out_page);
+                       break;
+               }
+
+               /*
+                * page_address() is not usable for an unmapped highmem page,
+                * so map both pages for the copy.
+                */
+               src = kmap(in_page);
+               dst = kmap(out_page);
+               memcpy(dst + page_offset, src + offset, bytes);
+               kunmap(out_page);
+               kunmap(in_page);
+
+               flush_dcache_page(out_page);
+               err = a_ops->commit_write(file, out_page, page_offset,
+                                         page_offset + bytes);
+               unlock_page(out_page);
+               mark_page_accessed(out_page);
+               page_cache_release(out_page);
+
+               if (err < 0)
+                       break;
+
+               balance_dirty_pages_ratelimited(mapping);
+
+               /* Advance in both the file and the source page. */
+               pos += bytes;
+               offset += bytes;
+               size -= bytes;
+               written += bytes;
+       }
+
+       /* Common to the success and error exits, so nothing is leaked. */
+       if (cached_page)
+               page_cache_release(cached_page);
+       pagevec_lru_add(&lru_pvec);
+
+       if (!written)
+               return err;
+
+       *ppos = pos;
+       return written;
+}
+
 asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count)
 {
        struct file *file;
@@ -667,8 +749,10 @@ static ssize_t do_sendfile(int out_fd, i
        if (!(out_file->f_mode & FMODE_WRITE))
                goto fput_out;
        retval = -EINVAL;
-       if (!out_file->f_op || !out_file->f_op->sendpage)
+       if (!out_file->f_op || !out_file->f_op->sendpage) {
+               printk("%s: out_file->f_op->sendpage=%p.\n", __func__, 
out_file->f_op->sendpage);
                goto fput_out;
+       }
        out_inode = out_file->f_dentry->d_inode;
        retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
        if (retval)
@@ -685,7 +769,7 @@ static ssize_t do_sendfile(int out_fd, i
        retval = -EINVAL;
        if (unlikely(pos < 0))
                goto fput_out;
-       if (unlikely(pos + count > max)) {
+       if (unlikely((unsigned long long)(pos + count) > (unsigned long 
long)max)) {
                retval = -EOVERFLOW;
                if (pos >= max)
                        goto fput_out;
diff --git a/include/linux/fs.h b/include/linux/fs.h
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1511,6 +1511,7 @@ extern ssize_t do_sync_write(struct file
 ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov,
                                unsigned long nr_segs, loff_t *ppos);
 extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, 
read_actor_t, void *);
+extern ssize_t generic_file_sendpage(struct file *, struct page *, int, 
size_t, loff_t *, int);
 extern void do_generic_mapping_read(struct address_space *mapping,
                                    struct file_ra_state *, struct file *,
                                    loff_t *, read_descriptor_t *, 
read_actor_t);
diff --git a/mm/filemap.c b/mm/filemap.c
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1653,7 +1653,7 @@ EXPORT_SYMBOL(read_cache_page);
  * caller's lru-buffering pagevec.  This function is specifically for
  * generic_file_write().
  */
-static inline struct page *
+struct page *
 __grab_cache_page(struct address_space *mapping, unsigned long index,
                        struct page **cached_page, struct pagevec *lru_pvec)
 {
@@ -1682,6 +1682,8 @@ repeat:
        return page;
 }
 
+EXPORT_SYMBOL(__grab_cache_page);
+
 /*
  * The logic we want is
  *
diff --git a/net/socket.c b/net/socket.c
--- a/net/socket.c
+++ b/net/socket.c
@@ -44,6 +44,7 @@
  *             Tigran Aivazian :       sys_send(args) calls sys_sendto(args, 
NULL, 0)
  *             Tigran Aivazian :       Made listen(2) backlog sanity checks 
  *                                     protocol-independent
+ *             Evgeniy Polyakov:       Added 
sock_sendfile()/generic_file_sendpage().
  *
  *
  *             This program is free software; you can redistribute it and/or
@@ -114,6 +115,7 @@ static ssize_t sock_writev(struct file *
                          unsigned long count, loff_t *ppos);
 static ssize_t sock_sendpage(struct file *file, struct page *page,
                             int offset, size_t size, loff_t *ppos, int more);
+ssize_t sock_sendfile(struct file *file, loff_t *ppos, size_t count, 
read_actor_t actor, void *target);
 
 
 /*
@@ -134,7 +136,8 @@ static struct file_operations socket_fil
        .fasync =       sock_fasync,
        .readv =        sock_readv,
        .writev =       sock_writev,
-       .sendpage =     sock_sendpage
+       .sendpage =     sock_sendpage,
+       .sendfile =     sock_sendfile,
 };
 
 /*
@@ -720,6 +723,94 @@ static ssize_t sock_aio_write(struct kio
        return __sock_sendmsg(iocb, sock, &x->async_msg, size);
 }
 
+/*
+ * sock_sendfile - ->sendfile() for sockets.
+ * @file:   socket file to read from
+ * @ppos:   position counter; advanced by the bytes handed to @actor
+ * @count:  maximum number of bytes to transfer
+ * @actor:  callback that consumes each chunk of received data
+ * @target: opaque cookie passed to @actor through the read descriptor
+ *
+ * Receives data from the socket into a bounce page and feeds it to @actor
+ * until @count bytes were moved, the socket has nothing more to read, or
+ * an error occurs.
+ *
+ * Returns the number of bytes transferred if any were, otherwise 0 or a
+ * negative errno.
+ */
+ssize_t sock_sendfile(struct file *file, loff_t *ppos, size_t count,
+                      read_actor_t actor, void *target)
+{
+       struct socket *sock;
+       struct page *page;
+       struct msghdr msg;
+       struct kvec iov;
+       read_descriptor_t desc;
+       size_t size;
+       size_t written = 0;
+       int flags;
+       int received;
+       int consumed;
+       int err = 0;
+
+       if (!count)
+               return 0;
+
+       if (!ppos)
+               return -ERANGE;
+
+       desc.written = 0;
+       desc.count = count;
+       desc.arg.data = target;
+       desc.error = 0;
+
+       page = alloc_page(GFP_KERNEL);
+       if (!page)
+               return -ENOMEM;
+
+       /* Defensive: never hand uninitialised memory to @actor. */
+       memset(page_address(page), 0, PAGE_SIZE);
+
+       sock = SOCKET_I(file->f_dentry->d_inode);
+       flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
+
+       while (count) {
+               size = min_t(size_t, count, PAGE_SIZE);
+
+               /* Set up afresh for every receive call. */
+               memset(&msg, 0, sizeof(msg));
+               iov.iov_base = page_address(page);
+               iov.iov_len = size;
+
+               received = kernel_recvmsg(sock, &msg, &iov, 1, size, flags);
+               if (received <= 0) {
+                       /* 0: nothing more to read; <0: errno from receive. */
+                       err = received;
+                       break;
+               }
+
+               consumed = actor(&desc, page, 0, received);
+               if (consumed > 0) {
+                       *ppos += consumed;
+                       written += consumed;
+                       count -= consumed;
+               }
+
+               if (desc.error) {
+                       err = desc.error;
+                       break;
+               }
+
+               /* Stop on a short write, or once a signal is waiting. */
+               if (consumed < received || signal_pending(current))
+                       break;
+       }
+
+       __free_page(page);
+
+       return written ? (ssize_t)written : err;
+}
+
 ssize_t sock_sendpage(struct file *file, struct page *page,
                      int offset, size_t size, loff_t *ppos, int more)
 {

-- 
        Evgeniy Polyakov
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to