Hello, developers. sock_sendfile() and generic_file_sendpage() are implemented in the attached patch. These methods allow sendfile() to be used for any file descriptor <-> file descriptor transfer. This is especially useful in the socket -> file case, because no copy_from_user() is needed when writing the data.
I do not have nice hairy beard which all VFS developers must have, so probably something is completely incorrect, but it passed my tests with ext3 filesystem. Signed-off-by: Evgeniy Polyakov <[EMAIL PROTECTED]> diff --git a/fs/ext3/file.c b/fs/ext3/file.c --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -119,6 +119,7 @@ struct file_operations ext3_file_operati .release = ext3_release_file, .fsync = ext3_sync_file, .sendfile = generic_file_sendfile, + .sendpage = generic_file_sendpage, }; struct inode_operations ext3_file_inode_operations = { diff --git a/fs/read_write.c b/fs/read_write.c --- a/fs/read_write.c +++ b/fs/read_write.c @@ -14,6 +14,13 @@ #include <linux/security.h> #include <linux/module.h> #include <linux/syscalls.h> +#include <linux/mm.h> +#include <linux/aio.h> +#include <linux/swap.h> +#include <linux/mman.h> +#include <linux/pagemap.h> +#include <linux/writeback.h> +#include <linux/pagevec.h> #include <asm/uaccess.h> #include <asm/unistd.h> @@ -325,6 +332,81 @@ static inline void file_pos_write(struct file->f_pos = pos; } +extern struct page * __grab_cache_page(struct address_space *, unsigned long, struct page **, struct pagevec *); + +ssize_t generic_file_sendpage(struct file *file, struct page *in_page, int offset, size_t size, loff_t *ppos, int more) +{ + ssize_t err; + struct address_space * mapping = file->f_mapping; + struct address_space_operations *a_ops = mapping->a_ops; + struct inode *inode = mapping->host; + struct page *out_page; + struct page *cached_page = NULL; + struct pagevec lru_pvec; + unsigned long index; + unsigned long page_offset; + unsigned long bytes; + size_t written; + loff_t pos = *ppos; + + err = generic_write_checks(file, &pos, &size, S_ISBLK(inode->i_mode)); + if (err) + goto err_out_exit; + + pagevec_init(&lru_pvec, 0); + + written = 0; + + while (size) { + page_offset = (pos & (PAGE_CACHE_SIZE -1)); + index = pos >> PAGE_CACHE_SHIFT; + bytes = PAGE_CACHE_SIZE - page_offset; + if (bytes > size) + bytes = size; + + 
out_page = __grab_cache_page(mapping, index, &cached_page, &lru_pvec); + if (!out_page) { + err = -ENOMEM; + goto err_out_exit; + } + + err = a_ops->prepare_write(file, out_page, page_offset, page_offset+bytes); + if (unlikely(err)) + goto err_out_unlock; + + memcpy(page_address(out_page)+page_offset, page_address(in_page)+offset, bytes); + + flush_dcache_page(out_page); + err = a_ops->commit_write(file, out_page, page_offset, page_offset+bytes); + unlock_page(out_page); + mark_page_accessed(out_page); + page_cache_release(out_page); + + if (err < 0) + goto err_out_exit; + + balance_dirty_pages_ratelimited(mapping); + + size -= bytes; + written += bytes; + } + + if (cached_page) + page_cache_release(cached_page); + + pagevec_lru_add(&lru_pvec); + file_pos_write(file, pos + written); + + return written; + +err_out_unlock: + unlock_page(out_page); + page_cache_release(out_page); +err_out_exit: + + return err; +} + asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count) { struct file *file; @@ -667,8 +749,10 @@ static ssize_t do_sendfile(int out_fd, i if (!(out_file->f_mode & FMODE_WRITE)) goto fput_out; retval = -EINVAL; - if (!out_file->f_op || !out_file->f_op->sendpage) + if (!out_file->f_op || !out_file->f_op->sendpage) { + printk("%s: out_file->f_op->sendpage=%p.\n", __func__, out_file->f_op->sendpage); goto fput_out; + } out_inode = out_file->f_dentry->d_inode; retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); if (retval) @@ -685,7 +769,7 @@ static ssize_t do_sendfile(int out_fd, i retval = -EINVAL; if (unlikely(pos < 0)) goto fput_out; - if (unlikely(pos + count > max)) { + if (unlikely((unsigned long long)(pos + count) > (unsigned long long)max)) { retval = -EOVERFLOW; if (pos >= max) goto fput_out; diff --git a/include/linux/fs.h b/include/linux/fs.h --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1511,6 +1511,7 @@ extern ssize_t do_sync_write(struct file ssize_t generic_file_write_nolock(struct file *file, const 
struct iovec *iov, unsigned long nr_segs, loff_t *ppos); extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); +extern ssize_t generic_file_sendpage(struct file *, struct page *, int, size_t, loff_t *, int); extern void do_generic_mapping_read(struct address_space *mapping, struct file_ra_state *, struct file *, loff_t *, read_descriptor_t *, read_actor_t); diff --git a/mm/filemap.c b/mm/filemap.c --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1653,7 +1653,7 @@ EXPORT_SYMBOL(read_cache_page); * caller's lru-buffering pagevec. This function is specifically for * generic_file_write(). */ -static inline struct page * +struct page * __grab_cache_page(struct address_space *mapping, unsigned long index, struct page **cached_page, struct pagevec *lru_pvec) { @@ -1682,6 +1682,8 @@ repeat: return page; } +EXPORT_SYMBOL(__grab_cache_page); + /* * The logic we want is * diff --git a/net/socket.c b/net/socket.c --- a/net/socket.c +++ b/net/socket.c @@ -44,6 +44,7 @@ * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) * Tigran Aivazian : Made listen(2) backlog sanity checks * protocol-independent + * Evgeniy Polyakov: Added sock_sendfile()/generic_file_sendpage(). 
* * * This program is free software; you can redistribute it and/or @@ -114,6 +115,7 @@ static ssize_t sock_writev(struct file * unsigned long count, loff_t *ppos); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); +ssize_t sock_sendfile(struct file *file, loff_t *ppos, size_t count, read_actor_t actor, void *target); /* @@ -134,7 +136,8 @@ static struct file_operations socket_fil .fasync = sock_fasync, .readv = sock_readv, .writev = sock_writev, - .sendpage = sock_sendpage + .sendpage = sock_sendpage, + .sendfile = sock_sendfile, }; /* @@ -720,6 +723,82 @@ static ssize_t sock_aio_write(struct kio return __sock_sendmsg(iocb, sock, &x->async_msg, size); } +ssize_t sock_sendfile(struct file *file, loff_t *ppos, size_t count, read_actor_t actor, void *target) +{ + struct socket *sock; + struct page *page; + int err = 0; + struct msghdr msg; + struct kvec iov; + size_t size; + read_descriptor_t desc; + unsigned long offset = 0; + size_t written = 0; + + if (!count) + return 0; + + if (!ppos != 0) + return -ERANGE; + + desc.written = 0; + desc.count = count; + desc.arg.data = target; + desc.error = 0; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + sock = SOCKET_I(file->f_dentry->d_inode); + + while (count) { + + size = min(count, PAGE_SIZE); + + sock->sk->sk_allocation |= GFP_NOIO; + iov.iov_base = page_address(page); + iov.iov_len = size; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_namelen = 0; + msg.msg_flags = MSG_NOSIGNAL; + + memset(iov.iov_base, 0, PAGE_SIZE); + + err = kernel_recvmsg(sock, &msg, &iov, 1, size, 0); + + if (signal_pending(current)) { + flush_signals(current); + printk("Interrupted by signal\n"); + return -ERESTARTSYS; + } + + if (err < 0) { + printk("Failed to receive message: size=%zx, err=%d.\n", iov.iov_len, err); + break; + } + + if (err == 0) + break; + + count -= err; + + err = actor(&desc, 
page, offset, err); + *ppos += err; + written += err; + } + + __free_pages(page, 0); + + if (err) + return err; + + return written; +} + ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more) { -- Evgeniy Polyakov - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html