Hello, developers. Here is simple (by no means zero-copy) sendfile support for socket <-> socket connections. It is quite convenient for a programmer to use sendfile/sendfile instead of recv/sendfile.
If this approach is useful and should be cleaned up, I will finish it and probably implement sendpage() for fs, so it could be really useful; otherwise this is just FYI. Thanks. Here is a quite simple and stupid implementation: diff --git a/fs/read_write.c b/fs/read_write.c --- a/fs/read_write.c +++ b/fs/read_write.c @@ -685,7 +685,7 @@ static ssize_t do_sendfile(int out_fd, i retval = -EINVAL; if (unlikely(pos < 0)) goto fput_out; - if (unlikely(pos + count > max)) { + if (unlikely((unsigned long long)(pos + count) > (unsigned long long)max)) { retval = -EOVERFLOW; if (pos >= max) goto fput_out; diff --git a/net/socket.c b/net/socket.c --- a/net/socket.c +++ b/net/socket.c @@ -44,6 +44,8 @@ * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) * Tigran Aivazian : Made listen(2) backlog sanity checks * protocol-independent + * Evgeniy Polyakov: Added recv sendfile path for socket<->socket + * communications, i.e. sock_sendfile(). * * * This program is free software; you can redistribute it and/or @@ -114,6 +116,7 @@ static ssize_t sock_writev(struct file * unsigned long count, loff_t *ppos); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); +ssize_t sock_sendfile(struct file *file, loff_t *ppos, size_t count, read_actor_t actor, void *target); /* @@ -134,7 +137,8 @@ static struct file_operations socket_fil .fasync = sock_fasync, .readv = sock_readv, .writev = sock_writev, - .sendpage = sock_sendpage + .sendpage = sock_sendpage, + .sendfile = sock_sendfile, }; /* @@ -720,6 +724,81 @@ static ssize_t sock_aio_write(struct kio return __sock_sendmsg(iocb, sock, &x->async_msg, size); } +ssize_t sock_sendfile(struct file *file, loff_t *ppos, size_t count, read_actor_t actor, void *target) +{ + struct socket *sock; + struct page *page; + int err = 0; + struct msghdr msg; + struct kvec iov; + size_t size; + read_descriptor_t desc; + unsigned long offset = 0; + size_t orig_count = count; + + printk("%s: ppos=%Lx, 
count=%lx.\n", __func__, *ppos, count); + + if (!count) + return 0; + + if (!ppos != 0) + return -ERANGE; + + desc.written = 0; + desc.count = count; + desc.arg.data = target; + desc.error = 0; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + sock = SOCKET_I(file->f_dentry->d_inode); + + while (count) { + + size = min(count, PAGE_SIZE); + + sock->sk->sk_allocation |= GFP_NOIO; + iov.iov_base = page_address(page); + iov.iov_len = size; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_namelen = 0; + msg.msg_flags = MSG_NOSIGNAL; + + err = kernel_recvmsg(sock, &msg, &iov, 1, size, 0); + + if (signal_pending(current)) { + flush_signals(current); + printk("Interrupted by signal\n"); + return -ERESTARTSYS; + } + + if (err <= 0) { + printk("Failed to receive message: size=%lu, err=%d.\n", iov.iov_len, err); + + if (!err) + err = -EPIPE; + break; + } + + count -= err; + + err = actor(&desc, page, offset, size); + *ppos += err; + } + + __free_pages(page, 0); + + if (err) + return err; + + return orig_count; +} + ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more) { -- Evgeniy Polyakov - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html