This patch adds an eor bit to TCP_SKB_CB.  When MSG_EOR
is passed to tcp_sendmsg/tcp_sendpage, the eor bit is
set on the skb containing the last byte of the userland
msg.  The eor bit prevents further data from being
appended to that skb.
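
For illustration, a minimal userland sketch of the boundary
effect (fd, resp1/resp2 and their lengths are made-up names,
and fd is assumed to be a connected TCP socket):

    /*
     * MSG_EOR marks the skb holding the end of the first message,
     * so the second message's data starts in a new skb instead of
     * being appended to the marked one.
     */
    send(fd, resp1, resp1_len, MSG_EOR);
    send(fd, resp2, resp2_len, 0);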

This patch handles the tcp_sendmsg and tcp_sendpage cases.

Follow-up patches will handle the other skb coalescing
and fragment cases.

One potential use case is to use MSG_EOR with
SOF_TIMESTAMPING_TX_ACK to get more accurate
TCP ack timestamping on application protocols with
multiple outgoing response messages (e.g. HTTP/2).
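
A rough usage sketch (not part of this patch; fd, resp and
resp_len are made-up names, fd is an assumed connected TCP
socket, and error handling is omitted):

    #include <sys/socket.h>
    #include <linux/net_tstamp.h>

    unsigned int val = SOF_TIMESTAMPING_TX_ACK |
                       SOF_TIMESTAMPING_SOFTWARE |
                       SOF_TIMESTAMPING_OPT_ID;

    setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));

    /*
     * One send() per response message.  MSG_EOR keeps the next
     * response from being appended to this skb, so the ack
     * timestamp (read back later via recvmsg() on the
     * MSG_ERRQUEUE error queue) corresponds to the last byte
     * of this message.
     */
    send(fd, resp, resp_len, MSG_EOR);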

Signed-off-by: Martin KaFai Lau <ka...@fb.com>
Cc: Eric Dumazet <eduma...@google.com>
Cc: Neal Cardwell <ncardw...@google.com>
Cc: Soheil Hassas Yeganeh <soh...@google.com>
Cc: Willem de Bruijn <will...@google.com>
Cc: Yuchung Cheng <ych...@google.com>
Suggested-by: Eric Dumazet <eduma...@google.com>
---
 include/net/tcp.h | 3 ++-
 net/ipv4/tcp.c    | 7 +++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index c0ef054..ac31798 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -762,7 +762,8 @@ struct tcp_skb_cb {
 
        __u8            ip_dsfield;     /* IPv4 tos or IPv6 dsfield     */
        __u8            txstamp_ack:1,  /* Record TX timestamp for ack? */
-                       unused:7;
+                       eor:1,          /* Is skb MSG_EOR marked */
+                       unused:6;
        __u32           ack_seq;        /* Sequence number ACK'd        */
        union {
                struct inet_skb_parm    h4;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4d73858..7df0c1a88 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -908,7 +908,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
                int copy, i;
                bool can_coalesce;
 
-               if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
+               if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 ||
+                   TCP_SKB_CB(skb)->eor) {
 new_segment:
                        if (!sk_stream_memory_free(sk))
                                goto wait_for_sndbuf;
@@ -960,6 +961,7 @@ new_segment:
                size -= copy;
                if (!size) {
                        tcp_tx_timestamp(sk, sk->sk_tsflags, skb);
+                       TCP_SKB_CB(skb)->eor = !!(flags & MSG_EOR);
                        goto out;
                }
 
@@ -1156,7 +1158,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
                        copy = max - skb->len;
                }
 
-               if (copy <= 0) {
+               if (copy <= 0 || TCP_SKB_CB(skb)->eor) {
 new_segment:
                        /* Allocate new segment. If the interface is SG,
                         * allocate skb fitting to single page.
@@ -1250,6 +1252,7 @@ new_segment:
                copied += copy;
                if (!msg_data_left(msg)) {
                        tcp_tx_timestamp(sk, sockc.tsflags, skb);
+                       TCP_SKB_CB(skb)->eor = !!(flags & MSG_EOR);
                        goto out;
                }
 
-- 
2.5.1
