From: Ilpo Järvinen <i...@kernel.org>

These three byte counters track IP ECN field payload byte sums for
all arriving (acceptable) packets for ECT0, ECT1, and CE. The
AccECN option (added by a later patch in the series) echoes these
counters back to sender side; therefore, it is placed within the
group of tcp_sock_write_txrx.

Below are the pahole outcomes before and after this patch, in which
the group size of tcp_sock_write_txrx is increased from 95 + 4 to
107 + 4 and an extra 4-byte hole is created but will be exploited
in later patches:

[BEFORE THIS PATCH]
struct tcp_sock {
    [...]
    u32                        delivered_ce;         /*  2640     4 */
    u32                        received_ce;          /*  2644     4 */
    u32                        app_limited;          /*  2648     4 */
    u32                        rcv_wnd;              /*  2652     4 */
    struct tcp_options_received rx_opt;              /*  2656    24 */
    __cacheline_group_end__tcp_sock_write_txrx[0];   /*  2680     0 */

    [...]
    /* size: 3264, cachelines: 51, members: 169 */
}

[AFTER THIS PATCH]
struct tcp_sock {
    [...]
    u32                        delivered_ce;         /*  2640     4 */
    u32                        received_ce;          /*  2644     4 */
    u32                        received_ecn_bytes[3];/*  2648    12 */
    u32                        app_limited;          /*  2660     4 */
    u32                        rcv_wnd;              /*  2664     4 */
    struct tcp_options_received rx_opt;              /*  2668    24 */
    __cacheline_group_end__tcp_sock_write_txrx[0];   /*  2692     0 */
    /* XXX 4 bytes hole, try to pack */

    [...]
    /* size: 3264, cachelines: 51, members: 170 */
}

Signed-off-by: Ilpo Järvinen <i...@kernel.org>
Signed-off-by: Neal Cardwell <ncardw...@google.com>
Co-developed-by: Chia-Yu Chang <chia-yu.ch...@nokia-bell-labs.com>
Signed-off-by: Chia-Yu Chang <chia-yu.ch...@nokia-bell-labs.com>
---
 .../networking/net_cachelines/tcp_sock.rst     |  1 +
 include/linux/tcp.h                            |  4 ++++
 include/net/tcp.h                              | 18 +++++++++++++++++-
 net/ipv4/tcp.c                                 |  3 ++-
 net/ipv4/tcp_input.c                           | 13 +++++++++----
 net/ipv4/tcp_minisocks.c                       |  3 ++-
 6 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst 
b/Documentation/networking/net_cachelines/tcp_sock.rst
index eef8700085de..64306ae096cd 100644
--- a/Documentation/networking/net_cachelines/tcp_sock.rst
+++ b/Documentation/networking/net_cachelines/tcp_sock.rst
@@ -102,6 +102,7 @@ u32                           prr_out                 
read_mostly         read_m
 u32                           delivered               read_mostly         
read_write          tcp_rate_skb_sent, tcp_newly_delivered(tx);tcp_ack, 
tcp_rate_gen, tcp_clean_rtx_queue (rx)
 u32                           delivered_ce            read_mostly         
read_write          tcp_rate_skb_sent(tx);tcp_rate_gen(rx)
 u32                           received_ce             read_mostly         
read_write
+u32[3]                        received_ecn_bytes      read_mostly         
read_write
 u8:4                          received_ce_pending     read_mostly         
read_write
 u8:2                          syn_ect_snt             write_mostly        
read_write
 u8:2                          syn_ect_rcv             read_mostly         
read_write
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index b0e6a11a3fa1..427f2adab247 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -307,6 +307,10 @@ struct tcp_sock {
        u32     delivered;      /* Total data packets delivered incl. rexmits */
        u32     delivered_ce;   /* Like the above but only ECE marked packets */
        u32     received_ce;    /* Like the above but for rcvd CE marked pkts */
+       u32     received_ecn_bytes[3]; /* received byte counters for three ECN
+                                       * types: INET_ECN_ECT_1, INET_ECN_ECT_0,
+                                       * and INET_ECN_CE
+                                       */
        u32     app_limited;    /* limited until "delivered" reaches this val */
        u32     rcv_wnd;        /* Current receiver window              */
 /*
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5fce4b49e132..101e07b8a089 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -467,7 +467,8 @@ static inline int tcp_accecn_extract_syn_ect(u8 ace)
 bool tcp_accecn_validate_syn_feedback(struct sock *sk, u8 ace, u8 sent_ect);
 void tcp_accecn_third_ack(struct sock *sk, const struct sk_buff *skb,
                          u8 syn_ect_snt);
-void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb);
+void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb,
+                              u32 payload_len);
 
 enum tcp_tw_status {
        TCP_TW_SUCCESS = 0,
@@ -1035,6 +1036,20 @@ static inline u32 tcp_rsk_tsval(const struct 
tcp_request_sock *treq)
  * See draft-ietf-tcpm-accurate-ecn for the latest values.
  */
 #define TCP_ACCECN_CEP_INIT_OFFSET 5
+#define TCP_ACCECN_E1B_INIT_OFFSET 1
+#define TCP_ACCECN_E0B_INIT_OFFSET 1
+#define TCP_ACCECN_CEB_INIT_OFFSET 0
+
+static inline void __tcp_accecn_init_bytes_counters(int *counter_array)
+{
+       BUILD_BUG_ON(INET_ECN_ECT_1 != 0x1);
+       BUILD_BUG_ON(INET_ECN_ECT_0 != 0x2);
+       BUILD_BUG_ON(INET_ECN_CE != 0x3);
+
+       counter_array[INET_ECN_ECT_1 - 1] = 0;
+       counter_array[INET_ECN_ECT_0 - 1] = 0;
+       counter_array[INET_ECN_CE - 1] = 0;
+}
 
 /* The highest ECN variant (Accurate ECN, ECN, or no ECN) that is
  * attemped to be negotiated and requested for incoming connection
@@ -1053,6 +1068,7 @@ static inline void tcp_accecn_init_counters(struct 
tcp_sock *tp)
 {
        tp->received_ce = 0;
        tp->received_ce_pending = 0;
+       __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes);
 }
 
 /* State flags for sacked in struct tcp_skb_cb */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 29146bb3585a..af52ebfdd734 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -5127,6 +5127,7 @@ static void __init tcp_struct_check(void)
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, 
delivered);
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, 
delivered_ce);
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, 
received_ce);
+       CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, 
received_ecn_bytes);
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, 
app_limited);
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, 
rcv_wnd);
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, 
rx_opt);
@@ -5134,7 +5135,7 @@ static void __init tcp_struct_check(void)
        /* 32bit arches with 8byte alignment on u64 fields might need padding
         * before tcp_clock_cache.
         */
-       CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 95 + 
4);
+       CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 107 + 
4);
 
        /* RX read-write hotpath cache lines */
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, 
bytes_received);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f0475f95e112..76fda2a63add 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6135,7 +6135,8 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, 
const struct tcphdr *t
 }
 
 /* Updates Accurate ECN received counters from the received IP ECN field */
-void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb)
+void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb,
+                              u32 payload_len)
 {
        u8 ecnfield = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
        u8 is_ce = INET_ECN_is_ce(ecnfield);
@@ -6150,6 +6151,9 @@ void tcp_ecn_received_counters(struct sock *sk, const 
struct sk_buff *skb)
                tp->received_ce += pcount;
                tp->received_ce_pending = min(tp->received_ce_pending + pcount,
                                              0xfU);
+
+               if (payload_len > 0)
+                       tp->received_ecn_bytes[ecnfield - 1] += payload_len;
        }
 }
 
@@ -6423,7 +6427,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff 
*skb)
                                        flag |= __tcp_replace_ts_recent(tp,
                                                                        delta);
 
-                               tcp_ecn_received_counters(sk, skb);
+                               tcp_ecn_received_counters(sk, skb, 0);
 
                                /* We know that such packets are checksummed
                                 * on entry.
@@ -6469,7 +6473,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff 
*skb)
                        /* Bulk data transfer: receiver */
                        tcp_cleanup_skb(skb);
                        __skb_pull(skb, tcp_header_len);
-                       tcp_ecn_received_counters(sk, skb);
+                       tcp_ecn_received_counters(sk, skb,
+                                                 len - tcp_header_len);
                        eaten = tcp_queue_rcv(sk, skb, &fragstolen);
 
                        tcp_event_data_recv(sk, skb);
@@ -6516,7 +6521,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff 
*skb)
                        tcp_accecn_third_ack(sk, skb, tp->syn_ect_snt);
                tcp_fast_path_on(tp);
        }
-       tcp_ecn_received_counters(sk, skb);
+       tcp_ecn_received_counters(sk, skb, len - th->doff * 4);
 
        reason = tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT);
        if ((int)reason < 0) {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 779a206a5ca6..3f8225bae49f 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -497,10 +497,11 @@ static void tcp_ecn_openreq_child(struct sock *sk,
        struct tcp_sock *tp = tcp_sk(sk);
 
        if (treq->accecn_ok) {
+               const struct tcphdr *th = (const struct tcphdr *)skb->data;
                tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
                tp->syn_ect_snt = treq->syn_ect_snt;
                tcp_accecn_third_ack(sk, skb, treq->syn_ect_snt);
-               tcp_ecn_received_counters(sk, skb);
+               tcp_ecn_received_counters(sk, skb, skb->len - th->doff * 4);
        } else {
                tcp_ecn_mode_set(tp, inet_rsk(req)->ecn_ok ?
                                     TCP_ECN_MODE_RFC3168 :
-- 
2.34.1


Reply via email to