From: Paolo Abeni <[email protected]>
The MPTCP output path access locklessly the MPTCP-level ack_seq
in multiple times, using possibly different values for the data_ack
in the DSS option and to compute the announced rcv wnd for the same
packet.
Refactor the cote to avoid inconsistencies which may confuse the
peer. Also ensure that the MPTCP level rcv wnd is updated only when
the egress packet actually contains a DSS ack.
Fixes: fa3fe2b15031 ("mptcp: track window announced to peer")
Cc: [email protected]
Signed-off-by: Paolo Abeni <[email protected]>
Reviewed-by: Matthieu Baerts (NGI0) <[email protected]>
Signed-off-by: Matthieu Baerts (NGI0) <[email protected]>
---
net/mptcp/options.c | 36 ++++++++++++++++++------------------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 8a1c5698983c..5c228344e83f 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -570,7 +570,6 @@ static bool mptcp_established_options_dss(struct sock *sk,
struct sk_buff *skb,
struct mptcp_ext *mpext;
unsigned int ack_size;
bool ret = false;
- u64 ack_seq;
opts->csum_reqd = READ_ONCE(msk->csum_enabled);
mpext = skb ? mptcp_get_ext(skb) : NULL;
@@ -601,14 +600,11 @@ static bool mptcp_established_options_dss(struct sock
*sk, struct sk_buff *skb,
return ret;
}
- ack_seq = READ_ONCE(msk->ack_seq);
if (READ_ONCE(msk->use_64bit_ack)) {
ack_size = TCPOLEN_MPTCP_DSS_ACK64;
- opts->ext_copy.data_ack = ack_seq;
opts->ext_copy.ack64 = 1;
} else {
ack_size = TCPOLEN_MPTCP_DSS_ACK32;
- opts->ext_copy.data_ack32 = (uint32_t)ack_seq;
opts->ext_copy.ack64 = 0;
}
opts->ext_copy.use_ack = 1;
@@ -1297,19 +1293,14 @@ bool mptcp_incoming_options(struct sock *sk, struct
sk_buff *skb)
return true;
}
-static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
+static u64 mptcp_set_rwin(struct mptcp_sock *msk, struct tcp_sock *tp,
+ struct tcphdr *th, u64 ack_seq)
{
const struct sock *ssk = (const struct sock *)tp;
- struct mptcp_subflow_context *subflow;
- u64 ack_seq, rcv_wnd_old, rcv_wnd_new;
- struct mptcp_sock *msk;
+ u64 rcv_wnd_old, rcv_wnd_new;
u32 new_win;
u64 win;
- subflow = mptcp_subflow_ctx(ssk);
- msk = mptcp_sk(subflow->conn);
-
- ack_seq = READ_ONCE(msk->ack_seq);
rcv_wnd_new = ack_seq + tp->rcv_wnd;
rcv_wnd_old = atomic64_read(&msk->rcv_wnd_sent);
@@ -1362,7 +1353,7 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct
tcphdr *th)
update_wspace:
WRITE_ONCE(msk->old_wspace, tp->rcv_wnd);
- subflow->rcv_wnd_sent = rcv_wnd_new;
+ return rcv_wnd_new;
}
static void mptcp_track_rwin(struct tcp_sock *tp)
@@ -1474,13 +1465,25 @@ void mptcp_write_options(struct tcphdr *th, __be32
*ptr, struct tcp_sock *tp,
*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
if (mpext->use_ack) {
+ struct mptcp_sock *msk;
+ u64 ack_seq;
+
+ /* DSS option is set only by mptcp_established_option,
+ * the caller is __tcp_transmit_skb() and ssk is always
+ * not NULL.
+ */
+ subflow = mptcp_subflow_ctx(ssk);
+ msk = mptcp_sk(subflow->conn);
+ ack_seq = READ_ONCE(msk->ack_seq);
if (mpext->ack64) {
- put_unaligned_be64(mpext->data_ack, ptr);
+ put_unaligned_be64(ack_seq, ptr);
ptr += 2;
} else {
- put_unaligned_be32(mpext->data_ack32, ptr);
+ put_unaligned_be32(ack_seq, ptr);
ptr += 1;
}
+ subflow->rcv_wnd_sent = mptcp_set_rwin(msk, tp, th,
+ ack_seq);
}
if (mpext->use_map) {
@@ -1708,9 +1711,6 @@ void mptcp_write_options(struct tcphdr *th, __be32 *ptr,
struct tcp_sock *tp,
i += 4;
}
}
-
- if (tp)
- mptcp_set_rwin(tp, th);
}
__be32 mptcp_get_reset_option(const struct sk_buff *skb)
--
2.53.0