This breaks NOIP kernel builds. On 7/6/21, Andrew Gallatin <galla...@freebsd.org> wrote: > The branch main has been updated by gallatin: > > URL: > https://cgit.FreeBSD.org/src/commit/?id=28d0a740dd9a67e4a4fa9fda5bb39b5963316f35 > > commit 28d0a740dd9a67e4a4fa9fda5bb39b5963316f35 > Author: Andrew Gallatin <galla...@freebsd.org> > AuthorDate: 2021-07-06 14:17:33 +0000 > Commit: Andrew Gallatin <galla...@freebsd.org> > CommitDate: 2021-07-06 14:28:32 +0000 > > ktls: auto-disable ifnet (inline hw) kTLS > > Ifnet (inline) hw kTLS NICs typically keep state within > a TLS record, so that when transmitting in-order, > they can continue encryption on each segment sent without > DMA'ing extra state from the host. > > This breaks down when transmits are out of order (eg, > TCP retransmits). In this case, the NIC must re-DMA > the entire TLS record up to and including the segment > being retransmitted. This means that when re-transmitting > the last 1448 byte segment of a TLS record, the NIC will > have to re-DMA the entire 16KB TLS record. This can lead > to the NIC running out of PCIe bus bandwidth well before > it saturates the network link if a lot of TCP connections have > a high retransmit rate. > > This change introduces a new sysctl > (kern.ipc.tls.ifnet_max_rexmit_pct), > where TCP connections with higher retransmit rate will be > switched to SW kTLS so as to conserve PCIe bandwidth. 
> > Reviewed by: hselasky, markj, rrs > Sponsored by: Netflix > Differential Revision: https://reviews.freebsd.org/D30908 > --- > sys/kern/uipc_ktls.c | 107 > ++++++++++++++++++++++++++++++++++++++++++++++++++ > sys/netinet/tcp_var.h | 13 +++++- > sys/sys/ktls.h | 15 ++++++- > 3 files changed, 133 insertions(+), 2 deletions(-) > > diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c > index 7e87e7c740e3..88e29157289d 100644 > --- a/sys/kern/uipc_ktls.c > +++ b/sys/kern/uipc_ktls.c > @@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$"); > > #include "opt_inet.h" > #include "opt_inet6.h" > +#include "opt_kern_tls.h" > #include "opt_ratelimit.h" > #include "opt_rss.h" > > @@ -121,6 +122,11 @@ SYSCTL_INT(_kern_ipc_tls_stats, OID_AUTO, threads, > CTLFLAG_RD, > &ktls_number_threads, 0, > "Number of TLS threads in thread-pool"); > > +unsigned int ktls_ifnet_max_rexmit_pct = 2; > +SYSCTL_UINT(_kern_ipc_tls, OID_AUTO, ifnet_max_rexmit_pct, CTLFLAG_RWTUN, > + &ktls_ifnet_max_rexmit_pct, 2, > + "Max percent bytes retransmitted before ifnet TLS is disabled"); > + > static bool ktls_offload_enable; > SYSCTL_BOOL(_kern_ipc_tls, OID_AUTO, enable, CTLFLAG_RWTUN, > &ktls_offload_enable, 0, > @@ -184,6 +190,14 @@ static COUNTER_U64_DEFINE_EARLY(ktls_switch_failed); > SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, switch_failed, > CTLFLAG_RD, > &ktls_switch_failed, "TLS sessions unable to switch between SW and > ifnet"); > > +static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_disable_fail); > +SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, ifnet_disable_failed, > CTLFLAG_RD, > + &ktls_ifnet_disable_fail, "TLS sessions unable to switch to SW from > ifnet"); > + > +static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_disable_ok); > +SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, ifnet_disable_ok, > CTLFLAG_RD, > + &ktls_ifnet_disable_ok, "TLS sessions able to switch to SW from > ifnet"); > + > SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, sw, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, > "Software TLS session stats"); > 
SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, ifnet, CTLFLAG_RD | CTLFLAG_MPSAFE, > 0, > @@ -2187,3 +2201,96 @@ ktls_work_thread(void *ctx) > } > } > } > + > +static void > +ktls_disable_ifnet_help(void *context, int pending __unused) > +{ > + struct ktls_session *tls; > + struct inpcb *inp; > + struct tcpcb *tp; > + struct socket *so; > + int err; > + > + tls = context; > + inp = tls->inp; > + if (inp == NULL) > + return; > + INP_WLOCK(inp); > + so = inp->inp_socket; > + MPASS(so != NULL); > + if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) || > + (inp->inp_flags2 & INP_FREED)) { > + goto out; > + } > + > + if (so->so_snd.sb_tls_info != NULL) > + err = ktls_set_tx_mode(so, TCP_TLS_MODE_SW); > + else > + err = ENXIO; > + if (err == 0) { > + counter_u64_add(ktls_ifnet_disable_ok, 1); > + /* ktls_set_tx_mode() drops inp wlock, so recheck flags */ > + if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0 && > + (inp->inp_flags2 & INP_FREED) == 0 && > + (tp = intotcpcb(inp)) != NULL && > + tp->t_fb->tfb_hwtls_change != NULL) > + (*tp->t_fb->tfb_hwtls_change)(tp, 0); > + } else { > + counter_u64_add(ktls_ifnet_disable_fail, 1); > + } > + > +out: > + SOCK_LOCK(so); > + sorele(so); > + if (!in_pcbrele_wlocked(inp)) > + INP_WUNLOCK(inp); > + ktls_free(tls); > +} > + > +/* > + * Called when re-transmits are becoming a substantial portion of the > + * sends on this connection. When this happens, we transition the > + * connection to software TLS. This is needed because most inline TLS > + * NICs keep crypto state only for in-order transmits. This means > + * that to handle a TCP rexmit (which is out-of-order), the NIC must > + * re-DMA the entire TLS record up to and including the current > + * segment. This means that when re-transmitting the last ~1448 byte > + * segment of a 16KB TLS record, we could wind up re-DMA'ing an order > + * of magnitude more data than we are sending. 
This can cause the > + * PCIe link to saturate well before the network, which can cause > + * output drops, and a general loss of capacity. > + */ > +void > +ktls_disable_ifnet(void *arg) > +{ > + struct tcpcb *tp; > + struct inpcb *inp; > + struct socket *so; > + struct ktls_session *tls; > + > + tp = arg; > + inp = tp->t_inpcb; > + INP_WLOCK_ASSERT(inp); > + so = inp->inp_socket; > + SOCK_LOCK(so); > + tls = so->so_snd.sb_tls_info; > + if (tls->disable_ifnet_pending) { > + SOCK_UNLOCK(so); > + return; > + } > + > + /* > + * note that disable_ifnet_pending is never cleared; disabling > + * ifnet can only be done once per session, so we never want > + * to do it again > + */ > + > + (void)ktls_hold(tls); > + in_pcbref(inp); > + soref(so); > + tls->disable_ifnet_pending = true; > + tls->inp = inp; > + SOCK_UNLOCK(so); > + TASK_INIT(&tls->disable_ifnet_task, 0, ktls_disable_ifnet_help, tls); > + (void)taskqueue_enqueue(taskqueue_thread, &tls->disable_ifnet_task); > +} > diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h > index dd30f89896d2..3f72a821e71f 100644 > --- a/sys/netinet/tcp_var.h > +++ b/sys/netinet/tcp_var.h > @@ -39,8 +39,10 @@ > #include <netinet/tcp_fsm.h> > > #ifdef _KERNEL > +#include "opt_kern_tls.h" > #include <net/vnet.h> > #include <sys/mbuf.h> > +#include <sys/ktls.h> > #endif > > #define TCP_END_BYTE_INFO 8 /* Bytes that makeup the "end information > array" */ > @@ -1139,8 +1141,10 @@ tcp_fields_to_net(struct tcphdr *th) > > static inline void > tcp_account_for_send(struct tcpcb *tp, uint32_t len, uint8_t is_rxt, > - uint8_t is_tlp, int hw_tls __unused) > + uint8_t is_tlp, int hw_tls) > { > + uint64_t rexmit_percent; > + > if (is_tlp) { > tp->t_sndtlppack++; > tp->t_sndtlpbyte += len; > @@ -1150,6 +1154,13 @@ tcp_account_for_send(struct tcpcb *tp, uint32_t len, > uint8_t is_rxt, > tp->t_snd_rxt_bytes += len; > else > tp->t_sndbytes += len; > + > + if (hw_tls && is_rxt) { > + rexmit_percent = (1000ULL * tp->t_snd_rxt_bytes) / (10ULL * > 
(tp->t_snd_rxt_bytes + tp->t_sndbytes)); > + if (rexmit_percent > ktls_ifnet_max_rexmit_pct) > + ktls_disable_ifnet(tp); > + } > + > } > #endif /* _KERNEL */ > > diff --git a/sys/sys/ktls.h b/sys/sys/ktls.h > index b28c94965c97..7fd8831878b4 100644 > --- a/sys/sys/ktls.h > +++ b/sys/sys/ktls.h > @@ -189,10 +189,12 @@ struct ktls_session { > u_int wq_index; > volatile u_int refcount; > int mode; > - bool reset_pending; > > struct task reset_tag_task; > + struct task disable_ifnet_task; > struct inpcb *inp; > + bool reset_pending; > + bool disable_ifnet_pending; > } __aligned(CACHE_LINE_SIZE); > > void ktls_check_rx(struct sockbuf *sb); > @@ -231,5 +233,16 @@ ktls_free(struct ktls_session *tls) > ktls_destroy(tls); > } > > +#ifdef KERN_TLS > +extern unsigned int ktls_ifnet_max_rexmit_pct; > +void ktls_disable_ifnet(void *arg); > +#else > +#define ktls_ifnet_max_rexmit_pct 1 > +inline void > +ktls_disable_ifnet(void *arg __unused) > +{ > +} > +#endif > + > #endif /* !_KERNEL */ > #endif /* !_SYS_KTLS_H_ */ >
-- Mateusz Guzik <mjguzik gmail.com> _______________________________________________ dev-commits-src-main@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"