This breaks NOIP kernel builds.

On 7/6/21, Andrew Gallatin <galla...@freebsd.org> wrote:
> The branch main has been updated by gallatin:
>
> URL:
> https://cgit.FreeBSD.org/src/commit/?id=28d0a740dd9a67e4a4fa9fda5bb39b5963316f35
>
> commit 28d0a740dd9a67e4a4fa9fda5bb39b5963316f35
> Author:     Andrew Gallatin <galla...@freebsd.org>
> AuthorDate: 2021-07-06 14:17:33 +0000
> Commit:     Andrew Gallatin <galla...@freebsd.org>
> CommitDate: 2021-07-06 14:28:32 +0000
>
>     ktls: auto-disable ifnet (inline hw) kTLS
>
>     Ifnet (inline) hw kTLS NICs typically keep state within
>     a TLS record, so that when transmitting in-order,
>     they can continue encryption on each segment sent without
>     DMA'ing extra state from the host.
>
>     This breaks down when transmits are out of order (eg,
>     TCP retransmits).  In this case, the NIC must re-DMA
>     the entire TLS record up to and including the segment
>     being retransmitted.  This means that when re-transmitting
>     the last 1448 byte segment of a TLS record, the NIC will
>     have to re-DMA the entire 16KB TLS record. This can lead
>     to the NIC running out of PCIe bus bandwidth well before
>     it saturates the network link if a lot of TCP connections have
>     a high retransmoit rate.
>
>     This change introduces a new sysctl
> (kern.ipc.tls.ifnet_max_rexmit_pct),
>     where TCP connections with higher retransmit rate will be
>     switched to SW kTLS so as to conserve PCIe bandwidth.
>
>     Reviewed by:    hselasky, markj, rrs
>     Sponsored by:   Netflix
>     Differential Revision:  https://reviews.freebsd.org/D30908
> ---
>  sys/kern/uipc_ktls.c  | 107
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  sys/netinet/tcp_var.h |  13 +++++-
>  sys/sys/ktls.h        |  15 ++++++-
>  3 files changed, 133 insertions(+), 2 deletions(-)
>
> diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c
> index 7e87e7c740e3..88e29157289d 100644
> --- a/sys/kern/uipc_ktls.c
> +++ b/sys/kern/uipc_ktls.c
> @@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$");
>
>  #include "opt_inet.h"
>  #include "opt_inet6.h"
> +#include "opt_kern_tls.h"
>  #include "opt_ratelimit.h"
>  #include "opt_rss.h"
>
> @@ -121,6 +122,11 @@ SYSCTL_INT(_kern_ipc_tls_stats, OID_AUTO, threads,
> CTLFLAG_RD,
>      &ktls_number_threads, 0,
>      "Number of TLS threads in thread-pool");
>
> +unsigned int ktls_ifnet_max_rexmit_pct = 2;
> +SYSCTL_UINT(_kern_ipc_tls, OID_AUTO, ifnet_max_rexmit_pct, CTLFLAG_RWTUN,
> +    &ktls_ifnet_max_rexmit_pct, 2,
> +    "Max percent bytes retransmitted before ifnet TLS is disabled");
> +
>  static bool ktls_offload_enable;
>  SYSCTL_BOOL(_kern_ipc_tls, OID_AUTO, enable, CTLFLAG_RWTUN,
>      &ktls_offload_enable, 0,
> @@ -184,6 +190,14 @@ static COUNTER_U64_DEFINE_EARLY(ktls_switch_failed);
>  SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, switch_failed,
> CTLFLAG_RD,
>      &ktls_switch_failed, "TLS sessions unable to switch between SW and
> ifnet");
>
> +static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_disable_fail);
> +SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, ifnet_disable_failed,
> CTLFLAG_RD,
> +    &ktls_ifnet_disable_fail, "TLS sessions unable to switch to SW from
> ifnet");
> +
> +static COUNTER_U64_DEFINE_EARLY(ktls_ifnet_disable_ok);
> +SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, ifnet_disable_ok,
> CTLFLAG_RD,
> +    &ktls_ifnet_disable_ok, "TLS sessions able to switch to SW from
> ifnet");
> +
>  SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, sw, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
>      "Software TLS session stats");
>  SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, ifnet, CTLFLAG_RD | CTLFLAG_MPSAFE,
> 0,
> @@ -2187,3 +2201,96 @@ ktls_work_thread(void *ctx)
>               }
>       }
>  }
> +
> +static void
> +ktls_disable_ifnet_help(void *context, int pending __unused)
> +{
> +     struct ktls_session *tls;
> +     struct inpcb *inp;
> +     struct tcpcb *tp;
> +     struct socket *so;
> +     int err;
> +
> +     tls = context;
> +     inp = tls->inp;
> +     if (inp == NULL)
> +             return;
> +     INP_WLOCK(inp);
> +     so = inp->inp_socket;
> +     MPASS(so != NULL);
> +     if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) ||
> +         (inp->inp_flags2 & INP_FREED)) {
> +             goto out;
> +     }
> +
> +     if (so->so_snd.sb_tls_info != NULL)
> +             err = ktls_set_tx_mode(so, TCP_TLS_MODE_SW);
> +     else
> +             err = ENXIO;
> +     if (err == 0) {
> +             counter_u64_add(ktls_ifnet_disable_ok, 1);
> +             /* ktls_set_tx_mode() drops inp wlock, so recheck flags */
> +             if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0 &&
> +                 (inp->inp_flags2 & INP_FREED) == 0 &&
> +                 (tp = intotcpcb(inp)) != NULL &&
> +                 tp->t_fb->tfb_hwtls_change != NULL)
> +                     (*tp->t_fb->tfb_hwtls_change)(tp, 0);
> +     } else {
> +             counter_u64_add(ktls_ifnet_disable_fail, 1);
> +     }
> +
> +out:
> +     SOCK_LOCK(so);
> +     sorele(so);
> +     if (!in_pcbrele_wlocked(inp))
> +             INP_WUNLOCK(inp);
> +     ktls_free(tls);
> +}
> +
> +/*
> + * Called when re-transmits are becoming a substantial portion of the
> + * sends on this connection.  When this happens, we transition the
> + * connection to software TLS.  This is needed because most inline TLS
> + * NICs keep crypto state only for in-order transmits.  This means
> + * that to handle a TCP rexmit (which is out-of-order), the NIC must
> + * re-DMA the entire TLS record up to and including the current
> + * segment.  This means that when re-transmitting the last ~1448 byte
> + * segment of a 16KB TLS record, we could wind up re-DMA'ing an order
> + * of magnitude more data than we are sending.  This can cause the
> + * PCIe link to saturate well before the network, which can cause
> + * output drops, and a general loss of capacity.
> + */
> +void
> +ktls_disable_ifnet(void *arg)
> +{
> +     struct tcpcb *tp;
> +     struct inpcb *inp;
> +     struct socket *so;
> +     struct ktls_session *tls;
> +
> +     tp = arg;
> +     inp = tp->t_inpcb;
> +     INP_WLOCK_ASSERT(inp);
> +     so = inp->inp_socket;
> +     SOCK_LOCK(so);
> +     tls = so->so_snd.sb_tls_info;
> +     if (tls->disable_ifnet_pending) {
> +             SOCK_UNLOCK(so);
> +             return;
> +     }
> +
> +     /*
> +      * note that disable_ifnet_pending is never cleared; disabling
> +      * ifnet can only be done once per session, so we never want
> +      * to do it again
> +      */
> +
> +     (void)ktls_hold(tls);
> +     in_pcbref(inp);
> +     soref(so);
> +     tls->disable_ifnet_pending = true;
> +     tls->inp = inp;
> +     SOCK_UNLOCK(so);
> +     TASK_INIT(&tls->disable_ifnet_task, 0, ktls_disable_ifnet_help, tls);
> +     (void)taskqueue_enqueue(taskqueue_thread, &tls->disable_ifnet_task);
> +}
> diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
> index dd30f89896d2..3f72a821e71f 100644
> --- a/sys/netinet/tcp_var.h
> +++ b/sys/netinet/tcp_var.h
> @@ -39,8 +39,10 @@
>  #include <netinet/tcp_fsm.h>
>
>  #ifdef _KERNEL
> +#include "opt_kern_tls.h"
>  #include <net/vnet.h>
>  #include <sys/mbuf.h>
> +#include <sys/ktls.h>
>  #endif
>
>  #define TCP_END_BYTE_INFO 8  /* Bytes that makeup the "end information
> array" */
> @@ -1139,8 +1141,10 @@ tcp_fields_to_net(struct tcphdr *th)
>
>  static inline void
>  tcp_account_for_send(struct tcpcb *tp, uint32_t len, uint8_t is_rxt,
> -    uint8_t is_tlp, int hw_tls __unused)
> +    uint8_t is_tlp, int hw_tls)
>  {
> +     uint64_t rexmit_percent;
> +
>       if (is_tlp) {
>               tp->t_sndtlppack++;
>               tp->t_sndtlpbyte += len;
> @@ -1150,6 +1154,13 @@ tcp_account_for_send(struct tcpcb *tp, uint32_t len,
> uint8_t is_rxt,
>               tp->t_snd_rxt_bytes += len;
>       else
>               tp->t_sndbytes += len;
> +
> +     if (hw_tls && is_rxt) {
> +             rexmit_percent = (1000ULL * tp->t_snd_rxt_bytes) / (10ULL *
> (tp->t_snd_rxt_bytes + tp->t_sndbytes));
> +             if (rexmit_percent > ktls_ifnet_max_rexmit_pct)
> +                     ktls_disable_ifnet(tp);
> +     }
> +
>  }
>  #endif /* _KERNEL */
>
> diff --git a/sys/sys/ktls.h b/sys/sys/ktls.h
> index b28c94965c97..7fd8831878b4 100644
> --- a/sys/sys/ktls.h
> +++ b/sys/sys/ktls.h
> @@ -189,10 +189,12 @@ struct ktls_session {
>       u_int   wq_index;
>       volatile u_int refcount;
>       int mode;
> -     bool reset_pending;
>
>       struct task reset_tag_task;
> +     struct task disable_ifnet_task;
>       struct inpcb *inp;
> +     bool reset_pending;
> +     bool disable_ifnet_pending;
>  } __aligned(CACHE_LINE_SIZE);
>
>  void ktls_check_rx(struct sockbuf *sb);
> @@ -231,5 +233,16 @@ ktls_free(struct ktls_session *tls)
>               ktls_destroy(tls);
>  }
>
> +#ifdef KERN_TLS
> +extern unsigned int ktls_ifnet_max_rexmit_pct;
> +void ktls_disable_ifnet(void *arg);
> +#else
> +#define ktls_ifnet_max_rexmit_pct 1
> +inline void
> +ktls_disable_ifnet(void *arg __unused)
> +{
> +}
> +#endif
> +
>  #endif /* !_KERNEL */
>  #endif /* !_SYS_KTLS_H_ */
>


-- 
Mateusz Guzik <mjguzik gmail.com>
_______________________________________________
dev-commits-src-main@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/dev-commits-src-main
To unsubscribe, send any mail to "dev-commits-src-main-unsubscr...@freebsd.org"

Reply via email to