Hans Petter Selasky wrote:
> On 09/05/14 23:19, Eric Joyner wrote:
> > There are some concerns if we use this with devices that ixl
> > supports:
> >
> > - The maximum fragment size is 16KB-1, which isn't a power of 2.
> >
> 
> Hi Eric,
> 
> Multiplying by powers of two is faster than by non-powers of two.
> So
> in this case you would have to use 8KB as a maximum.
> 
Well, I'm no architecture expert, but I really doubt the CPU delay of a
non-power of 2 multiply/divide is significant relative to doing smaller
TSO segments. Long ago (as in the 1970s) I did work on machines where shifts
for power of 2 multiply/divide were preferable, but these days I doubt it
is going to matter.

> > - You can't get the maximum TSO size for ixl devices by multiplying
> > the
> > maximum number of fragments by the maximum size.
> > Instead the number of fragments is AFAIK unlimited, but a segment
> > can only
> > span 8 mbufs (including the [up to 3] mbufs containing the header),
> > and the
> > maximum TSO size is 256KB.
> >
> > And one question:
> >
> > - Is hdr_size_log2 supposed to be the length of the L2 header? We
> > can fit
> > 254 L2 bytes in our hardware during a TSO, so if that's the value,
> > I guess
> > that's fine, barring the it-not-being-a-power-of-2 issue.
> 
> This is the ethernet / vlan headers. It is added with the
> TCP/IP-header
> in the end.
> 
> >
> > With all that said, the 8 mbuf limit per segment issue is a TSO
> > limitation
> > that we'd like to notify the stack about, so I wonder if that could
> > be
> > incorporated along with this. Right now, our driver checks to see
> > if a
> > segment in a TSO spans more than six mbufs and then m_defrag()'s
> > the entire
> > chain if one exists; it's less than optimal but necessary to
> > prevent errors.
> 
At this time, if there is a limit of 8 TSO segments (mbufs) in a
transmit list, you will need to set:
 ifp->if_hw_tsomax = 8 * MCLBYTES - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);

- just before the call to
 ether_ifattach(ifp);

I do have an untested patch (attached in case anyone is interested) which
adds if_hw_tsomaxseg that drivers can set to their maximum number of transmit
segments (mbufs) for TSO. This value is then used by tcp_output() to generate
appropriately sized TSO segments.
However, I'm just working on getting a way to test this patch, so I can't say
if/when it will be in head.

rick

 

> It is not impossible to move from log2 syntax to non-log2 syntax,
> since
> the logic will be exactly the same, only that the required division
> and
> multiplication will have a bit of overhead, I guess.
> 
> Could you make a patch on top of my patch with the changes you think
> are
> required to fully support the ixl hardware? Or propose a new patch
> which
> also serves the MLX needs?
> 
> Thank you!
> 
> --HPS
> 
> _______________________________________________
> freebsd-net@freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-net
> To unsubscribe, send any mail to
> "freebsd-net-unsubscr...@freebsd.org"
> 
--- kern/uipc_sockbuf.c.sav	2014-01-30 20:27:17.000000000 -0500
+++ kern/uipc_sockbuf.c	2014-01-30 22:12:08.000000000 -0500
@@ -965,6 +965,39 @@ sbsndptr(struct sockbuf *sb, u_int off, 
 }
 
 /*
+ * Return the first mbuf for the provided offset.
+ */
+struct mbuf *
+sbsndmbuf(struct sockbuf *sb, u_int off, long *first_len)
+{
+	struct mbuf *m;
+
+	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
+
+	*first_len = 0;
+	/*
+	 * Is off below stored offset? Happens on retransmits.
+	 * If so, just use sb_mb.
+	 */
+	if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off)
+		m = sb->sb_mb;
+	else {
+		m = sb->sb_sndptr;
+		off -= sb->sb_sndptroff;
+	}
+	while (off > 0 && m != NULL) {
+		if (off < m->m_len)
+			break;
+		off -= m->m_len;
+		m = m->m_next;
+	}
+	if (m != NULL)
+		*first_len = m->m_len - off;
+
+	return (m);
+}
+
+/*
  * Drop a record off the front of a sockbuf and move the next record to the
  * front.
  */
--- sys/sockbuf.h.sav	2014-01-30 20:42:28.000000000 -0500
+++ sys/sockbuf.h	2014-01-30 22:08:43.000000000 -0500
@@ -153,6 +153,8 @@ int	sbreserve_locked(struct sockbuf *sb,
 	    struct thread *td);
 struct mbuf *
 	sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff);
+struct mbuf *
+	sbsndmbuf(struct sockbuf *sb, u_int off, long *first_len);
 void	sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb);
 int	sbwait(struct sockbuf *sb);
 int	sblock(struct sockbuf *sb, int flags);
--- netinet/tcp_input.c.sav	2014-01-30 19:37:52.000000000 -0500
+++ netinet/tcp_input.c	2014-01-30 19:39:07.000000000 -0500
@@ -3627,6 +3627,7 @@ tcp_mss(struct tcpcb *tp, int offer)
 	if (cap.ifcap & CSUM_TSO) {
 		tp->t_flags |= TF_TSO;
 		tp->t_tsomax = cap.tsomax;
+		tp->t_tsomaxsegs = cap.tsomaxsegs;
 	}
 }
 
--- netinet/tcp_output.c.sav	2014-01-30 18:55:15.000000000 -0500
+++ netinet/tcp_output.c	2014-01-30 22:18:56.000000000 -0500
@@ -166,8 +166,8 @@ int
 tcp_output(struct tcpcb *tp)
 {
 	struct socket *so = tp->t_inpcb->inp_socket;
-	long len, recwin, sendwin;
-	int off, flags, error = 0;	/* Keep compiler happy */
+	long len, recwin, sendwin, tso_tlen;
+	int cnt, off, flags, error = 0;	/* Keep compiler happy */
 	struct mbuf *m;
 	struct ip *ip = NULL;
 	struct ipovly *ipov = NULL;
@@ -780,6 +780,24 @@ send:
 			}
 
 			/*
+			 * Limit the number of TSO transmit segments (mbufs
+			 * in mbuf list) to tp->t_tsomaxsegs.
+			 */
+			cnt = 0;
+			m = sbsndmbuf(&so->so_snd, off, &tso_tlen);
+			while (m != NULL && cnt < tp->t_tsomaxsegs &&
+			    tso_tlen < len) {
+				if (cnt > 0)
+					tso_tlen += m->m_len;
+				cnt++;
+				m = m->m_next;
+			}
+			if (m != NULL && tso_tlen < len) {
+				len = tso_tlen;
+				sendalot = 1;
+			}
+
+			/*
 			 * Prevent the last segment from being
 			 * fractional unless the send sockbuf can
 			 * be emptied.
--- netinet/tcp_subr.c.sav	2014-01-30 19:44:35.000000000 -0500
+++ netinet/tcp_subr.c	2014-01-30 20:56:12.000000000 -0500
@@ -1800,6 +1800,12 @@ tcp_maxmtu(struct in_conninfo *inc, stru
 			    ifp->if_hwassist & CSUM_TSO)
 				cap->ifcap |= CSUM_TSO;
 				cap->tsomax = ifp->if_hw_tsomax;
+#ifdef notyet
+				cap->tsomaxsegs = ifp->if_hw_tsomaxsegs;
+#endif
+				if (cap->tsomaxsegs == 0)
+					cap->tsomaxsegs =
+					    TCPTSO_MAX_TX_SEGS_DEFAULT;
 		}
 		RTFREE(sro.ro_rt);
 	}
--- netinet/tcp_var.h.sav	2014-01-30 19:39:22.000000000 -0500
+++ netinet/tcp_var.h	2014-01-30 20:52:57.000000000 -0500
@@ -209,6 +209,7 @@ struct tcpcb {
 	u_int	t_keepcnt;		/* number of keepalives before close */
 
 	u_int	t_tsomax;		/* tso burst length limit */
+	u_int	t_tsomaxsegs;		/* tso burst segment limit */
 
 	uint32_t t_ispare[8];		/* 5 UTO, 3 TBD */
 	void	*t_pspare2[4];		/* 4 TBD */
@@ -268,6 +269,11 @@ struct tcpcb {
 #define	TCPOOB_HAVEDATA	0x01
 #define	TCPOOB_HADDATA	0x02
 
+/*
+ * Default value for TSO maximum number of transmit segments (count of mbufs).
+ */
+#define	TCPTSO_MAX_TX_SEGS_DEFAULT	30
+
 #ifdef TCP_SIGNATURE
 /*
  * Defines which are needed by the xform_tcp module and tcp_[in|out]put
@@ -333,6 +339,7 @@ struct hc_metrics_lite {	/* must stay in
 struct tcp_ifcap {
 	int	ifcap;
 	u_int	tsomax;
+	u_int	tsomaxsegs;
 };
 
 #ifndef _NETINET_IN_PCB_H_
_______________________________________________
freebsd-net@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-net
To unsubscribe, send any mail to "freebsd-net-unsubscr...@freebsd.org"

Reply via email to