With the STT implementation I saw performance improvements from linearizing skbs in the SLUB case, so the following patch skips the zero-copy operation for that case. The first change is to the reassembly code: an in-order packet is merged into the head skb, and if there is no room to merge it, the combined packet is linearized. The second case is reassembly of out-of-order packets; there the list of packets is linearized before it is sent up to the datapath.
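To summarize the copy path used in the SLUB case: the head skb is grown and each chained fragment is copied into it, leaving a single linear skb. Below is a minimal sketch of that idea only; the helper name linearize_chain and its extra tail-room parameter are made up for illustration, and error handling plus the STT-specific accounting are simplified. The actual code is the new coalesce_skb()/__copy_skb() in the patch.

#include <linux/skbuff.h>

/* Simplified illustration: grow the head skb, then copy each chained
 * fragment into its linear area and free the fragment.
 */
static int linearize_chain(struct sk_buff *head, int extra)
{
        struct sk_buff *frag, *next;

        /* Reserve tail room for the bytes we are about to copy. */
        if (pskb_expand_head(head, 0, extra, GFP_ATOMIC))
                return -ENOMEM;

        /* Pull the head's own paged data into its linear area. */
        if (!__pskb_pull_tail(head, head->data_len))
                return -ENOMEM;

        /* Copy each chained fragment into the head and free it. */
        for (frag = head->next; frag; frag = next) {
                next = frag->next;
                skb_copy_bits(frag, 0, skb_put(head, frag->len), frag->len);
                kfree_skb(frag);
        }
        head->next = NULL;

        return 0;
}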
Performance number for large packet TCP test using netperf:

OVS branch       TCP Gbps    Host0 CPU%    Host1 CPU%
------------------------------------------------------
2.5                   9.4           272           315
master + patch        9.4           230           285

Tested-By: Vasmi Abidi <vab...@vmware.com>
Signed-off-by: Pravin B Shelar <pshe...@ovn.org>
---
v2-v3:
- Fixed memory leak.
- Fixed memory allocation buffer size.
- removed unnecessary checks.
v1-v2:
- while reassembly, try to merge skb before copying the skbs.
- define separate coalesce_skb() to copy skb for SLUB case.
---
 datapath/linux/compat/stt.c | 242 ++++++++++++++++++++++++++++++--------------
 1 file changed, 168 insertions(+), 74 deletions(-)

diff --git a/datapath/linux/compat/stt.c b/datapath/linux/compat/stt.c
index eb397e8..86d225e 100644
--- a/datapath/linux/compat/stt.c
+++ b/datapath/linux/compat/stt.c
@@ -49,6 +49,14 @@
 #define STT_DST_PORT 7471

 #ifdef OVS_STT
+#ifdef CONFIG_SLUB
+/*
+ * We saw better performance with skipping zero copy in case of SLUB.
+ * So skip zero copy for SLUB case.
+ */
+#define SKIP_ZERO_COPY
+#endif
+
 #define STT_VER 0

 /* @list: Per-net list of STT ports.
@@ -219,73 +227,6 @@ static int clear_gso(struct sk_buff *skb)
         return 0;
 }

-static struct sk_buff *normalize_frag_list(struct sk_buff *head,
-                                           struct sk_buff **skbp)
-{
-        struct sk_buff *skb = *skbp;
-        struct sk_buff *last;
-
-        do {
-                struct sk_buff *frags;
-
-                if (skb_shared(skb)) {
-                        struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
-
-                        if (unlikely(!nskb))
-                                return ERR_PTR(-ENOMEM);
-
-                        nskb->next = skb->next;
-                        consume_skb(skb);
-                        skb = nskb;
-                        *skbp = skb;
-                }
-
-                if (head) {
-                        head->len -= skb->len;
-                        head->data_len -= skb->len;
-                        head->truesize -= skb->truesize;
-                }
-
-                frags = skb_shinfo(skb)->frag_list;
-                if (frags) {
-                        int err;
-
-                        err = skb_unclone(skb, GFP_ATOMIC);
-                        if (unlikely(err))
-                                return ERR_PTR(err);
-
-                        last = normalize_frag_list(skb, &frags);
-                        if (IS_ERR(last))
-                                return last;
-
-                        skb_shinfo(skb)->frag_list = NULL;
-                        last->next = skb->next;
-                        skb->next = frags;
-                } else {
-                        last = skb;
-                }
-
-                skbp = &skb->next;
-        } while ((skb = skb->next));
-
-        return last;
-}
-
-/* Takes a linked list of skbs, which potentially contain frag_list
- * (whose members in turn potentially contain frag_lists, etc.) and
- * converts them into a single linear linked list.
- */
-static int straighten_frag_list(struct sk_buff **skbp)
-{
-        struct sk_buff *err_skb;
-
-        err_skb = normalize_frag_list(NULL, skbp);
-        if (IS_ERR(err_skb))
-                return PTR_ERR(err_skb);
-
-        return 0;
-}
-
 static void copy_skb_metadata(struct sk_buff *to, struct sk_buff *from)
 {
         to->protocol = from->protocol;
@@ -465,6 +406,74 @@ static int skb_list_segment(struct sk_buff *head, bool ipv4, int l4_offset)
         return 0;
 }

+#ifndef SKIP_ZERO_COPY
+static struct sk_buff *normalize_frag_list(struct sk_buff *head,
+                                           struct sk_buff **skbp)
+{
+        struct sk_buff *skb = *skbp;
+        struct sk_buff *last;
+
+        do {
+                struct sk_buff *frags;
+
+                if (skb_shared(skb)) {
+                        struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+                        if (unlikely(!nskb))
+                                return ERR_PTR(-ENOMEM);
+
+                        nskb->next = skb->next;
+                        consume_skb(skb);
+                        skb = nskb;
+                        *skbp = skb;
+                }
+
+                if (head) {
+                        head->len -= skb->len;
+                        head->data_len -= skb->len;
+                        head->truesize -= skb->truesize;
+                }
+
+                frags = skb_shinfo(skb)->frag_list;
+                if (frags) {
+                        int err;
+
+                        err = skb_unclone(skb, GFP_ATOMIC);
+                        if (unlikely(err))
+                                return ERR_PTR(err);
+
+                        last = normalize_frag_list(skb, &frags);
+                        if (IS_ERR(last))
+                                return last;
+
+                        skb_shinfo(skb)->frag_list = NULL;
+                        last->next = skb->next;
+                        skb->next = frags;
+                } else {
+                        last = skb;
+                }
+
+                skbp = &skb->next;
+        } while ((skb = skb->next));
+
+        return last;
+}
+
+/* Takes a linked list of skbs, which potentially contain frag_list
+ * (whose members in turn potentially contain frag_lists, etc.) and
+ * converts them into a single linear linked list.
+ */
+static int straighten_frag_list(struct sk_buff **skbp)
+{
+        struct sk_buff *err_skb;
+
+        err_skb = normalize_frag_list(NULL, skbp);
+        if (IS_ERR(err_skb))
+                return PTR_ERR(err_skb);
+
+        return 0;
+}
+
 static int coalesce_skb(struct sk_buff **headp)
 {
         struct sk_buff *frag, *head, *prev;
@@ -510,6 +519,34 @@ static int coalesce_skb(struct sk_buff **headp)
         head->next = NULL;
         return 0;
 }
+#else
+static int coalesce_skb(struct sk_buff **headp)
+{
+        struct sk_buff *frag, *head = *headp, *next;
+        int delta = FRAG_CB(head)->first.tot_len - skb_headlen(head);
+        int err;
+
+        if (unlikely(!head->next))
+                return 0;
+
+        err = pskb_expand_head(head, 0, delta, GFP_ATOMIC);
+        if (unlikely(err))
+                return err;
+
+        if (unlikely(!__pskb_pull_tail(head, head->data_len)))
+                BUG();
+
+        for (frag = head->next; frag; frag = next) {
+                skb_copy_bits(frag, 0, skb_put(head, frag->len), frag->len);
+                next = frag->next;
+                kfree_skb(frag);
+        }
+
+        head->next = NULL;
+        head->truesize = SKB_TRUESIZE(head->len);
+        return 0;
+}
+#endif

 static int __try_to_segment(struct sk_buff *skb, bool csum_partial,
                             bool ipv4, bool tcp, int l4_offset)
@@ -522,6 +559,12 @@ static int try_to_segment(struct sk_buff *skb)
 {
+#ifdef SKIP_ZERO_COPY
+        /* coalesce_skb() since does not generate frag-list no need to
+         * linearize it here.
+         */
+        return 0;
+#else
         struct stthdr *stth = stt_hdr(skb);
         bool csum_partial = !!(stth->flags & STT_CSUM_PARTIAL);
         bool ipv4 = !!(stth->flags & STT_PROTO_IPV4);
@@ -529,16 +572,19 @@ static int try_to_segment(struct sk_buff *skb)
         int l4_offset = stth->l4_offset;

         return __try_to_segment(skb, csum_partial, ipv4, tcp, l4_offset);
+#endif
 }

 static int segment_skb(struct sk_buff **headp, bool csum_partial,
                        bool ipv4, bool tcp, int l4_offset)
 {
+#ifndef SKIP_ZERO_COPY
         int err;

         err = coalesce_skb(headp);
         if (err)
                 return err;
+#endif

         if (skb_shinfo(*headp)->frag_list)
                 return __try_to_segment(*headp, csum_partial,
@@ -1054,16 +1100,58 @@ static struct pkt_frag *lookup_frag(struct net *net,
         return victim_frag;
 }

+#ifdef SKIP_ZERO_COPY
+static int __copy_skb(struct sk_buff *to, struct sk_buff *from,
+                      int *delta, bool *headstolen)
+{
+        int err;
+
+        if (unlikely(to->next))
+                return -EINVAL;
+
+        if (unlikely(FRAG_CB(to)->offset))
+                return -EINVAL;
+
+        if (unlikely(skb_unclone(to, GFP_ATOMIC)))
+                return -ENOMEM;
+
+        if (skb_try_coalesce(to, from, headstolen, delta))
+                return 0;
+
+        *headstolen = false;
+        err = pskb_expand_head(to, 0, to->data_len + from->len, GFP_ATOMIC);
+        if (unlikely(err))
+                return err;
+
+        if (unlikely(!__pskb_pull_tail(to, to->data_len)))
+                BUG();
+
+        skb_copy_bits(from, 0, skb_put(to, from->len), from->len);
+
+        *delta = from->len;
+        to->truesize += from->len;
+        return 0;
+}
+#else
+static int __copy_skb(struct sk_buff *to, struct sk_buff *from,
+                      int *delta, bool *headstolen)
+{
+        *headstolen = false;
+        return -EINVAL;
+}
+#endif
+
 static struct sk_buff *reassemble(struct sk_buff *skb)
 {
         struct iphdr *iph = ip_hdr(skb);
         struct tcphdr *tcph = tcp_hdr(skb);
         u32 seq = ntohl(tcph->seq);
         struct stt_percpu *stt_percpu;
-        struct sk_buff *last_skb;
+        struct sk_buff *last_skb, *copied_skb = NULL;
         struct pkt_frag *frag;
         struct pkt_key key;
-        int tot_len;
+        int tot_len, delta = skb->truesize;
+        bool headstolen;
         u32 hash;

         tot_len = seq >> STT_SEQ_LEN_SHIFT;
@@ -1103,7 +1191,6 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
                 FRAG_CB(skb)->first.set_ecn_ce = false;
                 list_add_tail(&frag->lru_node, &stt_percpu->frag_lru);
                 stt_percpu->frag_mem_used += skb->truesize;
-                skb = NULL;
                 goto unlock;
         }

@@ -1114,8 +1201,13 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
         last_skb = FRAG_CB(frag->skbs)->first.last_skb;
         if (likely(FRAG_CB(last_skb)->offset + last_skb->len ==
                    FRAG_CB(skb)->offset)) {
-                last_skb->next = skb;
-                FRAG_CB(frag->skbs)->first.last_skb = skb;
+
+                if (!__copy_skb(frag->skbs, skb, &delta, &headstolen)) {
+                        copied_skb = skb;
+                } else {
+                        last_skb->next = skb;
+                        FRAG_CB(frag->skbs)->first.last_skb = skb;
+                }
         } else {
                 struct sk_buff *prev = NULL, *next;

@@ -1154,8 +1246,8 @@ static struct sk_buff *reassemble(struct sk_buff *skb)

         FRAG_CB(frag->skbs)->first.set_ecn_ce |= INET_ECN_is_ce(iph->tos);
         FRAG_CB(frag->skbs)->first.rcvd_len += skb->len;
-        FRAG_CB(frag->skbs)->first.mem_used += skb->truesize;
-        stt_percpu->frag_mem_used += skb->truesize;
+        stt_percpu->frag_mem_used += delta;
+        FRAG_CB(frag->skbs)->first.mem_used += delta;

         if (FRAG_CB(frag->skbs)->first.tot_len ==
             FRAG_CB(frag->skbs)->first.rcvd_len) {
@@ -1174,6 +1266,8 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
                 skb = NULL;
         }

+        if (copied_skb)
+                kfree_skb_partial(copied_skb, headstolen);
         goto unlock;

 unlock_free:
-- 
2.5.5