STT can generate large packets. This patch adds support for handling packets that are larger than the device MTU. To support such packets, the following members are added to the dp-packet structure. type: type of packet, e.g. tcp, tcpv6, geneve, etc. mss: maximum segment size. A netdev can set the flags in netdev->supported_lso_types if it supports segmentation. A software fallback is also defined in case the netdev implementation does not support large packet segmentation.
Signed-off-by: Pravin B Shelar <pshe...@ovn.org> --- lib/automake.mk | 2 + lib/dp-packet-lso.c | 402 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/dp-packet-lso.h | 57 +++++++ lib/dp-packet.c | 2 + lib/dp-packet.h | 21 +++ lib/dpif-netdev.c | 36 ++++- lib/netdev-provider.h | 1 + lib/netdev.c | 124 ++++++++++++++-- lib/packets.h | 2 + 9 files changed, 630 insertions(+), 17 deletions(-) create mode 100644 lib/dp-packet-lso.c create mode 100644 lib/dp-packet-lso.h diff --git a/lib/automake.mk b/lib/automake.mk index a3c3464..7972392 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -69,6 +69,8 @@ lib_libopenvswitch_la_SOURCES = \ lib/dpctl.h \ lib/dp-packet.h \ lib/dp-packet.c \ + lib/dp-packet-lso.c \ + lib/dp-packet-lso.h \ lib/dpif-netdev.c \ lib/dpif-netdev.h \ lib/dpif-provider.h \ diff --git a/lib/dp-packet-lso.c b/lib/dp-packet-lso.c new file mode 100644 index 0000000..14a5ed8 --- /dev/null +++ b/lib/dp-packet-lso.c @@ -0,0 +1,402 @@ +/* + * Copyright (c) 2016 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <config.h> + +#include <errno.h> +#include <fcntl.h> +#include <sys/socket.h> +#include <net/if.h> +#include <netinet/ip6.h> +#include <sys/ioctl.h> + +#include <errno.h> +#include <stdlib.h> +#include <sys/time.h> + +#include "openvswitch/list.h" + +#include "byte-order.h" +#include "csum.h" +#include "daemon.h" +#include "dirs.h" +#include "dpif.h" +#include "dp-packet.h" +#include "entropy.h" +#include "flow.h" +#include "hash.h" +#include "hmap.h" +#include "id-pool.h" +#include "netdev-provider.h" +#include "netdev-vport.h" +#include "netdev-vport-private.h" +#include "odp-netlink.h" +#include "dp-packet.h" +#include "dp-packet-lso.h" +#include "ovs-router.h" +#include "packets.h" +#include "poll-loop.h" +#include "random.h" +#include "route-table.h" +#include "shash.h" +#include "socket-util.h" +#include "timeval.h" +#include "netdev-native-tnl.h" +#include "openvswitch/vlog.h" +#include "unaligned.h" +#include "unixctl.h" +#include "util.h" + +VLOG_DEFINE_THIS_MODULE(dp_packet_lso); +static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5); + +#define UDP_CSUM_OFFSET offsetof(struct udp_header, udp_csum) +#define TCP_CSUM_OFFSET offsetof(struct tcp_header, tcp_csum) + +static struct dp_packet * +segment_packet__(struct dp_packet *orig, int header_len) +{ + struct dp_packet *seg_list = NULL, *prev = NULL; + uint16_t mss = orig->lso.mss; + int offset = header_len; + int size = dp_packet_size(orig); + struct dp_packet *seg; + + if (!mss) { + seg_list = dp_packet_clone(orig); + memset(&seg_list->lso, 0, sizeof seg_list->lso); + PACKET_LSO_CTX(seg_list)->next = NULL; + return seg_list; + } + while (offset < size) { + int current_seg_size; + unsigned char *data; + + current_seg_size = size < (offset + mss) ? 
(size - offset) : mss; + seg = dp_packet_new(0); + dp_packet_put(seg, dp_packet_data(orig), header_len); + + data = (unsigned char *)dp_packet_data(orig) + offset; + dp_packet_put(seg, data, current_seg_size); + offset += mss; + seg->l2_pad_size = orig->l2_pad_size; + seg->l2_5_ofs = orig->l2_5_ofs; + seg->l3_ofs = orig->l3_ofs; + seg->l4_ofs = orig->l4_ofs; + PACKET_LSO_CTX(seg)->next = NULL; + + if (prev) { + PACKET_LSO_CTX(prev)->next = seg; + } + prev = seg; + if (!seg_list) { + seg_list = seg; + } + } + return seg_list; +} + +void +fixup_packet_cheksum(struct dp_packet *pkt, int l4_offset, int csum_offset, + int new_ip_tot_len, int old_ip_tot_len) +{ + ovs_be16 *data_ptr, *csum; + uint32_t l4_csum; + + data_ptr = (ovs_be16 *) ((uint16_t *) dp_packet_data(pkt) + (l4_offset >> 1)); + csum = data_ptr + (csum_offset >> 1); + + l4_csum = csum_continue(0, data_ptr, dp_packet_size(pkt) - l4_offset); + *csum = csum_finish(l4_csum); + if (new_ip_tot_len != old_ip_tot_len) { + *csum = recalc_csum16(*csum, htons(old_ip_tot_len), htons(new_ip_tot_len)); + } +} + +static void +fixup_segment_cheksum(struct dp_packet *pkt, struct dp_packet *orig, int csum_offset) +{ + int l4_offset = orig->l4_ofs; + int old_ip_tot_len = dp_packet_size(orig) - l4_offset; + int new_ip_tot_len = dp_packet_size(pkt) - l4_offset; + + fixup_packet_cheksum(pkt, l4_offset, csum_offset, + new_ip_tot_len, old_ip_tot_len); +} + +static struct dp_packet * +segment_udp_packet(struct dp_packet *orig) +{ + struct dp_packet *seg_list, *seg; + + seg_list = segment_packet__(orig, orig->l4_ofs + sizeof(struct udp_header)); + FOR_EACH_LSO_SEG(seg_list, seg) { + struct udp_header *udp; + + udp = dp_packet_l4(seg); + udp->udp_len = htons(dp_packet_size(seg) - seg->l4_ofs); + fixup_segment_cheksum(seg, orig, UDP_CSUM_OFFSET); + } + return seg_list; +} + +static struct dp_packet * +segment_tcp_packet(struct dp_packet *orig) +{ + struct dp_packet *seg_list, *seg; + struct tcp_header *orig_tcph = dp_packet_l4(orig); 
+ int tcph_len = TCP_OFFSET(orig_tcph->tcp_ctl) * 4; + uint32_t tcp_seq = ntohl(get_16aligned_be32(&orig_tcph->tcp_seq)); + int mss = orig->lso.mss; + + seg_list = segment_packet__(orig, orig->l4_ofs + tcph_len); + FOR_EACH_LSO_SEG(seg_list, seg) { + + if (mss) { + struct tcp_header *tcph = dp_packet_l4(seg); + + put_16aligned_be32(&tcph->tcp_seq, htonl(tcp_seq)); + tcp_seq += mss; + tcph->tcp_ctl = htons(ntohs(tcph->tcp_ctl) & + ~(TCP_FIN | TCP_PSH)); + } + + fixup_segment_cheksum(seg, orig, TCP_CSUM_OFFSET); + } + return seg_list; +} + +static struct dp_packet * +segment_l4_packet(struct dp_packet *orig) +{ + if (orig->lso.type & (DPBUF_LSO_TCPv4 | DPBUF_LSO_TCPv6)) { + return segment_tcp_packet(orig); + } else if (orig->lso.type & (DPBUF_LSO_UDPv4 | DPBUF_LSO_UDPv6)) { + return segment_udp_packet(orig); + } + OVS_NOT_REACHED(); + return NULL; +} + +static struct dp_packet * +segment_ipv4_packet(struct dp_packet *orig) +{ + struct dp_packet *seg_list, *seg; + struct ip_header *orig_iph = dp_packet_l3(orig); + int ip_id = 0; + int ip_offset = 0; + bool inc_ip_id = false; + + if (orig->lso.type & DPBUF_LSO_TCPv4) { + inc_ip_id = true; + ip_id = ntohs(orig_iph->ip_id); + } + + orig->l4_ofs = orig->l3_ofs + IP_HEADER_LEN; + seg_list = segment_l4_packet(orig); + + FOR_EACH_LSO_SEG(seg_list, seg) { + struct ip_header *iph = dp_packet_l3(seg); + int new_len = dp_packet_size(seg) - orig->l3_ofs; + + if (inc_ip_id) { + iph->ip_id = htons(ip_id++); + } else { + iph->ip_frag_off = htons(ip_offset); + ip_offset += (new_len - IP_HEADER_LEN); + if (PACKET_LSO_CTX(seg)->next) { + iph->ip_frag_off |= IP_MORE_FRAGMENTS; + } + } + iph->ip_tot_len = htons(new_len); + iph->ip_csum = 0; + iph->ip_csum = csum(iph, sizeof *iph); + } + return seg_list; +} + +static void +update_ipv6_frag_hdr(struct dp_packet *pkt, int *ipv6_frag_offset) +{ + int size = dp_packet_size(pkt); + struct ip6_hdr *nh = dp_packet_l3(pkt); + uint8_t nw_proto = nh->ip6_nxt; + void *data = nh + 1; + int offset = 
(unsigned char *)data - (unsigned char *)dp_packet_l2(pkt); + + while (1) { + if (OVS_LIKELY((nw_proto != IPPROTO_HOPOPTS) + && (nw_proto != IPPROTO_ROUTING) + && (nw_proto != IPPROTO_DSTOPTS) + && (nw_proto != IPPROTO_AH) + && (nw_proto != IPPROTO_FRAGMENT))) { + break; + } + + if ((nw_proto == IPPROTO_HOPOPTS) + || (nw_proto == IPPROTO_ROUTING) + || (nw_proto == IPPROTO_DSTOPTS)) { + const struct ip6_ext *ext_hdr = data; + + nw_proto = ext_hdr->ip6e_nxt; + + offset += (ext_hdr->ip6e_len + 1) * 8; + if (offset > size) { + goto out; + } + } else if (nw_proto == IPPROTO_AH) { + const struct ip6_ext *ext_hdr = data; + nw_proto = ext_hdr->ip6e_nxt; + + offset += (ext_hdr->ip6e_len + 2) * 4; + if (offset > size) { + goto out; + } + } else if (nw_proto == IPPROTO_FRAGMENT) { + struct ovs_16aligned_ip6_frag *frag_hdr; + + offset += sizeof (struct ovs_16aligned_ip6_frag); + if (offset > size) { + goto out; + } + frag_hdr = data; + frag_hdr->ip6f_offlg = htons(*ipv6_frag_offset); + if (PACKET_LSO_CTX(pkt)->next) { + frag_hdr->ip6f_offlg |= htons(IPV6_MF); + } + return; + } + data = (unsigned char *)data + offset; + } +out: + VLOG_ERR_RL(&err_rl, "could not find frag header"); +} + +static int +ipv6_set_l4_offset(struct dp_packet *pkt) +{ + int size = dp_packet_size(pkt); + struct ip6_hdr *nh = dp_packet_l3(pkt); + uint8_t nw_proto = nh->ip6_nxt; + void *data = nh + 1; + int offset = pkt->l3_ofs + sizeof *nh; + + while (1) { + if (OVS_LIKELY((nw_proto != IPPROTO_HOPOPTS) + && (nw_proto != IPPROTO_ROUTING) + && (nw_proto != IPPROTO_DSTOPTS) + && (nw_proto != IPPROTO_AH) + && (nw_proto != IPPROTO_FRAGMENT))) { + break; + } + + if ((nw_proto == IPPROTO_HOPOPTS) + || (nw_proto == IPPROTO_ROUTING) + || (nw_proto == IPPROTO_DSTOPTS)) { + const struct ip6_ext *ext_hdr = data; + + nw_proto = ext_hdr->ip6e_nxt; + + offset += (ext_hdr->ip6e_len + 1) * 8; + if (offset > size) { + goto out; + } + } else if (nw_proto == IPPROTO_AH) { + const struct ip6_ext *ext_hdr = data; + nw_proto = 
ext_hdr->ip6e_nxt; + + offset += (ext_hdr->ip6e_len + 2) * 4; + if (offset > size) { + goto out; + } + } else if (nw_proto == IPPROTO_FRAGMENT) { + struct ovs_16aligned_ip6_frag *frag_hdr; + + offset += sizeof (struct ovs_16aligned_ip6_frag); + if (offset > size) { + goto out; + } + frag_hdr = data; + nw_proto = frag_hdr->ip6f_nxt; + } + data = (unsigned char *)data + offset; + } + pkt->l4_ofs = offset; + return 0; +out: + VLOG_ERR_RL(&err_rl, "Could not parse ipv6 ext header"); + return -EINVAL; +} + +static struct dp_packet * +segment_ipv6_packet(struct dp_packet *orig) +{ + struct dp_packet *seg_list, *seg; + int ip_offset = 0; + + ipv6_set_l4_offset(orig); + seg_list = segment_l4_packet(orig); + + FOR_EACH_LSO_SEG(seg_list, seg) { + struct ip6_hdr *nh = dp_packet_l3(seg); + int new_len = dp_packet_size(seg) - orig->l4_ofs; + + nh->ip6_plen = htons(new_len); + + if (orig->lso.type & DPBUF_LSO_UDPv6) { + update_ipv6_frag_hdr(seg, &ip_offset); + + ip_offset += (new_len - sizeof (struct ovs_16aligned_ip6_frag)); + } + } + return seg_list; +} + +static struct dp_packet * +segment_eth_packet(struct dp_packet *orig, int offset) +{ + int header_len = ETH_HEADER_LEN; + struct dp_packet *seg_list; + struct eth_header *eth; + ovs_be16 eth_type; + + dp_packet_reset_packet(orig, offset); + eth = dp_packet_data(orig); + eth_type = eth->eth_type; + if (eth_type_vlan(eth->eth_type)) { + struct vlan_eth_header *vethh = (struct vlan_eth_header *) dp_packet_data(orig); + + eth_type = vethh->veth_next_type; + header_len += VLAN_HEADER_LEN; + } + orig->l3_ofs = header_len; + + if (eth_type == htons(ETH_TYPE_IP)) { + seg_list = segment_ipv4_packet(orig); + } else if (eth_type == htons(ETH_TYPE_IPV6)) { + seg_list = segment_ipv6_packet(orig); + } else { + return NULL; + } + return seg_list; +} + +struct dp_packet * +segment_packet(struct dp_packet *orig) +{ + PACKET_LSO_CTX(orig)->next = NULL; + return segment_eth_packet(orig, 0); +} diff --git a/lib/dp-packet-lso.h 
b/lib/dp-packet-lso.h new file mode 100644 index 0000000..09815e8 --- /dev/null +++ b/lib/dp-packet-lso.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2016 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DPBUF_LSO_H +#define DPBUF_LSO_H 1 + +#include <stddef.h> +#include <stdint.h> + +#include "openvswitch/list.h" + +#include "packets.h" +#include "util.h" +#include "netdev-dpdk.h" + + +#define DPBUF_LSO_TCPv4 (1 << 0) +#define DPBUF_LSO_TCPv6 (1 << 1) +#define DPBUF_LSO_UDPv4 (1 << 2) +#define DPBUF_LSO_UDPv6 (1 << 3) + +struct dp_packet_lso_ctx { + struct dp_packet *next; /* Used to list lso segments. */ +}; + +BUILD_ASSERT_DECL(DP_PACKET_CONTEXT_SIZE >= sizeof(struct dp_packet_lso_ctx)); + +#define PACKET_LSO_CTX(packet) ((struct dp_packet_lso_ctx *)(packet)->data) + +struct dp_packet *segment_packet(struct dp_packet *orig); + +#define FOR_EACH_LSO_SEG(lso_list, seg) \ + for (seg = lso_list; seg; seg = PACKET_LSO_CTX(seg)->next) + +#define FOR_EACH_LSO_SEG_SAFE(lso_list, seg, next) \ + for (seg = lso_list, next = PACKET_LSO_CTX(lso_list)->next; \ + (seg ? 
next = PACKET_LSO_CTX(seg)->next, 1: 0); \ + seg = next) + +void +fixup_packet_cheksum(struct dp_packet *pkt, int l4_offset, int csum_offset, + int new_ip_tot_len, int old_ip_tot_len); + +#endif /* dp-packet-lso.h */ diff --git a/lib/dp-packet.c b/lib/dp-packet.c index 0c85d50..1f8b891 100644 --- a/lib/dp-packet.c +++ b/lib/dp-packet.c @@ -30,6 +30,7 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so dp_packet_reset_offsets(b); pkt_metadata_init(&b->md, 0); dp_packet_rss_invalidate(b); + memset(&b->lso, 0, sizeof b->lso); } static void @@ -168,6 +169,7 @@ dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t headroom) new_buffer->l3_ofs = buffer->l3_ofs; new_buffer->l4_ofs = buffer->l4_ofs; new_buffer->md = buffer->md; + new_buffer->lso = buffer->lso; #ifdef DPDK_NETDEV new_buffer->mbuf.ol_flags = buffer->mbuf.ol_flags; #else diff --git a/lib/dp-packet.h b/lib/dp-packet.h index ce223e8..e247712 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -64,6 +64,10 @@ struct dp_packet { struct pkt_metadata md; uint64_t data[DP_PACKET_CONTEXT_SIZE / 8]; }; + struct { + uint16_t mss; + uint8_t type; + } lso; }; static inline void *dp_packet_data(const struct dp_packet *); @@ -567,12 +571,14 @@ enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. 
*/ struct dp_packet_batch { int count; + uint8_t lso_type; struct dp_packet *packets[NETDEV_MAX_BURST]; }; static inline void dp_packet_batch_init(struct dp_packet_batch *b) { b->count = 0; + b->lso_type = 0; } static inline void @@ -585,15 +591,30 @@ dp_packet_batch_clone(struct dp_packet_batch *dst, dst->packets[i] = dp_packet_clone(src->packets[i]); } dst->count = src->count; + dst->lso_type = src->lso_type; } static inline void packet_batch_init_packet(struct dp_packet_batch *b, struct dp_packet *p) { b->count = 1; + b->lso_type = p->lso.type; b->packets[0] = p; } +static inline bool +dp_packet_batch_is_full(struct dp_packet_batch *b) +{ + return b->count == NETDEV_MAX_BURST; +} + +static inline void +dp_packet_batch_add_pkt(struct dp_packet_batch *b, struct dp_packet *p) +{ + b->packets[b->count++] = p; + b->lso_type |= p->lso.type; +} + #ifdef __cplusplus } #endif diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 00f130c..ba58483 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -36,6 +36,7 @@ #include "coverage.h" #include "csum.h" #include "dp-packet.h" +#include "dp-packet-lso.h" #include "dpif.h" #include "dpif-provider.h" #include "dummy.h" @@ -3347,7 +3348,7 @@ packet_batch_per_flow_update(struct packet_batch_per_flow *batch, { batch->byte_count += dp_packet_size(packet); batch->tcp_flags |= miniflow_get_tcp_flags(mf); - batch->array.packets[batch->array.count++] = packet; + dp_packet_batch_add_pkt(&batch->array, packet); } static inline void @@ -3578,8 +3579,19 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd, } miss_cnt++; - handle_packet(pmd, packets[i], &keys[i], &actions, &put_actions, - &lost_cnt); + if (!packets[i]->lso.type) { + handle_packet(pmd, packets[i], &keys[i], &actions, &put_actions, + &lost_cnt); + } else { + struct dp_packet *seg_list, *seg, *next; + + seg_list = segment_packet(packets[i]); + FOR_EACH_LSO_SEG_SAFE(seg_list, seg, next) { + handle_packet(pmd, seg, &keys[i], &actions, &put_actions, + &lost_cnt); + 
} + dp_packet_delete(packets[i]); + } } ofpbuf_uninit(&actions); @@ -3845,8 +3857,22 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, for (i = 0; i < packets_->count; i++) { flow_extract(packets[i], &flow); dpif_flow_hash(dp->dpif, &flow, sizeof flow, &ufid); - dp_execute_userspace_action(pmd, packets[i], may_steal, &flow, - &ufid, &actions, userdata); + + if (!packets[i]->lso.type) { + dp_execute_userspace_action(pmd, packets[i], may_steal, &flow, + &ufid, &actions, userdata); + } else { + struct dp_packet *seg_list, *seg, *next; + + seg_list = segment_packet(packets[i]); + FOR_EACH_LSO_SEG_SAFE(seg_list, seg, next) { + dp_execute_userspace_action(pmd, seg, true, &flow, + &ufid, &actions, userdata); + } + if (may_steal) { + dp_packet_delete(packets[i]); + } + } } ofpbuf_uninit(&actions); fat_rwlock_unlock(&dp->upcall_rwlock); diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 6af0708..f4792a3 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -62,6 +62,7 @@ struct netdev { int ref_cnt; /* Times this devices was opened. */ struct shash_node *node; /* Pointer to element in global map. */ struct ovs_list saved_flags_list; /* Contains "struct netdev_saved_flags". 
*/ + uint8_t supported_lso_types; }; static inline void diff --git a/lib/netdev.c b/lib/netdev.c index 0a8e15c..1cc178a 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -34,6 +34,7 @@ #include "coverage.h" #include "dpif.h" #include "dp-packet.h" +#include "dp-packet-lso.h" #include "openvswitch/dynamic-string.h" #include "fatal-signal.h" #include "hash.h" @@ -724,6 +725,88 @@ netdev_set_multiq(struct netdev *netdev, unsigned int n_txq, return error; } +static int +send_packet__(struct netdev *netdev, int qid, struct dp_packet_batch *b, + struct dp_packet *pkt, bool may_steal) +{ + dp_packet_batch_add_pkt(b, pkt); + if (dp_packet_batch_is_full(b)) { + int error = netdev->netdev_class->send(netdev, qid, + b->packets, b->count, + may_steal); + if (!error) { + dp_packet_batch_init(b); + } + return error; + } + return 0; +} + +static int +netdev_send_lso(struct netdev *netdev, int qid, struct dp_packet_batch *s, + bool may_steal) +{ + struct dp_packet_batch b; + int i, error; + + dp_packet_batch_init(&b); + for (i = 0; i < s->count; i++) { + struct dp_packet *seg, *seg_list, *next, *pkt = s->packets[i]; + + if (!pkt->lso.type) { + if (!may_steal) { + pkt = dp_packet_clone(pkt); + } + error = send_packet__(netdev, qid, &b, pkt, true); + if (OVS_UNLIKELY(error)) { + seg = NULL; + goto err; + } + } else { + seg_list = segment_packet(pkt); + if (may_steal) { + dp_packet_delete(pkt); + } + error = 0; + FOR_EACH_LSO_SEG_SAFE(seg_list, seg, next) { + if (OVS_UNLIKELY(error)) { + dp_packet_delete(seg); + continue; + } + error = send_packet__(netdev, qid, &b, seg, true); + if (OVS_UNLIKELY(error)) { + dp_packet_delete(seg); + } + } + if (OVS_UNLIKELY(error)) { + goto err; + } + } + } + if (b.count) { + error = netdev->netdev_class->send(netdev, qid, + b.packets, b.count, true); + if (!error) { + dp_packet_batch_init(&b); + } else { + goto err; + } + } + + return 0; +err: + if (may_steal) { + for (i = i + 1; i < s->count; i++) { + dp_packet_delete(s->packets[i]); + } + } + + for 
(i = 0; i < b.count; i++) { + dp_packet_delete(b.packets[i]); + } + return error; +} + /* Sends 'buffers' on 'netdev'. Returns 0 if successful (for every packet), * otherwise a positive errno value. Returns EAGAIN without blocking if * at least one the packets cannot be queued immediately. Returns EMSGSIZE @@ -749,20 +832,30 @@ int netdev_send(struct netdev *netdev, int qid, struct dp_packet_batch *batch, bool may_steal) { - if (!netdev->netdev_class->send) { - if (may_steal) { - for (int i = 0; i < batch->count; i++) { - dp_packet_delete(batch->packets[i]); - } - } - return EOPNOTSUPP; - } + int error; - int error = netdev->netdev_class->send(netdev, qid, + if (!netdev->netdev_class->send) { + error = EOPNOTSUPP; + } else if (batch->lso_type & ~netdev->supported_lso_types) { + return netdev_send_lso(netdev, qid, batch, may_steal); + } else { + error = netdev->netdev_class->send(netdev, qid, batch->packets, batch->count, may_steal); + } + if (!error) { COVERAGE_INC(netdev_sent); + } else { + goto err; + } + return 0; + +err: + if (may_steal) { + for (int i = 0; i < batch->count; i++) { + dp_packet_delete(batch->packets[i]); + } } return error; } @@ -778,9 +871,12 @@ netdev_pop_header(struct netdev *netdev, struct dp_packet_batch *batch) } for (i = 0; i < batch->count; i++) { - buffers[i] = netdev->netdev_class->pop_header(buffers[i]); - if (buffers[i]) { - buffers[n_cnt++] = buffers[i]; + struct dp_packet *inner_pkt; + + inner_pkt = netdev->netdev_class->pop_header(buffers[i]); + if (inner_pkt) { + batch->lso_type |= inner_pkt->lso.type; + buffers[n_cnt++] = inner_pkt; } } batch->count = n_cnt; @@ -808,6 +904,10 @@ netdev_push_header(const struct netdev *netdev, return -EINVAL; } + if (batch->lso_type & ~netdev->supported_lso_types) { + return -EINVAL; + } + for (i = 0; i < batch->count; i++) { netdev->netdev_class->push_header(batch->packets[i], data); pkt_metadata_init(&batch->packets[i]->md, u32_to_odp(data->out_port)); diff --git a/lib/packets.h b/lib/packets.h 
index 8139a6b..f1e29f8 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -785,6 +785,8 @@ struct ovs_16aligned_ip6_hdr { union ovs_16aligned_in6_addr ip6_dst; }; +#define IPV6_MF 0x0001 + /* Like struct in6_frag, but whereas that struct requires 32-bit alignment, * this one only requires 16-bit alignment. */ struct ovs_16aligned_ip6_frag { -- 2.5.5 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev