I will resend this as a V5, without the checksum calculation on receive when the STT flags are 0, once I have tested it. Please disregard this patch.
Thanks, Paul > -----Original Message----- > From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Paul Boca > Sent: Thursday, May 5, 2016 7:18 PM > To: dev@openvswitch.org > Subject: [ovs-dev] [PATCH V4] datapath-windows: Improved offloading on STT > tunnel > > *Added OvsExtractLayers - populates only the layers field without unnecessary > memory operations for flow part > *If in STT header the flags are 0 then force packets checksums calculation > on receive. > *Ensure correct pseudo checksum is set for LSO both on send and receive. > Both Windows and Linux includes the segment length to TCP pseudo-checksum > conforming to RFC 793 but in case of LSO offload this is expected to be only > on > Source IP Address, Destination IP Address, and Protocol. > *Fragment expiration on rx side of STT was set to 30 seconds, but the correct > timeout would be TTL of the packet > > Signed-off-by: Paul-Daniel Boca <pb...@cloudbasesolutions.com> > --- > v2: Fixed a NULL pointer dereference. > Removed some unused local variables and multiple initializations. > v3: Use LSO V2 in OvsDoEncapStt > Fixed alignment and code style > Use IpHdr TTL for fragment expiration on receive instead 30s > V4: Use stored MSS in STT header on rx for lsoInfo of encapsulated packet > If STT_CSUM_VERIFIED flag is set then we don't have to extract > layers on receive. 
> --- > datapath-windows/ovsext/Flow.c | 243 ++++++++++++++++++++++++++++- > ---- > datapath-windows/ovsext/Flow.h | 2 + > datapath-windows/ovsext/PacketParser.c | 97 +++++++------ > datapath-windows/ovsext/PacketParser.h | 8 +- > datapath-windows/ovsext/Stt.c | 158 +++++++++++++++++---- > datapath-windows/ovsext/Stt.h | 1 - > datapath-windows/ovsext/User.c | 17 ++- > 7 files changed, 410 insertions(+), 116 deletions(-) > > diff --git a/datapath-windows/ovsext/Flow.c b/datapath- > windows/ovsext/Flow.c > index 1f23625..a49a60c 100644 > --- a/datapath-windows/ovsext/Flow.c > +++ b/datapath-windows/ovsext/Flow.c > @@ -1566,7 +1566,8 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs, > > ndKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_ND]); > RtlCopyMemory(&icmp6FlowPutKey->ndTarget, > - ndKey->nd_target, sizeof > (icmp6FlowPutKey->ndTarget)); > + ndKey->nd_target, > + sizeof (icmp6FlowPutKey->ndTarget)); > RtlCopyMemory(icmp6FlowPutKey->arpSha, > ndKey->nd_sll, ETH_ADDR_LEN); > RtlCopyMemory(icmp6FlowPutKey->arpTha, > @@ -1596,8 +1597,10 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs, > arpFlowPutKey->nwSrc = arpKey->arp_sip; > arpFlowPutKey->nwDst = arpKey->arp_tip; > > - RtlCopyMemory(arpFlowPutKey->arpSha, arpKey->arp_sha, > ETH_ADDR_LEN); > - RtlCopyMemory(arpFlowPutKey->arpTha, arpKey->arp_tha, > ETH_ADDR_LEN); > + RtlCopyMemory(arpFlowPutKey->arpSha, arpKey->arp_sha, > + ETH_ADDR_LEN); > + RtlCopyMemory(arpFlowPutKey->arpTha, arpKey->arp_tha, > + ETH_ADDR_LEN); > /* Kernel datapath assumes 'arpFlowPutKey->nwProto' to be in host > * order. */ > arpFlowPutKey->nwProto = (UINT8)ntohs((arpKey->arp_op)); > @@ -1846,29 +1849,195 @@ OvsGetFlowMetadata(OvsFlowKey *key, > return status; > } > > + > /* > - > *---------------------------------------------------------------------------- > - * Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', and > - * 'ofp_in_port'. 
> - * > - * Initializes 'packet' header pointers as follows: > - * > - * - packet->l2 to the start of the Ethernet header. > - * > - * - packet->l3 to just past the Ethernet header, or just past the > - * vlan_header if one is present, to the first byte of the payload of > the > - * Ethernet frame. > - * > - * - packet->l4 to just past the IPv4 header, if one is present and has a > - * correct length, and otherwise NULL. > - * > - * - packet->l7 to just past the TCP, UDP, SCTP or ICMP header, if one is > - * present and has a correct length, and otherwise NULL. > - * > - * Returns NDIS_STATUS_SUCCESS normally. Fails only if packet data cannot be > accessed > - * (e.g. if Pkt_CopyBytesOut() returns an error). > - > *---------------------------------------------------------------------------- > - */ > +*---------------------------------------------------------------------------- > +* Initializes 'layers' members from 'packet' > +* > +* Initializes 'layers' header pointers as follows: > +* > +* - layers->l2 to the start of the Ethernet header. > +* > +* - layers->l3 to just past the Ethernet header, or just past the > +* vlan_header if one is present, to the first byte of the payload of the > +* Ethernet frame. > +* > +* - layers->l4 to just past the IPv4 header, if one is present and has a > +* correct length, and otherwise NULL. > +* > +* - layers->l7 to just past the TCP, UDP, SCTP or ICMP header, if one is > +* present and has a correct length, and otherwise NULL. > +* > +* - layers->isIPv4/isIPv6/isTcp/isUdp/isSctp based on the packet type > +* > +* Returns NDIS_STATUS_SUCCESS normally. > +* Fails only if packet data cannot be accessed. > +* (e.g. if OvsParseIPv6() returns an error). 
> +*---------------------------------------------------------------------------- > +*/ > +NDIS_STATUS > +OvsExtractLayers(const NET_BUFFER_LIST *packet, > + POVS_PACKET_HDR_INFO layers) > +{ > + struct Eth_Header *eth; > + UINT8 offset = 0; > + PVOID vlanTagValue; > + ovs_be16 dlType; > + > + layers->value = 0; > + > + /* Link layer. */ > + eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet); > + > + /* > + * vlan_tci. > + */ > + vlanTagValue = NET_BUFFER_LIST_INFO(packet, > Ieee8021QNetBufferListInfo); > + if (!vlanTagValue) { > + if (eth->dix.typeNBO == ETH_TYPE_802_1PQ_NBO) { > + offset = sizeof(Eth_802_1pq_Tag); > + } > + > + /* > + * XXX Please note after this point, src mac and dst mac should > + * not be accessed through eth > + */ > + eth = (Eth_Header *)((UINT8 *)eth + offset); > + } > + > + /* > + * dl_type. > + * > + * XXX assume that at least the first > + * 12 bytes of received packets are mapped. This code has the stronger > + * assumption that at least the first 22 bytes of 'packet' is mapped (if > my > + * arithmetic is right). > + */ > + if (ETH_TYPENOT8023(eth->dix.typeNBO)) { > + dlType = eth->dix.typeNBO; > + layers->l3Offset = ETH_HEADER_LEN_DIX + offset; > + } else if (OvsPacketLenNBL(packet) >= ETH_HEADER_LEN_802_3 && > + eth->e802_3.llc.dsap == 0xaa && > + eth->e802_3.llc.ssap == 0xaa && > + eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME && > + eth->e802_3.snap.snapOrg[0] == 0x00 && > + eth->e802_3.snap.snapOrg[1] == 0x00 && > + eth->e802_3.snap.snapOrg[2] == 0x00) { > + dlType = eth->e802_3.snap.snapType.typeNBO; > + layers->l3Offset = ETH_HEADER_LEN_802_3 + offset; > + } else { > + dlType = htons(OVSWIN_DL_TYPE_NONE); > + layers->l3Offset = ETH_HEADER_LEN_DIX + offset; > + } > + > + /* Network layer. 
*/ > + if (dlType == htons(ETH_TYPE_IPV4)) { > + struct IPHdr ip_storage; > + const struct IPHdr *nh; > + > + layers->isIPv4 = 1; > + nh = OvsGetIp(packet, layers->l3Offset, &ip_storage); > + if (nh) { > + layers->l4Offset = layers->l3Offset + nh->ihl * 4; > + > + if (!(nh->frag_off & htons(IP_OFFSET))) { > + if (nh->protocol == SOCKET_IPPROTO_TCP) { > + OvsParseTcp(packet, NULL, layers); > + } else if (nh->protocol == SOCKET_IPPROTO_UDP) { > + OvsParseUdp(packet, NULL, layers); > + } else if (nh->protocol == SOCKET_IPPROTO_SCTP) { > + OvsParseSctp(packet, NULL, layers); > + } else if (nh->protocol == SOCKET_IPPROTO_ICMP) { > + ICMPHdr icmpStorage; > + const ICMPHdr *icmp; > + > + icmp = OvsGetIcmp(packet, layers->l4Offset, > &icmpStorage); > + if (icmp) { > + layers->l7Offset = layers->l4Offset + sizeof *icmp; > + } > + } > + } > + } > + } else if (dlType == htons(ETH_TYPE_IPV6)) { > + NDIS_STATUS status; > + Ipv6Key ipv6Key; > + > + status = OvsParseIPv6(packet, &ipv6Key, layers); > + if (status != NDIS_STATUS_SUCCESS) { > + return status; > + } > + layers->isIPv6 = 1; > + > + if (ipv6Key.nwProto == SOCKET_IPPROTO_TCP) { > + OvsParseTcp(packet, &(ipv6Key.l4), layers); > + } else if (ipv6Key.nwProto == SOCKET_IPPROTO_UDP) { > + OvsParseUdp(packet, &(ipv6Key.l4), layers); > + } else if (ipv6Key.nwProto == SOCKET_IPPROTO_SCTP) { > + OvsParseSctp(packet, &ipv6Key.l4, layers); > + } else if (ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) { > + Icmp6Key icmp6Key; > + OvsParseIcmpV6(packet, NULL, &icmp6Key, layers); > + } > + } else if (OvsEthertypeIsMpls(dlType)) { > + MPLSHdr mplsStorage; > + const MPLSHdr *mpls; > + > + /* > + * In the presence of an MPLS label stack the end of the L2 > + * header and the beginning of the L3 header differ. > + * > + * A network packet may contain multiple MPLS labels, but we > + * are only interested in the topmost label stack entry. > + * > + * Advance network header to the beginning of the L3 header. 
> + * layers->l3Offset corresponds to the end of the L2 header. > + */ > + for (UINT32 i = 0; i < FLOW_MAX_MPLS_LABELS; i++) { > + mpls = OvsGetMpls(packet, layers->l3Offset, &mplsStorage); > + if (!mpls) { > + break; > + } > + > + layers->l3Offset += MPLS_HLEN; > + layers->l4Offset += MPLS_HLEN; > + > + if (mpls->lse & htonl(MPLS_BOS_MASK)) { > + /* > + * Bottom of Stack bit is set, which means there are no > + * remaining MPLS labels in the packet. > + */ > + break; > + } > + } > + } > + > + return NDIS_STATUS_SUCCESS; > +} > + > +/* > +*---------------------------------------------------------------------------- > +* Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', and > +* 'ofp_in_port'. > +* > +* Initializes 'packet' header pointers as follows: > +* > +* - packet->l2 to the start of the Ethernet header. > +* > +* - packet->l3 to just past the Ethernet header, or just past the > +* vlan_header if one is present, to the first byte of the payload of the > +* Ethernet frame. > +* > +* - packet->l4 to just past the IPv4 header, if one is present and has a > +* correct length, and otherwise NULL. > +* > +* - packet->l7 to just past the TCP, UDP, SCTP or ICMP header, if one is > +* present and has a correct length, and otherwise NULL. > +* > +* Returns NDIS_STATUS_SUCCESS normally. > +* Fails only if packet data cannot be accessed. > +* (e.g. if Pkt_CopyBytesOut() returns an error). > +*---------------------------------------------------------------------------- > +*/ > NDIS_STATUS > OvsExtractFlow(const NET_BUFFER_LIST *packet, > UINT32 inPort, > @@ -1900,8 +2069,8 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > > /* Link layer. 
*/ > eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet); > - memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH); > - memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH); > + RtlCopyMemory(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH); > + RtlCopyMemory(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH); > > /* > * vlan_tci. > @@ -1923,8 +2092,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > flow->l2.vlanTci = 0; > } > /* > - * XXX > - * Please note after this point, src mac and dst mac should > + * XXX Please note after this point, src mac and dst mac should > * not be accessed through eth > */ > eth = (Eth_Header *)((UINT8 *)eth + offset); > @@ -1955,7 +2123,8 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > layers->l3Offset = ETH_HEADER_LEN_DIX + offset; > } > > - flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE - flow- > >l2.offset; > + flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE > + - flow->l2.offset; > /* Network layer. */ > if (flow->l2.dlType == htons(ETH_TYPE_IPV4)) { > struct IPHdr ip_storage; > @@ -2012,9 +2181,9 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) { > NDIS_STATUS status; > flow->l2.keyLen += OVS_IPV6_KEY_SIZE; > - status = OvsParseIPv6(packet, flow, layers); > + status = OvsParseIPv6(packet, &flow->ipv6Key, layers); > if (status != NDIS_STATUS_SUCCESS) { > - memset(&flow->ipv6Key, 0, sizeof (Ipv6Key)); > + RtlZeroMemory(&flow->ipv6Key, sizeof (Ipv6Key)); > return status; > } > layers->isIPv6 = 1; > @@ -2029,7 +2198,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_SCTP) { > OvsParseSctp(packet, &flow->ipv6Key.l4, layers); > } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) { > - OvsParseIcmpV6(packet, flow, layers); > + OvsParseIcmpV6(packet, &flow->ipv6Key, &flow->icmp6Key, layers); > flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE); > } > } else if (flow->l2.dlType == 
htons(ETH_TYPE_ARP)) { > @@ -2051,10 +2220,10 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > } > if (arpKey->nwProto == ARPOP_REQUEST > || arpKey->nwProto == ARPOP_REPLY) { > - memcpy(&arpKey->nwSrc, arp->arp_spa, 4); > - memcpy(&arpKey->nwDst, arp->arp_tpa, 4); > - memcpy(arpKey->arpSha, arp->arp_sha, ETH_ADDR_LENGTH); > - memcpy(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH); > + RtlCopyMemory(&arpKey->nwSrc, arp->arp_spa, 4); > + RtlCopyMemory(&arpKey->nwDst, arp->arp_tpa, 4); > + RtlCopyMemory(arpKey->arpSha, arp->arp_sha, ETH_ADDR_LENGTH); > + RtlCopyMemory(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH); > } > } > } else if (OvsEthertypeIsMpls(flow->l2.dlType)) { > diff --git a/datapath-windows/ovsext/Flow.h b/datapath- > windows/ovsext/Flow.h > index 310c472..88240b5 100644 > --- a/datapath-windows/ovsext/Flow.h > +++ b/datapath-windows/ovsext/Flow.h > @@ -53,6 +53,8 @@ NDIS_STATUS OvsAllocateFlowTable(OVS_DATAPATH > *datapath, > > NDIS_STATUS OvsGetFlowMetadata(OvsFlowKey *key, > PNL_ATTR *keyAttrs); > +NDIS_STATUS OvsExtractLayers(const NET_BUFFER_LIST *packet, > + POVS_PACKET_HDR_INFO layers); > NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort, > OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers, > OvsIPv4TunnelKey *tunKey); > diff --git a/datapath-windows/ovsext/PacketParser.c b/datapath- > windows/ovsext/PacketParser.c > index 93df342..c4a04d0 100644 > --- a/datapath-windows/ovsext/PacketParser.c > +++ b/datapath-windows/ovsext/PacketParser.c > @@ -84,14 +84,13 @@ OvsGetPacketBytes(const NET_BUFFER_LIST *nbl, > > NDIS_STATUS > OvsParseIPv6(const NET_BUFFER_LIST *packet, > - OvsFlowKey *key, > + Ipv6Key *ipv6Key, > POVS_PACKET_HDR_INFO layers) > { > UINT16 ofs = layers->l3Offset; > IPv6Hdr ipv6HdrStorage; > const IPv6Hdr *nh; > UINT32 nextHdr; > - Ipv6Key *flow= &key->ipv6Key; > > nh = OvsGetPacketBytes(packet, sizeof *nh, ofs, &ipv6HdrStorage); > if (!nh) { > @@ -99,15 +98,15 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, > } > > 
nextHdr = nh->nexthdr; > - memcpy(&flow->ipv6Src, nh->saddr.s6_addr, 16); > - memcpy(&flow->ipv6Dst, nh->daddr.s6_addr, 16); > + RtlCopyMemory(&ipv6Key->ipv6Src, nh->saddr.s6_addr, 16); > + RtlCopyMemory(&ipv6Key->ipv6Dst, nh->daddr.s6_addr, 16); > > - flow->nwTos = ((nh->flow_lbl[0] & 0xF0) >> 4) | (nh->priority << 4); > - flow->ipv6Label = > + ipv6Key->nwTos = ((nh->flow_lbl[0] & 0xF0) >> 4) | (nh->priority << 4); > + ipv6Key->ipv6Label = > ((nh->flow_lbl[0] & 0x0F) << 16) | (nh->flow_lbl[1] << 8) | > nh->flow_lbl[2]; > - flow->nwTtl = nh->hop_limit; > - flow->nwProto = SOCKET_IPPROTO_NONE; > - flow->nwFrag = OVS_FRAG_TYPE_NONE; > + ipv6Key->nwTtl = nh->hop_limit; > + ipv6Key->nwProto = SOCKET_IPPROTO_NONE; > + ipv6Key->nwFrag = OVS_FRAG_TYPE_NONE; > > // Parse extended headers and compute L4 offset > ofs += sizeof(IPv6Hdr); > @@ -160,9 +159,9 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, > /* We only process the first fragment. */ > if (fragHdr->offlg != htons(0)) { > if ((fragHdr->offlg & IP6F_OFF_HOST_ORDER_MASK) == htons(0)) > { > - flow->nwFrag = OVS_FRAG_TYPE_FIRST; > + ipv6Key->nwFrag = OVS_FRAG_TYPE_FIRST; > } else { > - flow->nwFrag = OVS_FRAG_TYPE_LATER; > + ipv6Key->nwFrag = OVS_FRAG_TYPE_LATER; > nextHdr = SOCKET_IPPROTO_FRAGMENT; > break; > } > @@ -170,7 +169,7 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, > } > } > > - flow->nwProto = (UINT8)nextHdr; > + ipv6Key->nwProto = (UINT8)nextHdr; > layers->l4Offset = ofs; > return NDIS_STATUS_SUCCESS; > } > @@ -183,10 +182,14 @@ OvsParseTcp(const NET_BUFFER_LIST *packet, > TCPHdr tcpStorage; > const TCPHdr *tcp = OvsGetTcp(packet, layers->l4Offset, &tcpStorage); > if (tcp) { > - flow->tpSrc = tcp->source; > - flow->tpDst = tcp->dest; > - layers->isTcp = 1; > - layers->l7Offset = layers->l4Offset + 4 * tcp->doff; > + if (flow) { > + flow->tpSrc = tcp->source; > + flow->tpDst = tcp->dest; > + } > + if (layers) { > + layers->isTcp = 1; > + layers->l7Offset = layers->l4Offset + 4 * tcp->doff; > + } > } > } > 
> @@ -198,10 +201,14 @@ OvsParseSctp(const NET_BUFFER_LIST *packet, > SCTPHdr sctpStorage; > const SCTPHdr *sctp = OvsGetSctp(packet, layers->l4Offset, &sctpStorage); > if (sctp) { > - flow->tpSrc = sctp->source; > - flow->tpDst = sctp->dest; > - layers->isSctp = 1; > - layers->l7Offset = layers->l4Offset + sizeof *sctp; > + if (flow) { > + flow->tpSrc = sctp->source; > + flow->tpDst = sctp->dest; > + } > + if (layers) { > + layers->isSctp = 1; > + layers->l7Offset = layers->l4Offset + sizeof *sctp; > + } > } > } > > @@ -213,29 +220,33 @@ OvsParseUdp(const NET_BUFFER_LIST *packet, > UDPHdr udpStorage; > const UDPHdr *udp = OvsGetUdp(packet, layers->l4Offset, &udpStorage); > if (udp) { > - flow->tpSrc = udp->source; > - flow->tpDst = udp->dest; > - layers->isUdp = 1; > - if (udp->check == 0) { > - layers->udpCsumZero = 1; > + if (flow) { > + flow->tpSrc = udp->source; > + flow->tpDst = udp->dest; > + } > + if (layers) { > + layers->isUdp = 1; > + if (udp->check == 0) { > + layers->udpCsumZero = 1; > + } > + layers->l7Offset = layers->l4Offset + sizeof *udp; > } > - layers->l7Offset = layers->l4Offset + sizeof *udp; > } > } > > NDIS_STATUS > OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > - OvsFlowKey *key, > - POVS_PACKET_HDR_INFO layers) > + Ipv6Key *ipv6Key, > + Icmp6Key *icmp6Key, > + POVS_PACKET_HDR_INFO layers) > { > UINT16 ofs = layers->l4Offset; > ICMPHdr icmpStorage; > const ICMPHdr *icmp; > - Icmp6Key *flow = &key->icmp6Key; > > - memset(&flow->ndTarget, 0, sizeof(flow->ndTarget)); > - memset(flow->arpSha, 0, sizeof(flow->arpSha)); > - memset(flow->arpTha, 0, sizeof(flow->arpTha)); > + memset(&icmp6Key->ndTarget, 0, sizeof(icmp6Key->ndTarget)); > + memset(icmp6Key->arpSha, 0, sizeof(icmp6Key->arpSha)); > + memset(icmp6Key->arpTha, 0, sizeof(icmp6Key->arpTha)); > > icmp = OvsGetIcmp(packet, ofs, &icmpStorage); > if (!icmp) { > @@ -247,8 +258,10 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > * The ICMPv6 type and code fields use the 16-bit transport port 
> * fields, so we need to store them in 16-bit network byte order. > */ > - key->ipv6Key.l4.tpSrc = htons(icmp->type); > - key->ipv6Key.l4.tpDst = htons(icmp->code); > + if (ipv6Key) { > + ipv6Key->l4.tpSrc = htons(icmp->type); > + ipv6Key->l4.tpDst = htons(icmp->code); > + } > > if (icmp->code == 0 && > (icmp->type == ND_NEIGHBOR_SOLICIT || > @@ -261,7 +274,7 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > if (!ndTarget) { > return NDIS_STATUS_FAILURE; > } > - flow->ndTarget = *ndTarget; > + icmp6Key->ndTarget = *ndTarget; > > while ((UINT32)(ofs + 8) <= OvsPacketLenNBL(packet)) { > /* > @@ -288,14 +301,14 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > * layer option is specified twice. > */ > if (ndOpt->type == ND_OPT_SOURCE_LINKADDR && optLen == 8) { > - if (Eth_IsNullAddr(flow->arpSha)) { > - memcpy(flow->arpSha, ndOpt + 1, ETH_ADDR_LENGTH); > + if (Eth_IsNullAddr(icmp6Key->arpSha)) { > + memcpy(icmp6Key->arpSha, ndOpt + 1, ETH_ADDR_LENGTH); > } else { > goto invalid; > } > } else if (ndOpt->type == ND_OPT_TARGET_LINKADDR && optLen == 8) > { > - if (Eth_IsNullAddr(flow->arpTha)) { > - memcpy(flow->arpTha, ndOpt + 1, ETH_ADDR_LENGTH); > + if (Eth_IsNullAddr(icmp6Key->arpTha)) { > + memcpy(icmp6Key->arpTha, ndOpt + 1, ETH_ADDR_LENGTH); > } else { > goto invalid; > } > @@ -309,9 +322,9 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > return NDIS_STATUS_SUCCESS; > > invalid: > - memset(&flow->ndTarget, 0, sizeof(flow->ndTarget)); > - memset(flow->arpSha, 0, sizeof(flow->arpSha)); > - memset(flow->arpTha, 0, sizeof(flow->arpTha)); > + RtlZeroMemory(&icmp6Key->ndTarget, sizeof(icmp6Key->ndTarget)); > + RtlZeroMemory(icmp6Key->arpSha, sizeof(icmp6Key->arpSha)); > + RtlZeroMemory(icmp6Key->arpTha, sizeof(icmp6Key->arpTha)); > > return NDIS_STATUS_FAILURE; > } > diff --git a/datapath-windows/ovsext/PacketParser.h b/datapath- > windows/ovsext/PacketParser.h > index 47d227f..f1d7f28 100644 > --- a/datapath-windows/ovsext/PacketParser.h > +++ 
b/datapath-windows/ovsext/PacketParser.h > @@ -22,7 +22,7 @@ > > const VOID* OvsGetPacketBytes(const NET_BUFFER_LIST *_pNB, UINT32 len, > UINT32 SrcOffset, VOID *storage); > -NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, OvsFlowKey > *key, > +NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, Ipv6Key *key, > POVS_PACKET_HDR_INFO layers); > VOID OvsParseTcp(const NET_BUFFER_LIST *packet, L4Key *flow, > POVS_PACKET_HDR_INFO layers); > @@ -30,8 +30,10 @@ VOID OvsParseUdp(const NET_BUFFER_LIST *packet, > L4Key *flow, > POVS_PACKET_HDR_INFO layers); > VOID OvsParseSctp(const NET_BUFFER_LIST *packet, L4Key *flow, > POVS_PACKET_HDR_INFO layers); > -NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, OvsFlowKey > *key, > - POVS_PACKET_HDR_INFO layers); > +NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > + Ipv6Key *ipv6Key, > + Icmp6Key *flow, > + POVS_PACKET_HDR_INFO layers); > > static __inline ULONG > OvsPacketLenNBL(const NET_BUFFER_LIST *_pNB) > diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c > index dd7bf92..8ae9b52 100644 > --- a/datapath-windows/ovsext/Stt.c > +++ b/datapath-windows/ovsext/Stt.c > @@ -217,6 +217,8 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > } else { > innerPartialChecksum = TRUE; > } > + } else if (!layers->isIPv4) { > + innerChecksumVerified = TRUE; > } > > status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL); > @@ -231,8 +233,8 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > * memory. 
> */ > curMdl = NET_BUFFER_CURRENT_MDL(curNb); > - ASSERT((int) (MmGetMdlByteCount(curMdl) - > NET_BUFFER_CURRENT_MDL_OFFSET(curNb)) > - >= (int) headRoom); > + ASSERT((int) (MmGetMdlByteCount(curMdl) - > + NET_BUFFER_CURRENT_MDL_OFFSET(curNb)) >= (int) headRoom); > > buf = (PUINT8) MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); > if (!buf) { > @@ -288,8 +290,10 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > /* Calculate pseudo header chksum */ > tcpChksumLen = sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen; > ASSERT(tcpChksumLen < 65535); > - outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr,(uint32 *) > &tunKey->dst, > - IPPROTO_TCP, (uint16) > tcpChksumLen); > + outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr, > + (uint32 *) &tunKey->dst, > + IPPROTO_TCP, > + (uint16) tcpChksumLen); > sttHdr->version = 0; > > /* Set STT Header */ > @@ -327,8 +331,16 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > NET_BUFFER_LIST_INFO(curNbl, > TcpIpChecksumNetBufferListInfo) = csumInfo.Value; > > - UINT32 encapMss = OvsGetExternalMtu(switchContext) - sizeof(IPHdr) - > sizeof(TCPHdr); > + UINT32 encapMss = OvsGetExternalMtu(switchContext) > + - sizeof(IPHdr) > + - sizeof(TCPHdr); > if (ipTotalLen > encapMss) { > + outerIpHdr->check = IPChecksum((UINT8 *)outerIpHdr, > + sizeof *outerIpHdr, 0); > + outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr, > + (uint32 *) &tunKey->dst, > + IPPROTO_TCP, (uint16) 0); > + > lsoInfo.Value = 0; > lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset; > lsoInfo.LsoV2Transmit.MSS = encapMss; > @@ -616,7 +628,8 @@ OvsSttReassemble(POVS_SWITCH_CONTEXT > switchContext, > > UINT64 currentTime; > NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime); > - entry->timeout = currentTime + STT_ENTRY_TIMEOUT; > + // use IpHdr TTL for fragment expiration > + entry->timeout = currentTime + ((UINT64)ipHdr->ttl*1000*1000*10); > > if (segOffset == 0) { > entry->sttHdr = *sttHdr; > @@ -655,7 +668,8 @@ handle_error: > if (lastPacket) { > /*
Retrieve the original STT header */ > NdisMoveMemory(newSttHdr, &pktFragEntry->sttHdr, sizeof (SttHdr)); > - targetPNbl = OvsAllocateNBLFromBuffer(switchContext, pktFragEntry- > >packetBuf, > + targetPNbl = OvsAllocateNBLFromBuffer(switchContext, > + pktFragEntry->packetBuf, > innerPacketLen); > > /* Delete this entry and free up the memory/ */ > @@ -668,16 +682,67 @@ handle_error: > return lastPacket ? targetPNbl : NULL; > } > > -VOID > -OvsDecapSetOffloads(PNET_BUFFER_LIST curNbl, SttHdr *sttHdr) > + > +/* > +*---------------------------------------------------------------------------- > +* OvsDecapSetOffloads > +* Processes received STT header and sets TcpIpChecksumNetBufferListInfo > +* accordingly. > +* For TCP packets with total length bigger than destination MSS it > +* populates TcpLargeSendNetBufferListInfo. > +* > +* Returns NDIS_STATUS_SUCCESS normally. > +* Fails only if packet data is invalid. > +* (e.g. if OvsExtractLayers() returns an error). > +*---------------------------------------------------------------------------- > +*/ > +NDIS_STATUS > +OvsDecapSetOffloads(PNET_BUFFER_LIST *curNbl, SttHdr *sttHdr) > { > - if ((sttHdr->flags & STT_CSUM_VERIFIED) > - || !(sttHdr->flags & STT_CSUM_PARTIAL)) { > - return; > + NDIS_STATUS status; > + OVS_PACKET_HDR_INFO layers; > + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; > + > + // if STT_CSUM_PARTIAL is not set we have two options: > + // - STT_CSUM_VERIFIED is set - we pass the packet as is > + // - no flag set - we must compute the checksums > + if (!(sttHdr->flags & STT_CSUM_PARTIAL)) { > + if (sttHdr->flags & STT_CSUM_VERIFIED) { > + NET_BUFFER_LIST_INFO(*curNbl, > + TcpIpChecksumNetBufferListInfo) = 0; > + return NDIS_STATUS_SUCCESS; > + } > + > + status = OvsExtractLayers(*curNbl, &layers); > + if (status != NDIS_STATUS_SUCCESS) { > + return status; > + } > + > + csumInfo.Value = 0; > + csumInfo.Transmit.IsIPv4 = layers.isIPv4; > + csumInfo.Transmit.IsIPv6 = layers.isIPv6; > + > + /* Set 
Transmit fields in order to calculate the checksums */ > + csumInfo.Transmit.IpHeaderChecksum = layers.isIPv4; > + csumInfo.Transmit.TcpChecksum = layers.isTcp; > + csumInfo.Transmit.UdpChecksum = layers.isUdp; > + > + status = OvsApplySWChecksumOnNB(&layers, *curNbl, &csumInfo); > + if (status != NDIS_STATUS_SUCCESS) { > + return status; > + } > + > + csumInfo.Value = 0; > + csumInfo.Transmit.IpHeaderChecksum = 0; > + csumInfo.Transmit.TcpChecksum = 0; > + csumInfo.Transmit.UdpChecksum = 0; > + NET_BUFFER_LIST_INFO(*curNbl, > + TcpIpChecksumNetBufferListInfo) = csumInfo.Value; > + > + return NDIS_STATUS_SUCCESS; > } > > UINT8 protoType; > - NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; > csumInfo.Value = 0; > csumInfo.Transmit.IpHeaderChecksum = 0; > csumInfo.Transmit.TcpHeaderOffset = sttHdr->l4Offset; > @@ -703,25 +768,69 @@ OvsDecapSetOffloads(PNET_BUFFER_LIST curNbl, > SttHdr *sttHdr) > csumInfo.Transmit.IsIPv6 = 1; > csumInfo.Transmit.UdpChecksum = 1; > } > - NET_BUFFER_LIST_INFO(curNbl, > + NET_BUFFER_LIST_INFO(*curNbl, > TcpIpChecksumNetBufferListInfo) = csumInfo.Value; > > if (sttHdr->mss) { > NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo; > + > + if (sttHdr->flags & STT_PROTO_TCP) > + { > + PMDL curMdl = NULL; > + PNET_BUFFER curNb; > + PUINT8 buf = NULL; > + > + status = OvsExtractLayers(*curNbl, &layers); > + if (status != NDIS_STATUS_SUCCESS) { > + return status; > + } > + > + curNb = NET_BUFFER_LIST_FIRST_NB(*curNbl); > + curMdl = NET_BUFFER_CURRENT_MDL(curNb); > + > + buf = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, > + LowPagePriority); > + buf += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); > + > + // apply pseudo checksum on extracted packet > + if (sttHdr->flags & STT_PROTO_IPV4) { > + IPHdr *ipHdr; > + TCPHdr *tcpHdr; > + > + ipHdr = (IPHdr *)(buf + layers.l3Offset); > + tcpHdr = (TCPHdr *)(buf + layers.l4Offset); > + > + tcpHdr->check = IPPseudoChecksum(&ipHdr->saddr, > + (uint32 *)&ipHdr->daddr, > + IPPROTO_TCP, 0); > + } else { > 
+ IPv6Hdr *ipHdr; > + TCPHdr *tcpHdr; > + > + ipHdr = (IPv6Hdr *)(buf + layers.l3Offset); > + tcpHdr = (TCPHdr *)(buf + layers.l4Offset); > + > + tcpHdr->check = IPv6PseudoChecksum((UINT32*)&ipHdr->saddr, > + (UINT32*)&ipHdr->daddr, > + IPPROTO_TCP, 0); > + } > + } > + > + // setup LSO > lsoInfo.Value = 0; > lsoInfo.LsoV2Transmit.TcpHeaderOffset = sttHdr->l4Offset; > - lsoInfo.LsoV2Transmit.MSS = ETH_DEFAULT_MTU > - - sizeof(IPHdr) > - - sizeof(TCPHdr); > + lsoInfo.LsoV2Transmit.MSS = ntohs(sttHdr->mss) + sizeof(TCPHdr); > lsoInfo.LsoV2Transmit.Type = > NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; > if (sttHdr->flags & STT_PROTO_IPV4) { > lsoInfo.LsoV2Transmit.IPVersion = > NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4; > } else { > lsoInfo.LsoV2Transmit.IPVersion = > NDIS_TCP_LARGE_SEND_OFFLOAD_IPv6; > } > - NET_BUFFER_LIST_INFO(curNbl, > + NET_BUFFER_LIST_INFO(*curNbl, > TcpLargeSendNetBufferListInfo) = lsoInfo.Value; > } > + > + return NDIS_STATUS_SUCCESS; > } > > /* > @@ -736,15 +845,14 @@ OvsDecapStt(POVS_SWITCH_CONTEXT > switchContext, > OvsIPv4TunnelKey *tunKey, > PNET_BUFFER_LIST *newNbl) > { > - NDIS_STATUS status = NDIS_STATUS_FAILURE; > - PNET_BUFFER curNb, newNb; > + NDIS_STATUS status; > + PNET_BUFFER curNb; > IPHdr *ipHdr; > char *ipBuf[sizeof(IPHdr)]; > SttHdr stt; > SttHdr *sttHdr; > char *sttBuf[STT_HDR_LEN]; > UINT32 advanceCnt, hdrLen; > - BOOLEAN isLsoPacket = FALSE; > > curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); > ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); > @@ -767,7 +875,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > TCPHdr *tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4); > > /* Skip IP & TCP headers */ > - hdrLen = sizeof(IPHdr) + sizeof(TCPHdr), > + hdrLen = (ipHdr->ihl * 4) + (tcp->doff * 4); > NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); > advanceCnt += hdrLen; > > @@ -775,7 +883,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > UINT32 totalLen = (seq >> STT_SEQ_LEN_SHIFT); > UINT16 payloadLen = (UINT16)ntohs(ipHdr->tot_len) > - 
(ipHdr->ihl * 4) > - - (sizeof * tcp); > + - (tcp->doff * 4); > > /* Check if incoming packet requires reassembly */ > if (totalLen != payloadLen) { > @@ -788,7 +896,6 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > } > > *newNbl = pNbl; > - isLsoPacket = TRUE; > } else { > /* STT Header */ > sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, > @@ -812,7 +919,6 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > OvsCompleteNBL(switchContext, *newNbl, TRUE); > return NDIS_STATUS_FAILURE; > } > - newNb = NET_BUFFER_LIST_FIRST_NB(*newNbl); > > ASSERT(sttHdr); > > @@ -826,7 +932,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > tunKey->pad = 0; > > /* Set Checksum and LSO offload flags */ > - OvsDecapSetOffloads(*newNbl, sttHdr); > + OvsDecapSetOffloads(newNbl, sttHdr); > > return NDIS_STATUS_SUCCESS; > } > diff --git a/datapath-windows/ovsext/Stt.h b/datapath-windows/ovsext/Stt.h > index a3e3915..20066e6 100644 > --- a/datapath-windows/ovsext/Stt.h > +++ b/datapath-windows/ovsext/Stt.h > @@ -36,7 +36,6 @@ > > #define STT_HASH_TABLE_SIZE ((UINT32)1 << 10) > #define STT_HASH_TABLE_MASK (STT_HASH_TABLE_SIZE - 1) > -#define STT_ENTRY_TIMEOUT 300000000 // 30s > #define STT_CLEANUP_INTERVAL 300000000 // 30s > > #define STT_ETH_PAD 2 > diff --git a/datapath-windows/ovsext/User.c b/datapath- > windows/ovsext/User.c > index 34f38f4..7de7685 100644 > --- a/datapath-windows/ovsext/User.c > +++ b/datapath-windows/ovsext/User.c > @@ -736,7 +736,8 @@ OvsCreateAndAddPackets(PVOID userData, > NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; > UINT32 packetLength; > > - tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, > TcpLargeSendNetBufferListInfo); > + tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, > + TcpLargeSendNetBufferListInfo); > nb = NET_BUFFER_LIST_FIRST_NB(nbl); > packetLength = NET_BUFFER_DATA_LENGTH(nb); > > @@ -838,7 +839,8 @@ OvsCompletePacketHeader(UINT8 *packet, > (UINT32 > *)&ipHdr->DestinationAddress, > IPPROTO_TCP, hdrInfoOut->l4PayLoad); > } else { > - 
PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + > hdrInfoIn->l3Offset); > + PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + > + hdrInfoIn->l3Offset); > hdrInfoOut->l4PayLoad = > (UINT16)(ntohs(ipv6Hdr->PayloadLength) + > hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)- > @@ -852,9 +854,9 @@ OvsCompletePacketHeader(UINT8 *packet, > hdrInfoOut->tcpCsumNeeded = 1; > ovsUserStats.recalTcpCsum++; > } else if (!isRecv) { > - if (csumInfo.Transmit.TcpChecksum) { > + if (hdrInfoIn->isTcp && csumInfo.Transmit.TcpChecksum) { > hdrInfoOut->tcpCsumNeeded = 1; > - } else if (csumInfo.Transmit.UdpChecksum) { > + } else if (hdrInfoIn->isUdp && csumInfo.Transmit.UdpChecksum) { > hdrInfoOut->udpCsumNeeded = 1; > } > if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) { > @@ -864,7 +866,8 @@ OvsCompletePacketHeader(UINT8 *packet, > hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP; > #endif > if (hdrInfoIn->isIPv4) { > - PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + > hdrInfoIn->l3Offset); > + PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + > + hdrInfoIn->l3Offset); > hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) - > (ipHdr->HeaderLength << 2)); > #ifdef DBG > @@ -972,8 +975,8 @@ OvsCreateQueueNlPacket(PVOID userData, > csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, > TcpIpChecksumNetBufferListInfo); > > if (isRecv && (csumInfo.Receive.TcpChecksumFailed || > - (csumInfo.Receive.UdpChecksumFailed && > !hdrInfo->udpCsumZero) > || > - csumInfo.Receive.IpChecksumFailed)) { > + (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) || > + csumInfo.Receive.IpChecksumFailed)) { > OVS_LOG_INFO("Packet dropped due to checksum failure."); > ovsUserStats.dropDuetoChecksum++; > return NULL; > -- > 2.7.2.windows.1 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev