Thanks for making the changes. Acked-by: Sairam Venugopal <vsai...@vmware.com>
On 6/6/16, 9:45 AM, "Paul Boca" <pb...@cloudbasesolutions.com> wrote: >*Added OvsExtractLayers - populates only the layers field without >unnecessary >memory operations for flow part >*If in STT header the flags are 0 then force packets checksums calculation >on receive. >*Ensure correct pseudo checksum is set for LSO both on send and receive. >Linux includes the segment length to TCP pseudo-checksum conforming to >RFC 793 but in case of LSO Windows expects this to be only on >Source IP Address, Destination IP Address, and Protocol. >*Fragment expiration on rx side of STT was set to 30 seconds, but the >correct >timeout would be TTL of the packet > >Signed-off-by: Paul-Daniel Boca <pb...@cloudbasesolutions.com> >--- >V2: Use STT_ENTRY_TIMEOUT on STT reassembly. > Small refactoring and added LSO comment with specific requirements. >--- > datapath-windows/ovsext/Flow.c | 243 >++++++++++++++++++++++++++++----- > datapath-windows/ovsext/Flow.h | 2 + > datapath-windows/ovsext/PacketParser.c | 97 +++++++------ > datapath-windows/ovsext/PacketParser.h | 8 +- > datapath-windows/ovsext/Stt.c | 124 +++++++++++++---- > datapath-windows/ovsext/User.c | 17 ++- > 6 files changed, 377 insertions(+), 114 deletions(-) > >diff --git a/datapath-windows/ovsext/Flow.c >b/datapath-windows/ovsext/Flow.c >index c2e0227..2a91855 100644 >--- a/datapath-windows/ovsext/Flow.c >+++ b/datapath-windows/ovsext/Flow.c >@@ -1570,7 +1570,8 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs, > > ndKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_ND]); > RtlCopyMemory(&icmp6FlowPutKey->ndTarget, >- ndKey->nd_target, sizeof >(icmp6FlowPutKey->ndTarget)); >+ ndKey->nd_target, >+ sizeof (icmp6FlowPutKey->ndTarget)); > RtlCopyMemory(icmp6FlowPutKey->arpSha, > ndKey->nd_sll, ETH_ADDR_LEN); > RtlCopyMemory(icmp6FlowPutKey->arpTha, >@@ -1600,8 +1601,10 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs, > arpFlowPutKey->nwSrc = arpKey->arp_sip; > arpFlowPutKey->nwDst = arpKey->arp_tip; > >- RtlCopyMemory(arpFlowPutKey->arpSha, 
arpKey->arp_sha, >ETH_ADDR_LEN); >- RtlCopyMemory(arpFlowPutKey->arpTha, arpKey->arp_tha, >ETH_ADDR_LEN); >+ RtlCopyMemory(arpFlowPutKey->arpSha, arpKey->arp_sha, >+ ETH_ADDR_LEN); >+ RtlCopyMemory(arpFlowPutKey->arpTha, arpKey->arp_tha, >+ ETH_ADDR_LEN); > /* Kernel datapath assumes 'arpFlowPutKey->nwProto' to be in >host > * order. */ > arpFlowPutKey->nwProto = (UINT8)ntohs((arpKey->arp_op)); >@@ -1850,29 +1853,195 @@ OvsGetFlowMetadata(OvsFlowKey *key, > return status; > } > >+ > /* >- >*------------------------------------------------------------------------- >--- >- * Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', >and >- * 'ofp_in_port'. >- * >- * Initializes 'packet' header pointers as follows: >- * >- * - packet->l2 to the start of the Ethernet header. >- * >- * - packet->l3 to just past the Ethernet header, or just past the >- * vlan_header if one is present, to the first byte of the payload >of the >- * Ethernet frame. >- * >- * - packet->l4 to just past the IPv4 header, if one is present and >has a >- * correct length, and otherwise NULL. >- * >- * - packet->l7 to just past the TCP, UDP, SCTP or ICMP header, if >one is >- * present and has a correct length, and otherwise NULL. >- * >- * Returns NDIS_STATUS_SUCCESS normally. Fails only if packet data >cannot be accessed >- * (e.g. if Pkt_CopyBytesOut() returns an error). >- >*------------------------------------------------------------------------- >--- >- */ >+*------------------------------------------------------------------------ >---- >+* Initializes 'layers' members from 'packet' >+* >+* Initializes 'layers' header pointers as follows: >+* >+* - layers->l2 to the start of the Ethernet header. >+* >+* - layers->l3 to just past the Ethernet header, or just past the >+* vlan_header if one is present, to the first byte of the payload >of the >+* Ethernet frame. >+* >+* - layers->l4 to just past the IPv4 header, if one is present and >has a >+* correct length, and otherwise NULL. 
>+* >+* - layers->l7 to just past the TCP, UDP, SCTP or ICMP header, if one >is >+* present and has a correct length, and otherwise NULL. >+* >+* - layers->isIPv4/isIPv6/isTcp/isUdp/isSctp based on the packet type >+* >+* Returns NDIS_STATUS_SUCCESS normally. >+* Fails only if packet data cannot be accessed. >+* (e.g. if OvsParseIPv6() returns an error). >+*------------------------------------------------------------------------ >---- >+*/ >+NDIS_STATUS >+OvsExtractLayers(const NET_BUFFER_LIST *packet, >+ POVS_PACKET_HDR_INFO layers) >+{ >+ struct Eth_Header *eth; >+ UINT8 offset = 0; >+ PVOID vlanTagValue; >+ ovs_be16 dlType; >+ >+ layers->value = 0; >+ >+ /* Link layer. */ >+ eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet); >+ >+ /* >+ * vlan_tci. >+ */ >+ vlanTagValue = NET_BUFFER_LIST_INFO(packet, >Ieee8021QNetBufferListInfo); >+ if (!vlanTagValue) { >+ if (eth->dix.typeNBO == ETH_TYPE_802_1PQ_NBO) { >+ offset = sizeof(Eth_802_1pq_Tag); >+ } >+ >+ /* >+ * XXX Please note after this point, src mac and dst mac should >+ * not be accessed through eth >+ */ >+ eth = (Eth_Header *)((UINT8 *)eth + offset); >+ } >+ >+ /* >+ * dl_type. >+ * >+ * XXX assume that at least the first >+ * 12 bytes of received packets are mapped. This code has the >stronger >+ * assumption that at least the first 22 bytes of 'packet' is mapped >(if my >+ * arithmetic is right). 
>+ */ >+ if (ETH_TYPENOT8023(eth->dix.typeNBO)) { >+ dlType = eth->dix.typeNBO; >+ layers->l3Offset = ETH_HEADER_LEN_DIX + offset; >+ } else if (OvsPacketLenNBL(packet) >= ETH_HEADER_LEN_802_3 && >+ eth->e802_3.llc.dsap == 0xaa && >+ eth->e802_3.llc.ssap == 0xaa && >+ eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME && >+ eth->e802_3.snap.snapOrg[0] == 0x00 && >+ eth->e802_3.snap.snapOrg[1] == 0x00 && >+ eth->e802_3.snap.snapOrg[2] == 0x00) { >+ dlType = eth->e802_3.snap.snapType.typeNBO; >+ layers->l3Offset = ETH_HEADER_LEN_802_3 + offset; >+ } else { >+ dlType = htons(OVSWIN_DL_TYPE_NONE); >+ layers->l3Offset = ETH_HEADER_LEN_DIX + offset; >+ } >+ >+ /* Network layer. */ >+ if (dlType == htons(ETH_TYPE_IPV4)) { >+ struct IPHdr ip_storage; >+ const struct IPHdr *nh; >+ >+ layers->isIPv4 = 1; >+ nh = OvsGetIp(packet, layers->l3Offset, &ip_storage); >+ if (nh) { >+ layers->l4Offset = layers->l3Offset + nh->ihl * 4; >+ >+ if (!(nh->frag_off & htons(IP_OFFSET))) { >+ if (nh->protocol == SOCKET_IPPROTO_TCP) { >+ OvsParseTcp(packet, NULL, layers); >+ } else if (nh->protocol == SOCKET_IPPROTO_UDP) { >+ OvsParseUdp(packet, NULL, layers); >+ } else if (nh->protocol == SOCKET_IPPROTO_SCTP) { >+ OvsParseSctp(packet, NULL, layers); >+ } else if (nh->protocol == SOCKET_IPPROTO_ICMP) { >+ ICMPHdr icmpStorage; >+ const ICMPHdr *icmp; >+ >+ icmp = OvsGetIcmp(packet, layers->l4Offset, >&icmpStorage); >+ if (icmp) { >+ layers->l7Offset = layers->l4Offset + sizeof >*icmp; >+ } >+ } >+ } >+ } >+ } else if (dlType == htons(ETH_TYPE_IPV6)) { >+ NDIS_STATUS status; >+ Ipv6Key ipv6Key; >+ >+ status = OvsParseIPv6(packet, &ipv6Key, layers); >+ if (status != NDIS_STATUS_SUCCESS) { >+ return status; >+ } >+ layers->isIPv6 = 1; >+ >+ if (ipv6Key.nwProto == SOCKET_IPPROTO_TCP) { >+ OvsParseTcp(packet, &(ipv6Key.l4), layers); >+ } else if (ipv6Key.nwProto == SOCKET_IPPROTO_UDP) { >+ OvsParseUdp(packet, &(ipv6Key.l4), layers); >+ } else if (ipv6Key.nwProto == SOCKET_IPPROTO_SCTP) { >+ 
OvsParseSctp(packet, &ipv6Key.l4, layers); >+ } else if (ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) { >+ Icmp6Key icmp6Key; >+ OvsParseIcmpV6(packet, NULL, &icmp6Key, layers); >+ } >+ } else if (OvsEthertypeIsMpls(dlType)) { >+ MPLSHdr mplsStorage; >+ const MPLSHdr *mpls; >+ >+ /* >+ * In the presence of an MPLS label stack the end of the L2 >+ * header and the beginning of the L3 header differ. >+ * >+ * A network packet may contain multiple MPLS labels, but we >+ * are only interested in the topmost label stack entry. >+ * >+ * Advance network header to the beginning of the L3 header. >+ * layers->l3Offset corresponds to the end of the L2 header. >+ */ >+ for (UINT32 i = 0; i < FLOW_MAX_MPLS_LABELS; i++) { >+ mpls = OvsGetMpls(packet, layers->l3Offset, &mplsStorage); >+ if (!mpls) { >+ break; >+ } >+ >+ layers->l3Offset += MPLS_HLEN; >+ layers->l4Offset += MPLS_HLEN; >+ >+ if (mpls->lse & htonl(MPLS_BOS_MASK)) { >+ /* >+ * Bottom of Stack bit is set, which means there are no >+ * remaining MPLS labels in the packet. >+ */ >+ break; >+ } >+ } >+ } >+ >+ return NDIS_STATUS_SUCCESS; >+} >+ >+/* >+*------------------------------------------------------------------------ >---- >+* Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', and >+* 'ofp_in_port'. >+* >+* Initializes 'packet' header pointers as follows: >+* >+* - packet->l2 to the start of the Ethernet header. >+* >+* - packet->l3 to just past the Ethernet header, or just past the >+* vlan_header if one is present, to the first byte of the payload >of the >+* Ethernet frame. >+* >+* - packet->l4 to just past the IPv4 header, if one is present and >has a >+* correct length, and otherwise NULL. >+* >+* - packet->l7 to just past the TCP, UDP, SCTP or ICMP header, if one >is >+* present and has a correct length, and otherwise NULL. >+* >+* Returns NDIS_STATUS_SUCCESS normally. >+* Fails only if packet data cannot be accessed. >+* (e.g. if Pkt_CopyBytesOut() returns an error). 
>+*------------------------------------------------------------------------ >---- >+*/ > NDIS_STATUS > OvsExtractFlow(const NET_BUFFER_LIST *packet, > UINT32 inPort, >@@ -1904,8 +2073,8 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > > /* Link layer. */ > eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet); >- memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH); >- memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH); >+ RtlCopyMemory(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH); >+ RtlCopyMemory(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH); > > /* > * vlan_tci. >@@ -1927,8 +2096,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > flow->l2.vlanTci = 0; > } > /* >- * XXX >- * Please note after this point, src mac and dst mac should >+ * XXX Please note after this point, src mac and dst mac should > * not be accessed through eth > */ > eth = (Eth_Header *)((UINT8 *)eth + offset); >@@ -1959,7 +2127,8 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > layers->l3Offset = ETH_HEADER_LEN_DIX + offset; > } > >- flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE - >flow->l2.offset; >+ flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE >+ - flow->l2.offset; > /* Network layer. 
*/ > if (flow->l2.dlType == htons(ETH_TYPE_IPV4)) { > struct IPHdr ip_storage; >@@ -2016,9 +2185,9 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) { > NDIS_STATUS status; > flow->l2.keyLen += OVS_IPV6_KEY_SIZE; >- status = OvsParseIPv6(packet, flow, layers); >+ status = OvsParseIPv6(packet, &flow->ipv6Key, layers); > if (status != NDIS_STATUS_SUCCESS) { >- memset(&flow->ipv6Key, 0, sizeof (Ipv6Key)); >+ RtlZeroMemory(&flow->ipv6Key, sizeof (Ipv6Key)); > return status; > } > layers->isIPv6 = 1; >@@ -2033,7 +2202,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_SCTP) { > OvsParseSctp(packet, &flow->ipv6Key.l4, layers); > } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) { >- OvsParseIcmpV6(packet, flow, layers); >+ OvsParseIcmpV6(packet, &flow->ipv6Key, &flow->icmp6Key, >layers); > flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE); > } > } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) { >@@ -2055,10 +2224,10 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > } > if (arpKey->nwProto == ARPOP_REQUEST > || arpKey->nwProto == ARPOP_REPLY) { >- memcpy(&arpKey->nwSrc, arp->arp_spa, 4); >- memcpy(&arpKey->nwDst, arp->arp_tpa, 4); >- memcpy(arpKey->arpSha, arp->arp_sha, ETH_ADDR_LENGTH); >- memcpy(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH); >+ RtlCopyMemory(&arpKey->nwSrc, arp->arp_spa, 4); >+ RtlCopyMemory(&arpKey->nwDst, arp->arp_tpa, 4); >+ RtlCopyMemory(arpKey->arpSha, arp->arp_sha, >ETH_ADDR_LENGTH); >+ RtlCopyMemory(arpKey->arpTha, arp->arp_tha, >ETH_ADDR_LENGTH); > } > } > } else if (OvsEthertypeIsMpls(flow->l2.dlType)) { >diff --git a/datapath-windows/ovsext/Flow.h >b/datapath-windows/ovsext/Flow.h >index fb3fb59..d39db45 100644 >--- a/datapath-windows/ovsext/Flow.h >+++ b/datapath-windows/ovsext/Flow.h >@@ -53,6 +53,8 @@ NDIS_STATUS OvsAllocateFlowTable(OVS_DATAPATH *datapath, > > NDIS_STATUS OvsGetFlowMetadata(OvsFlowKey *key, > 
PNL_ATTR *keyAttrs); >+NDIS_STATUS OvsExtractLayers(const NET_BUFFER_LIST *packet, >+ POVS_PACKET_HDR_INFO layers); > NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort, > OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers, > OvsIPv4TunnelKey *tunKey); >diff --git a/datapath-windows/ovsext/PacketParser.c >b/datapath-windows/ovsext/PacketParser.c >index 93df342..c4a04d0 100644 >--- a/datapath-windows/ovsext/PacketParser.c >+++ b/datapath-windows/ovsext/PacketParser.c >@@ -84,14 +84,13 @@ OvsGetPacketBytes(const NET_BUFFER_LIST *nbl, > > NDIS_STATUS > OvsParseIPv6(const NET_BUFFER_LIST *packet, >- OvsFlowKey *key, >+ Ipv6Key *ipv6Key, > POVS_PACKET_HDR_INFO layers) > { > UINT16 ofs = layers->l3Offset; > IPv6Hdr ipv6HdrStorage; > const IPv6Hdr *nh; > UINT32 nextHdr; >- Ipv6Key *flow= &key->ipv6Key; > > nh = OvsGetPacketBytes(packet, sizeof *nh, ofs, &ipv6HdrStorage); > if (!nh) { >@@ -99,15 +98,15 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, > } > > nextHdr = nh->nexthdr; >- memcpy(&flow->ipv6Src, nh->saddr.s6_addr, 16); >- memcpy(&flow->ipv6Dst, nh->daddr.s6_addr, 16); >+ RtlCopyMemory(&ipv6Key->ipv6Src, nh->saddr.s6_addr, 16); >+ RtlCopyMemory(&ipv6Key->ipv6Dst, nh->daddr.s6_addr, 16); > >- flow->nwTos = ((nh->flow_lbl[0] & 0xF0) >> 4) | (nh->priority << 4); >- flow->ipv6Label = >+ ipv6Key->nwTos = ((nh->flow_lbl[0] & 0xF0) >> 4) | (nh->priority << >4); >+ ipv6Key->ipv6Label = > ((nh->flow_lbl[0] & 0x0F) << 16) | (nh->flow_lbl[1] << 8) | >nh->flow_lbl[2]; >- flow->nwTtl = nh->hop_limit; >- flow->nwProto = SOCKET_IPPROTO_NONE; >- flow->nwFrag = OVS_FRAG_TYPE_NONE; >+ ipv6Key->nwTtl = nh->hop_limit; >+ ipv6Key->nwProto = SOCKET_IPPROTO_NONE; >+ ipv6Key->nwFrag = OVS_FRAG_TYPE_NONE; > > // Parse extended headers and compute L4 offset > ofs += sizeof(IPv6Hdr); >@@ -160,9 +159,9 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, > /* We only process the first fragment. 
*/ > if (fragHdr->offlg != htons(0)) { > if ((fragHdr->offlg & IP6F_OFF_HOST_ORDER_MASK) == >htons(0)) { >- flow->nwFrag = OVS_FRAG_TYPE_FIRST; >+ ipv6Key->nwFrag = OVS_FRAG_TYPE_FIRST; > } else { >- flow->nwFrag = OVS_FRAG_TYPE_LATER; >+ ipv6Key->nwFrag = OVS_FRAG_TYPE_LATER; > nextHdr = SOCKET_IPPROTO_FRAGMENT; > break; > } >@@ -170,7 +169,7 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, > } > } > >- flow->nwProto = (UINT8)nextHdr; >+ ipv6Key->nwProto = (UINT8)nextHdr; > layers->l4Offset = ofs; > return NDIS_STATUS_SUCCESS; > } >@@ -183,10 +182,14 @@ OvsParseTcp(const NET_BUFFER_LIST *packet, > TCPHdr tcpStorage; > const TCPHdr *tcp = OvsGetTcp(packet, layers->l4Offset, &tcpStorage); > if (tcp) { >- flow->tpSrc = tcp->source; >- flow->tpDst = tcp->dest; >- layers->isTcp = 1; >- layers->l7Offset = layers->l4Offset + 4 * tcp->doff; >+ if (flow) { >+ flow->tpSrc = tcp->source; >+ flow->tpDst = tcp->dest; >+ } >+ if (layers) { >+ layers->isTcp = 1; >+ layers->l7Offset = layers->l4Offset + 4 * tcp->doff; >+ } > } > } > >@@ -198,10 +201,14 @@ OvsParseSctp(const NET_BUFFER_LIST *packet, > SCTPHdr sctpStorage; > const SCTPHdr *sctp = OvsGetSctp(packet, layers->l4Offset, >&sctpStorage); > if (sctp) { >- flow->tpSrc = sctp->source; >- flow->tpDst = sctp->dest; >- layers->isSctp = 1; >- layers->l7Offset = layers->l4Offset + sizeof *sctp; >+ if (flow) { >+ flow->tpSrc = sctp->source; >+ flow->tpDst = sctp->dest; >+ } >+ if (layers) { >+ layers->isSctp = 1; >+ layers->l7Offset = layers->l4Offset + sizeof *sctp; >+ } > } > } > >@@ -213,29 +220,33 @@ OvsParseUdp(const NET_BUFFER_LIST *packet, > UDPHdr udpStorage; > const UDPHdr *udp = OvsGetUdp(packet, layers->l4Offset, &udpStorage); > if (udp) { >- flow->tpSrc = udp->source; >- flow->tpDst = udp->dest; >- layers->isUdp = 1; >- if (udp->check == 0) { >- layers->udpCsumZero = 1; >+ if (flow) { >+ flow->tpSrc = udp->source; >+ flow->tpDst = udp->dest; >+ } >+ if (layers) { >+ layers->isUdp = 1; >+ if (udp->check == 0) { >+ 
layers->udpCsumZero = 1; >+ } >+ layers->l7Offset = layers->l4Offset + sizeof *udp; > } >- layers->l7Offset = layers->l4Offset + sizeof *udp; > } > } > > NDIS_STATUS > OvsParseIcmpV6(const NET_BUFFER_LIST *packet, >- OvsFlowKey *key, >- POVS_PACKET_HDR_INFO layers) >+ Ipv6Key *ipv6Key, >+ Icmp6Key *icmp6Key, >+ POVS_PACKET_HDR_INFO layers) > { > UINT16 ofs = layers->l4Offset; > ICMPHdr icmpStorage; > const ICMPHdr *icmp; >- Icmp6Key *flow = &key->icmp6Key; > >- memset(&flow->ndTarget, 0, sizeof(flow->ndTarget)); >- memset(flow->arpSha, 0, sizeof(flow->arpSha)); >- memset(flow->arpTha, 0, sizeof(flow->arpTha)); >+ memset(&icmp6Key->ndTarget, 0, sizeof(icmp6Key->ndTarget)); >+ memset(icmp6Key->arpSha, 0, sizeof(icmp6Key->arpSha)); >+ memset(icmp6Key->arpTha, 0, sizeof(icmp6Key->arpTha)); > > icmp = OvsGetIcmp(packet, ofs, &icmpStorage); > if (!icmp) { >@@ -247,8 +258,10 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > * The ICMPv6 type and code fields use the 16-bit transport port > * fields, so we need to store them in 16-bit network byte order. > */ >- key->ipv6Key.l4.tpSrc = htons(icmp->type); >- key->ipv6Key.l4.tpDst = htons(icmp->code); >+ if (ipv6Key) { >+ ipv6Key->l4.tpSrc = htons(icmp->type); >+ ipv6Key->l4.tpDst = htons(icmp->code); >+ } > > if (icmp->code == 0 && > (icmp->type == ND_NEIGHBOR_SOLICIT || >@@ -261,7 +274,7 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > if (!ndTarget) { > return NDIS_STATUS_FAILURE; > } >- flow->ndTarget = *ndTarget; >+ icmp6Key->ndTarget = *ndTarget; > > while ((UINT32)(ofs + 8) <= OvsPacketLenNBL(packet)) { > /* >@@ -288,14 +301,14 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > * layer option is specified twice. 
> */ > if (ndOpt->type == ND_OPT_SOURCE_LINKADDR && optLen == 8) { >- if (Eth_IsNullAddr(flow->arpSha)) { >- memcpy(flow->arpSha, ndOpt + 1, ETH_ADDR_LENGTH); >+ if (Eth_IsNullAddr(icmp6Key->arpSha)) { >+ memcpy(icmp6Key->arpSha, ndOpt + 1, ETH_ADDR_LENGTH); > } else { > goto invalid; > } > } else if (ndOpt->type == ND_OPT_TARGET_LINKADDR && optLen >== 8) { >- if (Eth_IsNullAddr(flow->arpTha)) { >- memcpy(flow->arpTha, ndOpt + 1, ETH_ADDR_LENGTH); >+ if (Eth_IsNullAddr(icmp6Key->arpTha)) { >+ memcpy(icmp6Key->arpTha, ndOpt + 1, ETH_ADDR_LENGTH); > } else { > goto invalid; > } >@@ -309,9 +322,9 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > return NDIS_STATUS_SUCCESS; > > invalid: >- memset(&flow->ndTarget, 0, sizeof(flow->ndTarget)); >- memset(flow->arpSha, 0, sizeof(flow->arpSha)); >- memset(flow->arpTha, 0, sizeof(flow->arpTha)); >+ RtlZeroMemory(&icmp6Key->ndTarget, sizeof(icmp6Key->ndTarget)); >+ RtlZeroMemory(icmp6Key->arpSha, sizeof(icmp6Key->arpSha)); >+ RtlZeroMemory(icmp6Key->arpTha, sizeof(icmp6Key->arpTha)); > > return NDIS_STATUS_FAILURE; > } >diff --git a/datapath-windows/ovsext/PacketParser.h >b/datapath-windows/ovsext/PacketParser.h >index 47d227f..f1d7f28 100644 >--- a/datapath-windows/ovsext/PacketParser.h >+++ b/datapath-windows/ovsext/PacketParser.h >@@ -22,7 +22,7 @@ > > const VOID* OvsGetPacketBytes(const NET_BUFFER_LIST *_pNB, UINT32 len, > UINT32 SrcOffset, VOID *storage); >-NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, OvsFlowKey *key, >+NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, Ipv6Key *key, > POVS_PACKET_HDR_INFO layers); > VOID OvsParseTcp(const NET_BUFFER_LIST *packet, L4Key *flow, > POVS_PACKET_HDR_INFO layers); >@@ -30,8 +30,10 @@ VOID OvsParseUdp(const NET_BUFFER_LIST *packet, L4Key >*flow, > POVS_PACKET_HDR_INFO layers); > VOID OvsParseSctp(const NET_BUFFER_LIST *packet, L4Key *flow, > POVS_PACKET_HDR_INFO layers); >-NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, OvsFlowKey >*key, >- 
POVS_PACKET_HDR_INFO layers); >+NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, >+ Ipv6Key *ipv6Key, >+ Icmp6Key *flow, >+ POVS_PACKET_HDR_INFO layers); > > static __inline ULONG > OvsPacketLenNBL(const NET_BUFFER_LIST *_pNB) >diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c >index dd7bf92..c93db75 100644 >--- a/datapath-windows/ovsext/Stt.c >+++ b/datapath-windows/ovsext/Stt.c >@@ -194,7 +194,7 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > if (layers->isIPv4) { > IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset); > if (!ip->tot_len) { >- ip->tot_len = htons(innerFrameLen - sizeof(EthHdr)); >+ ip->tot_len = htons(innerFrameLen - layers->l3Offset); > } > if (!ip->check) { > ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0); >@@ -231,8 +231,8 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > * memory. > */ > curMdl = NET_BUFFER_CURRENT_MDL(curNb); >- ASSERT((int) (MmGetMdlByteCount(curMdl) - >NET_BUFFER_CURRENT_MDL_OFFSET(curNb)) >- >= (int) headRoom); >+ ASSERT((int) (MmGetMdlByteCount(curMdl) - >+ NET_BUFFER_CURRENT_MDL_OFFSET(curNb)) >= (int) headRoom); > > buf = (PUINT8) MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); > if (!buf) { >@@ -288,12 +288,12 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > /* Calculate pseudo header chksum */ > tcpChksumLen = sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen; > ASSERT(tcpChksumLen < 65535); >- outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr,(uint32 *) >&tunKey->dst, >- IPPROTO_TCP, (uint16) >tcpChksumLen); > sttHdr->version = 0; > > /* Set STT Header */ > sttHdr->flags = 0; >+ sttHdr->mss = 0; >+ sttHdr->l4Offset = 0; > if (innerPartialChecksum) { > sttHdr->flags |= STT_CSUM_PARTIAL; > if (layers->isIPv4) { >@@ -327,8 +327,22 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > NET_BUFFER_LIST_INFO(curNbl, > TcpIpChecksumNetBufferListInfo) = >csumInfo.Value; > >- UINT32 encapMss = OvsGetExternalMtu(switchContext) - sizeof(IPHdr) - >sizeof(TCPHdr); >+ UINT32 encapMss = 
OvsGetExternalMtu(switchContext) >+ - sizeof(IPHdr) >+ - sizeof(TCPHdr); > if (ipTotalLen > encapMss) { >+ /* For Windows LSO, the TCP pseudo checksum must contain Source >IP >+ * Address, Destination IP Address, and Protocol; the length of >the >+ * payload is excluded because the underlying miniport driver >and NIC >+ * generate TCP segments from the large packet that is passed >down by >+ * the TCP/IP transport, the transport does not know the size of >the >+ * TCP payload for each TCP segment and therefore cannot include >the >+ * TCP Length in the pseudo-header. >+ */ >+ outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr, >+ (uint32 *) &tunKey->dst, >+ IPPROTO_TCP, (uint16) 0); >+ > lsoInfo.Value = 0; > lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset; > lsoInfo.LsoV2Transmit.MSS = encapMss; >@@ -336,6 +350,11 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > lsoInfo.LsoV2Transmit.IPVersion = >NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4; > NET_BUFFER_LIST_INFO(curNbl, > TcpLargeSendNetBufferListInfo) = >lsoInfo.Value; >+ } else { >+ outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr, >+ (uint32 *) &tunKey->dst, >+ IPPROTO_TCP, >+ (uint16) tcpChksumLen); > } > > return STATUS_SUCCESS; >@@ -655,7 +674,8 @@ handle_error: > if (lastPacket) { > /* Retrieve the original STT header */ > NdisMoveMemory(newSttHdr, &pktFragEntry->sttHdr, sizeof >(SttHdr)); >- targetPNbl = OvsAllocateNBLFromBuffer(switchContext, >pktFragEntry->packetBuf, >+ targetPNbl = OvsAllocateNBLFromBuffer(switchContext, >+ pktFragEntry->packetBuf, > innerPacketLen); > > /* Delete this entry and free up the memory/ */ >@@ -668,16 +688,32 @@ handle_error: > return lastPacket ? targetPNbl : NULL; > } > >-VOID >-OvsDecapSetOffloads(PNET_BUFFER_LIST curNbl, SttHdr *sttHdr) >+ >+/* >+*------------------------------------------------------------------------ >---- >+* OvsDecapSetOffloads >+* Processes received STT header and sets >TcpIpChecksumNetBufferListInfo >+* accordingly. 
>+* For TCP packets with total length bigger than destination MSS it >+* populates TcpLargeSendNetBufferListInfo. >+* >+* Returns NDIS_STATUS_SUCCESS normally. >+* Fails only if packet data is invalid. >+* (e.g. if OvsExtractLayers() returns an error). >+*------------------------------------------------------------------------ >---- >+*/ >+NDIS_STATUS >+OvsDecapSetOffloads(PNET_BUFFER_LIST *curNbl, SttHdr *sttHdr) > { > if ((sttHdr->flags & STT_CSUM_VERIFIED) > || !(sttHdr->flags & STT_CSUM_PARTIAL)) { >- return; >+ return NDIS_STATUS_SUCCESS; > } > >- UINT8 protoType; >+ NDIS_STATUS status; > NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; >+ UINT8 protoType; >+ > csumInfo.Value = 0; > csumInfo.Transmit.IpHeaderChecksum = 0; > csumInfo.Transmit.TcpHeaderOffset = sttHdr->l4Offset; >@@ -703,25 +739,66 @@ OvsDecapSetOffloads(PNET_BUFFER_LIST curNbl, SttHdr >*sttHdr) > csumInfo.Transmit.IsIPv6 = 1; > csumInfo.Transmit.UdpChecksum = 1; > } >- NET_BUFFER_LIST_INFO(curNbl, >+ NET_BUFFER_LIST_INFO(*curNbl, > TcpIpChecksumNetBufferListInfo) = >csumInfo.Value; > >- if (sttHdr->mss) { >+ if (sttHdr->mss && (sttHdr->flags & STT_PROTO_TCP)) { > NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo; >+ PMDL curMdl = NULL; >+ PNET_BUFFER curNb; >+ PUINT8 buf = NULL; >+ OVS_PACKET_HDR_INFO layers; >+ >+ status = OvsExtractLayers(*curNbl, &layers); >+ if (status != NDIS_STATUS_SUCCESS) { >+ return status; >+ } >+ >+ curNb = NET_BUFFER_LIST_FIRST_NB(*curNbl); >+ curMdl = NET_BUFFER_CURRENT_MDL(curNb); >+ >+ buf = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, >+ LowPagePriority); >+ buf += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); >+ >+ // apply pseudo checksum on extracted packet >+ if (sttHdr->flags & STT_PROTO_IPV4) { >+ IPHdr *ipHdr; >+ TCPHdr *tcpHdr; >+ >+ ipHdr = (IPHdr *)(buf + layers.l3Offset); >+ tcpHdr = (TCPHdr *)(buf + layers.l4Offset); >+ >+ tcpHdr->check = IPPseudoChecksum(&ipHdr->saddr, >+ (uint32 *)&ipHdr->daddr, >+ IPPROTO_TCP, 0); >+ } else { >+ IPv6Hdr *ipHdr; 
>+ TCPHdr *tcpHdr; >+ >+ ipHdr = (IPv6Hdr *)(buf + layers.l3Offset); >+ tcpHdr = (TCPHdr *)(buf + layers.l4Offset); >+ >+ tcpHdr->check = IPv6PseudoChecksum((UINT32*)&ipHdr->saddr, >+ (UINT32*)&ipHdr->daddr, >+ IPPROTO_TCP, 0); >+ } >+ >+ // setup LSO > lsoInfo.Value = 0; > lsoInfo.LsoV2Transmit.TcpHeaderOffset = sttHdr->l4Offset; >- lsoInfo.LsoV2Transmit.MSS = ETH_DEFAULT_MTU >- - sizeof(IPHdr) >- - sizeof(TCPHdr); >+ lsoInfo.LsoV2Transmit.MSS = ntohs(sttHdr->mss); > lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; > if (sttHdr->flags & STT_PROTO_IPV4) { > lsoInfo.LsoV2Transmit.IPVersion = >NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4; > } else { > lsoInfo.LsoV2Transmit.IPVersion = >NDIS_TCP_LARGE_SEND_OFFLOAD_IPv6; > } >- NET_BUFFER_LIST_INFO(curNbl, >+ NET_BUFFER_LIST_INFO(*curNbl, > TcpLargeSendNetBufferListInfo) = >lsoInfo.Value; > } >+ >+ return NDIS_STATUS_SUCCESS; > } > > /* >@@ -736,15 +813,14 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > OvsIPv4TunnelKey *tunKey, > PNET_BUFFER_LIST *newNbl) > { >- NDIS_STATUS status = NDIS_STATUS_FAILURE; >- PNET_BUFFER curNb, newNb; >+ NDIS_STATUS status; >+ PNET_BUFFER curNb; > IPHdr *ipHdr; > char *ipBuf[sizeof(IPHdr)]; > SttHdr stt; > SttHdr *sttHdr; > char *sttBuf[STT_HDR_LEN]; > UINT32 advanceCnt, hdrLen; >- BOOLEAN isLsoPacket = FALSE; > > curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); > ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); >@@ -767,7 +843,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > TCPHdr *tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4); > > /* Skip IP & TCP headers */ >- hdrLen = sizeof(IPHdr) + sizeof(TCPHdr), >+ hdrLen = (ipHdr->ihl * 4) + (tcp->doff * 4); > NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); > advanceCnt += hdrLen; > >@@ -775,7 +851,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > UINT32 totalLen = (seq >> STT_SEQ_LEN_SHIFT); > UINT16 payloadLen = (UINT16)ntohs(ipHdr->tot_len) > - (ipHdr->ihl * 4) >- - (sizeof * tcp); >+ - (tcp->doff * 4); > > /* Check if 
incoming packet requires reassembly */ > if (totalLen != payloadLen) { >@@ -788,7 +864,6 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > } > > *newNbl = pNbl; >- isLsoPacket = TRUE; > } else { > /* STT Header */ > sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, >@@ -812,7 +887,6 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > OvsCompleteNBL(switchContext, *newNbl, TRUE); > return NDIS_STATUS_FAILURE; > } >- newNb = NET_BUFFER_LIST_FIRST_NB(*newNbl); > > ASSERT(sttHdr); > >@@ -826,7 +900,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > tunKey->pad = 0; > > /* Set Checksum and LSO offload flags */ >- OvsDecapSetOffloads(*newNbl, sttHdr); >+ OvsDecapSetOffloads(newNbl, sttHdr); > > return NDIS_STATUS_SUCCESS; > } >diff --git a/datapath-windows/ovsext/User.c >b/datapath-windows/ovsext/User.c >index 92a71e1..c7ac284 100644 >--- a/datapath-windows/ovsext/User.c >+++ b/datapath-windows/ovsext/User.c >@@ -768,7 +768,8 @@ OvsCreateAndAddPackets(PVOID userData, > NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; > UINT32 packetLength; > >- tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, >TcpLargeSendNetBufferListInfo); >+ tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, >+ >TcpLargeSendNetBufferListInfo); > nb = NET_BUFFER_LIST_FIRST_NB(nbl); > packetLength = NET_BUFFER_DATA_LENGTH(nb); > >@@ -870,7 +871,8 @@ OvsCompletePacketHeader(UINT8 *packet, > (UINT32 >*)&ipHdr->DestinationAddress, > IPPROTO_TCP, >hdrInfoOut->l4PayLoad); > } else { >- PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + >hdrInfoIn->l3Offset); >+ PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + >+ hdrInfoIn->l3Offset); > hdrInfoOut->l4PayLoad = > (UINT16)(ntohs(ipv6Hdr->PayloadLength) + > hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)- >@@ -884,9 +886,9 @@ OvsCompletePacketHeader(UINT8 *packet, > hdrInfoOut->tcpCsumNeeded = 1; > ovsUserStats.recalTcpCsum++; > } else if (!isRecv) { >- if (csumInfo.Transmit.TcpChecksum) { >+ if (hdrInfoIn->isTcp && csumInfo.Transmit.TcpChecksum) { > hdrInfoOut->tcpCsumNeeded 
= 1; >- } else if (csumInfo.Transmit.UdpChecksum) { >+ } else if (hdrInfoIn->isUdp && csumInfo.Transmit.UdpChecksum) { > hdrInfoOut->udpCsumNeeded = 1; > } > if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) { >@@ -896,7 +898,8 @@ OvsCompletePacketHeader(UINT8 *packet, > hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP; > #endif > if (hdrInfoIn->isIPv4) { >- PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + >hdrInfoIn->l3Offset); >+ PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + >+ hdrInfoIn->l3Offset); > hdrInfoOut->l4PayLoad = >(UINT16)(ntohs(ipHdr->TotalLength) - > (ipHdr->HeaderLength << 2)); > #ifdef DBG >@@ -1004,8 +1007,8 @@ OvsCreateQueueNlPacket(PVOID userData, > csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, >TcpIpChecksumNetBufferListInfo); > > if (isRecv && (csumInfo.Receive.TcpChecksumFailed || >- (csumInfo.Receive.UdpChecksumFailed && >!hdrInfo->udpCsumZero) || >- csumInfo.Receive.IpChecksumFailed)) { >+ (csumInfo.Receive.UdpChecksumFailed && >!hdrInfo->udpCsumZero) || >+ csumInfo.Receive.IpChecksumFailed)) { > OVS_LOG_INFO("Packet dropped due to checksum failure."); > ovsUserStats.dropDuetoChecksum++; > return NULL; >-- >2.7.2.windows.1 >_______________________________________________ >dev mailing list >dev@openvswitch.org >https://urldefense.proofpoint.com/v2/url?u=http-3A__openvswitch.org_mailma >n_listinfo_dev&d=CwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Dc >ruz40PROJ40ROzSpxyQSLw6fcrOWpJgEcEmNR3JEQ&m=n7gnXMsR2UrNtQVxrxnTBjwnREGH51 >0CQQKFRzywjr8&s=ZrsjgTjpUOOE_2up2V7PwwHo9VOuqVWBzyGzA1tw6es&e= _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev