Hi Paul, Please see the comments inlined.
Thanks, Sairam On 4/18/16, 1:31 PM, "Paul Boca" <pb...@cloudbasesolutions.com> wrote: >Added OvsExtractLayers - populates only the layers field without >unnecessary >memory operations for flow part >If in STT header the flags are 0 then force packets checksums calculation >Ensure correct pseudo checksum is set for LSO both on send and receive > >Signed-off-by: Paul-Daniel Boca <pb...@cloudbasesolutions.com> >--- >v2: Fixed a NULL pointer dereference. > Removed some unused local variables and multiple initializations. >--- > datapath-windows/ovsext/Flow.c | 219 >+++++++++++++++++++++++++++------ > datapath-windows/ovsext/Flow.h | 2 + > datapath-windows/ovsext/PacketParser.c | 60 +++++---- > datapath-windows/ovsext/PacketParser.h | 8 +- > datapath-windows/ovsext/Stt.c | 161 +++++++++++++++++++----- > datapath-windows/ovsext/User.c | 20 +-- > 6 files changed, 368 insertions(+), 102 deletions(-) > >diff --git a/datapath-windows/ovsext/Flow.c >b/datapath-windows/ovsext/Flow.c >index f74ce12..e26f068 100644 >--- a/datapath-windows/ovsext/Flow.c >+++ b/datapath-windows/ovsext/Flow.c >@@ -712,7 +712,7 @@ done: > static NTSTATUS > _MapFlowInfoToNl(PNL_BUFFER nlBuf, OvsFlowInfo *flowInfo) > { >- NTSTATUS rc = STATUS_SUCCESS; >+ NTSTATUS rc; > > rc = MapFlowKeyToNlKey(nlBuf, &(flowInfo->key), OVS_FLOW_ATTR_KEY, > OVS_KEY_ATTR_TUNNEL); >@@ -1515,7 +1515,8 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs, > > ndKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_ND]); > RtlCopyMemory(&icmp6FlowPutKey->ndTarget, >- ndKey->nd_target, sizeof >(icmp6FlowPutKey->ndTarget)); >+ ndKey->nd_target, >+ sizeof (icmp6FlowPutKey->ndTarget)); > RtlCopyMemory(icmp6FlowPutKey->arpSha, > ndKey->nd_sll, ETH_ADDR_LEN); > RtlCopyMemory(icmp6FlowPutKey->arpTha, >@@ -1545,8 +1546,10 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs, > arpFlowPutKey->nwSrc = arpKey->arp_sip; > arpFlowPutKey->nwDst = arpKey->arp_tip; > >- RtlCopyMemory(arpFlowPutKey->arpSha, arpKey->arp_sha, >ETH_ADDR_LEN); >- RtlCopyMemory(arpFlowPutKey->arpTha, arpKey->arp_tha, >ETH_ADDR_LEN); >+ RtlCopyMemory(arpFlowPutKey->arpSha, >+ arpKey->arp_sha, ETH_ADDR_LEN); Sai: Can you align the arguments? We usually try to restrict lines to 78 characters before introducing newline. Eg: RtlCopyMemory(arpFlowPutKey->arpSha, arpKey->arp_sha, ETH_ADDR_LEN); >+ RtlCopyMemory(arpFlowPutKey->arpTha, >+ arpKey->arp_tha, ETH_ADDR_LEN); Sai: Same. > /* Kernel datapath assumes 'arpFlowPutKey->nwProto' to be in >host > * order. */ > arpFlowPutKey->nwProto = (UINT8)ntohs((arpKey->arp_op)); >@@ -1777,29 +1780,172 @@ OvsGetFlowMetadata(OvsFlowKey *key, > return status; > } Sai: Can you add a description summary for OvsExtractLayers? We usually do that for public methods. > >+NDIS_STATUS >+OvsExtractLayers(const NET_BUFFER_LIST *packet, >+ POVS_PACKET_HDR_INFO layers) >+{ >+ struct Eth_Header *eth; >+ UINT8 offset = 0; >+ PVOID vlanTagValue; >+ ovs_be16 dlType; >+ >+ layers->value = 0; >+ >+ /* Link layer. */ >+ eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet); >+ >+ /* >+ * vlan_tci. >+ */ >+ vlanTagValue = NET_BUFFER_LIST_INFO(packet, >Ieee8021QNetBufferListInfo); >+ if (!vlanTagValue) { >+ if (eth->dix.typeNBO == ETH_TYPE_802_1PQ_NBO) { >+ offset = sizeof(Eth_802_1pq_Tag); >+ } >+ >+ /* >+ * XXX Sai: You can continue after XXX on the same line. >+ * Please note after this point, src mac and dst mac should >+ * not be accessed through eth >+ */ >+ eth = (Eth_Header *)((UINT8 *)eth + offset); >+ } >+ >+ /* >+ * dl_type. >+ * >+ * XXX assume that at least the first >+ * 12 bytes of received packets are mapped. This code has the >stronger >+ * assumption that at least the first 22 bytes of 'packet' is mapped >(if my >+ * arithmetic is right). >+ */ >+ if (ETH_TYPENOT8023(eth->dix.typeNBO)) { >+ dlType = eth->dix.typeNBO; >+ layers->l3Offset = ETH_HEADER_LEN_DIX + offset; >+ } else if (OvsPacketLenNBL(packet) >= ETH_HEADER_LEN_802_3 && Sai: Align the conditions with the if (. >+ eth->e802_3.llc.dsap == 0xaa && >+ eth->e802_3.llc.ssap == 0xaa && >+ eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME && >+ eth->e802_3.snap.snapOrg[0] == 0x00 && >+ eth->e802_3.snap.snapOrg[1] == 0x00 && >+ eth->e802_3.snap.snapOrg[2] == 0x00) { >+ dlType = eth->e802_3.snap.snapType.typeNBO; >+ layers->l3Offset = ETH_HEADER_LEN_802_3 + offset; >+ } else { >+ dlType = htons(OVSWIN_DL_TYPE_NONE); >+ layers->l3Offset = ETH_HEADER_LEN_DIX + offset; >+ } >+ >+ /* Network layer. */ >+ if (dlType == htons(ETH_TYPE_IPV4)) { >+ struct IPHdr ip_storage; >+ const struct IPHdr *nh; >+ >+ layers->isIPv4 = 1; >+ nh = OvsGetIp(packet, layers->l3Offset, &ip_storage); >+ if (nh) { >+ layers->l4Offset = layers->l3Offset + nh->ihl * 4; >+ >+ if (!(nh->frag_off & htons(IP_OFFSET))) { >+ if (nh->protocol == SOCKET_IPPROTO_TCP) { >+ OvsParseTcp(packet, NULL, layers); >+ } else if (nh->protocol == SOCKET_IPPROTO_UDP) { >+ OvsParseUdp(packet, NULL, layers); >+ } else if (nh->protocol == SOCKET_IPPROTO_SCTP) { >+ OvsParseSctp(packet, NULL, layers); >+ } else if (nh->protocol == SOCKET_IPPROTO_ICMP) { >+ ICMPHdr icmpStorage; >+ const ICMPHdr *icmp; >+ >+ icmp = OvsGetIcmp(packet, layers->l4Offset, >&icmpStorage); >+ if (icmp) { >+ layers->l7Offset = layers->l4Offset + sizeof >*icmp; >+ } >+ } >+ } >+ } >+ } else if (dlType == htons(ETH_TYPE_IPV6)) { >+ NDIS_STATUS status; >+ Ipv6Key ipv6Key; >+ >+ status = OvsParseIPv6(packet, &ipv6Key, layers); >+ if (status != NDIS_STATUS_SUCCESS) { >+ return status; >+ } >+ layers->isIPv6 = 1; >+ >+ if (ipv6Key.nwProto == SOCKET_IPPROTO_TCP) { >+ OvsParseTcp(packet, &(ipv6Key.l4), layers); >+ } else if (ipv6Key.nwProto == SOCKET_IPPROTO_UDP) { >+ OvsParseUdp(packet, &(ipv6Key.l4), layers); >+ } else if (ipv6Key.nwProto == SOCKET_IPPROTO_SCTP) { >+ OvsParseSctp(packet, &ipv6Key.l4, layers); >+ } else if (ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) { >+ Icmp6Key icmp6Key; >+ OvsParseIcmpV6(packet, NULL, &icmp6Key, layers); >+ } >+ } else if (dlType == htons(ETH_TYPE_ARP)) { >+ // nothing to extract here Sai: Do we need the if condition in that case? >+ } else if (OvsEthertypeIsMpls(dlType)) { >+ MPLSHdr mplsStorage; >+ const MPLSHdr *mpls; >+ >+ /* >+ * In the presence of an MPLS label stack the end of the L2 >+ * header and the beginning of the L3 header differ. >+ * >+ * A network packet may contain multiple MPLS labels, but we >+ * are only interested in the topmost label stack entry. >+ * >+ * Advance network header to the beginning of the L3 header. >+ * layers->l3Offset corresponds to the end of the L2 header. >+ */ >+ for (UINT32 i = 0; i < FLOW_MAX_MPLS_LABELS; i++) { >+ mpls = OvsGetMpls(packet, layers->l3Offset, &mplsStorage); >+ if (!mpls) { >+ break; >+ } >+ >+ layers->l3Offset += MPLS_HLEN; >+ layers->l4Offset += MPLS_HLEN; >+ >+ if (mpls->lse & htonl(MPLS_BOS_MASK)) { >+ /* >+ * Bottom of Stack bit is set, which means there are no >+ * remaining MPLS labels in the packet. >+ */ >+ break; >+ } >+ } >+ } >+ >+ return NDIS_STATUS_SUCCESS; >+} >+ > /* >- >*------------------------------------------------------------------------- >--- >- * Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', >and >- * 'ofp_in_port'. >- * >- * Initializes 'packet' header pointers as follows: >- * >- * - packet->l2 to the start of the Ethernet header. >- * >- * - packet->l3 to just past the Ethernet header, or just past the >- * vlan_header if one is present, to the first byte of the payload >of the >- * Ethernet frame. >- * >- * - packet->l4 to just past the IPv4 header, if one is present and >has a >- * correct length, and otherwise NULL. >- * >- * - packet->l7 to just past the TCP, UDP, SCTP or ICMP header, if >one is >- * present and has a correct length, and otherwise NULL. >- * >- * Returns NDIS_STATUS_SUCCESS normally. Fails only if packet data >cannot be accessed >- * (e.g. if Pkt_CopyBytesOut() returns an error). >- >*------------------------------------------------------------------------- >--- >- */ >+*------------------------------------------------------------------------ >---- >+* Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', and >+* 'ofp_in_port'. >+* >+* Initializes 'packet' header pointers as follows: >+* >+* - packet->l2 to the start of the Ethernet header. >+* >+* - packet->l3 to just past the Ethernet header, or just past the >+* vlan_header if one is present, to the first byte of the payload >of the >+* Ethernet frame. >+* >+* - packet->l4 to just past the IPv4 header, if one is present and >has a >+* correct length, and otherwise NULL. >+* >+* - packet->l7 to just past the TCP, UDP, SCTP or ICMP header, if one >is >+* present and has a correct length, and otherwise NULL. >+* >+* Returns NDIS_STATUS_SUCCESS normally. >+* Fails only if packet data cannot be accessed. >+* (e.g. if Pkt_CopyBytesOut() returns an error). >+*------------------------------------------------------------------------ >---- >+*/ Sai: Are you going to split ExtractFlow into ExtractFlow and ExtractLayers? Or are you just Introducing ExtractLayers similar to ExtractFlows? > NDIS_STATUS > OvsExtractFlow(const NET_BUFFER_LIST *packet, > UINT32 inPort, >@@ -1831,8 +1977,8 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > > /* Link layer. */ > eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet); >- memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH); >- memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH); >+ RtlCopyMemory(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH); >+ RtlCopyMemory(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH); > > /* > * vlan_tci. >@@ -1886,7 +2032,8 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > layers->l3Offset = ETH_HEADER_LEN_DIX + offset; > } > >- flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE - >flow->l2.offset; >+ flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + >+ OVS_L2_KEY_SIZE - flow->l2.offset; Sai: Fix the alignment. > /* Network layer. */ > if (flow->l2.dlType == htons(ETH_TYPE_IPV4)) { > struct IPHdr ip_storage; >@@ -1943,9 +2090,9 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) { > NDIS_STATUS status; > flow->l2.keyLen += OVS_IPV6_KEY_SIZE; >- status = OvsParseIPv6(packet, flow, layers); >+ status = OvsParseIPv6(packet, &flow->ipv6Key, layers); > if (status != NDIS_STATUS_SUCCESS) { >- memset(&flow->ipv6Key, 0, sizeof (Ipv6Key)); >+ RtlZeroMemory(&flow->ipv6Key, sizeof (Ipv6Key)); > return status; > } > layers->isIPv6 = 1; >@@ -1960,7 +2107,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_SCTP) { > OvsParseSctp(packet, &flow->ipv6Key.l4, layers); > } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) { >- OvsParseIcmpV6(packet, flow, layers); >+ OvsParseIcmpV6(packet, &flow->ipv6Key, &flow->icmp6Key, >layers); > flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE); > } > } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) { >@@ -1982,10 +2129,10 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > } > if (arpKey->nwProto == ARPOP_REQUEST > || arpKey->nwProto == ARPOP_REPLY) { >- memcpy(&arpKey->nwSrc, arp->arp_spa, 4); >- memcpy(&arpKey->nwDst, arp->arp_tpa, 4); >- memcpy(arpKey->arpSha, arp->arp_sha, ETH_ADDR_LENGTH); >- memcpy(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH); >+ RtlCopyMemory(&arpKey->nwSrc, arp->arp_spa, 4); >+ RtlCopyMemory(&arpKey->nwDst, arp->arp_tpa, 4); >+ RtlCopyMemory(arpKey->arpSha, arp->arp_sha, >ETH_ADDR_LENGTH); >+ RtlCopyMemory(arpKey->arpTha, arp->arp_tha, >ETH_ADDR_LENGTH); > } > } > } else if (OvsEthertypeIsMpls(flow->l2.dlType)) { >diff --git a/datapath-windows/ovsext/Flow.h >b/datapath-windows/ovsext/Flow.h >index 310c472..88240b5 100644 >--- a/datapath-windows/ovsext/Flow.h >+++ b/datapath-windows/ovsext/Flow.h >@@ -53,6 +53,8 @@ NDIS_STATUS OvsAllocateFlowTable(OVS_DATAPATH *datapath, > > NDIS_STATUS OvsGetFlowMetadata(OvsFlowKey *key, > PNL_ATTR *keyAttrs); >+NDIS_STATUS OvsExtractLayers(const NET_BUFFER_LIST *packet, >+ POVS_PACKET_HDR_INFO layers); > NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort, > OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers, > OvsIPv4TunnelKey *tunKey); >diff --git a/datapath-windows/ovsext/PacketParser.c >b/datapath-windows/ovsext/PacketParser.c >index bd6910c..635558d 100644 >--- a/datapath-windows/ovsext/PacketParser.c >+++ b/datapath-windows/ovsext/PacketParser.c >@@ -84,24 +84,22 @@ OvsGetPacketBytes(const NET_BUFFER_LIST *nbl, > > NDIS_STATUS > OvsParseIPv6(const NET_BUFFER_LIST *packet, >- OvsFlowKey *key, >+ Ipv6Key *flow, Sai - Can¹t you call this ipv6Key instead? Flow is misleading. It seems like we refer to these keys as flow everywhere in PacketParser.c > POVS_PACKET_HDR_INFO layers) > { > UINT16 ofs = layers->l3Offset; > IPv6Hdr ipv6HdrStorage; > const IPv6Hdr *nh; > UINT32 nextHdr; >- Ipv6Key *flow= &key->ipv6Key; > >- ofs = layers->l3Offset; > nh = OvsGetPacketBytes(packet, sizeof *nh, ofs, &ipv6HdrStorage); > if (!nh) { > return NDIS_STATUS_FAILURE; > } > > nextHdr = nh->nexthdr; >- memcpy(&flow->ipv6Src, nh->saddr.s6_addr, 16); >- memcpy(&flow->ipv6Dst, nh->daddr.s6_addr, 16); >+ RtlCopyMemory(&flow->ipv6Src, nh->saddr.s6_addr, 16); >+ RtlCopyMemory(&flow->ipv6Dst, nh->daddr.s6_addr, 16); > > flow->nwTos = ((nh->flow_lbl[0] & 0xF0) >> 4) | (nh->priority << 4); > flow->ipv6Label = >@@ -184,8 +182,10 @@ OvsParseTcp(const NET_BUFFER_LIST *packet, > TCPHdr tcpStorage; > const TCPHdr *tcp = OvsGetTcp(packet, layers->l4Offset, &tcpStorage); > if (tcp) { >- flow->tpSrc = tcp->source; >- flow->tpDst = tcp->dest; >+ if (flow) { >+ flow->tpSrc = tcp->source; >+ flow->tpDst = tcp->dest; >+ } Sai: Ideally you would want a NULL check for both Layers/Flow or neither. > layers->isTcp = 1; > layers->l7Offset = layers->l4Offset + 4 * tcp->doff; > } >@@ -199,8 +199,10 @@ OvsParseSctp(const NET_BUFFER_LIST *packet, > SCTPHdr sctpStorage; > const SCTPHdr *sctp = OvsGetSctp(packet, layers->l4Offset, >&sctpStorage); > if (sctp) { >- flow->tpSrc = sctp->source; >- flow->tpDst = sctp->dest; >+ if (flow) { >+ flow->tpSrc = sctp->source; >+ flow->tpDst = sctp->dest; >+ } > layers->isSctp = 1; > layers->l7Offset = layers->l4Offset + sizeof *sctp; > } >@@ -214,8 +216,10 @@ OvsParseUdp(const NET_BUFFER_LIST *packet, > UDPHdr udpStorage; > const UDPHdr *udp = OvsGetUdp(packet, layers->l4Offset, &udpStorage); > if (udp) { >- flow->tpSrc = udp->source; >- flow->tpDst = udp->dest; >+ if (flow) { >+ flow->tpSrc = udp->source; >+ flow->tpDst = udp->dest; >+ } > layers->isUdp = 1; > if (udp->check == 0) { > layers->udpCsumZero = 1; >@@ -226,17 +230,17 @@ OvsParseUdp(const NET_BUFFER_LIST *packet, > > NDIS_STATUS > OvsParseIcmpV6(const NET_BUFFER_LIST *packet, >- OvsFlowKey *key, >- POVS_PACKET_HDR_INFO layers) >+ Ipv6Key *ipv6Key, >+ Icmp6Key *icmp6Key, >+ POVS_PACKET_HDR_INFO layers) > { > UINT16 ofs = layers->l4Offset; > ICMPHdr icmpStorage; > const ICMPHdr *icmp; >- Icmp6Key *flow = &key->icmp6Key; > >- memset(&flow->ndTarget, 0, sizeof(flow->ndTarget)); >- memset(flow->arpSha, 0, sizeof(flow->arpSha)); >- memset(flow->arpTha, 0, sizeof(flow->arpTha)); >+ memset(&icmp6Key->ndTarget, 0, sizeof(icmp6Key->ndTarget)); >+ memset(icmp6Key->arpSha, 0, sizeof(icmp6Key->arpSha)); >+ memset(icmp6Key->arpTha, 0, sizeof(icmp6Key->arpTha)); > > icmp = OvsGetIcmp(packet, ofs, &icmpStorage); > if (!icmp) { >@@ -248,8 +252,10 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > * The ICMPv6 type and code fields use the 16-bit transport port > * fields, so we need to store them in 16-bit network byte order. > */ >- key->ipv6Key.l4.tpSrc = htons(icmp->type); >- key->ipv6Key.l4.tpDst = htons(icmp->code); >+ if (ipv6Key) { >+ ipv6Key->l4.tpSrc = htons(icmp->type); >+ ipv6Key->l4.tpDst = htons(icmp->code); >+ } > > if (icmp->code == 0 && > (icmp->type == ND_NEIGHBOR_SOLICIT || >@@ -262,7 +268,7 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > if (!ndTarget) { > return NDIS_STATUS_FAILURE; > } >- flow->ndTarget = *ndTarget; >+ icmp6Key->ndTarget = *ndTarget; > > while ((UINT32)(ofs + 8) <= OvsPacketLenNBL(packet)) { > /* >@@ -289,14 +295,14 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > * layer option is specified twice. > */ > if (ndOpt->type == ND_OPT_SOURCE_LINKADDR && optLen == 8) { >- if (Eth_IsNullAddr(flow->arpSha)) { >- memcpy(flow->arpSha, ndOpt + 1, ETH_ADDR_LENGTH); >+ if (Eth_IsNullAddr(icmp6Key->arpSha)) { >+ memcpy(icmp6Key->arpSha, ndOpt + 1, ETH_ADDR_LENGTH); > } else { > goto invalid; > } > } else if (ndOpt->type == ND_OPT_TARGET_LINKADDR && optLen >== 8) { >- if (Eth_IsNullAddr(flow->arpTha)) { >- memcpy(flow->arpTha, ndOpt + 1, ETH_ADDR_LENGTH); >+ if (Eth_IsNullAddr(icmp6Key->arpTha)) { >+ memcpy(icmp6Key->arpTha, ndOpt + 1, ETH_ADDR_LENGTH); > } else { > goto invalid; > } >@@ -310,9 +316,9 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > return NDIS_STATUS_SUCCESS; > > invalid: >- memset(&flow->ndTarget, 0, sizeof(flow->ndTarget)); >- memset(flow->arpSha, 0, sizeof(flow->arpSha)); >- memset(flow->arpTha, 0, sizeof(flow->arpTha)); >+ RtlZeroMemory(&icmp6Key->ndTarget, sizeof(icmp6Key->ndTarget)); >+ RtlZeroMemory(icmp6Key->arpSha, sizeof(icmp6Key->arpSha)); >+ RtlZeroMemory(icmp6Key->arpTha, sizeof(icmp6Key->arpTha)); > > return NDIS_STATUS_FAILURE; > } >diff --git a/datapath-windows/ovsext/PacketParser.h >b/datapath-windows/ovsext/PacketParser.h >index 47d227f..f1d7f28 100644 >--- a/datapath-windows/ovsext/PacketParser.h >+++ b/datapath-windows/ovsext/PacketParser.h >@@ -22,7 +22,7 @@ > > const VOID* OvsGetPacketBytes(const NET_BUFFER_LIST *_pNB, UINT32 len, > UINT32 SrcOffset, VOID *storage); >-NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, OvsFlowKey *key, >+NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, Ipv6Key *key, > POVS_PACKET_HDR_INFO layers); > VOID OvsParseTcp(const NET_BUFFER_LIST *packet, L4Key *flow, > POVS_PACKET_HDR_INFO layers); >@@ -30,8 +30,10 @@ VOID OvsParseUdp(const NET_BUFFER_LIST *packet, L4Key >*flow, > POVS_PACKET_HDR_INFO layers); > VOID OvsParseSctp(const NET_BUFFER_LIST *packet, L4Key *flow, > POVS_PACKET_HDR_INFO layers); >-NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, OvsFlowKey >*key, >- POVS_PACKET_HDR_INFO layers); >+NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, >+ Ipv6Key *ipv6Key, >+ Icmp6Key *flow, >+ POVS_PACKET_HDR_INFO layers); > > static __inline ULONG > OvsPacketLenNBL(const NET_BUFFER_LIST *_pNB) >diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c >index dd7bf92..dcec6ed 100644 >--- a/datapath-windows/ovsext/Stt.c >+++ b/datapath-windows/ovsext/Stt.c >@@ -173,6 +173,9 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; > csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, > >TcpIpChecksumNetBufferListInfo); >+ lsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl, >+ TcpLargeSendNetBufferListInfo); >+ > *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom, > FALSE /*copy NblInfo*/); > if (*newNbl == NULL) { >@@ -217,6 +220,8 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > } else { > innerPartialChecksum = TRUE; > } >+ } else if (!layers->isIPv4) { >+ innerChecksumVerified = TRUE; > } > > status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL); >@@ -231,8 +236,8 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > * memory. > */ > curMdl = NET_BUFFER_CURRENT_MDL(curNb); >- ASSERT((int) (MmGetMdlByteCount(curMdl) - >NET_BUFFER_CURRENT_MDL_OFFSET(curNb)) >- >= (int) headRoom); >+ ASSERT((int) (MmGetMdlByteCount(curMdl) - >+ NET_BUFFER_CURRENT_MDL_OFFSET(curNb)) >= (int) headRoom); > > buf = (PUINT8) MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); > if (!buf) { >@@ -288,8 +293,10 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > /* Calculate pseudo header chksum */ > tcpChksumLen = sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen; > ASSERT(tcpChksumLen < 65535); >- outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr,(uint32 *) >&tunKey->dst, >- IPPROTO_TCP, (uint16) >tcpChksumLen); >+ outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr, >+ (uint32 *) &tunKey->dst, >+ IPPROTO_TCP, >+ (uint16) tcpChksumLen); > sttHdr->version = 0; > > /* Set STT Header */ >@@ -327,13 +334,32 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, > NET_BUFFER_LIST_INFO(curNbl, > TcpIpChecksumNetBufferListInfo) = >csumInfo.Value; > >- UINT32 encapMss = OvsGetExternalMtu(switchContext) - sizeof(IPHdr) - >sizeof(TCPHdr); >+ UINT32 encapMss = OvsGetExternalMtu(switchContext) >+ - sizeof(IPHdr) >+ - sizeof(TCPHdr); > if (ipTotalLen > encapMss) { >- lsoInfo.Value = 0; >- lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset; >- lsoInfo.LsoV2Transmit.MSS = encapMss; >- lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; >- lsoInfo.LsoV2Transmit.IPVersion = >NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4; >+ outerIpHdr->check = IPChecksum((UINT8 *)outerIpHdr, >+ sizeof *outerIpHdr, 0); >+ outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr, >+ (uint32 *) &tunKey->dst, >+ IPPROTO_TCP, (uint16) 0); >+ Sai: We discussed about this in the Hyper-V IRC meeting. You can get rid of the V1 type since we can¹t enforce the type of an inner VM on outer host. >+ switch (lsoInfo.Transmit.Type) { >+ case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE: >+ lsoInfo.Value = 0; >+ lsoInfo.LsoV1Transmit.TcpHeaderOffset = tcpHeaderOffset; >+ lsoInfo.LsoV1Transmit.MSS = encapMss; >+ break; >+ case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE: >+ lsoInfo.Value = 0; >+ lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset; >+ lsoInfo.LsoV2Transmit.MSS = encapMss; >+ lsoInfo.LsoV2Transmit.IPVersion = >NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4; >+ break; >+ default: >+ OVS_LOG_ERROR("Unknown LSO transmit type:%d", >+ lsoInfo.Transmit.Type); >+ } > NET_BUFFER_LIST_INFO(curNbl, > TcpLargeSendNetBufferListInfo) = >lsoInfo.Value; > } >@@ -655,7 +681,8 @@ handle_error: > if (lastPacket) { > /* Retrieve the original STT header */ > NdisMoveMemory(newSttHdr, &pktFragEntry->sttHdr, sizeof >(SttHdr)); >- targetPNbl = OvsAllocateNBLFromBuffer(switchContext, >pktFragEntry->packetBuf, >+ targetPNbl = OvsAllocateNBLFromBuffer(switchContext, >+ pktFragEntry->packetBuf, > innerPacketLen); > > /* Delete this entry and free up the memory/ */ >@@ -668,16 +695,51 @@ handle_error: > return lastPacket ? targetPNbl : NULL; > } > >-VOID >-OvsDecapSetOffloads(PNET_BUFFER_LIST curNbl, SttHdr *sttHdr) >+NDIS_STATUS >+OvsDecapApplyOffloads(POVS_SWITCH_CONTEXT switchContext, >+ PNET_BUFFER_LIST *curNbl, SttHdr *sttHdr) > { >- if ((sttHdr->flags & STT_CSUM_VERIFIED) >- || !(sttHdr->flags & STT_CSUM_PARTIAL)) { >- return; Sai: Can you clarify what this method does? I will hold my comments until this. >+ NDIS_STATUS status; >+ OVS_PACKET_HDR_INFO layers; >+ NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; >+ >+ if (!(sttHdr->flags & STT_CSUM_PARTIAL)) { >+ status = OvsExtractLayers(*curNbl, &layers); >+ if (status != NDIS_STATUS_SUCCESS) { >+ return status; >+ } >+ >+ csumInfo.Value = 0; >+ csumInfo.Transmit.IsIPv4 = layers.isIPv4; >+ csumInfo.Transmit.IsIPv6 = layers.isIPv6; >+ >+ if (sttHdr->flags & STT_CSUM_VERIFIED) { >+ NET_BUFFER_LIST_INFO(*curNbl, >+ TcpIpChecksumNetBufferListInfo) = csumInfo.Value; >+ return NDIS_STATUS_SUCCESS; >+ } >+ >+ /* Set Transmit fields in order to calculate the checksums */ >+ csumInfo.Transmit.IpHeaderChecksum = layers.isIPv4; >+ csumInfo.Transmit.TcpChecksum = layers.isTcp; >+ csumInfo.Transmit.UdpChecksum = layers.isUdp; >+ >+ status = OvsApplySWChecksumOnNB(&layers, *curNbl, &csumInfo); >+ if (status != NDIS_STATUS_SUCCESS) { >+ return status; >+ } >+ >+ csumInfo.Value = 0; >+ csumInfo.Transmit.IpHeaderChecksum = 0; >+ csumInfo.Transmit.TcpChecksum = 0; >+ csumInfo.Transmit.UdpChecksum = 0; >+ NET_BUFFER_LIST_INFO(*curNbl, >+ TcpIpChecksumNetBufferListInfo) = csumInfo.Value; >+ >+ return NDIS_STATUS_SUCCESS; > } > > UINT8 protoType; >- NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; > csumInfo.Value = 0; > csumInfo.Transmit.IpHeaderChecksum = 0; > csumInfo.Transmit.TcpHeaderOffset = sttHdr->l4Offset; >@@ -703,14 +765,58 @@ OvsDecapSetOffloads(PNET_BUFFER_LIST curNbl, SttHdr >*sttHdr) > csumInfo.Transmit.IsIPv6 = 1; > csumInfo.Transmit.UdpChecksum = 1; > } >- NET_BUFFER_LIST_INFO(curNbl, >+ NET_BUFFER_LIST_INFO(*curNbl, > TcpIpChecksumNetBufferListInfo) = >csumInfo.Value; > > if (sttHdr->mss) { > NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo; >+ >+ if (sttHdr->flags & STT_PROTO_TCP) >+ { >+ PMDL curMdl = NULL; >+ PNET_BUFFER curNb; >+ PUINT8 buf = NULL; >+ >+ // apply pseudo checksum on extracted packet >+ status = OvsExtractLayers(*curNbl, &layers); >+ if (status != NDIS_STATUS_SUCCESS) { >+ return status; >+ } >+ >+ curNb = NET_BUFFER_LIST_FIRST_NB(*curNbl); >+ curMdl = NET_BUFFER_CURRENT_MDL(curNb); >+ >+ buf = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, >+ LowPagePriority); >+ buf += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); >+ >+ if (sttHdr->flags & STT_PROTO_IPV4) { >+ IPHdr *ipHdr; >+ TCPHdr *tcpHdr; >+ >+ ipHdr = (IPHdr *)(buf + layers.l3Offset); >+ tcpHdr = (TCPHdr *)(buf + layers.l4Offset); >+ >+ tcpHdr->check = IPPseudoChecksum(&ipHdr->saddr, >+ (uint32 *)&ipHdr->daddr, >+ IPPROTO_TCP, 0); >+ } else { >+ IPv6Hdr *ipHdr; >+ TCPHdr *tcpHdr; >+ >+ ipHdr = (IPv6Hdr *)(buf + layers.l3Offset); >+ tcpHdr = (TCPHdr *)(buf + layers.l4Offset); >+ >+ tcpHdr->check = >IPv6PseudoChecksum((UINT32*)&ipHdr->saddr, >+ (UINT32*)&ipHdr->daddr, >+ IPPROTO_TCP, 0); >+ } >+ } >+ >+ // setup LSO > lsoInfo.Value = 0; > lsoInfo.LsoV2Transmit.TcpHeaderOffset = sttHdr->l4Offset; >- lsoInfo.LsoV2Transmit.MSS = ETH_DEFAULT_MTU >+ lsoInfo.LsoV2Transmit.MSS = OvsGetExternalMtu(switchContext) > - sizeof(IPHdr) > - sizeof(TCPHdr); > lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; >@@ -719,9 +825,11 @@ OvsDecapSetOffloads(PNET_BUFFER_LIST curNbl, SttHdr >*sttHdr) > } else { > lsoInfo.LsoV2Transmit.IPVersion = >NDIS_TCP_LARGE_SEND_OFFLOAD_IPv6; > } >- NET_BUFFER_LIST_INFO(curNbl, >+ NET_BUFFER_LIST_INFO(*curNbl, > TcpLargeSendNetBufferListInfo) = >lsoInfo.Value; > } >+ >+ return NDIS_STATUS_SUCCESS; > } > > /* >@@ -736,15 +844,14 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > OvsIPv4TunnelKey *tunKey, > PNET_BUFFER_LIST *newNbl) > { >- NDIS_STATUS status = NDIS_STATUS_FAILURE; >- PNET_BUFFER curNb, newNb; >+ NDIS_STATUS status; >+ PNET_BUFFER curNb; > IPHdr *ipHdr; > char *ipBuf[sizeof(IPHdr)]; > SttHdr stt; > SttHdr *sttHdr; > char *sttBuf[STT_HDR_LEN]; > UINT32 advanceCnt, hdrLen; >- BOOLEAN isLsoPacket = FALSE; > > curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); > ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); >@@ -767,7 +874,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > TCPHdr *tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4); > > /* Skip IP & TCP headers */ >- hdrLen = sizeof(IPHdr) + sizeof(TCPHdr), >+ hdrLen = (ipHdr->ihl * 4) + (tcp->doff * 4); > NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); > advanceCnt += hdrLen; > >@@ -775,7 +882,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > UINT32 totalLen = (seq >> STT_SEQ_LEN_SHIFT); > UINT16 payloadLen = (UINT16)ntohs(ipHdr->tot_len) > - (ipHdr->ihl * 4) >- - (sizeof * tcp); >+ - (tcp->doff * 4); > > /* Check if incoming packet requires reassembly */ > if (totalLen != payloadLen) { >@@ -788,7 +895,6 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > } > > *newNbl = pNbl; >- isLsoPacket = TRUE; > } else { > /* STT Header */ > sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, >@@ -812,7 +918,6 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > OvsCompleteNBL(switchContext, *newNbl, TRUE); > return NDIS_STATUS_FAILURE; > } >- newNb = NET_BUFFER_LIST_FIRST_NB(*newNbl); > > ASSERT(sttHdr); > >@@ -826,7 +931,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, > tunKey->pad = 0; > > /* Set Checksum and LSO offload flags */ >- OvsDecapSetOffloads(*newNbl, sttHdr); >+ OvsDecapApplyOffloads(switchContext, newNbl, sttHdr); > > return NDIS_STATUS_SUCCESS; > } >diff --git a/datapath-windows/ovsext/User.c >b/datapath-windows/ovsext/User.c >index 6b2d94a..294576f 100644 >--- a/datapath-windows/ovsext/User.c >+++ b/datapath-windows/ovsext/User.c >@@ -469,7 +469,8 @@ NTSTATUS > OvsPurgeDpIoctl(PFILE_OBJECT fileObject) > { > POVS_OPEN_INSTANCE instance = >(POVS_OPEN_INSTANCE)fileObject->FsContext; >- POVS_USER_PACKET_QUEUE queue = >(POVS_USER_PACKET_QUEUE)instance->packetQueue; >+ POVS_USER_PACKET_QUEUE queue = >+ (POVS_USER_PACKET_QUEUE)instance->packetQueue; > > if (queue == NULL) { > return STATUS_INVALID_PARAMETER; >@@ -746,7 +747,8 @@ OvsCreateAndAddPackets(PVOID userData, > NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; > UINT32 packetLength; > >- tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, >TcpLargeSendNetBufferListInfo); >+ tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, >+ TcpLargeSendNetBufferListInfo); > nb = NET_BUFFER_LIST_FIRST_NB(nbl); > packetLength = NET_BUFFER_DATA_LENGTH(nb); > >@@ -848,7 +850,8 @@ OvsCompletePacketHeader(UINT8 *packet, > (UINT32 >*)&ipHdr->DestinationAddress, > IPPROTO_TCP, >hdrInfoOut->l4PayLoad); > } else { >- PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + >hdrInfoIn->l3Offset); >+ PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + >+ hdrInfoIn->l3Offset); > hdrInfoOut->l4PayLoad = > (UINT16)(ntohs(ipv6Hdr->PayloadLength) + > hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)- >@@ -862,9 +865,9 @@ OvsCompletePacketHeader(UINT8 *packet, > hdrInfoOut->tcpCsumNeeded = 1; > ovsUserStats.recalTcpCsum++; > } else if (!isRecv) { >- if (csumInfo.Transmit.TcpChecksum) { >+ if (hdrInfoIn->isTcp && csumInfo.Transmit.TcpChecksum) { > hdrInfoOut->tcpCsumNeeded = 1; >- } else if (csumInfo.Transmit.UdpChecksum) { >+ } else if (hdrInfoIn->isUdp && csumInfo.Transmit.UdpChecksum) { > hdrInfoOut->udpCsumNeeded = 1; > } > if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) { >@@ -874,7 +877,8 @@ OvsCompletePacketHeader(UINT8 *packet, > hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP; > #endif > if (hdrInfoIn->isIPv4) { >- PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + >hdrInfoIn->l3Offset); >+ PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + >+ hdrInfoIn->l3Offset); > hdrInfoOut->l4PayLoad = >(UINT16)(ntohs(ipHdr->TotalLength) - > (ipHdr->HeaderLength << 2)); > #ifdef DBG >@@ -982,8 +986,8 @@ OvsCreateQueueNlPacket(PVOID userData, > csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, >TcpIpChecksumNetBufferListInfo); > > if (isRecv && (csumInfo.Receive.TcpChecksumFailed || >- (csumInfo.Receive.UdpChecksumFailed && >!hdrInfo->udpCsumZero) || >- csumInfo.Receive.IpChecksumFailed)) { >+ (csumInfo.Receive.UdpChecksumFailed && >!hdrInfo->udpCsumZero) || >+ csumInfo.Receive.IpChecksumFailed)) { > OVS_LOG_INFO("Packet dropped due to checksum failure."); > ovsUserStats.dropDuetoChecksum++; > return NULL; >-- >2.7.2.windows.1 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev