Create a NBL for each NB when required ovs/ovs-issues#15
All NET_BUFFERs of a NET_BUFFER_LIST must go through the pipeline: extract, find flow, execute. Previously, only the first NET_BUFFER of a NET_BUFFER_LIST was going through this pipeline, which was erroneous. OvsPartialCopyToMultipleNBLs is used to make each NET_BUFFER have its own NET_BUFFER_LIST. Some functions that used to take as argument a NET_BUFFER_LIST now take as argument a NET_BUFFER. I have also added a few ASSERTs where the NET_BUFFER_LIST is expected to have only one NET_BUFFER." Signed-off-by: Samuel Ghinet <sghi...@cloudbasesolutions.com> --- datapath-windows/ovsext/OvsActions.c | 21 ++++-- datapath-windows/ovsext/OvsBufferMgmt.c | 6 +- datapath-windows/ovsext/OvsBufferMgmt.h | 5 ++ datapath-windows/ovsext/OvsChecksum.c | 7 +- datapath-windows/ovsext/OvsFlow.c | 44 ++++++------ datapath-windows/ovsext/OvsFlow.h | 6 +- datapath-windows/ovsext/OvsPacketIO.c | 107 ++++++++++++++++++------------ datapath-windows/ovsext/OvsPacketParser.c | 20 +++--- datapath-windows/ovsext/OvsPacketParser.h | 30 ++++----- datapath-windows/ovsext/OvsTunnel.c | 16 +++-- datapath-windows/ovsext/OvsUser.c | 22 ++++-- datapath-windows/ovsext/OvsVxlan.c | 37 ++++++++--- 12 files changed, 199 insertions(+), 122 deletions(-) diff --git a/datapath-windows/ovsext/OvsActions.c b/datapath-windows/ovsext/OvsActions.c index 4a2c117..e342d75 100644 --- a/datapath-windows/ovsext/OvsActions.c +++ b/datapath-windows/ovsext/OvsActions.c @@ -495,6 +495,9 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx) OvsFlow *flow; UINT64 hash; NDIS_STATUS status; + VOID* vlanTagValue; + NET_BUFFER* pNb; + POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo); if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) { @@ -505,12 +508,18 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx) return NDIS_STATUS_SUCCESS; } ASSERT(vport->nicState == NdisSwitchNicStateConnected); + ASSERT(ovsFwdCtx->curNbl->FirstNetBuffer->Next == NULL); /* Assert that in the Rx direction, key is always setup. */ ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0); - status = OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo, - &key, &ovsFwdCtx->layers, ovsFwdCtx->tunKey.dst != 0 ? - &ovsFwdCtx->tunKey : NULL); + vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx->curNbl, + Ieee8021QNetBufferListInfo); + + pNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); + + status = OvsExtractFlow(pNb, ovsFwdCtx->srcVportNo, + &key, &ovsFwdCtx->layers, ovsFwdCtx->tunKey.dst != 0 ? + &ovsFwdCtx->tunKey : NULL, vlanTagValue); if (status != NDIS_STATUS_SUCCESS) { OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"OVS-Flow extract failed"); @@ -520,7 +529,7 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx) flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, &hash, FALSE); if (flow) { - OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers); + OvsFlowUsed(flow, pNb, &ovsFwdCtx->layers); ovsFwdCtx->switchContext->datapath.hits++; status = OvsActionsExecute(ovsFwdCtx->switchContext, ovsFwdCtx->completionList, ovsFwdCtx->curNbl, @@ -828,6 +837,8 @@ dropit: * * Side effects: * This function consumes the NBL. + + * XXX: it is called by OvsIpHlprCbVxlan, which is not used! * -------------------------------------------------------------------------- */ VOID @@ -1348,6 +1359,8 @@ OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext, PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); + ASSERT(curNbl->FirstNetBuffer->Next == NULL); + /* XXX: ASSERT that the flow table lock is held. */ status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, portNo, sendFlags, fwdDetail, completionList, diff --git a/datapath-windows/ovsext/OvsBufferMgmt.c b/datapath-windows/ovsext/OvsBufferMgmt.c index 8aa8060..18eff07 100644 --- a/datapath-windows/ovsext/OvsBufferMgmt.c +++ b/datapath-windows/ovsext/OvsBufferMgmt.c @@ -1085,7 +1085,7 @@ nblcopy_error: * -------------------------------------------------------------------------- */ static NDIS_STATUS -GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl, +GetSegmentHeaderInfo(PNET_BUFFER nb, const POVS_PACKET_HDR_INFO hdrInfo, UINT32 *hdrSize, UINT32 *seqNumber) { @@ -1093,7 +1093,7 @@ GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl, const TCPHdr *tcp; /* Parse the orginal Eth/IP/TCP header */ - tcp = OvsGetPacketBytes(nbl, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage); + tcp = OvsGetPacketBytes(nb, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage); if (tcp == NULL) { return NDIS_STATUS_FAILURE; } @@ -1199,7 +1199,7 @@ OvsTcpSegmentNBL(PVOID ovsContext, ASSERT(NET_BUFFER_NEXT_NB(nb) == NULL); /* Figure out the segment header size */ - status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber); + status = GetSegmentHeaderInfo(nb, hdrInfo, &hdrSize, &seqNumber); if (status != NDIS_STATUS_SUCCESS) { OVS_LOG_INFO("Cannot parse NBL header"); return NULL; diff --git a/datapath-windows/ovsext/OvsBufferMgmt.h b/datapath-windows/ovsext/OvsBufferMgmt.h index 9c00b1b..a0c2c77 100644 --- a/datapath-windows/ovsext/OvsBufferMgmt.h +++ b/datapath-windows/ovsext/OvsBufferMgmt.h @@ -81,6 +81,11 @@ typedef struct _OVS_NBL_POOL { #endif } OVS_NBL_POOL, *POVS_NBL_POOL; +#define OVS_NBL_FOR_EACH(pCurNbl, nblList) \ +for (NET_BUFFER_LIST* pCurNbl = nblList; \ + pCurNbl != NULL; \ + pCurNbl = NET_BUFFER_LIST_NEXT_NBL(pCurNbl)) + NDIS_STATUS OvsInitBufferPool(PVOID context); VOID OvsCleanupBufferPool(PVOID context); diff --git a/datapath-windows/ovsext/OvsChecksum.c b/datapath-windows/ovsext/OvsChecksum.c index e192373..5b9cbfa 100644 --- a/datapath-windows/ovsext/OvsChecksum.c +++ b/datapath-windows/ovsext/OvsChecksum.c @@ -535,8 +535,13 @@ OvsValidateIPChecksum(PNET_BUFFER_LIST curNbl, } /* Next, check if the NIC did not validate the RX checksum. */ + /* + * XXX: we have no confidence that this transfer is an Rx, except + * if we check the caller function + */ if (!csumInfo.Receive.IpChecksumSucceeded) { - ipHdr = OvsGetIp(curNbl, hdrInfo->l3Offset, &ip_storage); + ipHdr = OvsGetIp(NET_BUFFER_LIST_FIRST_NB(curNbl), + hdrInfo->l3Offset, &ip_storage); if (ipHdr) { ip_storage = *ipHdr; hdrChecksum = ipHdr->check; diff --git a/datapath-windows/ovsext/OvsFlow.c b/datapath-windows/ovsext/OvsFlow.c index daa64e0..60b01e2 100644 --- a/datapath-windows/ovsext/OvsFlow.c +++ b/datapath-windows/ovsext/OvsFlow.c @@ -104,7 +104,7 @@ OvsAllocateFlowTable(OVS_DATAPATH *datapath, /* *---------------------------------------------------------------------------- - * GetStartAddrNBL -- + * GetStartAddrNB -- * Get the virtual address of the frame. * * Results: @@ -112,7 +112,7 @@ OvsAllocateFlowTable(OVS_DATAPATH *datapath, *---------------------------------------------------------------------------- */ static __inline VOID * -GetStartAddrNBL(const NET_BUFFER_LIST *_pNB) +GetStartAddrNB(const NET_BUFFER *_pNB) { PMDL curMdl; PUINT8 curBuffer; @@ -121,21 +121,21 @@ GetStartAddrNBL(const NET_BUFFER_LIST *_pNB) ASSERT(_pNB); // Ethernet Header is a guaranteed safe access. - curMdl = (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdl; + curMdl = (_pNB)->CurrentMdl; curBuffer = MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); if (!curBuffer) { return NULL; } curHeader = (PEthHdr) - (curBuffer + (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdlOffset); + (curBuffer + (_pNB)->CurrentMdlOffset); return (VOID *) curHeader; } VOID OvsFlowUsed(OvsFlow *flow, - const NET_BUFFER_LIST *packet, + const NET_BUFFER *packet, const POVS_PACKET_HDR_INFO layers) { LARGE_INTEGER tickCount; @@ -144,7 +144,7 @@ OvsFlowUsed(OvsFlow *flow, flow->used = tickCount.QuadPart * ovsTimeIncrementPerTick; flow->used += ovsUserTimestampDelta; flow->packetCount++; - flow->byteCount += OvsPacketLenNBL(packet); + flow->byteCount += NET_BUFFER_DATA_LENGTH(packet); flow->tcpFlags |= OvsGetTcpFlags(packet, &flow->key, layers); } @@ -191,15 +191,14 @@ DeleteAllFlows(OVS_DATAPATH *datapath) *---------------------------------------------------------------------------- */ NDIS_STATUS -OvsExtractFlow(const NET_BUFFER_LIST *packet, +OvsExtractFlow(const NET_BUFFER* pNb, UINT32 inPort, OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers, - OvsIPv4TunnelKey *tunKey) + OvsIPv4TunnelKey *tunKey, VOID* vlanTagValue) { struct Eth_Header *eth; UINT8 offset = 0; - PVOID vlanTagValue; layers->value = 0; @@ -213,21 +212,20 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, } flow->l2.inPort = inPort; - - if ( OvsPacketLenNBL(packet) < ETH_HEADER_LEN_DIX) { + + if (NET_BUFFER_DATA_LENGTH(pNb) < ETH_HEADER_LEN_DIX) { flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + 8 - flow->l2.offset; return NDIS_STATUS_SUCCESS; } /* Link layer. */ - eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet); + eth = (Eth_Header *)GetStartAddrNB(pNb); memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH); memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH); /* * vlan_tci. */ - vlanTagValue = NET_BUFFER_LIST_INFO(packet, Ieee8021QNetBufferListInfo); if (vlanTagValue) { PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag = (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue; @@ -262,7 +260,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, if (ETH_TYPENOT8023(eth->dix.typeNBO)) { flow->l2.dlType = eth->dix.typeNBO; layers->l3Offset = ETH_HEADER_LEN_DIX + offset; - } else if (OvsPacketLenNBL(packet) >= ETH_HEADER_LEN_802_3 && + } else if (NET_BUFFER_DATA_LENGTH(pNb) >= ETH_HEADER_LEN_802_3 && eth->e802_3.llc.dsap == 0xaa && eth->e802_3.llc.ssap == 0xaa && eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME && @@ -285,7 +283,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, flow->l2.keyLen += OVS_IP_KEY_SIZE; layers->isIPv4 = 1; - nh = OvsGetIp(packet, layers->l3Offset, &ip_storage); + nh = OvsGetIp(pNb, layers->l3Offset, &ip_storage); if (nh) { layers->l4Offset = layers->l3Offset + nh->ihl * 4; @@ -309,14 +307,14 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, if (!(nh->frag_off & htons(IP_OFFSET))) { if (ipKey->nwProto == SOCKET_IPPROTO_TCP) { - OvsParseTcp(packet, &ipKey->l4, layers); + OvsParseTcp(pNb, &ipKey->l4, layers); } else if (ipKey->nwProto == SOCKET_IPPROTO_UDP) { - OvsParseUdp(packet, &ipKey->l4, layers); + OvsParseUdp(pNb, &ipKey->l4, layers); } else if (ipKey->nwProto == SOCKET_IPPROTO_ICMP) { ICMPHdr icmpStorage; const ICMPHdr *icmp; - icmp = OvsGetIcmp(packet, layers->l4Offset, &icmpStorage); + icmp = OvsGetIcmp(pNb, layers->l4Offset, &icmpStorage); if (icmp) { ipKey->l4.tpSrc = htons(icmp->type); ipKey->l4.tpDst = htons(icmp->code); @@ -331,7 +329,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) { NDIS_STATUS status; flow->l2.keyLen += OVS_IPV6_KEY_SIZE; - status = OvsParseIPv6(packet, flow, layers); + status = OvsParseIPv6(pNb, flow, layers); if (status != NDIS_STATUS_SUCCESS) { memset(&flow->ipv6Key, 0, sizeof (Ipv6Key)); return status; @@ -342,11 +340,11 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, flow->ipv6Key.pad = 0; if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_TCP) { - OvsParseTcp(packet, &(flow->ipv6Key.l4), layers); + OvsParseTcp(pNb, &(flow->ipv6Key.l4), layers); } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_UDP) { - OvsParseUdp(packet, &(flow->ipv6Key.l4), layers); + OvsParseUdp(pNb, &(flow->ipv6Key.l4), layers); } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) { - OvsParseIcmpV6(packet, flow, layers); + OvsParseIcmpV6(pNb, flow, layers); flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE); } } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) { @@ -357,7 +355,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, ((UINT64 *)arpKey)[1] = 0; ((UINT64 *)arpKey)[2] = 0; flow->l2.keyLen += OVS_ARP_KEY_SIZE; - arp = OvsGetArp(packet, layers->l3Offset, &arpStorage); + arp = OvsGetArp(pNb, layers->l3Offset, &arpStorage); if (arp && arp->ea_hdr.ar_hrd == htons(1) && arp->ea_hdr.ar_pro == htons(ETH_TYPE_IPV4) && arp->ea_hdr.ar_hln == ETH_ADDR_LENGTH && diff --git a/datapath-windows/ovsext/OvsFlow.h b/datapath-windows/ovsext/OvsFlow.h index 93368b3..aeec803 100644 --- a/datapath-windows/ovsext/OvsFlow.h +++ b/datapath-windows/ovsext/OvsFlow.h @@ -50,13 +50,13 @@ NDIS_STATUS OvsDeleteFlowTable(OVS_DATAPATH *datapath); NDIS_STATUS OvsAllocateFlowTable(OVS_DATAPATH *datapath, POVS_SWITCH_CONTEXT switchContext); -NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort, +NDIS_STATUS OvsExtractFlow(const NET_BUFFER *pNb, UINT32 inPort, OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers, - OvsIPv4TunnelKey *tunKey); + OvsIPv4TunnelKey *tunKey, VOID* vlanTagValue); OvsFlow *OvsLookupFlow(OVS_DATAPATH *datapath, const OvsFlowKey *key, UINT64 *hash, BOOLEAN hashValid); UINT64 OvsHashFlow(const OvsFlowKey *key); -VOID OvsFlowUsed(OvsFlow *flow, const NET_BUFFER_LIST *pkt, +VOID OvsFlowUsed(OvsFlow *flow, const NET_BUFFER *pkt, const POVS_PACKET_HDR_INFO layers); NTSTATUS OvsDumpFlowIoctl(PVOID inputBuffer, UINT32 inputLength, diff --git a/datapath-windows/ovsext/OvsPacketIO.c b/datapath-windows/ovsext/OvsPacketIO.c index 39e5703..7896e7b 100644 --- a/datapath-windows/ovsext/OvsPacketIO.c +++ b/datapath-windows/ovsext/OvsPacketIO.c @@ -211,6 +211,7 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, OvsFlowKey key; UINT64 hash; PNET_BUFFER curNb; + VOID* vlanTagValue; nextNbl = curNbl->Next; curNbl->Next = NULL; @@ -223,6 +224,7 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, } else { POVS_BUFFER_CONTEXT ctx; OvsFlow *flow; + NET_BUFFER_LIST* pNewNbl; fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); sourcePort = fwdDetail->SourcePortId; @@ -261,53 +263,76 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, vport->stats.rxPackets++; vport->stats.rxBytes += NET_BUFFER_DATA_LENGTH(curNb); + vlanTagValue = NET_BUFFER_LIST_INFO(curNbl, + Ieee8021QNetBufferListInfo); - status = OvsExtractFlow(curNbl, vport->portNo, &key, &layers, NULL); - if (status != NDIS_STATUS_SUCCESS) { - RtlInitUnicodeString(&filterReason, L"OVS-Flow extract failed"); - goto dropit; - } + pNewNbl = OvsPartialCopyToMultipleNBLs(switchContext, curNbl, + 0, 0, TRUE); + if (!pNewNbl) { + RtlInitUnicodeString(&filterReason, + L"Cannot allocate new NBL: partial copy NB to " + L"multiple NBLs."); + + OvsStartNBLIngressError(switchContext, curNbl, + sendCompleteFlags, &filterReason, + NDIS_STATUS_RESOURCES); - ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); - OvsAcquireDatapathRead(datapath, &dpLockState, dispatch); - - flow = OvsLookupFlow(datapath, &key, &hash, FALSE); - if (flow) { - OvsFlowUsed(flow, curNbl, &layers); - datapath->hits++; - /* If successful, OvsActionsExecute() consumes the NBL. - * Otherwise, it adds it to the completionList. No need to - * check the return value. */ - OvsActionsExecute(switchContext, &completionList, curNbl, - portNo, SendFlags, &key, &hash, &layers, - flow->actions, flow->actionsLen); - OvsReleaseDatapath(datapath, &dpLockState); NdisReleaseRWLock(switchContext->dispatchLock, &lockState); continue; - } else { - OvsReleaseDatapath(datapath, &dpLockState); - - datapath->misses++; - status = OvsCreateAndAddPackets(OVS_DEFAULT_PACKET_QUEUE, - NULL, 0, OVS_PACKET_CMD_MISS, - portNo, - key.tunKey.dst != 0 ? - (OvsIPv4TunnelKey *)&key.tunKey : - NULL, curNbl, - sourcePort == - switchContext->externalPortId, - &layers, switchContext, - &missedPackets, &num); - if (status == NDIS_STATUS_SUCCESS) { - /* Complete the packet since it was copied to user - * buffer. */ - RtlInitUnicodeString(&filterReason, - L"OVS-Dropped since packet was copied to userspace"); + } + + OVS_NBL_FOR_EACH(pCurNbl, pNewNbl) + { + status = OvsExtractFlow(NET_BUFFER_LIST_FIRST_NB(pCurNbl), + vport->portNo, &key, &layers, NULL, vlanTagValue); + if (status != NDIS_STATUS_SUCCESS) { + RtlInitUnicodeString(&filterReason, L"OVS-Flow extract failed"); + goto dropit; + } + + ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); + OvsAcquireDatapathRead(datapath, &dpLockState, dispatch); + + flow = OvsLookupFlow(datapath, &key, &hash, FALSE); + if (flow) { + OvsFlowUsed(flow, NET_BUFFER_LIST_FIRST_NB(pCurNbl), &layers); + datapath->hits++; + + /* If successful, OvsActionsExecute() consumes the NBL. + * Otherwise, it adds it to the completionList. No need to + * check the return value. */ + OvsActionsExecute(switchContext, &completionList, pCurNbl, + portNo, SendFlags, &key, &hash, &layers, + flow->actions, flow->actionsLen); + + OvsReleaseDatapath(datapath, &dpLockState); + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + continue; } else { - RtlInitUnicodeString(&filterReason, - L"OVS-Dropped due to failure to queue to userspace"); + OvsReleaseDatapath(datapath, &dpLockState); + + datapath->misses++; + status = OvsCreateAndAddPackets(OVS_DEFAULT_PACKET_QUEUE, + NULL, 0, OVS_PACKET_CMD_MISS, + portNo, + key.tunKey.dst != 0 ? + (OvsIPv4TunnelKey *)&key.tunKey : + NULL, curNbl, + sourcePort == + switchContext->externalPortId, + &layers, switchContext, + &missedPackets, &num); + if (status == NDIS_STATUS_SUCCESS) { + /* Complete the packet since it was copied to user + * buffer. */ + RtlInitUnicodeString(&filterReason, + L"OVS-Dropped since packet was copied to userspace"); + } else { + RtlInitUnicodeString(&filterReason, + L"OVS-Dropped due to failure to queue to userspace"); + } + goto dropit; } - goto dropit; } dropit: diff --git a/datapath-windows/ovsext/OvsPacketParser.c b/datapath-windows/ovsext/OvsPacketParser.c index 0a93435..666d3d5 100644 --- a/datapath-windows/ovsext/OvsPacketParser.c +++ b/datapath-windows/ovsext/OvsPacketParser.c @@ -18,13 +18,12 @@ //XXX consider moving to NdisGetDataBuffer. const VOID * -OvsGetPacketBytes(const NET_BUFFER_LIST *nbl, +OvsGetPacketBytes(const NET_BUFFER *netBuffer, UINT32 len, UINT32 srcOffset, VOID *storage) { NDIS_STATUS status = NDIS_STATUS_SUCCESS; - PNET_BUFFER netBuffer = NET_BUFFER_LIST_FIRST_NB(nbl); PMDL currentMdl; BOOLEAN firstMDL = TRUE; ULONG destOffset = 0; @@ -83,7 +82,7 @@ OvsGetPacketBytes(const NET_BUFFER_LIST *nbl, } NDIS_STATUS -OvsParseIPv6(const NET_BUFFER_LIST *packet, +OvsParseIPv6(const NET_BUFFER *packet, OvsFlowKey *key, POVS_PACKET_HDR_INFO layers) { @@ -142,7 +141,7 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, len = extHdr->hdrExtLen; ofs += nextHdr == SOCKET_IPPROTO_AH ? (len + 2) * 4 : (len + 1) * 8; nextHdr = extHdr->nextHeader; - if (OvsPacketLenNBL(packet) < ofs) { + if (NET_BUFFER_DATA_LENGTH(packet) < ofs) { return NDIS_STATUS_FAILURE; } } else if (nextHdr == SOCKET_IPPROTO_FRAGMENT) { @@ -177,12 +176,12 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, } VOID -OvsParseTcp(const NET_BUFFER_LIST *packet, +OvsParseTcp(const NET_BUFFER* pNb, L4Key *flow, POVS_PACKET_HDR_INFO layers) { TCPHdr tcpStorage; - const TCPHdr *tcp = OvsGetTcp(packet, layers->l4Offset, &tcpStorage); + const TCPHdr *tcp = OvsGetTcp(pNb, layers->l4Offset, &tcpStorage); if (tcp) { flow->tpSrc = tcp->source; flow->tpDst = tcp->dest; @@ -192,7 +191,7 @@ OvsParseTcp(const NET_BUFFER_LIST *packet, } VOID -OvsParseUdp(const NET_BUFFER_LIST *packet, +OvsParseUdp(const NET_BUFFER *packet, L4Key *flow, POVS_PACKET_HDR_INFO layers) { @@ -210,7 +209,7 @@ OvsParseUdp(const NET_BUFFER_LIST *packet, } NDIS_STATUS -OvsParseIcmpV6(const NET_BUFFER_LIST *packet, +OvsParseIcmpV6(const NET_BUFFER *packet, OvsFlowKey *key, POVS_PACKET_HDR_INFO layers) { @@ -249,7 +248,7 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, } flow->ndTarget = *ndTarget; - while ((UINT32)(ofs + 8) <= OvsPacketLenNBL(packet)) { + while ((UINT32)(ofs + 8) <= NET_BUFFER_DATA_LENGTH(packet)) { /* * The minimum size of an option is 8 bytes, which also is * the size of Ethernet link-layer options. @@ -264,7 +263,8 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, } optLen = ndOpt->len * 8; - if (!optLen || (UINT32)(ofs + optLen) > OvsPacketLenNBL(packet)) { + if (!optLen || (UINT32)(ofs + optLen) > + NET_BUFFER_DATA_LENGTH(packet)) { goto invalid; } diff --git a/datapath-windows/ovsext/OvsPacketParser.h b/datapath-windows/ovsext/OvsPacketParser.h index ab3c613..aaf3877 100644 --- a/datapath-windows/ovsext/OvsPacketParser.h +++ b/datapath-windows/ovsext/OvsPacketParser.h @@ -20,15 +20,15 @@ #include "precomp.h" #include "OvsNetProto.h" -const VOID* OvsGetPacketBytes(const NET_BUFFER_LIST *_pNB, UINT32 len, +const VOID* OvsGetPacketBytes(const NET_BUFFER *pNB, UINT32 len, UINT32 SrcOffset, VOID *storage); -NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, OvsFlowKey *key, +NDIS_STATUS OvsParseIPv6(const NET_BUFFER *packet, OvsFlowKey *key, POVS_PACKET_HDR_INFO layers); -VOID OvsParseTcp(const NET_BUFFER_LIST *packet, L4Key *flow, +VOID OvsParseTcp(const NET_BUFFER* pNb, L4Key *flow, POVS_PACKET_HDR_INFO layers); -VOID OvsParseUdp(const NET_BUFFER_LIST *packet, L4Key *flow, +VOID OvsParseUdp(const NET_BUFFER *packet, L4Key *flow, POVS_PACKET_HDR_INFO layers); -NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, OvsFlowKey *key, +NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER *packet, OvsFlowKey *key, POVS_PACKET_HDR_INFO layers); static __inline ULONG @@ -58,7 +58,7 @@ OvsPacketLenNBL(const NET_BUFFER_LIST *_pNB) * which C does not allow. */ static UINT16 -OvsGetTcpCtl(const NET_BUFFER_LIST *packet, // IN +OvsGetTcpCtl(const NET_BUFFER *packet, // IN const POVS_PACKET_HDR_INFO layers) // IN { #define TCP_CTL_OFS 12 // Offset of "ctl" field in TCP header. @@ -74,7 +74,7 @@ OvsGetTcpCtl(const NET_BUFFER_LIST *packet, // IN static UINT8 -OvsGetTcpFlags(const NET_BUFFER_LIST *packet, // IN +OvsGetTcpFlags(const NET_BUFFER *packet, // IN const OvsFlowKey *key, // IN const POVS_PACKET_HDR_INFO layers) // IN { @@ -88,7 +88,7 @@ OvsGetTcpFlags(const NET_BUFFER_LIST *packet, // IN } static const EtherArp * -OvsGetArp(const NET_BUFFER_LIST *packet, +OvsGetArp(const NET_BUFFER *packet, UINT32 ofs, EtherArp *storage) { @@ -96,14 +96,14 @@ OvsGetArp(const NET_BUFFER_LIST *packet, } static const IPHdr * -OvsGetIp(const NET_BUFFER_LIST *packet, +OvsGetIp(const NET_BUFFER* pNb, UINT32 ofs, IPHdr *storage) { - const IPHdr *ip = OvsGetPacketBytes(packet, sizeof *ip, ofs, storage); + const IPHdr *ip = OvsGetPacketBytes(pNb, sizeof *ip, ofs, storage); if (ip) { int ipLen = ip->ihl * 4; - if (ipLen >= sizeof *ip && OvsPacketLenNBL(packet) >= ofs + ipLen) { + if (ipLen >= sizeof *ip && NET_BUFFER_DATA_LENGTH(pNb) >= ofs + ipLen) { return ip; } } @@ -111,14 +111,14 @@ OvsGetIp(const NET_BUFFER_LIST *packet, } static const TCPHdr * -OvsGetTcp(const NET_BUFFER_LIST *packet, +OvsGetTcp(const NET_BUFFER *packet, UINT32 ofs, TCPHdr *storage) { const TCPHdr *tcp = OvsGetPacketBytes(packet, sizeof *tcp, ofs, storage); if (tcp) { int tcpLen = tcp->doff * 4; - if (tcpLen >= sizeof *tcp && OvsPacketLenNBL(packet) >= ofs + tcpLen) { + if (tcpLen >= sizeof *tcp && NET_BUFFER_DATA_LENGTH(packet) >= ofs + tcpLen) { return tcp; } } @@ -126,7 +126,7 @@ OvsGetTcp(const NET_BUFFER_LIST *packet, } static const UDPHdr * -OvsGetUdp(const NET_BUFFER_LIST *packet, +OvsGetUdp(const NET_BUFFER *packet, UINT32 ofs, UDPHdr *storage) { @@ -134,7 +134,7 @@ OvsGetUdp(const NET_BUFFER_LIST *packet, } static const ICMPHdr * -OvsGetIcmp(const NET_BUFFER_LIST *packet, +OvsGetIcmp(const NET_BUFFER *packet, UINT32 ofs, ICMPHdr *storage) { diff --git a/datapath-windows/ovsext/OvsTunnel.c b/datapath-windows/ovsext/OvsTunnel.c index b5a369a..a1387cb 100644 --- a/datapath-windows/ovsext/OvsTunnel.c +++ b/datapath-windows/ovsext/OvsTunnel.c @@ -228,6 +228,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, OVS_DATAPATH *datapath = &gOvsSwitchContext->datapath; ASSERT(gOvsSwitchContext); + ASSERT(pNbl->FirstNetBuffer->Next == NULL); /* Fill the tunnel key */ status = OvsSlowPathDecapVxlan(pNbl, &tunnelKey); @@ -265,6 +266,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, UINT64 hash; PNET_BUFFER curNb; OvsFlow *flow; + VOID* vlanTagValue; fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl); @@ -295,20 +297,22 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, ASSERT(vport->ovsType == OVSWIN_VPORT_TYPE_VXLAN); portNo = vport->portNo; + vlanTagValue = NET_BUFFER_LIST_INFO(pNbl, Ieee8021QNetBufferListInfo); - status = OvsExtractFlow(pNbl, portNo, &key, &layers, &tunnelKey); + status = OvsExtractFlow(curNb, portNo, &key, &layers, + &tunnelKey, vlanTagValue); if (status != NDIS_STATUS_SUCCESS) { goto unlockAndDrop; } flow = OvsLookupFlow(datapath, &key, &hash, FALSE); if (flow) { - OvsFlowUsed(flow, pNbl, &layers); + OvsFlowUsed(flow, curNb, &layers); datapath->hits++; OvsActionsExecute(gOvsSwitchContext, &completionList, pNbl, - portNo, SendFlags, &key, &hash, &layers, - flow->actions, flow->actionsLen); + portNo, SendFlags, &key, &hash, &layers, + flow->actions, flow->actionsLen); OvsReleaseDatapath(datapath, &dpLockState); } else { @@ -316,8 +320,8 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, datapath->misses++; elem = OvsCreateQueuePacket(1, NULL, 0, OVS_PACKET_CMD_MISS, - portNo, &key.tunKey, pNbl, curNb, - TRUE, &layers); + portNo, &key.tunKey, pNbl, curNb, + TRUE, &layers); if (elem) { /* Complete the packet since it was copied to user buffer. */ InsertTailList(&missedPackets, &elem->link); diff --git a/datapath-windows/ovsext/OvsUser.c b/datapath-windows/ovsext/OvsUser.c index 8271d52..267ef1b 100644 --- a/datapath-windows/ovsext/OvsUser.c +++ b/datapath-windows/ovsext/OvsUser.c @@ -314,6 +314,7 @@ OvsExecuteDpIoctl(PVOID inputBuffer, PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; OvsFlowKey key; OVS_PACKET_HDR_INFO layers; + VOID* vlanTagValue; if (inputLength < sizeof(*execute) || outputLength != 0) { return STATUS_INFO_LENGTH_MISMATCH; @@ -355,17 +356,22 @@ OvsExecuteDpIoctl(PVOID inputBuffer, fwdDetail->SourceNicIndex = 0; // XXX: Figure out if any of the other members of fwdDetail need to be set. - ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers, - NULL); + ASSERT(pNbl->FirstNetBuffer->Next == NULL); + + vlanTagValue = NET_BUFFER_LIST_INFO(pNbl, Ieee8021QNetBufferListInfo); + + ndisStatus = OvsExtractFlow(NET_BUFFER_LIST_FIRST_NB(pNbl), + fwdDetail->SourcePortId, &key, &layers, + NULL, vlanTagValue); if (ndisStatus == NDIS_STATUS_SUCCESS) { ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, - NDIS_RWL_AT_DISPATCH_LEVEL); + NDIS_RWL_AT_DISPATCH_LEVEL); ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl, - 0, // XXX: we are passing 0 for srcVportNo - NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP, - &key, NULL, &layers, actions, - execute->actionsLen); + 0, // XXX: we are passing 0 for srcVportNo + NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP, + &key, NULL, &layers, actions, + execute->actionsLen); pNbl = NULL; NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); } @@ -820,6 +826,8 @@ OvsCreateAndAddPackets(UINT32 queueId, PNET_BUFFER_LIST newNbl = NULL; PNET_BUFFER nb; + ASSERT(nbl->FirstNetBuffer->Next == NULL); + if (hdrInfo->isTcp) { NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; UINT32 packetLength; diff --git a/datapath-windows/ovsext/OvsVxlan.c b/datapath-windows/ovsext/OvsVxlan.c index 63909ae..953dfc7 100644 --- a/datapath-windows/ovsext/OvsVxlan.c +++ b/datapath-windows/ovsext/OvsVxlan.c @@ -116,6 +116,8 @@ OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl, UINT32 headRoom = OvsGetVxlanTunHdrSize(); UINT32 packetLength; + ASSERT(curNbl->FirstNetBuffer->Next == NULL); + /* * XXX: the assumption currently is that the NBL is owned by OVS, and * headroom has already been allocated as part of allocating the NBL and @@ -286,13 +288,19 @@ OvsIpHlprCbVxlan(PNET_BUFFER_LIST curNbl, OvsFlowKey key; NDIS_STATUS status; UNREFERENCED_PARAMETER(inPort); - - status = OvsExtractFlow(curNbl, inPort, &key, &layers, NULL); - if (result == STATUS_SUCCESS) { - status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers, + VOID* vlanTagValue; + + vlanTagValue = NET_BUFFER_LIST_INFO(curNbl, Ieee8021QNetBufferListInfo); + for (NET_BUFFER* pNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + pNb != NULL; pNb = NET_BUFFER_NEXT_NB(pNb)) + { + status = OvsExtractFlow(pNb, inPort, &key, &layers, NULL, vlanTagValue); + if (result == STATUS_SUCCESS) { + status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers, (POVS_SWITCH_CONTEXT)cbData1, NULL); - } else { - status = NDIS_STATUS_FAILURE; + } else { + status = NDIS_STATUS_FAILURE; + } } if (status != NDIS_STATUS_SUCCESS) { @@ -457,9 +465,16 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, OVS_PACKET_HDR_INFO layers; layers.value = 0; + ASSERT(packet->FirstNetBuffer->Next == NULL); do { - nh = OvsGetIp(packet, layers.l3Offset, &ip_storage); + NET_BUFFER* pNb = NET_BUFFER_LIST_FIRST_NB(packet); + + /* + * given that it is an encapsulated packet (UDP), + * we can use the first NB for checks + */ + nh = OvsGetIp(pNb, layers.l3Offset, &ip_storage); if (nh) { layers.l4Offset = layers.l3Offset + nh->ihl * 4; } else { @@ -467,7 +482,7 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, } /* make sure it's a VXLAN packet */ - udp = OvsGetUdp(packet, layers.l4Offset, &udpStorage); + udp = OvsGetUdp(pNb, layers.l4Offset, &udpStorage); if (udp) { layers.l7Offset = layers.l4Offset + sizeof *udp; } else { @@ -477,7 +492,11 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, /* XXX Should be tested against the dynamic port # in the VXLAN vport */ ASSERT(udp->dest == RtlUshortByteSwap(VXLAN_UDP_PORT)); - VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(packet, + /* + * we may have multiple VXLAN packets here that need to be decapsulated + * but for the VXLAN header, we need only the first NET_BUFFER + */ + VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(pNb, sizeof(*VxlanHeader), layers.l7Offset, &VxlanHeaderBuffer); -- 1.8.3.msysgit.0 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev