ovs/ovs-issues#15 All NET_BUFFERs of a NET_BUFFER_LIST must go through the pipeline: extract, find flow, execute. Previously, only the first NET_BUFFER of a NET_BUFFER_LIST was going through this pipeline, which was erroneous.
OvsPartialCopyToMultipleNBLs is used to make each NET_BUFFER have its own NET_BUFFER_LIST. Some functions that used to take as argument a NET_BUFFER_LIST now take as argument a NET_BUFFER. I have also added a few ASSERTs where the NET_BUFFER_LIST is expected to have only one NET_BUFFER." Signed-off-by: Samuel Ghinet <sghinet at cloudbasesolutions.com> Co-authored-by: Alin Gabriel Serdean <aserd...@cloudbasesolutions.com> --- datapath-windows/ovsext/OvsActions.c | 20 ++++- datapath-windows/ovsext/OvsBufferMgmt.c | 8 +- datapath-windows/ovsext/OvsBufferMgmt.h | 5 ++ datapath-windows/ovsext/OvsChecksum.c | 3 +- datapath-windows/ovsext/OvsFlow.c | 44 +++++---- datapath-windows/ovsext/OvsFlow.h | 6 +- datapath-windows/ovsext/OvsPacketIO.c | 143 +++++++++++++++++++----------- datapath-windows/ovsext/OvsPacketParser.c | 24 ++--- datapath-windows/ovsext/OvsPacketParser.h | 30 ++++--- datapath-windows/ovsext/OvsTunnel.c | 12 ++- datapath-windows/ovsext/OvsUser.c | 19 +++- datapath-windows/ovsext/OvsVxlan.c | 37 ++++++-- 12 files changed, 221 insertions(+), 130 deletions(-) diff --git a/datapath-windows/ovsext/OvsActions.c b/datapath-windows/ovsext/OvsActions.c index 635becc..c5c5116 100644 --- a/datapath-windows/ovsext/OvsActions.c +++ b/datapath-windows/ovsext/OvsActions.c @@ -517,6 +517,9 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx) OvsFlow *flow; UINT64 hash; NDIS_STATUS status; + VOID* vlanTagValue; + NET_BUFFER* pNb; + POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo); if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) { @@ -527,12 +530,19 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx) return NDIS_STATUS_SUCCESS; } ASSERT(vport->nicState == NdisSwitchNicStateConnected); + ASSERT(ovsFwdCtx->curNbl->FirstNetBuffer->Next == NULL); /* Assert that in the Rx direction, key is always setup. */ ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0); - status = OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo, - &key, &ovsFwdCtx->layers, ovsFwdCtx->tunKey.dst != 0 ? - &ovsFwdCtx->tunKey : NULL); + vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx->curNbl, + Ieee8021QNetBufferListInfo); + + pNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); + + status = OvsExtractFlow(pNb, ovsFwdCtx->srcVportNo, + &key, &ovsFwdCtx->layers, + ovsFwdCtx->tunKey.dst != 0 ? + &ovsFwdCtx->tunKey : NULL, vlanTagValue); if (status != NDIS_STATUS_SUCCESS) { OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"OVS-Flow extract failed"); @@ -542,7 +552,7 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx) flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, &hash, FALSE); if (flow) { - OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers); + OvsFlowUsed(flow, pNb, &ovsFwdCtx->layers); ovsFwdCtx->switchContext->datapath.hits++; status = OvsActionsExecute(ovsFwdCtx->switchContext, ovsFwdCtx->completionList, ovsFwdCtx->curNbl, @@ -1370,6 +1380,8 @@ OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext, PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); + ASSERT(curNbl->FirstNetBuffer->Next == NULL); + /* XXX: ASSERT that the flow table lock is held. */ status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, portNo, sendFlags, fwdDetail, completionList, diff --git a/datapath-windows/ovsext/OvsBufferMgmt.c b/datapath-windows/ovsext/OvsBufferMgmt.c index 8aa8060..c367d2f 100644 --- a/datapath-windows/ovsext/OvsBufferMgmt.c +++ b/datapath-windows/ovsext/OvsBufferMgmt.c @@ -863,7 +863,7 @@ OvsPartialCopyToMultipleNBLs(PVOID ovsContext, if (prevNbl == NULL) { firstNbl = newNbl; } else { - NET_BUFFER_LIST_NEXT_NBL(prevNbl) = nbl; + NET_BUFFER_LIST_NEXT_NBL(prevNbl) = newNbl; NET_BUFFER_NEXT_NB(prevNb) = nb; } prevNbl = newNbl; @@ -1085,7 +1085,7 @@ nblcopy_error: * -------------------------------------------------------------------------- */ static NDIS_STATUS -GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl, +GetSegmentHeaderInfo(PNET_BUFFER nb, const POVS_PACKET_HDR_INFO hdrInfo, UINT32 *hdrSize, UINT32 *seqNumber) { @@ -1093,7 +1093,7 @@ GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl, const TCPHdr *tcp; /* Parse the orginal Eth/IP/TCP header */ - tcp = OvsGetPacketBytes(nbl, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage); + tcp = OvsGetPacketBytes(nb, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage); if (tcp == NULL) { return NDIS_STATUS_FAILURE; } @@ -1199,7 +1199,7 @@ OvsTcpSegmentNBL(PVOID ovsContext, ASSERT(NET_BUFFER_NEXT_NB(nb) == NULL); /* Figure out the segment header size */ - status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber); + status = GetSegmentHeaderInfo(nb, hdrInfo, &hdrSize, &seqNumber); if (status != NDIS_STATUS_SUCCESS) { OVS_LOG_INFO("Cannot parse NBL header"); return NULL; diff --git a/datapath-windows/ovsext/OvsBufferMgmt.h b/datapath-windows/ovsext/OvsBufferMgmt.h index 9c00b1b..8e822e0 100644 --- a/datapath-windows/ovsext/OvsBufferMgmt.h +++ b/datapath-windows/ovsext/OvsBufferMgmt.h @@ -81,6 +81,11 @@ typedef struct _OVS_NBL_POOL { #endif } OVS_NBL_POOL, *POVS_NBL_POOL; +#define OVS_NBL_FOR_EACH(pCurNbl, nblList) \ +for (NET_BUFFER_LIST* pCurNbl = nblList; \ + pCurNbl != NULL; \ + pCurNbl = NET_BUFFER_LIST_NEXT_NBL(pCurNbl)) + NDIS_STATUS OvsInitBufferPool(PVOID context); VOID OvsCleanupBufferPool(PVOID context); diff --git a/datapath-windows/ovsext/OvsChecksum.c b/datapath-windows/ovsext/OvsChecksum.c index e192373..0c65221 100644 --- a/datapath-windows/ovsext/OvsChecksum.c +++ b/datapath-windows/ovsext/OvsChecksum.c @@ -536,7 +536,8 @@ OvsValidateIPChecksum(PNET_BUFFER_LIST curNbl, /* Next, check if the NIC did not validate the RX checksum. */ if (!csumInfo.Receive.IpChecksumSucceeded) { - ipHdr = OvsGetIp(curNbl, hdrInfo->l3Offset, &ip_storage); + ipHdr = OvsGetIp(NET_BUFFER_LIST_FIRST_NB(curNbl), + hdrInfo->l3Offset, &ip_storage); if (ipHdr) { ip_storage = *ipHdr; hdrChecksum = ipHdr->check; diff --git a/datapath-windows/ovsext/OvsFlow.c b/datapath-windows/ovsext/OvsFlow.c index daa64e0..19f6b2a 100644 --- a/datapath-windows/ovsext/OvsFlow.c +++ b/datapath-windows/ovsext/OvsFlow.c @@ -104,7 +104,7 @@ OvsAllocateFlowTable(OVS_DATAPATH *datapath, /* *---------------------------------------------------------------------------- - * GetStartAddrNBL -- + * GetStartAddrNB -- * Get the virtual address of the frame. * * Results: @@ -112,7 +112,7 @@ OvsAllocateFlowTable(OVS_DATAPATH *datapath, *---------------------------------------------------------------------------- */ static __inline VOID * -GetStartAddrNBL(const NET_BUFFER_LIST *_pNB) +GetStartAddrNB(const NET_BUFFER *_pNB) { PMDL curMdl; PUINT8 curBuffer; @@ -121,21 +121,21 @@ GetStartAddrNBL(const NET_BUFFER_LIST *_pNB) ASSERT(_pNB); // Ethernet Header is a guaranteed safe access. - curMdl = (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdl; + curMdl = (_pNB)->CurrentMdl; curBuffer = MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); if (!curBuffer) { return NULL; } curHeader = (PEthHdr) - (curBuffer + (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdlOffset); + (curBuffer + (_pNB)->CurrentMdlOffset); return (VOID *) curHeader; } VOID OvsFlowUsed(OvsFlow *flow, - const NET_BUFFER_LIST *packet, + const NET_BUFFER *packet, const POVS_PACKET_HDR_INFO layers) { LARGE_INTEGER tickCount; @@ -144,7 +144,7 @@ OvsFlowUsed(OvsFlow *flow, flow->used = tickCount.QuadPart * ovsTimeIncrementPerTick; flow->used += ovsUserTimestampDelta; flow->packetCount++; - flow->byteCount += OvsPacketLenNBL(packet); + flow->byteCount += NET_BUFFER_DATA_LENGTH(packet); flow->tcpFlags |= OvsGetTcpFlags(packet, &flow->key, layers); } @@ -191,15 +191,14 @@ DeleteAllFlows(OVS_DATAPATH *datapath) *---------------------------------------------------------------------------- */ NDIS_STATUS -OvsExtractFlow(const NET_BUFFER_LIST *packet, +OvsExtractFlow(const NET_BUFFER* pNb, UINT32 inPort, OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers, - OvsIPv4TunnelKey *tunKey) + OvsIPv4TunnelKey *tunKey, VOID* vlanTagValue) { struct Eth_Header *eth; UINT8 offset = 0; - PVOID vlanTagValue; layers->value = 0; @@ -214,20 +213,19 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, flow->l2.inPort = inPort; - if ( OvsPacketLenNBL(packet) < ETH_HEADER_LEN_DIX) { + if (NET_BUFFER_DATA_LENGTH(pNb) < ETH_HEADER_LEN_DIX) { flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + 8 - flow->l2.offset; return NDIS_STATUS_SUCCESS; } /* Link layer. */ - eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet); + eth = (Eth_Header *)GetStartAddrNB(pNb); memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH); memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH); /* * vlan_tci. */ - vlanTagValue = NET_BUFFER_LIST_INFO(packet, Ieee8021QNetBufferListInfo); if (vlanTagValue) { PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag = (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue; @@ -262,7 +260,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, if (ETH_TYPENOT8023(eth->dix.typeNBO)) { flow->l2.dlType = eth->dix.typeNBO; layers->l3Offset = ETH_HEADER_LEN_DIX + offset; - } else if (OvsPacketLenNBL(packet) >= ETH_HEADER_LEN_802_3 && + } else if (NET_BUFFER_DATA_LENGTH(pNb) >= ETH_HEADER_LEN_802_3 && eth->e802_3.llc.dsap == 0xaa && eth->e802_3.llc.ssap == 0xaa && eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME && @@ -285,7 +283,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, flow->l2.keyLen += OVS_IP_KEY_SIZE; layers->isIPv4 = 1; - nh = OvsGetIp(packet, layers->l3Offset, &ip_storage); + nh = OvsGetIp(pNb, layers->l3Offset, &ip_storage); if (nh) { layers->l4Offset = layers->l3Offset + nh->ihl * 4; @@ -309,14 +307,14 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, if (!(nh->frag_off & htons(IP_OFFSET))) { if (ipKey->nwProto == SOCKET_IPPROTO_TCP) { - OvsParseTcp(packet, &ipKey->l4, layers); + OvsParseTcp(pNb, &ipKey->l4, layers); } else if (ipKey->nwProto == SOCKET_IPPROTO_UDP) { - OvsParseUdp(packet, &ipKey->l4, layers); + OvsParseUdp(pNb, &ipKey->l4, layers); } else if (ipKey->nwProto == SOCKET_IPPROTO_ICMP) { ICMPHdr icmpStorage; const ICMPHdr *icmp; - icmp = OvsGetIcmp(packet, layers->l4Offset, &icmpStorage); + icmp = OvsGetIcmp(pNb, layers->l4Offset, &icmpStorage); if (icmp) { ipKey->l4.tpSrc = htons(icmp->type); ipKey->l4.tpDst = htons(icmp->code); @@ -331,7 +329,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) { NDIS_STATUS status; flow->l2.keyLen += OVS_IPV6_KEY_SIZE; - status = OvsParseIPv6(packet, flow, layers); + status = OvsParseIPv6(pNb, flow, layers); if (status != NDIS_STATUS_SUCCESS) { memset(&flow->ipv6Key, 0, sizeof (Ipv6Key)); return status; @@ -342,11 +340,11 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, flow->ipv6Key.pad = 0; if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_TCP) { - OvsParseTcp(packet, &(flow->ipv6Key.l4), layers); + OvsParseTcp(pNb, &(flow->ipv6Key.l4), layers); } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_UDP) { - OvsParseUdp(packet, &(flow->ipv6Key.l4), layers); + OvsParseUdp(pNb, &(flow->ipv6Key.l4), layers); } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) { - OvsParseIcmpV6(packet, flow, layers); + OvsParseIcmpV6(pNb, flow, layers); flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE); } } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) { @@ -357,7 +355,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, ((UINT64 *)arpKey)[1] = 0; ((UINT64 *)arpKey)[2] = 0; flow->l2.keyLen += OVS_ARP_KEY_SIZE; - arp = OvsGetArp(packet, layers->l3Offset, &arpStorage); + arp = OvsGetArp(pNb, layers->l3Offset, &arpStorage); if (arp && arp->ea_hdr.ar_hrd == htons(1) && arp->ea_hdr.ar_pro == htons(ETH_TYPE_IPV4) && arp->ea_hdr.ar_hln == ETH_ADDR_LENGTH && @@ -420,9 +418,7 @@ AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow) */ KeMemoryBarrier(); - //KeAcquireSpinLock(&FilterDeviceExtension->NblQueueLock, &oldIrql); InsertTailList(head, &flow->ListEntry); - //KeReleaseSpinLock(&FilterDeviceExtension->NblQueueLock, oldIrql); datapath->nFlows++; diff --git a/datapath-windows/ovsext/OvsFlow.h b/datapath-windows/ovsext/OvsFlow.h index 93368b3..aeec803 100644 --- a/datapath-windows/ovsext/OvsFlow.h +++ b/datapath-windows/ovsext/OvsFlow.h @@ -50,13 +50,13 @@ NDIS_STATUS OvsDeleteFlowTable(OVS_DATAPATH *datapath); NDIS_STATUS OvsAllocateFlowTable(OVS_DATAPATH *datapath, POVS_SWITCH_CONTEXT switchContext); -NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort, +NDIS_STATUS OvsExtractFlow(const NET_BUFFER *pNb, UINT32 inPort, OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers, - OvsIPv4TunnelKey *tunKey); + OvsIPv4TunnelKey *tunKey, VOID* vlanTagValue); OvsFlow *OvsLookupFlow(OVS_DATAPATH *datapath, const OvsFlowKey *key, UINT64 *hash, BOOLEAN hashValid); UINT64 OvsHashFlow(const OvsFlowKey *key); -VOID OvsFlowUsed(OvsFlow *flow, const NET_BUFFER_LIST *pkt, +VOID OvsFlowUsed(OvsFlow *flow, const NET_BUFFER *pkt, const POVS_PACKET_HDR_INFO layers); NTSTATUS OvsDumpFlowIoctl(PVOID inputBuffer, UINT32 inputLength, diff --git a/datapath-windows/ovsext/OvsPacketIO.c b/datapath-windows/ovsext/OvsPacketIO.c index 39e5703..d6d2e0e 100644 --- a/datapath-windows/ovsext/OvsPacketIO.c +++ b/datapath-windows/ovsext/OvsPacketIO.c @@ -211,61 +211,97 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, OvsFlowKey key; UINT64 hash; PNET_BUFFER curNb; + VOID* vlanTagValue; + NET_BUFFER_LIST* pNextNbl; nextNbl = curNbl->Next; curNbl->Next = NULL; /* Ethernet Header is a guaranteed safe access. */ - curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); - if (curNb->Next != NULL) { - /* XXX: This case is not handled yet. */ - ASSERT(FALSE); + POVS_BUFFER_CONTEXT ctx; + OvsFlow *flow; + NET_BUFFER_LIST* pNewNbl; + BOOLEAN dropNbl; + + fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); + sourcePort = fwdDetail->SourcePortId; + sourceIndex = (NDIS_SWITCH_NIC_INDEX)fwdDetail->SourceNicIndex; + + /* Take the DispatchLock so none of the VPORTs disconnect while + * we are setting destination ports. + * + * XXX: acquire/release the dispatch lock for a "batch" of packets + * rather than for each packet. */ + NdisAcquireRWLockRead(switchContext->dispatchLock, &lockState, + dispatch); + + ctx = OvsInitExternalNBLContext(switchContext, curNbl, + sourcePort == switchContext->externalPortId); + if (ctx == NULL) { + RtlInitUnicodeString(&filterReason, + L"Cannot allocate external NBL context."); + + OvsStartNBLIngressError(switchContext, curNbl, + sendCompleteFlags, &filterReason, + NDIS_STATUS_RESOURCES); + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + continue; + } + + vport = OvsFindVportByPortIdAndNicIndex(switchContext, sourcePort, + sourceIndex); + if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) { + RtlInitUnicodeString(&filterReason, + L"OVS-Cannot forward packet from unknown source port"); + goto dropit; } else { - POVS_BUFFER_CONTEXT ctx; - OvsFlow *flow; - - fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); - sourcePort = fwdDetail->SourcePortId; - sourceIndex = (NDIS_SWITCH_NIC_INDEX)fwdDetail->SourceNicIndex; - - /* Take the DispatchLock so none of the VPORTs disconnect while - * we are setting destination ports. - * - * XXX: acquire/release the dispatch lock for a "batch" of packets - * rather than for each packet. */ - NdisAcquireRWLockRead(switchContext->dispatchLock, &lockState, - dispatch); - - ctx = OvsInitExternalNBLContext(switchContext, curNbl, - sourcePort == switchContext->externalPortId); - if (ctx == NULL) { + portNo = vport->portNo; + } + + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + vport->stats.rxPackets++; + vport->stats.rxBytes += NET_BUFFER_DATA_LENGTH(curNb); + vlanTagValue = NET_BUFFER_LIST_INFO(curNbl, + Ieee8021QNetBufferListInfo); + + if (curNb->Next == NULL) + { + pNewNbl = curNbl; + } + else + { + InterlockedDecrement((volatile LONG*)&ctx->refCount); + pNewNbl = OvsPartialCopyToMultipleNBLs(switchContext, curNbl, + 0, 0, TRUE); + if (!pNewNbl) { RtlInitUnicodeString(&filterReason, - L"Cannot allocate external NBL context."); + L"Cannot allocate new NBL: partial copy NB to " + L"multiple NBLs."); OvsStartNBLIngressError(switchContext, curNbl, sendCompleteFlags, &filterReason, NDIS_STATUS_RESOURCES); + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); continue; } + } - vport = OvsFindVportByPortIdAndNicIndex(switchContext, sourcePort, - sourceIndex); - if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) { - RtlInitUnicodeString(&filterReason, - L"OVS-Cannot forward packet from unknown source port"); - goto dropit; - } else { - portNo = vport->portNo; - } - - vport->stats.rxPackets++; - vport->stats.rxBytes += NET_BUFFER_DATA_LENGTH(curNb); + dropNbl = FALSE; + for (NET_BUFFER_LIST* pCurNbl = pNewNbl; pCurNbl != NULL; pCurNbl = pNextNbl) + { + pNextNbl = NET_BUFFER_LIST_NEXT_NBL(pCurNbl); + NET_BUFFER_LIST_NEXT_NBL(pCurNbl) = NULL; - status = OvsExtractFlow(curNbl, vport->portNo, &key, &layers, NULL); + status = OvsExtractFlow(NET_BUFFER_LIST_FIRST_NB(pCurNbl), + vport->portNo, &key, &layers, + NULL, vlanTagValue); if (status != NDIS_STATUS_SUCCESS) { RtlInitUnicodeString(&filterReason, L"OVS-Flow extract failed"); - goto dropit; + dropNbl = TRUE; + OvsAddPktCompletionList(&completionList, TRUE, sourcePort, + pCurNbl, 0, &filterReason); + continue; } ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); @@ -273,16 +309,17 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, flow = OvsLookupFlow(datapath, &key, &hash, FALSE); if (flow) { - OvsFlowUsed(flow, curNbl, &layers); + OvsFlowUsed(flow, NET_BUFFER_LIST_FIRST_NB(pCurNbl), &layers); datapath->hits++; + /* If successful, OvsActionsExecute() consumes the NBL. - * Otherwise, it adds it to the completionList. No need to - * check the return value. */ - OvsActionsExecute(switchContext, &completionList, curNbl, - portNo, SendFlags, &key, &hash, &layers, - flow->actions, flow->actionsLen); + * Otherwise, it adds it to the completionList. No need to + * check the return value. */ + OvsActionsExecute(switchContext, &completionList, pCurNbl, + portNo, SendFlags, &key, &hash, &layers, + flow->actions, flow->actionsLen); + OvsReleaseDatapath(datapath, &dpLockState); - NdisReleaseRWLock(switchContext->dispatchLock, &lockState); continue; } else { OvsReleaseDatapath(datapath, &dpLockState); @@ -293,28 +330,30 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, portNo, key.tunKey.dst != 0 ? (OvsIPv4TunnelKey *)&key.tunKey : - NULL, curNbl, + NULL, pCurNbl, sourcePort == switchContext->externalPortId, &layers, switchContext, &missedPackets, &num); if (status == NDIS_STATUS_SUCCESS) { /* Complete the packet since it was copied to user - * buffer. */ + * buffer. + */ RtlInitUnicodeString(&filterReason, L"OVS-Dropped since packet was copied to userspace"); } else { RtlInitUnicodeString(&filterReason, L"OVS-Dropped due to failure to queue to userspace"); } - goto dropit; + dropNbl = TRUE; + OvsAddPktCompletionList(&completionList, TRUE, sourcePort, + pCurNbl, 0, &filterReason); + continue; } - -dropit: - OvsAddPktCompletionList(&completionList, TRUE, sourcePort, curNbl, 0, - &filterReason); - NdisReleaseRWLock(switchContext->dispatchLock, &lockState); } + + dropit: + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); } /* Queue the missed packets. */ diff --git a/datapath-windows/ovsext/OvsPacketParser.c b/datapath-windows/ovsext/OvsPacketParser.c index 0a93435..0b971a7 100644 --- a/datapath-windows/ovsext/OvsPacketParser.c +++ b/datapath-windows/ovsext/OvsPacketParser.c @@ -18,13 +18,12 @@ //XXX consider moving to NdisGetDataBuffer. const VOID * -OvsGetPacketBytes(const NET_BUFFER_LIST *nbl, +OvsGetPacketBytes(const NET_BUFFER *netBuffer, UINT32 len, UINT32 srcOffset, VOID *storage) { NDIS_STATUS status = NDIS_STATUS_SUCCESS; - PNET_BUFFER netBuffer = NET_BUFFER_LIST_FIRST_NB(nbl); PMDL currentMdl; BOOLEAN firstMDL = TRUE; ULONG destOffset = 0; @@ -83,7 +82,7 @@ OvsGetPacketBytes(const NET_BUFFER_LIST *nbl, } NDIS_STATUS -OvsParseIPv6(const NET_BUFFER_LIST *packet, +OvsParseIPv6(const NET_BUFFER *packet, OvsFlowKey *key, POVS_PACKET_HDR_INFO layers) { @@ -134,7 +133,8 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, const IPv6ExtHdr *extHdr; UINT8 len; - extHdr = OvsGetPacketBytes(packet, sizeof *extHdr, ofs, &extHdrStorage); + extHdr = OvsGetPacketBytes(packet, sizeof *extHdr, ofs, + &extHdrStorage); if (!extHdr) { return NDIS_STATUS_FAILURE; } @@ -142,7 +142,7 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, len = extHdr->hdrExtLen; ofs += nextHdr == SOCKET_IPPROTO_AH ? (len + 2) * 4 : (len + 1) * 8; nextHdr = extHdr->nextHeader; - if (OvsPacketLenNBL(packet) < ofs) { + if (NET_BUFFER_DATA_LENGTH(packet) < ofs) { return NDIS_STATUS_FAILURE; } } else if (nextHdr == SOCKET_IPPROTO_FRAGMENT) { @@ -177,7 +177,7 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, } VOID -OvsParseTcp(const NET_BUFFER_LIST *packet, +OvsParseTcp(const NET_BUFFER *packet, L4Key *flow, POVS_PACKET_HDR_INFO layers) { @@ -192,7 +192,7 @@ OvsParseTcp(const NET_BUFFER_LIST *packet, } VOID -OvsParseUdp(const NET_BUFFER_LIST *packet, +OvsParseUdp(const NET_BUFFER *packet, L4Key *flow, POVS_PACKET_HDR_INFO layers) { @@ -210,7 +210,7 @@ OvsParseUdp(const NET_BUFFER_LIST *packet, } NDIS_STATUS -OvsParseIcmpV6(const NET_BUFFER_LIST *packet, +OvsParseIcmpV6(const NET_BUFFER *packet, OvsFlowKey *key, POVS_PACKET_HDR_INFO layers) { @@ -249,7 +249,7 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, } flow->ndTarget = *ndTarget; - while ((UINT32)(ofs + 8) <= OvsPacketLenNBL(packet)) { + while ((UINT32)(ofs + 8) <= NET_BUFFER_DATA_LENGTH(packet)) { /* * The minimum size of an option is 8 bytes, which also is * the size of Ethernet link-layer options. @@ -258,13 +258,15 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, const IPv6NdOptHdr *ndOpt; UINT16 optLen; - ndOpt = OvsGetPacketBytes(packet, sizeof *ndOpt, ofs, &ndOptStorage); + ndOpt = OvsGetPacketBytes(packet, sizeof *ndOpt, ofs, + &ndOptStorage); if (!ndOpt) { return NDIS_STATUS_FAILURE; } optLen = ndOpt->len * 8; - if (!optLen || (UINT32)(ofs + optLen) > OvsPacketLenNBL(packet)) { + if (!optLen || (UINT32)(ofs + optLen) > + NET_BUFFER_DATA_LENGTH(packet)) { goto invalid; } diff --git a/datapath-windows/ovsext/OvsPacketParser.h b/datapath-windows/ovsext/OvsPacketParser.h index ab3c613..0928932 100644 --- a/datapath-windows/ovsext/OvsPacketParser.h +++ b/datapath-windows/ovsext/OvsPacketParser.h @@ -20,15 +20,15 @@ #include "precomp.h" #include "OvsNetProto.h" -const VOID* OvsGetPacketBytes(const NET_BUFFER_LIST *_pNB, UINT32 len, +const VOID* OvsGetPacketBytes(const NET_BUFFER *_pNB, UINT32 len, UINT32 SrcOffset, VOID *storage); -NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, OvsFlowKey *key, +NDIS_STATUS OvsParseIPv6(const NET_BUFFER *packet, OvsFlowKey *key, POVS_PACKET_HDR_INFO layers); -VOID OvsParseTcp(const NET_BUFFER_LIST *packet, L4Key *flow, +VOID OvsParseTcp(const NET_BUFFER *packet, L4Key *flow, POVS_PACKET_HDR_INFO layers); -VOID OvsParseUdp(const NET_BUFFER_LIST *packet, L4Key *flow, +VOID OvsParseUdp(const NET_BUFFER *packet, L4Key *flow, POVS_PACKET_HDR_INFO layers); -NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, OvsFlowKey *key, +NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER *packet, OvsFlowKey *key, POVS_PACKET_HDR_INFO layers); static __inline ULONG @@ -58,7 +58,7 @@ OvsPacketLenNBL(const NET_BUFFER_LIST *_pNB) * which C does not allow. */ static UINT16 -OvsGetTcpCtl(const NET_BUFFER_LIST *packet, // IN +OvsGetTcpCtl(const NET_BUFFER *packet, // IN const POVS_PACKET_HDR_INFO layers) // IN { #define TCP_CTL_OFS 12 // Offset of "ctl" field in TCP header. @@ -74,7 +74,7 @@ OvsGetTcpCtl(const NET_BUFFER_LIST *packet, // IN static UINT8 -OvsGetTcpFlags(const NET_BUFFER_LIST *packet, // IN +OvsGetTcpFlags(const NET_BUFFER *packet, // IN const OvsFlowKey *key, // IN const POVS_PACKET_HDR_INFO layers) // IN { @@ -88,7 +88,7 @@ OvsGetTcpFlags(const NET_BUFFER_LIST *packet, // IN } static const EtherArp * -OvsGetArp(const NET_BUFFER_LIST *packet, +OvsGetArp(const NET_BUFFER *packet, UINT32 ofs, EtherArp *storage) { @@ -96,14 +96,15 @@ OvsGetArp(const NET_BUFFER_LIST *packet, } static const IPHdr * -OvsGetIp(const NET_BUFFER_LIST *packet, +OvsGetIp(const NET_BUFFER *packet, UINT32 ofs, IPHdr *storage) { const IPHdr *ip = OvsGetPacketBytes(packet, sizeof *ip, ofs, storage); if (ip) { int ipLen = ip->ihl * 4; - if (ipLen >= sizeof *ip && OvsPacketLenNBL(packet) >= ofs + ipLen) { + if (ipLen >= sizeof *ip && + NET_BUFFER_DATA_LENGTH(packet) >= ofs + ipLen) { return ip; } } @@ -111,14 +112,15 @@ OvsGetIp(const NET_BUFFER_LIST *packet, } static const TCPHdr * -OvsGetTcp(const NET_BUFFER_LIST *packet, +OvsGetTcp(const NET_BUFFER *packet, UINT32 ofs, TCPHdr *storage) { const TCPHdr *tcp = OvsGetPacketBytes(packet, sizeof *tcp, ofs, storage); if (tcp) { int tcpLen = tcp->doff * 4; - if (tcpLen >= sizeof *tcp && OvsPacketLenNBL(packet) >= ofs + tcpLen) { + if (tcpLen >= sizeof *tcp && + NET_BUFFER_DATA_LENGTH(packet) >= ofs + tcpLen) { return tcp; } } @@ -126,7 +128,7 @@ OvsGetTcp(const NET_BUFFER_LIST *packet, } static const UDPHdr * -OvsGetUdp(const NET_BUFFER_LIST *packet, +OvsGetUdp(const NET_BUFFER *packet, UINT32 ofs, UDPHdr *storage) { @@ -134,7 +136,7 @@ OvsGetUdp(const NET_BUFFER_LIST *packet, } static const ICMPHdr * -OvsGetIcmp(const NET_BUFFER_LIST *packet, +OvsGetIcmp(const NET_BUFFER *packet, UINT32 ofs, ICMPHdr *storage) { diff --git a/datapath-windows/ovsext/OvsTunnel.c b/datapath-windows/ovsext/OvsTunnel.c index b5a369a..9c96aec 100644 --- a/datapath-windows/ovsext/OvsTunnel.c +++ b/datapath-windows/ovsext/OvsTunnel.c @@ -228,6 +228,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, OVS_DATAPATH *datapath = &gOvsSwitchContext->datapath; ASSERT(gOvsSwitchContext); + ASSERT(pNbl->FirstNetBuffer->Next == NULL); /* Fill the tunnel key */ status = OvsSlowPathDecapVxlan(pNbl, &tunnelKey); @@ -265,6 +266,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, UINT64 hash; PNET_BUFFER curNb; OvsFlow *flow; + VOID* vlanTagValue; fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl); @@ -295,20 +297,22 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, ASSERT(vport->ovsType == OVSWIN_VPORT_TYPE_VXLAN); portNo = vport->portNo; + vlanTagValue = NET_BUFFER_LIST_INFO(pNbl, Ieee8021QNetBufferListInfo); - status = OvsExtractFlow(pNbl, portNo, &key, &layers, &tunnelKey); + status = OvsExtractFlow(curNb, portNo, &key, &layers, + &tunnelKey, vlanTagValue); if (status != NDIS_STATUS_SUCCESS) { goto unlockAndDrop; } flow = OvsLookupFlow(datapath, &key, &hash, FALSE); if (flow) { - OvsFlowUsed(flow, pNbl, &layers); + OvsFlowUsed(flow, curNb, &layers); datapath->hits++; OvsActionsExecute(gOvsSwitchContext, &completionList, pNbl, - portNo, SendFlags, &key, &hash, &layers, - flow->actions, flow->actionsLen); + portNo, SendFlags, &key, &hash, &layers, + flow->actions, flow->actionsLen); OvsReleaseDatapath(datapath, &dpLockState); } else { diff --git a/datapath-windows/ovsext/OvsUser.c b/datapath-windows/ovsext/OvsUser.c index 5093f20..9f717ee 100644 --- a/datapath-windows/ovsext/OvsUser.c +++ b/datapath-windows/ovsext/OvsUser.c @@ -315,6 +315,7 @@ OvsExecuteDpIoctl(PVOID inputBuffer, OvsFlowKey key; OVS_PACKET_HDR_INFO layers; POVS_VPORT_ENTRY vport; + VOID* vlanTagValue; if (inputLength < sizeof(*execute) || outputLength != 0) { return STATUS_INFO_LENGTH_MISMATCH; @@ -360,14 +361,22 @@ OvsExecuteDpIoctl(PVOID inputBuffer, fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; fwdDetail->SourceNicIndex = 0; } - // XXX: Figure out if any of the other members of fwdDetail need to be set. + /* + * XXX: Figure out if any of the other members of fwdDetail need to be + * set. + */ + + ASSERT(pNbl->FirstNetBuffer->Next == NULL); - ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers, - NULL); + vlanTagValue = NET_BUFFER_LIST_INFO(pNbl, Ieee8021QNetBufferListInfo); + + ndisStatus = OvsExtractFlow(NET_BUFFER_LIST_FIRST_NB(pNbl), + fwdDetail->SourcePortId, &key, &layers, NULL, + vlanTagValue); if (ndisStatus == NDIS_STATUS_SUCCESS) { ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, - NDIS_RWL_AT_DISPATCH_LEVEL); + NDIS_RWL_AT_DISPATCH_LEVEL); ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl, vport ? vport->portNo : 0, NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP, @@ -827,6 +836,8 @@ OvsCreateAndAddPackets(UINT32 queueId, PNET_BUFFER_LIST newNbl = NULL; PNET_BUFFER nb; + ASSERT(nbl->FirstNetBuffer->Next == NULL); + if (hdrInfo->isTcp) { NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; UINT32 packetLength; diff --git a/datapath-windows/ovsext/OvsVxlan.c b/datapath-windows/ovsext/OvsVxlan.c index 63909ae..c50d2d9 100644 --- a/datapath-windows/ovsext/OvsVxlan.c +++ b/datapath-windows/ovsext/OvsVxlan.c @@ -116,6 +116,8 @@ OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl, UINT32 headRoom = OvsGetVxlanTunHdrSize(); UINT32 packetLength; + ASSERT(curNbl->FirstNetBuffer->Next == NULL); + /* * XXX: the assumption currently is that the NBL is owned by OVS, and * headroom has already been allocated as part of allocating the NBL and @@ -286,13 +288,19 @@ OvsIpHlprCbVxlan(PNET_BUFFER_LIST curNbl, OvsFlowKey key; NDIS_STATUS status; UNREFERENCED_PARAMETER(inPort); - - status = OvsExtractFlow(curNbl, inPort, &key, &layers, NULL); - if (result == STATUS_SUCCESS) { - status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers, + VOID* vlanTagValue; + + vlanTagValue = NET_BUFFER_LIST_INFO(curNbl, Ieee8021QNetBufferListInfo); + for (NET_BUFFER* pNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + pNb != NULL; pNb = NET_BUFFER_NEXT_NB(pNb)) { + status = OvsExtractFlow(pNb, inPort, &key, &layers, NULL, + vlanTagValue); + if (result == STATUS_SUCCESS) { + status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers, (POVS_SWITCH_CONTEXT)cbData1, NULL); - } else { - status = NDIS_STATUS_FAILURE; + } else { + status = NDIS_STATUS_FAILURE; + } } if (status != NDIS_STATUS_SUCCESS) { @@ -457,9 +465,16 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, OVS_PACKET_HDR_INFO layers; layers.value = 0; + ASSERT(packet->FirstNetBuffer->Next == NULL); do { - nh = OvsGetIp(packet, layers.l3Offset, &ip_storage); + NET_BUFFER* pNb = NET_BUFFER_LIST_FIRST_NB(packet); + + /* + * It is an encapsulated packet (UDP), we can use the first NB + * to start the check + */ + nh = OvsGetIp(pNb, layers.l3Offset, &ip_storage); if (nh) { layers.l4Offset = layers.l3Offset + nh->ihl * 4; } else { @@ -467,7 +482,7 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, } /* make sure it's a VXLAN packet */ - udp = OvsGetUdp(packet, layers.l4Offset, &udpStorage); + udp = OvsGetUdp(pNb, layers.l4Offset, &udpStorage); if (udp) { layers.l7Offset = layers.l4Offset + sizeof *udp; } else { @@ -477,7 +492,11 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, /* XXX Should be tested against the dynamic port # in the VXLAN vport */ ASSERT(udp->dest == RtlUshortByteSwap(VXLAN_UDP_PORT)); - VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(packet, + /* + * We may have multiple VXLAN packets here that need to be decapsulated + * For the VXLAN header, we need only the first NET_BUFFER + */ + VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(pNb, sizeof(*VxlanHeader), layers.l7Offset, &VxlanHeaderBuffer); -- 1.9.0.msysgit.0 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev