Hi Alin, Thanks for working on it! I am almost done reviewing the change. I have a few comments below.
>ovs/ovs-issues#15 > >All NET_BUFFERs of a NET_BUFFER_LIST must go through >the pipeline: extract, find flow, execute. Previously, >only the first NET_BUFFER of a NET_BUFFER_LIST was going >through this pipeline, which was erroneous. > >OvsPartialCopyToMultipleNBLs is used to make each NET_BUFFER >have its own NET_BUFFER_LIST. > >Some functions that used to take as argument a NET_BUFFER_LIST >now take as argument a NET_BUFFER. I have also added a few >ASSERTs where the NET_BUFFER_LIST is expected to have >only one NET_BUFFER." > >Signed-off-by: Samuel Ghinet <sghinet at cloudbasesolutions.com> >Co-authored-by: Alin Gabriel Serdean <aserd...@cloudbasesolutions.com> >--- > datapath-windows/ovsext/OvsActions.c | 20 ++++- > datapath-windows/ovsext/OvsBufferMgmt.c | 8 +- > datapath-windows/ovsext/OvsBufferMgmt.h | 5 ++ > datapath-windows/ovsext/OvsChecksum.c | 3 +- > datapath-windows/ovsext/OvsFlow.c | 44 +++++---- > datapath-windows/ovsext/OvsFlow.h | 6 +- > datapath-windows/ovsext/OvsPacketIO.c | 143 >+++++++++++++++++++----------- > datapath-windows/ovsext/OvsPacketParser.c | 24 ++--- > datapath-windows/ovsext/OvsPacketParser.h | 30 ++++--- > datapath-windows/ovsext/OvsTunnel.c | 12 ++- > datapath-windows/ovsext/OvsUser.c | 19 +++- > datapath-windows/ovsext/OvsVxlan.c | 37 ++++++-- > 12 files changed, 221 insertions(+), 130 deletions(-) > >diff --git a/datapath-windows/ovsext/OvsActions.c >b/datapath-windows/ovsext/OvsActions.c >index 635becc..c5c5116 100644 >--- a/datapath-windows/ovsext/OvsActions.c >+++ b/datapath-windows/ovsext/OvsActions.c >@@ -517,6 +517,9 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx) > OvsFlow *flow; > UINT64 hash; > NDIS_STATUS status; >+ VOID* vlanTagValue; >+ NET_BUFFER* pNb; >+ > POVS_VPORT_ENTRY vport = > OvsFindVportByPortNo(ovsFwdCtx->switchContext, >ovsFwdCtx->srcVportNo); > if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) { >@@ -527,12 +530,19 @@ OvsDoFlowLookupOutput(OvsForwardingContext >*ovsFwdCtx) > return NDIS_STATUS_SUCCESS; > } > ASSERT(vport->nicState == NdisSwitchNicStateConnected); >+ ASSERT(ovsFwdCtx->curNbl->FirstNetBuffer->Next == NULL); > > /* Assert that in the Rx direction, key is always setup. */ > ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0); >- status = OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo, >- &key, &ovsFwdCtx->layers, >ovsFwdCtx->tunKey.dst != 0 ? >- &ovsFwdCtx->tunKey : NULL); >+ vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx->curNbl, >+ Ieee8021QNetBufferListInfo); >+ >+ pNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); >+ >+ status = OvsExtractFlow(pNb, ovsFwdCtx->srcVportNo, >+ &key, &ovsFwdCtx->layers, >+ ovsFwdCtx->tunKey.dst != 0 ? >+ &ovsFwdCtx->tunKey : NULL, vlanTagValue); > if (status != NDIS_STATUS_SUCCESS) { > OvsCompleteNBLForwardingCtx(ovsFwdCtx, > L"OVS-Flow extract failed"); >@@ -542,7 +552,7 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx) > > flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, >&hash, FALSE); > if (flow) { >- OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers); >+ OvsFlowUsed(flow, pNb, &ovsFwdCtx->layers); > ovsFwdCtx->switchContext->datapath.hits++; > status = OvsActionsExecute(ovsFwdCtx->switchContext, > ovsFwdCtx->completionList, >ovsFwdCtx->curNbl, >@@ -1370,6 +1380,8 @@ OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext, > PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail = > NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); > >+ ASSERT(curNbl->FirstNetBuffer->Next == NULL); >+ > /* XXX: ASSERT that the flow table lock is held. */ > status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, >portNo, > sendFlags, fwdDetail, completionList, >diff --git a/datapath-windows/ovsext/OvsBufferMgmt.c >b/datapath-windows/ovsext/OvsBufferMgmt.c >index 8aa8060..c367d2f 100644 >--- a/datapath-windows/ovsext/OvsBufferMgmt.c >+++ b/datapath-windows/ovsext/OvsBufferMgmt.c >@@ -863,7 +863,7 @@ OvsPartialCopyToMultipleNBLs(PVOID ovsContext, > if (prevNbl == NULL) { > firstNbl = newNbl; > } else { >- NET_BUFFER_LIST_NEXT_NBL(prevNbl) = nbl; >+ NET_BUFFER_LIST_NEXT_NBL(prevNbl) = newNbl; > NET_BUFFER_NEXT_NB(prevNb) = nb; > } > prevNbl = newNbl; >@@ -1085,7 +1085,7 @@ nblcopy_error: > * >-------------------------------------------------------------------------- > */ > static NDIS_STATUS >-GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl, >+GetSegmentHeaderInfo(PNET_BUFFER nb, > const POVS_PACKET_HDR_INFO hdrInfo, > UINT32 *hdrSize, UINT32 *seqNumber) > { >@@ -1093,7 +1093,7 @@ GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl, > const TCPHdr *tcp; > > /* Parse the orginal Eth/IP/TCP header */ >- tcp = OvsGetPacketBytes(nbl, sizeof *tcp, hdrInfo->l4Offset, >&tcpStorage); >+ tcp = OvsGetPacketBytes(nb, sizeof *tcp, hdrInfo->l4Offset, >&tcpStorage); > if (tcp == NULL) { > return NDIS_STATUS_FAILURE; > } >@@ -1199,7 +1199,7 @@ OvsTcpSegmentNBL(PVOID ovsContext, > ASSERT(NET_BUFFER_NEXT_NB(nb) == NULL); > > /* Figure out the segment header size */ >- status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber); >+ status = GetSegmentHeaderInfo(nb, hdrInfo, &hdrSize, &seqNumber); > if (status != NDIS_STATUS_SUCCESS) { > OVS_LOG_INFO("Cannot parse NBL header"); > return NULL; >diff --git a/datapath-windows/ovsext/OvsBufferMgmt.h >b/datapath-windows/ovsext/OvsBufferMgmt.h >index 9c00b1b..8e822e0 100644 >--- a/datapath-windows/ovsext/OvsBufferMgmt.h >+++ b/datapath-windows/ovsext/OvsBufferMgmt.h >@@ -81,6 +81,11 @@ typedef struct _OVS_NBL_POOL { > #endif > } OVS_NBL_POOL, *POVS_NBL_POOL; > >+#define OVS_NBL_FOR_EACH(pCurNbl, nblList) \ >+for (NET_BUFFER_LIST* pCurNbl = nblList; \ >+ pCurNbl != NULL; \ >+ pCurNbl = NET_BUFFER_LIST_NEXT_NBL(pCurNbl)) >+ > > NDIS_STATUS OvsInitBufferPool(PVOID context); > VOID OvsCleanupBufferPool(PVOID context); >diff --git a/datapath-windows/ovsext/OvsChecksum.c >b/datapath-windows/ovsext/OvsChecksum.c >index e192373..0c65221 100644 >--- a/datapath-windows/ovsext/OvsChecksum.c >+++ b/datapath-windows/ovsext/OvsChecksum.c >@@ -536,7 +536,8 @@ OvsValidateIPChecksum(PNET_BUFFER_LIST curNbl, > > /* Next, check if the NIC did not validate the RX checksum. */ > if (!csumInfo.Receive.IpChecksumSucceeded) { >- ipHdr = OvsGetIp(curNbl, hdrInfo->l3Offset, &ip_storage); >+ ipHdr = OvsGetIp(NET_BUFFER_LIST_FIRST_NB(curNbl), >+ hdrInfo->l3Offset, &ip_storage); > if (ipHdr) { > ip_storage = *ipHdr; > hdrChecksum = ipHdr->check; >diff --git a/datapath-windows/ovsext/OvsFlow.c >b/datapath-windows/ovsext/OvsFlow.c >index daa64e0..19f6b2a 100644 >--- a/datapath-windows/ovsext/OvsFlow.c >+++ b/datapath-windows/ovsext/OvsFlow.c >@@ -104,7 +104,7 @@ OvsAllocateFlowTable(OVS_DATAPATH *datapath, > > /* > >*------------------------------------------------------------------------- >--- >- * GetStartAddrNBL -- >+ * GetStartAddrNB -- > * Get the virtual address of the frame. > * > * Results: >@@ -112,7 +112,7 @@ OvsAllocateFlowTable(OVS_DATAPATH *datapath, > >*------------------------------------------------------------------------- >--- > */ > static __inline VOID * >-GetStartAddrNBL(const NET_BUFFER_LIST *_pNB) >+GetStartAddrNB(const NET_BUFFER *_pNB) > { > PMDL curMdl; > PUINT8 curBuffer; >@@ -121,21 +121,21 @@ GetStartAddrNBL(const NET_BUFFER_LIST *_pNB) > ASSERT(_pNB); > > // Ethernet Header is a guaranteed safe access. >- curMdl = (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdl; >+ curMdl = (_pNB)->CurrentMdl; > curBuffer = MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); > if (!curBuffer) { > return NULL; > } > > curHeader = (PEthHdr) >- (curBuffer + (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdlOffset); >+ (curBuffer + (_pNB)->CurrentMdlOffset); > > return (VOID *) curHeader; > } > > VOID > OvsFlowUsed(OvsFlow *flow, >- const NET_BUFFER_LIST *packet, >+ const NET_BUFFER *packet, > const POVS_PACKET_HDR_INFO layers) > { > LARGE_INTEGER tickCount; >@@ -144,7 +144,7 @@ OvsFlowUsed(OvsFlow *flow, > flow->used = tickCount.QuadPart * ovsTimeIncrementPerTick; > flow->used += ovsUserTimestampDelta; > flow->packetCount++; >- flow->byteCount += OvsPacketLenNBL(packet); >+ flow->byteCount += NET_BUFFER_DATA_LENGTH(packet); > flow->tcpFlags |= OvsGetTcpFlags(packet, &flow->key, layers); > } > >@@ -191,15 +191,14 @@ DeleteAllFlows(OVS_DATAPATH *datapath) > >*------------------------------------------------------------------------- >--- > */ > NDIS_STATUS >-OvsExtractFlow(const NET_BUFFER_LIST *packet, >+OvsExtractFlow(const NET_BUFFER* pNb, > UINT32 inPort, > OvsFlowKey *flow, > POVS_PACKET_HDR_INFO layers, >- OvsIPv4TunnelKey *tunKey) >+ OvsIPv4TunnelKey *tunKey, VOID* vlanTagValue) Nit: the parameter should be on a new line. > { > struct Eth_Header *eth; > UINT8 offset = 0; >- PVOID vlanTagValue; > > layers->value = 0; > >@@ -214,20 +213,19 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > > flow->l2.inPort = inPort; > >- if ( OvsPacketLenNBL(packet) < ETH_HEADER_LEN_DIX) { >+ if (NET_BUFFER_DATA_LENGTH(pNb) < ETH_HEADER_LEN_DIX) { > flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + 8 - flow->l2.offset; > return NDIS_STATUS_SUCCESS; > } > > /* Link layer. */ >- eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet); >+ eth = (Eth_Header *)GetStartAddrNB(pNb); > memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH); > memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH); > > /* > * vlan_tci. > */ >- vlanTagValue = NET_BUFFER_LIST_INFO(packet, >Ieee8021QNetBufferListInfo); > if (vlanTagValue) { > PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag = > (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue; >@@ -262,7 +260,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > if (ETH_TYPENOT8023(eth->dix.typeNBO)) { > flow->l2.dlType = eth->dix.typeNBO; > layers->l3Offset = ETH_HEADER_LEN_DIX + offset; >- } else if (OvsPacketLenNBL(packet) >= ETH_HEADER_LEN_802_3 && >+ } else if (NET_BUFFER_DATA_LENGTH(pNb) >= ETH_HEADER_LEN_802_3 && > eth->e802_3.llc.dsap == 0xaa && > eth->e802_3.llc.ssap == 0xaa && > eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME && >@@ -285,7 +283,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > > flow->l2.keyLen += OVS_IP_KEY_SIZE; > layers->isIPv4 = 1; >- nh = OvsGetIp(packet, layers->l3Offset, &ip_storage); >+ nh = OvsGetIp(pNb, layers->l3Offset, &ip_storage); > if (nh) { > layers->l4Offset = layers->l3Offset + nh->ihl * 4; > >@@ -309,14 +307,14 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > > if (!(nh->frag_off & htons(IP_OFFSET))) { > if (ipKey->nwProto == SOCKET_IPPROTO_TCP) { >- OvsParseTcp(packet, &ipKey->l4, layers); >+ OvsParseTcp(pNb, &ipKey->l4, layers); > } else if (ipKey->nwProto == SOCKET_IPPROTO_UDP) { >- OvsParseUdp(packet, &ipKey->l4, layers); >+ OvsParseUdp(pNb, &ipKey->l4, layers); > } else if (ipKey->nwProto == SOCKET_IPPROTO_ICMP) { > ICMPHdr icmpStorage; > const ICMPHdr *icmp; > >- icmp = OvsGetIcmp(packet, layers->l4Offset, >&icmpStorage); >+ icmp = OvsGetIcmp(pNb, layers->l4Offset, >&icmpStorage); > if (icmp) { > ipKey->l4.tpSrc = htons(icmp->type); > ipKey->l4.tpDst = htons(icmp->code); >@@ -331,7 +329,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) { > NDIS_STATUS status; > flow->l2.keyLen += OVS_IPV6_KEY_SIZE; >- status = OvsParseIPv6(packet, flow, layers); >+ status = OvsParseIPv6(pNb, flow, layers); > if (status != NDIS_STATUS_SUCCESS) { > memset(&flow->ipv6Key, 0, sizeof (Ipv6Key)); > return status; >@@ -342,11 +340,11 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > flow->ipv6Key.pad = 0; > > if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_TCP) { >- OvsParseTcp(packet, &(flow->ipv6Key.l4), layers); >+ OvsParseTcp(pNb, &(flow->ipv6Key.l4), layers); > } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_UDP) { >- OvsParseUdp(packet, &(flow->ipv6Key.l4), layers); >+ OvsParseUdp(pNb, &(flow->ipv6Key.l4), layers); > } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) { >- OvsParseIcmpV6(packet, flow, layers); >+ OvsParseIcmpV6(pNb, flow, layers); > flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE); > } > } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) { >@@ -357,7 +355,7 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > ((UINT64 *)arpKey)[1] = 0; > ((UINT64 *)arpKey)[2] = 0; > flow->l2.keyLen += OVS_ARP_KEY_SIZE; >- arp = OvsGetArp(packet, layers->l3Offset, &arpStorage); >+ arp = OvsGetArp(pNb, layers->l3Offset, &arpStorage); > if (arp && arp->ea_hdr.ar_hrd == htons(1) && > arp->ea_hdr.ar_pro == htons(ETH_TYPE_IPV4) && > arp->ea_hdr.ar_hln == ETH_ADDR_LENGTH && >@@ -420,9 +418,7 @@ AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow) > */ > KeMemoryBarrier(); > >- //KeAcquireSpinLock(&FilterDeviceExtension->NblQueueLock, &oldIrql); > InsertTailList(head, &flow->ListEntry); >- //KeReleaseSpinLock(&FilterDeviceExtension->NblQueueLock, oldIrql); > > datapath->nFlows++; > >diff --git a/datapath-windows/ovsext/OvsFlow.h >b/datapath-windows/ovsext/OvsFlow.h >index 93368b3..aeec803 100644 >--- a/datapath-windows/ovsext/OvsFlow.h >+++ b/datapath-windows/ovsext/OvsFlow.h >@@ -50,13 +50,13 @@ NDIS_STATUS OvsDeleteFlowTable(OVS_DATAPATH >*datapath); > NDIS_STATUS OvsAllocateFlowTable(OVS_DATAPATH *datapath, > POVS_SWITCH_CONTEXT switchContext); > >-NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort, >+NDIS_STATUS OvsExtractFlow(const NET_BUFFER *pNb, UINT32 inPort, > OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers, >- OvsIPv4TunnelKey *tunKey); >+ OvsIPv4TunnelKey *tunKey, VOID* vlanTagValue); > OvsFlow *OvsLookupFlow(OVS_DATAPATH *datapath, const OvsFlowKey *key, > UINT64 *hash, BOOLEAN hashValid); > UINT64 OvsHashFlow(const OvsFlowKey *key); >-VOID OvsFlowUsed(OvsFlow *flow, const NET_BUFFER_LIST *pkt, >+VOID OvsFlowUsed(OvsFlow *flow, const NET_BUFFER *pkt, > const POVS_PACKET_HDR_INFO layers); > > NTSTATUS OvsDumpFlowIoctl(PVOID inputBuffer, UINT32 inputLength, >diff --git a/datapath-windows/ovsext/OvsPacketIO.c >b/datapath-windows/ovsext/OvsPacketIO.c >index 39e5703..d6d2e0e 100644 >--- a/datapath-windows/ovsext/OvsPacketIO.c >+++ b/datapath-windows/ovsext/OvsPacketIO.c >@@ -211,61 +211,97 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT >switchContext, This function has become a bit too long now, can you please refactor it? One obvious thing you could do is to extract the processing of an single NB into a separate function and the parent function can take care of splitting the NBL (if required) & creating the right forwarding ctx. > OvsFlowKey key; > UINT64 hash; > PNET_BUFFER curNb; >+ VOID* vlanTagValue; >+ NET_BUFFER_LIST* pNextNbl; > > nextNbl = curNbl->Next; > curNbl->Next = NULL; > > /* Ethernet Header is a guaranteed safe access. */ >- curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); >- if (curNb->Next != NULL) { >- /* XXX: This case is not handled yet. */ >- ASSERT(FALSE); >+ POVS_BUFFER_CONTEXT ctx; >+ OvsFlow *flow; >+ NET_BUFFER_LIST* pNewNbl; >+ BOOLEAN dropNbl; >+ >+ fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); >+ sourcePort = fwdDetail->SourcePortId; >+ sourceIndex = (NDIS_SWITCH_NIC_INDEX)fwdDetail->SourceNicIndex; >+ >+ /* Take the DispatchLock so none of the VPORTs disconnect while >+ * we are setting destination ports. >+ * >+ * XXX: acquire/release the dispatch lock for a "batch" of packets >+ * rather than for each packet. */ >+ NdisAcquireRWLockRead(switchContext->dispatchLock, &lockState, >+ dispatch); >+ I believe we can take the lock after initializing the external context. Also, the argument 'dispatch' needs correct indentation. >+ ctx = OvsInitExternalNBLContext(switchContext, curNbl, >+ sourcePort == switchContext->externalPortId); >+ if (ctx == NULL) { >+ RtlInitUnicodeString(&filterReason, >+ L"Cannot allocate external NBL context."); >+ >+ OvsStartNBLIngressError(switchContext, curNbl, >+ sendCompleteFlags, &filterReason, >+ NDIS_STATUS_RESOURCES); >+ NdisReleaseRWLock(switchContext->dispatchLock, &lockState); >+ continue; >+ } >+ >+ vport = OvsFindVportByPortIdAndNicIndex(switchContext, >sourcePort, >+ sourceIndex); >+ if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) { >+ RtlInitUnicodeString(&filterReason, >+ L"OVS-Cannot forward packet from unknown source port"); >+ goto dropit; > } else { >- POVS_BUFFER_CONTEXT ctx; >- OvsFlow *flow; >- >- fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); >- sourcePort = fwdDetail->SourcePortId; >- sourceIndex = >(NDIS_SWITCH_NIC_INDEX)fwdDetail->SourceNicIndex; >- >- /* Take the DispatchLock so none of the VPORTs disconnect >while >- * we are setting destination ports. >- * >- * XXX: acquire/release the dispatch lock for a "batch" of >packets >- * rather than for each packet. */ >- NdisAcquireRWLockRead(switchContext->dispatchLock, >&lockState, >- dispatch); >- >- ctx = OvsInitExternalNBLContext(switchContext, curNbl, >- sourcePort == >switchContext->externalPortId); >- if (ctx == NULL) { >+ portNo = vport->portNo; >+ } >+ >+ curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); >+ vport->stats.rxPackets++; >+ vport->stats.rxBytes += NET_BUFFER_DATA_LENGTH(curNb); This is obviously wrong, since the stats need to be updated for each NB of the NBL. >+ vlanTagValue = NET_BUFFER_LIST_INFO(curNbl, >+ Ieee8021QNetBufferListInfo); >+ >+ if (curNb->Next == NULL) >+ { >+ pNewNbl = curNbl; >+ } >+ else >+ { >+ InterlockedDecrement((volatile LONG*)&ctx->refCount); >+ pNewNbl = OvsPartialCopyToMultipleNBLs(switchContext, curNbl, >+ 0, 0, TRUE); >+ if (!pNewNbl) { > RtlInitUnicodeString(&filterReason, >- L"Cannot allocate external NBL >context."); >+ L"Cannot allocate new NBL: partial copy NB to " >+ L"multiple NBLs."); > > OvsStartNBLIngressError(switchContext, curNbl, > sendCompleteFlags, &filterReason, > NDIS_STATUS_RESOURCES); >+ > NdisReleaseRWLock(switchContext->dispatchLock, >&lockState); > continue; > } >+ } The if-else style needs to be fixed. > >- vport = OvsFindVportByPortIdAndNicIndex(switchContext, >sourcePort, >- sourceIndex); >- if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) >{ >- RtlInitUnicodeString(&filterReason, >- L"OVS-Cannot forward packet from unknown source >port"); >- goto dropit; >- } else { >- portNo = vport->portNo; >- } >- >- vport->stats.rxPackets++; >- vport->stats.rxBytes += NET_BUFFER_DATA_LENGTH(curNb); >+ dropNbl = FALSE; >+ for (NET_BUFFER_LIST* pCurNbl = pNewNbl; pCurNbl != NULL; >pCurNbl = pNextNbl) >+ { You should use your new macro OVS_NBL_FOR_EACH here and the parentheses go immediately after, not on a new line. >+ pNextNbl = NET_BUFFER_LIST_NEXT_NBL(pCurNbl); >+ NET_BUFFER_LIST_NEXT_NBL(pCurNbl) = NULL; > >- status = OvsExtractFlow(curNbl, vport->portNo, &key, >&layers, NULL); >+ status = OvsExtractFlow(NET_BUFFER_LIST_FIRST_NB(pCurNbl), >+ vport->portNo, &key, &layers, >+ NULL, vlanTagValue); > if (status != NDIS_STATUS_SUCCESS) { > RtlInitUnicodeString(&filterReason, L"OVS-Flow extract >failed"); >- goto dropit; >+ dropNbl = TRUE; >+ OvsAddPktCompletionList(&completionList, TRUE, >sourcePort, >+ pCurNbl, 0, &filterReason); >+ continue; > } > > ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); >@@ -273,16 +309,17 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT >switchContext, > > flow = OvsLookupFlow(datapath, &key, &hash, FALSE); > if (flow) { >- OvsFlowUsed(flow, curNbl, &layers); >+ OvsFlowUsed(flow, NET_BUFFER_LIST_FIRST_NB(pCurNbl), >&layers); > datapath->hits++; >+ > /* If successful, OvsActionsExecute() consumes the NBL. >- * Otherwise, it adds it to the completionList. No need >to >- * check the return value. */ >- OvsActionsExecute(switchContext, &completionList, curNbl, >- portNo, SendFlags, &key, &hash, &layers, >- flow->actions, flow->actionsLen); >+ * Otherwise, it adds it to the completionList. No need to >+ * check the return value. */ >+ OvsActionsExecute(switchContext, &completionList, >pCurNbl, >+ portNo, SendFlags, &key, &hash, >&layers, >+ flow->actions, flow->actionsLen); >+ > OvsReleaseDatapath(datapath, &dpLockState); >- NdisReleaseRWLock(switchContext->dispatchLock, >&lockState); > continue; > } else { > OvsReleaseDatapath(datapath, &dpLockState); >@@ -293,28 +330,30 @@ OvsStartNBLIngress(POVS_SWITCH_CONTEXT >switchContext, > portNo, > key.tunKey.dst != 0 ? > (OvsIPv4TunnelKey >*)&key.tunKey : >- NULL, curNbl, >+ NULL, pCurNbl, > sourcePort == > >switchContext->externalPortId, > &layers, switchContext, > &missedPackets, &num); > if (status == NDIS_STATUS_SUCCESS) { > /* Complete the packet since it was copied to user >- * buffer. */ >+ * buffer. >+ */ Why this change? > RtlInitUnicodeString(&filterReason, > L"OVS-Dropped since packet was copied to >userspace"); > } else { > RtlInitUnicodeString(&filterReason, > L"OVS-Dropped due to failure to queue to >userspace"); > } >- goto dropit; >+ dropNbl = TRUE; >+ OvsAddPktCompletionList(&completionList, TRUE, >sourcePort, >+ pCurNbl, 0, &filterReason); >+ continue; > } >- >-dropit: >- OvsAddPktCompletionList(&completionList, TRUE, sourcePort, >curNbl, 0, >- &filterReason); >- NdisReleaseRWLock(switchContext->dispatchLock, &lockState); > } >+ >+ dropit: >+ NdisReleaseRWLock(switchContext->dispatchLock, &lockState); > } > > /* Queue the missed packets. */ >diff --git a/datapath-windows/ovsext/OvsPacketParser.c >b/datapath-windows/ovsext/OvsPacketParser.c >index 0a93435..0b971a7 100644 >--- a/datapath-windows/ovsext/OvsPacketParser.c >+++ b/datapath-windows/ovsext/OvsPacketParser.c >@@ -18,13 +18,12 @@ > > //XXX consider moving to NdisGetDataBuffer. > const VOID * >-OvsGetPacketBytes(const NET_BUFFER_LIST *nbl, >+OvsGetPacketBytes(const NET_BUFFER *netBuffer, > UINT32 len, > UINT32 srcOffset, > VOID *storage) > { > NDIS_STATUS status = NDIS_STATUS_SUCCESS; >- PNET_BUFFER netBuffer = NET_BUFFER_LIST_FIRST_NB(nbl); > PMDL currentMdl; > BOOLEAN firstMDL = TRUE; > ULONG destOffset = 0; >@@ -83,7 +82,7 @@ OvsGetPacketBytes(const NET_BUFFER_LIST *nbl, > } > > NDIS_STATUS >-OvsParseIPv6(const NET_BUFFER_LIST *packet, >+OvsParseIPv6(const NET_BUFFER *packet, > OvsFlowKey *key, > POVS_PACKET_HDR_INFO layers) > { >@@ -134,7 +133,8 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, > const IPv6ExtHdr *extHdr; > UINT8 len; > >- extHdr = OvsGetPacketBytes(packet, sizeof *extHdr, ofs, >&extHdrStorage); >+ extHdr = OvsGetPacketBytes(packet, sizeof *extHdr, ofs, >+ &extHdrStorage); > if (!extHdr) { > return NDIS_STATUS_FAILURE; > } >@@ -142,7 +142,7 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, > len = extHdr->hdrExtLen; > ofs += nextHdr == SOCKET_IPPROTO_AH ? (len + 2) * 4 : (len + >1) * 8; > nextHdr = extHdr->nextHeader; >- if (OvsPacketLenNBL(packet) < ofs) { >+ if (NET_BUFFER_DATA_LENGTH(packet) < ofs) { > return NDIS_STATUS_FAILURE; > } > } else if (nextHdr == SOCKET_IPPROTO_FRAGMENT) { >@@ -177,7 +177,7 @@ OvsParseIPv6(const NET_BUFFER_LIST *packet, > } > > VOID >-OvsParseTcp(const NET_BUFFER_LIST *packet, >+OvsParseTcp(const NET_BUFFER *packet, > L4Key *flow, > POVS_PACKET_HDR_INFO layers) > { >@@ -192,7 +192,7 @@ OvsParseTcp(const NET_BUFFER_LIST *packet, > } > > VOID >-OvsParseUdp(const NET_BUFFER_LIST *packet, >+OvsParseUdp(const NET_BUFFER *packet, > L4Key *flow, > POVS_PACKET_HDR_INFO layers) > { >@@ -210,7 +210,7 @@ OvsParseUdp(const NET_BUFFER_LIST *packet, > } > > NDIS_STATUS >-OvsParseIcmpV6(const NET_BUFFER_LIST *packet, >+OvsParseIcmpV6(const NET_BUFFER *packet, > OvsFlowKey *key, > POVS_PACKET_HDR_INFO layers) > { >@@ -249,7 +249,7 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > } > flow->ndTarget = *ndTarget; > >- while ((UINT32)(ofs + 8) <= OvsPacketLenNBL(packet)) { >+ while ((UINT32)(ofs + 8) <= NET_BUFFER_DATA_LENGTH(packet)) { > /* > * The minimum size of an option is 8 bytes, which also is > * the size of Ethernet link-layer options. >@@ -258,13 +258,15 @@ OvsParseIcmpV6(const NET_BUFFER_LIST *packet, > const IPv6NdOptHdr *ndOpt; > UINT16 optLen; > >- ndOpt = OvsGetPacketBytes(packet, sizeof *ndOpt, ofs, >&ndOptStorage); >+ ndOpt = OvsGetPacketBytes(packet, sizeof *ndOpt, ofs, >+ &ndOptStorage); > if (!ndOpt) { > return NDIS_STATUS_FAILURE; > } > > optLen = ndOpt->len * 8; >- if (!optLen || (UINT32)(ofs + optLen) > >OvsPacketLenNBL(packet)) { >+ if (!optLen || (UINT32)(ofs + optLen) > >+ NET_BUFFER_DATA_LENGTH(packet)) { > goto invalid; > } > >diff --git a/datapath-windows/ovsext/OvsPacketParser.h >b/datapath-windows/ovsext/OvsPacketParser.h >index ab3c613..0928932 100644 >--- a/datapath-windows/ovsext/OvsPacketParser.h >+++ b/datapath-windows/ovsext/OvsPacketParser.h >@@ -20,15 +20,15 @@ > #include "precomp.h" > #include "OvsNetProto.h" > >-const VOID* OvsGetPacketBytes(const NET_BUFFER_LIST *_pNB, UINT32 len, >+const VOID* OvsGetPacketBytes(const NET_BUFFER *_pNB, UINT32 len, > UINT32 SrcOffset, VOID *storage); >-NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, OvsFlowKey *key, >+NDIS_STATUS OvsParseIPv6(const NET_BUFFER *packet, OvsFlowKey *key, > POVS_PACKET_HDR_INFO layers); >-VOID OvsParseTcp(const NET_BUFFER_LIST *packet, L4Key *flow, >+VOID OvsParseTcp(const NET_BUFFER *packet, L4Key *flow, > POVS_PACKET_HDR_INFO layers); >-VOID OvsParseUdp(const NET_BUFFER_LIST *packet, L4Key *flow, >+VOID OvsParseUdp(const NET_BUFFER *packet, L4Key *flow, > POVS_PACKET_HDR_INFO layers); >-NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, OvsFlowKey >*key, >+NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER *packet, OvsFlowKey *key, > POVS_PACKET_HDR_INFO layers); > > static __inline ULONG >@@ -58,7 +58,7 @@ OvsPacketLenNBL(const NET_BUFFER_LIST *_pNB) > * which C does not allow. > */ > static UINT16 >-OvsGetTcpCtl(const NET_BUFFER_LIST *packet, // IN >+OvsGetTcpCtl(const NET_BUFFER *packet, // IN > const POVS_PACKET_HDR_INFO layers) // IN > { > #define TCP_CTL_OFS 12 // Offset of "ctl" field in TCP >header. >@@ -74,7 +74,7 @@ OvsGetTcpCtl(const NET_BUFFER_LIST *packet, // IN > > > static UINT8 >-OvsGetTcpFlags(const NET_BUFFER_LIST *packet, // IN >+OvsGetTcpFlags(const NET_BUFFER *packet, // IN > const OvsFlowKey *key, // IN > const POVS_PACKET_HDR_INFO layers) // IN > { >@@ -88,7 +88,7 @@ OvsGetTcpFlags(const NET_BUFFER_LIST *packet, // IN > } > > static const EtherArp * >-OvsGetArp(const NET_BUFFER_LIST *packet, >+OvsGetArp(const NET_BUFFER *packet, > UINT32 ofs, > EtherArp *storage) > { >@@ -96,14 +96,15 @@ OvsGetArp(const NET_BUFFER_LIST *packet, > } > > static const IPHdr * >-OvsGetIp(const NET_BUFFER_LIST *packet, >+OvsGetIp(const NET_BUFFER *packet, > UINT32 ofs, > IPHdr *storage) > { > const IPHdr *ip = OvsGetPacketBytes(packet, sizeof *ip, ofs, >storage); > if (ip) { > int ipLen = ip->ihl * 4; >- if (ipLen >= sizeof *ip && OvsPacketLenNBL(packet) >= ofs + >ipLen) { >+ if (ipLen >= sizeof *ip && >+ NET_BUFFER_DATA_LENGTH(packet) >= ofs + ipLen) { > return ip; > } > } >@@ -111,14 +112,15 @@ OvsGetIp(const NET_BUFFER_LIST *packet, > } > > static const TCPHdr * >-OvsGetTcp(const NET_BUFFER_LIST *packet, >+OvsGetTcp(const NET_BUFFER *packet, > UINT32 ofs, > TCPHdr *storage) > { > const TCPHdr *tcp = OvsGetPacketBytes(packet, sizeof *tcp, ofs, >storage); > if (tcp) { > int tcpLen = tcp->doff * 4; >- if (tcpLen >= sizeof *tcp && OvsPacketLenNBL(packet) >= ofs + >tcpLen) { >+ if (tcpLen >= sizeof *tcp && >+ NET_BUFFER_DATA_LENGTH(packet) >= ofs + tcpLen) { > return tcp; > } > } >@@ -126,7 +128,7 @@ OvsGetTcp(const NET_BUFFER_LIST *packet, > } > > static const UDPHdr * >-OvsGetUdp(const NET_BUFFER_LIST *packet, >+OvsGetUdp(const NET_BUFFER *packet, > UINT32 ofs, > UDPHdr *storage) > { >@@ -134,7 +136,7 @@ OvsGetUdp(const NET_BUFFER_LIST *packet, > } > > static const ICMPHdr * >-OvsGetIcmp(const NET_BUFFER_LIST *packet, >+OvsGetIcmp(const NET_BUFFER *packet, > UINT32 ofs, > ICMPHdr *storage) > { >diff --git a/datapath-windows/ovsext/OvsTunnel.c >b/datapath-windows/ovsext/OvsTunnel.c >index b5a369a..9c96aec 100644 >--- a/datapath-windows/ovsext/OvsTunnel.c >+++ b/datapath-windows/ovsext/OvsTunnel.c >@@ -228,6 +228,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, > OVS_DATAPATH *datapath = &gOvsSwitchContext->datapath; > > ASSERT(gOvsSwitchContext); >+ ASSERT(pNbl->FirstNetBuffer->Next == NULL); > > /* Fill the tunnel key */ > status = OvsSlowPathDecapVxlan(pNbl, &tunnelKey); >@@ -265,6 +266,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, > UINT64 hash; > PNET_BUFFER curNb; > OvsFlow *flow; >+ VOID* vlanTagValue; > > fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl); > >@@ -295,20 +297,22 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, > ASSERT(vport->ovsType == OVSWIN_VPORT_TYPE_VXLAN); > > portNo = vport->portNo; >+ vlanTagValue = NET_BUFFER_LIST_INFO(pNbl, >Ieee8021QNetBufferListInfo); > >- status = OvsExtractFlow(pNbl, portNo, &key, &layers, &tunnelKey); >+ status = OvsExtractFlow(curNb, portNo, &key, &layers, >+ &tunnelKey, vlanTagValue); > if (status != NDIS_STATUS_SUCCESS) { > goto unlockAndDrop; > } > > flow = OvsLookupFlow(datapath, &key, &hash, FALSE); > if (flow) { >- OvsFlowUsed(flow, pNbl, &layers); >+ OvsFlowUsed(flow, curNb, &layers); > datapath->hits++; > > OvsActionsExecute(gOvsSwitchContext, &completionList, pNbl, >- portNo, SendFlags, &key, &hash, &layers, >- flow->actions, flow->actionsLen); >+ portNo, SendFlags, &key, &hash, &layers, >+ flow->actions, flow->actionsLen); > > OvsReleaseDatapath(datapath, &dpLockState); > } else { >diff --git a/datapath-windows/ovsext/OvsUser.c >b/datapath-windows/ovsext/OvsUser.c >index 5093f20..9f717ee 100644 >--- a/datapath-windows/ovsext/OvsUser.c >+++ b/datapath-windows/ovsext/OvsUser.c >@@ -315,6 +315,7 @@ OvsExecuteDpIoctl(PVOID inputBuffer, > OvsFlowKey key; > OVS_PACKET_HDR_INFO layers; > POVS_VPORT_ENTRY vport; >+ VOID* vlanTagValue; > > if (inputLength < sizeof(*execute) || outputLength != 0) { > return STATUS_INFO_LENGTH_MISMATCH; >@@ -360,14 +361,22 @@ OvsExecuteDpIoctl(PVOID inputBuffer, > fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; > fwdDetail->SourceNicIndex = 0; > } >- // XXX: Figure out if any of the other members of fwdDetail need to >be set. >+ /* >+ * XXX: Figure out if any of the other members of fwdDetail need to >be >+ * set. >+ */ >+ I actually prefer the shorter version which would take up only 2 lines. I am going to suggest this be added to the coding guide as well. >+ ASSERT(pNbl->FirstNetBuffer->Next == NULL); > >- ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, >&layers, >- NULL); >+ vlanTagValue = NET_BUFFER_LIST_INFO(pNbl, >Ieee8021QNetBufferListInfo); >+ >+ ndisStatus = OvsExtractFlow(NET_BUFFER_LIST_FIRST_NB(pNbl), >+ fwdDetail->SourcePortId, &key, &layers, >NULL, >+ vlanTagValue); > if (ndisStatus == NDIS_STATUS_SUCCESS) { > ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); > NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, >&lockState, >- NDIS_RWL_AT_DISPATCH_LEVEL); >+ NDIS_RWL_AT_DISPATCH_LEVEL); > ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl, > vport ? vport->portNo : 0, > >NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP, >@@ -827,6 +836,8 @@ OvsCreateAndAddPackets(UINT32 queueId, > PNET_BUFFER_LIST newNbl = NULL; > PNET_BUFFER nb; > >+ ASSERT(nbl->FirstNetBuffer->Next == NULL); >+ > if (hdrInfo->isTcp) { > NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; > UINT32 packetLength; >diff --git a/datapath-windows/ovsext/OvsVxlan.c >b/datapath-windows/ovsext/OvsVxlan.c >index 63909ae..c50d2d9 100644 >--- a/datapath-windows/ovsext/OvsVxlan.c >+++ b/datapath-windows/ovsext/OvsVxlan.c >@@ -116,6 +116,8 @@ OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl, > UINT32 headRoom = OvsGetVxlanTunHdrSize(); > UINT32 packetLength; > >+ ASSERT(curNbl->FirstNetBuffer->Next == NULL); >+ > /* > * XXX: the assumption currently is that the NBL is owned by OVS, and > * headroom has already been allocated as part of allocating the NBL >and >@@ -286,13 +288,19 @@ OvsIpHlprCbVxlan(PNET_BUFFER_LIST curNbl, > OvsFlowKey key; > NDIS_STATUS status; > UNREFERENCED_PARAMETER(inPort); >- >- status = OvsExtractFlow(curNbl, inPort, &key, &layers, NULL); >- if (result == STATUS_SUCCESS) { >- status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers, >+ VOID* vlanTagValue; >+ >+ vlanTagValue = NET_BUFFER_LIST_INFO(curNbl, >Ieee8021QNetBufferListInfo); >+ for (NET_BUFFER* pNb = NET_BUFFER_LIST_FIRST_NB(curNbl); >+ pNb != NULL; pNb = NET_BUFFER_NEXT_NB(pNb)) { >+ status = OvsExtractFlow(pNb, inPort, &key, &layers, NULL, >+ vlanTagValue); >+ if (result == STATUS_SUCCESS) { >+ status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers, > (POVS_SWITCH_CONTEXT)cbData1, NULL); >- } else { >- status = NDIS_STATUS_FAILURE; >+ } else { >+ status = NDIS_STATUS_FAILURE; >+ } > } > > if (status != NDIS_STATUS_SUCCESS) { >@@ -457,9 +465,16 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, > OVS_PACKET_HDR_INFO layers; > > layers.value = 0; >+ ASSERT(packet->FirstNetBuffer->Next == NULL); > > do { >- nh = OvsGetIp(packet, layers.l3Offset, &ip_storage); >+ NET_BUFFER* pNb = NET_BUFFER_LIST_FIRST_NB(packet); >+ >+ /* >+ * It is an encapsulated packet (UDP), we can use the first NB >+ * to start the check >+ */ >+ nh = OvsGetIp(pNb, layers.l3Offset, &ip_storage); > if (nh) { > layers.l4Offset = layers.l3Offset + nh->ihl * 4; > } else { >@@ -467,7 +482,7 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, > } > > /* make sure it's a VXLAN packet */ >- udp = OvsGetUdp(packet, layers.l4Offset, &udpStorage); >+ udp = OvsGetUdp(pNb, layers.l4Offset, &udpStorage); > if (udp) { > layers.l7Offset = layers.l4Offset + sizeof *udp; > } else { >@@ -477,7 +492,11 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, > /* XXX Should be tested against the dynamic port # in the VXLAN >vport */ > ASSERT(udp->dest == RtlUshortByteSwap(VXLAN_UDP_PORT)); > >- VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(packet, >+ /* >+ * We may have multiple VXLAN packets here that need to be >decapsulated >+ * For the VXLAN header, we need only the first NET_BUFFER >+ */ >+ VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(pNb, > sizeof(*VxlanHeader), > layers.l7Offset, > &VxlanHeaderBuffer); >-- >1.9.0.msysgit.0 > > >_______________________________________________ >dev mailing list >dev@openvswitch.org >https://urldefense.proofpoint.com/v1/url?u=http://openvswitch.org/mailman/ >listinfo/dev&k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0A&r=pEkjsHfytvHEWufeZPpgqSO >JMdMjuZPbesVsNhCUc0E%3D%0A&m=G6Z6cJKEWSaM%2Fp60WkBxb%2BuhTQIc94OiwO%2BaGAd >Zijk%3D%0A&s=446f3fe34df7cc4aefd30362cab16149e04596dcde7ed2f71cdc776bfde5d >20e _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev