Hi Sorin/Alin, Is this an optimization or a fix? Can you pls. add more details?
We do two flow lookups for a reason - once for the inner packet from the VIF, and once more after encapsulation. There¹s nothing to avoid here. The only way to avoid the second lookup is by handing off the packet to the Windows IP stack by using a kernel socket, and letting Windows do the routing. For this, we¹d need to use a ³Private² Hyper-V switch. -- Nithin -----Original Message----- From: Sorin Vinturis <svintu...@cloudbasesolutions.com> Date: Wednesday, September 23, 2015 at 5:09 AM To: "dev@openvswitch.org" <dev@openvswitch.org> Subject: [ovs-dev] [PATCH] datapath-windows: Avoid double flow lookup in Tx tunnel function >Signed-off-by: Sorin Vinturis <svintu...@cloudbasesolutions.com> >Co-authored-by: Alin Gabriel Serdean <aserd...@cloudbasesolutions.com> >--- > datapath-windows/ovsext/Actions.c | 264 >+++++++++++++++++++++++++------------- > datapath-windows/ovsext/Vport.c | 1 - > datapath-windows/ovsext/Vport.h | 2 +- > 3 files changed, 173 insertions(+), 94 deletions(-) > >diff --git a/datapath-windows/ovsext/Actions.c >b/datapath-windows/ovsext/Actions.c >index bfe5d7f..2a482b9 100644 >--- a/datapath-windows/ovsext/Actions.c >+++ b/datapath-windows/ovsext/Actions.c >@@ -322,65 +322,18 @@ OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx, > return FALSE; > } > >- >-/* >- * >-------------------------------------------------------------------------- >- * OvsAddPorts -- >- * Add the specified destination vport into the forwarding context. >If the >- * vport is a VIF/external port, it is added directly to the NBL. If >it is >- * a tunneling port, it is NOT added to the NBL. >- * >- * Result: >- * NDIS_STATUS_SUCCESS on success >- * Other NDIS_STATUS upon failure. >- * >-------------------------------------------------------------------------- >- */ >-static __inline NDIS_STATUS >-OvsAddPorts(OvsForwardingContext *ovsFwdCtx, >- OvsFlowKey *flowKey, >- NDIS_SWITCH_PORT_ID dstPortId, >- BOOLEAN preserveVLAN, >- BOOLEAN preservePriority) >+static NTSTATUS >+OvsGrowNblDestinations(OvsForwardingContext *ovsFwdCtx) > { >- POVS_VPORT_ENTRY vport; >- PNDIS_SWITCH_PORT_DESTINATION fwdPort; >- NDIS_STATUS status; >+ NDIS_STATUS status = STATUS_SUCCESS; > POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext; >+ BOOLEAN error = TRUE; > >- /* >- * We hold the dispatch lock that protects the list of vports, so >vports >- * validated here can be added as destinations safely before we call >into >- * NDIS. >- * >- * Some of the vports can be tunnelled ports as well in which case >- * they should be added to a separate list of tunnelled destination >ports >- * instead of the VIF ports. The context for the tunnel is settable >- * in OvsForwardingContext. >- */ >- vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, dstPortId); >- if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) { >- /* >- * There may be some latency between a port disappearing, and >userspace >- * updating the recalculated flows. In the meantime, handle >invalid >- * ports gracefully. >- */ >- ovsActionStats.noVport++; >- return NDIS_STATUS_SUCCESS; >- } >- ASSERT(vport->nicState == NdisSwitchNicStateConnected); >- vport->stats.txPackets++; >- vport->stats.txBytes += >- >NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl)); >- >- if (OvsIsBridgeInternalVport(vport)) { >- return NDIS_STATUS_SUCCESS; >- } >- >- if (OvsDetectTunnelPkt(ovsFwdCtx, vport, flowKey)) { >- return NDIS_STATUS_SUCCESS; >- } >+ do { >+ if (ovsFwdCtx->destPortsSizeOut != ovsFwdCtx->destPortsSizeIn) { >+ break; >+ } > >- if (ovsFwdCtx->destPortsSizeOut == ovsFwdCtx->destPortsSizeIn) { > if (ovsFwdCtx->destPortsSizeIn == 0) { > ASSERT(ovsFwdCtx->destinationPorts == NULL); > ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0); >@@ -391,13 +344,14 @@ OvsAddPorts(OvsForwardingContext *ovsFwdCtx, > &ovsFwdCtx->destinationPorts); > if (status != NDIS_STATUS_SUCCESS) { > ovsActionStats.cannotGrowDest++; >- return status; >+ break; > } > ovsFwdCtx->destPortsSizeIn = > ovsFwdCtx->fwdDetail->NumAvailableDestinations; > ASSERT(ovsFwdCtx->destinationPorts); > } else { > ASSERT(ovsFwdCtx->destinationPorts != NULL); >+ > /* > * NumElements: > * A ULONG value that specifies the total number of >@@ -420,6 +374,7 @@ OvsAddPorts(OvsForwardingContext *ovsFwdCtx, > ovsFwdCtx->destPortsSizeOut - > ovsFwdCtx->fwdDetail->NumAvailableDestinations); > ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations > 0); >+ > /* > * Before we grow the array of destination ports, the >current set > * of ports needs to be committed. Only the ports added >since the >@@ -431,7 +386,7 @@ OvsAddPorts(OvsForwardingContext *ovsFwdCtx, > ovsFwdCtx->destinationPorts); > if (status != NDIS_STATUS_SUCCESS) { > ovsActionStats.cannotGrowDest++; >- return status; >+ break; > } > ASSERT(ovsFwdCtx->destinationPorts->NumElements == > ovsFwdCtx->destPortsSizeIn); >@@ -444,26 +399,96 @@ OvsAddPorts(OvsForwardingContext *ovsFwdCtx, > ovsFwdCtx->destPortsSizeIn, >&ovsFwdCtx->destinationPorts); > if (status != NDIS_STATUS_SUCCESS) { > ovsActionStats.cannotGrowDest++; >- return status; >+ break; > } > ASSERT(ovsFwdCtx->destinationPorts != NULL); > ovsFwdCtx->destPortsSizeIn <<= 1; > } >- } > >- ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn); >- fwdPort = >- >NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts, >- >ovsFwdCtx->destPortsSizeOut); >+ error = FALSE; >+ } while (error); >+ >+ return status; >+} > >- fwdPort->PortId = vport->portId; >- fwdPort->NicIndex = vport->nicIndex; >- fwdPort->IsExcluded = 0; >- fwdPort->PreserveVLAN = preserveVLAN; >- fwdPort->PreservePriority = preservePriority; >- ovsFwdCtx->destPortsSizeOut += 1; > >- return NDIS_STATUS_SUCCESS; >+/* >+ * >-------------------------------------------------------------------------- >+ * OvsAddPorts -- >+ * Add the specified destination vport into the forwarding context. >If the >+ * vport is a VIF/external port, it is added directly to the NBL. If >it is >+ * a tunneling port, it is NOT added to the NBL. >+ * >+ * Result: >+ * NDIS_STATUS_SUCCESS on success >+ * Other NDIS_STATUS upon failure. >+ * >-------------------------------------------------------------------------- >+ */ >+static __inline NDIS_STATUS >+OvsAddPorts(OvsForwardingContext *ovsFwdCtx, >+ OvsFlowKey *flowKey, >+ NDIS_SWITCH_PORT_ID dstPortId, >+ BOOLEAN preserveVLAN, >+ BOOLEAN preservePriority) >+{ >+ NDIS_STATUS status = NDIS_STATUS_SUCCESS; >+ POVS_VPORT_ENTRY vport; >+ BOOLEAN error = TRUE; >+ PNDIS_SWITCH_PORT_DESTINATION fwdPort; >+ >+ do { >+ /* >+ * We hold the dispatch lock that protects the list of vports, >so vports >+ * validated here can be added as destinations safely before we >call into >+ * NDIS. >+ * >+ * Some of the vports can be tunnelled ports as well in which >case >+ * they should be added to a separate list of tunnelled >destination ports >+ * instead of the VIF ports. The context for the tunnel is >settable >+ * in OvsForwardingContext. >+ */ >+ vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, >dstPortId); >+ if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) { >+ /* >+ * There may be some latency between a port disappearing, >and userspace >+ * updating the recalculated flows. In the meantime, handle >invalid >+ * ports gracefully. >+ */ >+ ovsActionStats.noVport++; >+ break; >+ } >+ >+ ASSERT(vport->nicState == NdisSwitchNicStateConnected); >+ vport->stats.txPackets++; >+ vport->stats.txBytes += >+ >NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl)); >+ >+ if (OvsDetectTunnelPkt(ovsFwdCtx, vport, flowKey)) { >+ break; >+ } >+ >+ status = OvsGrowNblDestinations(ovsFwdCtx); >+ if (status != NDIS_STATUS_SUCCESS) { >+ ovsActionStats.cannotGrowDest++; >+ break; >+ } >+ >+ ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn); >+ fwdPort = >+ >NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts, >+ ovsFwdCtx->destPortsSizeOut); >+ >+ fwdPort->PortId = vport->portId; >+ fwdPort->NicIndex = vport->nicIndex; >+ fwdPort->IsExcluded = 0; >+ fwdPort->PreserveVLAN = preserveVLAN; >+ fwdPort->PreservePriority = preservePriority; >+ ovsFwdCtx->destPortsSizeOut += 1; >+ >+ error = FALSE; >+ } while (error); >+ >+ return status; > } > > >@@ -581,10 +606,11 @@ OvsDoFlowLookupOutput(OvsForwardingContext >*ovsFwdCtx) > OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers); > ovsFwdCtx->switchContext->datapath.hits++; > status = OvsActionsExecute(ovsFwdCtx->switchContext, >- ovsFwdCtx->completionList, >ovsFwdCtx->curNbl, >- ovsFwdCtx->srcVportNo, >ovsFwdCtx->sendFlags, >- &key, &hash, &ovsFwdCtx->layers, >- flow->actions, flow->actionsLen); >+ ovsFwdCtx->completionList, >+ ovsFwdCtx->curNbl, >+ ovsFwdCtx->srcVportNo, >ovsFwdCtx->sendFlags, >+ &key, &hash, &ovsFwdCtx->layers, >+ flow->actions, flow->actionsLen); > ovsFwdCtx->curNbl = NULL; > } else { > LIST_ENTRY missedPackets; >@@ -631,35 +657,26 @@ OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx) > { > NDIS_STATUS status = NDIS_STATUS_FAILURE; > PNET_BUFFER_LIST newNbl = NULL; >+ POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext; > >- /* >- * Setup the source port to be the internal port to as to facilitate >the >- * second OvsLookupFlow. >- */ >- if (ovsFwdCtx->switchContext->internalVport == NULL || >- ovsFwdCtx->switchContext->virtualExternalVport == NULL) { >+ if (switchContext->internalVport == NULL || >+ switchContext->virtualExternalVport == NULL) { > OvsClearTunTxCtx(ovsFwdCtx); > OvsCompleteNBLForwardingCtx(ovsFwdCtx, > L"OVS-Dropped since either internal or external port is >absent"); > return NDIS_STATUS_FAILURE; > } >- ovsFwdCtx->srcVportNo = >- >((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->portNo; >- >- ovsFwdCtx->fwdDetail->SourcePortId = >ovsFwdCtx->switchContext->internalPortId; >- ovsFwdCtx->fwdDetail->SourceNicIndex = >- >((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->nicIndex; > > /* Do the encap. Encap function does not consume the NBL. */ > switch(ovsFwdCtx->tunnelTxNic->ovsType) { > case OVS_VPORT_TYPE_VXLAN: > status = OvsEncapVxlan(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl, >- &ovsFwdCtx->tunKey, >ovsFwdCtx->switchContext, >+ &ovsFwdCtx->tunKey, switchContext, > &ovsFwdCtx->layers, &newNbl); > break; > case OVS_VPORT_TYPE_STT: > status = OvsEncapStt(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl, >- &ovsFwdCtx->tunKey, >ovsFwdCtx->switchContext, >+ &ovsFwdCtx->tunKey, switchContext, > &ovsFwdCtx->layers, &newNbl); > break; > default: >@@ -670,15 +687,78 @@ OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx) > OvsClearTunTxCtx(ovsFwdCtx); > > if (status == NDIS_STATUS_SUCCESS) { >+ PNDIS_SWITCH_PORT_DESTINATION fwdPort; >+ PCWSTR dropReason = L""; >+ > ASSERT(newNbl); > OvsCompleteNBLForwardingCtx(ovsFwdCtx, > L"Complete after cloning NBL for >encapsulation"); > ovsFwdCtx->curNbl = newNbl; >- status = OvsDoFlowLookupOutput(ovsFwdCtx); >+ >+ OvsInitForwardingCtx(ovsFwdCtx, switchContext, >+ ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo, >+ ovsFwdCtx->sendFlags, ovsFwdCtx->fwdDetail, >+ ovsFwdCtx->completionList, NULL, TRUE); >+ >+ status = OvsGrowNblDestinations(ovsFwdCtx); >+ if (status != NDIS_STATUS_SUCCESS) { >+ ovsActionStats.cannotGrowDest++; >+ return status; >+ } >+ >+ ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn); >+ fwdPort = >+ >NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts, >+ ovsFwdCtx->destPortsSizeOut); >+ fwdPort->PortId = >ovsFwdCtx->switchContext->virtualExternalPortId; >+ fwdPort->NicIndex = 1; >+ fwdPort->IsExcluded = 0; >+ fwdPort->PreserveVLAN = TRUE; >+ fwdPort->PreservePriority = TRUE; >+ ovsFwdCtx->destPortsSizeOut += 1; >+ >+ if (ovsFwdCtx->destPortsSizeOut > 0) { >+ PNET_BUFFER_LIST newNbl = NULL; >+ UINT32 portsToUpdate = >+ ovsFwdCtx->fwdDetail->NumAvailableDestinations - >+ (ovsFwdCtx->destPortsSizeIn - >ovsFwdCtx->destPortsSizeOut); >+ >+ ASSERT(ovsFwdCtx->destinationPorts != NULL); >+ ASSERT(portsToUpdate > 0); >+ >+ status = >switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations( >+ switchContext->NdisSwitchContext, ovsFwdCtx->curNbl, >+ portsToUpdate, ovsFwdCtx->destinationPorts); >+ if (status == NDIS_STATUS_SUCCESS) { >+ OvsSendNBLIngress(switchContext, ovsFwdCtx->curNbl, >+ ovsFwdCtx->sendFlags); >+ ovsFwdCtx->destPortsSizeOut = 0; >+ ovsFwdCtx->curNbl = NULL; >+ if (newNbl) { >+ status = OvsInitForwardingCtx(ovsFwdCtx, >switchContext, >+ newNbl, ovsFwdCtx->srcVportNo, 0, >+ NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), >+ ovsFwdCtx->completionList, >+ &ovsFwdCtx->layers, FALSE); >+ if (status != NDIS_STATUS_SUCCESS) { >+ dropReason = L"Dropped due to resources."; >+ } >+ } >+ } else { >+ OvsCompleteNBL(switchContext, newNbl, TRUE); >+ ovsActionStats.cannotGrowDest++; >+ dropReason = L"Dropped due to failure to update >destinations."; >+ } >+ } >+ >+ if (status != NDIS_STATUS_SUCCESS) { >+ OvsCompleteNBLForwardingCtx(ovsFwdCtx, dropReason); >+ } >+ > ASSERT(ovsFwdCtx->curNbl == NULL); > } else { > /* >- * XXX: Temporary freeing of the packet until we register a >+ * XXX: Temporary freeing of the packet until we register a > * callback to IP helper. > */ > OvsCompleteNBLForwardingCtx(ovsFwdCtx, >@@ -1433,7 +1513,7 @@ OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext, > case OVS_ACTION_ATTR_OUTPUT: > dstPortID = NlAttrGetU32(a); > status = OvsAddPorts(&ovsFwdCtx, key, dstPortID, >- TRUE, TRUE); >+ TRUE, TRUE); > if (status != NDIS_STATUS_SUCCESS) { > dropReason = L"OVS-adding destination port failed"; > goto dropit; >@@ -1515,7 +1595,7 @@ OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext, > BOOLEAN isRecv = FALSE; > > POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(switchContext, >- portNo); >+ portNo); > > if (vport) { > if (vport->isExternal || >diff --git a/datapath-windows/ovsext/Vport.c >b/datapath-windows/ovsext/Vport.c >index baab056..61a9379 100644 >--- a/datapath-windows/ovsext/Vport.c >+++ b/datapath-windows/ovsext/Vport.c >@@ -1010,7 +1010,6 @@ AssignNicNameSpecial(POVS_VPORT_ENTRY vport) > } > } > >- > /* > * >-------------------------------------------------------------------------- > * Functionality common to any port on the Hyper-V switch. This function >is not >diff --git a/datapath-windows/ovsext/Vport.h >b/datapath-windows/ovsext/Vport.h >index ead55a9..89099f2 100644 >--- a/datapath-windows/ovsext/Vport.h >+++ b/datapath-windows/ovsext/Vport.h >@@ -73,7 +73,7 @@ typedef struct _OVS_VPORT_FULL_STATS { > OVS_VPORT_ERR_STATS; > }OVS_VPORT_FULL_STATS; > /* >- * Each internal, external adapter or vritual adapter has >+ * Each internal, external adapter or virtual adapter has > * one vport entry. In addition, we have one vport for each > * tunnel type, such as vxlan, gre > */ >-- >1.9.0.msysgit.0 >_______________________________________________ >dev mailing list >dev@openvswitch.org >https://urldefense.proofpoint.com/v2/url?u=http-3A__openvswitch.org_mailma >n_listinfo_dev&d=BQIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=pN >HQcdr7B40b4h6Yb7FIedI1dnBsxdDuTLBYD3JqV80&m=lPR4aO2DeQ7LHo8aCnpeHaZsgHDHdG >6GfLKqPjGuULQ&s=4gdQ1BpakSNM0qCIs4cyMxIiVrlUS8G0S2LhhLzOdtc&e= _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev