Hi Sorin, Thanks for incorporating the comments from previous iteration. I had a few cosmetic comments to improve readability, but looks good otherwise.
Acked-by: Nithin Raju <nit...@vmware.com> Thanks, -- Nithin -----Original Message----- From: dev <dev-boun...@openvswitch.org> on behalf of Sorin Vinturis <svintu...@cloudbasesolutions.com> Date: Tuesday, January 19, 2016 at 12:04 PM To: "dev@openvswitch.org" <dev@openvswitch.org> Subject: [ovs-dev] [PATCH v2 1/2] datapath-windows: Support for OVS_KEY_ATTR_MPLS attribute >This patch adds OVS_KEY_ATTR_MPLS to the OVS flow mechanism. > >Tested using ping. >Tested using iperf (TCP and UDP). >Tested using DriverVerifier. > >Signed-off-by: Sorin Vinturis <svintu...@cloudbasesolutions.com> >--- >Compared to the previous patch, the code regarding MPLS push&pop actions >was revised. >v2: Addressed comments after the code review. >--- > datapath-windows/automake.mk | 1 + > datapath-windows/ovsext/Actions.c | 200 >+++++++++++++++++++++++++++--- > datapath-windows/ovsext/DpInternal.h | 11 +- > datapath-windows/ovsext/Ethernet.h | 2 + > datapath-windows/ovsext/Flow.c | 102 ++++++++++++++- > datapath-windows/ovsext/Mpls.h | 56 +++++++++ > datapath-windows/ovsext/NetProto.h | 2 + > datapath-windows/ovsext/Netlink/Netlink.h | 1 + > datapath-windows/ovsext/PacketParser.c | 12 +- > datapath-windows/ovsext/PacketParser.h | 7 ++ > datapath-windows/ovsext/ovsext.vcxproj | 1 + > 11 files changed, 366 insertions(+), 29 deletions(-) > create mode 100644 datapath-windows/ovsext/Mpls.h > >diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk >index 7f12d92..fd22218 100644 >--- a/datapath-windows/automake.mk >+++ b/datapath-windows/automake.mk >@@ -31,6 +31,7 @@ EXTRA_DIST += \ > datapath-windows/ovsext/IpHelper.h \ > datapath-windows/ovsext/Jhash.c \ > datapath-windows/ovsext/Jhash.h \ >+ datapath-windows/ovsext/Mpls.h \ > datapath-windows/ovsext/NetProto.h \ > datapath-windows/ovsext/Netlink/Netlink.c \ > datapath-windows/ovsext/Netlink/Netlink.h \ >diff --git a/datapath-windows/ovsext/Actions.c >b/datapath-windows/ovsext/Actions.c >index 6506584..185f1aa 100644 >--- a/datapath-windows/ovsext/Actions.c >+++ b/datapath-windows/ovsext/Actions.c >@@ -20,6 +20,7 @@ > #include "Event.h" > #include "Flow.h" > #include "Gre.h" >+#include "Mpls.h" > #include "NetProto.h" > #include "PacketIO.h" > #include "Stt.h" >@@ -1031,28 +1032,25 @@ OvsOutputBeforeSetAction(OvsForwardingContext >*ovsFwdCtx) > > /* > * >-------------------------------------------------------------------------- >- * OvsPopVlanInPktBuf -- >- * Function to pop a VLAN tag when the tag is in the packet buffer. >+ * OvsPopPacketBuf -- >+ * Function to pop a specified buffer from the packet data. > * >-------------------------------------------------------------------------- > */ > static __inline NDIS_STATUS >-OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx) >+OvsPopPacketBuf(OvsForwardingContext *ovsFwdCtx, >+ UINT32 dataLength, >+ UINT32 shiftLength, >+ PUINT8 *bufferData) Good example of code reuse. One suggestion I have to make things clearer: I¹d rename the variables as: ŒdataLength¹ => shiftOffset The description could be updated to: ³Function to pop a specified field of length ŒshiftLength¹ located as ¹shiftOffset¹ from the ethernet header. The data on the left of ŒshiftOffset¹ is right shifted. Returns a pointer to the new start in ŒbufferData¹. Maybe the function itself can be renamed to ³OvsPopFieldInPacketBuf². > { > PNET_BUFFER curNb; > PMDL curMdl; > PUINT8 bufferStart; >- ULONG dataLength = sizeof (DL_EUI48) + sizeof (DL_EUI48); > UINT32 packetLen, mdlLen; > PNET_BUFFER_LIST newNbl; > NDIS_STATUS status; >+ PUINT8 tempBuffer[ETH_HEADER_LENGTH]; > >- /* >- * Declare a dummy vlanTag structure since we need to compute the >size >- * of shiftLength. The NDIS one is a unionized structure. >- */ >- NDIS_PACKET_8021Q_INFO vlanTag = {0}; >- ULONG shiftLength = sizeof (vlanTag.TagHeader); >- PUINT8 tempBuffer[sizeof (DL_EUI48) + sizeof (DL_EUI48)]; >+ ASSERT(dataLength > ETH_ADDR_LENGTH); > > newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, >ovsFwdCtx->curNbl, > 0, 0, TRUE /* copy NBL info */); >@@ -1064,8 +1062,8 @@ OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx) > /* Complete the original NBL and create a copy to modify. */ > OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"OVS-Dropped due to copy"); > >- status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, >- newNbl, ovsFwdCtx->srcVportNo, 0, >+ status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, >newNbl, >+ ovsFwdCtx->srcVportNo, 0, > >NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), > NULL, &ovsFwdCtx->layers, FALSE); > if (status != NDIS_STATUS_SUCCESS) { >@@ -1083,8 +1081,8 @@ OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx) > return NDIS_STATUS_RESOURCES; > } > mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb); >- /* Bail out if L2 + VLAN header is not contiguous in the first >buffer. */ >- if (MIN(packetLen, mdlLen) < sizeof (EthHdr) + shiftLength) { >+ /* Bail out if L2 + shiftLength is not contiguous in the first >buffer. */ >+ if (MIN(packetLen, mdlLen) < sizeof(EthHdr) + shiftLength) { > ASSERT(FALSE); > return NDIS_STATUS_FAILURE; > } >@@ -1092,6 +1090,134 @@ OvsPopVlanInPktBuf(OvsForwardingContext >*ovsFwdCtx) > RtlCopyMemory(tempBuffer, bufferStart, dataLength); > RtlCopyMemory(bufferStart + shiftLength, tempBuffer, dataLength); > NdisAdvanceNetBufferDataStart(curNb, shiftLength, FALSE, NULL); >+ >+ if (bufferData) { >+ *bufferData = bufferStart + shiftLength; >+ } >+ >+ return NDIS_STATUS_SUCCESS; >+} >+ >+ >+/* >+ * >-------------------------------------------------------------------------- >+ * OvsPopVlanInPktBuf -- >+ * Function to pop a VLAN tag when the tag is in the packet buffer. >+ * >-------------------------------------------------------------------------- >+ */ >+static __inline NDIS_STATUS >+OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx) >+{ >+ /* >+ * Declare a dummy vlanTag structure since we need to compute the >size >+ * of shiftLength. The NDIS one is a unionized structure. >+ */ >+ NDIS_PACKET_8021Q_INFO vlanTag = {0}; >+ UINT32 shiftLength = sizeof(vlanTag.TagHeader); >+ UINT32 dataLength = sizeof(DL_EUI48) + sizeof(DL_EUI48); >+ >+ return OvsPopPacketBuf(ovsFwdCtx, dataLength, shiftLength, NULL); >+} >+ >+ >+/* >+ * >-------------------------------------------------------------------------- >+ * OvsActionMplsPop -- >+ * Function to pop the first MPLS label from the current packet. >+ * >-------------------------------------------------------------------------- >+ */ >+static __inline NDIS_STATUS >+OvsActionMplsPop(OvsForwardingContext *ovsFwdCtx, >+ ovs_be16 ethertype) >+{ >+ NDIS_STATUS status = NDIS_STATUS_SUCCESS; >+ OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers; >+ EthHdr *ethHdr = NULL; >+ >+ status = OvsPopPacketBuf(ovsFwdCtx, sizeof(*ethHdr), >+ MPLS_HLEN, (PUINT8*)ðHdr); >+ if (status == NDIS_STATUS_SUCCESS) { >+ if (ethHdr && OvsEthertypeIsMpls(ethHdr->Type)) { >+ ethHdr->Type = ethertype; >+ } >+ >+ layers->l3Offset -= MPLS_HLEN; >+ layers->l4Offset -= MPLS_HLEN; >+ } >+ >+ return status; >+} >+ >+ >+/* >+ * >-------------------------------------------------------------------------- >+ * OvsActionMplsPush -- >+ * Function to push the MPLS label into the current packet. >+ * >-------------------------------------------------------------------------- >+ */ >+static __inline NDIS_STATUS >+OvsActionMplsPush(OvsForwardingContext *ovsFwdCtx, >+ const struct ovs_action_push_mpls *mpls) >+{ >+ NDIS_STATUS status; >+ PNET_BUFFER curNb = NULL; >+ PMDL curMdl = NULL; >+ PUINT8 bufferStart = NULL; >+ OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers; >+ EthHdr *ethHdr = NULL; >+ MPLSHdr *mplsHdr = NULL; >+ UINT32 mdlLen = 0, curMdlOffset = 0; >+ PNET_BUFFER_LIST newNbl; >+ >+ newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, >ovsFwdCtx->curNbl, >+ layers->l3Offset, MPLS_HLEN, TRUE); >+ if (!newNbl) { >+ ovsActionStats.noCopiedNbl++; >+ return NDIS_STATUS_RESOURCES; >+ } >+ OvsCompleteNBLForwardingCtx(ovsFwdCtx, >+ L"Complete after partial copy."); >+ >+ status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, >+ newNbl, ovsFwdCtx->srcVportNo, 0, >+ >NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), >+ NULL, &ovsFwdCtx->layers, FALSE); >+ if (status != NDIS_STATUS_SUCCESS) { >+ OvsCompleteNBLForwardingCtx(ovsFwdCtx, >+ L"OVS-Dropped due to resources"); >+ return NDIS_STATUS_RESOURCES; >+ } >+ >+ curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); >+ ASSERT(curNb->Next == NULL); >+ >+ status = NdisRetreatNetBufferDataStart(curNb, MPLS_HLEN, 0, NULL); >+ if (status != NDIS_STATUS_SUCCESS) { >+ return status; >+ } >+ >+ curMdl = NET_BUFFER_CURRENT_MDL(curNb); >+ NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority); >+ if (!curMdl) { >+ ovsActionStats.noResource++; >+ return NDIS_STATUS_RESOURCES; >+ } >+ >+ curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb); >+ mdlLen -= curMdlOffset; >+ ASSERT(mdlLen >= MPLS_HLEN); >+ >+ ethHdr = (EthHdr *)(bufferStart + curMdlOffset); >+ RtlCopyMemory(ethHdr, >+ (UINT8*)ethHdr + MPLS_HLEN, >+ sizeof(*ethHdr)); Here are source and destination are overlapping. Hence, you should be using RtlMoveMemory(). >+ ethHdr->Type = mpls->mpls_ethertype; >+ >+ mplsHdr = (MPLSHdr *)(ethHdr + 1); >+ mplsHdr->lse = mpls->mpls_lse; >+ >+ layers->l3Offset += MPLS_HLEN; >+ layers->l4Offset += MPLS_HLEN; > > return NDIS_STATUS_SUCCESS; > } >@@ -1523,6 +1649,50 @@ OvsActionsExecute(POVS_SWITCH_CONTEXT >switchContext, > break; > } > >+ case OVS_ACTION_ATTR_PUSH_MPLS: >+ { >+ if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic >!= NULL >+ || ovsFwdCtx.tunnelRxNic != NULL) { >+ status = OvsOutputBeforeSetAction(&ovsFwdCtx); >+ if (status != NDIS_STATUS_SUCCESS) { >+ dropReason = L"OVS-adding destination failed"; >+ goto dropit; >+ } >+ } >+ >+ status = OvsActionMplsPush(&ovsFwdCtx, >+ (struct ovs_action_push_mpls >*)NlAttrGet >+ ((const PNL_ATTR)a)); >+ if (status != NDIS_STATUS_SUCCESS) { >+ dropReason = L"OVS-push MPLS action failed"; >+ goto dropit; >+ } >+ layers->l3Offset += MPLS_HLEN; >+ layers->l4Offset += MPLS_HLEN; Double increment of fields in Œlayers¹. I guess it didn¹t matter since l3Offset does not get looked at for MPLS packet. >+ break; >+ } >+ >+ case OVS_ACTION_ATTR_POP_MPLS: >+ { >+ if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic >!= NULL >+ || ovsFwdCtx.tunnelRxNic != NULL) { >+ status = OvsOutputBeforeSetAction(&ovsFwdCtx); >+ if (status != NDIS_STATUS_SUCCESS) { >+ dropReason = L"OVS-adding destination failed"; >+ goto dropit; >+ } >+ } >+ >+ status = OvsActionMplsPop(&ovsFwdCtx, NlAttrGetBe16(a)); >+ if (status != NDIS_STATUS_SUCCESS) { >+ dropReason = L"OVS-pop MPLS action failed"; >+ goto dropit; >+ } >+ layers->l3Offset -= MPLS_HLEN; >+ layers->l4Offset -= MPLS_HLEN; >+ break; >+ } >+ > case OVS_ACTION_ATTR_USERSPACE: > { > PNL_ATTR userdataAttr; >diff --git a/datapath-windows/ovsext/DpInternal.h >b/datapath-windows/ovsext/DpInternal.h >index 466a33a..b9403d0 100644 >--- a/datapath-windows/ovsext/DpInternal.h >+++ b/datapath-windows/ovsext/DpInternal.h >@@ -20,6 +20,7 @@ > #include <netioapi.h> > #define IFNAMSIZ IF_NAMESIZE > #include "../ovsext/Netlink/Netlink.h" >+#include "Mpls.h" > > #define OVS_DP_NUMBER ((uint32_t) 0) > >@@ -125,7 +126,7 @@ typedef struct L2Key { > uint8_t dlDst[6]; /* Ethernet destination address. */ > ovs_be16 vlanTci; /* If 802.1Q, TCI | VLAN_CFI; otherwise >0. */ > ovs_be16 dlType; /* Ethernet frame type. */ >-} L2Key; /* Size of 24 byte. */ >+} L2Key; /* Size of 24 byte. */ > > /* Number of packet attributes required to store OVS tunnel key. */ > #define NUM_PKT_ATTR_REQUIRED 3 >@@ -147,7 +148,12 @@ typedef union OvsIPv4TunnelKey { > }; > }; > uint64_t attr[NUM_PKT_ATTR_REQUIRED]; >-} OvsIPv4TunnelKey; >+} OvsIPv4TunnelKey; /* Size of 24 byte. */ >+ >+typedef struct MplsKey { >+ ovs_be32 lse; /* MPLS topmost label stack entry. */ >+ uint8 pad[4]; >+} MplsKey; /* Size of 8 bytes. */ > > typedef __declspec(align(8)) struct OvsFlowKey { > OvsIPv4TunnelKey tunKey; /* 24 bytes */ >@@ -157,6 +163,7 @@ typedef __declspec(align(8)) struct OvsFlowKey { > ArpKey arpKey; /* size 24 */ > Ipv6Key ipv6Key; /* size 48 */ > Icmp6Key icmp6Key; /* size 72 */ >+ MplsKey mplsKey; /* size 8 */ These network header structures we have are part of a union, and so far they are mutually exclusive. With MplsKey, it would be the same, I suppose. Do you mind adding a comment saying these headers must be mutually exclusive. > }; > } OvsFlowKey; > >diff --git a/datapath-windows/ovsext/Ethernet.h >b/datapath-windows/ovsext/Ethernet.h >index 22aa27c..1d69d47 100644 >--- a/datapath-windows/ovsext/Ethernet.h >+++ b/datapath-windows/ovsext/Ethernet.h >@@ -66,6 +66,8 @@ typedef enum { > ETH_TYPE_CDP = 0x2000, > ETH_TYPE_802_1PQ = 0x8100, // not really a DIX type, but used as >such > ETH_TYPE_LLC = 0xFFFF, // 0xFFFF is IANA reserved, used to >mark LLC >+ ETH_TYPE_MPLS = 0x8847, >+ ETH_TYPE_MPLS_MCAST = 0x8848, > } Eth_DixType; > > typedef enum { >diff --git a/datapath-windows/ovsext/Flow.c >b/datapath-windows/ovsext/Flow.c >index 31ddc66..678c841 100644 >--- a/datapath-windows/ovsext/Flow.c >+++ b/datapath-windows/ovsext/Flow.c >@@ -80,6 +80,8 @@ static NTSTATUS _MapFlowIpv6KeyToNlKey(PNL_BUFFER nlBuf, > Icmp6Key *ipv6FlowPutIcmpKey); > static NTSTATUS _MapFlowArpKeyToNlKey(PNL_BUFFER nlBuf, > ArpKey *arpFlowPutKey); >+static NTSTATUS _MapFlowMplsKeyToNlKey(PNL_BUFFER nlBuf, >+ MplsKey *mplsFlowPutKey); > > static NTSTATUS OvsDoDumpFlows(OvsFlowDumpInput *dumpInput, > OvsFlowDumpOutput *dumpOutput, >@@ -108,7 +110,7 @@ const NL_POLICY nlFlowPolicy[] = { > > /* For Parsing nested OVS_FLOW_ATTR_KEY attributes. > * Some of the attributes like OVS_KEY_ATTR_RECIRC_ID >- * & OVS_KEY_ATTR_MPLS are not supported yet. */ >+ * are not supported yet. */ > > const NL_POLICY nlFlowKeyPolicy[] = { > [OVS_KEY_ATTR_ENCAP] = {.type = NL_A_VAR_LEN, .optional = TRUE}, >@@ -872,6 +874,13 @@ MapFlowKeyToNlKey(PNL_BUFFER nlBuf, > break; > } > >+ case ETH_TYPE_MPLS: >+ case ETH_TYPE_MPLS_MCAST: { >+ MplsKey *mplsFlowPutKey = &(flowKey->mplsKey); >+ rc = _MapFlowMplsKeyToNlKey(nlBuf, mplsFlowPutKey); >+ break; >+ } >+ > default: > break; > } >@@ -1194,6 +1203,31 @@ done: > > /* > >*------------------------------------------------------------------------- >--- >+ * _MapFlowMplsKeyToNlKey -- >+ * Maps _MapFlowMplsKeyToNlKey to OVS_KEY_ATTR_MPLS attribute. >+ >*------------------------------------------------------------------------- >--- >+ */ >+static NTSTATUS >+_MapFlowMplsKeyToNlKey(PNL_BUFFER nlBuf, MplsKey *mplsFlowPutKey) >+{ >+ NTSTATUS rc = STATUS_SUCCESS; >+ struct ovs_key_mpls *mplsKey; >+ >+ mplsKey = (struct ovs_key_mpls *) >+ NlMsgPutTailUnspecUninit(nlBuf, OVS_KEY_ATTR_MPLS, >sizeof(*mplsKey)); >+ if (!mplsKey) { >+ rc = STATUS_UNSUCCESSFUL; >+ goto done; >+ } >+ >+ mplsKey->mpls_lse = mplsFlowPutKey->lse; >+ >+done: >+ return rc; >+} >+ >+/* >+ >*------------------------------------------------------------------------- >--- > * _MapNlToFlowPut -- > * Maps input netlink message to OvsFlowPut. > >*------------------------------------------------------------------------- >--- >@@ -1469,8 +1503,26 @@ _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs, > arpFlowPutKey->pad[1] = 0; > arpFlowPutKey->pad[2] = 0; > destKey->l2.keyLen += OVS_ARP_KEY_SIZE; >- break; > } >+ break; >+ } >+ case ETH_TYPE_MPLS: >+ case ETH_TYPE_MPLS_MCAST: { >+ >+ if (keyAttrs[OVS_KEY_ATTR_MPLS]) { >+ MplsKey *mplsFlowPutKey = &destKey->mplsKey; >+ const struct ovs_key_mpls *mplsKey; >+ >+ mplsKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_MPLS]); >+ >+ mplsFlowPutKey->lse = mplsKey->mpls_lse; >+ mplsFlowPutKey->pad[0] = 0; >+ mplsFlowPutKey->pad[1] = 0; >+ mplsFlowPutKey->pad[2] = 0; >+ mplsFlowPutKey->pad[3] = 0; >+ destKey->l2.keyLen += sizeof(MplsKey); XXX: This can be problematic if packet is both IP + MPLS. >+ } >+ break; > } > } > } >@@ -1734,10 +1786,10 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > flow->l2.vlanTci = 0; > } > /* >- * XXX >- * Please note after this point, src mac and dst mac should >- * not be accessed through eth >- */ >+ * XXX >+ * Please note after this point, src mac and dst mac should >+ * not be accessed through eth >+ */ > eth = (Eth_Header *)((UINT8 *)eth + offset); > } > >@@ -1868,6 +1920,44 @@ OvsExtractFlow(const NET_BUFFER_LIST *packet, > memcpy(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH); > } > } >+ } else if (OvsEthertypeIsMpls(flow->l2.dlType)) { >+ MPLSHdr mplsStorage; >+ const MPLSHdr *mpls; >+ MplsKey *mplsKey = &flow->mplsKey; >+ ((UINT64 *)mplsKey)[0] = 0; >+ >+ /* >+ * In the presence of an MPLS label stack the end of the L2 >+ * header and the beginning of the L3 header differ. >+ * >+ * A network packet may contain multiple MPLS labels, but we >+ * are only interested in the topmost label stack entry. >+ * >+ * Advance network header to the beginning of the L3 header. >+ * layers->l3Offset corresponds to the end of the L2 header. >+ */ >+ for (UINT32 i = 0; i < FLOW_MAX_MPLS_LABELS; i++) { >+ mpls = OvsGetMpls(packet, layers->l3Offset, &mplsStorage); >+ if (!mpls) { >+ break; >+ } >+ >+ /* Keep only the topmost MPLS label stack entry. */ >+ if (i == 0) { >+ mplsKey->lse = mpls->lse; >+ } >+ >+ layers->l3Offset += MPLS_HLEN; >+ layers->l4Offset += MPLS_HLEN; >+ >+ if (mpls->lse & htonl(MPLS_BOS_MASK)) { >+ /* >+ * Bottom of Stack bit is set, which means there are >no >+ * remaining MPLS labels in the packet. >+ */ minor: indentation of comment. _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev