Alin, I had some minor comments and added them inline. Other than that, looks good to me. Acked-by: Sorin Vinturis <svintu...@cloudbasesolutions.com>
-----Original Message----- From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Alin Serdean Sent: Wednesday, 2 December, 2015 22:19 To: dev@openvswitch.org Subject: [ovs-dev] [PATCH 3/3 v2] datapath-windows: Add GRE TEB support for windows datapath This patch introduces the support for GRE TEB (transparent ethernet bridging) for the windows datapath. The GRE support is based on http://tools.ietf.org/html/rfc2890 and supports only the GRE protocol type 0x6558 (transparent ethernet bridging) like its linux counterpart. Util.h: define the GRE pool tag Vport.c/h: sort the includes alphabetically add the function OvsFindTunnelVportByPortType which searches the tunnelVportsArray for a given port type Actions.c : sort the includes alphabetically call the GRE encapsulation / decapsulation functions when needed Gre.c/h : add GRE type defines add initialization/cleanup functions add encapsulation / decapsulation functions with software offloads (hardware offloads will be added in a separate patch) with LSO(TSO) support Tested using: PSPING (https://technet.microsoft.com/en-us/sysinternals/psping.aspx) (ICMP, TCP, UDP) with various packet lengths IPERF3 (https://iperf.fr/iperf-download.php) (TCP, UDP) with various options Signed-off-by: Alin Gabriel Serdean <aserd...@cloudbasesolutions.com> --- v2: add Gre.c/h to automake.mk EXTRA_DIST --- datapath-windows/automake.mk | 20 +- datapath-windows/ovsext/Actions.c | 71 +++-- datapath-windows/ovsext/Gre.c | 456 +++++++++++++++++++++++++++++++++ datapath-windows/ovsext/Gre.h | 113 ++++++++ datapath-windows/ovsext/Util.h | 1 + datapath-windows/ovsext/Vport.c | 43 +++- datapath-windows/ovsext/Vport.h | 14 +- datapath-windows/ovsext/ovsext.vcxproj | 2 + 8 files changed, 676 insertions(+), 44 deletions(-) create mode 100644 datapath-windows/ovsext/Gre.c create mode 100644 datapath-windows/ovsext/Gre.h diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index ed48c69..7f12d92 100644 --- 
a/datapath-windows/automake.mk +++ b/datapath-windows/automake.mk @@ -4,45 +4,49 @@ EXTRA_DIST += \ datapath-windows/Package/package.VcxProj \ datapath-windows/Package/package.VcxProj.user \ datapath-windows/include/OvsDpInterfaceExt.h \ + datapath-windows/misc/OVS.psm1 \ datapath-windows/misc/install.cmd \ datapath-windows/misc/uninstall.cmd \ - datapath-windows/misc/OVS.psm1 \ datapath-windows/ovsext.sln \ - datapath-windows/ovsext/Datapath.c \ - datapath-windows/ovsext/Datapath.h \ - datapath-windows/ovsext/DpInternal.h\ datapath-windows/ovsext/Actions.c \ datapath-windows/ovsext/Atomic.h \ datapath-windows/ovsext/BufferMgmt.c \ datapath-windows/ovsext/BufferMgmt.h \ datapath-windows/ovsext/Checksum.c \ datapath-windows/ovsext/Checksum.h \ + datapath-windows/ovsext/Datapath.c \ + datapath-windows/ovsext/Datapath.h \ datapath-windows/ovsext/Debug.c \ datapath-windows/ovsext/Debug.h \ + datapath-windows/ovsext/DpInternal.h\ datapath-windows/ovsext/Driver.c \ datapath-windows/ovsext/Ethernet.h \ datapath-windows/ovsext/Event.c \ datapath-windows/ovsext/Event.h \ datapath-windows/ovsext/Flow.c \ datapath-windows/ovsext/Flow.h \ + datapath-windows/ovsext/Gre.h \ + datapath-windows/ovsext/Gre.c \ datapath-windows/ovsext/IpHelper.c \ datapath-windows/ovsext/IpHelper.h \ datapath-windows/ovsext/Jhash.c \ datapath-windows/ovsext/Jhash.h \ + datapath-windows/ovsext/NetProto.h \ datapath-windows/ovsext/Netlink/Netlink.c \ datapath-windows/ovsext/Netlink/Netlink.h \ datapath-windows/ovsext/Netlink/NetlinkBuf.c \ datapath-windows/ovsext/Netlink/NetlinkBuf.h \ datapath-windows/ovsext/Netlink/NetlinkError.h \ datapath-windows/ovsext/Netlink/NetlinkProto.h \ - datapath-windows/ovsext/NetProto.h \ datapath-windows/ovsext/Oid.c \ datapath-windows/ovsext/Oid.h \ datapath-windows/ovsext/PacketIO.c \ datapath-windows/ovsext/PacketIO.h \ datapath-windows/ovsext/PacketParser.c \ datapath-windows/ovsext/PacketParser.h \ - datapath-windows/ovsext/Switch.c \ + 
datapath-windows/ovsext/Stt.c \ + datapath-windows/ovsext/Stt.h \ + datapath-windows/ovsext/Switch.c \ datapath-windows/ovsext/Switch.h \ datapath-windows/ovsext/Tunnel.c \ datapath-windows/ovsext/Tunnel.h \ @@ -51,13 +55,11 @@ EXTRA_DIST += \ datapath-windows/ovsext/Types.h \ datapath-windows/ovsext/User.c \ datapath-windows/ovsext/User.h \ - datapath-windows/ovsext/Util.c \ + datapath-windows/ovsext/Util.c \ datapath-windows/ovsext/Util.h \ datapath-windows/ovsext/Vport.c \ datapath-windows/ovsext/Vport.h \ datapath-windows/ovsext/Vxlan.c \ - datapath-windows/ovsext/Stt.h \ - datapath-windows/ovsext/Stt.c \ datapath-windows/ovsext/Vxlan.h \ datapath-windows/ovsext/ovsext.inf \ datapath-windows/ovsext/ovsext.rc \ diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index e902983..6b2a191 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -16,16 +16,17 @@ #include "precomp.h" -#include "Switch.h" -#include "Vport.h" +#include "Checksum.h" #include "Event.h" -#include "User.h" -#include "NetProto.h" #include "Flow.h" -#include "Vxlan.h" -#include "Stt.h" -#include "Checksum.h" +#include "Gre.h" +#include "NetProto.h" #include "PacketIO.h" +#include "Stt.h" +#include "Switch.h" +#include "User.h" +#include "Vport.h" +#include "Vxlan.h" #ifdef OVS_DBG_MOD #undef OVS_DBG_MOD @@ -34,6 +35,8 @@ #include "Debug.h" typedef struct _OVS_ACTION_STATS { + UINT64 rxGre; + UINT64 txGre; UINT64 rxVxlan; UINT64 txVxlan; UINT64 rxStt; @@ -205,27 +208,35 @@ OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx, /* XXX: we should also check for the length of the UDP payload to pick * packets only if they are at least VXLAN header size. 
*/ - if (!flowKey->ipKey.nwFrag && - flowKey->ipKey.nwProto == IPPROTO_UDP) { - UINT16 dstPort = ntohs(flowKey->ipKey.l4.tpDst); - tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext, - dstPort, - OVS_VPORT_TYPE_VXLAN); - if (tunnelVport) { - ovsActionStats.rxVxlan++; - } - } else if (!flowKey->ipKey.nwFrag && - flowKey->ipKey.nwProto == IPPROTO_TCP) { + if (!flowKey->ipKey.nwFrag) { UINT16 dstPort = htons(flowKey->ipKey.l4.tpDst); - tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext, - dstPort, - OVS_VPORT_TYPE_STT); - if (tunnelVport) { - ovsActionStats.rxStt++; + switch (flowKey->ipKey.nwProto) { + case IPPROTO_GRE: + tunnelVport = OvsFindTunnelVportByPortType(ovsFwdCtx->switchContext, + OVS_VPORT_TYPE_GRE); + if (tunnelVport) { + ovsActionStats.rxGre++; + } + break; + case IPPROTO_TCP: + tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext, + dstPort, + OVS_VPORT_TYPE_STT); + if (tunnelVport) { + ovsActionStats.rxStt++; + } + break; + case IPPROTO_UDP: + tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext, + dstPort, + OVS_VPORT_TYPE_VXLAN); + if (tunnelVport) { + ovsActionStats.rxVxlan++; + } + break; } } - // We might get tunnel packets even before the tunnel gets initialized. if (tunnelVport) { ASSERT(ovsFwdCtx->tunnelRxNic == NULL); @@ -306,6 +317,9 @@ OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx, /* Tunnel the packet only if tunnel context is set. */ if (ovsFwdCtx->tunKey.dst != 0) { switch(dstVport->ovsType) { + case OVS_VPORT_TYPE_GRE: + ovsActionStats.txGre++; + break; case OVS_VPORT_TYPE_VXLAN: ovsActionStats.txVxlan++; break; @@ -652,6 +666,11 @@ OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx) /* Do the encap. Encap function does not consume the NBL. 
*/ switch(ovsFwdCtx->tunnelTxNic->ovsType) { + case OVS_VPORT_TYPE_GRE: + status = OvsEncapGre(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl, + &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext, + &ovsFwdCtx->layers, &newNbl); + break; case OVS_VPORT_TYPE_VXLAN: status = OvsEncapVxlan(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl, &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext, @@ -724,6 +743,10 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx) */ switch(tunnelRxVport->ovsType) { + case OVS_VPORT_TYPE_GRE: + status = OvsDecapGre(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + &ovsFwdCtx->tunKey, &newNbl); + break; case OVS_VPORT_TYPE_VXLAN: status = OvsDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, &ovsFwdCtx->tunKey, &newNbl); diff --git a/datapath-windows/ovsext/Gre.c b/datapath-windows/ovsext/Gre.c new file mode 100644 index 0000000..de914be --- /dev/null +++ b/datapath-windows/ovsext/Gre.c @@ -0,0 +1,456 @@ +/* + * Copyright (c) 2015 Cloudbase Solutions Srl + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "precomp.h" + +#include "Atomic.h" +#include "Checksum.h" +#include "Flow.h" +#include "Gre.h" +#include "IpHelper.h" +#include "NetProto.h" +#include "PacketIO.h" +#include "PacketParser.h" +#include "Switch.h" +#include "User.h" +#include "Util.h" +#include "Vport.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_GRE +#include "Debug.h" + +static NDIS_STATUS +OvsDoEncapGre(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, + const OvsIPv4TunnelKey *tunKey, + const POVS_FWD_INFO fwdInfo, + POVS_PACKET_HDR_INFO layers, + POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST *newNbl); + +/* + * +----------------------------------------------------------------------- +--- + * OvsInitGreTunnel -- + * Initialize GRE tunnel module. + * +----------------------------------------------------------------------- +--- + */ +NTSTATUS +OvsInitGreTunnel(POVS_VPORT_ENTRY vport, + UINT16 udpDestPort) SV: udpDestPort should be renamed with greDestPort; +{ + POVS_GRE_VPORT grePort; + + grePort = (POVS_GRE_VPORT)OvsAllocateMemoryWithTag(sizeof(*grePort), + OVS_GRE_POOL_TAG); + if (!grePort) { + OVS_LOG_ERROR("Insufficient memory, can't allocate OVS_GRE_VPORT"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(grePort, sizeof(*grePort)); + grePort->dstPort = udpDestPort; + vport->priv = (PVOID)grePort; + return STATUS_SUCCESS; +} + +/* + * +----------------------------------------------------------------------- +--- + * OvsCleanupGreTunnel -- + * Cleanup GRE Tunnel module. + * +----------------------------------------------------------------------- +--- + */ +void +OvsCleanupGreTunnel(POVS_VPORT_ENTRY vport) { + if (vport->ovsType != OVS_VPORT_TYPE_GRE || + vport->priv == NULL) { + return; + } + + OvsFreeMemoryWithTag(vport->priv, OVS_GRE_POOL_TAG); + vport->priv = NULL; +} + +/* + * +----------------------------------------------------------------------- +--- + * OvsEncapGre -- + * Encapsulates a packet with an GRE header. 
+ * +----------------------------------------------------------------------- +--- + */ +NDIS_STATUS +OvsEncapGre(POVS_VPORT_ENTRY vport, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + POVS_SWITCH_CONTEXT switchContext, + POVS_PACKET_HDR_INFO layers, + PNET_BUFFER_LIST *newNbl) +{ + OVS_FWD_INFO fwdInfo; + NDIS_STATUS status; + + UNREFERENCED_PARAMETER(switchContext); SV: As Sai said, the above line should be removed. + status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo); + if (status != STATUS_SUCCESS) { + OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL); + return NDIS_STATUS_FAILURE; + } + + status = OvsDoEncapGre(vport, curNbl, tunKey, &fwdInfo, layers, + switchContext, newNbl); + return status; +} + +/* + * +----------------------------------------------------------------------- +--- + * OvsDoEncapGre -- + * Internal utility function which actually does the GRE encap. + * +----------------------------------------------------------------------- +--- + */ +NDIS_STATUS +OvsDoEncapGre(POVS_VPORT_ENTRY vport, + PNET_BUFFER_LIST curNbl, + const OvsIPv4TunnelKey *tunKey, + const POVS_FWD_INFO fwdInfo, + POVS_PACKET_HDR_INFO layers, + POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST *newNbl) { + NDIS_STATUS status; + PNET_BUFFER curNb; + PMDL curMdl; + PUINT8 bufferStart; + EthHdr *ethHdr; + IPHdr *ipHdr; + PGREHdr greHdr; + POVS_GRE_VPORT vportGre; + UINT32 headRoom = GreTunHdrSize(tunKey->flags); #if DBG + UINT32 counterHeadRoom; +#endif + UINT32 packetLength; + + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + packetLength = NET_BUFFER_DATA_LENGTH(curNb); + + if (layers->isTcp) { + NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; + + tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl, + TcpLargeSendNetBufferListInfo); + OVS_LOG_TRACE("MSS %u packet len %u", tsoInfo.LsoV1Transmit.MSS, + packetLength); + if (tsoInfo.LsoV1Transmit.MSS) { + OVS_LOG_TRACE("l4Offset %d", layers->l4Offset); + *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers, + 
tsoInfo.LsoV1Transmit.MSS, headRoom); + if (*newNbl == NULL) { + OVS_LOG_ERROR("Unable to segment NBL"); + return NDIS_STATUS_FAILURE; + } + /* Clear out LSO flags after this point */ + NET_BUFFER_LIST_INFO(*newNbl, TcpLargeSendNetBufferListInfo) = 0; + } + } + + vportGre = (POVS_GRE_VPORT)GetOvsVportPriv(vport); + ASSERT(vportGre); + + /* If we didn't split the packet above, make a copy now */ + if (*newNbl == NULL) { + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom, + FALSE /*NBL info*/); + if (*newNbl == NULL) { + OVS_LOG_ERROR("Unable to copy NBL"); + return NDIS_STATUS_FAILURE; + } + /* + * To this point we do not have VXLAN offloading. + * Apply defined checksums + */ + curNb = NET_BUFFER_LIST_FIRST_NB(*newNbl); + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, + LowPagePriority); + if (!bufferStart) { + status = NDIS_STATUS_RESOURCES; + goto ret_error; + } + + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, + + TcpIpChecksumNetBufferListInfo); + + bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + + if (layers->isIPv4) { + IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset); + + if (csumInfo.Transmit.IpHeaderChecksum) { + ip->check = 0; + ip->check = IPChecksum((UINT8 *)ip, 4 * ip->ihl, 0); + } + + if (layers->isTcp && csumInfo.Transmit.TcpChecksum) { + UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset); + TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset); + tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, + IPPROTO_TCP, csumLength); + tcp->check = CalculateChecksumNB(curNb, csumLength, + (UINT32)(layers->l4Offset)); + } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) { + UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset); + UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip); + udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, + IPPROTO_UDP, csumLength); + udp->check = 
CalculateChecksumNB(curNb, csumLength, + (UINT32)(layers->l4Offset)); + } + } else if (layers->isIPv6) { + IPv6Hdr *ip = (IPv6Hdr *)(bufferStart + layers->l3Offset); + + if (layers->isTcp && csumInfo.Transmit.TcpChecksum) { + UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset); + TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset); + tcp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr, + (UINT32 *) &ip->daddr, + IPPROTO_TCP, csumLength); + tcp->check = CalculateChecksumNB(curNb, csumLength, + (UINT32)(layers->l4Offset)); + } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) { + UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset); + UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip); + udp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr, + (UINT32 *) &ip->daddr, + IPPROTO_UDP, csumLength); + udp->check = CalculateChecksumNB(curNb, csumLength, + (UINT32)(layers->l4Offset)); + } + } + /* Clear out TcpIpChecksumNetBufferListInfo flag */ + NET_BUFFER_LIST_INFO(*newNbl, TcpIpChecksumNetBufferListInfo) = 0; + } + + curNbl = *newNbl; + for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL; + curNb = curNb->Next) { +#if DBG + counterHeadRoom = headRoom; +#endif + status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL); + if (status != NDIS_STATUS_SUCCESS) { + goto ret_error; + } + + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, + LowPagePriority); + if (!bufferStart) { + status = NDIS_STATUS_RESOURCES; + goto ret_error; + } + + bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + if (NET_BUFFER_NEXT_NB(curNb)) { + OVS_LOG_TRACE("nb length %u next %u", + NET_BUFFER_DATA_LENGTH(curNb), + NET_BUFFER_DATA_LENGTH(curNb->Next)); + } + + /* L2 header */ + ethHdr = (EthHdr *)bufferStart; + ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) == + (PCHAR)&fwdInfo->srcMacAddr); + NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr, + sizeof ethHdr->Destination + 
sizeof ethHdr->Source); + ethHdr->Type = htons(ETH_TYPE_IPV4); #if DBG + counterHeadRoom -= sizeof *ethHdr; #endif + + /* IP header */ + ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); + + ipHdr->ihl = sizeof *ipHdr / 4; + ipHdr->version = IPPROTO_IPV4; + ipHdr->tos = tunKey->tos; + ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr); + ipHdr->id = (uint16)atomic_add64(&vportGre->ipId, + NET_BUFFER_DATA_LENGTH(curNb)); + ipHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ? + IP_DF_NBO : 0; + ipHdr->ttl = tunKey->ttl ? tunKey->ttl : 64; + ipHdr->protocol = IPPROTO_GRE; + ASSERT(tunKey->dst == fwdInfo->dstIpAddr); + ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0); + ipHdr->saddr = fwdInfo->srcIpAddr; + ipHdr->daddr = fwdInfo->dstIpAddr; + + ipHdr->check = 0; + ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0); +#if DBG + counterHeadRoom -= sizeof *ipHdr; #endif + + /* GRE header */ + greHdr = (GREHdr *)((PCHAR)ipHdr + sizeof *ipHdr); + greHdr->flags = OvsTunnelFlagsToGreFlags(tunKey->flags); + greHdr->protocolType = GRE_NET_TEB; #if DBG + counterHeadRoom -= sizeof *greHdr; #endif + + PCHAR currentOffset = (PCHAR)greHdr + sizeof *greHdr; + + if (tunKey->flags & OVS_TNL_F_CSUM) { + RtlZeroMemory(currentOffset, 4); + currentOffset += 4; +#if DBG + counterHeadRoom -= 4; +#endif + } + + if (tunKey->flags & OVS_TNL_F_KEY) { + RtlZeroMemory(currentOffset, 4); + UINT32 key = (tunKey->tunnelId >> 32); + RtlCopyMemory(currentOffset, &key, sizeof key); + currentOffset += 4; +#if DBG + counterHeadRoom -= 4; +#endif + } + + if (tunKey->flags & OVS_TNL_F_SEQ) { + RtlZeroMemory(currentOffset, 4); + currentOffset += 4; +#if DBG + counterHeadRoom -= 4; +#endif + } + +#if DBG + ASSERT(counterHeadRoom == 0); +#endif + + } + return STATUS_SUCCESS; + +ret_error: + OvsCompleteNBL(switchContext, *newNbl, TRUE); + *newNbl = NULL; + return status; +} + +NDIS_STATUS +OvsDecapGre(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST 
curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl) +{ + PNET_BUFFER curNb; + PMDL curMdl; + EthHdr *ethHdr; + IPHdr *ipHdr; + GREHdr *greHdr; + UINT32 tunnelSize = 0, packetLength = 0; + UINT32 headRoom = 0; + PUINT8 bufferStart; + NDIS_STATUS status; + + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + packetLength = NET_BUFFER_DATA_LENGTH(curNb); + tunnelSize = GreTunHdrSize(tunKey->flags); + if (packetLength <= tunnelSize) { + return NDIS_STATUS_INVALID_LENGTH; + } + + /* + * Create a copy of the NBL so that we have all the headers in one MDL. + */ + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, + tunnelSize + OVS_DEFAULT_COPY_SIZE, 0, + TRUE /*copy NBL info */); + + if (*newNbl == NULL) { + return NDIS_STATUS_RESOURCES; + } + + curNbl = *newNbl; + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority) + + NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + if (!bufferStart) { + status = NDIS_STATUS_RESOURCES; + goto dropNbl; + } + + ethHdr = (EthHdr *)bufferStart; + headRoom += sizeof *ethHdr; + + ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); + tunKey->src = ipHdr->saddr; + tunKey->dst = ipHdr->daddr; + tunKey->tos = ipHdr->tos; + tunKey->ttl = ipHdr->ttl; + tunKey->pad = 0; + headRoom += sizeof *ipHdr; + + greHdr = (GREHdr *)((PCHAR)ipHdr + sizeof *ipHdr); + headRoom += sizeof *greHdr; + + /* Validate if GRE header protocol type. 
*/ + if (greHdr->protocolType != GRE_NET_TEB) { + status = STATUS_NDIS_INVALID_PACKET; + goto dropNbl; + } + + PCHAR currentOffset = (PCHAR)greHdr + sizeof *greHdr; + + if (greHdr->flags & GRE_CSUM) { + tunKey->flags |= OVS_TNL_F_CSUM; + currentOffset += 4; + headRoom += 4; + } + + if (greHdr->flags & GRE_KEY) { + tunKey->flags |= OVS_TNL_F_KEY; + UINT32 key = 0; + RtlCopyMemory(&key, currentOffset, 4); + tunKey->tunnelId = (UINT64)key << 32; + currentOffset += 4; + headRoom += 4; + } + + if (greHdr->flags & GRE_SEQ) { + tunKey->flags |= OVS_TNL_F_SEQ; + currentOffset += 4; + headRoom += 4; + } + + /* Clear out the receive flag for the inner packet. */ + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0; + NdisAdvanceNetBufferDataStart(curNb, GreTunHdrSize(tunKey->flags), FALSE, + NULL); + ASSERT(headRoom == GreTunHdrSize(tunKey->flags)); + return NDIS_STATUS_SUCCESS; + +dropNbl: + OvsCompleteNBL(switchContext, *newNbl, TRUE); + *newNbl = NULL; + return status; +} diff --git a/datapath-windows/ovsext/Gre.h b/datapath-windows/ovsext/Gre.h new file mode 100644 index 0000000..71ff05e --- /dev/null +++ b/datapath-windows/ovsext/Gre.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2015 Cloudbase Solutions Srl + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __GRE_H_ +#define __GRE_H_ 1 + +#include "NetProto.h" +#include "Flow.h" + +typedef struct _OVS_GRE_VPORT { + UINT16 dstPort; + UINT64 inPkts; + UINT64 outPkts; + UINT64 slowInPkts; + UINT64 slowOutPkts; + UINT64 filterID; + UINT64 ipId; + /* + * To be filled + */ +} OVS_GRE_VPORT, *POVS_GRE_VPORT; + + +/* GRE RFC 2890 header based on http://tools.ietf.org/html/rfc2890 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |C| |K|S| Reserved0 | Ver | Protocol Type | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Checksum (optional) | Reserved1 (Optional) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Key (optional) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Sequence Number (Optional) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + +typedef struct GREHdr { + UINT16 flags; + UINT16 protocolType; +} GREHdr, *PGREHdr; + +/* Transparent Ethernet Bridging */ +#define GRE_NET_TEB 0x5865 +/* GRE Flags*/ +#define GRE_CSUM 0x0080 +#define GRE_KEY 0x0020 +#define GRE_SEQ 0x0010 + +NTSTATUS OvsInitGreTunnel(POVS_VPORT_ENTRY vport, + UINT16 udpDestPort); + +VOID OvsCleanupGreTunnel(POVS_VPORT_ENTRY vport); + + +void OvsCleanupGreTunnel(POVS_VPORT_ENTRY vport); + +NDIS_STATUS OvsEncapGre(POVS_VPORT_ENTRY vport, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + POVS_SWITCH_CONTEXT switchContext, + POVS_PACKET_HDR_INFO layers, + PNET_BUFFER_LIST *newNbl); + +NDIS_STATUS OvsDecapGre(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl); + +static __inline UINT16 +OvsTunnelFlagsToGreFlags(UINT16 tunnelflags) { + UINT16 flags = 0; + + if (tunnelflags & OVS_TNL_F_CSUM) + flags |= GRE_CSUM; + + if (tunnelflags & OVS_TNL_F_KEY) + flags |= GRE_KEY; + + if (tunnelflags & OVS_TNL_F_SEQ) + flags 
|= GRE_SEQ; + + return flags; +} + +static __inline UINT32 +GreTunHdrSize(UINT16 flags) +{ + UINT32 sum = sizeof(EthHdr) + sizeof(IPHdr) + sizeof(GREHdr); + sum += (flags & OVS_TNL_F_CSUM) ? + 4 : 0; + sum += (flags & OVS_TNL_F_KEY) ? + 4 : 0; + sum += (flags & OVS_TNL_F_SEQ) ? + 4 : 0; + + return sum; +} + +#endif /*__GRE_H_ */ diff --git a/datapath-windows/ovsext/Util.h b/datapath-windows/ovsext/Util.h index e5ba72b..a81c723 100644 --- a/datapath-windows/ovsext/Util.h +++ b/datapath-windows/ovsext/Util.h @@ -34,6 +34,7 @@ #define OVS_USER_POOL_TAG 'USVO' #define OVS_VPORT_POOL_TAG 'PSVO' #define OVS_STT_POOL_TAG 'RSVO' +#define OVS_GRE_POOL_TAG 'GSVO' #define OVS_TUNFLT_POOL_TAG 'WSVO' VOID *OvsAllocateMemory(size_t size); diff --git a/datapath-windows/ovsext/Vport.c b/datapath-windows/ovsext/Vport.c index a7576d3..11737a8 100644 --- a/datapath-windows/ovsext/Vport.c +++ b/datapath-windows/ovsext/Vport.c @@ -15,16 +15,18 @@ */ #include "precomp.h" + +#include "Datapath.h" +#include "Event.h" +#include "Gre.h" +#include "IpHelper.h" #include "Jhash.h" +#include "Oid.h" +#include "Stt.h" #include "Switch.h" -#include "Vport.h" -#include "Event.h" #include "User.h" +#include "Vport.h" #include "Vxlan.h" -#include "Stt.h" -#include "IpHelper.h" -#include "Oid.h" -#include "Datapath.h" #ifdef OVS_DBG_MOD #undef OVS_DBG_MOD @@ -700,6 +702,26 @@ OvsFindTunnelVportByDstPort(POVS_SWITCH_CONTEXT switchContext, return NULL; } +POVS_VPORT_ENTRY +OvsFindTunnelVportByPortType(POVS_SWITCH_CONTEXT switchContext, + OVS_VPORT_TYPE ovsPortType) { + POVS_VPORT_ENTRY vport; + PLIST_ENTRY head, link; + UINT16 dstPort = 0; + UINT32 hash = OvsJhashBytes((const VOID *)&dstPort, sizeof(dstPort), + OVS_HASH_BASIS); + head = &(switchContext->tunnelVportsArray[hash & OVS_VPORT_MASK]); + LIST_FORALL(head, link) { + vport = CONTAINING_RECORD(link, OVS_VPORT_ENTRY, tunnelVportLink); + if (vport->ovsType == ovsPortType) { + return vport; + } + } + return NULL; +} + + POVS_VPORT_ENTRY 
OvsFindVportByOvsName(POVS_SWITCH_CONTEXT switchContext, @@ -983,6 +1005,7 @@ OvsInitTunnelVport(PVOID userContext, vport->ovsState = OVS_STATE_PORT_CREATED; switch (ovsType) { case OVS_VPORT_TYPE_GRE: + status = OvsInitGreTunnel(vport, dstPort); break; case OVS_VPORT_TYPE_VXLAN: { @@ -1153,6 +1176,7 @@ InitOvsVportCommon(POVS_SWITCH_CONTEXT switchContext, UINT32 hash; switch(vport->ovsType) { + case OVS_VPORT_TYPE_GRE: case OVS_VPORT_TYPE_VXLAN: case OVS_VPORT_TYPE_STT: { @@ -1242,6 +1266,7 @@ OvsRemoveAndDeleteVport(PVOID usrParamsContext, OvsCleanupSttTunnel(vport); break; case OVS_VPORT_TYPE_GRE: + OvsCleanupGreTunnel(vport); break; case OVS_VPORT_TYPE_NETDEV: if (vport->isExternal) { @@ -1299,7 +1324,8 @@ OvsRemoveAndDeleteVport(PVOID usrParamsContext, RemoveEntryList(&vport->portNoLink); InitializeListHead(&vport->portNoLink); if (OVS_VPORT_TYPE_VXLAN == vport->ovsType || - OVS_VPORT_TYPE_STT == vport->ovsType) { + OVS_VPORT_TYPE_STT == vport->ovsType || + OVS_VPORT_TYPE_GRE == vport->ovsType) { RemoveEntryList(&vport->tunnelVportLink); InitializeListHead(&vport->tunnelVportLink); } @@ -2190,6 +2216,9 @@ OvsNewVportCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, UINT16 transportPortDest = 0; switch (portType) { + case OVS_VPORT_TYPE_GRE: + OvsCleanupGreTunnel(vport); + break; case OVS_VPORT_TYPE_VXLAN: transportPortDest = VXLAN_UDP_PORT; break; diff --git a/datapath-windows/ovsext/Vport.h b/datapath-windows/ovsext/Vport.h index e9f3b03..b11cf79 100644 --- a/datapath-windows/ovsext/Vport.h +++ b/datapath-windows/ovsext/Vport.h @@ -17,9 +17,10 @@ #ifndef __VPORT_H_ #define __VPORT_H_ 1 +#include "Gre.h" +#include "Stt.h" #include "Switch.h" #include "VxLan.h" -#include "Stt.h" #define OVS_MAX_DPPORTS MAXUINT16 #define OVS_DPPORT_NUMBER_INVALID OVS_MAX_DPPORTS @@ -147,6 +148,8 @@ POVS_VPORT_ENTRY OvsFindVportByPortIdAndNicIndex(POVS_SWITCH_CONTEXT switchConte POVS_VPORT_ENTRY OvsFindTunnelVportByDstPort(POVS_SWITCH_CONTEXT switchContext, UINT16 dstPort, 
OVS_VPORT_TYPE ovsVportType); +POVS_VPORT_ENTRY OvsFindTunnelVportByPortType(POVS_SWITCH_CONTEXT switchContext, + OVS_VPORT_TYPE +ovsPortType); NDIS_STATUS OvsAddConfiguredSwitchPorts(struct _OVS_SWITCH_CONTEXT *switchContext); NDIS_STATUS OvsInitConfiguredSwitchNics(struct _OVS_SWITCH_CONTEXT *switchContext); @@ -256,16 +259,19 @@ GetPortFromPriv(POVS_VPORT_ENTRY vport) /* XXX would better to have a commom tunnel "parent" structure */ ASSERT(vportPriv); switch(vport->ovsType) { - case OVS_VPORT_TYPE_VXLAN: - dstPort = ((POVS_VXLAN_VPORT)vportPriv)->dstPort; + case OVS_VPORT_TYPE_GRE: + dstPort = ((POVS_GRE_VPORT)vportPriv)->dstPort; break; case OVS_VPORT_TYPE_STT: dstPort = ((POVS_STT_VPORT)vportPriv)->dstPort; break; + case OVS_VPORT_TYPE_VXLAN: + dstPort = ((POVS_VXLAN_VPORT)vportPriv)->dstPort; + break; default: ASSERT(! "Port is not a tunnel port"); } - ASSERT(dstPort); + ASSERT(dstPort || vport->ovsType == OVS_VPORT_TYPE_GRE); return dstPort; } diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj index 616f688..231ac83 100644 --- a/datapath-windows/ovsext/ovsext.vcxproj +++ b/datapath-windows/ovsext/ovsext.vcxproj @@ -80,6 +80,7 @@ <ClInclude Include="Ethernet.h" /> <ClInclude Include="Event.h" /> <ClInclude Include="Flow.h" /> + <ClInclude Include="Gre.h" /> <ClInclude Include="IpHelper.h" /> <ClInclude Include="Jhash.h" /> <ClInclude Include="Netlink/Netlink.h" /> @@ -172,6 +173,7 @@ <ClCompile Include="Driver.c" /> <ClCompile Include="Event.c" /> <ClCompile Include="Flow.c" /> + <ClCompile Include="Gre.c" /> <ClCompile Include="IpHelper.c" /> <ClCompile Include="Jhash.c" /> <ClCompile Include="Netlink/Netlink.c" /> -- 1.9.5.msysgit.0 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev