This change includes an initial implementation of STT. The following should be added: [1] Checksum offload (SW and HW) [2] LSO (SW and HW) [3] IP layer WFP callout for IP segments
Testing: link layer connection through ping works. File transfer. Signed-off-by: Eitan Eliahu <elia...@vmware.com> Co-authored-by: Saurabh Shah <ssaur...@vmware.com> Signed-off-by: Saurabh Shah <ssaur...@vmware.com> --- [1] Dispatch lock is acquired before action is executed so no race condition possible when retrieving MTU from the external port. [2] Don't call tunnel port encap when there is no external port [3] if the NB is split to multiple MDs NdisGetDataBuffer() takes care of it through the driver provided buffer [4] Address all other comments. Thanks! --- INSTALL.Windows.md | 40 ++-- datapath-windows/automake.mk | 2 + datapath-windows/ovsext/Actions.c | 33 ++- datapath-windows/ovsext/Debug.h | 1 + datapath-windows/ovsext/Stt.c | 368 +++++++++++++++++++++++++++++++++ datapath-windows/ovsext/Stt.h | 88 ++++++++ datapath-windows/ovsext/Switch.h | 3 +- datapath-windows/ovsext/Tunnel.c | 3 +- datapath-windows/ovsext/Util.h | 1 + datapath-windows/ovsext/Vport.c | 24 ++- datapath-windows/ovsext/Vport.h | 22 ++ datapath-windows/ovsext/Vxlan.c | 16 +- datapath-windows/ovsext/Vxlan.h | 9 +- datapath-windows/ovsext/ovsext.vcxproj | 4 +- 14 files changed, 567 insertions(+), 47 deletions(-) create mode 100644 datapath-windows/ovsext/Stt.c create mode 100644 datapath-windows/ovsext/Stt.h diff --git a/INSTALL.Windows.md b/INSTALL.Windows.md index 78af0a1..9552119 100644 --- a/INSTALL.Windows.md +++ b/INSTALL.Windows.md @@ -385,29 +385,31 @@ Hyper-Vs. The following examples demonstrate how it can be done: % ovs-vsctl add-port br-int ovs-port-a tag=900 % ovs-vsctl add-port br-int ovs-port-b tag=900 -Steps to add VXLAN tunnels +Steps to add tunnels -------------------------- -The Windows Open vSwitch implementation support VXLAN tunnels. To add VXLAN +The Windows Open vSwitch implementation support VXLAN and STT tunnels. To add tunnels, the following steps serve as examples. 
Note that, any patch ports created between br-int and br-pif MUST be beleted -prior to adding VXLAN tunnels. - -01> Add the vxlan port between 172.168.201.101 <-> 172.168.201.102 - % ovs-vsctl add-port br-int vxlan-1 - % ovs-vsctl set Interface vxlan-1 type=vxlan - % ovs-vsctl set Interface vxlan-1 options:local_ip=172.168.201.101 - % ovs-vsctl set Interface vxlan-1 options:remote_ip=172.168.201.102 - % ovs-vsctl set Interface vxlan-1 options:in_key=flow - % ovs-vsctl set Interface vxlan-1 options:out_key=flow - -02> Add the vxlan port between 172.168.201.101 <-> 172.168.201.105 - % ovs-vsctl add-port br-int vxlan-2 - % ovs-vsctl set Interface vxlan-2 type=vxlan - % ovs-vsctl set Interface vxlan-2 options:local_ip=172.168.201.102 - % ovs-vsctl set Interface vxlan-2 options:remote_ip=172.168.201.105 - % ovs-vsctl set Interface vxlan-2 options:in_key=flow - % ovs-vsctl set Interface vxlan-2 options:out_key=flow +prior to adding tunnels. + +01> Add the tunnel port between 172.168.201.101 <-> 172.168.201.102 + % ovs-vsctl add-port br-int tun-1 + % ovs-vsctl set Interface tun-1 type=port-type + % ovs-vsctl set Interface tun-1 options:local_ip=172.168.201.101 + % ovs-vsctl set Interface tun-1 options:remote_ip=172.168.201.102 + % ovs-vsctl set Interface tun-1 options:in_key=flow + % ovs-vsctl set Interface tun-1 options:out_key=flow + +02> Add the tunnel port between 172.168.201.101 <-> 172.168.201.105 + % ovs-vsctl add-port br-int tun-2 + % ovs-vsctl set Interface tun-2 type=port-type + % ovs-vsctl set Interface tun-2 options:local_ip=172.168.201.102 + % ovs-vsctl set Interface tun-2 options:remote_ip=172.168.201.105 + % ovs-vsctl set Interface tun-2 options:in_key=flow + % ovs-vsctl set Interface tun-2 options:out_key=flow + + Where port-type is the string stt or vxlan Requirements diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index 9324b3c..a4f5a57 100644 --- a/datapath-windows/automake.mk +++ b/datapath-windows/automake.mk @@ -56,6 +56,8 @@ 
EXTRA_DIST += \ datapath-windows/ovsext/Vport.c \ datapath-windows/ovsext/Vport.h \ datapath-windows/ovsext/Vxlan.c \ + datapath-windows/ovsext/Stt.h \ + datapath-windows/ovsext/Stt.c \ datapath-windows/ovsext/Vxlan.h \ datapath-windows/ovsext/ovsext.inf \ datapath-windows/ovsext/ovsext.rc \ diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index a93fe03..10c18b9 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -23,6 +23,7 @@ #include "NetProto.h" #include "Flow.h" #include "Vxlan.h" +#include "Stt.h" #include "Checksum.h" #include "PacketIO.h" @@ -207,6 +208,10 @@ OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx, flowKey->ipKey.l4.tpDst == VXLAN_UDP_PORT_NBO) { tunnelVport = ovsFwdCtx->switchContext->vxlanVport; ovsActionStats.rxVxlan++; + } else if (!flowKey->ipKey.nwFrag && + flowKey->ipKey.nwProto == IPPROTO_TCP && + flowKey->ipKey.l4.tpDst == STT_DST_PORT_NBO) { + tunnelVport = ovsFwdCtx->switchContext->sttVport; } // We might get tunnel packets even before the tunnel gets initialized. @@ -612,10 +617,11 @@ OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx) * Setup the source port to be the internal port to as to facilitate the * second OvsLookupFlow. 
*/ - if (ovsFwdCtx->switchContext->internalVport == NULL) { + if (ovsFwdCtx->switchContext->internalVport == NULL || + ovsFwdCtx->switchContext->virtualExternalVport == NULL) { OvsClearTunTxCtx(ovsFwdCtx); OvsCompleteNBLForwardingCtx(ovsFwdCtx, - L"OVS-Dropped since internal port is absent"); + L"OVS-Dropped since either internal or external port is absent"); return NDIS_STATUS_FAILURE; } ovsFwdCtx->srcVportNo = @@ -630,8 +636,12 @@ OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx) case OVS_VPORT_TYPE_VXLAN: status = OvsEncapVxlan(ovsFwdCtx->curNbl, &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext, - (VOID *)ovsFwdCtx->completionList, &ovsFwdCtx->layers, &newNbl); + break; + case OVS_VPORT_TYPE_STT: + status = OvsEncapStt(ovsFwdCtx->curNbl, &ovsFwdCtx->tunKey, + ovsFwdCtx->switchContext, + &ovsFwdCtx->layers, &newNbl); break; default: ASSERT(! "Tx: Unhandled tunnel type"); @@ -688,14 +698,19 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx) goto dropNbl; } + /* + * Decap port functions should return a new NBL if it was copied, and + * this new NBL should be setup as the ovsFwdCtx->curNbl. + */ + switch(tunnelRxVport->ovsType) { case OVS_VPORT_TYPE_VXLAN: - /* - * OvsDoDecapVxlan should return a new NBL if it was copied, and - * this new NBL should be setup as the ovsFwdCtx->curNbl. 
- */ - status = OvsDoDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, - &ovsFwdCtx->tunKey, &newNbl); + status = OvsDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + &ovsFwdCtx->tunKey, &newNbl); + break; + case OVS_VPORT_TYPE_STT: + status = OvsDecapStt(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + &ovsFwdCtx->tunKey, &newNbl); break; default: OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n", diff --git a/datapath-windows/ovsext/Debug.h b/datapath-windows/ovsext/Debug.h index a0da5eb..4b7b526 100644 --- a/datapath-windows/ovsext/Debug.h +++ b/datapath-windows/ovsext/Debug.h @@ -40,6 +40,7 @@ #define OVS_DBG_OTHERS BIT32(21) #define OVS_DBG_NETLINK BIT32(22) #define OVS_DBG_TUNFLT BIT32(23) +#define OVS_DBG_STT BIT32(24) #define OVS_DBG_RESERVED BIT32(31) //Please add above OVS_DBG_RESERVED. diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c new file mode 100644 index 0000000..af4d4d0 --- /dev/null +++ b/datapath-windows/ovsext/Stt.c @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2015 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "precomp.h" +#include "NetProto.h" +#include "Switch.h" +#include "Vport.h" +#include "Flow.h" +#include "Stt.h" +#include "IpHelper.h" +#include "Checksum.h" +#include "User.h" +#include "PacketIO.h" +#include "Flow.h" +#include "PacketParser.h" +#include "Atomic.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_STT +#include "Debug.h" + +static NDIS_STATUS +OvsDoEncapStt(PNET_BUFFER_LIST curNbl, const OvsIPv4TunnelKey *tunKey, + const POVS_FWD_INFO fwdInfo, + POVS_PACKET_HDR_INFO layers, + POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST *newNbl); + +/* + * -------------------------------------------------------------------------- + * OvsInitSttTunnel -- + * Initialize STT tunnel module. + * -------------------------------------------------------------------------- + */ +NTSTATUS +OvsInitSttTunnel(POVS_VPORT_ENTRY vport, + UINT16 tcpDestPort) +{ + POVS_STT_VPORT sttPort; + + sttPort = (POVS_STT_VPORT) OvsAllocateMemoryWithTag(sizeof(*sttPort), + OVS_STT_POOL_TAG); + if (!sttPort) { + OVS_LOG_ERROR("Insufficient memory, can't allocate STT_VPORT"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(sttPort, sizeof(*sttPort)); + sttPort->dstPort = tcpDestPort; + vport->priv = (PVOID) sttPort; + return STATUS_SUCCESS; +} + +/* + * -------------------------------------------------------------------------- + * OvsCleanupSttTunnel -- + * Cleanup STT Tunnel module. + * -------------------------------------------------------------------------- + */ +void +OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport) +{ + if (vport->ovsType != OVS_VPORT_TYPE_STT || + vport->priv == NULL) { + return; + } + + OvsFreeMemoryWithTag(vport->priv, OVS_STT_POOL_TAG); + vport->priv = NULL; +} + +/* + * -------------------------------------------------------------------------- + * OvsEncapStt -- + * Encapsulates a packet with an STT header. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsEncapStt(PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + POVS_SWITCH_CONTEXT switchContext, + POVS_PACKET_HDR_INFO layers, + PNET_BUFFER_LIST *newNbl) +{ + OVS_FWD_INFO fwdInfo; + NDIS_STATUS status; + + UNREFERENCED_PARAMETER(switchContext); + status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo); + if (status != STATUS_SUCCESS) { + OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL); + /* + * XXX This case where the ARP table is not populated is + * currently not handled + */ + return NDIS_STATUS_FAILURE; + } + + status = OvsDoEncapStt(curNbl, tunKey, &fwdInfo, layers, switchContext, + newNbl); + return status; +} + +/* + * -------------------------------------------------------------------------- + * OvsDoEncapStt -- + * Internal utility function which actually does the STT encap. + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsDoEncapStt(PNET_BUFFER_LIST curNbl, + const OvsIPv4TunnelKey *tunKey, + const POVS_FWD_INFO fwdInfo, + POVS_PACKET_HDR_INFO layers, + POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST *newNbl) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + PMDL curMdl = NULL; + PNET_BUFFER curNb; + PUINT8 buf = NULL; + EthHdr *outerEthHdr; + IPHdr *outerIpHdr; + TCPHdr *outerTcpHdr; + SttHdr *sttHdr; + UINT32 innerFrameLen, ipTotalLen; + POVS_STT_VPORT vportStt; + UINT32 headRoom = OvsGetSttTunHdrSize(); + UINT32 tcpChksumLen; + POVS_VPORT_ENTRY ovsVport; + + UNREFERENCED_PARAMETER(layers); + + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + if (layers->isTcp) { + NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo; + + lsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl, + TcpLargeSendNetBufferListInfo); + if (lsoInfo.LsoV1Transmit.MSS) { + /* XXX We don't handle LSO yet */ + OVS_LOG_ERROR("LSO on STT is not supported"); + return NDIS_STATUS_FAILURE; + } + } + + ovsVport = 
OvsGetTunnelVport(switchContext, OVS_VPORT_TYPE_STT); + ASSERT(ovsVport); + vportStt = (POVS_STT_VPORT) GetOvsVportPriv(ovsVport); + ASSERT(vportStt); + + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom, + FALSE /*copy NblInfo*/); + if (*newNbl == NULL) { + OVS_LOG_ERROR("Unable to copy NBL"); + return NDIS_STATUS_FAILURE; + } + + curNbl = *newNbl; + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + /* NB Chain should be split before */ + ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); + + innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb); + /* + * External port can't be removed as we hold the dispatch lock + * We also check if the external port was removed beforecalling + * port encapsulation functions + */ + if (innerFrameLen > OvsGetExternalMtu(switchContext) - headRoom) { + OVS_LOG_ERROR("Packet too large (size %d, mtu %d). Can't encapsulate", + innerFrameLen, OvsGetExternalMtu(switchContext)); + status = NDIS_STATUS_FAILURE; + goto ret_error; + } + + status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL); + if (status != NDIS_STATUS_SUCCESS) { + ASSERT(!"Unable to NdisRetreatNetBufferDataStart(headroom)"); + OVS_LOG_ERROR("Unable to NdisRetreatNetBufferDataStart(headroom)"); + goto ret_error; + } + + /* + * Make sure that the headroom for the tunnel header is continguous in + * memory. 
+ */ + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + ASSERT((int) (MmGetMdlByteCount(curMdl) - NET_BUFFER_CURRENT_MDL_OFFSET(curNb)) + >= (int) headRoom); + + buf = (PUINT8) MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); + if (!buf) { + ASSERT(!"MmGetSystemAddressForMdlSafe failed"); + OVS_LOG_ERROR("MmGetSystemAddressForMdlSafe failed"); + status = NDIS_STATUS_RESOURCES; + goto ret_error; + } + + buf += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + outerEthHdr = (EthHdr *)buf; + outerIpHdr = (IPHdr *) (outerEthHdr + 1); + outerTcpHdr = (TCPHdr *) (outerIpHdr + 1); + sttHdr = (SttHdr *) (outerTcpHdr + 1); + + /* L2 header */ + ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) == + (PCHAR)&fwdInfo->srcMacAddr); + NdisMoveMemory(outerEthHdr->Destination, fwdInfo->dstMacAddr, + sizeof outerEthHdr->Destination + sizeof outerEthHdr->Source); + outerEthHdr->Type = htons(ETH_TYPE_IPV4); + + /* L3 header */ + outerIpHdr->ihl = sizeof(IPHdr) >> 2; + outerIpHdr->version = IPPROTO_IPV4; + outerIpHdr->tos = tunKey->tos; + + ipTotalLen = sizeof(IPHdr) + sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen; + outerIpHdr->tot_len = htons(ipTotalLen); + ASSERT(ipTotalLen < 65536); + + outerIpHdr->id = (uint16) atomic_add64(&vportStt->ipId, innerFrameLen); + outerIpHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ? + IP_DF_NBO : 0; + outerIpHdr->ttl = tunKey->ttl? 
tunKey->ttl : 64; + outerIpHdr->protocol = IPPROTO_TCP; + outerIpHdr->check = 0; + outerIpHdr->saddr = fwdInfo->srcIpAddr; + outerIpHdr->daddr = tunKey->dst; + outerIpHdr->check = IPChecksum((uint8 *)outerIpHdr, sizeof *outerIpHdr, 0); + + /* L4 header */ + RtlZeroMemory(outerTcpHdr, sizeof *outerTcpHdr); + outerTcpHdr->source = htons(tunKey->flow_hash | 32768); + outerTcpHdr->dest = STT_DST_PORT_NBO; + outerTcpHdr->seq = htonl((STT_HDR_LEN + innerFrameLen) << + STT_SEQ_LEN_SHIFT); + outerTcpHdr->ack_seq = htonl(atomic_inc64(&vportStt->ackNo)); + outerTcpHdr->doff = sizeof(TCPHdr) >> 2; + outerTcpHdr->psh = 1; + outerTcpHdr->ack = 1; + outerTcpHdr->window = (uint16) ~0; + + /* Calculate pseudo header chksum */ + tcpChksumLen = sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen; + ASSERT(tcpChksumLen < 65535); + outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr,(uint32 *) &tunKey->dst, + IPPROTO_TCP, (uint16) tcpChksumLen); + sttHdr->version = 0; + + /* XXX need to peek into the inner packet, hard code for now */ + sttHdr->flags = STT_PROTO_IPV4; + sttHdr->l4Offset = 0; + + sttHdr->reserved = 0; + /* XXX Used for large TCP packets.Not sure how it is used, clarify */ + sttHdr->mss = 0; + sttHdr->vlanTCI = 0; + sttHdr->key = tunKey->tunnelId; + /* Zero out stt padding */ + *(uint16 *)(sttHdr + 1) = 0; + + /* Calculate software tcp checksum */ + outerTcpHdr->check = CalculateChecksumNB(curNb, (uint16) tcpChksumLen, + sizeof(EthHdr) + sizeof(IPHdr)); + if (outerTcpHdr->check == 0) { + status = NDIS_STATUS_FAILURE; + goto ret_error; + } + + return STATUS_SUCCESS; + +ret_error: + OvsCompleteNBL(switchContext, *newNbl, TRUE); + *newNbl = NULL; + return status; +} + +/* + * -------------------------------------------------------------------------- + * OvsDecapStt -- + * Decapsulates an STT packet. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl) +{ + NDIS_STATUS status = NDIS_STATUS_FAILURE; + PNET_BUFFER curNb; + IPHdr *ipHdr; + char *ipBuf[sizeof(IPHdr)]; + SttHdr *sttHdr; + char *sttBuf[STT_HDR_LEN]; + UINT32 advanceCnt, hdrLen; + + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); + + if (NET_BUFFER_DATA_LENGTH(curNb) < OvsGetSttTunHdrSize()) { + OVS_LOG_ERROR("Packet length received is less than the tunnel header:" + " %d<%d\n", NET_BUFFER_DATA_LENGTH(curNb), OvsGetSttTunHdrSize()); + return NDIS_STATUS_INVALID_LENGTH; + } + + /* Skip Eth header */ + hdrLen = sizeof(EthHdr); + NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); + advanceCnt = hdrLen; + + ipHdr = NdisGetDataBuffer(curNb, sizeof *ipHdr, (PVOID) &ipBuf, + 1 /*no align*/, 0); + ASSERT(ipHdr); + + /* Skip IP & TCP headers */ + hdrLen = sizeof(IPHdr) + sizeof(TCPHdr), + NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); + advanceCnt += hdrLen; + + /* STT Header */ + sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, (PVOID) &sttBuf, + 1 /*no align*/, 0); + ASSERT(sttHdr); + + /* Initialize the tunnel key */ + tunKey->dst = ipHdr->daddr; + tunKey->src = ipHdr->saddr; + tunKey->tunnelId = sttHdr->key; + tunKey->flags = (OVS_TNL_F_CSUM | OVS_TNL_F_KEY); + tunKey->tos = ipHdr->tos; + tunKey->ttl = ipHdr->ttl; + tunKey->pad = 0; + + /* Skip stt header, DataOffset points to inner pkt now. 
*/ + hdrLen = STT_HDR_LEN; + NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); + advanceCnt += hdrLen; + + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, OVS_DEFAULT_COPY_SIZE, + 0, FALSE /*copy NBL info*/); + + ASSERT(advanceCnt == OvsGetSttTunHdrSize()); + status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL); + + if (*newNbl == NULL) { + OVS_LOG_ERROR("OvsDecapStt: Unable to allocate a new cloned NBL"); + status = NDIS_STATUS_RESOURCES; + } + + return status; +} diff --git a/datapath-windows/ovsext/Stt.h b/datapath-windows/ovsext/Stt.h new file mode 100644 index 0000000..e77c9a9 --- /dev/null +++ b/datapath-windows/ovsext/Stt.h @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2015 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OVS_STT_H_ +#define __OVS_STT_H_ 1 + +#define STT_DST_PORT 7471 +#define STT_DST_PORT_NBO 0x2f1d + +#define MAX_IP_TOTAL_LEN 65535 + +// STT defines. 
+#define STT_SEQ_LEN_SHIFT 16 +#define STT_SEQ_OFFSET_MASK ((1 << STT_SEQ_LEN_SHIFT) - 1) +#define STT_FRAME_LEN(seq) ((seq) >> STT_SEQ_LEN_SHIFT) +#define STT_SEGMENT_OFF(seq) ((seq) & STT_SEQ_OFFSET_MASK) + +#define STT_CSUM_VERIFIED (1 << 0) +#define STT_CSUM_PARTIAL (1 << 1) +#define STT_PROTO_IPV4 (1 << 2) +#define STT_PROTO_TCP (1 << 3) +#define STT_PROTO_TYPES (STT_PROTO_IPV4 | STT_PROTO_TCP) + +#define STT_ETH_PAD 2 +typedef struct SttHdr { + UINT8 version; + UINT8 flags; + UINT8 l4Offset; + UINT8 reserved; + UINT16 mss; + UINT16 vlanTCI; + UINT64 key; +} SttHdr, *PSttHdr; + +#define STT_HDR_LEN (sizeof(SttHdr) + STT_ETH_PAD) + +typedef struct _OVS_STT_VPORT { + UINT32 dstPort; + UINT64 ackNo; + UINT64 ipId; + + UINT64 inPkts; + UINT64 outPkts; + UINT64 slowInPkts; + UINT64 slowOutPkts; +} OVS_STT_VPORT, *POVS_STT_VPORT; + +NTSTATUS OvsInitSttTunnel(POVS_VPORT_ENTRY vport, + UINT16 udpDestPort); + +VOID OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport); + + +void OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport); + +NDIS_STATUS OvsEncapStt(PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + POVS_SWITCH_CONTEXT switchContext, + POVS_PACKET_HDR_INFO layers, + PNET_BUFFER_LIST *newNbl); + + +NDIS_STATUS OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl); + +static __inline UINT32 +OvsGetSttTunHdrSize(VOID) +{ + return sizeof (EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr) + + STT_HDR_LEN; +} + +#endif /*__OVS_STT_H_ */ diff --git a/datapath-windows/ovsext/Switch.h b/datapath-windows/ovsext/Switch.h index 6ec34e1..ee43ccb 100644 --- a/datapath-windows/ovsext/Switch.h +++ b/datapath-windows/ovsext/Switch.h @@ -133,6 +133,7 @@ typedef struct _OVS_SWITCH_CONTEXT POVS_VPORT_ENTRY internalVport; POVS_VPORT_ENTRY vxlanVport; + POVS_VPORT_ENTRY sttVport; /* * 'portIdHashArray' ONLY contains ports that exist on the Hyper-V switch, @@ -216,6 +217,4 @@ OvsAcquireSwitchContext(VOID); VOID 
OvsReleaseSwitchContext(POVS_SWITCH_CONTEXT switchContext); -PVOID OvsGetExternalVport(); - #endif /* __SWITCH_H_ */ diff --git a/datapath-windows/ovsext/Tunnel.c b/datapath-windows/ovsext/Tunnel.c index fed58f1..a4bf7ed 100644 --- a/datapath-windows/ovsext/Tunnel.c +++ b/datapath-windows/ovsext/Tunnel.c @@ -292,7 +292,8 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, goto unlockAndDrop; } - ASSERT(vport->ovsType == OVS_VPORT_TYPE_VXLAN); + ASSERT(vport->ovsType == OVS_VPORT_TYPE_VXLAN || + vport->ovsType == OVS_VPORT_TYPE_STT); portNo = vport->portNo; diff --git a/datapath-windows/ovsext/Util.h b/datapath-windows/ovsext/Util.h index 9a01242..ee676fa 100644 --- a/datapath-windows/ovsext/Util.h +++ b/datapath-windows/ovsext/Util.h @@ -33,6 +33,7 @@ #define OVS_SWITCH_POOL_TAG 'SSVO' #define OVS_USER_POOL_TAG 'USVO' #define OVS_VPORT_POOL_TAG 'PSVO' +#define OVS_STT_POOL_TAG 'TSVO' VOID *OvsAllocateMemory(size_t size); VOID *OvsAllocateMemoryWithTag(size_t size, ULONG tag); diff --git a/datapath-windows/ovsext/Vport.c b/datapath-windows/ovsext/Vport.c index 1423ace..62b6cd9 100644 --- a/datapath-windows/ovsext/Vport.c +++ b/datapath-windows/ovsext/Vport.c @@ -21,6 +21,7 @@ #include "Event.h" #include "User.h" #include "Vxlan.h" +#include "Stt.h" #include "IpHelper.h" #include "Oid.h" #include "Datapath.h" @@ -867,6 +868,9 @@ OvsInitTunnelVport(POVS_VPORT_ENTRY vport, case OVS_VPORT_TYPE_VXLAN: status = OvsInitVxlanTunnel(vport, dstPort); break; + case OVS_VPORT_TYPE_STT: + status = OvsInitSttTunnel(vport, dstPort); + break; default: ASSERT(0); } @@ -1016,6 +1020,11 @@ InitOvsVportCommon(POVS_SWITCH_CONTEXT switchContext, switchContext->vxlanVport = vport; switchContext->numNonHvVports++; break; + case OVS_VPORT_TYPE_STT: + ASSERT(switchContext->sttVport == NULL); + switchContext->sttVport = vport; + switchContext->numNonHvVports++; + break; case OVS_VPORT_TYPE_INTERNAL: if (vport->isBridgeInternal) { switchContext->numNonHvVports++; @@ -1099,6 +1108,10 @@ 
OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext, OvsCleanupVxlanTunnel(vport); switchContext->vxlanVport = NULL; break; + case OVS_VPORT_TYPE_STT: + OvsCleanupSttTunnel(vport); + switchContext->sttVport = NULL; + break; case OVS_VPORT_TYPE_GRE: case OVS_VPORT_TYPE_GRE64: break; @@ -2113,7 +2126,16 @@ Cleanup: if (vport && vportAllocated == TRUE) { if (vportInitialized == TRUE) { if (OvsIsTunnelVportType(portType)) { - OvsCleanupVxlanTunnel(vport); + switch (vport->ovsType) { + case OVS_VPORT_TYPE_VXLAN: + OvsCleanupVxlanTunnel(vport); + break; + case OVS_VPORT_TYPE_STT: + OvsCleanupSttTunnel(vport);; + break; + default: + ASSERT(!"Invalid tunnel port type"); + } } } OvsFreeMemoryWithTag(vport, OVS_VPORT_POOL_TAG); diff --git a/datapath-windows/ovsext/Vport.h b/datapath-windows/ovsext/Vport.h index 348fbfd..d74ff59 100644 --- a/datapath-windows/ovsext/Vport.h +++ b/datapath-windows/ovsext/Vport.h @@ -176,6 +176,7 @@ static __inline BOOLEAN OvsIsTunnelVportType(OVS_VPORT_TYPE ovsType) { return ovsType == OVS_VPORT_TYPE_VXLAN || + ovsType == OVS_VPORT_TYPE_STT || ovsType == OVS_VPORT_TYPE_GRE || ovsType == OVS_VPORT_TYPE_GRE64; } @@ -187,11 +188,19 @@ OvsGetTunnelVport(POVS_SWITCH_CONTEXT switchContext, switch(ovsType) { case OVS_VPORT_TYPE_VXLAN: return switchContext->vxlanVport; + case OVS_VPORT_TYPE_STT: + return switchContext->sttVport; default: return NULL; } } +static __inline PVOID +GetOvsVportPriv(POVS_VPORT_ENTRY ovsVport) +{ + return ovsVport->priv; +} + static __inline BOOLEAN OvsIsInternalVportType(OVS_VPORT_TYPE ovsType) { @@ -207,6 +216,19 @@ OvsIsBridgeInternalVport(POVS_VPORT_ENTRY vport) return vport->isBridgeInternal == TRUE; } +static __inline POVS_VPORT_ENTRY +OvsGetExternalVport(POVS_SWITCH_CONTEXT switchContext) +{ + return switchContext->virtualExternalVport; +} + +static __inline UINT32 +OvsGetExternalMtu(POVS_SWITCH_CONTEXT switchContext) +{ + ASSERT(OvsGetExternalVport(switchContext)); + return ((POVS_VPORT_ENTRY) 
OvsGetExternalVport(switchContext))->mtu; +} + VOID OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext, POVS_VPORT_ENTRY vport, BOOLEAN hvDelete, BOOLEAN ovsDelete, diff --git a/datapath-windows/ovsext/Vxlan.c b/datapath-windows/ovsext/Vxlan.c index 8c57185..c269537 100644 --- a/datapath-windows/ovsext/Vxlan.c +++ b/datapath-windows/ovsext/Vxlan.c @@ -173,10 +173,10 @@ OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl, /* L2 header */ ethHdr = (EthHdr *)bufferStart; - NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr, - sizeof ethHdr->Destination + sizeof ethHdr->Source); ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) == (PCHAR)&fwdInfo->srcMacAddr); + NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr, + sizeof ethHdr->Destination + sizeof ethHdr->Source); ethHdr->Type = htons(ETH_TYPE_IPV4); // XXX: question: there are fields in the OvsIPv4TunnelKey for ttl and such, @@ -240,13 +240,11 @@ NDIS_STATUS OvsEncapVxlan(PNET_BUFFER_LIST curNbl, OvsIPv4TunnelKey *tunKey, POVS_SWITCH_CONTEXT switchContext, - VOID *completionList, POVS_PACKET_HDR_INFO layers, PNET_BUFFER_LIST *newNbl) { NTSTATUS status; OVS_FWD_INFO fwdInfo; - UNREFERENCED_PARAMETER(completionList); status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo); if (status != STATUS_SUCCESS) { @@ -349,15 +347,15 @@ OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl, /* *---------------------------------------------------------------------------- - * OvsDoDecapVxlan + * OvsDecapVxlan * Decapsulates to tunnel header in 'curNbl' and puts into 'tunKey'. 
*---------------------------------------------------------------------------- */ NDIS_STATUS -OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, - PNET_BUFFER_LIST *newNbl) +OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl) { PNET_BUFFER curNb; PMDL curMdl; diff --git a/datapath-windows/ovsext/Vxlan.h b/datapath-windows/ovsext/Vxlan.h index d84796d..b7ac794 100644 --- a/datapath-windows/ovsext/Vxlan.h +++ b/datapath-windows/ovsext/Vxlan.h @@ -58,14 +58,13 @@ NDIS_STATUS OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, NDIS_STATUS OvsEncapVxlan(PNET_BUFFER_LIST curNbl, OvsIPv4TunnelKey *tunKey, POVS_SWITCH_CONTEXT switchContext, - VOID *completionList, POVS_PACKET_HDR_INFO layers, PNET_BUFFER_LIST *newNbl); -NDIS_STATUS OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, - PNET_BUFFER_LIST *newNbl); +NDIS_STATUS OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl); static __inline UINT32 OvsGetVxlanTunHdrSize(VOID) diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj index 693bc50..7050015 100644 --- a/datapath-windows/ovsext/ovsext.vcxproj +++ b/datapath-windows/ovsext/ovsext.vcxproj @@ -90,6 +90,7 @@ <ClInclude Include="PacketIO.h" /> <ClInclude Include="PacketParser.h" /> <ClInclude Include="precomp.h" /> + <ClInclude Include="Stt.h" /> <ClInclude Include="Switch.h" /> <ClInclude Include="Tunnel.h" /> <ClInclude Include="TunnelIntf.h" /> @@ -183,6 +184,7 @@ <PreCompiledHeader>Create</PreCompiledHeader> <PreCompiledHeaderOutputFile>$(IntDir)\precomp.h.pch</PreCompiledHeaderOutputFile> </ClCompile> + <ClCompile Include="Stt.c" /> <ClCompile Include="Switch.c" /> <ClCompile Include="Tunnel.c" /> <ClCompile Include="TunnelFilter.c" /> @@ -202,4 +204,4 @@ 
<None Exclude="@(None)" Include="*.def;*.bat;*.hpj;*.asmx" /> </ItemGroup> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> -</Project> +</Project> \ No newline at end of file -- 1.9.4.msysgit.2 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev