> Acked-by: Nithin Raju <nit...@vmware.com> Series applied.
> > -----Original Message----- > From: Sairam Venugopal <vsai...@vmware.com> > Date: Tuesday, October 27, 2015 at 10:20 AM > To: Nithin Raju <nit...@vmware.com> > Subject: Fw: [PATCH v2 3/3] datapath-windows: STT - Enable support for TCP > Segmentation offloads > >> >>________________________________________ >>From: Sairam Venugopal <vsai...@vmware.com> >>Sent: Monday, October 26, 2015 4:48 PM >>To: dev@openvswitch.org >>Cc: Sairam Venugopal >>Subject: [PATCH v2 3/3] datapath-windows: STT - Enable support for TCP >>Segmentation offloads >> >>Add support to STT - Encap and Decap functions to reassemble the packet >>fragments. Also add support to offload the packet to NDIS. >> >>Signed-off-by: Sairam Venugopal <vsai...@vmware.com> >>--- >> datapath-windows/ovsext/Actions.c | 40 ++-- >> datapath-windows/ovsext/Stt.c | 398 >>+++++++++++++++++++++++++++++--------- >> 2 files changed, 329 insertions(+), 109 deletions(-) >> >>diff --git a/datapath-windows/ovsext/Actions.c >>b/datapath-windows/ovsext/Actions.c >>index b4644a7..ce592b3 100644 >>--- a/datapath-windows/ovsext/Actions.c >>+++ b/datapath-windows/ovsext/Actions.c >>@@ -594,7 +594,7 @@ OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx) >> InitializeListHead(&missedPackets); >> status = OvsCreateAndAddPackets(NULL, 0, OVS_PACKET_CMD_MISS, >>vport, >> &key,ovsFwdCtx->curNbl, >>- ovsFwdCtx->tunnelRxNic != NULL, >>&ovsFwdCtx->layers, >>+ FALSE, &ovsFwdCtx->layers, >> ovsFwdCtx->switchContext, &missedPackets, >>&num); >> if (num) { >> OvsQueuePackets(&missedPackets, num); >>@@ -709,6 +709,7 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx) >> NDIS_STATUS status = NDIS_STATUS_SUCCESS; >> PNET_BUFFER_LIST newNbl = NULL; >> POVS_VPORT_ENTRY tunnelRxVport = ovsFwdCtx->tunnelRxNic; >>+ PCWSTR dropReason = L"OVS-dropped due to new decap packet"; >> >> if (OvsValidateIPChecksum(ovsFwdCtx->curNbl, &ovsFwdCtx->layers) >> != NDIS_STATUS_SUCCESS) { >>@@ -730,6 +731,10 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx) >> 
case OVS_VPORT_TYPE_STT: >> status = OvsDecapStt(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, >> &ovsFwdCtx->tunKey, &newNbl); >>+ if (status == NDIS_STATUS_SUCCESS && newNbl == NULL) { >>+ /* This was an STT-LSO Fragment */ >>+ dropReason = L"OVS-STT segment is cached"; >>+ } >> break; >> default: >> OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n", >>@@ -747,25 +752,26 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx) >> * tunnelRxNic and other fields will be cleared, re-init the context >> * before usage. >> */ >>- OvsCompleteNBLForwardingCtx(ovsFwdCtx, >>- L"OVS-dropped due to new decap packet"); >>+ OvsCompleteNBLForwardingCtx(ovsFwdCtx, dropReason); >> >>- /* Decapsulated packet is in a new NBL */ >>- ovsFwdCtx->tunnelRxNic = tunnelRxVport; >>- OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, >>- newNbl, tunnelRxVport->portNo, 0, >>- >>NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), >>- ovsFwdCtx->completionList, >>- &ovsFwdCtx->layers, FALSE); >>+ if (newNbl) { >>+ /* Decapsulated packet is in a new NBL */ >>+ ovsFwdCtx->tunnelRxNic = tunnelRxVport; >>+ OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, >>+ newNbl, tunnelRxVport->portNo, 0, >>+ >>NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), >>+ ovsFwdCtx->completionList, >>+ &ovsFwdCtx->layers, FALSE); >> >>- /* >>- * Set the NBL's SourcePortId and SourceNicIndex to default values to >>- * keep NDIS happy when we forward the packet. >>- */ >>- ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; >>- ovsFwdCtx->fwdDetail->SourceNicIndex = 0; >>+ /* >>+ * Set the NBL's SourcePortId and SourceNicIndex to default >>values to >>+ * keep NDIS happy when we forward the packet. 
>>+ */ >>+ ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; >>+ ovsFwdCtx->fwdDetail->SourceNicIndex = 0; >> >>- status = OvsDoFlowLookupOutput(ovsFwdCtx); >>+ status = OvsDoFlowLookupOutput(ovsFwdCtx); >>+ } >> ASSERT(ovsFwdCtx->curNbl == NULL); >> OvsClearTunRxCtx(ovsFwdCtx); >> >>diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c >>index b78ef95..ef44d23 100644 >>--- a/datapath-windows/ovsext/Stt.c >>+++ b/datapath-windows/ovsext/Stt.c >>@@ -34,6 +34,7 @@ >> #endif >> #define OVS_DBG_MOD OVS_DBG_STT >> #include "Debug.h" >>+#include "Jhash.h" >> >> KSTART_ROUTINE OvsSttDefragCleaner; >> static PLIST_ENTRY OvsSttPktFragHash; >>@@ -152,8 +153,8 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, >> UINT32 headRoom = OvsGetSttTunHdrSize(); >> UINT32 tcpChksumLen; >> PUINT8 bufferStart; >>- >>- UNREFERENCED_PARAMETER(layers); >>+ ULONG mss = 0; >>+ NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo; >> >> curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); >> >>@@ -162,14 +163,20 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, >> BOOLEAN innerPartialChecksum = FALSE; >> >> if (layers->isTcp) { >>- NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo; >>- >> lsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl, >> TcpLargeSendNetBufferListInfo); >>- if (lsoInfo.LsoV1Transmit.MSS) { >>- /* XXX We don't handle LSO yet */ >>- OVS_LOG_ERROR("LSO on STT is not supported"); >>- return NDIS_STATUS_FAILURE; >>+ >>+ switch (lsoInfo.Transmit.Type) { >>+ case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE: >>+ mss = lsoInfo.LsoV1Transmit.MSS; >>+ break; >>+ case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE: >>+ mss = lsoInfo.LsoV2Transmit.MSS; >>+ break; >>+ default: >>+ OVS_LOG_ERROR("Unknown LSO transmit type:%d", >>+ lsoInfo.Transmit.Type); >>+ return NDIS_STATUS_FAILURE; >> } >> } >> >>@@ -186,21 +193,36 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, >> return NDIS_STATUS_FAILURE; >> } >> >>- curNb = NET_BUFFER_LIST_FIRST_NB(*newNbl); >>+ curNbl = *newNbl; >>+ curNb = 
NET_BUFFER_LIST_FIRST_NB(curNbl); >> curMdl = NET_BUFFER_CURRENT_MDL(curNb); >>+ /* NB Chain should be split before */ >>+ ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); >>+ innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb); >>+ >> bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, >> LowPagePriority); >> bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); >> >>- if (layers->isIPv4 && csumInfo.Transmit.IpHeaderChecksum) { >>+ if (layers->isIPv4) { >> IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset); >>- ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0); >>+ if (!ip->tot_len) { >>+ ip->tot_len = htons(innerFrameLen - sizeof(EthHdr)); >>+ } >>+ if (!ip->check) { >>+ ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0); >>+ } >> } >>+ >> if (layers->isTcp) { >>- if(!csumInfo.Transmit.TcpChecksum) { >>- innerChecksumVerified = TRUE; >>- } else { >>+ if (mss) { >> innerPartialChecksum = TRUE; >>+ } else { >>+ if (!csumInfo.Transmit.TcpChecksum) { >>+ innerChecksumVerified = TRUE; >>+ } else { >>+ innerPartialChecksum = TRUE; >>+ } >> } >> } else if (layers->isUdp) { >> if(!csumInfo.Transmit.UdpChecksum) { >>@@ -210,24 +232,6 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, >> } >> } >> >>- curNbl = *newNbl; >>- curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); >>- /* NB Chain should be split before */ >>- ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); >>- >>- innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb); >>- /* >>- * External port can't be removed as we hold the dispatch lock >>- * We also check if the external port was removed beforecalling >>- * port encapsulation functions >>- */ >>- if (innerFrameLen > OvsGetExternalMtu(switchContext) - headRoom) { >>- OVS_LOG_ERROR("Packet too large (size %d, mtu %d). 
Can't >>encapsulate", >>- innerFrameLen, OvsGetExternalMtu(switchContext)); >>- status = NDIS_STATUS_FAILURE; >>- goto ret_error; >>- } >>- >> status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL); >> if (status != NDIS_STATUS_SUCCESS) { >> ASSERT(!"Unable to NdisRetreatNetBufferDataStart(headroom)"); >>@@ -301,33 +305,52 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, >> IPPROTO_TCP, (uint16) >>tcpChksumLen); >> sttHdr->version = 0; >> >>- /* XXX need to peek into the inner packet, hard code for now */ >>- sttHdr->flags = STT_PROTO_IPV4; >>- if (innerChecksumVerified) { >>- sttHdr->flags |= STT_CSUM_VERIFIED; >>- } else if (innerPartialChecksum) { >>+ /* Set STT Header */ >>+ sttHdr->flags = 0; >>+ if (innerPartialChecksum) { >> sttHdr->flags |= STT_CSUM_PARTIAL; >>+ if (layers->isIPv4) { >>+ sttHdr->flags |= STT_PROTO_IPV4; >>+ } >>+ if (layers->isTcp) { >>+ sttHdr->flags |= STT_PROTO_TCP; >>+ } >>+ sttHdr->l4Offset = (UINT8) layers->l4Offset; >>+ sttHdr->mss = (UINT16) htons(mss); >>+ } else if (innerChecksumVerified) { >>+ sttHdr->flags = STT_CSUM_VERIFIED; >>+ sttHdr->l4Offset = 0; >>+ sttHdr->mss = 0; >> } >>- sttHdr->l4Offset = 0; >> >> sttHdr->reserved = 0; >>- /* XXX Used for large TCP packets.Not sure how it is used, clarify */ >>- sttHdr->mss = 0; >> sttHdr->vlanTCI = 0; >> sttHdr->key = tunKey->tunnelId; >> /* Zero out stt padding */ >> *(uint16 *)(sttHdr + 1) = 0; >> >> /* Offload IP and TCP checksum */ >>+ ULONG tcpHeaderOffset = sizeof *outerEthHdr + >>+ outerIpHdr->ihl * 4; >> csumInfo.Value = 0; >> csumInfo.Transmit.IpHeaderChecksum = 1; >> csumInfo.Transmit.TcpChecksum = 1; >> csumInfo.Transmit.IsIPv4 = 1; >>- csumInfo.Transmit.TcpHeaderOffset = sizeof *outerEthHdr + >>- outerIpHdr->ihl * 4; >>+ csumInfo.Transmit.TcpHeaderOffset = tcpHeaderOffset; >> NET_BUFFER_LIST_INFO(curNbl, >> TcpIpChecksumNetBufferListInfo) = >>csumInfo.Value; >> >>+ UINT32 encapMss = OvsGetExternalMtu(switchContext) - sizeof(IPHdr) - >>sizeof(TCPHdr); >>+ if (ipTotalLen 
> encapMss) { >>+ lsoInfo.Value = 0; >>+ lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset; >>+ lsoInfo.LsoV2Transmit.MSS = encapMss; >>+ lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; >>+ lsoInfo.LsoV2Transmit.IPVersion = >>NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4; >>+ NET_BUFFER_LIST_INFO(curNbl, >>+ TcpLargeSendNetBufferListInfo) = >>lsoInfo.Value; >>+ } >>+ >> return STATUS_SUCCESS; >> >> ret_error: >>@@ -338,16 +361,22 @@ ret_error: >> >> /* >> >>*------------------------------------------------------------------------- >>--- >>- * OvsCalculateTCPChecksum >>- * Calculate TCP checksum >>+ * OvsValidateTCPChecksum >>+ * Validate TCP checksum >> >>*------------------------------------------------------------------------- >>--- >> */ >> static __inline NDIS_STATUS >>-OvsCalculateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb) >>+OvsValidateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb) >> { >> NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; >> csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, >> >>TcpIpChecksumNetBufferListInfo); >>+ >>+ /* Check if NIC has indicated TCP checksum failure */ >>+ if (csumInfo.Receive.TcpChecksumFailed) { >>+ return NDIS_STATUS_INVALID_PACKET; >>+ } >>+ >> UINT16 checkSum; >> >> /* Check if TCP Checksum has been calculated by NIC */ >>@@ -399,10 +428,9 @@ OvsInitSttDefragmentation() >> NdisAllocateSpinLock(&OvsSttSpinLock); >> >> /* Init the Hash Buffer */ >>- OvsSttPktFragHash = (PLIST_ENTRY) OvsAllocateMemoryWithTag( >>- sizeof(LIST_ENTRY) >>- * STT_HASH_TABLE_SIZE, >>- OVS_STT_POOL_TAG); >>+ OvsSttPktFragHash = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY) >>+ * STT_HASH_TABLE_SIZE, >>+ OVS_STT_POOL_TAG); >> if (OvsSttPktFragHash == NULL) { >> NdisFreeSpinLock(&OvsSttSpinLock); >> return STATUS_INSUFFICIENT_RESOURCES; >>@@ -487,6 +515,7 @@ OvsSttDefragCleaner(PVOID data) >> entry = CONTAINING_RECORD(link, OVS_STT_PKT_ENTRY, link); >> if (entry->timeout < currentTime) { >> 
RemoveEntryList(&entry->link); >>+ OvsFreeMemoryWithTag(entry->packetBuf, >>OVS_STT_POOL_TAG); >> OvsFreeMemoryWithTag(entry, OVS_STT_POOL_TAG); >> } >> } >>@@ -500,6 +529,158 @@ OvsSttDefragCleaner(PVOID data) >> PsTerminateSystemThread(STATUS_SUCCESS); >> } >> >>+static OVS_STT_PKT_KEY >>+OvsGeneratePacketKey(IPHdr *ipHdr, TCPHdr *tcpHdr) >>+{ >>+ OVS_STT_PKT_KEY key; >>+ key.sAddr = ipHdr->saddr; >>+ key.dAddr = ipHdr->daddr; >>+ key.ackSeq = ntohl(tcpHdr->ack_seq); >>+ return key; >>+} >>+ >>+static UINT32 >>+OvsSttGetPktHash(OVS_STT_PKT_KEY *pktKey) >>+{ >>+ UINT32 arr[3]; >>+ arr[0] = pktKey->ackSeq; >>+ arr[1] = pktKey->dAddr; >>+ arr[2] = pktKey->sAddr; >>+ return OvsJhashWords(arr, 3, OVS_HASH_BASIS); >>+} >>+ >>+static VOID * >>+OvsLookupPktFrag(OVS_STT_PKT_KEY *pktKey, UINT32 hash) >>+{ >>+ PLIST_ENTRY link; >>+ POVS_STT_PKT_ENTRY entry; >>+ >>+ LIST_FORALL(&OvsSttPktFragHash[hash & STT_HASH_TABLE_MASK], link) { >>+ entry = CONTAINING_RECORD(link, OVS_STT_PKT_ENTRY, link); >>+ if (entry->ovsPktKey.ackSeq == pktKey->ackSeq && >>+ entry->ovsPktKey.dAddr == pktKey->dAddr && >>+ entry->ovsPktKey.sAddr == pktKey->sAddr) { >>+ return entry; >>+ } >>+ } >>+ return NULL; >>+} >>+ >>+/* >>+* >>+------------------------------------------------------------------------- >>- >>+* OvsSttReassemble -- >>+* Reassemble an LSO packet from multiple STT-Fragments. >>+* >>+------------------------------------------------------------------------- >>- >>+*/ >>+PNET_BUFFER_LIST >>+OvsSttReassemble(POVS_SWITCH_CONTEXT switchContext, >>+ PNET_BUFFER_LIST curNbl, >>+ IPHdr *ipHdr, >>+ TCPHdr *tcp, >>+ SttHdr *newSttHdr, >>+ UINT16 payloadLen) >>+{ >>+ UINT32 seq = ntohl(tcp->seq); >>+ UINT32 innerPacketLen = (seq >> STT_SEQ_LEN_SHIFT) - STT_HDR_LEN; >>+ UINT32 segOffset = STT_SEGMENT_OFF(seq); >>+ UINT32 offset = segOffset == 0 ? 
0 : segOffset - STT_HDR_LEN; >>+ UINT32 startOffset = 0; >>+ OVS_STT_PKT_ENTRY *pktFragEntry; >>+ PNET_BUFFER_LIST targetPNbl = NULL; >>+ BOOLEAN lastPacket = FALSE; >>+ PNET_BUFFER sourceNb; >>+ UINT32 fragmentLength = payloadLen; >>+ SttHdr stt; >>+ SttHdr *sttHdr = NULL; >>+ sourceNb = NET_BUFFER_LIST_FIRST_NB(curNbl); >>+ >>+ /* XXX optimize this lock */ >>+ NdisAcquireSpinLock(&OvsSttSpinLock); >>+ >>+ /* If this is the first fragment, copy the STT header */ >>+ if (segOffset == 0) { >>+ sttHdr = NdisGetDataBuffer(sourceNb, sizeof(SttHdr), &stt, 1, 0); >>+ if (sttHdr == NULL) { >>+ OVS_LOG_ERROR("Unable to retrieve STT header"); >>+ return NULL; >>+ } >>+ fragmentLength = fragmentLength - STT_HDR_LEN; >>+ startOffset = startOffset + STT_HDR_LEN; >>+ } >>+ >>+ /* Lookup fragment */ >>+ OVS_STT_PKT_KEY pktKey = OvsGeneratePacketKey(ipHdr, tcp); >>+ UINT32 hash = OvsSttGetPktHash(&pktKey); >>+ pktFragEntry = OvsLookupPktFrag(&pktKey, hash); >>+ >>+ if (pktFragEntry == NULL) { >>+ /* Create a new Packet Entry */ >>+ POVS_STT_PKT_ENTRY entry; >>+ entry = OvsAllocateMemoryWithTag(sizeof(OVS_STT_PKT_ENTRY), >>+ OVS_STT_POOL_TAG); >>+ RtlZeroMemory(entry, sizeof (OVS_STT_PKT_ENTRY)); >>+ >>+ /* Update Key, timestamp and recvdLen */ >>+ NdisMoveMemory(&entry->ovsPktKey, &pktKey, sizeof >>(OVS_STT_PKT_KEY)); >>+ >>+ entry->recvdLen = fragmentLength; >>+ >>+ UINT64 currentTime; >>+ NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime); >>+ entry->timeout = currentTime + STT_ENTRY_TIMEOUT; >>+ >>+ if (segOffset == 0) { >>+ entry->sttHdr = *sttHdr; >>+ } >>+ >>+ /* Copy the data from Source to new buffer */ >>+ entry->packetBuf = OvsAllocateMemoryWithTag(innerPacketLen, >>+ OVS_STT_POOL_TAG); >>+ if (OvsGetPacketBytes(curNbl, fragmentLength, startOffset, >>+ entry->packetBuf + offset) == NULL) { >>+ OVS_LOG_ERROR("Error when obtaining bytes from Packet"); >>+ goto handle_error; >>+ } >>+ >>+ /* Insert the entry in the Static Buffer */ >>+ 
InsertHeadList(&OvsSttPktFragHash[hash & STT_HASH_TABLE_MASK], >>+ &entry->link); >>+ } else { >>+ /* Add to recieved length to identify if this is the last >>fragment */ >>+ pktFragEntry->recvdLen += fragmentLength; >>+ lastPacket = (pktFragEntry->recvdLen == innerPacketLen); >>+ >>+ if (segOffset == 0) { >>+ pktFragEntry->sttHdr = *sttHdr; >>+ } >>+ >>+ /* Copy the fragment data from Source to existing buffer */ >>+ if (OvsGetPacketBytes(curNbl, fragmentLength, startOffset, >>+ pktFragEntry->packetBuf + offset) == NULL) >>{ >>+ OVS_LOG_ERROR("Error when obtaining bytes from Packet"); >>+ goto handle_error; >>+ } >>+ } >>+ >>+handle_error: >>+ if (lastPacket) { >>+ /* Retrieve the original STT header */ >>+ NdisMoveMemory(newSttHdr, &pktFragEntry->sttHdr, sizeof >>(SttHdr)); >>+ targetPNbl = OvsAllocateNBLFromBuffer(switchContext, >>pktFragEntry->packetBuf, >>+ innerPacketLen); >>+ >>+ /* Delete this entry and free up the memory/ */ >>+ RemoveEntryList(&pktFragEntry->link); >>+ OvsFreeMemoryWithTag(pktFragEntry->packetBuf, OVS_STT_POOL_TAG); >>+ OvsFreeMemoryWithTag(pktFragEntry, OVS_STT_POOL_TAG); >>+ } >>+ >>+ NdisReleaseSpinLock(&OvsSttSpinLock); >>+ return lastPacket ? 
targetPNbl : NULL; >>+} >>+ >> /* >> * >>-------------------------------------------------------------------------- >> * OvsDecapStt -- >>@@ -513,34 +694,20 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, >> PNET_BUFFER_LIST *newNbl) >> { >> NDIS_STATUS status = NDIS_STATUS_FAILURE; >>- PNET_BUFFER curNb; >>+ PNET_BUFFER curNb, newNb; >> IPHdr *ipHdr; >> char *ipBuf[sizeof(IPHdr)]; >>+ SttHdr stt; >> SttHdr *sttHdr; >> char *sttBuf[STT_HDR_LEN]; >> UINT32 advanceCnt, hdrLen; >>- NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; >>+ BOOLEAN isLsoPacket = FALSE; >> >> curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); >> ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); >> >>- if (NET_BUFFER_DATA_LENGTH(curNb) < OvsGetSttTunHdrSize()) { >>- OVS_LOG_ERROR("Packet length received is less than the tunnel >>header:" >>- " %d<%d\n", NET_BUFFER_DATA_LENGTH(curNb), >>OvsGetSttTunHdrSize()); >>- return NDIS_STATUS_INVALID_LENGTH; >>- } >>- >>- /* Verify outer TCP Checksum */ >>- csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, >>- >>TcpIpChecksumNetBufferListInfo); >>- >>- /* Check if NIC has indicated TCP checksum failure */ >>- if (csumInfo.Receive.TcpChecksumFailed) { >>- return NDIS_STATUS_INVALID_PACKET; >>- } >>- >>- /* Calculate the TCP Checksum */ >>- status = OvsCalculateTCPChecksum(curNbl, curNb); >>+ /* Validate the TCP Checksum */ >>+ status = OvsValidateTCPChecksum(curNbl, curNb); >> if (status != NDIS_STATUS_SUCCESS) { >> return status; >> } >>@@ -554,34 +721,73 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, >> 1 /*no align*/, 0); >> ASSERT(ipHdr); >> >>+ TCPHdr *tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4); >>+ >> /* Skip IP & TCP headers */ >> hdrLen = sizeof(IPHdr) + sizeof(TCPHdr), >> NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); >> advanceCnt += hdrLen; >> >>- /* STT Header */ >>- sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, (PVOID) &sttBuf, >>- 1 /*no align*/, 0); >>+ UINT32 seq = ntohl(tcp->seq); >>+ UINT32 totalLen = (seq >> STT_SEQ_LEN_SHIFT); 
>>+ UINT16 payloadLen = (UINT16)ntohs(ipHdr->tot_len) >>+ - (ipHdr->ihl * 4) >>+ - (sizeof * tcp); >>+ >>+ /* Check if incoming packet requires reassembly */ >>+ if (totalLen != payloadLen) { >>+ sttHdr = &stt; >>+ PNET_BUFFER_LIST pNbl = OvsSttReassemble(switchContext, curNbl, >>+ ipHdr, tcp, sttHdr, >>+ payloadLen); >>+ if (pNbl == NULL) { >>+ return NDIS_STATUS_SUCCESS; >>+ } >>+ >>+ *newNbl = pNbl; >>+ isLsoPacket = TRUE; >>+ } else { >>+ /* STT Header */ >>+ sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, >>+ (PVOID) &sttBuf, 1 /*no align*/, 0); >>+ /* Skip stt header, DataOffset points to inner pkt now. */ >>+ hdrLen = STT_HDR_LEN; >>+ NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); >>+ advanceCnt += hdrLen; >>+ >>+ *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, >>+ 0, FALSE /*copy NBL info*/); >>+ } >>+ >>+ if (*newNbl == NULL) { >>+ OVS_LOG_ERROR("Unable to allocate a new cloned NBL"); >>+ return NDIS_STATUS_RESOURCES; >>+ } >>+ >>+ status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL); >>+ if (status != NDIS_STATUS_SUCCESS) { >>+ OvsCompleteNBL(switchContext, *newNbl, TRUE); >>+ return NDIS_STATUS_FAILURE; >>+ } >>+ newNb = NET_BUFFER_LIST_FIRST_NB(*newNbl); >>+ >> ASSERT(sttHdr); >> >> /* Initialize the tunnel key */ >> tunKey->dst = ipHdr->daddr; >> tunKey->src = ipHdr->saddr; >> tunKey->tunnelId = sttHdr->key; >>- tunKey->flags = (OVS_TNL_F_CSUM | OVS_TNL_F_KEY); >>+ tunKey->flags = OVS_TNL_F_KEY; >> tunKey->tos = ipHdr->tos; >> tunKey->ttl = ipHdr->ttl; >> tunKey->pad = 0; >> >>- /* Skip stt header, DataOffset points to inner pkt now. 
*/ >>- hdrLen = STT_HDR_LEN; >>- NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); >>- advanceCnt += hdrLen; >>+ BOOLEAN requiresLSO = sttHdr->mss != 0; >> >> /* Verify checksum for inner packet if it's required */ >> if (!(sttHdr->flags & STT_CSUM_VERIFIED)) { >> BOOLEAN innerChecksumPartial = sttHdr->flags & STT_CSUM_PARTIAL; >>- EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr), >>+ EthHdr *eth = (EthHdr *)NdisGetDataBuffer(newNb, sizeof(EthHdr), >> NULL, 1, 0); >> >> /* XXX Figure out a way to offload checksum receives */ >>@@ -597,14 +803,16 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, >> IPPROTO_TCP, >> (UINT16)l4Payload); >> } >>- tcp->check = CalculateChecksumNB(curNb, l4Payload, >>offset); >>+ if (!requiresLSO) { >>+ tcp->check = CalculateChecksumNB(newNb, l4Payload, >>offset); >>+ } >> } else if (ip->protocol == IPPROTO_UDP) { >> UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip); >> if (!innerChecksumPartial){ >> udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, >> IPPROTO_UDP, >>l4Payload); >> } >>- udp->check = CalculateChecksumNB(curNb, l4Payload, >>offset); >>+ udp->check = CalculateChecksumNB(newNb, l4Payload, >>offset); >> } >> } else if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV6)) { >> IPv6Hdr *ip = (IPv6Hdr *)((PCHAR)eth + sizeof *eth); >>@@ -617,7 +825,9 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, >> (UINT32 *)&ip->daddr, >> IPPROTO_TCP, >>totalLength); >> } >>- tcp->check = CalculateChecksumNB(curNb, totalLength, >>offset); >>+ if (!requiresLSO) { >>+ tcp->check = CalculateChecksumNB(newNb, totalLength, >>offset); >>+ } >> } >> else if (ip->nexthdr == IPPROTO_UDP) { >> UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip); >>@@ -626,23 +836,27 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, >> (UINT32 *)&ip->daddr, >> IPPROTO_UDP, >>totalLength); >> } >>- udp->check = CalculateChecksumNB(curNb, totalLength, >>offset); >>+ udp->check = CalculateChecksumNB(newNb, totalLength, >>offset); >> } >> } >> >>- 
NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0; >>+ NET_BUFFER_LIST_INFO(*newNbl, TcpIpChecksumNetBufferListInfo) = >>0; >> } >> >>- *newNbl = OvsPartialCopyNBL(switchContext, curNbl, >>OVS_DEFAULT_COPY_SIZE, >>- 0, FALSE /*copy NBL info*/); >>- >>- ASSERT(advanceCnt == OvsGetSttTunHdrSize()); >>- status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL); >>- >>- if (*newNbl == NULL) { >>- OVS_LOG_ERROR("OvsDecapStt: Unable to allocate a new cloned >>NBL"); >>- status = NDIS_STATUS_RESOURCES; >>+ if (requiresLSO) { >>+ NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo; >>+ lsoInfo.Value = 0; >>+ lsoInfo.LsoV2Transmit.TcpHeaderOffset = sttHdr->l4Offset; >>+ lsoInfo.LsoV2Transmit.MSS = ETH_DEFAULT_MTU - sizeof(IPHdr) - >>sizeof(TCPHdr); >>+ lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; >>+ if (sttHdr->flags & STT_PROTO_IPV4) { >>+ lsoInfo.LsoV2Transmit.IPVersion = >>NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4; >>+ } else { >>+ lsoInfo.LsoV2Transmit.IPVersion = >>NDIS_TCP_LARGE_SEND_OFFLOAD_IPv6; >>+ } >>+ NET_BUFFER_LIST_INFO(*newNbl, >>+ TcpLargeSendNetBufferListInfo) = >>lsoInfo.Value; >> } >> >>- return status; >>+ return NDIS_STATUS_SUCCESS; >> } >>-- >>1.9.5.msysgit.0 >> > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev