Enable support for checksum offloads in STT when checksum offload is enabled in the Windows VM.
Signed-off-by: Sairam Venugopal <vsai...@vmware.com> --- datapath-windows/ovsext/Stt.c | 190 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 176 insertions(+), 14 deletions(-) diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c index b6272c3..c1af9f3 100644 --- a/datapath-windows/ovsext/Stt.c +++ b/datapath-windows/ovsext/Stt.c @@ -146,19 +146,40 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, POVS_STT_VPORT vportStt; UINT32 headRoom = OvsGetSttTunHdrSize(); UINT32 tcpChksumLen; + ULONG mss = 0; + ULONG lsoType = 0; + ULONG ipVersion = 0; UNREFERENCED_PARAMETER(layers); curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + + /* Verify if inner checksum is verified */ + BOOLEAN innerChecksumVerified = FALSE; + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, + TcpIpChecksumNetBufferListInfo); + + innerChecksumVerified = csumInfo.Transmit.IpHeaderChecksum == 0 && + csumInfo.Transmit.TcpChecksum == 0 && + csumInfo.Transmit.UdpChecksum == 0; + if (layers->isTcp) { NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo; lsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl, - TcpLargeSendNetBufferListInfo); - if (lsoInfo.LsoV1Transmit.MSS) { - /* XXX We don't handle LSO yet */ - OVS_LOG_ERROR("LSO on STT is not supported"); - return NDIS_STATUS_FAILURE; + TcpLargeSendNetBufferListInfo); + lsoType = lsoInfo.Transmit.Type; + switch (lsoType) { + case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE: + mss = lsoInfo.LsoV1Transmit.MSS; + break; + case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE: + mss = lsoInfo.LsoV2Transmit.MSS; + ipVersion = lsoInfo.LsoV2Transmit.IPVersion; + break; + default: + break; } } @@ -243,7 +264,6 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, outerIpHdr->check = 0; outerIpHdr->saddr = fwdInfo->srcIpAddr; outerIpHdr->daddr = tunKey->dst; - outerIpHdr->check = IPChecksum((uint8 *)outerIpHdr, sizeof *outerIpHdr, 0); /* L4 header */ RtlZeroMemory(outerTcpHdr, sizeof *outerTcpHdr); @@ -266,6 +286,11 @@ 
OvsDoEncapStt(POVS_VPORT_ENTRY vport, /* XXX need to peek into the inner packet, hard code for now */ sttHdr->flags = STT_PROTO_IPV4; + + /* XXX need to handle Checksum partial flag */ + if (innerChecksumVerified) { + sttHdr->flags |= STT_CSUM_VERIFIED; + } sttHdr->l4Offset = 0; sttHdr->reserved = 0; @@ -276,12 +301,37 @@ OvsDoEncapStt(POVS_VPORT_ENTRY vport, /* Zero out stt padding */ *(uint16 *)(sttHdr + 1) = 0; - /* Calculate software tcp checksum */ - outerTcpHdr->check = CalculateChecksumNB(curNb, (uint16) tcpChksumLen, - sizeof(EthHdr) + sizeof(IPHdr)); - if (outerTcpHdr->check == 0) { - status = NDIS_STATUS_FAILURE; - goto ret_error; + /* Offload IP and TCP checksum */ + csumInfo.Value = 0; + csumInfo.Transmit.IpHeaderChecksum = 1; + csumInfo.Transmit.TcpChecksum = 1; + csumInfo.Transmit.IsIPv4 = 1; + csumInfo.Transmit.TcpHeaderOffset = sizeof *outerEthHdr + sizeof *outerIpHdr; + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value; + + /* Offload TCP Segmentation */ + if (mss) { + OVS_LOG_ERROR("lsoInfo.Transmit.Type:%d lsoInfo packet - mss:%d", lsoType, mss); + + NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo; + lsoInfo.Value = 0; + ULONG tcpHeaderOffset = headRoom + sizeof(EthHdr) + sizeof(IPHdr); + lsoInfo.Transmit.Type = lsoType; + switch (lsoType) { + case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE: + lsoInfo.LsoV1Transmit.MSS = mss; + lsoInfo.LsoV1Transmit.TcpHeaderOffset = tcpHeaderOffset; + break; + case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE: + lsoInfo.LsoV2Transmit.MSS = mss; + lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset; + lsoInfo.LsoV2Transmit.IPVersion = ipVersion; + break; + default: + break; + } + + NET_BUFFER_LIST_INFO(curNbl, TcpLargeSendNetBufferListInfo) = lsoInfo.Value; } return STATUS_SUCCESS; @@ -293,6 +343,48 @@ ret_error: } /* + *---------------------------------------------------------------------------- + * OvsCalculateTCPChecksum + * Calculate TCP checksum + 
*---------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsCalculateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb) +{ + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo); + UINT16 checkSum; + + /* Check if TCP Checksum has been calculated by NIC */ + if (csumInfo.Receive.TcpChecksumSucceeded) { + return NDIS_STATUS_SUCCESS; + } + + EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr), + NULL, 1, 0); + + if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV4)) { + IPHdr *ip = (IPHdr *)((PCHAR)eth + sizeof *eth); + UINT32 l4Payload = ntohs(ip->tot_len) - ip->ihl * 4; + TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip); + checkSum = tcp->check; + + tcp->check = 0; + tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, + IPPROTO_TCP, (UINT16)l4Payload); + tcp->check = CalculateChecksumNB(curNb, (UINT16)(l4Payload), + sizeof(EthHdr) + ip->ihl * 4); + if (checkSum != tcp->check) { + return NDIS_STATUS_INVALID_PACKET; + } + } + + csumInfo.Receive.TcpChecksumSucceeded = 1; + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value; + return NDIS_STATUS_SUCCESS; +} + +/* * -------------------------------------------------------------------------- * OvsDecapStt -- * Decapsulates an STT packet. 
@@ -311,6 +403,7 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, SttHdr *sttHdr; char *sttBuf[STT_HDR_LEN]; UINT32 advanceCnt, hdrLen; + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); @@ -321,6 +414,20 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, return NDIS_STATUS_INVALID_LENGTH; } + /* Verify outer TCP Checksum */ + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo); + + /* Check if NIC has indicated TCP checksum failure */ + if (csumInfo.Receive.TcpChecksumFailed) { + return NDIS_STATUS_INVALID_PACKET; + } + + /* Calculate the TCP Checksum */ + status = OvsCalculateTCPChecksum(curNbl, curNb); + if (status != NDIS_STATUS_SUCCESS) { + return NDIS_STATUS_INVALID_PACKET; + } + /* Skip Eth header */ hdrLen = sizeof(EthHdr); NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); @@ -348,12 +455,67 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, tunKey->tos = ipHdr->tos; tunKey->ttl = ipHdr->ttl; tunKey->pad = 0; - + /* Skip stt header, DataOffset points to inner pkt now. 
*/ hdrLen = STT_HDR_LEN; NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); advanceCnt += hdrLen; + /* Verify checksum for inner packet if it's required */ + BOOLEAN innerChecksumVerified = sttHdr->flags & STT_CSUM_VERIFIED; + + if (!innerChecksumVerified) { + EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr), + NULL, 1, 0); + + if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV4)) { + IPHdr *ip = (IPHdr *)((PCHAR)eth + sizeof *eth); + ip->check = 0; + ip->check = IPChecksum((UINT8 *)ip, sizeof *ip, 0); + UINT16 l4Payload = (UINT16)ntohs(ip->tot_len) - ip->ihl * 4; + UINT32 offset = sizeof(EthHdr) + sizeof(IPHdr); + + if (ip->protocol == IPPROTO_TCP) { + TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip); + tcp->check = 0; + tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, + IPPROTO_TCP, + (UINT16)l4Payload); + tcp->check = CalculateChecksumNB(curNb, l4Payload, offset); + } else if (ip->protocol == IPPROTO_UDP) { + UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip); + udp->check = 0; + udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, + IPPROTO_UDP, l4Payload); + udp->check = CalculateChecksumNB(curNb, l4Payload, offset); + } + } + if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV6)) { + IPv6Hdr *ip = (IPv6Hdr *)((PCHAR)eth + sizeof *eth); + UINT32 offset = (UINT32)(sizeof *eth + sizeof *ip); + UINT16 totalLength = (UINT16)ntohs(ip->payload_len); + + if (ip->nexthdr == IPPROTO_TCP) { + TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip); + tcp->check = 0; + tcp->check = IPv6PseudoChecksum((UINT32 *)&ip->saddr, + (UINT32 *)&ip->daddr, + IPPROTO_TCP, totalLength); + tcp->check = CalculateChecksumNB(curNb, totalLength, offset); + } + else if (ip->nexthdr == IPPROTO_UDP) { + TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + sizeof *ip); + tcp->check = 0; + tcp->check = IPv6PseudoChecksum((UINT32 *)&ip->saddr, + (UINT32 *)&ip->daddr, + IPPROTO_UDP, totalLength); + tcp->check = CalculateChecksumNB(curNb, totalLength, offset); + } + } + + NET_BUFFER_LIST_INFO(curNbl, 
TcpIpChecksumNetBufferListInfo) = 0; + } + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, OVS_DEFAULT_COPY_SIZE, 0, FALSE /*copy NBL info*/); @@ -366,4 +528,4 @@ OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, } return status; -} +} \ No newline at end of file -- 2.5.0.windows.1 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev