Add support for IPv6 VXLAN tunnelling. Tested using ping and iperf.
Signed-off-by: Sorin Vinturis <svintu...@cloudbasesolutions.com> --- datapath-windows/ovsext/Actions.c | 120 ++++++++++++++---------- datapath-windows/ovsext/Flow.c | 66 +++++++++++--- datapath-windows/ovsext/Vxlan.c | 187 ++++++++++++++++++++++++++------------ datapath-windows/ovsext/Vxlan.h | 7 +- 4 files changed, 257 insertions(+), 123 deletions(-) diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index ead9741..250ac13 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -216,8 +216,19 @@ OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx, * packets only if they are at least VXLAN header size. */ if (!flowKey->ipKey.nwFrag) { - UINT16 dstPort = htons(flowKey->ipKey.l4.tpDst); - switch (flowKey->ipKey.nwProto) { + UINT16 dstPort; + UINT8 nwProto; + + if (flowKey->l2.dlType == ETH_TYPE_IPV4_NBO) { + nwProto = flowKey->ipKey.nwProto; + dstPort = htons(flowKey->ipKey.l4.tpDst); + } else { + ASSERT(flowKey->l2.dlType == ETH_TYPE_IPV6_NBO); + nwProto = flowKey->ipv6Key.nwProto; + dstPort = htons(flowKey->ipv6Key.l4.tpDst); + } + + switch (nwProto) { case IPPROTO_GRE: tunnelVport = OvsFindTunnelVportByPortType(ovsFwdCtx->switchContext, OVS_VPORT_TYPE_GRE); @@ -761,7 +772,7 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx) break; case OVS_VPORT_TYPE_VXLAN: status = OvsDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, - &ovsFwdCtx->tunKey, &newNbl); + &ovsFwdCtx->tunKey, &ovsFwdCtx->layers, &newNbl); break; case OVS_VPORT_TYPE_STT: status = OvsDecapStt(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, @@ -1238,55 +1249,68 @@ OvsActionMplsPush(OvsForwardingContext *ovsFwdCtx, /* * -------------------------------------------------------------------------- - * OvsTunnelAttrToIPv4TunnelKey -- + * OvsTunnelAttrToIPTunnelKey -- * Convert tunnel attribute to OvsIPTunnelKey. 
* -------------------------------------------------------------------------- */ static __inline NDIS_STATUS -OvsTunnelAttrToIPv4TunnelKey(PNL_ATTR attr, - OvsIPTunnelKey *tunKey) +OvsTunnelAttrToIPTunnelKey(PNL_ATTR attr, + OvsIPTunnelKey *tunKey) { - PNL_ATTR a; - INT rem; - - ASSERT(NlAttrType(attr) == OVS_KEY_ATTR_TUNNEL); - for (UINT32 i = 0; i < sizeof(*tunKey) / sizeof(UINT64); i++) { - tunKey->attr[i] = 0; - } - - NL_ATTR_FOR_EACH_UNSAFE (a, rem, NlAttrData(attr), - NlAttrGetSize(attr)) { - switch (NlAttrType(a)) { - case OVS_TUNNEL_KEY_ATTR_ID: - tunKey->tunnelId = NlAttrGetBe64(a); - tunKey->flags |= OVS_TNL_F_KEY; - break; - case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: - tunKey->src.Ipv4.sin_addr.s_addr = NlAttrGetBe32(a); - tunKey->src.si_family = AF_INET; - break; - case OVS_TUNNEL_KEY_ATTR_IPV4_DST: - tunKey->dst.Ipv4.sin_addr.s_addr = NlAttrGetBe32(a); - tunKey->dst.si_family = AF_INET; - break; - case OVS_TUNNEL_KEY_ATTR_TOS: - tunKey->tos = NlAttrGetU8(a); - break; - case OVS_TUNNEL_KEY_ATTR_TTL: - tunKey->ttl = NlAttrGetU8(a); - break; - case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: - tunKey->flags |= OVS_TNL_F_DONT_FRAGMENT; - break; - case OVS_TUNNEL_KEY_ATTR_CSUM: - tunKey->flags |= OVS_TNL_F_CSUM; - break; - default: - ASSERT(0); - } - } - - return NDIS_STATUS_SUCCESS; + PNL_ATTR a; + INT rem; + + ASSERT(NlAttrType(attr) == OVS_KEY_ATTR_TUNNEL); + for (UINT32 i = 0; i < sizeof(*tunKey) / sizeof(UINT64); i++) { + tunKey->attr[i] = 0; + } + + NL_ATTR_FOR_EACH_UNSAFE(a, rem, NlAttrData(attr), NlAttrGetSize(attr)) { + switch (NlAttrType(a)) { + case OVS_TUNNEL_KEY_ATTR_ID: + tunKey->tunnelId = NlAttrGetBe64(a); + tunKey->flags |= OVS_TNL_F_KEY; + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: + tunKey->src.Ipv4.sin_addr.s_addr = NlAttrGetBe32(a); + tunKey->src.si_family = AF_INET; + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_DST: + tunKey->dst.Ipv4.sin_addr.s_addr = NlAttrGetBe32(a); + tunKey->dst.si_family = AF_INET; + break; + case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: + 
RtlCopyMemory(&tunKey->src.Ipv6.sin6_addr, + NlAttrGetUnspec(a, + sizeof(tunKey->src.Ipv6.sin6_addr)), + sizeof(tunKey->src.Ipv6.sin6_addr)); + tunKey->src.si_family = AF_INET6; + break; + case OVS_TUNNEL_KEY_ATTR_IPV6_DST: + RtlCopyMemory(&tunKey->dst.Ipv6.sin6_addr, + NlAttrGetUnspec(a, + sizeof(tunKey->dst.Ipv6.sin6_addr)), + sizeof(tunKey->dst.Ipv6.sin6_addr)); + tunKey->dst.si_family = AF_INET6; + break; + case OVS_TUNNEL_KEY_ATTR_TOS: + tunKey->tos = NlAttrGetU8(a); + break; + case OVS_TUNNEL_KEY_ATTR_TTL: + tunKey->ttl = NlAttrGetU8(a); + break; + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: + tunKey->flags |= OVS_TNL_F_DONT_FRAGMENT; + break; + case OVS_TUNNEL_KEY_ATTR_CSUM: + tunKey->flags |= OVS_TNL_F_CSUM; + break; + default: + ASSERT(0); + } + } + + return NDIS_STATUS_SUCCESS; } /* @@ -1517,7 +1541,7 @@ OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx, case OVS_KEY_ATTR_TUNNEL: { OvsIPTunnelKey tunKey; - status = OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR)a, &tunKey); + status = OvsTunnelAttrToIPTunnelKey((PNL_ATTR)a, &tunKey); ASSERT(status == NDIS_STATUS_SUCCESS); tunKey.flow_hash = (uint16)(hash ? 
*hash : OvsHashFlow(key)); tunKey.dst_port = key->ipKey.l4.tpDst; diff --git a/datapath-windows/ovsext/Flow.c b/datapath-windows/ovsext/Flow.c index 1eb5102..9470e3c 100644 --- a/datapath-windows/ovsext/Flow.c +++ b/datapath-windows/ovsext/Flow.c @@ -189,7 +189,7 @@ const NL_POLICY nlFlowTunnelKeyPolicy[] = { .maxLen = 8, .optional = TRUE}, [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = {.type = NL_A_UNSPEC, .minLen = 4, .maxLen = 4, .optional = TRUE}, - [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = {.type = NL_A_UNSPEC, .minLen = 4 , + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = {.type = NL_A_UNSPEC, .minLen = 4, .maxLen = 4, .optional = FALSE}, [OVS_TUNNEL_KEY_ATTR_TOS] = {.type = NL_A_UNSPEC, .minLen = 1, .maxLen = 1, .optional = TRUE}, @@ -1005,16 +1005,34 @@ MapFlowTunKeyToNlKey(PNL_BUFFER nlBuf, goto done; } - if (!NlMsgPutTailU32(nlBuf, OVS_TUNNEL_KEY_ATTR_IPV4_DST, - tunKey->dst.Ipv4.sin_addr.s_addr)) { - rc = STATUS_UNSUCCESSFUL; - goto done; + if (tunKey->dst.si_family == AF_INET) { + if (!NlMsgPutTailU32(nlBuf, OVS_TUNNEL_KEY_ATTR_IPV4_DST, + tunKey->dst.Ipv4.sin_addr.s_addr)) { + rc = STATUS_UNSUCCESSFUL; + goto done; + } + } else { // tunKey->dst.si_family == AF_INET6 + if (!NlMsgPutTailUnspec(nlBuf, OVS_TUNNEL_KEY_ATTR_IPV6_DST, + (PCHAR)&tunKey->dst.Ipv6.sin6_addr, + sizeof(tunKey->dst.Ipv6.sin6_addr))) { + rc = STATUS_UNSUCCESSFUL; + goto done; + } } - if (!NlMsgPutTailU32(nlBuf, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, - tunKey->src.Ipv4.sin_addr.s_addr)) { - rc = STATUS_UNSUCCESSFUL; - goto done; + if (tunKey->src.si_family == AF_INET) { + if (!NlMsgPutTailU32(nlBuf, OVS_TUNNEL_KEY_ATTR_IPV4_SRC, + tunKey->src.Ipv4.sin_addr.s_addr)) { + rc = STATUS_UNSUCCESSFUL; + goto done; + } + } else { // tunKey->src.si_family == AF_INET6 + if (!NlMsgPutTailUnspec(nlBuf, OVS_TUNNEL_KEY_ATTR_IPV6_SRC, + (PCHAR)&tunKey->src.Ipv6.sin6_addr, + sizeof(tunKey->src.Ipv6.sin6_addr))) { + rc = STATUS_UNSUCCESSFUL; + goto done; + } } if (!NlMsgPutTailU8(nlBuf, OVS_TUNNEL_KEY_ATTR_TOS, @@ -1653,15 +1671,33 @@ 
MapTunAttrToFlowPut(PNL_ATTR *keyAttrs, } if (tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_DST]) { - destKey->tunKey.dst.si_family = AF_INET; - destKey->tunKey.dst.Ipv4.sin_addr.s_addr = - NlAttrGetU32(tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_DST]); + destKey->tunKey.dst.si_family = AF_INET; + destKey->tunKey.dst.Ipv4.sin_addr.s_addr = + NlAttrGetU32(tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_DST]); } if (tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_SRC]) { - destKey->tunKey.src.si_family = AF_INET; - destKey->tunKey.src.Ipv4.sin_addr.s_addr = - NlAttrGetU32(tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_SRC]); + destKey->tunKey.src.si_family = AF_INET; + destKey->tunKey.src.Ipv4.sin_addr.s_addr = + NlAttrGetU32(tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_SRC]); + } + + if (tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV6_DST]) { + destKey->tunKey.dst.si_family = AF_INET6; + RtlCopyMemory(&destKey->tunKey.dst.Ipv6.sin6_addr, + NlAttrGetUnspec( + tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV6_DST], + sizeof(destKey->tunKey.dst.Ipv6.sin6_addr)), + sizeof(destKey->tunKey.dst.Ipv6.sin6_addr)); + } + + if (tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV6_SRC]) { + destKey->tunKey.src.si_family = AF_INET6; + RtlCopyMemory(&destKey->tunKey.src.Ipv6.sin6_addr, + NlAttrGetUnspec( + tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV6_SRC], + sizeof(destKey->tunKey.src.Ipv6.sin6_addr)), + sizeof(destKey->tunKey.src.Ipv6.sin6_addr)); } if (tunAttrs[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT]) { diff --git a/datapath-windows/ovsext/Vxlan.c b/datapath-windows/ovsext/Vxlan.c index 7f64acc..a2c132c 100644 --- a/datapath-windows/ovsext/Vxlan.c +++ b/datapath-windows/ovsext/Vxlan.c @@ -185,14 +185,19 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, PMDL curMdl; PUINT8 bufferStart; EthHdr *ethHdr; - IPHdr *ipHdr; UDPHdr *udpHdr; VXLANHdr *vxlanHdr; POVS_VXLAN_VPORT vportVxlan; - UINT32 headRoom = OvsGetVxlanTunHdrSize(); + UINT32 headRoom = + OvsGetVxlanTunHdrSize(fwdInfo->dstIpAddr.si_family == AF_INET ? 
+ TRUE : FALSE); UINT32 packetLength; ULONG mss = 0; + ASSERT(IsEqualIpAddr(&tunKey->dst, &fwdInfo->dstIpAddr)); + ASSERT(IsEqualIpAddr(&tunKey->src, &fwdInfo->srcIpAddr) || + IsNullIpAddr(&tunKey->src)); + /* * XXX: the assumption currently is that the NBL is owned by OVS, and * headroom has already been allocated as part of allocating the NBL and @@ -249,7 +254,8 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, } curMdl = NET_BUFFER_CURRENT_MDL(curNb); - bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); + bufferStart = + (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); if (!bufferStart) { status = NDIS_STATUS_RESOURCES; goto ret_error; @@ -257,50 +263,71 @@ OvsDoEncapVxlan(POVS_VPORT_ENTRY vport, bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); if (NET_BUFFER_NEXT_NB(curNb)) { - OVS_LOG_TRACE("nb length %u next %u", NET_BUFFER_DATA_LENGTH(curNb), + OVS_LOG_TRACE("nb length %u next %u", + NET_BUFFER_DATA_LENGTH(curNb), NET_BUFFER_DATA_LENGTH(curNb->Next)); } /* L2 header */ ethHdr = (EthHdr *)bufferStart; - ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) == + ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof(fwdInfo->dstMacAddr)) == (PCHAR)&fwdInfo->srcMacAddr); NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr, - sizeof ethHdr->Destination + sizeof ethHdr->Source); - ethHdr->Type = htons(ETH_TYPE_IPV4); - - /* IP header */ - ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); - - ipHdr->ihl = sizeof *ipHdr / 4; - ipHdr->version = IPPROTO_IPV4; - ipHdr->tos = tunKey->tos; - ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr); - ipHdr->id = (uint16)atomic_add64(&vportVxlan->ipId, - NET_BUFFER_DATA_LENGTH(curNb)); - ipHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ? - IP_DF_NBO : 0; - ipHdr->ttl = tunKey->ttl ? 
tunKey->ttl : VXLAN_DEFAULT_TTL; - ipHdr->protocol = IPPROTO_UDP; - ASSERT(IsEqualIpAddr(&tunKey->dst, &fwdInfo->dstIpAddr)); - ASSERT(IsEqualIpAddr(&tunKey->src, &fwdInfo->srcIpAddr) || - IsNullIpAddr(&tunKey->src)); - ipHdr->saddr = fwdInfo->srcIpAddr.Ipv4.sin_addr.s_addr; - ipHdr->daddr = fwdInfo->dstIpAddr.Ipv4.sin_addr.s_addr; - - ipHdr->check = 0; - ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0); + sizeof(ethHdr->Destination) + sizeof(ethHdr->Source)); + + if (fwdInfo->dstIpAddr.si_family == AF_INET) { + IPHdr *ipv4Hdr; + ethHdr->Type = ETH_TYPE_IPV4_NBO; + + /* IPv4 header */ + ipv4Hdr = (IPHdr *)((PCHAR)ethHdr + sizeof(*ethHdr)); + + ipv4Hdr->ihl = sizeof(*ipv4Hdr) / 4; + ipv4Hdr->version = IPPROTO_IPV4; + ipv4Hdr->tos = tunKey->tos; + ipv4Hdr->tot_len = + htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof(*ethHdr)); + ipv4Hdr->id = (uint16)atomic_add64(&vportVxlan->ipId, + NET_BUFFER_DATA_LENGTH(curNb)); + ipv4Hdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ? + IP_DF_NBO : 0; + ipv4Hdr->ttl = tunKey->ttl ? tunKey->ttl : VXLAN_DEFAULT_TTL; + ipv4Hdr->protocol = IPPROTO_UDP; + ipv4Hdr->saddr = fwdInfo->srcIpAddr.Ipv4.sin_addr.s_addr; + ipv4Hdr->daddr = fwdInfo->dstIpAddr.Ipv4.sin_addr.s_addr; + + ipv4Hdr->check = 0; + ipv4Hdr->check = IPChecksum((UINT8 *)ipv4Hdr, sizeof(*ipv4Hdr), 0); + + udpHdr = (UDPHdr *)((PCHAR)ipv4Hdr + sizeof(*ipv4Hdr)); + } else { + IPv6Hdr *ipv6Hdr; + ASSERT(fwdInfo->dstIpAddr.si_family == AF_INET6); + ethHdr->Type = ETH_TYPE_IPV6_NBO; + + ipv6Hdr = (IPv6Hdr *)((PCHAR)ethHdr + sizeof(*ethHdr)); + ipv6Hdr->version = IPPROTO_IPV6; + ipv6Hdr->priority = (tunKey->tos & 0xF0) >> 4; + ipv6Hdr->flow_lbl[0] = (tunKey->tos & 0x0F) << 4; + ipv6Hdr->payload_len = + htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof(*ethHdr)); + ipv6Hdr->nexthdr = IPPROTO_UDP; + ipv6Hdr->hop_limit = tunKey->ttl ? 
tunKey->ttl : VXLAN_DEFAULT_TTL; + ipv6Hdr->saddr = fwdInfo->srcIpAddr.Ipv6.sin6_addr; + ipv6Hdr->daddr = fwdInfo->dstIpAddr.Ipv6.sin6_addr; + + udpHdr = (UDPHdr *)((PCHAR)ipv6Hdr + sizeof(*ipv6Hdr)); + } /* UDP header */ - udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); udpHdr->source = htons(tunKey->flow_hash | MAXINT16); udpHdr->dest = htons(vportVxlan->dstPort); udpHdr->len = htons(NET_BUFFER_DATA_LENGTH(curNb) - headRoom + - sizeof *udpHdr + sizeof *vxlanHdr); + sizeof(*udpHdr) + sizeof(*vxlanHdr)); udpHdr->check = 0; /* VXLAN header */ - vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr); + vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof(*udpHdr)); vxlanHdr->flags1 = 0; vxlanHdr->locallyReplicate = 0; vxlanHdr->flags2 = 0; @@ -363,7 +390,7 @@ OvsEncapVxlan(POVS_VPORT_ENTRY vport, static __inline NDIS_STATUS OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb, - IPHdr *ipHdr, + EthHdr *ethHdr, UDPHdr *udpHdr, UINT32 packetLength) { @@ -378,14 +405,40 @@ OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl, checkSum = udpHdr->check; - l4Payload = packetLength - sizeof(EthHdr) - ipHdr->ihl * 4; - udpHdr->check = 0; - udpHdr->check = - IPPseudoChecksum((UINT32 *)&ipHdr->saddr, - (UINT32 *)&ipHdr->daddr, - IPPROTO_UDP, (UINT16)l4Payload); - udpHdr->check = CalculateChecksumNB(curNb, (UINT16)l4Payload, - sizeof(EthHdr) + ipHdr->ihl * 4); + switch (ethHdr->Type) { + case ETH_TYPE_IPV4_NBO: { + IPHdr *ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof(*ethHdr)); + + l4Payload = packetLength - sizeof(EthHdr) - ipHdr->ihl * 4; + udpHdr->check = 0; + udpHdr->check = + IPPseudoChecksum((UINT32 *)&ipHdr->saddr, + (UINT32 *)&ipHdr->daddr, + IPPROTO_UDP, (UINT16)l4Payload); + udpHdr->check = + CalculateChecksumNB(curNb, (UINT16)l4Payload, + sizeof(EthHdr) + ipHdr->ihl * 4); + break; + } + case ETH_TYPE_IPV6_NBO: { + IPv6Hdr *ipv6Hdr = (IPv6Hdr *)((PCHAR)ethHdr + sizeof(*ethHdr)); + + l4Payload = packetLength - sizeof(EthHdr) - sizeof(IPv6Hdr); + udpHdr->check = 0; + 
udpHdr->check = + IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->saddr, + (UINT32 *)&ipv6Hdr->daddr, + IPPROTO_UDP, (UINT16)l4Payload); + udpHdr->check = + CalculateChecksumNB(curNb, (UINT16)l4Payload, + sizeof(EthHdr) + sizeof(IPv6Hdr)); + break; + } + default: + OVS_LOG_ERROR("Invalid eth type: %d\n", ethHdr->Type); + ASSERT(!"Invalid eth type"); + } + if (checkSum != udpHdr->check) { OVS_LOG_TRACE("UDP checksum incorrect."); return NDIS_STATUS_INVALID_PACKET; @@ -407,12 +460,12 @@ NDIS_STATUS OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST curNbl, OvsIPTunnelKey *tunKey, + POVS_PACKET_HDR_INFO layers, PNET_BUFFER_LIST *newNbl) { PNET_BUFFER curNb; PMDL curMdl; EthHdr *ethHdr; - IPHdr *ipHdr; UDPHdr *udpHdr; VXLANHdr *vxlanHdr; UINT32 tunnelSize = 0, packetLength = 0; @@ -422,7 +475,7 @@ OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext, /* Check the length of the UDP payload */ curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); packetLength = NET_BUFFER_DATA_LENGTH(curNb); - tunnelSize = OvsGetVxlanTunHdrSize(); + tunnelSize = OvsGetVxlanTunHdrSize(layers->isIPv4 ? TRUE : FALSE); if (packetLength <= tunnelSize) { return NDIS_STATUS_INVALID_LENGTH; } @@ -442,24 +495,42 @@ OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext, curNbl = *newNbl; curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curMdl = NET_BUFFER_CURRENT_MDL(curNb); - bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority) + - NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + bufferStart = + (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); if (!bufferStart) { status = NDIS_STATUS_RESOURCES; goto dropNbl; } + bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); ethHdr = (EthHdr *)bufferStart; - /* XXX: Handle IP options. 
*/ - ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); - tunKey->src.Ipv4.sin_addr.s_addr = ipHdr->saddr; - tunKey->src.Ipv4.sin_family = AF_INET; - tunKey->dst.Ipv4.sin_addr.s_addr = ipHdr->daddr; - tunKey->dst.Ipv4.sin_family = AF_INET; - tunKey->tos = ipHdr->tos; - tunKey->ttl = ipHdr->ttl; - tunKey->pad = 0; - udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); + + if (ethHdr->Type == ETH_TYPE_IPV4_NBO) { + IPHdr *ipHdr; + /* XXX: Handle IP options. */ + ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof(*ethHdr)); + tunKey->src.Ipv4.sin_addr.s_addr = ipHdr->saddr; + tunKey->src.si_family = AF_INET; + tunKey->dst.Ipv4.sin_addr.s_addr = ipHdr->daddr; + tunKey->dst.si_family = AF_INET; + tunKey->tos = ipHdr->tos; + tunKey->ttl = ipHdr->ttl; + tunKey->pad = 0; + udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof(*ipHdr)); + } else { + IPv6Hdr *ipv6Hdr; + ASSERT(ethHdr->Type == ETH_TYPE_IPV6_NBO); + ipv6Hdr = (IPv6Hdr *)((PCHAR)ethHdr + sizeof(*ethHdr)); + tunKey->src.Ipv6.sin6_addr = ipv6Hdr->saddr; + tunKey->src.si_family = AF_INET6; + tunKey->dst.Ipv6.sin6_addr = ipv6Hdr->daddr; + tunKey->dst.si_family = AF_INET6; + tunKey->tos = (ipv6Hdr->priority << 4) | + ((ipv6Hdr->flow_lbl[0] & 0xF0) >> 4); + tunKey->ttl = ipv6Hdr->hop_limit; + tunKey->pad = 0; + udpHdr = (UDPHdr *)((PCHAR)ipv6Hdr + sizeof(*ipv6Hdr)); + } /* Validate if NIC has indicated checksum failure. */ status = OvsValidateUDPChecksum(curNbl, udpHdr->check == 0); @@ -469,13 +540,14 @@ OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext, /* Calculate and verify UDP checksum if NIC didn't do it. 
*/ if (udpHdr->check != 0) { - status = OvsCalculateUDPChecksum(curNbl, curNb, ipHdr, udpHdr, packetLength); + status = OvsCalculateUDPChecksum(curNbl, curNb, ethHdr, + udpHdr, packetLength); if (status != NDIS_STATUS_SUCCESS) { goto dropNbl; } } - vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr); + vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof(*udpHdr)); if (vxlanHdr->instanceID) { tunKey->flags = OVS_TNL_F_KEY; tunKey->tunnelId = VXLAN_VNI_TO_TUNNELID(vxlanHdr->vxlanID); @@ -539,6 +611,7 @@ OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, tunnelKey->dst.si_family = AF_INET; tunnelKey->ttl = nh->ttl; tunnelKey->tos = nh->tos; + if (VxlanHeader->instanceID) { tunnelKey->flags = OVS_TNL_F_KEY; tunnelKey->tunnelId = VXLAN_VNI_TO_TUNNELID(VxlanHeader->vxlanID); diff --git a/datapath-windows/ovsext/Vxlan.h b/datapath-windows/ovsext/Vxlan.h index be154a5..9e8b3c3 100644 --- a/datapath-windows/ovsext/Vxlan.h +++ b/datapath-windows/ovsext/Vxlan.h @@ -69,14 +69,15 @@ NDIS_STATUS OvsEncapVxlan(POVS_VPORT_ENTRY vport, NDIS_STATUS OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext, PNET_BUFFER_LIST curNbl, OvsIPTunnelKey *tunKey, + POVS_PACKET_HDR_INFO layers, PNET_BUFFER_LIST *newNbl); static __inline UINT32 -OvsGetVxlanTunHdrSize(VOID) +OvsGetVxlanTunHdrSize(BOOLEAN isIpv4) { /* XXX: Can L2 include VLAN at all? */ - return sizeof (EthHdr) + sizeof (IPHdr) + sizeof (UDPHdr) + - sizeof (VXLANHdr); + return sizeof(EthHdr) + (isIpv4 ? sizeof(IPHdr) : sizeof(IPv6Hdr)) + + sizeof (UDPHdr) + sizeof (VXLANHdr); } #define VXLAN_UDP_PORT 4789 -- 1.9.0.msysgit.0 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev