On 6/1/26 11:02 PM, Leon Hwang wrote:
Currently, bpf_lwt_push_ip_encap() does not update skb->transport_header.
When a driver, e.g. ice, reuses the stale skb->transport_header to
offload checksum computation to NIC hardware, VxLAN packets encapsulated
by bpf_lwt_push_encap() helper may be dropped due to incorrect checksum.

Update skb->transport_header in bpf_lwt_push_ip_encap() whenever the
encapsulated packet uses UDP, so checksum offload works correctly.

Fixes: 52f278774e79 ("bpf: implement BPF_LWT_ENCAP_IP mode in bpf_lwt_push_encap")
Cc: Leon Hwang <[email protected]>
Signed-off-by: Leon Hwang <[email protected]>
---
  net/core/lwt_bpf.c | 11 +++++++++++
  1 file changed, 11 insertions(+)

diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index f71ef82a5f3d..65d1dfbf3312 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -599,6 +599,7 @@ static int handle_gso_encap(struct sk_buff *skb, bool ipv4, 
int encap_len)
int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress)
  {
+       bool is_udp_tunnel;
        struct iphdr *iph;
        bool ipv4;
        int err;
@@ -612,10 +613,16 @@ int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, 
u32 len, bool ingress)
                ipv4 = true;
                if (unlikely(len < iph->ihl * 4))
                        return -EINVAL;
+               is_udp_tunnel = iph->protocol == IPPROTO_UDP;
+               if (unlikely(is_udp_tunnel && len < iph->ihl * 4 + 
sizeof(struct udphdr)))
+                       return -EINVAL;
        } else if (iph->version == 6) {
                ipv4 = false;
                if (unlikely(len < sizeof(struct ipv6hdr)))
                        return -EINVAL;
+               is_udp_tunnel = ((struct ipv6hdr *)iph)->nexthdr == NEXTHDR_UDP;
+               if (unlikely(is_udp_tunnel && len < sizeof(struct ipv6hdr) + 
sizeof(struct udphdr)))
+                       return -EINVAL;
        } else {
                return -EINVAL;
        }
@@ -637,6 +644,10 @@ int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, 
u32 len, bool ingress)
        if (ingress)
                skb_postpush_rcsum(skb, iph, len);
        skb_reset_network_header(skb);
+       if (ipv4 && is_udp_tunnel)
+               skb_set_transport_header(skb, skb_network_offset(skb) + 
iph->ihl * 4);
+       else if (!ipv4 && is_udp_tunnel)
+               skb_set_transport_header(skb, skb_network_offset(skb) + 
sizeof(struct ipv6hdr));


I think GRE is also affected. Why not unconditionally set the transport header to the network offset plus the outer IP header length, regardless of the outer protocol?

        memcpy(skb_network_header(skb), hdr, len);
        bpf_compute_data_pointers(skb);
        skb_clear_hash(skb);

Reply via email to