On Tue, Feb 12, 2019 at 6:58 PM David Ahern <dsah...@gmail.com> wrote: > > On 2/12/19 10:32 AM, Peter Oskolkov wrote: > > @@ -148,6 +174,87 @@ static int xmit_check_hhlen(struct sk_buff *skb) > > return 0; > > } > > > > +static int bpf_lwt_xmit_reroute(struct sk_buff *skb) > > +{ > > + struct net_device *l3mdev = l3mdev_master_dev_rcu(skb_dst(skb)->dev); > > + int oif = l3mdev ? l3mdev->ifindex : 0; > > + struct dst_entry *dst = NULL; > > + struct sock *sk; > > + struct net *net; > > + bool ipv4; > > + int err; > > + > > + if (skb->protocol == htons(ETH_P_IP)) > > + ipv4 = true; > > + else if (skb->protocol == htons(ETH_P_IPV6)) > > + ipv4 = false; > > + else > > + return -EAFNOSUPPORT; > > + > > + sk = sk_to_full_sk(skb->sk); > > + if (sk) { > > + if (sk->sk_bound_dev_if) > > + oif = sk->sk_bound_dev_if; > > + net = sock_net(sk); > > + } else { > > + net = dev_net(skb_dst(skb)->dev); > > + } > > + > > + if (ipv4) { > > + struct iphdr *iph = ip_hdr(skb); > > + struct flowi4 fl4 = {}; > > + struct rtable *rt; > > + > > + fl4.flowi4_oif = oif; > > + fl4.flowi4_mark = skb->mark; > > + fl4.flowi4_uid = sock_net_uid(net, sk); > > + fl4.flowi4_tos = RT_TOS(iph->tos); > > + fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; > > + fl4.flowi4_proto = iph->protocol; > > + fl4.daddr = iph->daddr; > > + fl4.saddr = iph->saddr; > > + > > + rt = ip_route_output_key(net, &fl4); > > + if (IS_ERR(rt) || rt->dst.error) > > + return -EINVAL; > > I think you have a dst leak here if rt is valid but the lookup is a > reject (e.g., unreachable or blackhole).
Thanks, David! I was not able to reproduce the leak, but based on your suggestion and similar code elsewhere, I made a change in v11 to explicitly release the dst when the lookup returns one with an error set. > > > + dst = &rt->dst; > > + } else { > > + struct ipv6hdr *iph6 = ipv6_hdr(skb); > > + struct flowi6 fl6 = {}; > > + > > + fl6.flowi6_oif = oif; > > + fl6.flowi6_mark = skb->mark; > > + fl6.flowi6_uid = sock_net_uid(net, sk); > > + fl6.flowlabel = ip6_flowinfo(iph6); > > + fl6.flowi6_proto = iph6->nexthdr; > > + fl6.daddr = iph6->daddr; > > + fl6.saddr = iph6->saddr; > > + > > + err = ipv6_stub->ipv6_dst_lookup(net, skb->sk, &dst, &fl6); > > + if (err || IS_ERR(dst) || dst->error) > > + return -EINVAL; > > same here. > > You could check this by adding a route with unreachable as the target in > your tests. Test cleanup and namespace teardown will tell you pretty quick.