On Wed, Jan 24, 2018 at 6:42 AM, Roman Kapl <c...@rkapl.cz> wrote: > Tunnel devices often use skb_dst(skb)->ops, but ops are not implemented > for metadata tunnel destinations. Use skb_valid_dst to check if skb_dst > is a real (non-metadata) destination. > > Such packets can easily be crafted using tc tunnel_key actions or BPF > and will crash the tunnels, as observed at least with sit, geneve and > vxlan. Some of these tunnels are also intended to be used with metadata > destinations, so this represents a loss of functionality. > > There might be more places with similar patterns, but sometimes it is > hard to tell if metadata dst packets can reach them, so this patch is > not exhaustive. This patch is trying to fix a lot of places, and there may be more. But all because of the lack of .update_pmtu or .neigh_lookup. (dst_link_failure is safe, btw)
Not sure if it will be better to add .update_pmtu, .neigh_lookup and even .redirect for md_dst_ops, but just leave them empty ? like fake_dst_ops in br_nf. Let's see other's opinions. > > Fixes: 52a589d51f10 ("geneve: update skb dst pmtu on tx path") > Fixes: a93bf0ff4490 ("vxlan: update skb dst pmtu on tx path") > Signed-off-by: Roman Kapl <c...@rkapl.cz> > --- > drivers/infiniband/ulp/ipoib/ipoib_cm.c | 3 ++- > drivers/net/geneve.c | 5 +++-- > drivers/net/vxlan.c | 5 +++-- > drivers/s390/net/qeth_l3_main.c | 7 +++++-- > include/net/ip6_tunnel.h | 1 + > net/ipv4/ip_tunnel.c | 2 +- > net/ipv4/ip_vti.c | 3 ++- > net/ipv6/sit.c | 7 ++++--- > 8 files changed, 21 insertions(+), 12 deletions(-) > > diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c > b/drivers/infiniband/ulp/ipoib/ipoib_cm.c > index 2c13123bfd69..65eacafc898f 100644 > --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c > +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c > @@ -40,6 +40,7 @@ > #include <linux/moduleparam.h> > #include <linux/sched/signal.h> > #include <linux/sched/mm.h> > +#include <net/dst_metadata.h> > > #include "ipoib.h" > > @@ -1456,7 +1457,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, > struct sk_buff *skb, > struct ipoib_dev_priv *priv = ipoib_priv(dev); > int e = skb_queue_empty(&priv->cm.skb_queue); > > - if (skb_dst(skb)) > + if (skb_valid_dst(skb)) > skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); > > skb_queue_tail(&priv->cm.skb_queue, skb); > diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c > index 195e0d0add8d..2e5d2bc8d851 100644 > --- a/drivers/net/geneve.c > +++ b/drivers/net/geneve.c > @@ -19,6 +19,7 @@ > #include <net/rtnetlink.h> > #include <net/geneve.h> > #include <net/protocol.h> > +#include <net/dst_metadata.h> > > #define GENEVE_NETDEV_VER "0.6" > > @@ -825,7 +826,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct > net_device *dev, > if (IS_ERR(rt)) > return PTR_ERR(rt); > > - if (skb_dst(skb)) { > + if (skb_valid_dst(skb)) { > int mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr) - > GENEVE_BASE_HLEN - info->options_len - 14; > > @@ -871,7 +872,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct > net_device *dev, > if (IS_ERR(dst)) > return PTR_ERR(dst); > > - if (skb_dst(skb)) { > + if (skb_valid_dst(skb)) { > int mtu = dst_mtu(dst) - sizeof(struct ipv6hdr) - > GENEVE_BASE_HLEN - info->options_len - 14; > > diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c > index 82090ae7ced1..b0e96746bc2e 100644 > --- a/drivers/net/vxlan.c > +++ b/drivers/net/vxlan.c > @@ -28,6 +28,7 @@ > #include <net/netns/generic.h> > #include <net/tun_proto.h> > #include <net/vxlan.h> > +#include <net/dst_metadata.h> > > #if IS_ENABLED(CONFIG_IPV6) > #include <net/ip6_tunnel.h> > @@ -2155,7 +2156,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct > net_device *dev, > } > > ndst = &rt->dst; > - if (skb_dst(skb)) { > + if (skb_valid_dst(skb)) { > int mtu = dst_mtu(ndst) - VXLAN_HEADROOM; > > skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, > @@ -2197,7 +2198,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct > net_device *dev, > goto out_unlock; > } > > - if (skb_dst(skb)) { > + if (skb_valid_dst(skb)) { > int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM; > > skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, > diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c > index b0c888e86cd4..694efab9cccd 100644 > --- a/drivers/s390/net/qeth_l3_main.c > +++ b/drivers/s390/net/qeth_l3_main.c > @@ -35,6 +35,7 @@ > #include <net/ip6_fib.h> > #include <net/ip6_checksum.h> > #include <net/iucv/af_iucv.h> > +#include <net/dst_metadata.h> > #include <linux/hashtable.h> > > #include "qeth_l3.h" > @@ -2259,9 +2260,11 @@ static int qeth_l3_get_cast_type(struct sk_buff *skb) > struct dst_entry *dst; > > rcu_read_lock(); > - dst = skb_dst(skb); > - if (dst) > + if (skb_valid_dst(skb)) { > + dst = skb_dst(skb); > n = dst_neigh_lookup_skb(dst, skb); > + } > + > if (n) { > int cast_type = n->type; > > diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h > index 236e40ba06bf..1a704dfe5e1a 100644 > --- a/include/net/ip6_tunnel.h > +++ b/include/net/ip6_tunnel.h > @@ -148,6 +148,7 @@ struct net *ip6_tnl_get_link_net(const struct net_device > *dev); > int ip6_tnl_get_iflink(const struct net_device *dev); > int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu); > > +/* Must be called with valid skb_dst */ > static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, > struct net_device *dev) > { > diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c > index 5ddb1cb52bd4..9256e8b9c538 100644 > --- a/net/ipv4/ip_tunnel.c > +++ b/net/ipv4/ip_tunnel.c > @@ -520,7 +520,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct > sk_buff *skb, > else > mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; > > - if (skb_dst(skb)) > + if (skb_valid_dst(skb)) > skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); > > if (skb->protocol == htons(ETH_P_IP)) { > diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c > index 949f432a5f04..fdb20a95b819 100644 > --- a/net/ipv4/ip_vti.c > +++ b/net/ipv4/ip_vti.c > @@ -43,6 +43,7 @@ > #include <net/xfrm.h> > #include <net/net_namespace.h> > #include <net/netns/generic.h> > +#include <net/dst_metadata.h> > > static struct rtnl_link_ops vti_link_ops __read_mostly; > > @@ -172,7 +173,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct > net_device *dev, > int err; > int mtu; > > - if (!dst) { > + if (!skb_valid_dst(skb)) { > dev->stats.tx_carrier_errors++; > goto tx_error_icmp; > } > diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c > index d7dc23c1b2ca..50a5be98462f 100644 > --- a/net/ipv6/sit.c > +++ b/net/ipv6/sit.c > @@ -55,6 +55,7 @@ > #include <net/dsfield.h> > #include <net/net_namespace.h> > #include <net/netns/generic.h> > +#include <net/dst_metadata.h> > > /* > This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c > @@ -837,7 +838,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, > struct neighbour *neigh = NULL; > bool do_tx_error = false; > > - if (skb_dst(skb)) > + if (skb_valid_dst(skb)) > neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); > > if (!neigh) { > @@ -866,7 +867,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, > struct neighbour *neigh = NULL; > bool do_tx_error = false; > > - if (skb_dst(skb)) > + if (skb_valid_dst(skb)) > neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); > > if (!neigh) { > @@ -934,7 +935,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, > df = 0; > } > > - if (tunnel->parms.iph.daddr && skb_dst(skb)) > + if (tunnel->parms.iph.daddr && skb_valid_dst(skb)) > skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, > skb, mtu); > > if (skb->len > mtu && !skb_is_gso(skb)) { > -- > 2.15.1 >