Hi David, thanks for the comments. On Tue, Sep 13, 2016 at 8:24 PM, David Ahern <d...@cumulusnetworks.com> wrote: > On 9/12/16 12:01 PM, Mahesh Bandewar wrote: > >> +struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, >> + u16 proto) >> +{ >> + struct ipvl_addr *addr; >> + struct net_device *sdev; >> + >> + addr = ipvlan_skb_to_addr(skb, dev); >> + if (!addr) >> + goto out; >> + >> + sdev = addr->master->dev; >> + switch (proto) { >> + case AF_INET: >> + { >> + int err; >> + struct iphdr *ip4h = ip_hdr(skb); >> + >> + err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr, >> + ip4h->tos, sdev); >> + if (unlikely(err)) >> + goto out; >> + break; >> + } >> + case AF_INET6: >> + { >> + struct dst_entry *dst; >> + struct ipv6hdr *ip6h = ipv6_hdr(skb); >> + int flags = RT6_LOOKUP_F_HAS_SADDR; >> + struct flowi6 fl6 = { >> + .flowi6_iif = sdev->ifindex, >> + .daddr = ip6h->daddr, >> + .saddr = ip6h->saddr, >> + .flowlabel = ip6_flowinfo(ip6h), >> + .flowi6_mark = skb->mark, >> + .flowi6_proto = ip6h->nexthdr, >> + }; >> + >> + skb_dst_drop(skb); >> + dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags); >> + skb_dst_set(skb, dst); >> + break; >> + } >> + default: >> + break; >> + } > > Nit: why not put the above in separate per-version functions (ipvlan_ip_rcv > and ipvlan_ip6_rcv) similar to what is done for ipvlan_process_outbound? > I can but it's small enough to have it together. Also 'proto' is a parameter to the handler and makes it easier However do you see any issue having just one function?
> >> + >> +out: >> + return skb; >> +} >> + >> +unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, >> + const struct nf_hook_state *state) >> +{ >> + struct ipvl_addr *addr; >> + unsigned int len; >> + >> + addr = ipvlan_skb_to_addr(skb, skb->dev); >> + if (!addr) >> + goto out; >> + >> + skb->dev = addr->master->dev; >> + len = skb->len + ETH_HLEN; >> + ipvlan_count_rx(addr->master, len, true, false); >> +out: >> + return NF_ACCEPT; >> +} >> diff --git a/drivers/net/ipvlan/ipvlan_main.c >> b/drivers/net/ipvlan/ipvlan_main.c >> index 18b4e8c7f68a..d02be277e1db 100644 >> --- a/drivers/net/ipvlan/ipvlan_main.c >> +++ b/drivers/net/ipvlan/ipvlan_main.c >> @@ -9,24 +9,65 @@ >> >> #include "ipvlan.h" >> >> +static struct nf_hook_ops ipvl_nfops[] __read_mostly = { >> + { >> + .hook = ipvlan_nf_input, >> + .pf = NFPROTO_IPV4, >> + .hooknum = NF_INET_LOCAL_IN, >> + .priority = INT_MAX, >> + }, >> + { >> + .hook = ipvlan_nf_input, >> + .pf = NFPROTO_IPV6, >> + .hooknum = NF_INET_LOCAL_IN, >> + .priority = INT_MAX, >> + }, >> +}; >> + >> +static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = { >> + .l3mdev_l3_rcv = ipvlan_l3_rcv, >> +}; >> + >> static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device >> *dev) >> { >> ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj; >> } >> >> -static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) >> +static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) >> { >> struct ipvl_dev *ipvlan; >> + int err = 0; >> >> + ASSERT_RTNL(); >> if (port->mode != nval) { >> + if (nval == IPVLAN_MODE_L3S) { >> + port->dev->l3mdev_ops = &ipvl_l3mdev_ops; >> + port->dev->priv_flags |= IFF_L3MDEV_MASTER; >> + if (!port->ipt_hook_added) { >> + err = _nf_register_hooks(ipvl_nfops, >> + >> ARRAY_SIZE(ipvl_nfops)); > > That's clever. The hooks are not device based so why do the register for each > device? Alternatively, you could use a static dst like VRF does for Tx. In > the ipvlan rcv function set the dst input handler to send the packet back to > the ipvlan driver via dst->input. From there send the packet through the > netfilter hooks and then do the real lookup, update the dst and call its > input function. I have working code for VRF driver somewhere that shows how > to do this. > Do you mean per slave device? It's registering it for a port (so only once) depending on the mode. If the mode != l3s it wont bother registering to keep current modes as they are. I'm sure dst idea could work as well (as you have suggested) but l3mdev + ipt-hook is simpler and probably far less intrusive. > >> + if (!err) >> + port->ipt_hook_added = true; >> + else >> + return err; >> + } >> + } else { >> + port->dev->priv_flags &= ~IFF_L3MDEV_MASTER; >> + port->dev->l3mdev_ops = NULL; >> + if (port->ipt_hook_added) >> + _nf_unregister_hooks(ipvl_nfops, >> + ARRAY_SIZE(ipvl_nfops)); >> + port->ipt_hook_added = false; >> + } > >