Hello,

On Fri, 2 Jun 2017, Sowmini Varadhan wrote:

> The command
>   # arp -s 62.2.0.1 a:b:c:d:e:f dev eth2
> adds an entry like the following (listed by "arp -an")
>   ? (62.2.0.1) at 0a:0b:0c:0d:0e:0f [ether] PERM on eth2
> but the symmetric deletion command
>   # arp -i eth2 -d 62.2.0.1
> does not remove the PERM entry from the table, and instead leaves behind
>   ? (62.2.0.1) at <incomplete> on eth2
> 
> The reason is that there is a refcnt of 1 for the arp_tbl itself
> (neigh_alloc starts off the entry with a refcnt of 1), thus
> the neigh_release() call from arp_invalidate() will (at best) just
> decrement the ref to 1, but will never actually free it from the
> table.
> 
> To fix this, we need to do something like neigh_forced_gc: if
> the refcnt is 1 (i.e., on the table's ref), remove the entry from
> the table and free it. This patch refactors and shares common code
> between neigh_forced_gc and the newly added neigh_remove_one.
> 
> A similar issue exists for IPv6 Neighbor Cache entries, and is fixed
> in a similar manner by this patch.
> 
> Signed-off-by: Sowmini Varadhan <sowmini.varad...@oracle.com>
> Reviewed-by: Julian Anastasov <j...@ssi.bg>

        Very good, thanks!

> ---
> v2: kbuild-test-robot compile error
> v3: do not export_symbol neigh_remove_one (David Miller comment)
> v4: rearrange locking for tbl->lock (Julian Anastasov comment)
> v5: minor touch-ups: removed retval from neigh_remove_one (Julian Anastasov),
>     got rid of checkpatch warnings, explicitly add net-next to subject line.
> 
>  include/net/neighbour.h |    1 +
>  net/core/neighbour.c    |   60 ++++++++++++++++++++++++++++++++++++++--------
>  net/ipv4/arp.c          |    4 +++
>  3 files changed, 54 insertions(+), 11 deletions(-)
> 
> diff --git a/include/net/neighbour.h b/include/net/neighbour.h
> index e4dd3a2..639b675 100644
> --- a/include/net/neighbour.h
> +++ b/include/net/neighbour.h
> @@ -317,6 +317,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, 
> const void *pkey,
>  int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 
> flags,
>                u32 nlmsg_pid);
>  void __neigh_set_probe_once(struct neighbour *neigh);
> +bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl);
>  void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);
>  int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
>  int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb);
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index d274f81..dadb5ee 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -118,6 +118,50 @@ unsigned long neigh_rand_reach_time(unsigned long base)
>  EXPORT_SYMBOL(neigh_rand_reach_time);
>  
>  
> +static bool neigh_del(struct neighbour *n, __u8 state,
> +                   struct neighbour __rcu **np, struct neigh_table *tbl)
> +{
> +     bool retval = false;
> +
> +     write_lock(&n->lock);
> +     if (atomic_read(&n->refcnt) == 1 && !(n->nud_state & state)) {
> +             struct neighbour *neigh;
> +
> +             neigh = rcu_dereference_protected(n->next,
> +                                               lockdep_is_held(&tbl->lock));
> +             rcu_assign_pointer(*np, neigh);
> +             n->dead = 1;
> +             retval = true;
> +     }
> +     write_unlock(&n->lock);
> +     if (retval)
> +             neigh_cleanup_and_release(n);
> +     return retval;
> +}
> +
> +bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
> +{
> +     struct neigh_hash_table *nht;
> +     void *pkey = ndel->primary_key;
> +     u32 hash_val;
> +     struct neighbour *n;
> +     struct neighbour __rcu **np;
> +
> +     nht = rcu_dereference_protected(tbl->nht,
> +                                     lockdep_is_held(&tbl->lock));
> +     hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
> +     hash_val = hash_val >> (32 - nht->hash_shift);
> +
> +     np = &nht->hash_buckets[hash_val];
> +     while ((n = rcu_dereference_protected(*np,
> +                                           lockdep_is_held(&tbl->lock)))) {
> +             if (n == ndel)
> +                     return neigh_del(n, 0, np, tbl);
> +             np = &n->next;
> +     }
> +     return false;
> +}
> +
>  static int neigh_forced_gc(struct neigh_table *tbl)
>  {
>       int shrunk = 0;
> @@ -140,19 +184,10 @@ static int neigh_forced_gc(struct neigh_table *tbl)
>                        * - nobody refers to it.
>                        * - it is not permanent
>                        */
> -                     write_lock(&n->lock);
> -                     if (atomic_read(&n->refcnt) == 1 &&
> -                         !(n->nud_state & NUD_PERMANENT)) {
> -                             rcu_assign_pointer(*np,
> -                                     rcu_dereference_protected(n->next,
> -                                               lockdep_is_held(&tbl->lock)));
> -                             n->dead = 1;
> -                             shrunk  = 1;
> -                             write_unlock(&n->lock);
> -                             neigh_cleanup_and_release(n);
> +                     if (neigh_del(n, NUD_PERMANENT, np, tbl)) {
> +                             shrunk = 1;
>                               continue;
>                       }
> -                     write_unlock(&n->lock);
>                       np = &n->next;
>               }
>       }
> @@ -1649,7 +1684,10 @@ static int neigh_delete(struct sk_buff *skb, struct 
> nlmsghdr *nlh,
>                          NEIGH_UPDATE_F_OVERRIDE |
>                          NEIGH_UPDATE_F_ADMIN,
>                          NETLINK_CB(skb).portid);
> +     write_lock_bh(&tbl->lock);
>       neigh_release(neigh);
> +     neigh_remove_one(neigh, tbl);
> +     write_unlock_bh(&tbl->lock);
>  
>  out:
>       return err;
> diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
> index e9f3386..a651c53 100644
> --- a/net/ipv4/arp.c
> +++ b/net/ipv4/arp.c
> @@ -1113,13 +1113,17 @@ static int arp_invalidate(struct net_device *dev, 
> __be32 ip)
>  {
>       struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev);
>       int err = -ENXIO;
> +     struct neigh_table *tbl = &arp_tbl;
>  
>       if (neigh) {
>               if (neigh->nud_state & ~NUD_NOARP)
>                       err = neigh_update(neigh, NULL, NUD_FAILED,
>                                          NEIGH_UPDATE_F_OVERRIDE|
>                                          NEIGH_UPDATE_F_ADMIN, 0);
> +             write_lock_bh(&tbl->lock);
>               neigh_release(neigh);
> +             neigh_remove_one(neigh, tbl);
> +             write_unlock_bh(&tbl->lock);
>       }
>  
>       return err;
> -- 
> 1.7.1

Regards

--
Julian Anastasov <j...@ssi.bg>

Reply via email to