On Tue, Apr 9, 2013 at 6:41 PM, Jesse Gross <je...@nicira.com> wrote:

> On Tue, Apr 9, 2013 at 3:03 PM, Pravin B Shelar <pshe...@nicira.com>
> wrote:
> > diff --git a/datapath/datapath.c b/datapath/datapath.c
> > index 9cd4b0e..f0191ce 100644
> > --- a/datapath/datapath.c
> > +++ b/datapath/datapath.c
> >  /**
> >   * DOC: Locking:
> >   *
> > - * Writes to device state (add/remove datapath, port, set operations on vports,
> > - * etc.) are protected by RTNL.
> > - *
> > - * Writes to other state (flow table modifications, set miscellaneous datapath
> > - * parameters, etc.) are protected by genl_mutex.  The RTNL lock nests inside
> > - * genl_mutex.
> > + * All writes e.g. Writes to device state (add/remove datapath, port, set
> > + * operations on vports, etc.), Writes to other state (flow table
> > + * modifications, set miscellaneous datapath parameters, etc.) are protected
> > + * by ovs_lock.
> >   *
> >   * Reads are protected by RCU.
>
> I think we should probably add a comment about the role and nesting of
> RTNL here.
>
> ok.
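
A sketch of what the addition could say (wording to be settled; it just
states the nesting this patch implements):

    /* The RTNL lock nests inside ovs_mutex. */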


> > -/* Called with genl_mutex and optionally with RTNL lock also. */
> > +/* Called with ovs_mutex and optionally with ovs_mutex also. */
> >  static struct datapath *lookup_datapath(struct net *net,
>
> I think we can simplify the comment to say that this is protected with
> ovs_mutex.
>
> ok. :-)
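
e.g. simply:

    /* Must be called with ovs_mutex. */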


> > @@ -1681,14 +1720,7 @@ static void __dp_destroy(struct datapath *dp)
> >         }
> >
> >         list_del(&dp->list_node);
> > -       ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
> > -
> > -       /* rtnl_unlock() will wait until all the references to devices that
> > -        * are pending unregistration have been dropped.  We do it here to
> > -        * ensure that any internal devices (which contain DP pointers) are
> > -        * fully destroyed before freeing the datapath.
> > -        */
> > -       rtnl_unlock();
> > +       ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
>
> I think it would be good to have a comment in place of the one that is
> being deleted explaining why OVSP_LOCAL has to be handled last.
>
> ok.
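
A sketch of the replacement comment (wording open to suggestion):

    /* OVSP_LOCAL is the datapath internal port.  Its netdev holds a
     * pointer to the datapath, so it must be detached only after all
     * other ports are gone and before the datapath itself is freed. */
    ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));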


> > -static int __rehash_flow_table(void *dummy)
> > +static int __rehash_flow_table(void)
> >  {
> >         struct datapath *dp;
> >         struct net *net;
> >
> > +       ovs_lock();
> >         rtnl_lock();
> >         for_each_net(net) {
> >                 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >
> >                 list_for_each_entry(dp, &ovs_net->dps, list_node) {
> > -                       struct flow_table *old_table = genl_dereference(dp->table);
> > +                       struct flow_table *old_table = ovsl_dereference(dp->table);
> >                         struct flow_table *new_table;
> >
> >                         new_table = ovs_flow_tbl_rehash(old_table);
> > @@ -2290,22 +2347,24 @@ static int __rehash_flow_table(void *dummy)
> >                 }
> >         }
> >         rtnl_unlock();
> > +       ovs_unlock();
> >         return 0;
> >  }
> >
> >  static void rehash_flow_table(struct work_struct *work)
> >  {
> > -       genl_exec(__rehash_flow_table, NULL);
> > +       __rehash_flow_table();
>
> I think we can now merge together the two rehash_flow_table functions
> to simplify things and make it look more like upstream.
>
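
Merged, it would look roughly like this (a sketch; the rescheduling at the
end assumes the existing rehash_flow_wq/REHASH_FLOW_INTERVAL setup stays):

    static void rehash_flow_table(struct work_struct *work)
    {
            struct datapath *dp;
            struct net *net;

            ovs_lock();
            rtnl_lock();
            for_each_net(net) {
                    struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

                    list_for_each_entry(dp, &ovs_net->dps, list_node) {
                            struct flow_table *old_table = ovsl_dereference(dp->table);
                            struct flow_table *new_table;

                            new_table = ovs_flow_tbl_rehash(old_table);
                            if (!IS_ERR(new_table)) {
                                    rcu_assign_pointer(dp->table, new_table);
                                    ovs_flow_tbl_deferred_destroy(old_table);
                            }
                    }
            }
            rtnl_unlock();
            ovs_unlock();

            schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
    }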
> > -static int dp_destroy_all(void *data)
> > +static int dp_destroy_all(struct ovs_net *ovs_net)
> >  {
> >         struct datapath *dp, *dp_next;
> > -       struct ovs_net *ovs_net = data;
> >
> > +       ovs_lock();
> >         list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
> >                 __dp_destroy(dp);
> > +       ovs_unlock();
> >
> >         return 0;
> >  }
> > @@ -2322,7 +2382,8 @@ static void __net_exit ovs_exit_net(struct net *net)
> >  {
> >         struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
> >
> > -       genl_exec(dp_destroy_all, ovs_net);
> > +       dp_destroy_all(ovs_net);
> > +       cancel_delayed_work_sync(&ovs_net->work);
> >  }
>
> Similarly, I think we can merge dp_destroy_all into ovs_exit_net.
>
> ok.
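
Merged, it becomes (sketch; 'work' is the member name before the rename
suggested above):

    static void __net_exit ovs_exit_net(struct net *net)
    {
            struct datapath *dp, *dp_next;
            struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

            ovs_lock();
            list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
                    __dp_destroy(dp);
            ovs_unlock();

            cancel_delayed_work_sync(&ovs_net->work);
    }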


> > diff --git a/datapath/datapath.h b/datapath/datapath.h
> > index 9bc98fb..58d7169 100644
> > --- a/datapath/datapath.h
> > +++ b/datapath/datapath.h
> > @@ -142,9 +142,18 @@ struct dp_upcall_info {
> >  struct ovs_net {
> >         struct list_head dps;
> >         struct vport_net vport_net;
> > +       struct delayed_work work;
>
> We should probably give the work struct a name that makes it more
> obvious that it is for notifications.
>
> ok

> > +extern struct mutex ovs_mutex;
>
> I think we might just want to provide a lockdep_ovsl_is_held()
> function rather than exposing ovs_mutex directly.
>
> ok.
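
A sketch of that helper (ovs_mutex then stays static to datapath.c; the
debug_locks test mirrors similar helpers elsewhere in the kernel):

    /* datapath.h */
    #ifdef CONFIG_LOCKDEP
    int lockdep_ovsl_is_held(void);
    #else
    #define lockdep_ovsl_is_held()  1
    #endif

    /* datapath.c */
    #ifdef CONFIG_LOCKDEP
    int lockdep_ovsl_is_held(void)
    {
            if (debug_locks)
                    return lockdep_is_held(&ovs_mutex);
            else
                    return 1;
    }
    #endif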

> > -static inline struct vport *ovs_vport_rtnl_rcu(const struct datapath *dp, int port_no)
> > +static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp, int port_no)
> >  {
> > -       WARN_ON_ONCE(!rcu_read_lock_held() && !rtnl_is_locked());
> > +       WARN_ON_ONCE(!rcu_read_lock_held() && !ovs_is_locked());
> >         return ovs_lookup_vport(dp, port_no);
> >  }
> >
> > -static inline struct vport *ovs_vport_rtnl(const struct datapath *dp, int port_no)
> > +static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_no)
> >  {
> > -       ASSERT_RTNL();
> > +       ASSERT_OVSL();
>
> For these two functions it seems like it might be better to use the
> lockdep version of the check rather than unconditionally doing it.
>
> do you mean lockdep_assert_held() ?
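
With the lockdep_ovsl_is_held() helper suggested above, a sketch of what
the lockdep-based checks could look like:

    #define ASSERT_OVSL()   WARN_ON(unlikely(!lockdep_ovsl_is_held()))

    static inline struct vport *ovs_vport_ovsl_rcu(const struct datapath *dp,
                                                   int port_no)
    {
            WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
            return ovs_lookup_vport(dp, port_no);
    }

    static inline struct vport *ovs_vport_ovsl(const struct datapath *dp,
                                               int port_no)
    {
            ASSERT_OVSL();
            return ovs_lookup_vport(dp, port_no);
    }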


> > +void dp_notify_wq(struct work_struct *work);
>
> We should prefix all exposed symbols with ovs_.
>
> ok.


> > diff --git a/datapath/dp_notify.c b/datapath/dp_notify.c
> > index c9eeafe..1e19125 100644
> > --- a/datapath/dp_notify.c
> > +++ b/datapath/dp_notify.c
> > +static void vport_destroy(struct vport *vport)
>
> We should give this a more distinctive name - right now it sounds like
> it belongs in vport.c.
>
> ok.


> > +static atomic_t unreg_events = ATOMIC_INIT(0);
>
> We should probably include the right headers for this.
>
> ok.


> > +void dp_notify_wq(struct work_struct *work)
> > +{
> > +       struct ovs_net *ovs_net = container_of(work, struct ovs_net, work.work);
> > +       struct datapath *dp;
> > +       int count;
>
> I think we probably need a comment explaining the rationale behind all of
> this.
>
> ok.


> > +redo:
> > +       count = atomic_read(&unreg_events);
> > +       if (!count)
> > +               return;
>
> If there are devices unregistering in a different namespace, this will
> cause us to loop until everything has been unregistered everywhere.
> However, since we'll decrement the counter for each pass where nothing
> changes, we can actually prevent the devices in the other namespace from
> unregistering. It seems like a per-namespace counter would be safer.
>
> ok.
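
So struct ovs_net would carry the counter itself, roughly (sketch;
'dp_notify_work' is the more descriptive name suggested earlier):

    struct ovs_net {
            struct list_head dps;
            struct vport_net vport_net;
            struct delayed_work dp_notify_work;
            atomic_t unreg_events;          /* per-namespace, not global */
    };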


> I think there is potentially another race condition here: if a
> notification comes after we retrieve the value of count but before we
> exit the workqueue. The counter will be incremented but another work
> instance won't be scheduled since one is already running. Essentially
> we're trying to emulate a semaphore here but actually doing that
> directly would require introducing a new thread, which I really don't
> want to do.
>
The pending flag is cleared before the work function actually starts
executing, so a notification arriving while the handler runs will simply
re-queue the work. I think this race is not possible.


> > +       ovs_lock();
> > +       list_for_each_entry(dp, &ovs_net->dps, list_node) {
> > +               int i;
> > +
> > +               for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
> > +                       struct vport *vport;
> > +                       struct hlist_node *n;
> > +
> > +                       hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
> > +                               struct netdev_vport *netdev_vport;
> > +
> > +                               if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
> > +                                       continue;
> > +
> > +                               netdev_vport = netdev_vport_priv(vport);
> > +                               if (netdev_vport->dev->reg_state != NETREG_UNREGISTERED)
>
> I think we also have to check for NETREG_UNREGISTERING.  This is set
> while the notifiers are running, and if there are a lot of them, they
> could still be running when we get here.
>
> ok.
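
So the test becomes (sketch):

    netdev_vport = netdev_vport_priv(vport);
    if (netdev_vport->dev->reg_state != NETREG_UNREGISTERING &&
        netdev_vport->dev->reg_state != NETREG_UNREGISTERED)
            continue;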

> I think we need a flag to check whether we have deleted any ports
> rather than just looking at the count. Otherwise we could have
> decremented the count and someone else incremented it.
>
> right, I will fix it.
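
A sketch of the flag-based loop, folding in the NETREG_UNREGISTERING check
from above (counter handling omitted for brevity; vport_destroy and
dp_notify_wq are this patch's names, both still to be renamed):

    static void dp_notify_wq(struct work_struct *work)
    {
            struct ovs_net *ovs_net = container_of(work, struct ovs_net, work.work);
            struct datapath *dp;
            bool deleted;

    redo:
            deleted = false;
            ovs_lock();
            list_for_each_entry(dp, &ovs_net->dps, list_node) {
                    int i;

                    for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
                            struct vport *vport;
                            struct hlist_node *n;

                            hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
                                    struct netdev_vport *netdev_vport;

                                    if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
                                            continue;

                                    netdev_vport = netdev_vport_priv(vport);
                                    if (netdev_vport->dev->reg_state == NETREG_UNREGISTERING ||
                                        netdev_vport->dev->reg_state == NETREG_UNREGISTERED) {
                                            vport_destroy(vport);
                                            deleted = true;
                                    }
                            }
                    }
            }
            ovs_unlock();

            if (deleted)
                    goto redo;      /* rescan: more devices may have unregistered */
    }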


> > @@ -39,26 +103,11 @@ static int dp_device_event(struct notifier_block *unused, unsigned long event,
> >
> >         switch (event) {
> >         case NETDEV_UNREGISTER:
>
> At this point, we probably could just simplify this into an if statement.
>
> ok.


> > +               ovs_net = net_generic(dev_net(dev), ovs_net_id);
> > +               schedule_delayed_work(&ovs_net->work, 0);
>
> Can we just use queue_work() here?
>
> ok.
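
With the switch collapsed to an if and queue_work() (sketch; assumes the
delayed_work becomes a plain work_struct named dp_notify_work, since no
delay is ever used):

    static int dp_device_event(struct notifier_block *unused, unsigned long event,
                               void *ptr)
    {
            struct net_device *dev = ptr;
            struct ovs_net *ovs_net;

            if (event == NETDEV_UNREGISTER) {
                    ovs_net = net_generic(dev_net(dev), ovs_net_id);
                    atomic_inc(&ovs_net->unreg_events);     /* per-ns counter */
                    queue_work(system_wq, &ovs_net->dp_notify_work);
            }

            return NOTIFY_DONE;
    }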


> > diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c
> > index 727194e..e556b2c 100644
> > --- a/datapath/vport-netdev.c
> > +++ b/datapath/vport-netdev.c
> > @@ -162,10 +163,12 @@ static struct vport *netdev_create(const struct vport_parms *parms)
> >         dev_disable_lro(netdev_vport->dev);
> >  #endif
> >         netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
> > +       rtnl_unlock();
> >
> >         return vport;
> >
> >  error_put:
> > +       rtnl_unlock();
>
> We can jump to error_put from the check for IFF_LOOPBACK,
> ARPHRD_ETHER, and is_internal_device.  We aren't yet holding RTNL so
> this could try to unlock a mutex that we don't hold.
>
> ok.
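
A sketch of the fix: take RTNL only around the registration step and give
the locked region its own label, so the early 'goto error_put' paths never
touch RTNL (the error_unlock name is illustrative):

    rtnl_lock();
    err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
                                     vport);
    if (err)
            goto error_unlock;

    dev_set_promiscuity(netdev_vport->dev, 1);
    netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
    rtnl_unlock();

    return vport;

error_unlock:
    rtnl_unlock();
error_put:
    dev_put(netdev_vport->dev);
error_free_vport:
    ovs_vport_free(vport);
error:
    return ERR_PTR(err);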


> > diff --git a/datapath/vport.c b/datapath/vport.c
> > index e9e1444..0a03799 100644
> > --- a/datapath/vport.c
> > +++ b/datapath/vport.c
> > @@ -222,7 +222,7 @@ struct vport *ovs_vport_add(const struct vport_parms *parms)
> >         int err = 0;
> >         int i;
> >
> > -       ASSERT_RTNL();
> > +       ASSERT_OVSL();
>
> Now that OVS mutex is common to the whole module, I'm not sure that we
> really need all of these assertions.
>
> ok.


> If you haven't already, can you please also do some testing with lockdep?
>

yes, lockdep is ok with these changes.