The "groups" container is protected with a rwlock, and each group is
protected with their own rwlock.  Whenever groups are looked up, the
container's lock is taken.  If a group is found, the group is locked
before the container is released, as otherwise the group might be
deleted while the lock is being acquired.  The group's hmap_node is
considered part of the container, and is governed by the container's
lock.  This means that while a reader is still accessing the group,
the hmap itself may be rebalanced, or the group may be removed from
the container for deletion, but the group itself is not deleted as
long as there are any readers left.
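
As an illustration only, here is a minimal standalone sketch of that
lookup protocol, modeled with plain pthread rwlocks rather than the
ovs_rwlock wrappers used in the patch; struct group, group_lookup() and
group_release() are simplified stand-ins (a singly linked list stands
in for the hmap), not the actual OVS structures or functions.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct group {
    pthread_rwlock_t rwlock;   /* Per-group lock. */
    unsigned int group_id;
    struct group *next;        /* Stand-in for the hmap linkage. */
};

static pthread_rwlock_t container_rwlock = PTHREAD_RWLOCK_INITIALIZER;
static struct group *groups;   /* List head, guarded by container_rwlock. */

/* On success, returns true with '*grp' read-locked; the caller must call
 * group_release() when done with it. */
static bool
group_lookup(unsigned int group_id, struct group **grp)
{
    pthread_rwlock_rdlock(&container_rwlock);
    for (*grp = groups; *grp; *grp = (*grp)->next) {
        if ((*grp)->group_id == group_id) {
            /* Lock the group before dropping the container's lock, so
             * the group cannot be deleted in between. */
            pthread_rwlock_rdlock(&(*grp)->rwlock);
            pthread_rwlock_unlock(&container_rwlock);
            return true;
        }
    }
    pthread_rwlock_unlock(&container_rwlock);
    return false;
}

static void
group_release(struct group *grp)
{
    pthread_rwlock_unlock(&grp->rwlock);
}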

A group's lock is taken for writing only while the container's write
lock is already held.  This prevents a deadlock in which one thread
holds a group's write lock and waits for the container's write lock,
while another (reading) thread holds the container's lock and waits
for the group's lock.
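
Continuing the same simplified model (it reuses the declarations from
the sketch above), a deletion path could look like the one below;
group_delete() is again a hypothetical helper, not an OVS function, and
memory reclamation is omitted for brevity.

static void
group_delete(unsigned int group_id)
{
    struct group *grp, **prev;

    pthread_rwlock_wrlock(&container_rwlock);
    for (prev = &groups; (grp = *prev) != NULL; prev = &grp->next) {
        if (grp->group_id == group_id) {
            /* The group's write lock is taken only while the container's
             * write lock is held, per the ordering rule above; this
             * blocks until all existing readers have released the group. */
            pthread_rwlock_wrlock(&grp->rwlock);
            *prev = grp->next;   /* Unlink: no new lookup can find it. */
            pthread_rwlock_unlock(&container_rwlock);

            pthread_rwlock_unlock(&grp->rwlock);
            pthread_rwlock_destroy(&grp->rwlock);
            return;
        }
    }
    pthread_rwlock_unlock(&container_rwlock);
}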

Signed-off-by: Jarno Rajahalme <jrajaha...@nicira.com>
---
 ofproto/ofproto-provider.h |   23 ++++-
 ofproto/ofproto.c          |  206 +++++++++++++++++++++++++++++++-------------
 2 files changed, 164 insertions(+), 65 deletions(-)

diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index d37014f..ab640c2 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -113,8 +113,9 @@ struct ofproto {
     int min_mtu;                    /* Current MTU of non-internal ports. */
 
     /* Groups. */
-    struct hmap groups;         /* Contains "struct ofgroup"s. */
-    uint32_t n_groups[4];       /* Number of existing groups of each type. */
+    struct ovs_rwlock groups_rwlock;
+    struct hmap groups OVS_GUARDED;   /* Contains "struct ofgroup"s. */
+    uint32_t n_groups[4] OVS_GUARDED; /* # of existing groups of each type. */
     struct ofputil_group_features ogf;
 };
 
@@ -303,7 +304,16 @@ bool ofproto_rule_is_hidden(const struct rule *);
  * With few exceptions, ofproto implementations may look at these fields but
  * should not modify them. */
 struct ofgroup {
-    struct hmap_node hmap_node; /* In struct ofproto's "group" hmap. */
+    /* The rwlock is used to prevent groups from being deleted while child
+     * threads are using them to xlate flows.  A read lock means the
+     * group is currently being used.  A write lock means the group is
+     * in the process of being deleted or updated.  Note that since
+     * a read lock on the groups container is held while searching, and
+     * a group is only ever write locked while holding a write lock
+     * on the container, the users of groups will never see a group
+     * in the write-locked state. */
+    struct ovs_rwlock rwlock;
+    struct hmap_node hmap_node; /* In struct ofproto's "groups" hmap. */
     struct ofproto *ofproto;    /* The ofproto that contains this group. */
     uint32_t group_id;
     uint8_t type;               /* One of OFPGT_*. */
@@ -315,6 +325,13 @@ struct ofgroup {
     uint32_t n_buckets;
 };
 
+bool ofproto_group_lookup(const struct ofproto *ofproto, uint32_t group_id,
+                          struct ofgroup **group)
+    OVS_TRY_RDLOCK(true, (*group)->rwlock);
+
+void ofproto_group_release(struct ofgroup *group)
+    OVS_RELEASES(group->rwlock);
+
 /* ofproto class structure, to be defined by each ofproto implementation.
  *
  *
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 0eb1ee7..61d74f4 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -457,6 +457,7 @@ ofproto_create(const char *datapath_name, const char *datapath_type,
     ofproto->vlan_bitmap = NULL;
     ofproto->vlans_changed = false;
     ofproto->min_mtu = INT_MAX;
+    ovs_rwlock_init(&ofproto->groups_rwlock);
     hmap_init(&ofproto->groups);
 
     error = ofproto->ofproto_class->construct(ofproto);
@@ -1142,6 +1143,7 @@ ofproto_destroy__(struct ofproto *ofproto)
     }
 
     delete_group(ofproto, OFPG_ALL);
+    ovs_rwlock_destroy(&ofproto->groups_rwlock);
     hmap_destroy(&ofproto->groups);
 
     connmgr_destroy(ofproto->connmgr);
@@ -4694,24 +4696,67 @@ handle_meter_request(struct ofconn *ofconn, const struct ofp_header *request,
     return 0;
 }
 
-static struct ofgroup *
-ofproto_group_lookup(const struct ofproto *ofproto,
-                           uint32_t group_id)
+bool
+ofproto_group_lookup(const struct ofproto *ofproto, uint32_t group_id,
+                     struct ofgroup **group)
+    OVS_TRY_RDLOCK(true, (*group)->rwlock)
 {
-    struct ofgroup *group;
+    ovs_rwlock_rdlock(&ofproto->groups_rwlock);
+    HMAP_FOR_EACH_IN_BUCKET (*group, hmap_node,
+                             hash_int(group_id, 0), &ofproto->groups) {
+        if ((*group)->group_id == group_id) {
+            ovs_rwlock_rdlock(&(*group)->rwlock);
+            ovs_rwlock_unlock(&ofproto->groups_rwlock);
+            return true;
+        }
+    }
+    ovs_rwlock_unlock(&ofproto->groups_rwlock);
+    return false;
+}
+
+void
+ofproto_group_release(struct ofgroup *group)
+    OVS_RELEASES(group->rwlock)
+{
+    ovs_rwlock_unlock(&group->rwlock);
+}
 
-    HMAP_FOR_EACH_IN_BUCKET (group, hmap_node,
+static bool
+ofproto_group_write_lookup(const struct ofproto *ofproto, uint32_t group_id,
+                           struct ofgroup **group)
+    OVS_TRY_WRLOCK(true, ofproto->groups_rwlock)
+    OVS_TRY_WRLOCK(true, (*group)->rwlock)
+{
+    ovs_rwlock_wrlock(&ofproto->groups_rwlock);
+    HMAP_FOR_EACH_IN_BUCKET (*group, hmap_node,
                              hash_int(group_id, 0), &ofproto->groups) {
-        if (group->group_id == group_id) {
-            return group;
+        if ((*group)->group_id == group_id) {
+            ovs_rwlock_wrlock(&(*group)->rwlock);
+            return true;
         }
     }
+    ovs_rwlock_unlock(&ofproto->groups_rwlock);
+    return false;
+}
 
-    return NULL;
+static bool
+ofproto_group_exists(const struct ofproto *ofproto, uint32_t group_id)
+    OVS_REQ_RDLOCK(ofproto->groups_rwlock)
+{
+    struct ofgroup *grp;
+
+    HMAP_FOR_EACH_IN_BUCKET (grp, hmap_node,
+                             hash_int(group_id, 0), &ofproto->groups) {
+        if (grp->group_id == group_id) {
+            return true;
+        }
+    }
+    return false;
 }
 
 static void
 append_group_stats(struct ofgroup *group, struct list *replies)
+    OVS_REQ_RDLOCK(group->rwlock)
 {
     struct ofputil_group_stats ogs;
     struct ofproto *ofproto = group->ofproto;
@@ -4754,13 +4799,17 @@ handle_group_stats_request(struct ofconn *ofconn,
     ofpmp_init(&replies, request);
 
     if (group_id == OFPG_ALL){
+        ovs_rwlock_rdlock(&ofproto->groups_rwlock);
         HMAP_FOR_EACH (group, hmap_node, &ofproto->groups) {
+            ovs_rwlock_rdlock(&group->rwlock);
             append_group_stats(group, &replies);
+            ovs_rwlock_unlock(&group->rwlock);
         }
+        ovs_rwlock_unlock(&ofproto->groups_rwlock);
     } else {
-        group = ofproto_group_lookup(ofproto, group_id);
-        if (group) {
+        if (ofproto_group_lookup(ofproto, group_id, &group)) {
             append_group_stats(group, &replies);
+            ofproto_group_release(group);
         }
     }
 
@@ -4780,11 +4829,13 @@ handle_group_desc_stats_request(struct ofconn *ofconn,
 
     ofpmp_init(&replies, request);
 
+    ovs_rwlock_rdlock(&ofproto->groups_rwlock);
     HMAP_FOR_EACH (group, hmap_node, &ofproto->groups) {
         gds.group_id = group->group_id;
         gds.type = group->type;
         ofputil_append_group_desc_reply(&gds, &group->buckets, &replies);
     }
+    ovs_rwlock_unlock(&ofproto->groups_rwlock);
 
     ofconn_send_replies(ofconn, &replies);
 
@@ -4806,20 +4857,6 @@ handle_group_features_stats_request(struct ofconn *ofconn,
     return 0;
 }
 
-/* Checks whether a new group 'group_id' may be created in 'ofproto'.  Returns
- * 0 if 'gm' is OK, otherwise an OpenFlow error code.  */
-static enum ofperr
-ofproto_may_create_group(const struct ofproto *ofproto, uint32_t group_id)
-{
-    if (group_id > OFPG_MAX) {
-        return OFPERR_OFPGMFC_INVALID_GROUP;
-    } else if (ofproto_group_lookup(ofproto, group_id)) {
-        return OFPERR_OFPGMFC_GROUP_EXISTS;
-    } else {
-        return 0;
-    }
-}
-
 /* Implements OFPGC11_ADD
  * in which no matching flow already exists in the flow table.
  *
@@ -4839,19 +4876,13 @@ add_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
     struct ofgroup *ofgroup;
     enum ofperr error;
 
-    error = ofproto_may_create_group(ofproto, gm->group_id);
-    if (error) {
-        return error;
+    if (gm->group_id > OFPG_MAX) {
+        return OFPERR_OFPGMFC_INVALID_GROUP;
     }
-
     if (gm->type > OFPGT11_FF) {
         return OFPERR_OFPGMFC_BAD_TYPE;
     }
 
-    if (ofproto->n_groups[gm->type] >= ofproto->ogf.max_groups[gm->type]) {
-        return OFPERR_OFPGMFC_OUT_OF_GROUPS;
-    }
-
     /* Allocate new group and initialize it. */
     ofgroup = ofproto->ofproto_class->group_alloc();
     if (!ofgroup) {
@@ -4859,6 +4890,7 @@ add_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
         return OFPERR_OFPGMFC_OUT_OF_GROUPS;
     }
 
+    ovs_rwlock_init(&ofgroup->rwlock);
     ofgroup->ofproto  = ofproto;
     ofgroup->group_id = gm->group_id;
     ofgroup->type     = gm->type;
@@ -4867,20 +4899,43 @@ add_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
     list_move(&ofgroup->buckets, &gm->buckets);
     ofgroup->n_buckets = list_size(&ofgroup->buckets);
 
-    hmap_insert(&ofproto->groups, &ofgroup->hmap_node,
-                hash_int(ofgroup->group_id, 0));
-    ofproto->n_groups[ofgroup->type]++;
-
-    /* Insert new group. */
+    /* group_construct() is called BEFORE any locks are taken. */
     error = ofproto->ofproto_class->group_construct(ofgroup);
     if (error) {
-        ofputil_bucket_list_destroy(&ofgroup->buckets);
+        goto free_out;
+    }
+
+    /* We take the write lock as late as possible to minimize the time we
+     * block other threads: no visible state changes before acquiring it. */
+    ovs_rwlock_wrlock(&ofproto->groups_rwlock);
+
+    if (ofproto->n_groups[gm->type] >= ofproto->ogf.max_groups[gm->type]) {
+        error = OFPERR_OFPGMFC_OUT_OF_GROUPS;
+        goto unlock_out;
+    }
+
+    if (ofproto_group_exists(ofproto, gm->group_id)) {
+        error = OFPERR_OFPGMFC_GROUP_EXISTS;
+        goto unlock_out;
+    }
+
+    if (!error) {
+        /* Insert new group. */
+        hmap_insert(&ofproto->groups, &ofgroup->hmap_node,
+                    hash_int(ofgroup->group_id, 0));
+        ofproto->n_groups[ofgroup->type]++;
 
-        hmap_remove(&ofproto->groups, &ofgroup->hmap_node);
-        ofproto->ofproto_class->group_dealloc(ofgroup);
-        ofproto->n_groups[gm->type]--;
+        ovs_rwlock_unlock(&ofproto->groups_rwlock);
+        return error;
     }
 
+ unlock_out:
+    ovs_rwlock_unlock(&ofproto->groups_rwlock);
+    ofproto->ofproto_class->group_destruct(ofgroup);
+ free_out:
+    ofputil_bucket_list_destroy(&ofgroup->buckets);
+    ofproto->ofproto_class->group_dealloc(ofgroup);
+
     return error;
 }
 
@@ -4904,21 +4959,24 @@ modify_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
         return OFPERR_OFPGMFC_BAD_TYPE;
     }
 
-    ofgroup = ofproto_group_lookup(ofproto, gm->group_id);
-    if (!ofgroup) {
-        return OFPERR_OFPGMFC_UNKNOWN_GROUP;
+    victim = ofproto->ofproto_class->group_alloc();
+    if (!victim) {
+        VLOG_WARN_RL(&rl, "%s: failed to allocate group", ofproto->name);
+        return OFPERR_OFPGMFC_OUT_OF_GROUPS;
     }
 
+    if (!ofproto_group_write_lookup(ofproto, gm->group_id, &ofgroup)) {
+        error = OFPERR_OFPGMFC_UNKNOWN_GROUP;
+        goto free_out;
+    }
+    /* Both the group's and its container's write locks are held now.
+     * Also, n_groups[] is protected by ofproto->groups_rwlock. */
     if (ofgroup->type != gm->type
         && ofproto->n_groups[gm->type] >= ofproto->ogf.max_groups[gm->type]) {
-        return OFPERR_OFPGMFC_OUT_OF_GROUPS;
+        error = OFPERR_OFPGMFC_OUT_OF_GROUPS;
+        goto unlock_out;
     }
 
-    victim = ofproto->ofproto_class->group_alloc();
-    if (!victim) {
-        VLOG_WARN_RL(&rl, "%s: failed to create group", ofproto->name);
-        return OFPERR_OFPGMFC_OUT_OF_GROUPS;
-    }
     *victim = *ofgroup;
     list_move(&victim->buckets, &ofgroup->buckets);
 
@@ -4938,20 +4996,31 @@ modify_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
         *ofgroup = *victim;
         list_move(&ofgroup->buckets, &victim->buckets);
     }
-    ofproto->ofproto_class->group_dealloc(victim);
 
+ unlock_out:
+    ovs_rwlock_unlock(&ofgroup->rwlock);
+    ovs_rwlock_unlock(&ofproto->groups_rwlock);
+ free_out:
+    ofproto->ofproto_class->group_dealloc(victim);
     return error;
 }
 
 static void
-delete_group__(struct ofgroup *ofgroup)
+delete_group__(struct ofproto *ofproto, struct ofgroup *ofgroup)
+    OVS_RELEASES(ofproto->groups_rwlock)
 {
-    struct ofproto *ofproto = ofgroup->ofproto;
+    /* Must wait for existing readers to finish, while still holding the
+     * container's write lock. */
+    ovs_rwlock_wrlock(&ofgroup->rwlock);
+    hmap_remove(&ofproto->groups, &ofgroup->hmap_node);
+    /* No-one can find this group any more. */
+    ofproto->n_groups[ofgroup->type]--;
+    ovs_rwlock_unlock(&ofproto->groups_rwlock);
 
     ofproto->ofproto_class->group_destruct(ofgroup);
     ofputil_bucket_list_destroy(&ofgroup->buckets);
-    hmap_remove(&ofproto->groups, &ofgroup->hmap_node);
-    ofproto->n_groups[ofgroup->type]--;
+    ovs_rwlock_unlock(&ofgroup->rwlock);
+    ovs_rwlock_destroy(&ofgroup->rwlock);
     ofproto->ofproto_class->group_dealloc(ofgroup);
 }
 
@@ -4959,18 +5028,31 @@ delete_group__(struct ofgroup *ofgroup)
 static void
 delete_group(struct ofproto *ofproto, uint32_t group_id)
 {
-    if (group_id == OFPG_ALL) {
-        struct ofgroup *ofgroup, *next;
+    struct ofgroup *ofgroup;
 
-        HMAP_FOR_EACH_SAFE (ofgroup, next, hmap_node, &ofproto->groups) {
-            delete_group__(ofgroup);
+    ovs_rwlock_wrlock(&ofproto->groups_rwlock);
+    if (group_id == OFPG_ALL) {
+        for (;;) {
+            struct hmap_node *node = hmap_first(&ofproto->groups);
+            if (!node) {
+                break;
+            }
+            ofgroup = CONTAINER_OF(node, struct ofgroup, hmap_node);
+            delete_group__(ofproto, ofgroup);
+            /* Re-take the lock for each node separately, so that we do not
+             * block other threads for too long. */
+            ovs_rwlock_wrlock(&ofproto->groups_rwlock);
         }
     } else {
-        struct ofgroup *ofgroup = ofproto_group_lookup(ofproto, group_id);
-        if (ofgroup) {
-            delete_group__(ofgroup);
+        HMAP_FOR_EACH_IN_BUCKET (ofgroup, hmap_node,
+                                 hash_int(group_id, 0), &ofproto->groups) {
+            if (ofgroup->group_id == group_id) {
+                delete_group__(ofproto, ofgroup);
+                return;
+            }
         }
     }
+    ovs_rwlock_unlock(&ofproto->groups_rwlock);
 }
 
 static enum ofperr
-- 
1.7.10.4
