On Mon, Jun 16, 2014 at 1:18 PM, Andy Zhou <az...@nicira.com> wrote:
> When deleting a mask from the mask array, we always move the last entry
> into the vacated slot. Another approach would be to set the slot to NULL
> in place and periodically compact the array.
>
> The approach taken by this patch is more efficient at run
> time.  During lookup, fast-path packets don't have to skip over NULL
> pointers.
>
> A more important advantage of this approach is that it tries to keep
> the mask array indexes stable by avoiding periodic index reshuffles.
>
> This patch implements an optimization to further promote index
> stability.  By leaving the last entry's value intact when moving it
> to a new location, old cache indexes can 'fix' themselves by noticing
> that the cached index is outside the valid mask array region.  The
> new index can then be found by scanning for the mask pointer within
> the valid region.
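To make the self-fixing index concrete, here is a minimal user-space sketch of
the idea. It only mirrors the tbl_mask_array_delete_mask() /
fixup_cache_entry_index() logic added further down in the patch; the toy names,
fixed-size array and plain pointers (no RCU) are made up for the illustration.

/* Toy illustration, not the kernel code: delete a mask by moving the
 * last valid entry into its slot, then let a stale cached index fix
 * itself by rescanning the valid region. */
#include <stdio.h>

#define TOY_MAX 8

struct toy_mask { const char *name; };

struct toy_array {
        int count;                       /* valid entries live in [0, count) */
        struct toy_mask *masks[TOY_MAX];
};

/* Same idea as tbl_mask_array_delete_mask(): move the last valid entry
 * into the freed slot; its old slot keeps the (now stale) pointer. */
static void toy_delete(struct toy_array *ma, const struct toy_mask *mask)
{
        int i;

        for (i = 0; i < ma->count; i++)
                if (ma->masks[i] == mask) {
                        ma->masks[i] = ma->masks[ma->count - 1];
                        ma->count--;
                        break;
                }
}

/* Same idea as fixup_cache_entry_index(): a cached index >= count is
 * outside the valid region, so rescan [0, count) for the same pointer. */
static int toy_fixup(const struct toy_array *ma, int idx,
                     const struct toy_mask *cached)
{
        int i;

        if (idx < ma->count)
                return idx;              /* index still valid */
        for (i = 0; i < ma->count; i++)
                if (ma->masks[i] == cached)
                        return i;        /* entry was moved up */
        return -1;                       /* mask is really gone */
}

int main(void)
{
        struct toy_mask a = { "A" }, b = { "B" }, c = { "C" }, d = { "D" };
        struct toy_array ma = { .count = 4, .masks = { &a, &b, &c, &d } };
        int cached_idx = 3;              /* cache remembers D at index 3 */

        toy_delete(&ma, &b);             /* D is moved into slot 1 */
        cached_idx = toy_fixup(&ma, cached_idx, &d);
        printf("D is now at index %d\n", cached_idx);    /* prints 1 */
        return 0;
}

The stale slot keeps pointing at the moved mask until a later insert overwrites
it, which is what makes the rescan possible and also why the race window
described in the patch comment below can occur.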
>
> Signed-off-by: Andy Zhou <az...@nicira.com>
>
> ----
> v5 -> v6:
>         Rewrote fixup_cache_entry_index(),
>         Remove tbl_mask_array_find_idx()
>         In ovs_flow_tbl_lookup_stats(), make i < count the fast path.
>
> v4 -> v5:
>         Further simplify mask_insert using "for loop".
>
> v3 -> v4:
>         Split mask lookup simplification into its own patch.
>
> v2 -> v3:
>         Further simplified the mask flow lookup function.
>
> v1 -> v2:
>        * Added a shrink mask array function.
>        * Documented the corner case of mask deletion.
>        * Simplified the mask flow lookup function (w.r.t. using and
>          updating the mask cache)
> ---
>  datapath/flow_table.c | 124 +++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 92 insertions(+), 32 deletions(-)
>
> diff --git a/datapath/flow_table.c b/datapath/flow_table.c
> index c448eec..12679af 100644
> --- a/datapath/flow_table.c
> +++ b/datapath/flow_table.c
> @@ -247,10 +247,10 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>         if (old) {
>                 int i;
>
> -               for (i = 0; i < old->max; i++) {
> -                       if (old->masks[i])
> -                               new->masks[new->count++] = old->masks[i];
> -               }
> +               for (i = 0; i < old->max; i++)
> +                       new->masks[i] = old->masks[i];
> +
> +               new->count = old->count;
>         }
>         rcu_assign_pointer(tbl->mask_array, new);
>
> @@ -260,6 +260,47 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>         return 0;
>  }
>
> +static void tbl_mask_array_delete_mask(struct mask_array *ma,
> +                                      const struct sw_flow_mask *mask)
> +{
> +       int i = 0;
> +
> +       /* Delete a mask pointer from the valid section.
> +        *
> +        * Also move the last entry into its place, so there is no
> +        * hole in the valid section.
> +        *
> +        * Notice the last entry still points to the original mask.
> +        *
> +        * <Note>: there is a small race window that may cause a mask
> +        * to be missed in a search. Imagine a core walking through
> +        * the array that has passed the index of the mask being
> +        * deleted. Before it reaches the last entry, that entry is
> +        * overwritten by another core adding a new mask, so it now
> +        * points to the new mask. In this case, the moved-up last
> +        * entry can be missed by the core walking the mask array.
> +        *
> +        * If the missed mask would have led to a successful
> +        * lookup, hitting the race window can cause a packet to miss
> +        * the kernel flow cache and be sent to user space.
> +        * </Note>
> +        */
> +       for (i = 0; i < ma->count; i++)
> +               if (mask == ma->masks[i]) {
> +                       struct sw_flow_mask *last;
> +
> +                       last = ma->masks[ma->count - 1];
> +                       rcu_assign_pointer(ma->masks[i], last);
> +                       ma->count--;
> +                       break;
> +               }
> +
> +       /* Remove the deleted mask pointers from the invalid section. */
> +       for (i = ma->count; i < ma->max; i++)
> +               if (mask == ma->masks[i])
> +                       RCU_INIT_POINTER(ma->masks[i], NULL);
> +}
> +
>  int ovs_flow_tbl_init(struct flow_table *table)
>  {
>         struct table_instance *ti;
> @@ -513,7 +554,6 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
>         return NULL;
>  }
>
> -
>  static struct sw_flow *flow_lookup(struct flow_table *tbl,
>                                    struct table_instance *ti,
>                                    struct mask_array *ma,
> @@ -524,7 +564,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>         struct sw_flow *flow;
>         int i;
>
> -       for (i = 0; i < ma->max; i++) {
> +       for (i = 0; i < ma->count; i++) {
>                 struct sw_flow_mask *mask;
>
>                 mask = rcu_dereference_ovsl(ma->masks[i]);
> @@ -540,6 +580,21 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>         return NULL;
>  }
>
> +/* If the cache index is outside of the valid region, update the index
> + * in case the cache entry was moved up. */
> +static void fixup_cache_entry_index(struct mask_cache_entry *e,
> +                                   const struct mask_array *ma,
> +                                   const struct sw_flow_mask *cache)
> +{
> +       int i;
> +
> +       for (i = 0; i < ma->count; i++)
> +               if (cache == ovsl_dereference(ma->masks[i])) {
> +                       e->mask_index = i;
> +                       return;
> +               }
> +}
> +
>  /*
>   * mask_cache maps flow to probable mask. This cache is not tightly
>   * coupled cache, It means updates to  mask list can result in inconsistent
> @@ -569,6 +624,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>
>         ce = NULL;
>         cache = NULL;
> +       flow = NULL;
No need to set flow to NULL here at the top of the function; rather we can
do it in the else part of the count condition check. That way it is easier
to understand why it is done.
something like:

                        if (likely(i < ma->count)) {
                                cache = rcu_dereference_ovsl(ma->masks[i]);
                                flow = masked_flow_lookup(ti, key, cache,
                                                          n_mask_hit);
                        } else {
                                flow = NULL;
                                if (i < ma->max) {
                                        cache = rcu_dereference_ovsl(ma->masks[i]);
                                        if (cache) {
                                                fixup_cache_entry_index(e, ma, cache);
                                                flow = masked_flow_lookup(ti, key, cache,
                                                                          n_mask_hit);
                                        }
                                }
                        }


>         entries = this_cpu_ptr(tbl->mask_cache);
>
>         /* Find the cache entry 'ce' to operate on. */
> @@ -578,15 +634,28 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>
>                 e = &entries[index];
>                 if (e->skb_hash == skb_hash) {
> -                       cache = rcu_dereference_ovsl(ma->masks[e->mask_index]);
> -                       if (cache) {
> +                       int i = e->mask_index;
> +
> +                       if (likely(i < ma->count)) {
> +                               cache = rcu_dereference_ovsl(ma->masks[i]);
>                                 flow = masked_flow_lookup(ti, key, cache,
>                                                           n_mask_hit);
> -                               if (flow) /* Cache hit. */
> -                                       return flow;
> +                       } else if (i < ma->count) {
I think it should be ma->max here; the i < ma->count case is already handled
by the branch above, so as written this else-if can never be taken.

> +                               cache = rcu_dereference_ovsl(ma->masks[i]);
> +                               if (cache) {
> +                                       fixup_cache_entry_index(e, ma, cache);
> +                                       flow = masked_flow_lookup(ti, key,
> +                                                       cache, n_mask_hit);
> +                               }
>                         }
> +
> +                       if (flow)  /* Cache hit. */
> +                               return flow;
> +
> +                       /* Cache miss. This is the best cache
> +                        * replacement candidate.  */
>                         e->skb_hash = 0;
> -                       ce = e;  /* The best cache replacement candidate. */
> +                       ce = e;
>                         break;
>                 }
>
> @@ -642,18 +711,17 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
>
>                 if (!mask->ref_count) {
>                         struct mask_array *ma;
> -                       int i;
>
>                         ma = ovsl_dereference(tbl->mask_array);
> -                       for (i = 0; i < ma->max; i++) {
> -                               if (mask == ovsl_dereference(ma->masks[i])) {
> -                                       RCU_INIT_POINTER(ma->masks[i], NULL);
> -                                       ma->count--;
> -                                       goto free;
> -                               }
> +                       /* Shrink the mask array if necessary. */
> +                       if (ma->max > MASK_ARRAY_SIZE_MIN * 2
> +                               && ma->count <= ma->max / 4) {
> +
> +                               tbl_mask_array_realloc(tbl, ma->max / 2);
> +                               ma = ovsl_dereference(tbl->mask_array);
>                         }
> -                       BUG();
> -free:
> +
> +                       tbl_mask_array_delete_mask(ma, mask);
>                         call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
>                 }
>         }
> @@ -702,7 +770,7 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
>         int i;
>
>         ma = ovsl_dereference(tbl->mask_array);
> -       for (i = 0; i < ma->max; i++) {
> +       for (i = 0; i < ma->count; i++) {
>                 struct sw_flow_mask *t;
>
>                 t = ovsl_dereference(ma->masks[i]);
> @@ -722,7 +790,6 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
>         mask = flow_mask_find(tbl, new);
>         if (!mask) {
>                 struct mask_array *ma;
> -               int i;
>
>                 /* Allocate a new mask if none exists. */
>                 mask = mask_alloc();
> @@ -745,16 +812,9 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
>                         }
>                         ma = ovsl_dereference(tbl->mask_array);
>                 }
> -               for (i = 0; i < ma->max; i++) {
> -                       const struct sw_flow_mask *t;
> -
> -                       t = ovsl_dereference(ma->masks[i]);
> -                       if (!t) {
> -                               rcu_assign_pointer(ma->masks[i], mask);
> -                               ma->count++;
> -                               break;
> -                       }
> -               }
> +
> +               rcu_assign_pointer(ma->masks[ma->count], mask);
> +               ma->count++;
>         } else {
>                 BUG_ON(!mask->ref_count);
>                 mask->ref_count++;
> --
otherwise looks good.
Acked-by: Pravin B Shelar <pshe...@nicira.com>

> 1.9.1
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev
