This has been sitting in patchwork for a while. I guess that Pravin or
Jesse needs to review it?

On Fri, May 16, 2014 at 5:33 PM, Andy Zhou <az...@nicira.com> wrote:
> When deleting a mask from the mask array, we always move the last entry
> into the vacated slot. An alternative approach would be to set the slot
> to NULL in place and periodically compact the array.
>
> The approach taken by this patch is more efficient at run time: during
> lookup, fast-path packets don't have to skip over NULL pointers.
>
> A more important advantage of this approach is that it tries to
> keep mask array indexes stable by avoiding periodic index reshuffles.
>
> This patch implements an optimization to further promote index
> stability.  By leaving the last entry's value intact when moving it to
> a new location, stale cache indexes can 'fix' themselves: a lookup
> notices that the cached index is outside the valid mask array region,
> and recovers the new index by scanning for the mask pointer within the
> valid region.
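
[Inline illustration, not part of the patch: a minimal sketch of the
self-fixing index idea, using a hypothetical helper name and the simplified
semantics described above.]

static int mask_cache_index_fixup(struct mask_array *ma, int cached_index)
{
        struct sw_flow_mask *mask;
        int i;

        /* Index still inside the valid region [0, ma->count): use as is. */
        if (cached_index < ma->count)
                return cached_index;

        /* Stale but in-bounds index: the entry was moved toward the front
         * when another mask was deleted, and its old slot still holds the
         * original pointer.  Scan the valid region for that pointer to
         * recover the new index.
         */
        if (cached_index < ma->max) {
                mask = rcu_dereference_ovsl(ma->masks[cached_index]);
                for (i = 0; i < ma->count; i++)
                        if (mask == rcu_dereference_ovsl(ma->masks[i]))
                                return i;
        }

        return -1;      /* No longer present; fall back to a full lookup. */
}
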
>
> Signed-off-by: Andy Zhou <az...@nicira.com>
>
> ----
> v1 -> v2:
>         * added shrink mask array function.
>         * documented the corner case of mask deletion.
>         * Simplified mask flow lookup function
>                 (w.r.t. using and updating the mask cache)
> ---
>  datapath/flow_table.c | 162 ++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 112 insertions(+), 50 deletions(-)
>
> diff --git a/datapath/flow_table.c b/datapath/flow_table.c
> index 58a25c7..564e51d 100644
> --- a/datapath/flow_table.c
> +++ b/datapath/flow_table.c
> @@ -247,10 +247,10 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>         if (old) {
>                 int i;
>
> -               for (i = 0; i < old->max; i++) {
> -                       if (old->masks[i])
> -                               new->masks[new->count++] = old->masks[i];
> -               }
> +               for (i = 0; i < old->max; i++)
> +                       new->masks[i] = old->masks[i];
> +
> +               new->count = old->count;
>         }
>         rcu_assign_pointer(tbl->mask_array, new);
>
> @@ -260,6 +260,65 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>         return 0;
>  }
>
> +static void tbl_mask_array_delete_mask(struct mask_array *ma,
> +                                      const struct sw_flow_mask *mask)
> +{
> +       int i = 0;
> +
> +       /* Delete a mask pointer from the valid section.
> +        *
> +        * Also move the last entry into the vacated slot, so there is
> +        * no hole in the valid section.
> +        *
> +        * Notice the last entry still points to the original mask.
> +        *
> +        * <Note>: there is a small race window that may cause a mask
> +        * to be missed in a search. Imagine a core that is walking
> +        * through the array and has already passed the index of the
> +        * mask being deleted. Before it reaches the last entry, that
> +        * entry is overwritten by another core adding a new mask, so
> +        * the last entry now points to the new mask. In this case, the
> +        * moved-up last entry can be missed by the core walking the
> +        * mask array.
> +        *
> +        * If the missed mask would have led to a successful lookup,
> +        * hitting the race window could cause a packet to miss the
> +        * kernel flow cache and be sent to user space.
> +        * </Note>
> +        */
> +       while (i < ma->count - 2) {
> +               if (mask == ma->masks[i]) {
> +                       struct sw_flow_mask *last;
> +
> +                       last = ma->masks[ma->count - 1];
> +                       rcu_assign_pointer(ma->masks[i], last);
> +                       ma->count--;
> +                       break;
> +               } else
> +                       i++;
> +       }
> +
> +       /* Remove the deleted mask pointers from the invalid section. */
> +       for (; i < ma->max; i++) {
> +               if (mask == ma->masks[i])
> +                       RCU_INIT_POINTER(ma->masks[i], NULL);
> +
> +               if (i == ma->count - 1)
> +                       ma->count--;
> +       }
> +}
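
[Inline illustration, not from the patch: one concrete interleaving of the
race described in the <Note> above, with CPU A doing a lookup walk and CPU B
deleting and then adding a mask.]

  1. CPU A walks the mask array and has already passed index j.
  2. CPU B deletes the mask at j: masks[j] = masks[count - 1]; count--.
  3. CPU B (or another core) adds a new mask; it is written into the slot
     that previously held the moved-up entry, and count is incremented again.
  4. CPU A reaches that slot and sees the new mask instead of the moved-up
     one.  The moved-up mask now lives at j, which A already passed, so it
     is missed for this one lookup and the packet may be sent to user space.
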
> +
> +static int tbl_mask_array_find_idx(struct mask_array *ma,
> +                                   const struct sw_flow_mask *mask)
> +{
> +       int i;
> +
> +       for (i = 0; i < ma->count; i++)
> +               if (mask == ovsl_dereference(ma->masks[i]))
> +                       return i;
> +
> +       return -1;
> +}
> +
>  int ovs_flow_tbl_init(struct flow_table *table)
>  {
>         struct table_instance *ti;
> @@ -524,7 +583,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>         struct sw_flow *flow;
>         int i;
>
> -       for (i = 0; i < ma->max; i++) {
> +       for (i = 0; i < ma->count; i++) {
>                 struct sw_flow_mask *mask;
>
>                 mask = rcu_dereference_ovsl(ma->masks[i]);
> @@ -554,8 +613,9 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>  {
>         struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
>         struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
> -       struct mask_cache_entry  *entries, *ce, *del;
> +       struct mask_cache_entry  *entries, *ce;
>         struct sw_flow *flow;
> +       struct sw_flow_mask *cache;
>         u32 hash = skb_hash;
>         int seg;
>
> @@ -566,42 +626,53 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>                 return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
>         }
>
> -       del = NULL;
> +       ce = NULL;
> +       cache = NULL;
>         entries = this_cpu_ptr(tbl->mask_cache);
>
> +       /* Find the cache entry 'ce' to operate on. */
>         for (seg = 0; seg < MC_HASH_SEGS; seg++) {
> -               int index;
> -
> -               index = hash & (MC_HASH_ENTRIES - 1);
> -               ce = &entries[index];
> -
> -               if (ce->skb_hash == skb_hash) {
> -                       struct sw_flow_mask *mask;
> +               struct mask_cache_entry *e;
> +               int index = hash & (MC_HASH_ENTRIES - 1);
> +
> +               e = &entries[index];
> +               if (e->skb_hash == skb_hash) {
> +                       int i = e->mask_index;
> +
> +                       if (i < ma->count)
> +                               cache = rcu_dereference_ovsl(ma->masks[i]);
> +                       else if (i < ma->max) {
> +                               cache = rcu_dereference_ovsl(ma->masks[i]);
> +                               i = tbl_mask_array_find_idx(ma, cache);
> +                               if (i < 0)
> +                                       cache = NULL;
> +                       }
>
> -                       mask = rcu_dereference_ovsl(ma->masks[ce->mask_index]);
> -                       if (mask) {
> -                               flow = masked_flow_lookup(ti, key, mask,
> -                                                         n_mask_hit);
> -                               if (flow)  /* Found */
> -                                       return flow;
> +                       if (!cache)
> +                               e->skb_hash = 0; /* Not a valid cache entry. */
>
> -                       }
> -                       del = ce;
> +                       ce = e;  /* The best cache candidate. */
>                         break;
>                 }
>
> -               if (!del || (del->skb_hash && !ce->skb_hash) ||
> -                   (rcu_dereference_ovsl(ma->masks[del->mask_index]) &&
> -                   !rcu_dereference_ovsl(ma->masks[ce->mask_index]))) {
> -                       del = ce;
> -               }
> +               if (!ce || e->skb_hash > ce->skb_hash)
> +                       ce = e;  /* A better replacement cache candidate. */
>
>                 hash >>= MC_HASH_SHIFT;
>         }
>
> -       flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &del->mask_index);
> +       /* Try the cached mask first if a cache entry was found.  */
> +       if (cache) {
> +               flow = masked_flow_lookup(ti, key, cache, n_mask_hit);
> +               if (flow)
> +                       /* Cache hit. */
> +                       return flow;
> +       }
> +
> +       /* Cache miss, do full lookup. */
> +       flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
>         if (flow)
> -               del->skb_hash = skb_hash;
> +               ce->skb_hash = skb_hash;
>
>         return flow;
>  }
> @@ -644,18 +715,17 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
>
>                 if (!mask->ref_count) {
>                         struct mask_array *ma;
> -                       int i;
>
>                         ma = ovsl_dereference(tbl->mask_array);
> -                       for (i = 0; i < ma->max; i++) {
> -                               if (mask == ovsl_dereference(ma->masks[i])) {
> -                                       RCU_INIT_POINTER(ma->masks[i], NULL);
> -                                       ma->count--;
> -                                       goto free;
> -                               }
> +                       /* Shrink the mask array if necessary. */
> +                       if (ma->max > MASK_ARRAY_SIZE_MIN * 2
> +                               && ma->count <= ma->max / 4) {
> +
> +                               tbl_mask_array_realloc(tbl, ma->max / 2);
> +                               ma = ovsl_dereference(tbl->mask_array);
>                         }
> -                       BUG();
> -free:
> +
> +                       tbl_mask_array_delete_mask(ma, mask);
>                         call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
>                 }
>         }
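
[Inline note, not from the patch: a worked example of the shrink condition
above, assuming MASK_ARRAY_SIZE_MIN is 16.  An array with ma->max == 64 is
halved to 32 only once ma->count drops to 16 or below (count <= max / 4);
an array already at 32 (== MASK_ARRAY_SIZE_MIN * 2) is never shrunk further.
The gap between the shrink threshold (max / 4) and the grow trigger (array
full) leaves headroom so the array does not thrash between grow and shrink.]
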
> @@ -704,7 +774,7 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
>         int i;
>
>         ma = ovsl_dereference(tbl->mask_array);
> -       for (i = 0; i < ma->max; i++) {
> +       for (i = 0; i < ma->count; i++) {
>                 struct sw_flow_mask *t;
>
>                 t = ovsl_dereference(ma->masks[i]);
> @@ -724,7 +794,6 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
>         mask = flow_mask_find(tbl, new);
>         if (!mask) {
>                 struct mask_array *ma;
> -               int i;
>
>                 /* Allocate a new mask if none exsits. */
>                 mask = mask_alloc();
> @@ -747,16 +816,9 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
>                         }
>                         ma = ovsl_dereference(tbl->mask_array);
>                 }
> -               for (i = 0; i < ma->max; i++) {
> -                       const struct sw_flow_mask *t;
> -
> -                       t = ovsl_dereference(ma->masks[i]);
> -                       if (!t) {
> -                               rcu_assign_pointer(ma->masks[i], mask);
> -                               ma->count++;
> -                               break;
> -                       }
> -               }
> +
> +               rcu_assign_pointer(ma->masks[ma->count], mask);
> +               ma->count++;
>         } else {
>                 BUG_ON(!mask->ref_count);
>                 mask->ref_count++;
> --
> 1.9.1
>



-- 
"I don't normally do acked-by's.  I think it's my way of avoiding
getting blamed when it all blows up."               Andrew Morton