This has been sitting in patchwork for a while. I guess that Pravin or Jesse needs to review it?
On Fri, May 16, 2014 at 5:33 PM, Andy Zhou <az...@nicira.com> wrote:
> When deleting a mask from the mask array, we always move the last entry
> into the deleted mask's location.  Another approach would be to leave a
> NULL in its place and periodically compact the array.
>
> The approach taken by this patch is more efficient at run time: during
> lookup, the fast path does not have to skip over NULL pointers.
>
> A more important advantage of this approach is that it tries to keep
> the mask array indexes stable by avoiding periodic index reshuffles.
>
> This patch implements an optimization to further promote index
> stability.  By leaving the last entry's value intact when moving it to
> a new location, old cache indexes can 'fix' themselves by noticing that
> the index in the cache is outside the valid mask array region.  The new
> index can then be found by scanning for the mask pointer within the
> valid region.
>
> Signed-off-by: Andy Zhou <az...@nicira.com>
>
> ----
> v1 -> v2:
>  * added shrink mask array function.
>  * documented the corner case of mask deletion.
>  * Simplified mask flow lookup function
>    (w.r.t. using and updating the mask cache)
> ---
>  datapath/flow_table.c | 162 ++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 112 insertions(+), 50 deletions(-)
>
> diff --git a/datapath/flow_table.c b/datapath/flow_table.c
> index 58a25c7..564e51d 100644
> --- a/datapath/flow_table.c
> +++ b/datapath/flow_table.c
> @@ -247,10 +247,10 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>          if (old) {
>                  int i;
>
> -                for (i = 0; i < old->max; i++) {
> -                        if (old->masks[i])
> -                                new->masks[new->count++] = old->masks[i];
> -                }
> +                for (i = 0; i < old->max; i++)
> +                        new->masks[i] = old->masks[i];
> +
> +                new->count = old->count;
>          }
>          rcu_assign_pointer(tbl->mask_array, new);
>
> @@ -260,6 +260,65 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>          return 0;
>  }
>
> +static void tbl_mask_array_delete_mask(struct mask_array *ma,
> +                                       const struct sw_flow_mask *mask)
> +{
> +        int i = 0;
> +
> +        /* Delete a mask pointer from the valid section.
> +         *
> +         * Also move the last entry into its place, so there is no
> +         * hole in the valid section.
> +         *
> +         * Notice the last entry still points to the original mask.
> +         *
> +         * <Note>: there is a small race window that may cause a mask
> +         * to be missed in a search.  Imagine a core is walking through
> +         * the array, passing the index of the deleted mask.  But before
> +         * it reaches the last entry, that entry is overwritten by
> +         * another core that is adding a new mask, so the last entry now
> +         * points to the new mask.  In this case, the moved-up last
> +         * entry can be missed by the core walking the mask array.
> +         *
> +         * In case this missed mask would have led to a successful
> +         * lookup, hitting the race window could cause a packet to miss
> +         * the kernel flow cache and be sent to user space.
> +         * </Note>
> +         */
> +        while (i < ma->count - 2) {
> +                if (mask == ma->masks[i]) {
> +                        struct sw_flow_mask *last;
> +
> +                        last = ma->masks[ma->count - 1];
> +                        rcu_assign_pointer(ma->masks[i], last);
> +                        ma->count--;
> +                        break;
> +                } else
> +                        i++;
> +        }
> +
> +        /* Remove the deleted mask pointers from the invalid section. */
> +        for (; i < ma->max; i++) {
> +                if (mask == ma->masks[i])
> +                        RCU_INIT_POINTER(ma->masks[i], NULL);
> +
> +                if (i == ma->count - 1)
> +                        ma->count--;
> +        }
> +}
> +
> +static int tbl_mask_array_find_idx(struct mask_array *ma,
> +                                   const struct sw_flow_mask *mask)
> +{
> +        int i;
> +
> +        for (i = 0; i < ma->count; i++)
> +                if (mask == ovsl_dereference(ma->masks[i]))
> +                        return i;
> +
> +        return -1;
> +}
> +
>  int ovs_flow_tbl_init(struct flow_table *table)
>  {
>          struct table_instance *ti;
> @@ -524,7 +583,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>          struct sw_flow *flow;
>          int i;
>
> -        for (i = 0; i < ma->max; i++) {
> +        for (i = 0; i < ma->count; i++) {
>                  struct sw_flow_mask *mask;
>
>                  mask = rcu_dereference_ovsl(ma->masks[i]);
> @@ -554,8 +613,9 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>  {
>          struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
>          struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
> -        struct mask_cache_entry *entries, *ce, *del;
> +        struct mask_cache_entry *entries, *ce;
>          struct sw_flow *flow;
> +        struct sw_flow_mask *cache;
>          u32 hash = skb_hash;
>          int seg;
>
> @@ -566,42 +626,53 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>                  return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
>          }
>
> -        del = NULL;
> +        ce = NULL;
> +        cache = NULL;
>          entries = this_cpu_ptr(tbl->mask_cache);
>
> +        /* Find the cache entry 'ce' to operate on. */
>          for (seg = 0; seg < MC_HASH_SEGS; seg++) {
> -                int index;
> -
> -                index = hash & (MC_HASH_ENTRIES - 1);
> -                ce = &entries[index];
> -
> -                if (ce->skb_hash == skb_hash) {
> -                        struct sw_flow_mask *mask;
> +                struct mask_cache_entry *e;
> +                int index = hash & (MC_HASH_ENTRIES - 1);
> +
> +                e = &entries[index];
> +                if (e->skb_hash == skb_hash) {
> +                        int i = e->mask_index;
> +
> +                        if (i < ma->count)
> +                                cache = rcu_dereference_ovsl(ma->masks[i]);
> +                        else if (i < ma->max) {
> +                                cache = rcu_dereference_ovsl(ma->masks[i]);
> +                                i = tbl_mask_array_find_idx(ma, cache);
> +                                if (i < 0)
> +                                        cache = NULL;
> +                        }
>
> -                        mask = rcu_dereference_ovsl(ma->masks[ce->mask_index]);
> -                        if (mask) {
> -                                flow = masked_flow_lookup(ti, key, mask,
> -                                                          n_mask_hit);
> -                                if (flow)  /* Found */
> -                                        return flow;
> +                        if (!cache)
> +                                e->skb_hash = 0;  /* Not a valid cache entry. */
>
> -                        }
> -                        del = ce;
> +                        ce = e;  /* The best cache candidate. */
>                          break;
>                  }
>
> -                if (!del || (del->skb_hash && !ce->skb_hash) ||
> -                    (rcu_dereference_ovsl(ma->masks[del->mask_index]) &&
> -                     !rcu_dereference_ovsl(ma->masks[ce->mask_index]))) {
> -                        del = ce;
> -                }
> +                if (!ce || e->skb_hash > ce->skb_hash)
> +                        ce = e;  /* A better replacement cache candidate. */
>
>                  hash >>= MC_HASH_SHIFT;
>          }
>
> -        flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &del->mask_index);
> +        /* Try the cached mask first if a cache entry was found. */
> +        if (cache) {
> +                flow = masked_flow_lookup(ti, key, cache, n_mask_hit);
> +                if (flow)
> +                        /* Cache hit. */
> +                        return flow;
> +        }
> +
> +        /* Cache miss, do full lookup. */
> +        flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
>          if (flow)
> -                del->skb_hash = skb_hash;
> +                ce->skb_hash = skb_hash;
>
>          return flow;
>  }
> @@ -644,18 +715,17 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
>
>          if (!mask->ref_count) {
>                  struct mask_array *ma;
> -                int i;
>
>                  ma = ovsl_dereference(tbl->mask_array);
> -                for (i = 0; i < ma->max; i++) {
> -                        if (mask == ovsl_dereference(ma->masks[i])) {
> -                                RCU_INIT_POINTER(ma->masks[i], NULL);
> -                                ma->count--;
> -                                goto free;
> -                        }
> +                /* Shrink the mask array if necessary. */
> +                if (ma->max > MASK_ARRAY_SIZE_MIN * 2
> +                    && ma->count <= ma->max / 4) {
> +
> +                        tbl_mask_array_realloc(tbl, ma->max / 2);
> +                        ma = ovsl_dereference(tbl->mask_array);
>                  }
> -                BUG();
> -free:
> +
> +                tbl_mask_array_delete_mask(ma, mask);
>                  call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
>          }
>  }
> @@ -704,7 +774,7 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
>          int i;
>
>          ma = ovsl_dereference(tbl->mask_array);
> -        for (i = 0; i < ma->max; i++) {
> +        for (i = 0; i < ma->count; i++) {
>                  struct sw_flow_mask *t;
>
>                  t = ovsl_dereference(ma->masks[i]);
> @@ -724,7 +794,6 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
>          mask = flow_mask_find(tbl, new);
>          if (!mask) {
>                  struct mask_array *ma;
> -                int i;
>
>                  /* Allocate a new mask if none exists. */
>                  mask = mask_alloc();
> @@ -747,16 +816,9 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
>                          }
>                          ma = ovsl_dereference(tbl->mask_array);
>                  }
> -                for (i = 0; i < ma->max; i++) {
> -                        const struct sw_flow_mask *t;
> -
> -                        t = ovsl_dereference(ma->masks[i]);
> -                        if (!t) {
> -                                rcu_assign_pointer(ma->masks[i], mask);
> -                                ma->count++;
> -                                break;
> -                        }
> -                }
> +
> +                rcu_assign_pointer(ma->masks[ma->count], mask);
> +                ma->count++;
>          } else {
>                  BUG_ON(!mask->ref_count);
>                  mask->ref_count++;
> --
> 1.9.1
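To restate the central trick for other reviewers: a mask is deleted by copying the last valid pointer into the freed slot, and the last slot deliberately keeps its old value, so a stale cached index can repair itself by rescanning the valid region. Below is a minimal userspace sketch of that scheme. The names (demo_array, demo_delete, demo_fix_index) are invented for illustration and plain pointers stand in for the RCU-protected mask pointers, so this is only a sketch of the technique, not the datapath code.

#include <stdio.h>

#define DEMO_MAX 8

struct demo_array {
        int count;                    /* valid entries live in [0, count) */
        const char *masks[DEMO_MAX];  /* slots >= count may hold stale copies */
};

/* Delete 'mask' by overwriting its slot with the last valid entry.
 * The last slot keeps its old value, so an index that still points
 * there stays readable; it merely falls outside the valid region. */
static void demo_delete(struct demo_array *ma, const char *mask)
{
        for (int i = 0; i < ma->count; i++) {
                if (ma->masks[i] == mask) {
                        ma->masks[i] = ma->masks[ma->count - 1];
                        ma->count--;
                        return;
                }
        }
}

/* Re-validate a cached index: if it points beyond the valid region,
 * rescan [0, count) for the same pointer and return the new index,
 * or -1 if the mask has been removed entirely. */
static int demo_fix_index(const struct demo_array *ma, int cached_idx)
{
        const char *mask;

        if (cached_idx < ma->count)
                return cached_idx;              /* still valid */
        if (cached_idx >= DEMO_MAX)
                return -1;
        mask = ma->masks[cached_idx];           /* stale copy left behind */
        for (int i = 0; i < ma->count; i++)
                if (ma->masks[i] == mask)
                        return i;               /* mask moved to this slot */
        return -1;                              /* mask is gone */
}

int main(void)
{
        const char *a = "maskA", *b = "maskB", *c = "maskC";
        struct demo_array ma = { 3, { a, b, c } };
        int cached = 2;                         /* a lookup cached maskC at index 2 */

        demo_delete(&ma, a);                    /* maskC moves down into slot 0 */
        printf("fixed index: %d\n", demo_fix_index(&ma, cached));   /* prints 0 */
        return 0;
}

The point of the sketch is the last call: after deleting maskA, the index cached for maskC falls outside the valid region, but the stale copy left at that index is enough to locate maskC's new slot with one scan.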
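On the lookup side, the patch keeps the existing multi-segment probing of the per-CPU mask cache and only changes how a matching entry is validated. As a reminder of how that probing works, here is a small sketch of how successive hash segments select candidate cache slots; DEMO_HASH_ENTRIES, DEMO_HASH_SHIFT and DEMO_HASH_SEGS are assumed stand-ins for the MC_* constants in the datapath headers, not their real values.

#include <stdint.h>
#include <stdio.h>

/* Assumed demo constants; the real MC_HASH_ENTRIES / MC_HASH_SHIFT /
 * MC_HASH_SEGS values live in the datapath sources. */
#define DEMO_HASH_ENTRIES 256
#define DEMO_HASH_SHIFT   8
#define DEMO_HASH_SEGS    (32 / DEMO_HASH_SHIFT)

int main(void)
{
        uint32_t skb_hash = 0xdeadbeefu;
        uint32_t hash = skb_hash;
        int seg;

        /* Each segment of the skb hash names one candidate cache slot;
         * the kernel code stops at the first slot whose stored skb_hash
         * matches, otherwise it keeps a candidate slot to overwrite. */
        for (seg = 0; seg < DEMO_HASH_SEGS; seg++) {
                int index = hash & (DEMO_HASH_ENTRIES - 1);

                printf("segment %d -> slot %d\n", seg, index);
                hash >>= DEMO_HASH_SHIFT;
        }
        return 0;
}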
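The shrink check added to flow_mask_remove() is a plain threshold test: halve the array once it is at most a quarter full and still larger than twice the minimum size. Here is a hedged sketch of just that policy, with DEMO_ARRAY_SIZE_MIN as a made-up stand-in for MASK_ARRAY_SIZE_MIN.

#include <stdio.h>

#define DEMO_ARRAY_SIZE_MIN 16   /* stand-in value, not the kernel constant */

/* Mirror of the threshold check added to flow_mask_remove(): halve the
 * array when it is at most a quarter full and still bigger than twice
 * the minimum size; otherwise keep the current allocation. */
static int demo_next_size(int max, int count)
{
        if (max > DEMO_ARRAY_SIZE_MIN * 2 && count <= max / 4)
                return max / 2;
        return max;
}

int main(void)
{
        printf("%d\n", demo_next_size(128, 30));   /* 64: sparse enough to shrink */
        printf("%d\n", demo_next_size(128, 40));   /* 128: more than a quarter full */
        printf("%d\n", demo_next_size(32, 2));     /* 32: already at 2 * minimum */
        return 0;
}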