thanks, pushed both with the suggested fixes.
On Wed, Jun 18, 2014 at 2:46 PM, Pravin Shelar <pshe...@nicira.com> wrote:
> On Mon, Jun 16, 2014 at 1:18 PM, Andy Zhou <az...@nicira.com> wrote:
>> When deleting a mask from the mask array, we always move the last entry
>> into its current location. Another approach would be to NULL out its
>> current place, and periodically compact the array.
>>
>> The approach taken by this patch is more efficient during run
>> time. During lookup, fast-path packets don't have to skip over NULL
>> pointers.
>>
>> A more important advantage of this approach is that it tries to
>> keep the mask array index stable by avoiding periodic index
>> reshuffles.
>>
>> This patch implements an optimization to further promote index
>> stability. By leaving the last entry's value intact when moving it
>> to a new location, old cache indexes can 'fix' themselves by noticing
>> that the index in the cache is outside the valid mask array region. The
>> new index can be found by scanning for the mask pointer within the valid
>> region.
>>
>> Signed-off-by: Andy Zhou <az...@nicira.com>
>>
>> ----
>> v5 -> v6:
>>     Rewrote fixup_cache_entry_index().
>>     Removed tbl_mask_array_find_idx().
>>     In ovs_flow_tbl_lookup_stats(), made i < count the fast path.
>>
>> v4 -> v5:
>>     Further simplified mask_insert using a "for" loop.
>>
>> v3 -> v4:
>>     Split mask lookup simplification into its own patch.
>>
>> v2 -> v3:
>>     Further simplified the mask flow lookup function.
>>
>> v1 -> v2:
>>     * Added a shrink mask array function.
>>     * Documented the corner case of mask deletion.
>>     * Simplified the mask flow lookup function (w.r.t. using and
>>       updating the mask cache).
>> ---
>> ---
>>  datapath/flow_table.c | 124 +++++++++++++++++++++++++++++++++++++-------------
>>  1 file changed, 92 insertions(+), 32 deletions(-)
>>
>> diff --git a/datapath/flow_table.c b/datapath/flow_table.c
>> index c448eec..12679af 100644
>> --- a/datapath/flow_table.c
>> +++ b/datapath/flow_table.c
>> @@ -247,10 +247,10 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>>         if (old) {
>>                 int i;
>>
>> -               for (i = 0; i < old->max; i++) {
>> -                       if (old->masks[i])
>> -                               new->masks[new->count++] = old->masks[i];
>> -               }
>> +               for (i = 0; i < old->max; i++)
>> +                       new->masks[i] = old->masks[i];
>> +
>> +               new->count = old->count;
>>         }
>>         rcu_assign_pointer(tbl->mask_array, new);
>>
>> @@ -260,6 +260,47 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
>>         return 0;
>>  }
>>
>> +static void tbl_mask_array_delete_mask(struct mask_array *ma,
>> +                                      const struct sw_flow_mask *mask)
>> +{
>> +       int i = 0;
>> +
>> +       /* Delete a mask pointer from the valid section.
>> +        *
>> +        * Also move the last entry into its place, so there is no
>> +        * hole in the valid section.
>> +        *
>> +        * Notice the last entry still points to the original mask.
>> +        *
>> +        * <Note>: there is a small race window that may cause a mask
>> +        * to be missed in a search. Imagine a core is
>> +        * walking through the array, past the index of the mask being
>> +        * deleted. But before it reaches the last entry, that entry is
>> +        * overwritten by another core that is adding a new mask; now the
>> +        * last entry points to the new mask. In this case, the moved-up
>> +        * last entry can be missed by the core walking the mask array.
>> +        *
>> +        * In case the missed mask would have led to a successful
>> +        * lookup, hitting the race window could cause a packet to miss
>> +        * the kernel flow cache and be sent to the user space.
>> +        * </Note>
>> +        */
>> +       for (i = 0; i < ma->count; i++)
>> +               if (mask == ma->masks[i]) {
>> +                       struct sw_flow_mask *last;
>> +
>> +                       last = ma->masks[ma->count - 1];
>> +                       rcu_assign_pointer(ma->masks[i], last);
>> +                       ma->count--;
>> +                       break;
>> +               }
>> +
>> +       /* Remove the deleted mask pointers from the invalid section. */
>> +       for (i = ma->count; i < ma->max; i++)
>> +               if (mask == ma->masks[i])
>> +                       RCU_INIT_POINTER(ma->masks[i], NULL);
>> +}
>> +
>>  int ovs_flow_tbl_init(struct flow_table *table)
>>  {
>>         struct table_instance *ti;
>> @@ -513,7 +554,6 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
>>         return NULL;
>>  }
>>
>> -
>>  static struct sw_flow *flow_lookup(struct flow_table *tbl,
>>                                    struct table_instance *ti,
>>                                    struct mask_array *ma,
>> @@ -524,7 +564,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>>         struct sw_flow *flow;
>>         int i;
>>
>> -       for (i = 0; i < ma->max; i++) {
>> +       for (i = 0; i < ma->count; i++) {
>>                 struct sw_flow_mask *mask;
>>
>>                 mask = rcu_dereference_ovsl(ma->masks[i]);
>> @@ -540,6 +580,21 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>>         return NULL;
>>  }
>>
>> +/* If the cache index is outside of the valid region, update the index
>> + * in case the cache entry was moved up. */
>> +static void fixup_cache_entry_index(struct mask_cache_entry *e,
>> +                                   const struct mask_array *ma,
>> +                                   const struct sw_flow_mask *cache)
>> +{
>> +       int i;
>> +
>> +       for (i = 0; i < ma->count; i++)
>> +               if (cache == ovsl_dereference(ma->masks[i])) {
>> +                       e->mask_index = i;
>> +                       return;
>> +               }
>> +}
>> +
>>  /*
>>   * mask_cache maps flow to probable mask. This cache is not tightly
>>   * coupled cache, It means updates to mask list can result in inconsistent
>> @@ -569,6 +624,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>>
>>         ce = NULL;
>>         cache = NULL;
>> +       flow = NULL;
> No need to set flow to NULL here at the top of the function block; rather, we
> can do it in the else part of the count condition check. That way it is easier
> to understand why it is done.
> Something like:
>
>                 if (likely(i < ma->count)) {
>                         cache = rcu_dereference_ovsl(ma->masks[i]);
>                         flow = masked_flow_lookup(ti, key, cache,
>                                                   n_mask_hit);
>                 } else {
>                         flow = NULL;
>                         if (i < ma->max) {
>                                 cache = rcu_dereference_ovsl(ma->masks[i]);
>                                 if (cache) {
>                                         fixup_cache_entry_index(e, ma, cache);
>                                         flow = masked_flow_lookup(ti, key, cache, n_mask_hit);
>                                 }
>                         }
>                 }
>
>>         entries = this_cpu_ptr(tbl->mask_cache);
>>
>>         /* Find the cache entry 'ce' to operate on. */
>> @@ -578,15 +634,28 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
>>
>>                 e = &entries[index];
>>                 if (e->skb_hash == skb_hash) {
>> -                       cache = rcu_dereference_ovsl(ma->masks[e->mask_index]);
>> -                       if (cache) {
>> +                       int i = e->mask_index;
>> +
>> +                       if (likely(i < ma->count)) {
>> +                               cache = rcu_dereference_ovsl(ma->masks[i]);
>>                                 flow = masked_flow_lookup(ti, key, cache,
>>                                                           n_mask_hit);
>> -                               if (flow)  /* Cache hit. */
>> -                                       return flow;
>> +                       } else if (i < ma->count) {
> I think it's ma->max.
>
>> +                               cache = rcu_dereference_ovsl(ma->masks[i]);
>> +                               if (cache) {
>> +                                       fixup_cache_entry_index(e, ma, cache);
>> +                                       flow = masked_flow_lookup(ti, key,
>> +                                                       cache, n_mask_hit);
>> +                               }
>>                         }
>> +
>> +                       if (flow)  /* Cache hit. */
>> +                               return flow;
>> +
>> +                       /* Cache miss. This is the best cache
>> +                        * replacement candidate. */
>>                         e->skb_hash = 0;
>> -                       ce = e;  /* The best cache replacement candidate. */
>> +                       ce = e;
>>                         break;
>>                 }
>>
>> @@ -642,18 +711,17 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
>>
>>         if (!mask->ref_count) {
>>                 struct mask_array *ma;
>> -               int i;
>>
>>                 ma = ovsl_dereference(tbl->mask_array);
>> -               for (i = 0; i < ma->max; i++) {
>> -                       if (mask == ovsl_dereference(ma->masks[i])) {
>> -                               RCU_INIT_POINTER(ma->masks[i], NULL);
>> -                               ma->count--;
>> -                               goto free;
>> -                       }
>> +               /* Shrink the mask array if necessary. */
>> +               if (ma->max > MASK_ARRAY_SIZE_MIN * 2
>> +                   && ma->count <= ma->max / 4) {
>> +
>> +                       tbl_mask_array_realloc(tbl, ma->max / 2);
>> +                       ma = ovsl_dereference(tbl->mask_array);
>>                 }
>> -               BUG();
>> -free:
>> +
>> +               tbl_mask_array_delete_mask(ma, mask);
>>                 call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
>>         }
>>  }
>> @@ -702,7 +770,7 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl,
>>         int i;
>>
>>         ma = ovsl_dereference(tbl->mask_array);
>> -       for (i = 0; i < ma->max; i++) {
>> +       for (i = 0; i < ma->count; i++) {
>>                 struct sw_flow_mask *t;
>>
>>                 t = ovsl_dereference(ma->masks[i]);
>> @@ -722,7 +790,6 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
>>         mask = flow_mask_find(tbl, new);
>>         if (!mask) {
>>                 struct mask_array *ma;
>> -               int i;
>>
>>                 /* Allocate a new mask if none exists. */
>>                 mask = mask_alloc();
>> @@ -745,16 +812,9 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
>>                         }
>>                         ma = ovsl_dereference(tbl->mask_array);
>>                 }
>> -               for (i = 0; i < ma->max; i++) {
>> -                       const struct sw_flow_mask *t;
>> -
>> -                       t = ovsl_dereference(ma->masks[i]);
>> -                       if (!t) {
>> -                               rcu_assign_pointer(ma->masks[i], mask);
>> -                               ma->count++;
>> -                               break;
>> -                       }
>> -               }
>> +
>> +               rcu_assign_pointer(ma->masks[ma->count], mask);
>> +               ma->count++;
>>         } else {
>>                 BUG_ON(!mask->ref_count);
>>                 mask->ref_count++;
>> --
> Otherwise looks good.
> Acked-by: Pravin B Shelar <pshe...@nicira.com>
>
>> 1.9.1
>>

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev
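
For readers skimming the thread, here is a minimal, single-threaded user-space sketch of the two ideas described in the commit message above: deleting a mask by moving the last valid entry into the freed slot (keeping the valid region dense), and letting a stale cached index repair itself by rescanning the valid region once it points past count. The mask_array, cache_entry, delete_by_swap, and fixup_index names are simplified stand-ins invented for this illustration, not the actual datapath structures; RCU and the extra pass that clears stale pointers to the deleted mask in the invalid region are omitted.

#include <stdio.h>

#define MAX_MASKS 8

/* Simplified stand-in for struct mask_array: masks[0..count) is the
 * valid region; slots at index count and beyond may keep stale
 * pointers around. */
struct mask_array {
        int count;
        void *masks[MAX_MASKS];
};

/* Simplified stand-in for a per-CPU mask cache entry. */
struct cache_entry {
        int mask_index;
};

/* Delete by moving the last valid entry into the freed slot.  The old
 * last slot keeps its pointer value (now in the invalid region), which
 * is what allows a stale cached index to recover below. */
static void delete_by_swap(struct mask_array *ma, void *mask)
{
        int i;

        for (i = 0; i < ma->count; i++) {
                if (ma->masks[i] == mask) {
                        ma->masks[i] = ma->masks[ma->count - 1];
                        ma->count--;
                        break;
                }
        }
}

/* If the cached index points outside the valid region, rescan the
 * valid region for the same mask pointer and fix the index in place. */
static void fixup_index(struct cache_entry *e, const struct mask_array *ma,
                        const void *cached_mask)
{
        int i;

        if (e->mask_index < ma->count)
                return;         /* Index still valid, nothing to do. */

        for (i = 0; i < ma->count; i++) {
                if (ma->masks[i] == cached_mask) {
                        e->mask_index = i;
                        return;
                }
        }
}

int main(void)
{
        static int a, b, c;     /* Dummy "masks". */
        struct mask_array ma = { .count = 3, .masks = { &a, &b, &c } };
        struct cache_entry e = { .mask_index = 2 };     /* Caches &c. */
        void *cached = ma.masks[e.mask_index];

        delete_by_swap(&ma, &a);        /* &c moves from slot 2 to slot 0. */
        fixup_index(&e, &ma, cached);   /* Index 2 >= count, so rescan. */
        printf("cached mask now at index %d\n", e.mask_index);  /* Prints 0. */
        return 0;
}

In this toy run, delete_by_swap() removes &a by copying &c from slot 2 into slot 0 and leaving slot 2 untouched, so fixup_index() can still match the cached pointer and reset the index to 0. This mirrors how a per-CPU cache entry recovers its mask index in the patch.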
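
On the shrink path added to flow_mask_remove(): the array is halved only when it is larger than twice MASK_ARRAY_SIZE_MIN and at most a quarter full, so a freshly shrunk array is at most half full afterwards. A tiny sketch of that condition follows; the value 16 for MASK_ARRAY_SIZE_MIN is an assumption made only for this example.

#include <stdbool.h>
#include <stdio.h>

#define MASK_ARRAY_SIZE_MIN 16  /* Assumed value, for illustration only. */

/* Mirrors the shrink condition in flow_mask_remove(): halve the array
 * only when it is well above the minimum size and at most a quarter
 * full, so insert/delete churn near a size boundary does not keep
 * reallocating the array. */
static bool should_shrink(int max, int count)
{
        return max > MASK_ARRAY_SIZE_MIN * 2 && count <= max / 4;
}

int main(void)
{
        printf("%d\n", should_shrink(64, 16));  /* 1: shrink 64 -> 32 */
        printf("%d\n", should_shrink(64, 17));  /* 0: more than a quarter full */
        printf("%d\n", should_shrink(32, 4));   /* 0: max not above 2 * MIN */
        return 0;
}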