On 10/15/20 11:46 AM, Eelco Chaudron wrote:
> The flow_lookup() function uses per-CPU variables and therefore must not
> be preempted. This is fine in the general NAPI use case, where the local
> BH is disabled. However, it is also called from the netlink context,
> which is preemptible. The below patch makes sure that preemption is
> disabled in the netlink path as well.
> 
> In addition, the u64_stats_update_begin() sync point was not protected;
> making the sync point part of the per-CPU variable fixes this.
> 
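For readers skimming the diff: the fix pairs each CPU's counters with their
own u64_stats_sync, so the readers in tbl_mask_array_reset_counters() and
ovs_flow_masks_rebalance() sample one CPU's counters against that CPU's own
sequence count. The read side is the usual per-CPU u64_stats pattern,
roughly like below (the helper name is made up for illustration; the types
match the patch):

	static u64 read_usage_cntr(struct mask_array_stats __percpu *pcpu_stats,
				   int cpu, int i)
	{
		struct mask_array_stats *stats = per_cpu_ptr(pcpu_stats, cpu);
		unsigned int start;
		u64 counter;

		do {
			start = u64_stats_fetch_begin_irq(&stats->syncp);
			counter = stats->usage_cntrs[i];
		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));

		return counter;
	}
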
> Fixes: eac87c413bf9 ("net: openvswitch: reorder masks array based on usage")
> Reported-by: Juri Lelli <jle...@redhat.com>
> Signed-off-by: Eelco Chaudron <echau...@redhat.com>
> ---
> v2: - Add u64_stats_update_begin() sync point protection
>     - Moved patch to net from net-next branch
> 
>  net/openvswitch/flow_table.c |   56 +++++++++++++++++++++++++-----------------
>  net/openvswitch/flow_table.h |    8 +++++-
>  2 files changed, 39 insertions(+), 25 deletions(-)
> 
> diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
> index e2235849a57e..d90b4af6f539 100644
> --- a/net/openvswitch/flow_table.c
> +++ b/net/openvswitch/flow_table.c
> @@ -172,7 +172,7 @@ static struct table_instance *table_instance_alloc(int new_size)
>  
>  static void __mask_array_destroy(struct mask_array *ma)
>  {
> -     free_percpu(ma->masks_usage_cntr);
> +     free_percpu(ma->masks_usage_stats);
>       kfree(ma);
>  }
>  
> @@ -196,15 +196,15 @@ static void tbl_mask_array_reset_counters(struct mask_array *ma)
>               ma->masks_usage_zero_cntr[i] = 0;
>  
>               for_each_possible_cpu(cpu) {
> -                     u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr,
> -                                                       cpu);
> +                     struct mask_array_stats *stats;
>                       unsigned int start;
>                       u64 counter;
>  
> +                     stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
>                       do {
> -                             start = u64_stats_fetch_begin_irq(&ma->syncp);
> -                             counter = usage_counters[i];
> -                     } while (u64_stats_fetch_retry_irq(&ma->syncp, start));
> +                             start = u64_stats_fetch_begin_irq(&stats->syncp);
> +                             counter = stats->usage_cntrs[i];
> +                     } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
>  
>                       ma->masks_usage_zero_cntr[i] += counter;
>               }
> @@ -227,9 +227,10 @@ static struct mask_array *tbl_mask_array_alloc(int size)
>                                            sizeof(struct sw_flow_mask *) *
>                                            size);
>  
> -     new->masks_usage_cntr = __alloc_percpu(sizeof(u64) * size,
> -                                            __alignof__(u64));
> -     if (!new->masks_usage_cntr) {
> +     new->masks_usage_stats = __alloc_percpu(sizeof(struct mask_array_stats) +
> +                                             sizeof(u64) * size,
> +                                             __alignof__(u64));
> +     if (!new->masks_usage_stats) {
>               kfree(new);
>               return NULL;
>       }
> @@ -732,7 +733,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>                                  u32 *n_cache_hit,
>                                  u32 *index)
>  {
> -     u64 *usage_counters = this_cpu_ptr(ma->masks_usage_cntr);
> +     struct mask_array_stats *stats = this_cpu_ptr(ma->masks_usage_stats);
>       struct sw_flow *flow;
>       struct sw_flow_mask *mask;
>       int i;
> @@ -742,9 +743,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>               if (mask) {
>                       flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
>                       if (flow) {
> -                             u64_stats_update_begin(&ma->syncp);
> -                             usage_counters[*index]++;
> -                             u64_stats_update_end(&ma->syncp);
> +                             u64_stats_update_begin(&stats->syncp);
> +                             stats->usage_cntrs[*index]++;
> +                             u64_stats_update_end(&stats->syncp);
>                               (*n_cache_hit)++;
>                               return flow;
>                       }
> @@ -763,9 +764,9 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
>               flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
>               if (flow) { /* Found */
>                       *index = i;
> -                     u64_stats_update_begin(&ma->syncp);
> -                     usage_counters[*index]++;
> -                     u64_stats_update_end(&ma->syncp);
> +                     u64_stats_update_begin(&stats->syncp);
> +                     stats->usage_cntrs[*index]++;
> +                     u64_stats_update_end(&stats->syncp);
>                       return flow;
>               }
>       }
> @@ -851,9 +852,17 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *tbl,
>       struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
>       u32 __always_unused n_mask_hit;
>       u32 __always_unused n_cache_hit;
> +     struct sw_flow *flow;
>       u32 index = 0;
>  
> -     return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index);
> +     /* This function gets called through the netlink interface and therefore
> +      * is preemptible. However, the flow_lookup() function needs to be called
> +      * with preemption disabled due to CPU specific variables.

Is it possible to add some kind of assertion inside flow_lookup() to avoid
this kind of issue in the future?

It might also be good to update the comment for the flow_lookup() function itself.

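Something along these lines at the top of flow_lookup() would have flagged
the netlink caller (just a sketch; preemptible() only gives a meaningful
answer with CONFIG_PREEMPT_COUNT enabled, and a lockdep-based assertion
would be an alternative):

	/* flow_lookup() touches per-CPU counters and their syncp, so the
	 * caller must keep us on this CPU, e.g. by disabling BHs or
	 * preemption.
	 */
	WARN_ON_ONCE(preemptible());

The comment above could then also serve as the updated function comment
mentioned.
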
> +      */
> +     local_bh_disable();
> +     flow = flow_lookup(tbl, ti, ma, key, &n_mask_hit, &n_cache_hit, &index);
> +     local_bh_enable();
> +     return flow;
>  }
>  
>  struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
> @@ -1109,7 +1118,6 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
>  
>       for (i = 0; i < ma->max; i++)  {
>               struct sw_flow_mask *mask;
> -             unsigned int start;
>               int cpu;
>  
>               mask = rcu_dereference_ovsl(ma->masks[i]);
> @@ -1120,14 +1128,16 @@ void ovs_flow_masks_rebalance(struct flow_table *table)
>               masks_and_count[i].counter = 0;
>  
>               for_each_possible_cpu(cpu) {
> -                     u64 *usage_counters = per_cpu_ptr(ma->masks_usage_cntr,
> -                                                       cpu);
> +                     struct mask_array_stats *stats;
> +                     unsigned int start;
>                       u64 counter;
>  
> +                     stats = per_cpu_ptr(ma->masks_usage_stats, cpu);
>                       do {
> -                             start = u64_stats_fetch_begin_irq(&ma->syncp);
> -                             counter = usage_counters[i];
> -                     } while (u64_stats_fetch_retry_irq(&ma->syncp, start));
> +                             start = u64_stats_fetch_begin_irq(&stats->syncp);
> +                             counter = stats->usage_cntrs[i];
> +                     } while (u64_stats_fetch_retry_irq(&stats->syncp,
> +                                                        start));
>  
>                       masks_and_count[i].counter += counter;
>               }
> diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
> index 6e7d4ac59353..43144396e192 100644
> --- a/net/openvswitch/flow_table.h
> +++ b/net/openvswitch/flow_table.h
> @@ -38,12 +38,16 @@ struct mask_count {
>       u64 counter;
>  };
>  
> +struct mask_array_stats {
> +     struct u64_stats_sync syncp;
> +     u64 usage_cntrs[];
> +};
> +
>  struct mask_array {
>       struct rcu_head rcu;
>       int count, max;
> -     u64 __percpu *masks_usage_cntr;
> +     struct mask_array_stats __percpu *masks_usage_stats;
>       u64 *masks_usage_zero_cntr;
> -     struct u64_stats_sync syncp;
>       struct sw_flow_mask __rcu *masks[];
>  };
>  
> 