On Thu, Jul 30, 2020 at 8:33 PM Eelco Chaudron <echau...@redhat.com> wrote: > > Thanks Tonghao for the review, see comments inline below! > > If I get no more reviews by the end of the week I’ll send out a v4. > > //Eelco > > > On 30 Jul 2020, at 7:07, Tonghao Zhang wrote: > > > On Wed, Jul 29, 2020 at 9:27 PM Eelco Chaudron <echau...@redhat.com> > > wrote: > >> > >> This patch makes the masks cache size configurable, or with > >> a size of 0, disable it. > >> > >> Reviewed-by: Paolo Abeni <pab...@redhat.com> > >> Signed-off-by: Eelco Chaudron <echau...@redhat.com> > >> --- > >> Changes in v3: > >> - Use is_power_of_2() function > >> - Use array_size() function > >> - Fix remaining sparse errors > >> > >> Changes in v2: > >> - Fix sparse warnings > >> - Fix netlink policy items reported by Florian Westphal > >> > >> include/uapi/linux/openvswitch.h | 1 > >> net/openvswitch/datapath.c | 14 +++++ > >> net/openvswitch/flow_table.c | 97 > >> +++++++++++++++++++++++++++++++++----- > >> net/openvswitch/flow_table.h | 10 ++++ > >> 4 files changed, 108 insertions(+), 14 deletions(-) > >> > >> diff --git a/include/uapi/linux/openvswitch.h > >> b/include/uapi/linux/openvswitch.h > >> index 7cb76e5ca7cf..8300cc29dec8 100644 > >> --- a/include/uapi/linux/openvswitch.h > >> +++ b/include/uapi/linux/openvswitch.h > >> @@ -86,6 +86,7 @@ enum ovs_datapath_attr { > >> OVS_DP_ATTR_MEGAFLOW_STATS, /* struct > >> ovs_dp_megaflow_stats */ > >> OVS_DP_ATTR_USER_FEATURES, /* OVS_DP_F_* */ > >> OVS_DP_ATTR_PAD, > >> + OVS_DP_ATTR_MASKS_CACHE_SIZE, > >> __OVS_DP_ATTR_MAX > >> }; > >> > >> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c > >> index a54df1fe3ec4..114b2ddb8037 100644 > >> --- a/net/openvswitch/datapath.c > >> +++ b/net/openvswitch/datapath.c > >> @@ -1498,6 +1498,7 @@ static size_t ovs_dp_cmd_msg_size(void) > >> msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats)); > >> msgsize += nla_total_size_64bit(sizeof(struct > >> ovs_dp_megaflow_stats)); > >> msgsize += nla_total_size(sizeof(u32)); /* > >> OVS_DP_ATTR_USER_FEATURES */ > >> + msgsize += nla_total_size(sizeof(u32)); /* > >> OVS_DP_ATTR_MASKS_CACHE_SIZE */ > >> > >> return msgsize; > >> } > >> @@ -1535,6 +1536,10 @@ static int ovs_dp_cmd_fill_info(struct > >> datapath *dp, struct sk_buff *skb, > >> if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, > >> dp->user_features)) > >> goto nla_put_failure; > >> > >> + if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE, > >> + ovs_flow_tbl_masks_cache_size(&dp->table))) > >> + goto nla_put_failure; > >> + > >> genlmsg_end(skb, ovs_header); > >> return 0; > >> > >> @@ -1599,6 +1604,13 @@ static int ovs_dp_change(struct datapath *dp, > >> struct nlattr *a[]) > >> #endif > >> } > >> > >> + if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) { > >> + u32 cache_size; > >> + > >> + cache_size = > >> nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]); > >> + ovs_flow_tbl_masks_cache_resize(&dp->table, > >> cache_size); > > Do we should return error code, if we can't change the "mask_cache" > > size ? for example, -EINVAL, -ENOMEM > > Initially, I did not do this due to the fact the new value is reported, > and on failure, the old value is shown. > However thinking about it again, it makes more sense to return an error. > Will sent a v4 with the following to return: > > - > -void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 > size) > +int ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 size) > { > struct mask_cache *mc = rcu_dereference(table->mask_cache); > struct mask_cache *new; > > if (size == mc->cache_size) > - return; > + return 0; > + > + if (!is_power_of_2(size) && size != 0) > + return -EINVAL; add check as below, and we can remove the check in tbl_mask_cache_alloc function. That function will only return NULL, if there is no memory. And we can comment for tbl_mask_cache_alloc, to indicate that size should be a power of 2. if ((!is_power_of_2(size) && size != 0) || (size * sizeof(struct mask_cache_entry)) > PCPU_MIN_UNIT_SIZE)
Thanks Eelco. Reviewed-by: Tonghao Zhang <xiangxia.m....@gmail.com> > new = tbl_mask_cache_alloc(size); > if (!new) > - return; > + return -ENOMEM; > > rcu_assign_pointer(table->mask_cache, new); > call_rcu(&mc->rcu, mask_cache_rcu_cb); > + > + return 0; > } > > >> + } > >> + > >> dp->user_features = user_features; > >> > >> if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) > >> @@ -1894,6 +1906,8 @@ static const struct nla_policy > >> datapath_policy[OVS_DP_ATTR_MAX + 1] = { > >> [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = > >> IFNAMSIZ - 1 }, > >> [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 }, > >> [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 }, > >> + [OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, > >> 0, > >> + PCPU_MIN_UNIT_SIZE / sizeof(struct > >> mask_cache_entry)), > >> }; > >> > >> static const struct genl_ops dp_datapath_genl_ops[] = { > >> diff --git a/net/openvswitch/flow_table.c > >> b/net/openvswitch/flow_table.c > >> index a5912ea05352..5280aeeef628 100644 > >> --- a/net/openvswitch/flow_table.c > >> +++ b/net/openvswitch/flow_table.c > >> @@ -38,8 +38,8 @@ > >> #define MASK_ARRAY_SIZE_MIN 16 > >> #define REHASH_INTERVAL (10 * 60 * HZ) > >> > >> +#define MC_DEFAULT_HASH_ENTRIES 256 > >> #define MC_HASH_SHIFT 8 > >> -#define MC_HASH_ENTRIES (1u << MC_HASH_SHIFT) > >> #define MC_HASH_SEGS ((sizeof(uint32_t) * 8) / > >> MC_HASH_SHIFT) > >> > >> static struct kmem_cache *flow_cache; > >> @@ -341,15 +341,75 @@ static void flow_mask_remove(struct flow_table > >> *tbl, struct sw_flow_mask *mask) > >> } > >> } > >> > >> +static void __mask_cache_destroy(struct mask_cache *mc) > >> +{ > >> + if (mc->mask_cache) > >> + free_percpu(mc->mask_cache); > > free_percpu the NULL is safe. we can remove the "if". > > Makes sense, will remove the if() check. > > >> + kfree(mc); > >> +} > >> + > >> +static void mask_cache_rcu_cb(struct rcu_head *rcu) > >> +{ > >> + struct mask_cache *mc = container_of(rcu, struct mask_cache, > >> rcu); > >> + > >> + __mask_cache_destroy(mc); > >> +} > >> + > >> +static struct mask_cache *tbl_mask_cache_alloc(u32 size) > >> +{ > >> + struct mask_cache_entry __percpu *cache = NULL; > >> + struct mask_cache *new; > >> + > >> + /* Only allow size to be 0, or a power of 2, and does not > >> exceed > >> + * percpu allocation size. > >> + */ > >> + if ((!is_power_of_2(size) && size != 0) || > >> + (size * sizeof(struct mask_cache_entry)) > > >> PCPU_MIN_UNIT_SIZE) > >> + return NULL; > >> + new = kzalloc(sizeof(*new), GFP_KERNEL); > >> + if (!new) > >> + return NULL; > >> + > >> + new->cache_size = size; > >> + if (new->cache_size > 0) { > >> + cache = __alloc_percpu(array_size(sizeof(struct > >> mask_cache_entry), > >> + new->cache_size), > >> + __alignof__(struct > >> mask_cache_entry)); > >> + if (!cache) { > >> + kfree(new); > >> + return NULL; > >> + } > >> + } > >> + > >> + new->mask_cache = cache; > >> + return new; > >> +} > >> + > >> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 > >> size) > >> +{ > >> + struct mask_cache *mc = rcu_dereference(table->mask_cache); > >> + struct mask_cache *new; > >> + > >> + if (size == mc->cache_size) > >> + return; > >> + > >> + new = tbl_mask_cache_alloc(size); > >> + if (!new) > >> + return; > >> + > >> + rcu_assign_pointer(table->mask_cache, new); > >> + call_rcu(&mc->rcu, mask_cache_rcu_cb); > >> +} > >> + > >> int ovs_flow_tbl_init(struct flow_table *table) > >> { > >> struct table_instance *ti, *ufid_ti; > >> + struct mask_cache *mc; > >> struct mask_array *ma; > >> > >> - table->mask_cache = __alloc_percpu(sizeof(struct > >> mask_cache_entry) * > >> - MC_HASH_ENTRIES, > >> - __alignof__(struct > >> mask_cache_entry)); > >> - if (!table->mask_cache) > >> + mc = tbl_mask_cache_alloc(MC_DEFAULT_HASH_ENTRIES); > >> + if (!mc) > >> return -ENOMEM; > >> > >> ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN); > >> @@ -367,6 +427,7 @@ int ovs_flow_tbl_init(struct flow_table *table) > >> rcu_assign_pointer(table->ti, ti); > >> rcu_assign_pointer(table->ufid_ti, ufid_ti); > >> rcu_assign_pointer(table->mask_array, ma); > >> + rcu_assign_pointer(table->mask_cache, mc); > >> table->last_rehash = jiffies; > >> table->count = 0; > >> table->ufid_count = 0; > >> @@ -377,7 +438,7 @@ int ovs_flow_tbl_init(struct flow_table *table) > >> free_mask_array: > >> __mask_array_destroy(ma); > >> free_mask_cache: > >> - free_percpu(table->mask_cache); > >> + __mask_cache_destroy(mc); > >> return -ENOMEM; > >> } > >> > >> @@ -453,9 +514,11 @@ void ovs_flow_tbl_destroy(struct flow_table > >> *table) > >> { > >> struct table_instance *ti = rcu_dereference_raw(table->ti); > >> struct table_instance *ufid_ti = > >> rcu_dereference_raw(table->ufid_ti); > >> + struct mask_cache *mc = rcu_dereference(table->mask_cache); > >> + struct mask_array *ma = > >> rcu_dereference_ovsl(table->mask_array); > >> > >> - free_percpu(table->mask_cache); > >> - call_rcu(&table->mask_array->rcu, mask_array_rcu_cb); > >> + call_rcu(&mc->rcu, mask_cache_rcu_cb); > >> + call_rcu(&ma->rcu, mask_array_rcu_cb); > >> table_instance_destroy(table, ti, ufid_ti, false); > >> } > >> > >> @@ -724,6 +787,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct > >> flow_table *tbl, > >> u32 *n_mask_hit, > >> u32 *n_cache_hit) > >> { > >> + struct mask_cache *mc = rcu_dereference(tbl->mask_cache); > >> struct mask_array *ma = rcu_dereference(tbl->mask_array); > >> struct table_instance *ti = rcu_dereference(tbl->ti); > >> struct mask_cache_entry *entries, *ce; > >> @@ -733,7 +797,7 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct > >> flow_table *tbl, > >> > >> *n_mask_hit = 0; > >> *n_cache_hit = 0; > >> - if (unlikely(!skb_hash)) { > >> + if (unlikely(!skb_hash || mc->cache_size == 0)) { > >> u32 mask_index = 0; > >> u32 cache = 0; > >> > >> @@ -749,11 +813,11 @@ struct sw_flow > >> *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, > >> > >> ce = NULL; > >> hash = skb_hash; > >> - entries = this_cpu_ptr(tbl->mask_cache); > >> + entries = this_cpu_ptr(mc->mask_cache); > >> > >> /* Find the cache entry 'ce' to operate on. */ > >> for (seg = 0; seg < MC_HASH_SEGS; seg++) { > >> - int index = hash & (MC_HASH_ENTRIES - 1); > >> + int index = hash & (mc->cache_size - 1); > >> struct mask_cache_entry *e; > >> > >> e = &entries[index]; > >> @@ -867,6 +931,13 @@ int ovs_flow_tbl_num_masks(const struct > >> flow_table *table) > >> return READ_ONCE(ma->count); > >> } > >> > >> +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table) > >> +{ > >> + struct mask_cache *mc = rcu_dereference(table->mask_cache); > >> + > >> + return READ_ONCE(mc->cache_size); > >> +} > >> + > >> static struct table_instance *table_instance_expand(struct > >> table_instance *ti, > >> bool ufid) > >> { > >> @@ -1095,8 +1166,8 @@ void ovs_flow_masks_rebalance(struct flow_table > >> *table) > >> for (i = 0; i < masks_entries; i++) { > >> int index = masks_and_count[i].index; > >> > >> - new->masks[new->count++] = > >> - rcu_dereference_ovsl(ma->masks[index]); > >> + if (ovsl_dereference(ma->masks[index])) > >> + new->masks[new->count++] = ma->masks[index]; > >> } > >> > >> rcu_assign_pointer(table->mask_array, new); > >> diff --git a/net/openvswitch/flow_table.h > >> b/net/openvswitch/flow_table.h > >> index 325e939371d8..f2dba952db2f 100644 > >> --- a/net/openvswitch/flow_table.h > >> +++ b/net/openvswitch/flow_table.h > >> @@ -27,6 +27,12 @@ struct mask_cache_entry { > >> u32 mask_index; > >> }; > >> > >> +struct mask_cache { > >> + struct rcu_head rcu; > >> + u32 cache_size; /* Must be ^2 value. */ > >> + struct mask_cache_entry __percpu *mask_cache; > >> +}; > >> + > >> struct mask_count { > >> int index; > >> u64 counter; > >> @@ -53,7 +59,7 @@ struct table_instance { > >> struct flow_table { > >> struct table_instance __rcu *ti; > >> struct table_instance __rcu *ufid_ti; > >> - struct mask_cache_entry __percpu *mask_cache; > >> + struct mask_cache __rcu *mask_cache; > >> struct mask_array __rcu *mask_array; > >> unsigned long last_rehash; > >> unsigned int count; > >> @@ -77,6 +83,8 @@ int ovs_flow_tbl_insert(struct flow_table *table, > >> struct sw_flow *flow, > >> const struct sw_flow_mask *mask); > >> void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow > >> *flow); > >> int ovs_flow_tbl_num_masks(const struct flow_table *table); > >> +u32 ovs_flow_tbl_masks_cache_size(const struct flow_table *table); > >> +void ovs_flow_tbl_masks_cache_resize(struct flow_table *table, u32 > >> size); > >> struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *table, > >> u32 *bucket, u32 *idx); > >> struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *, > >> > > > > > > -- > > Best regards, Tonghao > -- Best regards, Tonghao