> Hi all,
>
> On 28/06/2019 05:34, Stephen Hemminger wrote:
> > On Fri, 28 Jun 2019 02:44:54 +0000
> > "Ruifeng Wang (Arm Technology China)" <ruifeng.w...@arm.com> wrote:
> >
> >>>> Tests showed that the function inlining caused a performance drop
> >>>> on some x86 platforms with the memory ordering patches applied.
> >>>> By forcing no-inline on these functions, performance was better
> >>>> than before on x86, with no impact on arm64 platforms.
> >>>>
> >>>> Suggested-by: Medvedkin Vladimir <vladimir.medved...@intel.com>
> >>>> Signed-off-by: Ruifeng Wang <ruifeng.w...@arm.com>
> >>>> Reviewed-by: Gavin Hu <gavin...@arm.com>
> >>>
> >>> Do you actually need to force noinline, or is just taking off
> >>> inline enough? In general, letting the compiler decide is often
> >>> best practice.
> >> Forcing noinline is an optimization for x86 platforms to keep
> >> rte_lpm_add() API performance with memory ordering applied.
> > I don't think you answered my question. What does a recent version
> > of GCC do if you drop the inline?
> >
> > Actually all the functions in rte_lpm should drop inline.
> I agree with Stephen. If it is not on the fast path and the size of
> the function is not minimal, it is better to remove the inline
> qualifier from the other control plane functions, such as
> rule_add/delete/find/etc., and let the compiler decide whether to
> inline them (unless it affects performance).

IMO, the rule needs to be simple: if it is a control plane function, we
should leave the decision to the compiler. I do not think we need to
worry too much about performance for control plane functions.
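To make the options concrete, below is a minimal toy sketch (made-up
function names, not the rte_lpm code) of the three choices being
discussed, using the GCC/clang noinline attribute:

#include <stdint.h>

/* 1. Hint only: the compiler may inline, subject to its heuristics. */
static inline int32_t
rule_op_hint(int32_t x)
{
	return x + 1;
}

/* 2. No hint: the compiler still decides on its own. At -O2/-O3, GCC
 * typically inlines a small static function even without the keyword
 * and discards the out-of-line copy. */
static int32_t
rule_op_plain(int32_t x)
{
	return x + 1;
}

/* 3. Forced out of line: inlining is suppressed regardless of the
 * optimizer's heuristics. */
static __attribute__((noinline)) int32_t
rule_op_noinline(int32_t x)
{
	return x + 1;
}

int main(void)
{
	/* Calls keep the toy functions referenced. */
	return (rule_op_hint(0) + rule_op_plain(0) + rule_op_noinline(0)) - 3;
}

One quick way to see what a given GCC version actually does after
dropping the keyword is to compile the file and run nm on the object:
if a static function was inlined at every call site (and its address
never taken), its out-of-line body and local symbol disappear from the
object file.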
> > diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c
> > index 6b7b28a2e431..ffe07e980864 100644
> > --- a/lib/librte_lpm/rte_lpm.c
> > +++ b/lib/librte_lpm/rte_lpm.c
> > @@ -399,7 +399,7 @@ MAP_STATIC_SYMBOL(void rte_lpm_free(struct rte_lpm *lpm),
> >   * are stored in the rule table from 0 - 31.
> >   * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
> >   */
> > -static inline int32_t
> > +static int32_t
> >  rule_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
> >  	uint8_t next_hop)
> >  {
> > @@ -471,7 +471,7 @@ rule_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
> >  	return rule_index;
> >  }
> >  
> > -static inline int32_t
> > +static int32_t
> >  rule_add_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
> >  	uint32_t next_hop)
> >  {
> > @@ -547,7 +547,7 @@ rule_add_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
> >   * Delete a rule from the rule table.
> >   * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
> >   */
> > -static inline void
> > +static void
> >  rule_delete_v20(struct rte_lpm_v20 *lpm, int32_t rule_index, uint8_t depth)
> >  {
> >  	int i;
> > @@ -570,7 +570,7 @@ rule_delete_v20(struct rte_lpm_v20 *lpm, int32_t rule_index, uint8_t depth)
> >  	lpm->rule_info[depth - 1].used_rules--;
> >  }
> >  
> > -static inline void
> > +static void
> >  rule_delete_v1604(struct rte_lpm *lpm, int32_t rule_index, uint8_t depth)
> >  {
> >  	int i;
> > @@ -597,7 +597,7 @@ rule_delete_v1604(struct rte_lpm *lpm, int32_t rule_index, uint8_t depth)
> >   * Finds a rule in rule table.
> >   * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
> >   */
> > -static inline int32_t
> > +static int32_t
> >  rule_find_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth)
> >  {
> >  	uint32_t rule_gindex, last_rule, rule_index;
> > @@ -618,7 +618,7 @@ rule_find_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth)
> >  	return -EINVAL;
> >  }
> >  
> > -static inline int32_t
> > +static int32_t
> >  rule_find_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth)
> >  {
> >  	uint32_t rule_gindex, last_rule, rule_index;
> > @@ -642,7 +642,7 @@ rule_find_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth)
> >  /*
> >   * Find, clean and allocate a tbl8.
> >   */
> > -static inline int32_t
> > +static int32_t
> >  tbl8_alloc_v20(struct rte_lpm_tbl_entry_v20 *tbl8)
> >  {
> >  	uint32_t group_idx; /* tbl8 group index. */
> > @@ -669,7 +669,7 @@ tbl8_alloc_v20(struct rte_lpm_tbl_entry_v20 *tbl8)
> >  	return -ENOSPC;
> >  }
> >  
> > -static inline int32_t
> > +static int32_t
> >  tbl8_alloc_v1604(struct rte_lpm_tbl_entry *tbl8, uint32_t number_tbl8s)
> >  {
> >  	uint32_t group_idx; /* tbl8 group index. */
> > @@ -709,7 +709,7 @@ tbl8_free_v1604(struct rte_lpm_tbl_entry *tbl8, uint32_t tbl8_group_start)
> >  	tbl8[tbl8_group_start].valid_group = INVALID;
> >  }
> >  
> > -static inline int32_t
> > +static int32_t
> >  add_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
> >  	uint8_t next_hop)
> >  {
> > @@ -777,7 +777,7 @@ add_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
> >  	return 0;
> >  }
> >  
> > -static inline int32_t
> > +static int32_t
> >  add_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
> >  	uint32_t next_hop)
> >  {
> > @@ -846,7 +846,7 @@ add_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
> >  	return 0;
> >  }
> >  
> > -static inline int32_t
> > +static int32_t
> >  add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
> >  	uint8_t next_hop)
> >  {
> > @@ -971,7 +971,7 @@ add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
> >  	return 0;
> >  }
> >  
> > -static inline int32_t
> > +static int32_t
> >  add_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
> >  	uint32_t next_hop)
> >  {
> > @@ -1244,7 +1244,7 @@ BIND_DEFAULT_SYMBOL(rte_lpm_is_rule_present, _v1604, 16.04);
> >  MAP_STATIC_SYMBOL(int rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip,
> >  	uint8_t depth, uint32_t *next_hop), rte_lpm_is_rule_present_v1604);
> >  
> > -static inline int32_t
> > +static int32_t
> >  find_previous_rule_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
> >  	uint8_t *sub_rule_depth)
> >  {
> > @@ -1266,7 +1266,7 @@ find_previous_rule_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
> >  	return -1;
> >  }
> >  
> > -static inline int32_t
> > +static int32_t
> >  find_previous_rule_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
> >  	uint8_t *sub_rule_depth)
> >  {
> > @@ -1288,7 +1288,7 @@ find_previous_rule_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
> >  	return -1;
> >  }
> >  
> > -static inline int32_t
> > +static int32_t
> >  delete_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
> >  	uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
> >  {
> > @@ -1381,7 +1381,7 @@ delete_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
> >  	return 0;
> >  }
> >  
> > -static inline int32_t
> > +static int32_t
> >  delete_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
> >  	uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
> >  {
> > @@ -1483,7 +1483,7 @@ delete_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
> >   * Return of value > -1 means tbl8 is in use but has all the same values and
> >   * thus can be recycled
> >   */
> > -static inline int32_t
> > +static int32_t
> >  tbl8_recycle_check_v20(struct rte_lpm_tbl_entry_v20 *tbl8,
> >  	uint32_t tbl8_group_start)
> >  {
> > @@ -1530,7 +1530,7 @@ tbl8_recycle_check_v20(struct rte_lpm_tbl_entry_v20 *tbl8,
> >  	return -EINVAL;
> >  }
> >  
> > -static inline int32_t
> > +static int32_t
> >  tbl8_recycle_check_v1604(struct rte_lpm_tbl_entry *tbl8,
> >  	uint32_t tbl8_group_start)
> >  {
> > @@ -1577,7 +1577,7 @@ tbl8_recycle_check_v1604(struct rte_lpm_tbl_entry *tbl8,
> >  	return -EINVAL;
> >  }
> >  
> > -static inline int32_t
> > +static int32_t
> >  delete_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
> >  	uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
> >  {
> > @@ -1655,7 +1655,7 @@ delete_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
> >  	return 0;
> >  }
> >  
> > -static inline int32_t
> > +static int32_t
> >  delete_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
> >  	uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
> >  {
> > --
>
> Regards,
> Vladimir