On Fri, 28 Jun 2019 02:44:54 +0000
"Ruifeng Wang (Arm Technology China)" <ruifeng.w...@arm.com> wrote:

> >   
> > > Tests showed that the function inlining caused performance drop on
> > > some x86 platforms with the memory ordering patches applied.
> > > By force no-inline functions, the performance was better than before
> > > on x86 and no impact to arm64 platforms.
> > >
> > > Suggested-by: Medvedkin Vladimir <vladimir.medved...@intel.com>
> > > Signed-off-by: Ruifeng Wang <ruifeng.w...@arm.com>
> > > Reviewed-by: Gavin Hu <gavin...@arm.com>  
> >  {
> > 
> > Do you actually need to force noinline or is just taking of inline enough?
> > In general, letting compiler decide is often best practice.  
> 
> The force noinline is an optimization for x86 platforms to keep rte_lpm_add() 
> API
> performance with memory ordering applied. 

I don't think you answered my question. What does a recent version of GCC do
if you drop the inline.

Actually all the functions in rte_lpm should drop inline.

diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c
index 6b7b28a2e431..ffe07e980864 100644
--- a/lib/librte_lpm/rte_lpm.c
+++ b/lib/librte_lpm/rte_lpm.c
@@ -399,7 +399,7 @@ MAP_STATIC_SYMBOL(void rte_lpm_free(struct rte_lpm *lpm),
  * are stored in the rule table from 0 - 31.
  * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
  */
-static inline int32_t
+static int32_t
 rule_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
        uint8_t next_hop)
 {
@@ -471,7 +471,7 @@ rule_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, 
uint8_t depth,
        return rule_index;
 }
 
-static inline int32_t
+static int32_t
 rule_add_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
        uint32_t next_hop)
 {
@@ -547,7 +547,7 @@ rule_add_v1604(struct rte_lpm *lpm, uint32_t ip_masked, 
uint8_t depth,
  * Delete a rule from the rule table.
  * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
  */
-static inline void
+static void
 rule_delete_v20(struct rte_lpm_v20 *lpm, int32_t rule_index, uint8_t depth)
 {
        int i;
@@ -570,7 +570,7 @@ rule_delete_v20(struct rte_lpm_v20 *lpm, int32_t 
rule_index, uint8_t depth)
        lpm->rule_info[depth - 1].used_rules--;
 }
 
-static inline void
+static void
 rule_delete_v1604(struct rte_lpm *lpm, int32_t rule_index, uint8_t depth)
 {
        int i;
@@ -597,7 +597,7 @@ rule_delete_v1604(struct rte_lpm *lpm, int32_t rule_index, 
uint8_t depth)
  * Finds a rule in rule table.
  * NOTE: Valid range for depth parameter is 1 .. 32 inclusive.
  */
-static inline int32_t
+static int32_t
 rule_find_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth)
 {
        uint32_t rule_gindex, last_rule, rule_index;
@@ -618,7 +618,7 @@ rule_find_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, 
uint8_t depth)
        return -EINVAL;
 }
 
-static inline int32_t
+static int32_t
 rule_find_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth)
 {
        uint32_t rule_gindex, last_rule, rule_index;
@@ -642,7 +642,7 @@ rule_find_v1604(struct rte_lpm *lpm, uint32_t ip_masked, 
uint8_t depth)
 /*
  * Find, clean and allocate a tbl8.
  */
-static inline int32_t
+static int32_t
 tbl8_alloc_v20(struct rte_lpm_tbl_entry_v20 *tbl8)
 {
        uint32_t group_idx; /* tbl8 group index. */
@@ -669,7 +669,7 @@ tbl8_alloc_v20(struct rte_lpm_tbl_entry_v20 *tbl8)
        return -ENOSPC;
 }
 
-static inline int32_t
+static int32_t
 tbl8_alloc_v1604(struct rte_lpm_tbl_entry *tbl8, uint32_t number_tbl8s)
 {
        uint32_t group_idx; /* tbl8 group index. */
@@ -709,7 +709,7 @@ tbl8_free_v1604(struct rte_lpm_tbl_entry *tbl8, uint32_t 
tbl8_group_start)
        tbl8[tbl8_group_start].valid_group = INVALID;
 }
 
-static inline int32_t
+static int32_t
 add_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
                uint8_t next_hop)
 {
@@ -777,7 +777,7 @@ add_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip, 
uint8_t depth,
        return 0;
 }
 
-static inline int32_t
+static int32_t
 add_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
                uint32_t next_hop)
 {
@@ -846,7 +846,7 @@ add_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip, 
uint8_t depth,
        return 0;
 }
 
-static inline int32_t
+static int32_t
 add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth,
                uint8_t next_hop)
 {
@@ -971,7 +971,7 @@ add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t 
ip_masked, uint8_t depth,
        return 0;
 }
 
-static inline int32_t
+static int32_t
 add_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth,
                uint32_t next_hop)
 {
@@ -1244,7 +1244,7 @@ BIND_DEFAULT_SYMBOL(rte_lpm_is_rule_present, _v1604, 
16.04);
 MAP_STATIC_SYMBOL(int rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip,
                uint8_t depth, uint32_t *next_hop), 
rte_lpm_is_rule_present_v1604);
 
-static inline int32_t
+static int32_t
 find_previous_rule_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth,
                uint8_t *sub_rule_depth)
 {
@@ -1266,7 +1266,7 @@ find_previous_rule_v20(struct rte_lpm_v20 *lpm, uint32_t 
ip, uint8_t depth,
        return -1;
 }
 
-static inline int32_t
+static int32_t
 find_previous_rule_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth,
                uint8_t *sub_rule_depth)
 {
@@ -1288,7 +1288,7 @@ find_previous_rule_v1604(struct rte_lpm *lpm, uint32_t 
ip, uint8_t depth,
        return -1;
 }
 
-static inline int32_t
+static int32_t
 delete_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
        uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
 {
@@ -1381,7 +1381,7 @@ delete_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t 
ip_masked,
        return 0;
 }
 
-static inline int32_t
+static int32_t
 delete_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
        uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
 {
@@ -1483,7 +1483,7 @@ delete_depth_small_v1604(struct rte_lpm *lpm, uint32_t 
ip_masked,
  * Return of value > -1 means tbl8 is in use but has all the same values and
  * thus can be recycled
  */
-static inline int32_t
+static int32_t
 tbl8_recycle_check_v20(struct rte_lpm_tbl_entry_v20 *tbl8,
                uint32_t tbl8_group_start)
 {
@@ -1530,7 +1530,7 @@ tbl8_recycle_check_v20(struct rte_lpm_tbl_entry_v20 *tbl8,
        return -EINVAL;
 }
 
-static inline int32_t
+static int32_t
 tbl8_recycle_check_v1604(struct rte_lpm_tbl_entry *tbl8,
                uint32_t tbl8_group_start)
 {
@@ -1577,7 +1577,7 @@ tbl8_recycle_check_v1604(struct rte_lpm_tbl_entry *tbl8,
        return -EINVAL;
 }
 
-static inline int32_t
+static int32_t
 delete_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked,
        uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
 {
@@ -1655,7 +1655,7 @@ delete_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t 
ip_masked,
        return 0;
 }
 
-static inline int32_t
+static int32_t
 delete_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked,
        uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth)
 {

Reply via email to