<snip>
> 
> On Thu, May 05, 2022 at 10:59:32PM +0000, Honnappa Nagarahalli wrote:
> > Thanks Stephen. Do you see any performance difference with this change?
> 
> as a matter of due diligence i think a comparison should be made just to be
> confident nothing is regressing.
> 
> i support this change in principle, since it is generally accepted best
> practice not to force inlining: it can defeat more valuable optimizations
> that the compiler may make and that the human can't see, and those
> optimizations may vary depending on the compiler implementation.
> 
> force inlining should be used as a targeted measure rather than applied as
> a blanket on every function, and where it is used it probably needs to be
> periodically reviewed and potentially removed as the code and compilers
> evolve.
> 
> one other consideration is that a particular compiler's force-inlining
> intrinsic/builtin may permit inlining of functions that are not declared in
> a header, i.e. a function from one library may be inlined into another
> binary as a link-time optimization. although everything here is in a
> header, so that point is a bit moot.
> 
> i'd like to see this change go in if possible.
Like Stephen mentions below, I am sure we will have a for-and-against 
discussion here.
As a DPDK community we have put performance front and center, so I would 
prefer to settle the performance question first.
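
For anyone skimming the patch, here is a minimal sketch of the distinction
being debated. It is illustrative only and not part of the patch; the macro
and function names below are made up, and the macro merely mirrors what
__rte_always_inline roughly expands to in rte_common.h (check your DPDK
version for the exact definition).

/* Illustrative sketch only -- not from the patch. */
#define demo_always_inline inline __attribute__((always_inline))

/*
 * Forced: the always_inline attribute makes the compiler inline this at
 * every call site, regardless of its own cost heuristics.
 */
static demo_always_inline unsigned int
demo_add_forced(unsigned int a, unsigned int b)
{
        return a + b;
}

/*
 * Hint only (what the patch switches to): the compiler weighs inlining
 * against code size, register pressure and other optimizations.
 */
static inline unsigned int
demo_add_hinted(unsigned int a, unsigned int b)
{
        return a + b;
}

Whether dropping the attribute regresses the ring enqueue/dequeue fast path
is exactly what the requested before/after numbers should tell us.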

> 
> thanks
> 
> >
> > > -----Original Message-----
> > > From: Stephen Hemminger <step...@networkplumber.org>
> > > Sent: Thursday, May 5, 2022 5:46 PM
> > > To: dev@dpdk.org
> > > Cc: Stephen Hemminger <step...@networkplumber.org>
> > > Subject: [RFC] rte_ring: don't use always inline
> > >
> > > Forcing the compiler to inline with always_inline can lead to worse and
> > > sometimes broken code. Better to use the standard inline keyword and
> > > let the compiler have some flexibility.
> > >
> > > Signed-off-by: Stephen Hemminger <step...@networkplumber.org>
> > > ---
> > >
> > > This is an RFC because the use of large-scale inlining is debatable.
> > > This change may slow things down on some versions of Gcc and some
> > > architectures.
> > >
> > > If you follow the Linux kernel list, this has been a debated topic
> > > over the years, with opinions for and against inlining, combined with
> > > bad inlining behaviour in various Gcc versions.
> > >
> > >  lib/ring/rte_ring.h         | 36 ++++++++++++++++++------------------
> > >  lib/ring/rte_ring_c11_pvt.h |  6 +++---
> > >  lib/ring/rte_ring_elem.h    | 36 ++++++++++++++++++------------------
> > >  3 files changed, 39 insertions(+), 39 deletions(-)
> > >
> > > diff --git a/lib/ring/rte_ring.h b/lib/ring/rte_ring.h index
> > > 980e92e59493..4a2588beed9e 100644
> > > --- a/lib/ring/rte_ring.h
> > > +++ b/lib/ring/rte_ring.h
> > > @@ -226,7 +226,7 @@ void rte_ring_dump(FILE *f, const struct rte_ring
> *r);
> > >   * @return
> > >   *   The number of objects enqueued, either 0 or n
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_mp_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
> > >                    unsigned int n, unsigned int *free_space)  { @@ -
> > > 249,7 +249,7 @@ rte_ring_mp_enqueue_bulk(struct rte_ring *r, void *
> > > const *obj_table,
> > >   * @return
> > >   *   The number of objects enqueued, either 0 or n
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_sp_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
> > >                    unsigned int n, unsigned int *free_space)  { @@ -
> > > 276,7 +276,7 @@ rte_ring_sp_enqueue_bulk(struct rte_ring *r, void *
> > > const *obj_table,
> > >   * @return
> > >   *   The number of objects enqueued, either 0 or n
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_enqueue_bulk(struct rte_ring *r, void * const *obj_table,
> > >                 unsigned int n, unsigned int *free_space)  { @@ -298,7
> > > +298,7 @@ rte_ring_enqueue_bulk(struct rte_ring *r, void * const
> > > +*obj_table,
> > >   *   - 0: Success; objects enqueued.
> > >   *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is
> > > enqueued.
> > >   */
> > > -static __rte_always_inline int
> > > +static inline int
> > >  rte_ring_mp_enqueue(struct rte_ring *r, void *obj)  {
> > >   return rte_ring_mp_enqueue_elem(r, &obj, sizeof(void *)); @@
> > > -315,7
> > > +315,7 @@ rte_ring_mp_enqueue(struct rte_ring *r, void *obj)
> > >   *   - 0: Success; objects enqueued.
> > >   *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is
> > > enqueued.
> > >   */
> > > -static __rte_always_inline int
> > > +static inline int
> > >  rte_ring_sp_enqueue(struct rte_ring *r, void *obj)  {
> > >   return rte_ring_sp_enqueue_elem(r, &obj, sizeof(void *)); @@
> > > -336,7
> > > +336,7 @@ rte_ring_sp_enqueue(struct rte_ring *r, void *obj)
> > >   *   - 0: Success; objects enqueued.
> > >   *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is
> > > enqueued.
> > >   */
> > > -static __rte_always_inline int
> > > +static inline int
> > >  rte_ring_enqueue(struct rte_ring *r, void *obj)  {
> > >   return rte_ring_enqueue_elem(r, &obj, sizeof(void *)); @@ -360,7
> > > +360,7 @@ rte_ring_enqueue(struct rte_ring *r, void *obj)
> > >   * @return
> > >   *   The number of objects dequeued, either 0 or n
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table,
> > >           unsigned int n, unsigned int *available)  { @@ -384,7 +384,7
> @@
> > > rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table,
> > >   * @return
> > >   *   The number of objects dequeued, either 0 or n
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_sc_dequeue_bulk(struct rte_ring *r, void **obj_table,
> > >           unsigned int n, unsigned int *available)  { @@ -411,7 +411,7
> @@
> > > rte_ring_sc_dequeue_bulk(struct rte_ring *r, void **obj_table,
> > >   * @return
> > >   *   The number of objects dequeued, either 0 or n
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned int 
> > > n,
> > >           unsigned int *available)
> > >  {
> > > @@ -434,7 +434,7 @@ rte_ring_dequeue_bulk(struct rte_ring *r, void
> > > **obj_table, unsigned int n,
> > >   *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
> > >   *     dequeued.
> > >   */
> > > -static __rte_always_inline int
> > > +static inline int
> > >  rte_ring_mc_dequeue(struct rte_ring *r, void **obj_p)  {
> > >   return rte_ring_mc_dequeue_elem(r, obj_p, sizeof(void *)); @@
> > > -452,7
> > > +452,7 @@ rte_ring_mc_dequeue(struct rte_ring *r, void **obj_p)
> > >   *   - -ENOENT: Not enough entries in the ring to dequeue, no object is
> > >   *     dequeued.
> > >   */
> > > -static __rte_always_inline int
> > > +static inline int
> > >  rte_ring_sc_dequeue(struct rte_ring *r, void **obj_p)  {
> > >   return rte_ring_sc_dequeue_elem(r, obj_p, sizeof(void *)); @@
> > > -474,7
> > > +474,7 @@ rte_ring_sc_dequeue(struct rte_ring *r, void **obj_p)
> > >   *   - -ENOENT: Not enough entries in the ring to dequeue, no object is
> > >   *     dequeued.
> > >   */
> > > -static __rte_always_inline int
> > > +static inline int
> > >  rte_ring_dequeue(struct rte_ring *r, void **obj_p)  {
> > >   return rte_ring_dequeue_elem(r, obj_p, sizeof(void *)); @@ -681,7
> > > +681,7 @@ struct rte_ring *rte_ring_lookup(const char *name);
> > >   * @return
> > >   *   - n: Actual number of objects enqueued.
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_mp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
> > >                    unsigned int n, unsigned int *free_space)  { @@ -
> > > 704,7 +704,7 @@ rte_ring_mp_enqueue_burst(struct rte_ring *r, void *
> > > const *obj_table,
> > >   * @return
> > >   *   - n: Actual number of objects enqueued.
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_sp_enqueue_burst(struct rte_ring *r, void * const *obj_table,
> > >                    unsigned int n, unsigned int *free_space)  { @@ -
> > > 731,7 +731,7 @@ rte_ring_sp_enqueue_burst(struct rte_ring *r, void *
> > > const *obj_table,
> > >   * @return
> > >   *   - n: Actual number of objects enqueued.
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_enqueue_burst(struct rte_ring *r, void * const *obj_table,
> > >                 unsigned int n, unsigned int *free_space)  { @@ -759,7
> > > +759,7 @@ rte_ring_enqueue_burst(struct rte_ring *r, void * const
> > > +*obj_table,
> > >   * @return
> > >   *   - n: Actual number of objects dequeued, 0 if ring is empty
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table,
> > >           unsigned int n, unsigned int *available)  { @@ -784,7 +784,7
> @@
> > > rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table,
> > >   * @return
> > >   *   - n: Actual number of objects dequeued, 0 if ring is empty
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table,
> > >           unsigned int n, unsigned int *available)  { @@ -811,7 +811,7
> @@
> > > rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table,
> > >   * @return
> > >   *   - Number of objects dequeued
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_dequeue_burst(struct rte_ring *r, void **obj_table,
> > >           unsigned int n, unsigned int *available)  { diff --git
> > > a/lib/ring/rte_ring_c11_pvt.h b/lib/ring/rte_ring_c11_pvt.h index
> > > f895950df487..6972a9825cb7 100644
> > > --- a/lib/ring/rte_ring_c11_pvt.h
> > > +++ b/lib/ring/rte_ring_c11_pvt.h
> > > @@ -11,7 +11,7 @@
> > >  #ifndef _RTE_RING_C11_PVT_H_
> > >  #define _RTE_RING_C11_PVT_H_
> > >
> > > -static __rte_always_inline void
> > > +static inline void
> > >  __rte_ring_update_tail(struct rte_ring_headtail *ht, uint32_t old_val,
> > >           uint32_t new_val, uint32_t single, uint32_t enqueue)  { @@ -
> > > 50,7 +50,7 @@ __rte_ring_update_tail(struct rte_ring_headtail *ht,
> > > uint32_t old_val,
> > >   *   Actual number of objects enqueued.
> > >   *   If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  __rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp,
> > >           unsigned int n, enum rte_ring_queue_behavior behavior,
> > >           uint32_t *old_head, uint32_t *new_head, @@ -126,7 +126,7
> @@
> > > __rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp,
> > >   *   - Actual number of objects dequeued.
> > >   *     If behavior == RTE_RING_QUEUE_FIXED, this will be 0 or n only.
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  __rte_ring_move_cons_head(struct rte_ring *r, int is_sc,
> > >           unsigned int n, enum rte_ring_queue_behavior behavior,
> > >           uint32_t *old_head, uint32_t *new_head, diff --git
> > > a/lib/ring/rte_ring_elem.h b/lib/ring/rte_ring_elem.h index
> > > fb1edc9aad1f..35e110fc5b4b 100644
> > > --- a/lib/ring/rte_ring_elem.h
> > > +++ b/lib/ring/rte_ring_elem.h
> > > @@ -128,7 +128,7 @@ struct rte_ring *rte_ring_create_elem(const char
> > > *name, unsigned int esize,
> > >   * @return
> > >   *   The number of objects enqueued, either 0 or n
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_mp_enqueue_bulk_elem(struct rte_ring *r, const void *obj_table,
> > >           unsigned int esize, unsigned int n, unsigned int *free_space)  {
> > > @@ -157,7 +157,7 @@ rte_ring_mp_enqueue_bulk_elem(struct rte_ring
> > > *r, const void *obj_table,
> > >   * @return
> > >   *   The number of objects enqueued, either 0 or n
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_sp_enqueue_bulk_elem(struct rte_ring *r, const void *obj_table,
> > >           unsigned int esize, unsigned int n, unsigned int *free_space)  {
> > > @@ -191,7 +191,7 @@ rte_ring_sp_enqueue_bulk_elem(struct rte_ring
> > > *r, const void *obj_table,
> > >   * @return
> > >   *   The number of objects enqueued, either 0 or n
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_enqueue_bulk_elem(struct rte_ring *r, const void *obj_table,
> > >           unsigned int esize, unsigned int n, unsigned int *free_space)  {
> > > @@ -235,7 +235,7 @@ rte_ring_enqueue_bulk_elem(struct rte_ring *r,
> > > const void *obj_table,
> > >   *   - 0: Success; objects enqueued.
> > >   *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is
> > > enqueued.
> > >   */
> > > -static __rte_always_inline int
> > > +static inline int
> > >  rte_ring_mp_enqueue_elem(struct rte_ring *r, void *obj, unsigned int
> esize)  {
> > >   return rte_ring_mp_enqueue_bulk_elem(r, obj, esize, 1, NULL) ? 0 :
> > > @@ -259,7 +259,7 @@ rte_ring_mp_enqueue_elem(struct rte_ring *r,
> > > void *obj, unsigned int esize)
> > >   *   - 0: Success; objects enqueued.
> > >   *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is
> > > enqueued.
> > >   */
> > > -static __rte_always_inline int
> > > +static inline int
> > >  rte_ring_sp_enqueue_elem(struct rte_ring *r, void *obj, unsigned int 
> > > esize)
> {
> > >   return rte_ring_sp_enqueue_bulk_elem(r, obj, esize, 1, NULL) ? 0 :
> > > @@ -285,7 +285,7 @@ rte_ring_sp_enqueue_elem(struct rte_ring *r,
> > > void *obj, unsigned int esize)
> > >   *   - 0: Success; objects enqueued.
> > >   *   - -ENOBUFS: Not enough room in the ring to enqueue; no object is
> > > enqueued.
> > >   */
> > > -static __rte_always_inline int
> > > +static inline int
> > >  rte_ring_enqueue_elem(struct rte_ring *r, void *obj, unsigned int esize) 
> > >  {
> > >   return rte_ring_enqueue_bulk_elem(r, obj, esize, 1, NULL) ? 0 :
> > > @@ -314,7 +314,7 @@ rte_ring_enqueue_elem(struct rte_ring *r, void
> > > *obj, unsigned int esize)
> > >   * @return
> > >   *   The number of objects dequeued, either 0 or n
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_mc_dequeue_bulk_elem(struct rte_ring *r, void *obj_table,
> > >           unsigned int esize, unsigned int n, unsigned int *available)  {
> > > @@ -342,7 +342,7 @@ rte_ring_mc_dequeue_bulk_elem(struct rte_ring
> > > *r, void *obj_table,
> > >   * @return
> > >   *   The number of objects dequeued, either 0 or n
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_sc_dequeue_bulk_elem(struct rte_ring *r, void *obj_table,
> > >           unsigned int esize, unsigned int n, unsigned int *available)  {
> > > @@ -373,7 +373,7 @@ rte_ring_sc_dequeue_bulk_elem(struct rte_ring
> > > *r, void *obj_table,
> > >   * @return
> > >   *   The number of objects dequeued, either 0 or n
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_dequeue_bulk_elem(struct rte_ring *r, void *obj_table,
> > >           unsigned int esize, unsigned int n, unsigned int *available)  {
> > > @@ -418,7 +418,7 @@ rte_ring_dequeue_bulk_elem(struct rte_ring *r,
> > > void *obj_table,
> > >   *   - -ENOENT: Not enough entries in the ring to dequeue; no object is
> > >   *     dequeued.
> > >   */
> > > -static __rte_always_inline int
> > > +static inline int
> > >  rte_ring_mc_dequeue_elem(struct rte_ring *r, void *obj_p,
> > >                           unsigned int esize)
> > >  {
> > > @@ -442,7 +442,7 @@ rte_ring_mc_dequeue_elem(struct rte_ring *r,
> > > void *obj_p,
> > >   *   - -ENOENT: Not enough entries in the ring to dequeue, no object is
> > >   *     dequeued.
> > >   */
> > > -static __rte_always_inline int
> > > +static inline int
> > >  rte_ring_sc_dequeue_elem(struct rte_ring *r, void *obj_p,
> > >                           unsigned int esize)
> > >  {
> > > @@ -470,7 +470,7 @@ rte_ring_sc_dequeue_elem(struct rte_ring *r,
> > > void *obj_p,
> > >   *   - -ENOENT: Not enough entries in the ring to dequeue, no object is
> > >   *     dequeued.
> > >   */
> > > -static __rte_always_inline int
> > > +static inline int
> > >  rte_ring_dequeue_elem(struct rte_ring *r, void *obj_p, unsigned int 
> > > esize)
> {
> > >   return rte_ring_dequeue_bulk_elem(r, obj_p, esize, 1, NULL) ? 0 :
> > > @@ -499,7 +499,7 @@ rte_ring_dequeue_elem(struct rte_ring *r, void
> > > *obj_p, unsigned int esize)
> > >   * @return
> > >   *   - n: Actual number of objects enqueued.
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_mp_enqueue_burst_elem(struct rte_ring *r, const void *obj_table,
> > >           unsigned int esize, unsigned int n, unsigned int *free_space)  {
> > > @@ -528,7 +528,7 @@ rte_ring_mp_enqueue_burst_elem(struct rte_ring
> > > *r, const void *obj_table,
> > >   * @return
> > >   *   - n: Actual number of objects enqueued.
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_sp_enqueue_burst_elem(struct rte_ring *r, const void *obj_table,
> > >           unsigned int esize, unsigned int n, unsigned int *free_space)  {
> > > @@ -559,7 +559,7 @@ rte_ring_sp_enqueue_burst_elem(struct rte_ring
> > > *r, const void *obj_table,
> > >   * @return
> > >   *   - n: Actual number of objects enqueued.
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_enqueue_burst_elem(struct rte_ring *r, const void *obj_table,
> > >           unsigned int esize, unsigned int n, unsigned int *free_space)  {
> > > @@ -609,7 +609,7 @@ rte_ring_enqueue_burst_elem(struct rte_ring *r,
> > > const void *obj_table,
> > >   * @return
> > >   *   - n: Actual number of objects dequeued, 0 if ring is empty
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_mc_dequeue_burst_elem(struct rte_ring *r, void *obj_table,
> > >           unsigned int esize, unsigned int n, unsigned int *available)  {
> > > @@ -638,7 +638,7 @@ rte_ring_mc_dequeue_burst_elem(struct rte_ring
> > > *r, void *obj_table,
> > >   * @return
> > >   *   - n: Actual number of objects dequeued, 0 if ring is empty
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_sc_dequeue_burst_elem(struct rte_ring *r, void *obj_table,
> > >           unsigned int esize, unsigned int n, unsigned int *available)  {
> > > @@ -669,7 +669,7 @@ rte_ring_sc_dequeue_burst_elem(struct rte_ring
> > > *r, void *obj_table,
> > >   * @return
> > >   *   - Number of objects dequeued
> > >   */
> > > -static __rte_always_inline unsigned int
> > > +static inline unsigned int
> > >  rte_ring_dequeue_burst_elem(struct rte_ring *r, void *obj_table,
> > >           unsigned int esize, unsigned int n, unsigned int *available)  {
> > > --
> > > 2.35.1
