[PATCH v2] eal/unix: optimize thread creation with glibc

2024-11-02 Thread David Marchand
Setting the cpu affinity of the child thread from the parent thread is
racy when using pthread_setaffinity_np, as the child thread may start
running and initialize before affinity is set.

On the other hand, setting the cpu affinity from the child thread itself
may fail, so the parent thread waits for the child thread to report
whether this call succeeded.

This synchronisation point resulted in a significant slow down of
rte_thread_create() (as seen in the lcores_autotest unit tests, in OBS
for some ARM systems).

Another option for setting cpu affinity is to use the not portable
pthread_attr_setaffinity_np, but it is not available with musl.
Assume availability by relying on __USE_GNU that is set with glibc.

Fixes: b28c6196b132 ("eal/unix: fix thread creation")
Cc: sta...@dpdk.org

Signed-off-by: David Marchand 
---
Changes since v1:
- fixed build with FreeBSD,

---
 lib/eal/unix/rte_thread.c | 25 +
 1 file changed, 25 insertions(+)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 1b4c73f58e..03c4164059 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -19,6 +19,7 @@ struct eal_tls_key {
pthread_key_t thread_index;
 };
 
+#ifndef __USE_GNU
 struct thread_start_context {
rte_thread_func thread_func;
void *thread_args;
@@ -28,6 +29,7 @@ struct thread_start_context {
int wrapper_ret;
bool wrapper_done;
 };
+#endif
 
 static int
 thread_map_priority_to_os_value(enum rte_thread_priority eal_pri, int *os_pri,
@@ -88,6 +90,7 @@ thread_map_os_priority_to_eal_priority(int policy, int os_pri,
return 0;
 }
 
+#ifndef __USE_GNU
 static void *
 thread_start_wrapper(void *arg)
 {
@@ -113,6 +116,7 @@ thread_start_wrapper(void *arg)
 
return (void *)(uintptr_t)thread_func(thread_args);
 }
+#endif
 
 int
 rte_thread_create(rte_thread_t *thread_id,
@@ -126,6 +130,7 @@ rte_thread_create(rte_thread_t *thread_id,
.sched_priority = 0,
};
int policy = SCHED_OTHER;
+#ifndef __USE_GNU
struct thread_start_context ctx = {
.thread_func = thread_func,
.thread_args = args,
@@ -134,6 +139,7 @@ rte_thread_create(rte_thread_t *thread_id,
.wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
.wrapper_cond = PTHREAD_COND_INITIALIZER,
};
+#endif
 
if (thread_attr != NULL) {
ret = pthread_attr_init(&attr);
@@ -144,6 +150,16 @@ rte_thread_create(rte_thread_t *thread_id,
 
attrp = &attr;
 
+#ifdef __USE_GNU
+   if (CPU_COUNT(&thread_attr->cpuset) > 0) {
+   ret = pthread_attr_setaffinity_np(attrp, 
sizeof(thread_attr->cpuset),
+   &thread_attr->cpuset);
+   if (ret != 0) {
+   EAL_LOG(DEBUG, "pthread_attr_setaffinity_np 
failed");
+   goto cleanup;
+   }
+   }
+#endif
/*
 * Set the inherit scheduler parameter to explicit,
 * otherwise the priority attribute is ignored.
@@ -178,6 +194,14 @@ rte_thread_create(rte_thread_t *thread_id,
}
}
 
+#ifdef __USE_GNU
+   ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
+   (void *)(void *)thread_func, args);
+   if (ret != 0) {
+   EAL_LOG(DEBUG, "pthread_create failed");
+   goto cleanup;
+   }
+#else /* !__USE_GNU */
ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
thread_start_wrapper, &ctx);
if (ret != 0) {
@@ -193,6 +217,7 @@ rte_thread_create(rte_thread_t *thread_id,
 
if (ret != 0)
rte_thread_join(*thread_id, NULL);
+#endif /* __USE_GNU */
 
 cleanup:
if (attrp != NULL)
-- 
2.46.2



[PATCH] eal/unix: optimize thread creation with glibc

2024-11-02 Thread David Marchand
Setting the cpu affinity of the child thread from the parent thread is
racy when using pthread_setaffinity_np, as the child thread may start
running and initialize before affinity is set.

On the other hand, setting the cpu affinity from the child thread itself
may fail, so the parent thread waits for the child thread to report
whether this call succeeded.

This synchronisation point resulted in a significant slow down of
rte_thread_create() (as seen in the lcores_autotest unit tests, in OBS
for some ARM systems).

Another option for setting cpu affinity is to use the not portable
pthread_attr_setaffinity_np, but it is not available with musl.
Assume availability by relying on __USE_GNU that is not set with musl.

Fixes: b28c6196b132 ("eal/unix: fix thread creation")
Cc: sta...@dpdk.org

Signed-off-by: David Marchand 
---
 lib/eal/unix/rte_thread.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/lib/eal/unix/rte_thread.c b/lib/eal/unix/rte_thread.c
index 1b4c73f58e..e42b6c37a2 100644
--- a/lib/eal/unix/rte_thread.c
+++ b/lib/eal/unix/rte_thread.c
@@ -4,6 +4,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -19,6 +20,7 @@ struct eal_tls_key {
pthread_key_t thread_index;
 };
 
+#ifndef __USE_GNU
 struct thread_start_context {
rte_thread_func thread_func;
void *thread_args;
@@ -28,6 +30,7 @@ struct thread_start_context {
int wrapper_ret;
bool wrapper_done;
 };
+#endif
 
 static int
 thread_map_priority_to_os_value(enum rte_thread_priority eal_pri, int *os_pri,
@@ -88,6 +91,7 @@ thread_map_os_priority_to_eal_priority(int policy, int os_pri,
return 0;
 }
 
+#ifndef __USE_GNU
 static void *
 thread_start_wrapper(void *arg)
 {
@@ -113,6 +117,7 @@ thread_start_wrapper(void *arg)
 
return (void *)(uintptr_t)thread_func(thread_args);
 }
+#endif
 
 int
 rte_thread_create(rte_thread_t *thread_id,
@@ -126,6 +131,7 @@ rte_thread_create(rte_thread_t *thread_id,
.sched_priority = 0,
};
int policy = SCHED_OTHER;
+#ifndef __USE_GNU
struct thread_start_context ctx = {
.thread_func = thread_func,
.thread_args = args,
@@ -134,6 +140,7 @@ rte_thread_create(rte_thread_t *thread_id,
.wrapper_mutex = PTHREAD_MUTEX_INITIALIZER,
.wrapper_cond = PTHREAD_COND_INITIALIZER,
};
+#endif
 
if (thread_attr != NULL) {
ret = pthread_attr_init(&attr);
@@ -144,6 +151,16 @@ rte_thread_create(rte_thread_t *thread_id,
 
attrp = &attr;
 
+#ifdef __USE_GNU
+   if (CPU_COUNT(&thread_attr->cpuset) > 0) {
+   ret = pthread_attr_setaffinity_np(attrp, 
sizeof(thread_attr->cpuset),
+   &thread_attr->cpuset);
+   if (ret != 0) {
+   EAL_LOG(DEBUG, "pthread_attr_setaffinity_np 
failed");
+   goto cleanup;
+   }
+   }
+#endif
/*
 * Set the inherit scheduler parameter to explicit,
 * otherwise the priority attribute is ignored.
@@ -178,6 +195,14 @@ rte_thread_create(rte_thread_t *thread_id,
}
}
 
+#ifdef __USE_GNU
+   ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
+   (void *)(void *)thread_func, args);
+   if (ret != 0) {
+   EAL_LOG(DEBUG, "pthread_create failed");
+   goto cleanup;
+   }
+#else /* !__USE_GNU */
ret = pthread_create((pthread_t *)&thread_id->opaque_id, attrp,
thread_start_wrapper, &ctx);
if (ret != 0) {
@@ -193,6 +218,7 @@ rte_thread_create(rte_thread_t *thread_id,
 
if (ret != 0)
rte_thread_join(*thread_id, NULL);
+#endif /* __USE_GNU */
 
 cleanup:
if (attrp != NULL)
-- 
2.46.2



Re: [PATCH v2] Revert "eal/unix: fix thread creation"

2024-11-02 Thread David Marchand
On Thu, Oct 31, 2024 at 9:46 PM Stephen Hemminger
 wrote:
>
> On Thu, 31 Oct 2024 14:05:16 +
> Luca Boccassi  wrote:
>
> > On Thu, 31 Oct 2024 at 13:04, David Marchand  
> > wrote:
> > >
> > > On Thu, Oct 31, 2024 at 1:58 PM Luca Boccassi  
> > > wrote:
> > > >
> > > > On Thu, 31 Oct 2024 at 12:52, David Marchand 
> > > >  wrote:
> > > > >
> > > > > On Thu, Oct 31, 2024 at 1:47 PM David Marchand
> > > > >  wrote:
> > > > > > Could you share a backtrace when hitting this deadlock?
> > > > >
> > > > > If the backtrace is not possible, running with
> > > > > --log-level=lib.eal:debug may help.
> > > >
> > > > I cannot get backtraces. This runs via "meson test", how can that
> > > > option be passed in?
> > >
> > > # meson test -C build-debian --suite fast-tests --verbose -t 5
> > > --test-args=--log-level=lib.eal:debug
> >
> > https://paste.debian.net/1334095/
>
> Could not repro this on Raspberry Pi 5. Main branch builds and runs

There is no deadlock at play, as far as I can see.

The mentionned commit slowed down thread instantiation (a lot, from
what the timestamps seen in the paste link).
The exact reason is not entirely clear to me, but it forced the thread
creating children threads to wait for them to start running.
Reverting this commit enhances the situation, but reintroduce a double
free (if set affinity of the created thread fails, both the parent and
the created thread will free ctx).

I sent a patch, reintroducing use of pthread_attr_setaffinity_np like
Tyler had first proposed.
Let's see what the CI think of this.


-- 
David Marchand



Re: [PATCH v3 4/4] net/nfp: add support for port identify

2024-11-02 Thread Ferruh Yigit
On 11/1/2024 2:57 AM, Chaoyong He wrote:
> Implement the necessary functions to allow user to visually identify a
> physical port associated with a netdev by blinking an LED on that port.
> 
> Signed-off-by: James Hershaw 
> Signed-off-by: Chaoyong He 
> ---
>  .../net/nfp/flower/nfp_flower_representor.c   | 30 
>  drivers/net/nfp/nfp_ethdev.c  |  2 ++
>  drivers/net/nfp/nfp_net_common.c  | 32 +
>  drivers/net/nfp/nfp_net_common.h  |  2 ++
>  drivers/net/nfp/nfpcore/nfp_nsp.h |  1 +
>  drivers/net/nfp/nfpcore/nfp_nsp_eth.c | 36 +++
>  6 files changed, 103 insertions(+)
>

Can you please update nfp.ini to add LED support.

This patch is basically adding LED support, patch title can be updated
to reflect this, it is simpler description.


Re: [PATCH v5 01/18] net/r8169: add PMD driver skeleton

2024-11-02 Thread Ferruh Yigit
On 10/28/2024 7:30 AM, Howard Wang wrote:
> Meson build infrastructure, r8169_ethdev minimal skeleton,
> header with Realtek NIC device and vendor IDs.
> 
> Signed-off-by: Howard Wang 
> ---
>  MAINTAINERS  |   7 ++
>  drivers/net/meson.build  |   1 +
>  drivers/net/r8169/meson.build|   6 ++
>  drivers/net/r8169/r8169_base.h   |  15 +++
>  drivers/net/r8169/r8169_ethdev.c | 178 +++
>  drivers/net/r8169/r8169_ethdev.h |  40 +++
>  6 files changed, 247 insertions(+)
>  create mode 100644 drivers/net/r8169/meson.build
>  create mode 100644 drivers/net/r8169/r8169_base.h
>  create mode 100644 drivers/net/r8169/r8169_ethdev.c
>  create mode 100644 drivers/net/r8169/r8169_ethdev.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index c5a703b5c0..5f9eccc43f 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1076,6 +1076,13 @@ F: drivers/net/memif/
>  F: doc/guides/nics/memif.rst
>  F: doc/guides/nics/features/memif.ini
>  
> +Realtek r8169
> +M: Howard Wang 
> +M: ChunHao Lin 
> +M: Xing Wang 
> +M: Realtek NIC SW 
>

We prefer authors as maintainer, instead of group/alias, and as you
already provide names, can this be removed?


> +F: drivers/net/r8169
> +
>

Please sort alphabetically for 'Realtek'

Can you also add driver documentation and update release notes in this
patch?


>  
>  Crypto Drivers
>  --
> diff --git a/drivers/net/meson.build b/drivers/net/meson.build
> index fb6d34b782..fddcf39655 100644
> --- a/drivers/net/meson.build
> +++ b/drivers/net/meson.build
> @@ -53,6 +53,7 @@ drivers = [
>  'pfe',
>  'qede',
>  'ring',
> +'r8169',
>  'sfc',
>  'softnic',
>  'tap',
> diff --git a/drivers/net/r8169/meson.build b/drivers/net/r8169/meson.build
> new file mode 100644
> index 00..f14d4ae8fb
> --- /dev/null
> +++ b/drivers/net/r8169/meson.build
> @@ -0,0 +1,6 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2024 Realtek Corporation. All rights reserved
> +
> +sources = files(
> + 'r8169_ethdev.c',
> +)
> diff --git a/drivers/net/r8169/r8169_base.h b/drivers/net/r8169/r8169_base.h
> new file mode 100644
> index 00..6fc84592a6
> --- /dev/null
> +++ b/drivers/net/r8169/r8169_base.h
> @@ -0,0 +1,15 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2024 Realtek Corporation. All rights reserved
> + */
> +
> +#ifndef _R8169_BASE_H_
> +#define _R8169_BASE_H_
> +
> +typedef uint8_t   u8;
> +typedef uint16_t  u16;
> +typedef uint32_t  u32;
> +typedef uint64_t  u64;
>

most of the drivers has 'compat.h' or '_compat.h'
(r8169_compat.h) as compatibility layer for DPDK and define above like
structures there.
If there is no specific reason to have this file name, you can create a
compat.h for compatibility.

> +
> +#define PCI_VENDOR_ID_REALTEK 0x10EC
> +
> +#endif
> diff --git a/drivers/net/r8169/r8169_ethdev.c 
> b/drivers/net/r8169/r8169_ethdev.c
> new file mode 100644
> index 00..1f90a142c5
> --- /dev/null
> +++ b/drivers/net/r8169/r8169_ethdev.c
> @@ -0,0 +1,178 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2024 Realtek Corporation. All rights reserved
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
>

Do you really need all these headers here?
Please only include necessary ones and add them when you need them.


> +
> +#include "r8169_ethdev.h"
> +#include "r8169_base.h"
> +
> +static int rtl_dev_configure(struct rte_eth_dev *dev __rte_unused);
>

No need to have '__rte_unused' attribute for function decleration.

> +static int rtl_dev_start(struct rte_eth_dev *dev);
> +static int rtl_dev_stop(struct rte_eth_dev *dev);
> +static int rtl_dev_reset(struct rte_eth_dev *dev);
> +static int rtl_dev_close(struct rte_eth_dev *dev);
> +
> +/*
> + * The set of PCI devices this driver supports
> + */
> +static const struct rte_pci_id pci_id_r8169_map[] = {
> + { RTE_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8125) },
> + { RTE_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8162) },
> + { RTE_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x8126) },
> + { RTE_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0x5000) },
> + {.vendor_id = 0, /* sentinel */ },
> +};
> +
> +static const struct eth_dev_ops rtl_eth_dev_ops = {
> + .dev_configure= rtl_dev_configure,
> + .dev_start= rtl_dev_start,
> + .dev_stop = rtl_dev_stop,
> + .dev_close= rtl_dev_close,
> + .dev_reset= rtl_dev_reset,
> +};
> +
> +static int
> +rtl_dev_configure(struct rte_eth_dev *dev __rte_unused)
> +{
> + return 0;
> +}
> +
> +/*
> + * Configure device link speed and setup link.
> + * It returns 0 on success.
> + */
> +static int
> +rtl_dev_start(struct rte_eth_dev *

Re: [PATCH v5 18/18] doc/guides/nics: add documents for r8169 pmd

2024-11-02 Thread Ferruh Yigit
On 10/28/2024 7:31 AM, Howard Wang wrote:
> Signed-off-by: Howard Wang 
> ---
>  MAINTAINERS|  2 ++
>  doc/guides/nics/features/r8169.ini | 32 ++
>  doc/guides/nics/index.rst  |  1 +
>  doc/guides/nics/r8169.rst  | 17 
>  4 files changed, 52 insertions(+)
>  create mode 100644 doc/guides/nics/features/r8169.ini
>  create mode 100644 doc/guides/nics/r8169.rst
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 5f9eccc43f..6f56c966fd 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1082,6 +1082,8 @@ M: ChunHao Lin 
>  M: Xing Wang 
>  M: Realtek NIC SW 
>  F: drivers/net/r8169
> +F: doc/guides/nics/r8169.rst
> +F: doc/guides/nics/features/r8169.ini
>  
>  
>  Crypto Drivers
> diff --git a/doc/guides/nics/features/r8169.ini 
> b/doc/guides/nics/features/r8169.ini
> new file mode 100644
> index 00..8e4142f64e
> --- /dev/null
> +++ b/doc/guides/nics/features/r8169.ini
> @@ -0,0 +1,32 @@
> +;
> +; Supported features of the 'r8169' network poll mode driver.
> +;
> +; Refer to default.ini for the full list of available PMD features.
> +;
> +[Features]
> +Speed capabilities   = Y
> +Link speed configuration = Y
> +Link status  = Y
> +Link status event= Y
> +MTU update   = Y
> +Scattered Rx = Y
> +TSO  = Y
> +Promiscuous mode = Y
> +Allmulticast mode= Y
> +Unicast MAC filter   = Y
> +Multicast MAC filter = Y
> +Flow control = Y
> +CRC offload  = Y
> +L3 checksum offload  = Y
> +L4 checksum offload  = Y
> +Packet type parsing  = Y
> +Rx descriptor status = Y
> +Tx descriptor status = Y
> +Basic stats  = Y
> +Extended stats   = Y
> +Stats per queue  = Y
> +FW version   = Y
> +Registers dump   = Y
> +Linux= Y
> +x86-32   = Y
> +x86-64   = Y
>

Can you please distribute the content of this patch to rest of the
series, instead of having a separate commit for it?

> diff --git a/doc/guides/nics/index.rst b/doc/guides/nics/index.rst
> index c14bc7988a..0861be59d3 100644
> --- a/doc/guides/nics/index.rst
> +++ b/doc/guides/nics/index.rst
> @@ -60,6 +60,7 @@ Network Interface Controller Drivers
>  pcap_ring
>  pfe
>  qede
> +r8169
>  sfc_efx
>  softnic
>  tap
> diff --git a/doc/guides/nics/r8169.rst b/doc/guides/nics/r8169.rst
> new file mode 100644
> index 00..149276cc91
> --- /dev/null
> +++ b/doc/guides/nics/r8169.rst
> @@ -0,0 +1,17 @@
> +..  SPDX-License-Identifier: BSD-3-Clause
> +Copyright(c) 2024 Realtek Corporation. All rights reserved
> +
> +R8169 Poll Mode Driver
> +==
> +
> +The R8169 PMD provides poll mode driver support for Realtek 2.5 and 5 Gigabit
> +Ethernet NICs.
>

Can you please add a link to the relevant product page?


> +
> +Features
> +
> +
> +Features of the R8169 PMD are:
> +
> +* Checksum offload
> +* TCP segmentation offload
> +* Jumbo frames supported
>

Same comment as above, please distribute these updates to the patch that
introduces the mentioned update above?




Re: [PATCH v5 09/18] net/r8169: add support for hw initialization

2024-11-02 Thread Ferruh Yigit
On 10/28/2024 7:31 AM, Howard Wang wrote:
> Signed-off-by: Howard Wang 
> 

Can you please add some description to the commit log?

<...>

> diff --git a/drivers/net/r8169/r8169_phy.h b/drivers/net/r8169/r8169_phy.h
> index 1d8889f8dc..2576538a00 100644
> --- a/drivers/net/r8169/r8169_phy.h
> +++ b/drivers/net/r8169/r8169_phy.h
> @@ -24,8 +24,8 @@
>  #define MII_EXPANSION0x06/* Expansion register  
> */
>  #define MII_CTRL1000 0x09/* 1000BASE-T control  */
>  #define MII_STAT1000 0x0a/* 1000BASE-T status   */
> -#define MII_MMD_CTRL 0x0d/* MMD Access Control Register */
> -#define MII_MMD_DATA 0x0e/* MMD Access Data Register*/
> +#define  MII_MMD_CTRL0x0d/* MMD Access Control Register 
> */
> +#define  MII_MMD_DATA0x0e/* MMD Access Data Register
> */
>

This seems unintendent modification.


Re: [PATCH v5 03/18] net/r8169: add hardware registers access routines

2024-11-02 Thread Ferruh Yigit
On 10/28/2024 7:30 AM, Howard Wang wrote:
> Add implementation for hardware registers access routines.
> 
> Signed-off-by: Howard Wang 
> 

<...>

> diff --git a/drivers/net/r8169/r8169_base.h b/drivers/net/r8169/r8169_base.h
> index 6fc84592a6..2fa947b2d7 100644
> --- a/drivers/net/r8169/r8169_base.h
> +++ b/drivers/net/r8169/r8169_base.h
> @@ -5,11 +5,395 @@
>  #ifndef _R8169_BASE_H_
>  #define _R8169_BASE_H_
>  
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
>

Please only include used/necessary headers.



Re: [PATCH v5 00/18] modify code as suggested by the maintainer

2024-11-02 Thread Ferruh Yigit
On 10/28/2024 7:30 AM, Howard Wang wrote:
> Fix some warning issues.
> 
> Howard Wang (18):
>   net/r8169: add PMD driver skeleton
>   net/r8169: add logging structure
>   net/r8169: add hardware registers access routines
>   net/r8169: implement core logic for Tx/Rx
>   net/r8169: add support for hw config
>   net/r8169: add phy registers access routines
>   net/r8169: add support for hardware operations
>   net/r8169: add support for phy configuration
>   net/r8169: add support for hw initialization
>   net/r8169: add link status and interrupt management
>   net/r8169: implement Rx path
>   net/r8169: implement Tx path
>   net/r8169: implement device statistics
>   net/r8169: implement promisc and allmulti modes
>   net/r8169: implement MTU configuration
>   net/r8169: add support for getting fw version
>   net/r8169: add driver_start and driver_stop
>   doc/guides/nics: add documents for r8169 pmd
>

Hi Howard,

Thanks for restructuring the patch series, it looks better now, I put
some comments.

Btw, there are many "Avoid CamelCase" checkpatch warning, if this code
is not shared (but developed for DPDK), can you please address the warnings?


Re: [PATCH v3 3/4] net/nfp: add support for EEPROM functions

2024-11-02 Thread Ferruh Yigit
On 11/1/2024 2:57 AM, Chaoyong He wrote:
> Add support for the following EEPROM function callbacks:
> 
> .get_eeprom_len
> Get the maximum size of the device EEPROM data.
> 
> .get_eeprom
> Get the device EEPROM data at a certain offset and length.
> 
> .set_eeprom
> Set the device EEPROM data at a certain offset and length.
> 
> Note that for an nfp NIC, the "device EEPROM" is simply a field in the
> hwinfo that is used to store a 6B mac address associated with a physical
> port.
> 
> .get_module_info
> Get information regarding the type and size of the plugin module EEPROM
> for a specific port.
> 
> .get_module_eeprom
> Get the data stored in the plugin module EEPROM for a specific port.
> 
> Signed-off-by: James Hershaw 
> Signed-off-by: Chaoyong He 
> ---
>  .../net/nfp/flower/nfp_flower_representor.c   |  76 ++
>  drivers/net/nfp/nfp_ethdev.c  |   5 +
>  drivers/net/nfp/nfp_net_common.c  | 256 ++
>  drivers/net/nfp/nfp_net_common.h  |   6 +
>  drivers/net/nfp/nfpcore/nfp_nsp.c |  96 +++
>  drivers/net/nfp/nfpcore/nfp_nsp.h |   9 +
>  6 files changed, 448 insertions(+)
>

Can you please update nfp.ini to add "EEPROM dump" & "Module EEPROM dump"?


RE: RFC - Tap io_uring PMD

2024-11-02 Thread Morten Brørup
> From: Stephen Hemminger [mailto:step...@networkplumber.org]
> Sent: Friday, 1 November 2024 01.35
> 
> On Thu, 31 Oct 2024 11:27:25 +0100
> Morten Brørup  wrote:
> 
> > > From: Stephen Hemminger [mailto:step...@networkplumber.org]
> > > Sent: Wednesday, 30 October 2024 22.57
> > >
> > > The current tap device is slow both due to architectural choices
> and
> > > the
> > > overhead of Linux system calls.
> >
> > Yes; but isn't it only being used for (low volume) management
> traffic?
> > Is the TAP PMD performance an issue for anyone? What is their use
> case?
> 
> 
> In embedded systems, if you want to use DPDK for dataplane, you still
> need
> to have a control plane path to the kernel. And most of the hardware
> used
> does not support a bifurcated driver. Either that or have two NIC's.

Yes, our application does that (not using a bifurcated driver); it can be 
configured for in-band management or a dedicated management port.

> 
> >
> > Or is the key issue that the TAP PMD makes system calls in the fast
> path, so you are looking to implement a new TAP PMD that doesn't make
> any system calls in the fast path?
> 
> Even the control path performance matters. Think of a router with lots
> BGP
> connections, or doing updates.

BGP is an excellent example where performance matters.
(Our applications currently don't support BGP, so I'm used to a relatively low 
volume of management traffic, and didn't think about BGP.)

> 
> >
> > > I am exploring a how to fix that but
> > > some
> > > of the choices require some tradeoffs. Which leads to some open
> > > questions:
> > >
> > > 1. DPDK tap also support tunnel (TUN) mode where there is no
> Ethernet
> > > header
> > >only L3. Does anyone actually use this? It is different than
> what
> > > every other
> > >PMD expects.
> >
> > If used for high volume (data plane) traffic, I would assume standard
> PMD behavior (i.e. incl. Ethernet headers) would suffice.

The traffic to/from the TAP port is likely to be exchanged with a physical 
port, so the packets will have an Ethernet header at this point.

> >
> > >
> > > 2. The fastest way to use kernel TAP device would be to use
> io_uring.
> > >But this was added in 5.1 kernel (2019). Rather than having
> > > conditional or
> > >dual mode in DPDK tap device, perhaps there should just be a new
> PMD
> > > tap_uring?
> >
> > If the features differ significantly, I'm in favor of a new PMD.
> > And it would be an opportunity to get rid of useless cruft, which I
> think you are already asking about here. :-)
> 
> Yes, and the TAP device was written to support a niche use case (all
> the flow stuff).
> Also TAP device has lots of extra code, at some point doing bit-by-bit
> cleanup gets annoying.
> 
> >
> > Furthermore, a "clean sheet" implementation - adding all the
> experience accumulated since the old TAP PMD - could serve as showcase
> for "best practices" for software PMDs.
> >
> > >
> > > 3. Current TAP device provides hooks for several rte_flow types by
> > > playing
> > >games with kernel qdisc. Does anyone really use this? Propose
> just
> > > not doing
> > >this in new tap_uring.
> > >
> > > 4. What other features of TAP device beyond basic send/receive make
> > > sense?
> > >It looks like new device could support better statistics.
> >
> > IMHO, statistics about missed packets are relevant. If the ingress
> (kernel->DPDK) queue is full, and the kernel has to drop packets, this
> drop counter should be exposed to the application through the PMD.
> 
> It may require some kernel side additions to extract that, but not out
> of scope.
> 
> >
> > I don't know if the existing TAP PMD supports it, but associating a
> port/queue with a "network namespace" or VRF in the kernel could also
> be relevant.
> 
> All network devices can be put in network namespace; VRF in Linux is
> separate from netns it has to do with
> which routing table is associated with the net device.
> 
> >
> > >
> > > 5. What about Rx interrupt support?
> >
> > RX interrupt support seems closely related to power management.
> > It could be used to reduce jitter/latency (and burstiness) when
> someone on the network communicates with an in-band management
> interface.
> 
> Not sure if ioring has wakeup mechanism, but probably epoll() is
> possible.

Yes, it seems to be:
https://unixism.net/loti/tutorial/register_eventfd.html

> 
> >
> > >
> > > Probably the hardest part of using io_uring is figuring out how to
> > > collect
> > > completions. The simplest way would be to handle all completions rx
> and
> > > tx
> > > in the rx_burst function.
> >
> > Please don't mix RX and TX, unless explicitly requested by the
> application through the recently introduced "mbuf recycle" feature.
> 
> The issue is Rx and Tx share a single fd and ioring for completion is
> per fd.
> The implementation for ioring came from the storage side so initially
> it was for fixing
> the broken Linux AIO support.
> 
> Some other devices only have sin

Re: [PATCH v2] eal/unix: optimize thread creation with glibc

2024-11-02 Thread Luca Boccassi
On Sat, 2 Nov 2024 at 11:32, David Marchand  wrote:
>
> Setting the cpu affinity of the child thread from the parent thread is
> racy when using pthread_setaffinity_np, as the child thread may start
> running and initialize before affinity is set.
>
> On the other hand, setting the cpu affinity from the child thread itself
> may fail, so the parent thread waits for the child thread to report
> whether this call succeeded.
>
> This synchronisation point resulted in a significant slow down of
> rte_thread_create() (as seen in the lcores_autotest unit tests, in OBS
> for some ARM systems).
>
> Another option for setting cpu affinity is to use the not portable
> pthread_attr_setaffinity_np, but it is not available with musl.
> Assume availability by relying on __USE_GNU that is set with glibc.
>
> Fixes: b28c6196b132 ("eal/unix: fix thread creation")
> Cc: sta...@dpdk.org
>
> Signed-off-by: David Marchand 
> ---
> Changes since v1:
> - fixed build with FreeBSD,
>
> ---
>  lib/eal/unix/rte_thread.c | 25 +
>  1 file changed, 25 insertions(+)

The test now completes in 1.19 seconds, so this fixes the issue with glibc:

[  438s] 36/82 DPDK:fast-tests / lcores_autotest OK
  1.19s

I do not use musl, so this is good enough for me. Thanks!

Acked-by: Luca Boccassi 


Re: 32-bit virtio failing on DPDK v23.11.1 (and tags)

2024-11-02 Thread Chris Brezovec (cbrezove)

Hi Maxime / team,

I have been going through the 12+ virtio commits between the last known working 
version and the first place we noticed this being broken.  It does appear to be 
a change in this commit: 
https://github.com/DPDK/dpdk/commit/a632f0f64ffba3553a18bdb51a670c1b603c0ce6

I focused on the virtio_alloc_queue_headers() and virtio_free_queue_headers() 
functions.  I think I have narrowed it down to to the hdr_mem setting.  The 
following changes seem to be working in my test environment (which is a little 
limited).

I was hoping you could look at these changes and hopefully help get a fix in 
for 24.11.

Regards,
-ChrisB

---
drivers/net/virtio/virtqueue.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/virtio/virtqueue.c b/drivers/net/virtio/virtqueue.c
index 6f419665f1..fc7f7a9c55 100644
--- a/drivers/net/virtio/virtqueue.c
+++ b/drivers/net/virtio/virtqueue.c
@@ -344,7 +344,7 @@ virtio_alloc_queue_headers(struct virtqueue *vq, int 
numa_node, const char *name
   if (vq->hw->use_va)
   *hdr_mem = (uintptr_t)(*hdr_mz)->addr;
   else
-  *hdr_mem = (uintptr_t)(*hdr_mz)->iova;
+ *hdr_mem = (*hdr_mz)->iova;
return 0;
}
--
2.35.6

From: Chris Brezovec (cbrezove) 
Date: Tuesday, September 3, 2024 at 10:43 AM
To: dev@dpdk.org , maxime.coque...@redhat.com 

Cc: Roger Melton (rmelton) , Walt Robinson (walrobin) 

Subject: Re: 32-bit virtio failing on DPDK v23.11.1 (and tags)
Hi Maxime / others,

I am just following up to see if you have had any chance to look at what I 
previously sent and had any ideas regarding the issue.

Thanks in advance!
-ChrisB

From: Chris Brezovec (cbrezove) 
Date: Wednesday, August 28, 2024 at 5:27 PM
To: dev@dpdk.org , maxime.coque...@redhat.com 

Cc: common-dpio-core-team(mailer list) 
Subject: 32-bit virtio failing on DPDK v23.11.1 (and tags)
HI Maxime,

My name is Chris Brezovec, we met and talked about some 32 bit virtio issues we 
were seeing at Cisco during the DPDK summit last year.  There was also a back 
and forth between you and Dave Johnson at Cisco last September regarding the 
same issue.  I have attached some of the email chain from that conversation 
that resulted in this commit being made to dpdk v23.11 
(https://github.com/DPDK/dpdk/commit/8c41645be010ec7fa0df4f6c3790b167945154b4).

We recently picked up the v23.11.1 DPDK release and saw that 32 bit virtio is 
not working again, but 64-bit virtio is working.  We are noticing CVQ timeouts 
- PMD receives no response from host and this leads to failure of the port to 
start.  We were able to recreate this issue using testpmd.  We have done some 
tracing through the virtio changes made during the development of the v23.xx 
DPDK release, and believe we have identified the following rework commit to 
have caused a failure 
(https://github.com/DPDK/dpdk/commit/a632f0f64ffba3553a18bdb51a670c1b603c0ce6).

We have also tested v23.07, v23.11, v23.11.2-rc2, v24.07 and they all seem to 
see the same issue when running in 32-bit mode using testpmd.

We were hoping you might be able to take a quick look at the two commits and 
see if there might be something obvious missing in the refactor work that might 
have caused this issue.  I am thinking there might a location or two in the 
code that should be using the VIRTIO_MBUF_ADDR() or similar macro that might 
have been missed.

Regards,
ChrisB

This is some of the testpmd output seen on v23.11.2-rc2:

LD_LIBRARY_PATH=/home/rmelton/scratch/dpdk-v23.11.2-rc2.git/build/lib 
/home/rmelton/scratch/dpdk-v23.11.2-rc2.git/build/app/dpdk-testpmd -l 2-3 -a 
:07:00.0 --log-level pmd.net.iavf.*,8 --log-level lib.eal.*,8 
--log-level=lib.eal:info --log-level=lib.eal:debug --log-level=lib.ethdev:info 
--log-level=lib.ethdev:debug --log-level=lib.virtio:warning 
--log-level=lib.virtio:info --log-level=lib.virtio:debug 
--log-level=pmd.*:debug --iova-mode=pa -- -i

— snip —

virtio_send_command(): vq->vq_desc_head_idx = 0, status = 255, vq->hw->cvq = 
0x76d9acc0 vq = 0x76d9ac80
virtio_send_command_split(): vq->vq_queue_index = 2
virtio_send_command_split(): vq->vq_free_cnt=64
vq->vq_desc_head_idx=0
virtio_dev_promiscuous_disable(): Failed to disable promisc
Failed to disable promiscuous mode for device (port 0): Resource temporarily 
unavailable
Error during restoring configuration for device (port 0): Resource temporarily 
unavailable
virtio_dev_stop(): stop
Fail to start port 0: Resource temporarily unavailable
Done
virtio_send_command(): vq->vq_desc_head_idx = 0, status = 255, vq->hw->cvq = 
0x76d9acc0 vq = 0x76d9ac80
virtio_send_command_split(): vq->vq_queue_index = 2
virtio_send_command_split(): vq->vq_free_cnt=64
vq->vq_desc_head_idx=0
virtio_dev_promiscuous_enable(): Failed to enable promisc
Error during enabling promiscuous mode for port 0: Resource temporarily 
unavailable - ignore