From: Marek Kasiewicz <[email protected]>

Add a no_runtime_queue_setup devarg that stops the iavf VF driver from
advertising the RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP and
RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP capabilities.

Runtime queue setup on E810 VFs causes queue state corruption when
queues are dynamically reconfigured while the hardware rate limiter
is actively pacing TX queues. Queue configuration messages to the PF
via virtchnl can race with ongoing TX operations, leading to undefined
behavior.

Both capabilities are still advertised by default. When the devarg is
set to 1 they are not advertised, so all queues are configured at
port start and remain stable throughout the port lifecycle.

Signed-off-by: Marek Kasiewicz <[email protected]>
Signed-off-by: Dawid Wesierski <[email protected]>
---
 doc/guides/nics/intel_vf.rst           |  9 +++++++++
 doc/guides/rel_notes/release_26_07.rst |  2 ++
 drivers/net/intel/iavf/iavf.h          |  1 +
 drivers/net/intel/iavf/iavf_ethdev.c   | 22 ++++++++++++++++++----
 4 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/doc/guides/nics/intel_vf.rst b/doc/guides/nics/intel_vf.rst
index e010f852cf..86878330f2 100644
--- a/doc/guides/nics/intel_vf.rst
+++ b/doc/guides/nics/intel_vf.rst
@@ -131,6 +131,15 @@ IAVF PMD parameters
     * ``segment``: Check number of mbuf segments does not exceed HW limits.
     * ``offload``: Check for use of an unsupported offload flag.
 
+``no_runtime_queue_setup``
+    Runtime (post-start) Rx/Tx queue setup can race with the hardware Tx rate
+    limiter on E810 VFs and corrupt queue state.
+    The runtime queue setup capability is advertised by default.
+    Applications that pace queues through the traffic manager can opt out
+    of advertising the runtime queue setup capability
+    by setting ``no_runtime_queue_setup`` to 1,
+    for example, ``-a 18:01.0,no_runtime_queue_setup=1``.
+
 
 HW-Specific Notes For IAVF
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/doc/guides/rel_notes/release_26_07.rst b/doc/guides/rel_notes/release_26_07.rst
index db8c4d5b16..d3e9dc6cf4 100644
--- a/doc/guides/rel_notes/release_26_07.rst
+++ b/doc/guides/rel_notes/release_26_07.rst
@@ -135,6 +135,8 @@ New Features
 
   * Added support for transmitting LLDP packets based on mbuf packet type.
   * Implemented AVX2 context descriptor transmit paths.
+  * Added ``no_runtime_queue_setup`` devarg to stop advertising runtime queue
+    setup for applications that pace queues through the traffic manager.
 
 * **Updated Intel ice driver.**
 
diff --git a/drivers/net/intel/iavf/iavf.h b/drivers/net/intel/iavf/iavf.h
index 4444602a30..146f02ea13 100644
--- a/drivers/net/intel/iavf/iavf.h
+++ b/drivers/net/intel/iavf/iavf.h
@@ -326,6 +326,7 @@ struct iavf_devargs {
        int no_poll_on_link_down;
        uint64_t mbuf_check;
        int enable_ptype_lldp;
+       int no_runtime_queue_setup;
 };
 
 struct iavf_security_ctx;
diff --git a/drivers/net/intel/iavf/iavf_ethdev.c b/drivers/net/intel/iavf/iavf_ethdev.c
index ec1ad02826..be733f9edf 100644
--- a/drivers/net/intel/iavf/iavf_ethdev.c
+++ b/drivers/net/intel/iavf/iavf_ethdev.c
@@ -46,6 +46,7 @@
 #define IAVF_NO_POLL_ON_LINK_DOWN_ARG "no-poll-on-link-down"
 #define IAVF_MBUF_CHECK_ARG       "mbuf_check"
 #define IAVF_ENABLE_PTYPE_LLDP_ARG "enable_ptype_lldp"
+#define IAVF_NO_RUNTIME_QUEUE_SETUP_ARG "no_runtime_queue_setup"
 uint64_t iavf_timestamp_dynflag;
 int iavf_timestamp_dynfield_offset = -1;
 int rte_pmd_iavf_tx_lldp_dynfield_offset = -1;
@@ -59,6 +60,7 @@ static const char * const iavf_valid_args[] = {
        IAVF_NO_POLL_ON_LINK_DOWN_ARG,
        IAVF_MBUF_CHECK_ARG,
        IAVF_ENABLE_PTYPE_LLDP_ARG,
+       IAVF_NO_RUNTIME_QUEUE_SETUP_ARG,
        NULL
 };
 
@@ -1160,9 +1162,15 @@ iavf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
        dev_info->reta_size = vf->vf_res->rss_lut_size;
        dev_info->flow_type_rss_offloads = IAVF_RSS_OFFLOAD_ALL;
        dev_info->max_mac_addrs = IAVF_NUM_MACADDR_MAX;
-       dev_info->dev_capa =
-               RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
-               RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP;
+       /*
+        * Runtime queue setup can race with the hardware Tx rate limiter on
+        * E810 VFs and corrupt queue state. Applications that pace queues via
+        * the traffic manager can opt out with no_runtime_queue_setup=1.
+        */
+       if (!adapter->devargs.no_runtime_queue_setup)
+               dev_info->dev_capa =
+                       RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
+                       RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP;
        dev_info->rx_offload_capa =
                RTE_ETH_RX_OFFLOAD_VLAN_STRIP |
                RTE_ETH_RX_OFFLOAD_QINQ_STRIP |
@@ -2533,6 +2541,11 @@ static int iavf_parse_devargs(struct rte_eth_dev *dev)
        if (ret)
                goto bail;
 
+       ret = rte_kvargs_process(kvlist, IAVF_NO_RUNTIME_QUEUE_SETUP_ARG,
+                                &parse_bool, &ad->devargs.no_runtime_queue_setup);
+       if (ret)
+               goto bail;
+
 bail:
        rte_kvargs_free(kvlist);
        return ret;
@@ -3619,7 +3632,8 @@ bool is_iavf_supported(struct rte_eth_dev *dev)
 RTE_PMD_REGISTER_PCI(net_iavf, rte_iavf_pmd);
 RTE_PMD_REGISTER_PCI_TABLE(net_iavf, pci_id_iavf_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_iavf, "* igb_uio | vfio-pci");
-RTE_PMD_REGISTER_PARAM_STRING(net_iavf, "cap=dcf");
+RTE_PMD_REGISTER_PARAM_STRING(net_iavf, "cap=dcf "
+                             IAVF_NO_RUNTIME_QUEUE_SETUP_ARG "=<0|1>");
 RTE_LOG_REGISTER_SUFFIX(iavf_logtype_init, init, NOTICE);
 RTE_LOG_REGISTER_SUFFIX(iavf_logtype_driver, driver, NOTICE);
 #ifdef RTE_ETHDEV_DEBUG_RX
-- 
2.47.3

---------------------------------------------------------------------
Intel Technology Poland sp. z o.o.
ul. Slowackiego 173 | 80-298 Gdansk | Sad Rejonowy Gdansk Polnoc | VII Wydzial 
Gospodarczy Krajowego Rejestru Sadowego - KRS 101882 | NIP 957-07-52-316 | 
Kapital zakladowy 200.000 PLN.
Spolka oswiadcza, ze posiada status duzego przedsiebiorcy w rozumieniu ustawy z 
dnia 8 marca 2013 r. o przeciwdzialaniu nadmiernym opoznieniom w transakcjach 
handlowych.

Ta wiadomosc wraz z zalacznikami jest przeznaczona dla okreslonego adresata i 
moze zawierac informacje poufne. W razie przypadkowego otrzymania tej 
wiadomosci, prosimy o powiadomienie nadawcy oraz trwale jej usuniecie; 
jakiekolwiek przegladanie lub rozpowszechnianie jest zabronione.
This e-mail and any attachments may contain confidential material for the sole 
use of the intended recipient(s). If you are not the intended recipient, please 
contact the sender and delete all copies; any review or distribution by others 
is strictly prohibited.

Reply via email to