[Intel-wired-lan] [rdma v3 17/24] RDMA/irdma: Add support for V2 HMC resource management scheme

2025-02-07 Thread Tatyana Nikolova
From: Vinoth Kumar Chandra Mohan 

HMC resource initialization is updated to support V1 or V2 approach
based on the FW capability. In the V2 approach, driver receives the
assigned HMC resources count and verifies if it will fit in the given
local memory. If it doesn't fit, the driver load fails.

Signed-off-by: Vinoth Kumar Chandra Mohan 
Signed-off-by: Tatyana Nikolova 
---
 drivers/infiniband/hw/irdma/ctrl.c | 121 -
 drivers/infiniband/hw/irdma/defs.h |   3 +
 drivers/infiniband/hw/irdma/type.h |  25 +++---
 3 files changed, 130 insertions(+), 19 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/ctrl.c 
b/drivers/infiniband/hw/irdma/ctrl.c
index 52bb87f4b2c5..d01c55172f6a 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -2901,6 +2901,41 @@ static int irdma_sc_cq_modify(struct irdma_sc_cq *cq,
return 0;
 }
 
+/**
+ * irdma_sc_get_decoded_ird_size_gen_3 - get decoded IRD size for GEN 3
+ * @ird_enc: IRD encoding
+ * IRD size defaults to a value of 4 in case of invalid input.
+ */
+static u16 irdma_sc_get_decoded_ird_size_gen_3(u8 ird_enc)
+{
+   switch (ird_enc) {
+   case IRDMA_IRD_HW_SIZE_4096_GEN3:
+   return 4096;
+   case IRDMA_IRD_HW_SIZE_2048_GEN3:
+   return 2048;
+   case IRDMA_IRD_HW_SIZE_1024_GEN3:
+   return 1024;
+   case IRDMA_IRD_HW_SIZE_512_GEN3:
+   return 512;
+   case IRDMA_IRD_HW_SIZE_256_GEN3:
+   return 256;
+   case IRDMA_IRD_HW_SIZE_128_GEN3:
+   return 128;
+   case IRDMA_IRD_HW_SIZE_64_GEN3:
+   return 64;
+   case IRDMA_IRD_HW_SIZE_32_GEN3:
+   return 32;
+   case IRDMA_IRD_HW_SIZE_16_GEN3:
+   return 16;
+   case IRDMA_IRD_HW_SIZE_8_GEN3:
+   return 8;
+   case IRDMA_IRD_HW_SIZE_4_GEN3:
+   return 4;
+   default:
+   return 4;
+   }
+}
+
 /**
  * irdma_check_cqp_progress - check cqp processing progress
  * @timeout: timeout info struct
@@ -3212,6 +3247,7 @@ static int irdma_sc_parse_fpm_query_buf(struct 
irdma_sc_dev *dev, __le64 *buf,
struct irdma_hmc_fpm_misc *hmc_fpm_misc)
 {
struct irdma_hmc_obj_info *obj_info;
+   u8 ird_encoding;
u64 temp;
u32 size;
u16 max_pe_sds;
@@ -3287,6 +3323,14 @@ static int irdma_sc_parse_fpm_query_buf(struct 
irdma_sc_dev *dev, __le64 *buf,
hmc_fpm_misc->max_ceqs = FIELD_GET(IRDMA_QUERY_FPM_MAX_CEQS, temp);
hmc_fpm_misc->ht_multiplier = FIELD_GET(IRDMA_QUERY_FPM_HTMULTIPLIER, 
temp);
hmc_fpm_misc->timer_bucket = FIELD_GET(IRDMA_QUERY_FPM_TIMERBUCKET, 
temp);
+   if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2,
+ dev->feature_info[IRDMA_FTN_FLAGS])) {
+   ird_encoding = (u8)FIELD_GET(IRDMA_QUERY_FPM_MAX_IRD, temp);
+   hmc_fpm_misc->ird =
+   irdma_sc_get_decoded_ird_size_gen_3(ird_encoding) / 2;
+   dev->hw_attrs.max_hw_ird = hmc_fpm_misc->ird;
+   dev->hw_attrs.max_hw_ord = hmc_fpm_misc->ird;
+   }
if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
return 0;
irdma_sc_decode_fpm_query(buf, 96, obj_info, IRDMA_HMC_IW_FSIMC);
@@ -5444,10 +5488,71 @@ static void irdma_set_host_hmc_rsrc_gen_3(struct 
irdma_sc_dev *dev)
avail_sds -= DIV_ROUND_UP(mrwanted, MAX_MR_PER_SD);
}
 
+   if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2, 
dev->feature_info[IRDMA_FTN_FLAGS]) &&
+   pblewanted > avail_sds * MAX_PBLE_PER_SD)
+   ibdev_dbg(to_ibdev(dev),
+ "HMC: Warn: Resource version 2: pble wanted = 0x%x 
available = 0x%x\n",
+ pblewanted, avail_sds * MAX_PBLE_PER_SD);
+
pblewanted = min(pblewanted, avail_sds * MAX_PBLE_PER_SD);
hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt = pblewanted;
 }
 
+/**
+ * irdma_verify_commit_fpm_gen_3 - verify query fpm values
+ * @dev: sc device struct
+ * @max_pages: max local memory available
+ * @qpwanted: number of qp's wanted
+ */
+static int irdma_verify_commit_fpm_gen_3(struct irdma_sc_dev *dev,
+u32 max_pages,
+u32 qpwanted)
+{
+   struct irdma_hmc_fpm_misc *hmc_fpm_misc;
+   u32 rrf_cnt, xf_cnt, timer_cnt, pages_needed;
+   struct irdma_hmc_info *hmc_info;
+   u32 rrffl_cnt = 0;
+   u32 xffl_cnt = 0;
+   u32 q1fl_cnt;
+
+   hmc_info = dev->hmc_info;
+   hmc_fpm_misc = &dev->hmc_fpm_misc;
+
+   rrf_cnt = roundup_pow_of_two(IRDMA_RRF_MULTIPLIER * qpwanted);
+
+   if (hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].max_cnt)
+   rrffl_cnt =
+   hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt /
+   hmc_fpm_misc->rrf_block_size;
+
+   xf_cnt = roundup_pow_of_two(IRDMA_XF_MULTIPLIER * qpwanted

[Intel-wired-lan] [rdma v3 16/24] RDMA/irdma: Extend QP context programming for GEN3

2025-02-07 Thread Tatyana Nikolova
From: Shiraz Saleem 

Extend the QP context structure with support for new fields
specific to GEN3 hardware capabilities.

Signed-off-by: Shiraz Saleem 
Signed-off-by: Tatyana Nikolova 
---
 drivers/infiniband/hw/irdma/ctrl.c  | 184 +++-
 drivers/infiniband/hw/irdma/defs.h  |  24 +++-
 drivers/infiniband/hw/irdma/type.h  |   4 +
 drivers/infiniband/hw/irdma/uda_d.h |   5 +-
 drivers/infiniband/hw/irdma/verbs.c |   5 +
 5 files changed, 215 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/ctrl.c 
b/drivers/infiniband/hw/irdma/ctrl.c
index 7d2ae701b8f5..52bb87f4b2c5 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -637,13 +637,14 @@ static u8 irdma_sc_get_encoded_ird_size(u16 ird_size)
 }
 
 /**
- * irdma_sc_qp_setctx_roce - set qp's context
+ * irdma_sc_qp_setctx_roce_gen_2 - set qp's context
  * @qp: sc qp
  * @qp_ctx: context ptr
  * @info: ctx info
  */
-void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx,
-struct irdma_qp_host_ctx_info *info)
+static void irdma_sc_qp_setctx_roce_gen_2(struct irdma_sc_qp *qp,
+ __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info)
 {
struct irdma_roce_offload_info *roce_info;
struct irdma_udp_offload_info *udp;
@@ -761,6 +762,183 @@ void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, 
__le64 *qp_ctx,
 8, qp_ctx, IRDMA_QP_CTX_SIZE, false);
 }
 
+/**
+ * irdma_sc_get_encoded_ird_size_gen_3 - get encoded IRD size for GEN 3
+ * @ird_size: IRD size
+ * The ird from the connection is rounded to a supported HW setting and then 
encoded
+ * for ird_size field of qp_ctx. Consumers are expected to provide valid ird 
size based
+ * on hardware attributes. IRD size defaults to a value of 4 in case of 
invalid input.
+ */
+static u8 irdma_sc_get_encoded_ird_size_gen_3(u16 ird_size)
+{
+   switch (ird_size ?
+   roundup_pow_of_two(2 * ird_size) : 4) {
+   case 4096:
+   return IRDMA_IRD_HW_SIZE_4096_GEN3;
+   case 2048:
+   return IRDMA_IRD_HW_SIZE_2048_GEN3;
+   case 1024:
+   return IRDMA_IRD_HW_SIZE_1024_GEN3;
+   case 512:
+   return IRDMA_IRD_HW_SIZE_512_GEN3;
+   case 256:
+   return IRDMA_IRD_HW_SIZE_256_GEN3;
+   case 128:
+   return IRDMA_IRD_HW_SIZE_128_GEN3;
+   case 64:
+   return IRDMA_IRD_HW_SIZE_64_GEN3;
+   case 32:
+   return IRDMA_IRD_HW_SIZE_32_GEN3;
+   case 16:
+   return IRDMA_IRD_HW_SIZE_16_GEN3;
+   case 8:
+   return IRDMA_IRD_HW_SIZE_8_GEN3;
+   case 4:
+   default:
+   break;
+   }
+
+   return IRDMA_IRD_HW_SIZE_4_GEN3;
+}
+
+/**
+ * irdma_sc_qp_setctx_roce_gen_3 - set qp's context
+ * @qp: sc qp
+ * @qp_ctx: context ptr
+ * @info: ctx info
+ */
+static void irdma_sc_qp_setctx_roce_gen_3(struct irdma_sc_qp *qp,
+ __le64 *qp_ctx,
+ struct irdma_qp_host_ctx_info *info)
+{
+   struct irdma_roce_offload_info *roce_info = info->roce_info;
+   struct irdma_udp_offload_info *udp = info->udp_info;
+   u64 qw0, qw3, qw7 = 0, qw8 = 0;
+   u8 push_mode_en;
+   u32 push_idx;
+
+   qp->user_pri = info->user_pri;
+   if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) {
+   push_mode_en = 0;
+   push_idx = 0;
+   } else {
+   push_mode_en = 1;
+   push_idx = qp->push_idx;
+   }
+
+   qw0 = FIELD_PREP(IRDMAQPC_RQWQESIZE, qp->qp_uk.rq_wqe_size) |
+ FIELD_PREP(IRDMAQPC_RCVTPHEN, qp->rcv_tph_en) |
+ FIELD_PREP(IRDMAQPC_XMITTPHEN, qp->xmit_tph_en) |
+ FIELD_PREP(IRDMAQPC_RQTPHEN, qp->rq_tph_en) |
+ FIELD_PREP(IRDMAQPC_SQTPHEN, qp->sq_tph_en) |
+ FIELD_PREP(IRDMAQPC_PPIDX, push_idx) |
+ FIELD_PREP(IRDMAQPC_PMENA, push_mode_en) |
+ FIELD_PREP(IRDMAQPC_DC_TCP_EN, roce_info->dctcp_en) |
+ FIELD_PREP(IRDMAQPC_ISQP1, roce_info->is_qp1) |
+ FIELD_PREP(IRDMAQPC_ROCE_TVER, roce_info->roce_tver) |
+ FIELD_PREP(IRDMAQPC_IPV4, udp->ipv4) |
+ FIELD_PREP(IRDMAQPC_INSERTVLANTAG, udp->insert_vlan_tag);
+   set_64bit_val(qp_ctx, 0, qw0);
+   set_64bit_val(qp_ctx, 8, qp->sq_pa);
+   set_64bit_val(qp_ctx, 16, qp->rq_pa);
+   qw3 = FIELD_PREP(IRDMAQPC_RQSIZE, qp->hw_rq_size) |
+ FIELD_PREP(IRDMAQPC_SQSIZE, qp->hw_sq_size) |
+ FIELD_PREP(IRDMAQPC_TTL, udp->ttl) |
+ FIELD_PREP(IRDMAQPC_TOS, udp->tos) |
+ FIELD_PREP(IRDMAQPC_SRCPORTNUM, udp->src_port) |
+ FIELD_PREP(IRDMAQPC_DESTPORTNUM, udp->dst_port);
+   set_64bit_val(qp_ctx, 24, qw3);
+   set_64bit_val(qp_ctx, 32,

[Intel-wired-lan] [rdma v3 09/24] RDMA/irdma: Add GEN3 core driver support

2025-02-07 Thread Tatyana Nikolova
From: Mustafa Ismail 

Introduce support for the GEN3 auxiliary core driver, which is
responsible for initializing PCI-level RDMA resources.

Facilitate host-driver communication with the device's Control Plane (CP)
to discover capabilities and perform privileged operations through an
RDMA-specific messaging interface built atop the IDPF mailbox and virtual
channel protocol.

Establish the RDMA virtual channel message interface and incorporate
operations to retrieve the hardware version and discover capabilities
from the CP.

Additionally, set up the RDMA MMIO regions and initialize the RF structure.

Signed-off-by: Mustafa Ismail 
Signed-off-by: Tatyana Nikolova 
---

v3:
* Move the call to get RDMA features just after CQP is created,
otherwise the feature flags are not defined before used.
* Round up to power of two the resource size for Read Responses and
Transmit Queue elements in irdma_set_loc_hmc_rsrc_gen_3().
* Clear the lower 2 bits of the tos field before setting the
ECN code point to avoid illegal code point AE.
* Handle large PD ID indices by populating IRDMA_CQPSQ_STAG_PDID_HI
field with the PD ID.
* Fix IRDMA_QUERY_FPM_MAX_PE_SDS field size for GEN1 and GEN2
which can't be safely extended to support GEN3.
* Adjust the HW AEQ size depending on the GEN.
* Fix #define values to accommodate field extensions required for GEN3.
* Make the HMC function id (hmc_fcn_id) 16 bits to enable
more functions supported by GEN3.
* Clean up unused #defines and variables.
* Fix sparse type warnings in ig3rdma_cfg_regions().
* Fix device hmc_fn_id initialization by using the response hmc_fn_id.

 drivers/infiniband/hw/irdma/Makefile |   2 +
 drivers/infiniband/hw/irdma/ctrl.c   | 484 +++
 drivers/infiniband/hw/irdma/defs.h   |  50 ++-
 drivers/infiniband/hw/irdma/hmc.c|  18 +-
 drivers/infiniband/hw/irdma/hmc.h|  19 +-
 drivers/infiniband/hw/irdma/hw.c |  18 +-
 drivers/infiniband/hw/irdma/i40iw_if.c   |   1 +
 drivers/infiniband/hw/irdma/icrdma_if.c  |   2 +
 drivers/infiniband/hw/irdma/ig3rdma_hw.h |  11 +
 drivers/infiniband/hw/irdma/ig3rdma_if.c | 171 
 drivers/infiniband/hw/irdma/irdma.h  |   5 +-
 drivers/infiniband/hw/irdma/main.c   |  55 +++
 drivers/infiniband/hw/irdma/main.h   |   4 +
 drivers/infiniband/hw/irdma/pble.c   |  20 +-
 drivers/infiniband/hw/irdma/puda.h   |   4 +-
 drivers/infiniband/hw/irdma/type.h   |  67 +++-
 drivers/infiniband/hw/irdma/user.h   |   5 +-
 drivers/infiniband/hw/irdma/virtchnl.c   | 302 ++
 drivers/infiniband/hw/irdma/virtchnl.h   |  96 +
 19 files changed, 1207 insertions(+), 127 deletions(-)
 create mode 100644 drivers/infiniband/hw/irdma/ig3rdma_hw.h
 create mode 100644 drivers/infiniband/hw/irdma/ig3rdma_if.c
 create mode 100644 drivers/infiniband/hw/irdma/virtchnl.c
 create mode 100644 drivers/infiniband/hw/irdma/virtchnl.h

diff --git a/drivers/infiniband/hw/irdma/Makefile 
b/drivers/infiniband/hw/irdma/Makefile
index 2522e4ca650b..3aa63b913377 100644
--- a/drivers/infiniband/hw/irdma/Makefile
+++ b/drivers/infiniband/hw/irdma/Makefile
@@ -13,6 +13,7 @@ irdma-objs := cm.o\
   hw.o\
   i40iw_hw.o  \
   i40iw_if.o  \
+ ig3rdma_if.o\
  icrdma_if.o \
   icrdma_hw.o \
   main.o  \
@@ -23,6 +24,7 @@ irdma-objs := cm.o\
   uk.o\
   utils.o \
   verbs.o \
+ virtchnl.o  \
   ws.o\
 
 CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/hw/irdma/ctrl.c 
b/drivers/infiniband/hw/irdma/ctrl.c
index 6aed6169c07d..2375d8dc0b01 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -1080,7 +1080,8 @@ static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev,
  FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID) |
  FIELD_PREP(IRDMA_CQPSQ_STAG_STAGLEN, info->total_len));
set_64bit_val(wqe, 16,
- FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx));
+ FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18));
set_64bit_val(wqe, 40,
  FIELD_PREP(IRDMA_CQPSQ_STAG_HMCFNIDX, 
info->hmc_fcn_index));
 
@@ -1165,6 +1166,7 @@ static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev 
*dev,
  FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID));
set_64bit_val(wqe, 16,
  FIELD_PREP(IRDMA_CQPSQ_STAG_KEY, info->stag_key) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18) |
  FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx));
if (!info->chunk_size) {
set_64bit_val(wqe, 32, info->reg_addr_pa);
@@ -1223,7 +1225,8 @@ static int irdma_sc_dealloc_stag(struct irdma

[Intel-wired-lan] [rdma v3 15/24] RDMA/irdma: Add GEN3 virtual QP1 support

2025-02-07 Thread Tatyana Nikolova
From: Shiraz Saleem 

Add a new RDMA virtual channel op during QP1 creation that allows the
Control Plane (CP) to virtualize a regular QP as QP1 on non-default
RDMA capable vPorts. Additionally, the CP will return the Qsets to use
on the ib_device of the vPort.

Signed-off-by: Shiraz Saleem 
Signed-off-by: Tatyana Nikolova 
---
 drivers/infiniband/hw/irdma/ctrl.c | 10 ++-
 drivers/infiniband/hw/irdma/main.h |  1 +
 drivers/infiniband/hw/irdma/utils.c| 30 -
 drivers/infiniband/hw/irdma/verbs.c| 84 --
 drivers/infiniband/hw/irdma/virtchnl.c | 52 
 drivers/infiniband/hw/irdma/virtchnl.h | 19 ++
 6 files changed, 174 insertions(+), 22 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/ctrl.c 
b/drivers/infiniband/hw/irdma/ctrl.c
index 8fd2882f75af..7d2ae701b8f5 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -74,6 +74,14 @@ static void irdma_set_qos_info(struct irdma_sc_vsi  *vsi,
 {
u8 i;
 
+   if (vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+   for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
+   vsi->qos[i].qs_handle = vsi->dev->qos[i].qs_handle;
+   vsi->qos[i].valid = true;
+   }
+
+   return;
+   }
vsi->qos_rel_bw = l2p->vsi_rel_bw;
vsi->qos_prio_type = l2p->vsi_prio_type;
vsi->dscp_mode = l2p->dscp_mode;
@@ -1877,7 +1885,7 @@ void irdma_sc_vsi_init(struct irdma_sc_vsi  *vsi,
mutex_init(&vsi->qos[i].qos_mutex);
INIT_LIST_HEAD(&vsi->qos[i].qplist);
}
-   if (vsi->register_qset) {
+   if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) {
vsi->dev->ws_add = irdma_ws_add;
vsi->dev->ws_remove = irdma_ws_remove;
vsi->dev->ws_reset = irdma_ws_reset;
diff --git a/drivers/infiniband/hw/irdma/main.h 
b/drivers/infiniband/hw/irdma/main.h
index 1dab2ffba5e5..f0196aafe59b 100644
--- a/drivers/infiniband/hw/irdma/main.h
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -260,6 +260,7 @@ struct irdma_pci_f {
bool reset:1;
bool rsrc_created:1;
bool msix_shared:1;
+   bool hwqp1_rsvd:1;
u8 rsrc_profile;
u8 *hmc_info_mem;
u8 *mem_rsrc;
diff --git a/drivers/infiniband/hw/irdma/utils.c 
b/drivers/infiniband/hw/irdma/utils.c
index 8ab8af02abc9..87c88be47ee3 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -1113,6 +1113,26 @@ static void irdma_dealloc_push_page(struct irdma_pci_f 
*rf,
irdma_put_cqp_request(&rf->cqp, cqp_request);
 }
 
+static void irdma_free_gsi_qp_rsrc(struct irdma_qp *iwqp, u32 qp_num)
+{
+   struct irdma_device *iwdev = iwqp->iwdev;
+   struct irdma_pci_f *rf = iwdev->rf;
+   unsigned long flags;
+
+   if (rf->sc_dev.hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3)
+   return;
+
+   irdma_vchnl_req_del_vport(&rf->sc_dev, iwdev->vport_id, qp_num);
+
+   if (qp_num == 1) {
+   spin_lock_irqsave(&rf->rsrc_lock, flags);
+   rf->hwqp1_rsvd = false;
+   spin_unlock_irqrestore(&rf->rsrc_lock, flags);
+   } else if (qp_num > 2) {
+   irdma_free_rsrc(rf, rf->allocated_qps, qp_num);
+   }
+}
+
 /**
  * irdma_free_qp_rsrc - free up memory resources for qp
  * @iwqp: qp ptr (user or kernel)
@@ -1121,7 +1141,7 @@ void irdma_free_qp_rsrc(struct irdma_qp *iwqp)
 {
struct irdma_device *iwdev = iwqp->iwdev;
struct irdma_pci_f *rf = iwdev->rf;
-   u32 qp_num = iwqp->ibqp.qp_num;
+   u32 qp_num = iwqp->sc_qp.qp_uk.qp_id;
 
irdma_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp);
irdma_dealloc_push_page(rf, &iwqp->sc_qp);
@@ -1131,8 +1151,12 @@ void irdma_free_qp_rsrc(struct irdma_qp *iwqp)
   iwqp->sc_qp.user_pri);
}
 
-   if (qp_num > 2)
-   irdma_free_rsrc(rf, rf->allocated_qps, qp_num);
+   if (iwqp->ibqp.qp_type == IB_QPT_GSI) {
+   irdma_free_gsi_qp_rsrc(iwqp, qp_num);
+   } else {
+   if (qp_num > 2)
+   irdma_free_rsrc(rf, rf->allocated_qps, qp_num);
+   }
dma_free_coherent(rf->sc_dev.hw->device, iwqp->q2_ctx_mem.size,
  iwqp->q2_ctx_mem.va, iwqp->q2_ctx_mem.pa);
iwqp->q2_ctx_mem.va = NULL;
diff --git a/drivers/infiniband/hw/irdma/verbs.c 
b/drivers/infiniband/hw/irdma/verbs.c
index 2535e0f59ceb..cf5a5d28fe53 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -545,6 +545,9 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct 
ib_udata *udata)
irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp);
 
irdma_remove_push_mmap_entries(iwqp);
+
+   if (iwqp->sc_qp.qp_uk.qp_id == 1)
+   iwdev->rf->hwqp1_rsvd = false;
irdma_free_qp_rs

[Intel-wired-lan] [iwl-next v3 03/24] idpf: implement core RDMA auxiliary dev create, init, and destroy

2025-02-07 Thread Tatyana Nikolova
From: Joshua Hay 

Add the initial idpf_idc.c file with the functions to kick off the IDC
initialization, create and initialize a core RDMA auxiliary device, and
destroy said device.

The RDMA core has a dependency on the vports being created by the
control plane before it can be initialized. Therefore, once all the
vports are up after a hard reset (either during driver load a function
level reset), the core RDMA device info will be created. It is populated
with the function type (as distinguished by the IDC initialization
function pointer), the core idc_ops function points (just stubs for
now), the reserved RDMA MSIX table, and various other info the core RDMA
auxiliary driver will need. It is then plugged on to the bus.

During a function level reset or driver unload, the device will be
unplugged from the bus and destroyed.

Signed-off-by: Joshua Hay 
Signed-off-by: Tatyana Nikolova 
---

v3:
- Used signed ret value from ida_alloc and only assign unsigned id
  if no err
- capitalize some abbreviations
- add missing field descriptions

 drivers/net/ethernet/intel/idpf/Makefile  |   1 +
 drivers/net/ethernet/intel/idpf/idpf.h|  10 +
 drivers/net/ethernet/intel/idpf/idpf_dev.c|  13 ++
 drivers/net/ethernet/intel/idpf/idpf_idc.c| 220 ++
 drivers/net/ethernet/intel/idpf/idpf_lib.c|   4 +
 drivers/net/ethernet/intel/idpf/idpf_vf_dev.c |  13 ++
 .../net/ethernet/intel/idpf/idpf_virtchnl.c   |  19 ++
 .../net/ethernet/intel/idpf/idpf_virtchnl.h   |   3 +
 8 files changed, 283 insertions(+)
 create mode 100644 drivers/net/ethernet/intel/idpf/idpf_idc.c

diff --git a/drivers/net/ethernet/intel/idpf/Makefile 
b/drivers/net/ethernet/intel/idpf/Makefile
index 2ce01a0b5898..bde9c893d8a1 100644
--- a/drivers/net/ethernet/intel/idpf/Makefile
+++ b/drivers/net/ethernet/intel/idpf/Makefile
@@ -10,6 +10,7 @@ idpf-y := \
idpf_controlq_setup.o   \
idpf_dev.o  \
idpf_ethtool.o  \
+   idpf_idc.o  \
idpf_lib.o  \
idpf_main.o \
idpf_txrx.o \
diff --git a/drivers/net/ethernet/intel/idpf/idpf.h 
b/drivers/net/ethernet/intel/idpf/idpf.h
index 8ef7120e6717..64f731fe878c 100644
--- a/drivers/net/ethernet/intel/idpf/idpf.h
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -17,6 +17,7 @@ struct idpf_vport_max_q;
 #include 
 #include 
 #include 
+#include 
 
 #include "virtchnl2.h"
 #include "idpf_txrx.h"
@@ -202,9 +203,12 @@ struct idpf_reg_ops {
 /**
  * struct idpf_dev_ops - Device specific operations
  * @reg_ops: Register operations
+ * @idc_init: IDC initialization
  */
 struct idpf_dev_ops {
struct idpf_reg_ops reg_ops;
+
+   int (*idc_init)(struct idpf_adapter *adapter);
 };
 
 /**
@@ -522,6 +526,7 @@ struct idpf_vc_xn_manager;
  * @caps: Negotiated capabilities with device
  * @vcxn_mngr: Virtchnl transaction manager
  * @dev_ops: See idpf_dev_ops
+ * @cdev_info: IDC core device info pointer
  * @num_vfs: Number of allocated VFs through sysfs. PF does not directly talk
  *  to VFs but is used to initialize them
  * @crc_enable: Enable CRC insertion offload
@@ -580,6 +585,7 @@ struct idpf_adapter {
struct idpf_vc_xn_manager *vcxn_mngr;
 
struct idpf_dev_ops dev_ops;
+   struct idc_rdma_core_dev_info *cdev_info;
int num_vfs;
bool crc_enable;
bool req_tx_splitq;
@@ -858,5 +864,9 @@ int idpf_sriov_configure(struct pci_dev *pdev, int num_vfs);
 
 u8 idpf_vport_get_hsplit(const struct idpf_vport *vport);
 bool idpf_vport_set_hsplit(const struct idpf_vport *vport, u8 val);
+int idpf_idc_init(struct idpf_adapter *adapter);
+int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter,
+  enum idc_function_type ftype);
+void idpf_idc_deinit_core_aux_device(struct idc_rdma_core_dev_info *cdev_info);
 
 #endif /* !_IDPF_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_dev.c 
b/drivers/net/ethernet/intel/idpf/idpf_dev.c
index 41e4bd49402a..351db7d5dace 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_dev.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_dev.c
@@ -148,6 +148,17 @@ static void idpf_trigger_reset(struct idpf_adapter 
*adapter,
   idpf_get_reg_addr(adapter, PFGEN_CTRL));
 }
 
+/**
+ * idpf_idc_register - register for IDC callbacks
+ * @adapter: Driver specific private structure
+ *
+ * Return: 0 on success or error code on failure.
+ */
+static int idpf_idc_register(struct idpf_adapter *adapter)
+{
+   return idpf_idc_init_aux_core_dev(adapter, IDC_FUNCTION_TYPE_PF);
+}
+
 /**
  * idpf_reg_ops_init - Initialize register API function pointers
  * @adapter: Driver specific private structure
@@ -168,4 +179,6 @@ static void idpf_reg_ops_init(struct idpf_adapter *adapter)
 void idpf_dev_ops_init(struct idpf_adapter *adapter)
 {
idpf_reg_ops_init(adapter);
+
+   adapter->dev_ops.idc_init = idpf_idc_register;
 }
diff --git a/drivers/net/ethernet/intel/idpf/id

[Intel-wired-lan] [iwl-next v3 01/24] iidc/ice/irdma: Update IDC to support multiple consumers

2025-02-07 Thread Tatyana Nikolova
From: Dave Ertman 

To support RDMA for E2000 product, the idpf driver will use the IDC
interface with the irdma auxiliary driver, thus becoming a second
consumer of it. This requires the IDC be updated to support multiple
consumers. The use of exported symbols no longer makes sense because it
will require all core drivers (ice/idpf) that can interface with irdma
auxiliary driver to be loaded even if hardware is not present for those
drivers.

To address this, implement an ops struct that will be a universal set of
naked function pointers that will be populated by each core driver for
the irdma auxiliary driver to call.

Also previously, the ice driver was just exporting its entire PF struct
to the auxiliary driver, but since each core driver will have its own
different PF struct, implement a universal struct that all core drivers
can export to the auxiliary driver through the probe call.

The iidc.h header file will be divided into two files.  The first,
idc_rdma.h, will host all of the generic header info that will be needed
for RDMA support in the auxiliary device.  The second, iidc_rdma.h, will
contain specific elements used by Intel drivers to support RDMA.  This
will be primarily the implementation of a new struct that will be
assigned under the new generic opaque element of idc_priv in the
idc_core_dev_info struct.

Update ice and irdma to conform with the new IIDC interface definitions.

Signed-off-by: Dave Ertman 
Co-developed-by: Mustafa Ismail 
Signed-off-by: Mustafa Ismail 
Co-developed-by: Shiraz Saleem 
Signed-off-by: Shiraz Saleem 
Signed-off-by: Tatyana Nikolova 
---

v2:
* Minor modifications, like changing EINVAL to ENODEV error codes, etc.

 drivers/infiniband/hw/irdma/main.c| 110 
 drivers/infiniband/hw/irdma/main.h|   3 +-
 drivers/infiniband/hw/irdma/osdep.h   |   4 +-
 .../net/ethernet/intel/ice/devlink/devlink.c  |  40 ++-
 drivers/net/ethernet/intel/ice/ice.h  |   6 +-
 drivers/net/ethernet/intel/ice/ice_dcb_lib.c  |  46 +++-
 drivers/net/ethernet/intel/ice/ice_dcb_lib.h  |   4 +
 drivers/net/ethernet/intel/ice/ice_ethtool.c  |   8 +-
 drivers/net/ethernet/intel/ice/ice_idc.c  | 255 +++---
 drivers/net/ethernet/intel/ice/ice_idc_int.h  |   5 +-
 drivers/net/ethernet/intel/ice/ice_main.c |  18 +-
 include/linux/net/intel/idc_rdma.h| 138 ++
 include/linux/net/intel/iidc.h| 107 
 include/linux/net/intel/iidc_rdma.h   |  67 +
 14 files changed, 527 insertions(+), 284 deletions(-)
 create mode 100644 include/linux/net/intel/idc_rdma.h
 delete mode 100644 include/linux/net/intel/iidc.h
 create mode 100644 include/linux/net/intel/iidc_rdma.h

diff --git a/drivers/infiniband/hw/irdma/main.c 
b/drivers/infiniband/hw/irdma/main.c
index 3f13200ff71b..9b6f1d8bf06a 100644
--- a/drivers/infiniband/hw/irdma/main.c
+++ b/drivers/infiniband/hw/irdma/main.c
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /* Copyright (c) 2015 - 2021 Intel Corporation */
 #include "main.h"
-#include "../../../net/ethernet/intel/ice/ice.h"
 
 MODULE_ALIAS("i40iw");
 MODULE_AUTHOR("Intel Corporation, ");
@@ -61,7 +60,7 @@ static void irdma_log_invalid_mtu(u16 mtu, struct 
irdma_sc_dev *dev)
 }
 
 static void irdma_fill_qos_info(struct irdma_l2params *l2params,
-   struct iidc_qos_params *qos_info)
+   struct iidc_rdma_qos_params *qos_info)
 {
int i;
 
@@ -85,12 +84,13 @@ static void irdma_fill_qos_info(struct irdma_l2params 
*l2params,
}
 }
 
-static void irdma_iidc_event_handler(struct ice_pf *pf, struct iidc_event 
*event)
+static void irdma_idc_event_handler(struct idc_rdma_core_dev_info *cdev_info,
+   struct idc_rdma_event *event)
 {
-   struct irdma_device *iwdev = dev_get_drvdata(&pf->adev->dev);
+   struct irdma_device *iwdev = dev_get_drvdata(&cdev_info->adev->dev);
struct irdma_l2params l2params = {};
 
-   if (*event->type & BIT(IIDC_EVENT_AFTER_MTU_CHANGE)) {
+   if (*event->type & BIT(IDC_RDMA_EVENT_AFTER_MTU_CHANGE)) {
ibdev_dbg(&iwdev->ibdev, "CLNT: new MTU = %d\n", 
iwdev->netdev->mtu);
if (iwdev->vsi.mtu != iwdev->netdev->mtu) {
l2params.mtu = iwdev->netdev->mtu;
@@ -98,25 +98,26 @@ static void irdma_iidc_event_handler(struct ice_pf *pf, 
struct iidc_event *event
irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev);
irdma_change_l2params(&iwdev->vsi, &l2params);
}
-   } else if (*event->type & BIT(IIDC_EVENT_BEFORE_TC_CHANGE)) {
+   } else if (*event->type & BIT(IDC_RDMA_EVENT_BEFORE_TC_CHANGE)) {
if (iwdev->vsi.tc_change_pending)
return;
 
irdma_prep_tc_change(iwdev);
-   } else if (*event->type & BIT(IIDC_EVENT_AFTER_TC_CHANGE)) {
-  

[Intel-wired-lan] [rdma v3 10/24] RDMA/irdma: Discover and set up GEN3 hardware register layout

2025-02-07 Thread Tatyana Nikolova
From: Christopher Bednarz 

Discover the hardware register layout for GEN3 devices through an RDMA
virtual channel operation with the Control Plane (CP). Set up the
corresponding hardware attributes specific to GEN3 devices.

Signed-off-by: Christopher Bednarz 
Signed-off-by: Tatyana Nikolova 
---

v3: Decrease IG3RDMA_MAX_IRD/ORD_SIZE to 64 for smaller memory
requirements.

 drivers/infiniband/hw/irdma/Makefile |   1 +
 drivers/infiniband/hw/irdma/ctrl.c   |  31 ++--
 drivers/infiniband/hw/irdma/defs.h   |  12 +-
 drivers/infiniband/hw/irdma/i40iw_hw.c   |   2 +
 drivers/infiniband/hw/irdma/i40iw_hw.h   |   2 +
 drivers/infiniband/hw/irdma/icrdma_hw.c  |   3 +
 drivers/infiniband/hw/irdma/icrdma_hw.h  |   5 +-
 drivers/infiniband/hw/irdma/ig3rdma_hw.c |  65 +
 drivers/infiniband/hw/irdma/ig3rdma_hw.h |  18 +++
 drivers/infiniband/hw/irdma/irdma.h  |   5 +
 drivers/infiniband/hw/irdma/virtchnl.c   | 178 +++
 drivers/infiniband/hw/irdma/virtchnl.h   |  44 ++
 12 files changed, 351 insertions(+), 15 deletions(-)
 create mode 100644 drivers/infiniband/hw/irdma/ig3rdma_hw.c

diff --git a/drivers/infiniband/hw/irdma/Makefile 
b/drivers/infiniband/hw/irdma/Makefile
index 3aa63b913377..03ceb9e5475f 100644
--- a/drivers/infiniband/hw/irdma/Makefile
+++ b/drivers/infiniband/hw/irdma/Makefile
@@ -16,6 +16,7 @@ irdma-objs := cm.o\
  ig3rdma_if.o\
  icrdma_if.o \
   icrdma_hw.o \
+ ig3rdma_hw.o\
   main.o  \
   pble.o  \
   puda.o  \
diff --git a/drivers/infiniband/hw/irdma/ctrl.c 
b/drivers/infiniband/hw/irdma/ctrl.c
index 2375d8dc0b01..082aaa5fc3bc 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -5672,6 +5672,9 @@ static inline void irdma_sc_init_hw(struct irdma_sc_dev 
*dev)
case IRDMA_GEN_2:
icrdma_init_hw(dev);
break;
+   case IRDMA_GEN_3:
+   ig3rdma_init_hw(dev);
+   break;
}
 }
 
@@ -5742,18 +5745,26 @@ int irdma_sc_dev_init(enum irdma_vers ver, struct 
irdma_sc_dev *dev,
 
irdma_sc_init_hw(dev);
 
-   if (irdma_wait_pe_ready(dev))
-   return -ETIMEDOUT;
+   if (dev->privileged) {
+   if (irdma_wait_pe_ready(dev))
+   return -ETIMEDOUT;
 
-   val = readl(dev->hw_regs[IRDMA_GLPCI_LBARCTRL]);
-   db_size = (u8)FIELD_GET(IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE, val);
-   if (db_size != IRDMA_PE_DB_SIZE_4M && db_size != IRDMA_PE_DB_SIZE_8M) {
-   ibdev_dbg(to_ibdev(dev),
- "DEV: RDMA PE doorbell is not enabled in CSR val 0x%x 
db_size=%d\n",
- val, db_size);
-   return -ENODEV;
+   val = readl(dev->hw_regs[IRDMA_GLPCI_LBARCTRL]);
+   db_size = (u8)FIELD_GET(IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE, val);
+   if (db_size != IRDMA_PE_DB_SIZE_4M &&
+   db_size != IRDMA_PE_DB_SIZE_8M) {
+   ibdev_dbg(to_ibdev(dev),
+ "DEV: RDMA PE doorbell is not enabled in CSR 
val 0x%x db_size=%d\n",
+ val, db_size);
+   return -ENODEV;
+   }
+   } else {
+   ret_code = irdma_vchnl_req_get_reg_layout(dev);
+   if (ret_code)
+   ibdev_dbg(to_ibdev(dev),
+ "DEV: Get Register layout failed ret = %d\n",
+ ret_code);
}
-   dev->db_addr = dev->hw->hw_addr + 
(uintptr_t)dev->hw_regs[IRDMA_DB_ADDR_OFFSET];
 
return ret_code;
 }
diff --git a/drivers/infiniband/hw/irdma/defs.h 
b/drivers/infiniband/hw/irdma/defs.h
index 7d363088b5c3..425bcd17abe9 100644
--- a/drivers/infiniband/hw/irdma/defs.h
+++ b/drivers/infiniband/hw/irdma/defs.h
@@ -115,6 +115,7 @@ enum irdma_protocol_used {
 #define IRDMA_FEATURE_BUF_SIZE (8 * IRDMA_MAX_FEATURES)
 
 #define ENABLE_LOC_MEM 63
+#define IRDMA_ATOMICS_ALLOWED_BIT  1
 #define MAX_PBLE_PER_SD0x4
 #define MAX_PBLE_SD_PER_FCN0x400
 #define MAX_MR_PER_SD  0x8000
@@ -127,7 +128,7 @@ enum irdma_protocol_used {
 #define IRDMA_QP_SW_MAX_RQ_QUANTA  32768
 #define IRDMA_MAX_QP_WRS(max_quanta_per_wr) \
((IRDMA_QP_SW_MAX_WQ_QUANTA - IRDMA_SQ_RSVD) / (max_quanta_per_wr))
-
+#define IRDMA_SRQ_MAX_QUANTA 262144
 #define IRDMAQP_TERM_SEND_TERM_AND_FIN 0
 #define IRDMAQP_TERM_SEND_TERM_ONLY1
 #define IRDMAQP_TERM_SEND_FIN_ONLY 2
@@ -153,8 +154,13 @@ enum irdma_protocol_used {
 #define IRDMA_SQ_RSVD  258
 #define IRDMA_RQ_RSVD  1
 
-#define IRDMA_FEATURE_RTS_AE   1ULL
-#define IRDMA_FEATURE_CQ_RESIZE2ULL
+#define IRDMA_FEATURE_RTS_AE   BIT_ULL(0)
+#def

[Intel-wired-lan] [rdma v3 13/24] RDMA/irdma: Add GEN3 HW statistics support

2025-02-07 Thread Tatyana Nikolova
From: Krzysztof Czurylo 

Plug into the unified HW statistics framework by adding a hardware
statistics map array for GEN3, defining the HW-specific width and
location for each counter in the statistics buffer.

Signed-off-by: Krzysztof Czurylo 
Signed-off-by: Tatyana Nikolova 
---
 drivers/infiniband/hw/irdma/ctrl.c   |  33 +--
 drivers/infiniband/hw/irdma/defs.h   |   2 +-
 drivers/infiniband/hw/irdma/ig3rdma_hw.c |  63 +
 drivers/infiniband/hw/irdma/type.h   |  19 +++-
 drivers/infiniband/hw/irdma/verbs.c  | 110 +--
 5 files changed, 166 insertions(+), 61 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/ctrl.c 
b/drivers/infiniband/hw/irdma/ctrl.c
index 55080b56311b..8fd2882f75af 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -1968,7 +1968,8 @@ int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi,
(void *)((uintptr_t)stats_buff_mem->va +
 IRDMA_GATHER_STATS_BUF_SIZE);
 
-   irdma_hw_stats_start_timer(vsi);
+   if (vsi->dev->hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3)
+   irdma_hw_stats_start_timer(vsi);
 
/* when stat allocation is not required default to fcn_id. */
vsi->stats_idx = info->fcn_id;
@@ -2013,7 +2014,9 @@ void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi)
 
if (!vsi->pestat)
return;
-   irdma_hw_stats_stop_timer(vsi);
+
+   if (dev->hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3)
+   irdma_hw_stats_stop_timer(vsi);
dma_free_coherent(vsi->pestat->hw->device,
  vsi->pestat->gather_info.stats_buff_mem.size,
  vsi->pestat->gather_info.stats_buff_mem.va,
@@ -5929,14 +5932,26 @@ void irdma_cfg_aeq(struct irdma_sc_dev *dev, u32 idx, 
bool enable)
  */
 void sc_vsi_update_stats(struct irdma_sc_vsi *vsi)
 {
-   struct irdma_gather_stats *gather_stats;
-   struct irdma_gather_stats *last_gather_stats;
+   struct irdma_dev_hw_stats *hw_stats = &vsi->pestat->hw_stats;
+   struct irdma_gather_stats *gather_stats =
+   vsi->pestat->gather_info.gather_stats_va;
+   struct irdma_gather_stats *last_gather_stats =
+   vsi->pestat->gather_info.last_gather_stats_va;
+   const struct irdma_hw_stat_map *map = vsi->dev->hw_stats_map;
+   u16 max_stat_idx = vsi->dev->hw_attrs.max_stat_idx;
+   u16 i;
+
+   if (vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+   for (i = 0; i < max_stat_idx; i++) {
+   u16 idx = map[i].byteoff / sizeof(u64);
+
+   hw_stats->stats_val[i] = gather_stats->val[idx];
+   }
+   return;
+   }
 
-   gather_stats = vsi->pestat->gather_info.gather_stats_va;
-   last_gather_stats = vsi->pestat->gather_info.last_gather_stats_va;
-   irdma_update_stats(&vsi->pestat->hw_stats, gather_stats,
-  last_gather_stats, vsi->dev->hw_stats_map,
-  vsi->dev->hw_attrs.max_stat_idx);
+   irdma_update_stats(hw_stats, gather_stats, last_gather_stats,
+  map, max_stat_idx);
 }
 
 /**
diff --git a/drivers/infiniband/hw/irdma/defs.h 
b/drivers/infiniband/hw/irdma/defs.h
index 52ace06912eb..2fc8e3cf4395 100644
--- a/drivers/infiniband/hw/irdma/defs.h
+++ b/drivers/infiniband/hw/irdma/defs.h
@@ -415,7 +415,7 @@ enum irdma_cqp_op_type {
 #define IRDMA_CQPSQ_STATS_USE_INST BIT_ULL(61)
 #define IRDMA_CQPSQ_STATS_OP GENMASK_ULL(37, 32)
 #define IRDMA_CQPSQ_STATS_INST_INDEX GENMASK_ULL(6, 0)
-#define IRDMA_CQPSQ_STATS_HMC_FCN_INDEX GENMASK_ULL(5, 0)
+#define IRDMA_CQPSQ_STATS_HMC_FCN_INDEX GENMASK_ULL(15, 0)
 #define IRDMA_CQPSQ_WS_WQEVALID BIT_ULL(63)
 #define IRDMA_CQPSQ_WS_NODEOP GENMASK_ULL(55, 52)
 #define IRDMA_SD_MAX GENMASK_ULL(15, 0)
diff --git a/drivers/infiniband/hw/irdma/ig3rdma_hw.c 
b/drivers/infiniband/hw/irdma/ig3rdma_hw.c
index 1d582c50e4d2..2a3d7144c771 100644
--- a/drivers/infiniband/hw/irdma/ig3rdma_hw.c
+++ b/drivers/infiniband/hw/irdma/ig3rdma_hw.c
@@ -48,9 +48,70 @@ static const struct irdma_irq_ops ig3rdma_irq_ops = {
.irdma_en_irq = ig3rdma_ena_irq,
 };
 
+static const struct irdma_hw_stat_map ig3rdma_hw_stat_map[] = {
+   [IRDMA_HW_STAT_INDEX_RXVLANERR] =   {   0, 0, 0 },
+   [IRDMA_HW_STAT_INDEX_IP4RXOCTS] =   {   8, 0, 0 },
+   [IRDMA_HW_STAT_INDEX_IP4RXPKTS] =   {  16, 0, 0 },
+   [IRDMA_HW_STAT_INDEX_IP4RXDISCARD] ={  24, 0, 0 },
+   [IRDMA_HW_STAT_INDEX_IP4RXTRUNC] =  {  32, 0, 0 },
+   [IRDMA_HW_STAT_INDEX_IP4RXFRAGS] =  {  40, 0, 0 },
+   [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS] = {  48, 0, 0 },
+   [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS] = {  56, 0, 0 },
+   [IRDMA_HW_STAT_INDEX_IP6RXOCTS] =   {  64, 0, 0 },
+   [IRDMA_HW_STAT_INDEX_IP6RXPKTS] =   {  72, 0, 0 },
+   [IRDMA_HW_STAT_INDEX_IP6RXDISCARD] ={  80, 0, 0 },

[Intel-wired-lan] [iwl-next, rdma v3 00/24] Add RDMA support for Intel IPU E2000 (GEN3)

2025-02-07 Thread Tatyana Nikolova
This patch series is based on 6.14-rc1 and includes both netdev and RDMA
patches for ease of review. It can also be viewed here [1]. A shared pull
request will be sent for patches 1-7 following review.

The patch series introduces RDMA RoCEv2 support for the Intel Infrastructure
Processing Unit (IPU) E2000 line of products, referred to as GEN3 in the
irdma provider.  It supports both physical and virtual functions.
The irdma driver communicates with the device Control Plane (CP) to
discover capabilities and perform privileged operations through an
RDMA-specific messaging interface built atop the Infrastructure
Data-Plane Function (IDPF) mailbox and virtchannel protocol [2].

To support RDMA for E2000 product, the idpf driver requires the use of
the Inter-Driver Communication (IDC) interface which is currently already
in use between ice and irdma. With a second consumer, the IDC is
generalized to support multiple consumers and ice, idpf and irdma
are adapted to the IDC definitions.

The IPU model can host one or more logical network endpoints called vPorts
per PCI function that are flexibly associated with a physical port or an
internal communication port. irdma exposes a verbs device per vPort.

Other key highlights of this series as it pertains to GEN3 device include:

MMIO learning, RDMA capability negotiation and RDMA vectors
discovery between idpf and CP
PCI core device level RDMA resource initialization via
a GEN3 core auxiliary driver
Shared Receive Queue (SRQ) Support
Atomic Operations Support (Compare and Swap and Fetch and Add)
Completion Queue Element (CQE) Error and Flush Handling
Push Page Support

Changelog:

V3 series irdma changes:
* Move the call to get RDMA features just after CQP is created,
otherwise the feature flags are not defined before used.
* Move the check for supported atomic operations after reading
the RDMA feature info to correctly enable atomics.
* Round up to power of two the resource size for Read Responses and
Transmit Queue elements.
* Do not use the Work Queue element index passed in the Asynchronous Event
info to get SRQ context, because it is incorrect.
* Fix detection of Completion Queue (CQ) empty when 64-byte CQ elements
are enabled.
* Minor improvements and cleanup.

V3 series idpf changes:
* Reduce required minimum RDMA vectors to 2.

V2 RFC series includes only idpf changes:

* RDMA vector number adjustment
* Fix unplugging vport auxiliary device twice
* General cleanup and minor improvements

V2 RFC series is at https://lwn.net/Articles/987141/.

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/tnguy/linux.git/log/?h=idpf-rdma
[2] 
https://elixir.bootlin.com/linux/latest/source/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c

Christopher Bednarz (1):
  RDMA/irdma: Discover and set up GEN3 hardware register layout

Dave Ertman (1):
  iidc/ice/irdma: Update IDC to support multiple consumers

Faisal Latif (2):
  RDMA/irdma: Add SRQ support
  RDMA/irdma: Add Atomic Operations support

Jay Bhat (1):
  RDMA/irdma: Add Push Page Support for GEN3

Joshua Hay (6):
  idpf: use reserved RDMA vectors from control plane
  idpf: implement core RDMA auxiliary dev create, init, and destroy
  idpf: implement RDMA vport auxiliary dev create, init, and destroy
  idpf: implement remaining IDC RDMA core callbacks and handlers
  idpf: implement IDC vport aux driver MTU change handler
  idpf: implement get LAN mmio memory regions

Krzysztof Czurylo (2):
  RDMA/irdma: Add GEN3 CQP support with deferred completions
  RDMA/irdma: Add GEN3 HW statistics support

Mustafa Ismail (3):
  RDMA/irdma: Refactor GEN2 auxiliary driver
  RDMA/irdma: Add GEN3 core driver support
  RDMA/irdma: Introduce GEN3 vPort driver support

Shiraz Saleem (7):
  RDMA/irdma: Add GEN3 support for AEQ and CEQ
  RDMA/irdma: Add GEN3 virtual QP1 support
  RDMA/irdma: Extend QP context programming for GEN3
  RDMA/irdma: Support 64-byte CQEs and GEN3 CQE opcode decoding
  RDMA/irdma: Restrict Memory Window and CQE Timestamping to GEN3
  RDMA/irdma: Extend CQE Error and Flush Handling for GEN3 Devices
  RDMA/irdma: Update Kconfig

Vinoth Kumar Chandra Mohan (1):
  RDMA/irdma: Add support for V2 HMC resource management scheme

 drivers/infiniband/hw/irdma/Kconfig   |7 +-
 drivers/infiniband/hw/irdma/Makefile  |4 +
 drivers/infiniband/hw/irdma/ctrl.c| 1469 +++--
 drivers/infiniband/hw/irdma/defs.h|  266 +--
 drivers/infiniband/hw/irdma/hmc.c |   18 +-
 drivers/infiniband/hw/irdma/hmc.h |   19 +-
 drivers/infiniband/hw/irdma/hw.c  |  357 ++--
 drivers/infiniband/hw/irdma/i40iw_hw.c|2 +
 drivers/infiniband/hw/irdma/i40iw_hw.h|2 +
 drivers/infiniband/hw/irdma/i40iw_if.c|3 +
 drivers/infiniband/hw/irdma/icrdma_hw.c   |3 +
 drivers/infiniband/hw/irdma/icrdma_hw.h   |5 +-
 drivers/infiniband/hw/irdma/icrdma_if.c   |  267 +++
 drivers/infiniband/hw/irdma/ig3rdma_hw.c  |  

[Intel-wired-lan] [iwl-next v3 06/24] idpf: implement IDC vport aux driver MTU change handler

2025-02-07 Thread Tatyana Nikolova
From: Joshua Hay 

The only event an RDMA vport aux driver cares about right now is an MTU
change on its underlying vport. Implement and plumb the handler to
signal the pre- and post-MTU-change events to the RDMA
vport aux driver.

Signed-off-by: Joshua Hay 
Signed-off-by: Tatyana Nikolova 
---

v3:
- add missing break statement
- remove unnecessary iadrv NULL check

 drivers/net/ethernet/intel/idpf/idpf.h |  2 ++
 drivers/net/ethernet/intel/idpf/idpf_idc.c | 31 ++
 drivers/net/ethernet/intel/idpf/idpf_lib.c | 11 +---
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf.h 
b/drivers/net/ethernet/intel/idpf/idpf.h
index 9516e946781a..491db5b2d79d 100644
--- a/drivers/net/ethernet/intel/idpf/idpf.h
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -873,5 +873,7 @@ int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter,
 void idpf_idc_deinit_core_aux_device(struct idc_rdma_core_dev_info *cdev_info);
 void idpf_idc_deinit_vport_aux_device(struct idc_rdma_vport_dev_info 
*vdev_info);
 void idpf_idc_issue_reset_event(struct idc_rdma_core_dev_info *cdev_info);
+void idpf_idc_vdev_mtu_event(struct idc_rdma_vport_dev_info *vdev_info,
+enum idc_rdma_event_type event_type);
 
 #endif /* !_IDPF_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c 
b/drivers/net/ethernet/intel/idpf/idpf_idc.c
index 3dbd7e2a7e98..fb5b023557b6 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_idc.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c
@@ -137,6 +137,37 @@ static int idpf_idc_init_aux_vport_dev(struct idpf_vport 
*vport)
return 0;
 }
 
+/**
+ * idpf_idc_vdev_mtu_event - Function to handle IDC vport mtu change events
+ * @vdev_info: IDC vport device info pointer
+ * @event_type: type of event to pass to handler
+ */
+void idpf_idc_vdev_mtu_event(struct idc_rdma_vport_dev_info *vdev_info,
+enum idc_rdma_event_type event_type)
+{
+   struct idc_rdma_vport_auxiliary_drv *iadrv;
+   struct idc_rdma_event event = { };
+   struct auxiliary_device *adev;
+
+   if (!vdev_info)
+   /* RDMA is not enabled */
+   return;
+
+   set_bit(event_type, event.type);
+
+   device_lock(&vdev_info->adev->dev);
+   adev = vdev_info->adev;
+   if (!adev || !adev->dev.driver)
+   goto unlock;
+   iadrv = container_of(adev->dev.driver,
+struct idc_rdma_vport_auxiliary_drv,
+adrv.driver);
+   if (iadrv->event_handler)
+   iadrv->event_handler(vdev_info, &event);
+unlock:
+   device_unlock(&vdev_info->adev->dev);
+}
+
 /**
  * idpf_core_adev_release - function to be mapped to aux dev's release op
  * @dev: pointer to device to free
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c 
b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index 88a33c8b18fe..a9bc6e0f949c 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -1931,6 +1931,9 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
idpf_vport_calc_num_q_desc(new_vport);
break;
case IDPF_SR_MTU_CHANGE:
+   idpf_idc_vdev_mtu_event(vport->vdev_info,
+   IDC_RDMA_EVENT_BEFORE_MTU_CHANGE);
+   break;
case IDPF_SR_RSC_CHANGE:
break;
default:
@@ -1975,9 +1978,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
if (current_state == __IDPF_VPORT_UP)
err = idpf_vport_open(vport);
 
-   kfree(new_vport);
-
-   return err;
+   goto free_vport;
 
 err_reset:
idpf_send_add_queues_msg(vport, vport->num_txq, vport->num_complq,
@@ -1990,6 +1991,10 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport,
 free_vport:
kfree(new_vport);
 
+   if (reset_cause == IDPF_SR_MTU_CHANGE)
+   idpf_idc_vdev_mtu_event(vport->vdev_info,
+   IDC_RDMA_EVENT_AFTER_MTU_CHANGE);
+
return err;
 }
 
-- 
2.37.3



[Intel-wired-lan] [iwl-next v3 05/24] idpf: implement remaining IDC RDMA core callbacks and handlers

2025-02-07 Thread Tatyana Nikolova
From: Joshua Hay 

Implement the idpf_idc_request_reset and idpf_idc_rdma_vc_send_sync
callbacks for the rdma core auxiliary driver to issue reset events to
the idpf and send (synchronous) virtchnl messages to the control plane
respectively.

Implement and plumb the reset handler for the opposite flow as well,
i.e. when the idpf is resetting and needs to notify the RDMA core
auxiliary driver.

Signed-off-by: Joshua Hay 
Signed-off-by: Tatyana Nikolova 
---

v3:
- remove unnecessary iadrv NULL check

 drivers/net/ethernet/intel/idpf/idpf.h|  1 +
 drivers/net/ethernet/intel/idpf/idpf_idc.c| 43 ++-
 drivers/net/ethernet/intel/idpf/idpf_lib.c|  2 +
 .../net/ethernet/intel/idpf/idpf_virtchnl.c   | 23 +-
 drivers/net/ethernet/intel/idpf/virtchnl2.h   |  3 +-
 5 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf.h 
b/drivers/net/ethernet/intel/idpf/idpf.h
index a9c0639f0021..9516e946781a 100644
--- a/drivers/net/ethernet/intel/idpf/idpf.h
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -872,5 +872,6 @@ int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter,
   enum idc_function_type ftype);
 void idpf_idc_deinit_core_aux_device(struct idc_rdma_core_dev_info *cdev_info);
 void idpf_idc_deinit_vport_aux_device(struct idc_rdma_vport_dev_info 
*vdev_info);
+void idpf_idc_issue_reset_event(struct idc_rdma_core_dev_info *cdev_info);
 
 #endif /* !_IDPF_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c 
b/drivers/net/ethernet/intel/idpf/idpf_idc.c
index a9049cb616a9..3dbd7e2a7e98 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_idc.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c
@@ -216,6 +216,38 @@ static void idpf_unplug_aux_dev(struct auxiliary_device 
*adev)
ida_free(&idpf_idc_ida, adev->id);
 }
 
+/**
+ * idpf_idc_issue_reset_event - Function to handle reset IDC event
+ * @cdev_info: IDC core device info pointer
+ */
+void idpf_idc_issue_reset_event(struct idc_rdma_core_dev_info *cdev_info)
+{
+   enum idc_rdma_event_type event_type = IDC_RDMA_EVENT_WARN_RESET;
+   struct idc_rdma_core_auxiliary_drv *iadrv;
+   struct idc_rdma_event event = { };
+   struct auxiliary_device *adev;
+
+   if (!cdev_info)
+   /* RDMA is not enabled */
+   return;
+
+   set_bit(event_type, event.type);
+
+   device_lock(&cdev_info->adev->dev);
+
+   adev = cdev_info->adev;
+   if (!adev || !adev->dev.driver)
+   goto unlock;
+
+   iadrv = container_of(adev->dev.driver,
+struct idc_rdma_core_auxiliary_drv,
+adrv.driver);
+   if (iadrv->event_handler)
+   iadrv->event_handler(cdev_info, &event);
+unlock:
+   device_unlock(&cdev_info->adev->dev);
+}
+
 /**
  * idpf_idc_vport_dev_up - called when CORE is ready for vport aux devs
  * @adapter: private data struct
@@ -300,7 +332,16 @@ static int
 idpf_idc_request_reset(struct idc_rdma_core_dev_info *cdev_info,
   enum idc_rdma_reset_type __always_unused reset_type)
 {
-   return -EOPNOTSUPP;
+   struct idpf_adapter *adapter = pci_get_drvdata(cdev_info->pdev);
+
+   if (!idpf_is_reset_in_prog(adapter)) {
+   set_bit(IDPF_HR_FUNC_RESET, adapter->flags);
+   queue_delayed_work(adapter->vc_event_wq,
+  &adapter->vc_event_task,
+  msecs_to_jiffies(10));
+   }
+
+   return 0;
 }
 
 /* Implemented by the Auxiliary Device and called by the Auxiliary Driver */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c 
b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index a211fca9e925..88a33c8b18fe 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -1803,6 +1803,8 @@ static int idpf_init_hard_reset(struct idpf_adapter 
*adapter)
} else if (test_and_clear_bit(IDPF_HR_FUNC_RESET, adapter->flags)) {
bool is_reset = idpf_is_reset_detected(adapter);
 
+   idpf_idc_issue_reset_event(adapter->cdev_info);
+
idpf_set_vport_state(adapter);
idpf_vc_core_deinit(adapter);
if (!is_reset)
diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c 
b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
index eaffda7a2673..bd20d7b148c2 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c
@@ -3754,5 +3754,26 @@ int idpf_idc_rdma_vc_send_sync(struct 
idc_rdma_core_dev_info *cdev_info,
   u8 *send_msg, u16 msg_size,
   u8 *recv_msg, u16 *recv_len)
 {
-   return -EOPNOTSUPP;
+   struct idpf_adapter *adapter = pci_get_drvdata(cdev_info->pdev);
+   struct idpf_vc_xn_params xn_params = { };
+   ssize_t reply_sz;
+   u16 recv_size;
+
+   if

[Intel-wired-lan] [iwl-next v3 04/24] idpf: implement RDMA vport auxiliary dev create, init, and destroy

2025-02-07 Thread Tatyana Nikolova
From: Joshua Hay 

Implement the functions to create, initialize, and destroy an RDMA vport
auxiliary device. The vport aux dev creation is dependent on the
core aux device to call idpf_idc_vport_dev_ctrl to signal that it is
ready for vport aux devices. Implement that core callback to either
create and initialize the vport aux dev or deinitialize.

RDMA vport aux dev creation is also dependent on the control plane to
tell us the vport is RDMA enabled. Add a flag in the create vport
message to signal individual vport RDMA capabilities.

Signed-off-by: Joshua Hay 
Signed-off-by: Tatyana Nikolova 
---

v2: Guard against unplugging vport aux dev twice. This is possible if
irdma is unloaded and then idpf is unloaded. irdma calls
idpf_idc_vport_dev_down during its unload which calls unplug. Set the
adev to NULL in dev_down, so that the following call to
deinit_vport_aux_device during idpf unload will return early from
unplug.

v3:
- Used signed ret value from ida_alloc and only assign unsigned id if no
  err
- capitalize some abbreviations
- add missing field descriptions
- remove unnecessary casts

 drivers/net/ethernet/intel/idpf/idpf.h  |   4 +
 drivers/net/ethernet/intel/idpf/idpf_idc.c  | 178 +++-
 drivers/net/ethernet/intel/idpf/idpf_lib.c  |   2 +
 drivers/net/ethernet/intel/idpf/virtchnl2.h |  13 +-
 4 files changed, 194 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf.h 
b/drivers/net/ethernet/intel/idpf/idpf.h
index 64f731fe878c..a9c0639f0021 100644
--- a/drivers/net/ethernet/intel/idpf/idpf.h
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -275,6 +275,7 @@ struct idpf_port_stats {
  *   group will yield total number of RX queues.
  * @rxq_model: Splitq queue or single queue queuing model
  * @rx_ptype_lkup: Lookup table for ptypes on RX
+ * @vdev_info: IDC vport device info pointer
  * @adapter: back pointer to associated adapter
  * @netdev: Associated net_device. Each vport should have one and only one
  * associated netdev.
@@ -317,6 +318,8 @@ struct idpf_vport {
u32 rxq_model;
struct libeth_rx_pt *rx_ptype_lkup;
 
+   struct idc_rdma_vport_dev_info *vdev_info;
+
struct idpf_adapter *adapter;
struct net_device *netdev;
DECLARE_BITMAP(flags, IDPF_VPORT_FLAGS_NBITS);
@@ -868,5 +871,6 @@ int idpf_idc_init(struct idpf_adapter *adapter);
 int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter,
   enum idc_function_type ftype);
 void idpf_idc_deinit_core_aux_device(struct idc_rdma_core_dev_info *cdev_info);
+void idpf_idc_deinit_vport_aux_device(struct idc_rdma_vport_dev_info 
*vdev_info);
 
 #endif /* !_IDPF_H_ */
diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c 
b/drivers/net/ethernet/intel/idpf/idpf_idc.c
index 4c7cf32d4863..a9049cb616a9 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_idc.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c
@@ -30,6 +30,113 @@ int idpf_idc_init(struct idpf_adapter *adapter)
return err;
 }
 
+/**
+ * idpf_vport_adev_release - function to be mapped to aux dev's release op
+ * @dev: pointer to device to free
+ */
+static void idpf_vport_adev_release(struct device *dev)
+{
+   struct idc_rdma_vport_auxiliary_dev *iadev;
+
+   iadev = container_of(dev, struct idc_rdma_vport_auxiliary_dev, 
adev.dev);
+   kfree(iadev);
+   iadev = NULL;
+}
+
+/* idpf_plug_vport_aux_dev - allocate and register a vport Auxiliary device
+ * @cdev_info: IDC core device info pointer
+ * @vdev_info: IDC vport device info pointer
+ *
+ * Return: 0 on success or error code on failure.
+ */
+static int idpf_plug_vport_aux_dev(struct idc_rdma_core_dev_info *cdev_info,
+  struct idc_rdma_vport_dev_info *vdev_info)
+{
+   struct idc_rdma_vport_auxiliary_dev *iadev;
+   char name[IDPF_IDC_MAX_ADEV_NAME_LEN];
+   struct auxiliary_device *adev;
+   int ret;
+
+   iadev = kzalloc(sizeof(*iadev), GFP_KERNEL);
+   if (!iadev)
+   return -ENOMEM;
+
+   adev = &iadev->adev;
+   vdev_info->adev = &iadev->adev;
+   iadev->vdev_info = vdev_info;
+
+   ret = ida_alloc(&idpf_idc_ida, GFP_KERNEL);
+   if (ret < 0) {
+   pr_err("failed to allocate unique device ID for Auxiliary 
driver\n");
+   goto err_ida_alloc;
+   }
+   adev->id = ret;
+   adev->dev.release = idpf_vport_adev_release;
+   adev->dev.parent = &cdev_info->pdev->dev;
+   sprintf(name, "%04x.rdma.vdev", cdev_info->pdev->vendor);
+   adev->name = name;
+
+   ret = auxiliary_device_init(adev);
+   if (ret)
+   goto err_aux_dev_init;
+
+   ret = auxiliary_device_add(adev);
+   if (ret)
+   goto err_aux_dev_add;
+
+   return 0;
+
+err_aux_dev_add:
+   vdev_info->adev = NULL;
+   auxiliary_device_uninit(adev);
+err_aux_dev_init:
+   ida_free(&idpf_idc_ida, adev->id);
+err_ida_alloc:
+   kfre

[Intel-wired-lan] [iwl-next v3 02/24] idpf: use reserved RDMA vectors from control plane

2025-02-07 Thread Tatyana Nikolova
From: Joshua Hay 

Fetch the number of reserved RDMA vectors from the control plane.
Adjust the number of reserved LAN vectors if necessary. Adjust the
minimum number of vectors the OS should reserve to include RDMA; and
fail if the OS cannot reserve enough vectors for the minimum number of
LAN and RDMA vectors required. Create a separate msix table for the
reserved RDMA vectors, which will just get handed off to the RDMA core
device to do with what it will.

Signed-off-by: Joshua Hay 
Signed-off-by: Tatyana Nikolova 
---
 drivers/net/ethernet/intel/idpf/idpf.h  | 28 +++-
 drivers/net/ethernet/intel/idpf/idpf_lib.c  | 74 +
 drivers/net/ethernet/intel/idpf/idpf_txrx.h |  1 +
 drivers/net/ethernet/intel/idpf/virtchnl2.h |  5 +-
 4 files changed, 92 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf.h 
b/drivers/net/ethernet/intel/idpf/idpf.h
index 66544faab710..8ef7120e6717 100644
--- a/drivers/net/ethernet/intel/idpf/idpf.h
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -489,10 +489,11 @@ struct idpf_vc_xn_manager;
  * @flags: See enum idpf_flags
  * @reset_reg: See struct idpf_reset_reg
  * @hw: Device access data
- * @num_req_msix: Requested number of MSIX vectors
  * @num_avail_msix: Available number of MSIX vectors
  * @num_msix_entries: Number of entries in MSIX table
  * @msix_entries: MSIX table
+ * @num_rdma_msix_entries: Available number of MSIX vectors for RDMA
+ * @rdma_msix_entries: RDMA MSIX table
  * @req_vec_chunks: Requested vector chunk data
  * @mb_vector: Mailbox vector data
  * @vector_stack: Stack to store the msix vector indexes
@@ -542,10 +543,11 @@ struct idpf_adapter {
DECLARE_BITMAP(flags, IDPF_FLAGS_NBITS);
struct idpf_reset_reg reset_reg;
struct idpf_hw hw;
-   u16 num_req_msix;
u16 num_avail_msix;
u16 num_msix_entries;
struct msix_entry *msix_entries;
+   u16 num_rdma_msix_entries;
+   struct msix_entry *rdma_msix_entries;
struct virtchnl2_alloc_vectors *req_vec_chunks;
struct idpf_q_vector mb_vector;
struct idpf_vector_lifo vector_stack;
@@ -609,6 +611,17 @@ static inline int idpf_is_queue_model_split(u16 q_model)
 bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all,
enum idpf_cap_field field, u64 flag);
 
+/**
+ * idpf_is_rdma_cap_ena - Determine if RDMA is supported
+ * @adapter: private data struct
+ *
+ * Return: true if RDMA capability is enabled, false otherwise
+ */
+static inline bool idpf_is_rdma_cap_ena(struct idpf_adapter *adapter)
+{
+   return idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_RDMA);
+}
+
 #define IDPF_CAP_RSS (\
VIRTCHNL2_CAP_RSS_IPV4_TCP  |\
VIRTCHNL2_CAP_RSS_IPV4_TCP  |\
@@ -663,6 +676,17 @@ static inline u16 idpf_get_reserved_vecs(struct 
idpf_adapter *adapter)
return le16_to_cpu(adapter->caps.num_allocated_vectors);
 }
 
+/**
+ * idpf_get_reserved_rdma_vecs - Get reserved RDMA vectors
+ * @adapter: private data struct
+ *
+ * Return: number of vectors reserved for RDMA
+ */
+static inline u16 idpf_get_reserved_rdma_vecs(struct idpf_adapter *adapter)
+{
+   return le16_to_cpu(adapter->caps.num_rdma_allocated_vectors);
+}
+
 /**
  * idpf_get_default_vports - Get default number of vports
  * @adapter: private data struct
diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c 
b/drivers/net/ethernet/intel/idpf/idpf_lib.c
index b4fbb99bfad2..1330be825cd0 100644
--- a/drivers/net/ethernet/intel/idpf/idpf_lib.c
+++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c
@@ -87,6 +87,8 @@ void idpf_intr_rel(struct idpf_adapter *adapter)
idpf_deinit_vector_stack(adapter);
kfree(adapter->msix_entries);
adapter->msix_entries = NULL;
+   kfree(adapter->rdma_msix_entries);
+   adapter->rdma_msix_entries = NULL;
 }
 
 /**
@@ -314,13 +316,33 @@ int idpf_req_rel_vector_indexes(struct idpf_adapter 
*adapter,
  */
 int idpf_intr_req(struct idpf_adapter *adapter)
 {
+   u16 num_lan_vecs, min_lan_vecs, num_rdma_vecs = 0, min_rdma_vecs = 0;
u16 default_vports = idpf_get_default_vports(adapter);
int num_q_vecs, total_vecs, num_vec_ids;
int min_vectors, v_actual, err;
unsigned int vector;
u16 *vecids;
+   int i;
 
total_vecs = idpf_get_reserved_vecs(adapter);
+   num_lan_vecs = total_vecs;
+   if (idpf_is_rdma_cap_ena(adapter)) {
+   num_rdma_vecs = idpf_get_reserved_rdma_vecs(adapter);
+   min_rdma_vecs = IDPF_MIN_RDMA_VEC;
+
+   if (!num_rdma_vecs) {
+   /* If idpf_get_reserved_rdma_vecs is 0, vectors are
+* pulled from the LAN pool.
+*/
+   num_rdma_vecs = min_rdma_vecs;
+   } else if (num_rdma_vecs < min_rdma_vecs) {
+   dev_err(&adapter->pdev->dev,
+   "Not enough vect

[Intel-wired-lan] [rdma v3 24/24] RDMA/irdma: Update Kconfig

2025-02-07 Thread Tatyana Nikolova
From: Shiraz Saleem 

Update Kconfig to add dependency on idpf module. Additionally, add
IPU E2000 to list of devices supported.

Signed-off-by: Shiraz Saleem 
Signed-off-by: Tatyana Nikolova 
---
 drivers/infiniband/hw/irdma/Kconfig | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/Kconfig 
b/drivers/infiniband/hw/irdma/Kconfig
index b6f9c41bca51..f6b39f3a726e 100644
--- a/drivers/infiniband/hw/irdma/Kconfig
+++ b/drivers/infiniband/hw/irdma/Kconfig
@@ -4,9 +4,10 @@ config INFINIBAND_IRDMA
depends on INET
depends on IPV6 || !IPV6
depends on PCI
-   depends on ICE && I40E
+   depends on (IDPF || ICE) && I40E
select GENERIC_ALLOCATOR
select AUXILIARY_BUS
help
- This is an Intel(R) Ethernet Protocol Driver for RDMA driver
- that support E810 (iWARP/RoCE) and X722 (iWARP) network devices.
+ This is an Intel(R) Ethernet Protocol Driver for RDMA that
+ support IPU E2000 (RoCEv2), E810 (iWARP/RoCE) and X722 (iWARP)
+ network devices.
-- 
2.37.3



[Intel-wired-lan] [rdma v3 11/24] RDMA/irdma: Add GEN3 CQP support with deferred completions

2025-02-07 Thread Tatyana Nikolova
From: Krzysztof Czurylo 

GEN3 introduces asynchronous handling of Control QP (CQP) operations to
minimize head-of-line blocking. Create the CQP using the updated GEN3-
specific descriptor fields and implement the necessary support for this
deferred completion mechanism.

Signed-off-by: Krzysztof Czurylo 
Signed-off-by: Tatyana Nikolova 
---
 drivers/infiniband/hw/irdma/ctrl.c   | 253 ++-
 drivers/infiniband/hw/irdma/defs.h   |  15 ++
 drivers/infiniband/hw/irdma/hw.c |  89 --
 drivers/infiniband/hw/irdma/main.h   |   2 +
 drivers/infiniband/hw/irdma/protos.h |   1 +
 drivers/infiniband/hw/irdma/type.h   |  43 -
 drivers/infiniband/hw/irdma/utils.c  |  50 +-
 7 files changed, 438 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/ctrl.c 
b/drivers/infiniband/hw/irdma/ctrl.c
index 082aaa5fc3bc..96d7a5d34515 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -2741,6 +2741,89 @@ static inline void irdma_get_cqp_reg_info(struct 
irdma_sc_cqp *cqp, u32 *val,
*error = FIELD_GET(IRDMA_CQPTAIL_CQP_OP_ERR, *val);
 }
 
+/**
+ * irdma_sc_cqp_def_cmpl_ae_handler - remove completed requests from pending 
list
+ * @dev: sc device struct
+ * @info: AE entry info
+ * @first: true if this is the first call to this handler for given AEQE
+ * @scratch: (out) scratch entry pointer
+ * @sw_def_info: (in/out) SW ticket value for this AE
+ *
+ * In case of AE_DEF_CMPL event, this function should be called in a loop
+ * until it returns NULL-ptr via scratch.
+ * For each call, it looks for a matching CQP request on pending list,
+ * removes it from the list and returns the pointer to the associated scratch
+ * entry.
+ * If this is the first call to this function for given AEQE, sw_def_info
+ * value is not used to find matching requests.  Instead, it is populated
+ * with the value from the first matching cqp_request on the list.
+ * For subsequent calls, ooo_op->sw_def_info need to match the value passed
+ * by a caller.
+ *
+ * Return: scratch entry pointer for cqp_request to be released or NULL
+ * if no matching request is found.
+ */
+void irdma_sc_cqp_def_cmpl_ae_handler(struct irdma_sc_dev *dev,
+ struct irdma_aeqe_info *info,
+ bool first, u64 *scratch,
+ u32 *sw_def_info)
+{
+   struct irdma_ooo_cqp_op *ooo_op;
+   unsigned long flags;
+
+   *scratch = 0;
+
+   spin_lock_irqsave(&dev->cqp->ooo_list_lock, flags);
+   list_for_each_entry(ooo_op, &dev->cqp->ooo_pnd, list_entry) {
+   if (ooo_op->deferred &&
+   ((first && ooo_op->def_info == info->def_info) ||
+(!first && ooo_op->sw_def_info == *sw_def_info))) {
+   *sw_def_info = ooo_op->sw_def_info;
+   *scratch = ooo_op->scratch;
+
+   list_move(&ooo_op->list_entry, &dev->cqp->ooo_avail);
+   atomic64_inc(&dev->cqp->completed_ops);
+
+   break;
+   }
+   }
+   spin_unlock_irqrestore(&dev->cqp->ooo_list_lock, flags);
+
+   if (first && !*scratch)
+   ibdev_dbg(to_ibdev(dev),
+ "AEQ: deferred completion with unknown ticket: 
def_info 0x%x\n",
+  info->def_info);
+}
+
+/**
+ * irdma_sc_cqp_cleanup_handler - remove requests from pending list
+ * @dev: sc device struct
+ *
+ * This function should be called in a loop from irdma_cleanup_pending_cqp_op.
+ * For each call, it returns first CQP request on pending list, removes it
+ * from the list and returns the pointer to the associated scratch entry.
+ *
+ * Return: scratch entry pointer for cqp_request to be released or NULL
+ * if pending list is empty.
+ */
+u64 irdma_sc_cqp_cleanup_handler(struct irdma_sc_dev *dev)
+{
+   struct irdma_ooo_cqp_op *ooo_op;
+   u64 scratch = 0;
+
+   list_for_each_entry(ooo_op, &dev->cqp->ooo_pnd, list_entry) {
+   scratch = ooo_op->scratch;
+
+   list_del(&ooo_op->list_entry);
+   list_add(&ooo_op->list_entry, &dev->cqp->ooo_avail);
+   atomic64_inc(&dev->cqp->completed_ops);
+
+   break;
+   }
+
+   return scratch;
+}
+
 /**
  * irdma_cqp_poll_registers - poll cqp registers
  * @cqp: struct for cqp hw
@@ -3126,6 +3209,8 @@ void irdma_sc_remove_cq_ctx(struct irdma_sc_ceq *ceq, 
struct irdma_sc_cq *cq)
 int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
  struct irdma_cqp_init_info *info)
 {
+   struct irdma_ooo_cqp_op *ooo_op;
+   u32 num_ooo_ops;
u8 hw_sq_size;
 
if (info->sq_size > IRDMA_CQP_SW_SQSIZE_2048 ||
@@ -3156,17 +3241,43 @@ int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
cqp->rocev2_rto_policy = info->rocev2_rto_policy;
cqp->protocol_used = info->protocol_used;
memcpy(&cq

[Intel-wired-lan] [rdma v3 12/24] RDMA/irdma: Add GEN3 support for AEQ and CEQ

2025-02-07 Thread Tatyana Nikolova
From: Shiraz Saleem 

Extend support for GEN3 devices by programming the necessary hardware
IRQ registers and the updated descriptor fields for the Asynchronous
Event Queue (AEQ) and Completion Event Queue (CEQ). Introduce an RDMA
virtual channel operation with the Control Plane (CP) to associate
interrupt vectors appropriately with AEQ and CEQ. Add new Asynchronous
Event (AE) definitions specific to GEN3.

Additionally, refactor the AEQ and CEQ setup into the irdma_ctrl_init_hw
device control initialization routine.

This completes the PCI device level initialization for RDMA in the core
driver.

Signed-off-by: Shiraz Saleem 
Signed-off-by: Tatyana Nikolova 
---
 drivers/infiniband/hw/irdma/ctrl.c   |  76 ++---
 drivers/infiniband/hw/irdma/defs.h   |  29 -
 drivers/infiniband/hw/irdma/hw.c | 130 +--
 drivers/infiniband/hw/irdma/ig3rdma_hw.c |  45 
 drivers/infiniband/hw/irdma/irdma.h  |  11 +-
 drivers/infiniband/hw/irdma/main.h   |   6 +-
 drivers/infiniband/hw/irdma/type.h   |  11 +-
 drivers/infiniband/hw/irdma/virtchnl.c   |  84 +++
 drivers/infiniband/hw/irdma/virtchnl.h   |  19 
 9 files changed, 338 insertions(+), 73 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/ctrl.c 
b/drivers/infiniband/hw/irdma/ctrl.c
index 96d7a5d34515..55080b56311b 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -2566,6 +2566,9 @@ static int irdma_sc_cq_create(struct irdma_sc_cq *cq, u64 
scratch,
  FIELD_PREP(IRDMA_CQPSQ_CQ_LPBLSIZE, cq->pbl_chunk_size) |
  FIELD_PREP(IRDMA_CQPSQ_CQ_CHKOVERFLOW, check_overflow) |
  FIELD_PREP(IRDMA_CQPSQ_CQ_VIRTMAP, cq->virtual_map) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CQID_HIGH, cq->cq_uk.cq_id >> 22) |
+ FIELD_PREP(IRDMA_CQPSQ_CQ_CEQID_HIGH,
+(cq->ceq_id_valid ? cq->ceq_id : 0) >> 10) |
  FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, cq->ceqe_mask) |
  FIELD_PREP(IRDMA_CQPSQ_CQ_CEQIDVALID, cq->ceq_id_valid) |
  FIELD_PREP(IRDMA_CQPSQ_TPHEN, cq->tph_en) |
@@ -3928,7 +3931,7 @@ int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq,
ceq->pbl_list = (ceq->virtual_map ? info->pbl_list : NULL);
ceq->tph_en = info->tph_en;
ceq->tph_val = info->tph_val;
-   ceq->vsi = info->vsi;
+   ceq->vsi_idx = info->vsi_idx;
ceq->polarity = 1;
IRDMA_RING_INIT(ceq->ceq_ring, ceq->elem_cnt);
ceq->dev->ceq[info->ceq_id] = ceq;
@@ -3961,13 +3964,16 @@ static int irdma_sc_ceq_create(struct irdma_sc_ceq 
*ceq, u64 scratch,
  (ceq->virtual_map ? ceq->first_pm_pbl_idx : 0));
set_64bit_val(wqe, 56,
  FIELD_PREP(IRDMA_CQPSQ_TPHVAL, ceq->tph_val) |
- FIELD_PREP(IRDMA_CQPSQ_VSIIDX, ceq->vsi->vsi_idx));
+ FIELD_PREP(IRDMA_CQPSQ_PASID, ceq->pasid) |
+ FIELD_PREP(IRDMA_CQPSQ_VSIIDX, ceq->vsi_idx));
hdr = FIELD_PREP(IRDMA_CQPSQ_CEQ_CEQID, ceq->ceq_id) |
+ FIELD_PREP(IRDMA_CQPSQ_CEQ_CEQID_HIGH, ceq->ceq_id >> 10) |
  FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_CEQ) |
  FIELD_PREP(IRDMA_CQPSQ_CEQ_LPBLSIZE, ceq->pbl_chunk_size) |
  FIELD_PREP(IRDMA_CQPSQ_CEQ_VMAP, ceq->virtual_map) |
  FIELD_PREP(IRDMA_CQPSQ_CEQ_ITRNOEXPIRE, ceq->itr_no_expire) |
  FIELD_PREP(IRDMA_CQPSQ_TPHEN, ceq->tph_en) |
+ FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, ceq->pasid_valid) |
  FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
dma_wmb(); /* make sure WQE is written before valid bit is set */
 
@@ -4022,7 +4028,7 @@ int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 
scratch)
int ret_code;
struct irdma_sc_dev *dev = ceq->dev;
 
-   dev->ccq->vsi = ceq->vsi;
+   dev->ccq->vsi_idx = ceq->vsi_idx;
if (ceq->reg_cq) {
ret_code = irdma_sc_add_cq_ctx(ceq, ceq->dev->ccq);
if (ret_code)
@@ -4055,11 +4061,14 @@ int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 
scratch, bool post_sq)
 
set_64bit_val(wqe, 16, ceq->elem_cnt);
set_64bit_val(wqe, 48, ceq->first_pm_pbl_idx);
+   set_64bit_val(wqe, 56,
+ FIELD_PREP(IRDMA_CQPSQ_PASID, ceq->pasid));
hdr = ceq->ceq_id |
  FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_CEQ) |
  FIELD_PREP(IRDMA_CQPSQ_CEQ_LPBLSIZE, ceq->pbl_chunk_size) |
  FIELD_PREP(IRDMA_CQPSQ_CEQ_VMAP, ceq->virtual_map) |
  FIELD_PREP(IRDMA_CQPSQ_TPHEN, ceq->tph_en) |
+ FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, ceq->pasid_valid) |
  FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
dma_wmb(); /* make sure WQE is written before valid bit is set */
 
@@ -4223,10 +4232,13 @@ static int irdma_sc_aeq_create(struct irdma_sc_aeq 
*aeq, u64 scratch,
 

[Intel-wired-lan] [rdma v3 08/24] RDMA/irdma: Refactor GEN2 auxiliary driver

2025-02-07 Thread Tatyana Nikolova
From: Mustafa Ismail 

Refactor the irdma auxiliary driver and associated interfaces out of main.c
and into a standalone GEN2-specific source file, renaming it as the gen_2
driver.

This is in preparation for adding GEN3 auxiliary drivers. Each HW
generation will have its own gen-specific interface file.

Additionally, move the Address Handle hash table and associated locks
under rf struct. This will allow GEN3 code to migrate to use it easily.

Signed-off-by: Mustafa Ismail 
Signed-off-by: Tatyana Nikolova 
---
 drivers/infiniband/hw/irdma/Makefile|   1 +
 drivers/infiniband/hw/irdma/i40iw_if.c  |   2 +
 drivers/infiniband/hw/irdma/icrdma_if.c | 265 +++
 drivers/infiniband/hw/irdma/main.c  | 272 +---
 drivers/infiniband/hw/irdma/main.h  |   9 +-
 drivers/infiniband/hw/irdma/verbs.c |  16 +-
 6 files changed, 290 insertions(+), 275 deletions(-)
 create mode 100644 drivers/infiniband/hw/irdma/icrdma_if.c

diff --git a/drivers/infiniband/hw/irdma/Makefile 
b/drivers/infiniband/hw/irdma/Makefile
index 48c3854235a0..2522e4ca650b 100644
--- a/drivers/infiniband/hw/irdma/Makefile
+++ b/drivers/infiniband/hw/irdma/Makefile
@@ -13,6 +13,7 @@ irdma-objs := cm.o\
   hw.o\
   i40iw_hw.o  \
   i40iw_if.o  \
+ icrdma_if.o \
   icrdma_hw.o \
   main.o  \
   pble.o  \
diff --git a/drivers/infiniband/hw/irdma/i40iw_if.c 
b/drivers/infiniband/hw/irdma/i40iw_if.c
index cc50a7070371..6fa807ef4545 100644
--- a/drivers/infiniband/hw/irdma/i40iw_if.c
+++ b/drivers/infiniband/hw/irdma/i40iw_if.c
@@ -75,6 +75,8 @@ static void i40iw_fill_device_info(struct irdma_device 
*iwdev, struct i40e_info
struct irdma_pci_f *rf = iwdev->rf;
 
rf->rdma_ver = IRDMA_GEN_1;
+   rf->sc_dev.hw = &rf->hw;
+   rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_1;
rf->gen_ops.request_reset = i40iw_request_reset;
rf->pcidev = cdev_info->pcidev;
rf->pf_id = cdev_info->fid;
diff --git a/drivers/infiniband/hw/irdma/icrdma_if.c 
b/drivers/infiniband/hw/irdma/icrdma_if.c
new file mode 100644
index ..5fcbf695a1d3
--- /dev/null
+++ b/drivers/infiniband/hw/irdma/icrdma_if.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
+// /* Copyright (c) 2015 - 2024 Intel Corporation */
+#include "main.h"
+
+static void icrdma_prep_tc_change(struct irdma_device *iwdev)
+{
+   iwdev->vsi.tc_change_pending = true;
+   irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_SUSPEND);
+
+   /* Wait for all qp's to suspend */
+   wait_event_timeout(iwdev->suspend_wq,
+  !atomic_read(&iwdev->vsi.qp_suspend_reqs),
+  msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS));
+   irdma_ws_reset(&iwdev->vsi);
+}
+
+static void icrdma_idc_event_handler(struct idc_rdma_core_dev_info *cdev_info,
+struct idc_rdma_event *event)
+{
+   struct irdma_device *iwdev = dev_get_drvdata(&cdev_info->adev->dev);
+   struct irdma_l2params l2params = {};
+
+   if (*event->type & BIT(IDC_RDMA_EVENT_AFTER_MTU_CHANGE)) {
+   ibdev_dbg(&iwdev->ibdev, "CLNT: new MTU = %d\n", 
iwdev->netdev->mtu);
+   if (iwdev->vsi.mtu != iwdev->netdev->mtu) {
+   l2params.mtu = iwdev->netdev->mtu;
+   l2params.mtu_changed = true;
+   irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev);
+   irdma_change_l2params(&iwdev->vsi, &l2params);
+   }
+   } else if (*event->type & BIT(IDC_RDMA_EVENT_BEFORE_TC_CHANGE)) {
+   if (iwdev->vsi.tc_change_pending)
+   return;
+
+   icrdma_prep_tc_change(iwdev);
+   } else if (*event->type & BIT(IDC_RDMA_EVENT_AFTER_TC_CHANGE)) {
+   struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->idc_priv;
+
+   if (!iwdev->vsi.tc_change_pending)
+   return;
+
+   l2params.tc_changed = true;
+   ibdev_dbg(&iwdev->ibdev, "CLNT: TC Change\n");
+
+   irdma_fill_qos_info(&l2params, &idc_priv->qos_info);
+   if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
+   iwdev->dcb_vlan_mode =
+   l2params.num_tc > 1 && !l2params.dscp_mode;
+   irdma_change_l2params(&iwdev->vsi, &l2params);
+   } else if (*event->type & BIT(IDC_RDMA_EVENT_CRIT_ERR)) {
+   ibdev_warn(&iwdev->ibdev, "ICE OICR event notification: oicr = 
0x%08x\n",
+  event->reg);
+   if (event->reg & IRDMAPFINT_OICR_PE_CRITERR_M) {
+   u32 pe_criterr;
+
+   pe_criterr = 
readl(iwdev->rf->sc_dev.hw_regs[IRDMA_GLPE_CRITERR]);
+#define IRDMA_Q1_RESOURCE_ERR 0x0001024d
+   if (pe_criterr

[Intel-wired-lan] [rdma v3 18/24] RDMA/irdma: Support 64-byte CQEs and GEN3 CQE opcode decoding

2025-02-07 Thread Tatyana Nikolova
From: Shiraz Saleem 

Introduce support for 64-byte CQEs in GEN3 devices. Additionally,
implement GEN3-specific CQE opcode decoding.

Signed-off-by: Shiraz Saleem 
Signed-off-by: Tatyana Nikolova 
---

v3:
* Fix detection of an empty CQ when avoid_mem_cflct is on.
* In resize CQ, do not double the CQ size if avoid_mem_cflct is on.
* Make CQ size an even number, which is a GEN3 HW requirement.

 drivers/infiniband/hw/irdma/main.h  |  2 +-
 drivers/infiniband/hw/irdma/utils.c |  5 -
 drivers/infiniband/hw/irdma/verbs.c | 30 -
 drivers/infiniband/hw/irdma/verbs.h | 13 +
 4 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/main.h 
b/drivers/infiniband/hw/irdma/main.h
index f0196aafe59b..0c7f5f730f1f 100644
--- a/drivers/infiniband/hw/irdma/main.h
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -69,7 +69,7 @@ extern struct idc_rdma_core_auxiliary_drv 
icrdma_core_auxiliary_drv;
 #define IRDMA_MACIP_ADD1
 #define IRDMA_MACIP_DELETE 2
 
-#define IW_CCQ_SIZE(IRDMA_CQP_SW_SQSIZE_2048 + 1)
+#define IW_CCQ_SIZE(IRDMA_CQP_SW_SQSIZE_2048 + 2)
 #define IW_CEQ_SIZE2048
 #define IW_AEQ_SIZE2048
 
diff --git a/drivers/infiniband/hw/irdma/utils.c 
b/drivers/infiniband/hw/irdma/utils.c
index 87c88be47ee3..60ef85e842d1 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -2381,7 +2381,10 @@ bool irdma_cq_empty(struct irdma_cq *iwcq)
u8 polarity;
 
ukcq  = &iwcq->sc_cq.cq_uk;
-   cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq);
+   if (ukcq->avoid_mem_cflct)
+   cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(ukcq);
+   else
+   cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq);
get_64bit_val(cqe, 24, &qword3);
polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3);
 
diff --git a/drivers/infiniband/hw/irdma/verbs.c 
b/drivers/infiniband/hw/irdma/verbs.c
index b5fe5f2fa68b..82a7cec25b52 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -1971,8 +1971,13 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int 
entries,
 
if (!iwcq->user_mode) {
entries++;
-   if (rf->sc_dev.hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+
+   if (!iwcq->sc_cq.cq_uk.avoid_mem_cflct &&
+   dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
entries *= 2;
+
+   if (entries & 1)
+   entries += 1; /* cq size must be an even number */
}
 
info.cq_size = max(entries, 4);
@@ -2115,6 +2120,7 @@ static int irdma_create_cq(struct ib_cq *ibcq,
unsigned long flags;
int err_code;
int entries = attr->cqe;
+   bool cqe_64byte_ena;
 
err_code = cq_validate_flags(attr->flags, 
dev->hw_attrs.uk_attrs.hw_rev);
if (err_code)
@@ -2138,6 +2144,9 @@ static int irdma_create_cq(struct ib_cq *ibcq,
info.dev = dev;
ukinfo->cq_size = max(entries, 4);
ukinfo->cq_id = cq_num;
+   cqe_64byte_ena = dev->hw_attrs.uk_attrs.feature_flags & 
IRDMA_FEATURE_64_BYTE_CQE ?
+true : false;
+   ukinfo->avoid_mem_cflct = cqe_64byte_ena;
iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
if (attr->comp_vector < rf->ceqs_count)
info.ceq_id = attr->comp_vector;
@@ -2213,11 +,18 @@ static int irdma_create_cq(struct ib_cq *ibcq,
}
 
entries++;
-   if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
+   if (!cqe_64byte_ena && dev->hw_attrs.uk_attrs.hw_rev >= 
IRDMA_GEN_2)
entries *= 2;
+
+   if (entries & 1)
+   entries += 1; /* cq size must be an even number */
+
ukinfo->cq_size = entries;
 
-   rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe);
+   if (cqe_64byte_ena)
+   rsize = info.cq_uk_init_info.cq_size * sizeof(struct 
irdma_extended_cqe);
+   else
+   rsize = info.cq_uk_init_info.cq_size * sizeof(struct 
irdma_cqe);
iwcq->kmem.size = ALIGN(round_up(rsize, 256), 256);
iwcq->kmem.va = dma_alloc_coherent(dev->hw->device,
   iwcq->kmem.size,
@@ -3775,8 +3791,12 @@ static void irdma_process_cqe(struct ib_wc *entry,
if (cq_poll_info->q_type == IRDMA_CQE_QTYPE_SQ) {
set_ib_wc_op_sq(cq_poll_info, entry);
} else {
-   set_ib_wc_op_rq(cq_poll_info, entry,
-   qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM);
+   if (qp->dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2)
+   set_ib_wc_op_rq(cq_poll_info, entry,
+   qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM 
?
+   true : fal

[Intel-wired-lan] [rdma v3 22/24] RDMA/irdma: Extend CQE Error and Flush Handling for GEN3 Devices

2025-02-07 Thread Tatyana Nikolova
From: Shiraz Saleem 

Enhance the CQE error and flush handling specific to GEN3 devices.
Unlike GEN1/2 devices, which depend on software to generate completions
in error, GEN3 devices leverage firmware to generate CQEs in error for
all WQEs posted after a QP moves to an error state.

Key changes include:
- Updating the CQ poll logic to properly advance the CQ head in the
event of a flush CQE.
- Updating the flush logic for GEN3 to pass the error WQE index for the
SQ on an AE, in order to flush out unprocessed WQEs in error.
- Isolating the decoding of AE to flush codes into a separate routine
irdma_ae_to_qp_err_code. This routine can now be leveraged to
flush error CQEs on an AE and when error CQE is received for SRQ.

Signed-off-by: Shiraz Saleem 
Signed-off-by: Tatyana Nikolova 
---

v3:
* Use irdma_aeqe_info to update QP err_rq_idx when setting
the QP flush fields.
* Remove the checks which prevented the use of the delayed
flush worker thread for GEN3.

 drivers/infiniband/hw/irdma/ctrl.c  |   9 ++
 drivers/infiniband/hw/irdma/defs.h  | 105 +--
 drivers/infiniband/hw/irdma/hw.c|  99 --
 drivers/infiniband/hw/irdma/type.h  |  14 +-
 drivers/infiniband/hw/irdma/uk.c|  39 +-
 drivers/infiniband/hw/irdma/user.h  | 194 +++-
 drivers/infiniband/hw/irdma/verbs.c |  10 +-
 7 files changed, 280 insertions(+), 190 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/ctrl.c 
b/drivers/infiniband/hw/irdma/ctrl.c
index 75411c4b68fb..4158db00085f 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -2674,6 +2674,12 @@ int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
info->ae_code | FIELD_PREP(IRDMA_CQPSQ_FWQE_AESOURCE,
   info->ae_src) : 0;
set_64bit_val(wqe, 8, temp);
+   if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) {
+   set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_SQ_IDX, 
info->err_sq_idx));
+   set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_RQ_IDX, 
info->err_rq_idx));
+   }
 
hdr = qp->qp_uk.qp_id |
  FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_FLUSH_WQES) |
@@ -2682,6 +2688,9 @@ int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp,
  FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHSQ, flush_sq) |
  FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHRQ, flush_rq) |
  FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
+   if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+   hdr |= FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_SQ_IDX_VALID, 
info->err_sq_idx_valid) |
+  FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_RQ_IDX_VALID, 
info->err_rq_idx_valid);
dma_wmb(); /* make sure WQE is written before valid bit is set */
 
set_64bit_val(wqe, 24, hdr);
diff --git a/drivers/infiniband/hw/irdma/defs.h 
b/drivers/infiniband/hw/irdma/defs.h
index 3b3680816a65..983b22d7ae23 100644
--- a/drivers/infiniband/hw/irdma/defs.h
+++ b/drivers/infiniband/hw/irdma/defs.h
@@ -301,107 +301,6 @@ enum irdma_cqp_op_type {
 #define IRDMA_CQP_OP_GATHER_STATS  0x2e
 #define IRDMA_CQP_OP_UP_MAP0x2f
 
-/* Async Events codes */
-#define IRDMA_AE_AMP_UNALLOCATED_STAG  0x0102
-#define IRDMA_AE_AMP_INVALID_STAG  0x0103
-#define IRDMA_AE_AMP_BAD_QP0x0104
-#define IRDMA_AE_AMP_BAD_PD0x0105
-#define IRDMA_AE_AMP_BAD_STAG_KEY  0x0106
-#define IRDMA_AE_AMP_BAD_STAG_INDEX0x0107
-#define IRDMA_AE_AMP_BOUNDS_VIOLATION  0x0108
-#define IRDMA_AE_AMP_RIGHTS_VIOLATION  0x0109
-#define IRDMA_AE_AMP_TO_WRAP   0x010a
-#define IRDMA_AE_AMP_FASTREG_VALID_STAG
0x010c
-#define IRDMA_AE_AMP_FASTREG_MW_STAG   0x010d
-#define IRDMA_AE_AMP_FASTREG_INVALID_RIGHTS0x010e
-#define IRDMA_AE_AMP_FASTREG_INVALID_LENGTH0x0110
-#define IRDMA_AE_AMP_INVALIDATE_SHARED 0x0111
-#define IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS
0x0112
-#define IRDMA_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS  0x0113
-#define IRDMA_AE_AMP_MWBIND_VALID_STAG 0x0114
-#define IRDMA_AE_AMP_MWBIND_OF_MR_STAG 0x0115
-#define IRDMA_AE_AMP_MWBIND_TO_ZERO_BASED_STAG 0x0116
-#define IRDMA_AE_AMP_MWBIND_TO_MW_STAG 0x0117
-#define IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS 0x0118
-#define IRDM

[Intel-wired-lan] [rdma v3 19/24] RDMA/irdma: Add SRQ support

2025-02-07 Thread Tatyana Nikolova
From: Faisal Latif 

Implement verb API and UAPI changes to support SRQ functionality in GEN3
devices.

Signed-off-by: Faisal Latif 
Signed-off-by: Tatyana Nikolova 
---

v3:
* Clean up SRQ unrelated changes.
* Do not use wqe_idx to get SRQ context in irdma_sc_get_next_aeqe()
because its lower 6 bits are not correctly set.

 drivers/infiniband/hw/irdma/ctrl.c  | 236 +-
 drivers/infiniband/hw/irdma/defs.h  |  36 ++-
 drivers/infiniband/hw/irdma/hw.c|  21 +-
 drivers/infiniband/hw/irdma/irdma.h |   1 +
 drivers/infiniband/hw/irdma/main.h  |  12 +-
 drivers/infiniband/hw/irdma/type.h  |  66 
 drivers/infiniband/hw/irdma/uk.c| 162 +-
 drivers/infiniband/hw/irdma/user.h  |  41 +++
 drivers/infiniband/hw/irdma/utils.c |  27 ++
 drivers/infiniband/hw/irdma/verbs.c | 475 +++-
 drivers/infiniband/hw/irdma/verbs.h |  25 ++
 include/uapi/rdma/irdma-abi.h   |  15 +-
 12 files changed, 1103 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/ctrl.c 
b/drivers/infiniband/hw/irdma/ctrl.c
index d01c55172f6a..c4da8f981dac 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -412,7 +412,8 @@ int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct 
irdma_qp_init_info *info)
pble_obj_cnt = info->pd->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt;
 
if ((info->virtual_map && info->sq_pa >= pble_obj_cnt) ||
-   (info->virtual_map && info->rq_pa >= pble_obj_cnt))
+   (!info->qp_uk_init_info.srq_uk &&
+info->virtual_map && info->rq_pa >= pble_obj_cnt))
return -EINVAL;
 
qp->llp_stream_handle = (void *)(-1);
@@ -446,6 +447,208 @@ int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct 
irdma_qp_init_info *info)
return 0;
 }
 
+/**
+ * irdma_sc_srq_init - init sc_srq structure
+ * @srq: srq sc struct
+ * @info: parameters for srq init
+ */
+int irdma_sc_srq_init(struct irdma_sc_srq *srq,
+ struct irdma_srq_init_info *info)
+{
+   u32 srq_size_quanta;
+   int ret_code;
+
+   ret_code = irdma_uk_srq_init(&srq->srq_uk, &info->srq_uk_init_info);
+   if (ret_code)
+   return ret_code;
+
+   srq->dev = info->pd->dev;
+   srq->pd = info->pd;
+   srq->vsi = info->vsi;
+   srq->srq_pa = info->srq_pa;
+   srq->first_pm_pbl_idx = info->first_pm_pbl_idx;
+   srq->pasid = info->pasid;
+   srq->pasid_valid = info->pasid_valid;
+   srq->srq_limit = info->srq_limit;
+   srq->leaf_pbl_size = info->leaf_pbl_size;
+   srq->virtual_map = info->virtual_map;
+   srq->tph_en = info->tph_en;
+   srq->arm_limit_event = info->arm_limit_event;
+   srq->tph_val = info->tph_value;
+   srq->shadow_area_pa = info->shadow_area_pa;
+
+   /* Smallest SRQ size is 256B i.e. 8 quanta */
+   srq_size_quanta = max((u32)IRDMA_SRQ_MIN_QUANTA,
+ srq->srq_uk.srq_size *
+ srq->srq_uk.wqe_size_multiplier);
+   srq->hw_srq_size = irdma_get_encoded_wqe_size(srq_size_quanta,
+ IRDMA_QUEUE_TYPE_SRQ);
+
+   return 0;
+}
+
+/**
+ * irdma_sc_srq_create - send srq create CQP WQE
+ * @srq: srq sc struct
+ * @scratch: u64 saved to be used during cqp completion
+ * @post_sq: flag for cqp db to ring
+ */
+static int irdma_sc_srq_create(struct irdma_sc_srq *srq, u64 scratch,
+  bool post_sq)
+{
+   struct irdma_sc_cqp *cqp;
+   __le64 *wqe;
+   u64 hdr;
+
+   cqp = srq->pd->dev->cqp;
+   if (srq->srq_uk.srq_id < cqp->dev->hw_attrs.min_hw_srq_id ||
+   srq->srq_uk.srq_id >
+   (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].max_cnt - 1))
+   return -EINVAL;
+
+   wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch);
+   if (!wqe)
+   return -ENOMEM;
+
+   set_64bit_val(wqe, 0,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_SRQ_LIMIT, srq->srq_limit) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_RQSIZE, srq->hw_srq_size) |
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_RQ_WQE_SIZE, 
srq->srq_uk.wqe_size));
+   set_64bit_val(wqe, 8, (uintptr_t)srq);
+   set_64bit_val(wqe, 16,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_PD_ID, srq->pd->pd_id));
+   set_64bit_val(wqe, 32,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR,
+srq->srq_pa >>
+IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR_S));
+   set_64bit_val(wqe, 40,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR,
+srq->shadow_area_pa >>
+IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR_S));
+   set_64bit_val(wqe, 48,
+ FIELD_PREP(IRDMA_CQPSQ_SRQ_FIRST_PM_PBL_IDX,
+srq->first_pm_pbl_idx));
+
+   hdr = srq->srq_uk.srq_id

[Intel-wired-lan] [rdma v3 20/24] RDMA/irdma: Restrict Memory Window and CQE Timestamping to GEN3

2025-02-07 Thread Tatyana Nikolova
From: Shiraz Saleem 

With the deprecation of Memory Window and Timestamping support in GEN2,
move these features to be exclusive to GEN3. This iteration supports
only Type2 Memory Windows. Additionally, it includes the reporting of
the timestamp mask and Host Channel Adapter (HCA) core clock frequency
via the query device verb.

Signed-off-by: Shiraz Saleem 
Signed-off-by: Tatyana Nikolova 
---
 drivers/infiniband/hw/irdma/verbs.c | 42 ++---
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/verbs.c 
b/drivers/infiniband/hw/irdma/verbs.c
index 4ab81bf60543..fc5b9b629a51 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -41,7 +41,8 @@ static int irdma_query_device(struct ib_device *ibdev,
props->max_cq = rf->max_cq - rf->used_cqs;
props->max_cqe = rf->max_cqe - 1;
props->max_mr = rf->max_mr - rf->used_mrs;
-   props->max_mw = props->max_mr;
+   if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3)
+   props->max_mw = props->max_mr;
props->max_pd = rf->max_pd - rf->used_pds;
props->max_sge_rd = hw_attrs->uk_attrs.max_hw_read_sges;
props->max_qp_rd_atom = hw_attrs->max_hw_ird;
@@ -56,12 +57,16 @@ static int irdma_query_device(struct ib_device *ibdev,
props->max_mcast_qp_attach = IRDMA_MAX_MGS_PER_CTX;
props->max_total_mcast_qp_attach = rf->max_qp * IRDMA_MAX_MGS_PER_CTX;
props->max_fast_reg_page_list_len = IRDMA_MAX_PAGES_PER_FMR;
-#define HCA_CLOCK_TIMESTAMP_MASK 0x1
-   if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_2)
-   props->timestamp_mask = HCA_CLOCK_TIMESTAMP_MASK;
props->max_srq = rf->max_srq - rf->used_srqs;
props->max_srq_wr = IRDMA_MAX_SRQ_WRS;
props->max_srq_sge = hw_attrs->uk_attrs.max_hw_wq_frags;
+   if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3) {
+#define HCA_CORE_CLOCK_KHZ 100UL
+   props->timestamp_mask = GENMASK(31, 0);
+   props->hca_core_clock = HCA_CORE_CLOCK_KHZ;
+   }
+   if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3)
+   props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
 
return 0;
 }
@@ -798,7 +803,8 @@ static void irdma_roce_fill_and_set_qpctx_info(struct 
irdma_qp *iwqp,
roce_info->is_qp1 = true;
roce_info->rd_en = true;
roce_info->wr_rdresp_en = true;
-   roce_info->bind_en = true;
+   if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3)
+   roce_info->bind_en = true;
roce_info->dcqcn_en = false;
roce_info->rtomin = 5;
 
@@ -829,7 +835,6 @@ static void irdma_iw_fill_and_set_qpctx_info(struct 
irdma_qp *iwqp,
ether_addr_copy(iwarp_info->mac_addr, iwdev->netdev->dev_addr);
iwarp_info->rd_en = true;
iwarp_info->wr_rdresp_en = true;
-   iwarp_info->bind_en = true;
iwarp_info->ecn_en = true;
iwarp_info->rtomin = 5;
 
@@ -1147,8 +1152,6 @@ static int irdma_get_ib_acc_flags(struct irdma_qp *iwqp)
}
if (iwqp->iwarp_info.rd_en)
acc_flags |= IB_ACCESS_REMOTE_READ;
-   if (iwqp->iwarp_info.bind_en)
-   acc_flags |= IB_ACCESS_MW_BIND;
}
return acc_flags;
 }
@@ -2433,8 +2436,8 @@ static int irdma_query_srq(struct ib_srq *ibsrq, struct 
ib_srq_attr *attr)
 
 static inline int cq_validate_flags(u32 flags, u8 hw_rev)
 {
-   /* GEN1 does not support CQ create flags */
-   if (hw_rev == IRDMA_GEN_1)
+   /* GEN1/2 does not support CQ create flags */
+   if (hw_rev <= IRDMA_GEN_2)
return flags ? -EOPNOTSUPP : 0;
 
return flags & ~IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION ? -EOPNOTSUPP : 
0;
@@ -2660,8 +2663,9 @@ static int irdma_create_cq(struct ib_cq *ibcq,
 /**
  * irdma_get_mr_access - get hw MR access permissions from IB access flags
  * @access: IB access flags
+ * @hw_rev: Hardware version
  */
-static inline u16 irdma_get_mr_access(int access)
+static inline u16 irdma_get_mr_access(int access, u8 hw_rev)
 {
u16 hw_access = 0;
 
@@ -2671,8 +2675,10 @@ static inline u16 irdma_get_mr_access(int access)
 IRDMA_ACCESS_FLAGS_REMOTEWRITE : 0;
hw_access |= (access & IB_ACCESS_REMOTE_READ) ?
 IRDMA_ACCESS_FLAGS_REMOTEREAD : 0;
-   hw_access |= (access & IB_ACCESS_MW_BIND) ?
-IRDMA_ACCESS_FLAGS_BIND_WINDOW : 0;
+   if (hw_rev >= IRDMA_GEN_3) {
+   hw_access |= (access & IB_ACCESS_MW_BIND) ?
+IRDMA_ACCESS_FLAGS_BIND_WINDOW : 0;
+   }
hw_access |= (access & IB_ZERO_BASED) ?
 IRDMA_ACCESS_FLAGS_ZERO_BASED : 0;
hw_access |= IRDMA_ACCESS_FLAGS_LOCALREAD;
@@ -3242,7 +3248,8 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, 
struct irdma_mr *iwmr,
stag_info->stag_idx = iwmr->stag >> IRDMA_CQ

[Intel-wired-lan] [rdma v3 21/24] RDMA/irdma: Add Atomic Operations support

2025-02-07 Thread Tatyana Nikolova
From: Faisal Latif 

Extend irdma to support atomic operations, namely Compare and Swap and
Fetch and Add, for GEN3 devices.

Signed-off-by: Faisal Latif 
Signed-off-by: Tatyana Nikolova 
---

v3: Check IRDMA_ATOMICS_ALLOWED_BIT after the feature info has been
read from FW.

 drivers/infiniband/hw/irdma/ctrl.c   |  11 +++
 drivers/infiniband/hw/irdma/defs.h   |  10 ++-
 drivers/infiniband/hw/irdma/ig3rdma_hw.c |   3 -
 drivers/infiniband/hw/irdma/type.h   |   4 +
 drivers/infiniband/hw/irdma/uk.c | 102 +++
 drivers/infiniband/hw/irdma/user.h   |  27 ++
 drivers/infiniband/hw/irdma/verbs.c  |  38 +
 drivers/infiniband/hw/irdma/verbs.h  |   6 ++
 8 files changed, 197 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/ctrl.c 
b/drivers/infiniband/hw/irdma/ctrl.c
index c4da8f981dac..75411c4b68fb 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -1110,6 +1110,8 @@ static void irdma_sc_qp_setctx_roce_gen_3(struct 
irdma_sc_qp *qp,
  FIELD_PREP(IRDMAQPC_UDPRIVCQENABLE,
 roce_info->udprivcq_en) |
  FIELD_PREP(IRDMAQPC_PRIVEN, roce_info->priv_mode_en) |
+ FIELD_PREP(IRDMAQPC_REMOTE_ATOMIC_EN,
+info->remote_atomics_en) |
  FIELD_PREP(IRDMAQPC_TIMELYENABLE, roce_info->timely_en));
set_64bit_val(qp_ctx, 168,
  FIELD_PREP(IRDMAQPC_QPCOMPCTX, info->qp_compl_ctx));
@@ -1490,6 +1492,8 @@ static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev,
  FIELD_PREP(IRDMA_CQPSQ_STAG_REMACCENABLED, info->remote_access) |
  FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index) 
|
  FIELD_PREP(IRDMA_CQPSQ_STAG_USEPFRID, info->use_pf_rid) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN,
+info->remote_atomics_en) |
  FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
dma_wmb(); /* make sure WQE is written before valid bit is set */
 
@@ -1582,6 +1586,8 @@ static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev 
*dev,
  FIELD_PREP(IRDMA_CQPSQ_STAG_VABASEDTO, addr_type) |
  FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index) 
|
  FIELD_PREP(IRDMA_CQPSQ_STAG_USEPFRID, info->use_pf_rid) |
+ FIELD_PREP(IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN,
+info->remote_atomics_en) |
  FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity);
dma_wmb(); /* make sure WQE is written before valid bit is set */
 
@@ -1740,6 +1746,7 @@ int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp,
  FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) |
  FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) |
  FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) |
+ FIELD_PREP(IRDMAQPSQ_REMOTE_ATOMICS_EN, info->remote_atomics_en) |
  FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity);
dma_wmb(); /* make sure WQE is written before valid bit is set */
 
@@ -5542,6 +5549,10 @@ int irdma_get_rdma_features(struct irdma_sc_dev *dev)
}
dev->feature_info[feat_type] = temp;
}
+
+   if (dev->feature_info[IRDMA_FTN_FLAGS] & IRDMA_ATOMICS_ALLOWED_BIT)
+   dev->hw_attrs.uk_attrs.feature_flags |= 
IRDMA_FEATURE_ATOMIC_OPS;
+
 exit:
dma_free_coherent(dev->hw->device, feat_buf.size, feat_buf.va,
  feat_buf.pa);
diff --git a/drivers/infiniband/hw/irdma/defs.h 
b/drivers/infiniband/hw/irdma/defs.h
index 408058b6ba55..3b3680816a65 100644
--- a/drivers/infiniband/hw/irdma/defs.h
+++ b/drivers/infiniband/hw/irdma/defs.h
@@ -189,6 +189,8 @@ enum irdma_protocol_used {
 #define IRDMAQP_OP_RDMA_READ_LOC_INV   0x0b
 #define IRDMAQP_OP_NOP 0x0c
 #define IRDMAQP_OP_RDMA_WRITE_SOL  0x0d
+#define IRDMAQP_OP_ATOMIC_FETCH_ADD0x0f
+#define IRDMAQP_OP_ATOMIC_COMPARE_SWAP_ADD 0x11
 #define IRDMAQP_OP_GEN_RTS_AE  0x30
 
 enum irdma_cqp_op_type {
@@ -694,7 +696,8 @@ enum irdma_cqp_op_type {
 #define IRDMA_CQPSQ_STAG_USEPFRID BIT_ULL(61)
 
 #define IRDMA_CQPSQ_STAG_PBA IRDMA_CQPHC_QPCTX
-#define IRDMA_CQPSQ_STAG_HMCFNIDX GENMASK_ULL(5, 0)
+#define IRDMA_CQPSQ_STAG_HMCFNIDX GENMASK_ULL(15, 0)
+#define IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN BIT_ULL(61)
 
 #define IRDMA_CQPSQ_STAG_FIRSTPMPBLIDX GENMASK_ULL(27, 0)
 #define IRDMA_CQPSQ_QUERYSTAG_IDX IRDMA_CQPSQ_STAG_IDX
@@ -981,6 +984,9 @@ enum irdma_cqp_op_type {
 
 #define IRDMAQPSQ_REMTO IRDMA_CQPHC_QPCTX
 
+#define IRDMAQPSQ_STAG GENMASK_ULL(31, 0)
+#define IRDMAQPSQ_REMOTE_STAG GENMASK_ULL(31, 0)
+
 #define IRDMAQPSQ_STAGRIGHTS GENMASK_ULL(52, 48)
 #define IRDMAQPSQ_VABASEDTO BIT_ULL(53)
 #define IRDMAQPSQ_MEMWINDOWTYPE BIT_ULL(54)
@@ -991,6 +997,8 @@ enu

[Intel-wired-lan] [rdma v3 14/24] RDMA/irdma: Introduce GEN3 vPort driver support

2025-02-07 Thread Tatyana Nikolova
From: Mustafa Ismail 

In the IPU model, a function can host one or more logical network
endpoints called vPorts. Each vPort may be associated with either a
physical or an internal communication port, and can be RDMA capable. A
vPort features a netdev and, if RDMA capable, must have an associated
ib_dev.

This change introduces a GEN3 auxiliary vPort driver responsible for
registering a verbs device for every RDMA-capable vPort. Additionally,
the UAPI is updated to prevent the binding of GEN3 devices to older
user-space providers.

Signed-off-by: Mustafa Ismail 
Signed-off-by: Tatyana Nikolova 
---
 drivers/infiniband/hw/irdma/ig3rdma_if.c | 108 +++
 drivers/infiniband/hw/irdma/main.c   |  12 +++
 drivers/infiniband/hw/irdma/main.h   |   3 +
 drivers/infiniband/hw/irdma/verbs.c  |  12 ++-
 include/uapi/rdma/irdma-abi.h|   1 +
 5 files changed, 134 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/ig3rdma_if.c 
b/drivers/infiniband/hw/irdma/ig3rdma_if.c
index 9d0623467af2..2c7dc93dc576 100644
--- a/drivers/infiniband/hw/irdma/ig3rdma_if.c
+++ b/drivers/infiniband/hw/irdma/ig3rdma_if.c
@@ -14,6 +14,23 @@ static void ig3rdma_idc_core_event_handler(struct 
idc_rdma_core_dev_info *cdev_i
}
 }
 
+static void ig3rdma_idc_vport_event_handler(struct idc_rdma_vport_dev_info 
*cdev_info,
+   struct idc_rdma_event *event)
+{
+   struct irdma_device *iwdev = auxiliary_get_drvdata(cdev_info->adev);
+   struct irdma_l2params l2params = {};
+
+   if (*event->type & BIT(IDC_RDMA_EVENT_AFTER_MTU_CHANGE)) {
+   ibdev_dbg(&iwdev->ibdev, "CLNT: new MTU = %d\n", 
iwdev->netdev->mtu);
+   if (iwdev->vsi.mtu != iwdev->netdev->mtu) {
+   l2params.mtu = iwdev->netdev->mtu;
+   l2params.mtu_changed = true;
+   irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev);
+   irdma_change_l2params(&iwdev->vsi, &l2params);
+   }
+   }
+}
+
 static int ig3rdma_cfg_regions(struct irdma_hw *hw,
   struct idc_rdma_core_dev_info *cdev_info)
 {
@@ -169,3 +186,94 @@ struct idc_rdma_core_auxiliary_drv 
ig3rdma_core_auxiliary_drv = {
},
.event_handler = ig3rdma_idc_core_event_handler,
 };
+
+static int ig3rdma_vport_probe(struct auxiliary_device *aux_dev,
+  const struct auxiliary_device_id *id)
+{
+   struct idc_rdma_vport_auxiliary_dev *idc_adev =
+   container_of(aux_dev, struct idc_rdma_vport_auxiliary_dev, 
adev);
+   struct auxiliary_device *aux_core_dev = idc_adev->vdev_info->core_adev;
+   struct irdma_pci_f *rf = auxiliary_get_drvdata(aux_core_dev);
+   struct iidc_rdma_qos_params qos_info = {};
+   struct irdma_l2params l2params = {};
+   struct irdma_device *iwdev;
+   int err;
+
+   if (!rf) {
+   WARN_ON_ONCE(1);
+   return -ENOMEM;
+   }
+   iwdev = ib_alloc_device(irdma_device, ibdev);
+   /* Fill iwdev info */
+   iwdev->is_vport = true;
+   iwdev->rf = rf;
+   iwdev->vport_id = idc_adev->vdev_info->vport_id;
+   iwdev->netdev = idc_adev->vdev_info->netdev;
+   iwdev->init_state = INITIAL_STATE;
+   iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT;
+   iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT;
+   iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED;
+   iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE;
+   iwdev->roce_mode = true;
+   iwdev->push_mode = true;
+
+   l2params.mtu = iwdev->netdev->mtu;
+   irdma_fill_qos_info(&l2params, &qos_info);
+
+   err = irdma_rt_init_hw(iwdev, &l2params);
+   if (err)
+   goto err_rt_init;
+
+   err = irdma_ib_register_device(iwdev);
+   if (err)
+   goto err_ibreg;
+
+   auxiliary_set_drvdata(aux_dev, iwdev);
+
+   ibdev_dbg(&iwdev->ibdev,
+ "INIT: Gen[%d] vport[%d] probe success. dev_name = %s, 
core_dev_name = %s, netdev=%s\n",
+ rf->rdma_ver, idc_adev->vdev_info->vport_id,
+ dev_name(&aux_dev->dev),
+ dev_name(&idc_adev->vdev_info->core_adev->dev),
+ netdev_name(idc_adev->vdev_info->netdev));
+
+   return 0;
+err_ibreg:
+   irdma_rt_deinit_hw(iwdev);
+err_rt_init:
+   ib_dealloc_device(&iwdev->ibdev);
+
+   return err;
+}
+
+static void ig3rdma_vport_remove(struct auxiliary_device *aux_dev)
+{
+   struct idc_rdma_vport_auxiliary_dev *idc_adev =
+   container_of(aux_dev, struct idc_rdma_vport_auxiliary_dev, 
adev);
+   struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev);
+
+   ibdev_dbg(&iwdev->ibdev,
+ "INIT: Gen[%d] dev_name = %s, core_dev_name = %s, 
netdev=%s\n",
+ iwdev->rf->rdma_ver, dev_name(&aux_dev->dev),
+ dev_name(&id

Re: [Intel-wired-lan] [PATCH v5 iwl-next 00/10] idpf: add initial PTP support

2025-02-07 Thread Singh, Krishneil K


> -Original Message-
> From: Intel-wired-lan  On Behalf Of
> Milena Olech
> Sent: Friday, January 17, 2025 4:41 AM
> To: intel-wired-...@lists.osuosl.org
> Cc: net...@vger.kernel.org; Nguyen, Anthony L
> ; Kitszel, Przemyslaw
> ; Olech, Milena 
> Subject: [Intel-wired-lan] [PATCH v5 iwl-next 00/10] idpf: add initial PTP
> support
> 
> This patch series introduces support for Precision Time Protocol (PTP) to
> Intel(R) Infrastructure Data Path Function (IDPF) driver. PTP feature is
> supported when the PTP capability is negotiated with the Control
> Plane (CP). IDPF creates a PTP clock and sets a set of supported
> functions.
> 
> During the PTP initialization, IDPF requests a set of PTP capabilities
> and receives a writeback from the CP with the set of supported options.
> These options are:
> - get time of the PTP clock
> - get cross timestamp
> - set the time of the PTP clock
> - adjust the PTP clock
> - Tx timestamping
> 
> Each feature is considered to have direct access, where the operations
> on PCIe BAR registers are allowed, or the mailbox access, where the
> virtchnl messages are used to perform any PTP action. Mailbox access
> means that PTP requests are sent to the CP through dedicated secondary
> mailbox and the CP reads/writes/modifies desired resource - PTP Clock
> or Tx timestamp registers.
> 
> Tx timestamp capabilities are negotiated only for vports that have
> UPLINK_VPORT flag set by the CP. Capabilities provide information about
> the number of available Tx timestamp latches, their indexes and size of
> the Tx timestamp value. IDPF requests Tx timestamp by setting the
> TSYN bit and the requested timestamp index in the context descriptor for
> the PTP packets. When the completion tag for that packet is received,
> IDPF schedules a worker to read the Tx timestamp value.
> 
> The current implementation of the IDPF driver does not allow stable
> Tx timestamping when more than one request per second is sent to the
> driver. Debugging is in progress; however, the PTP feature seems to be
> affected by the IDPF transmit flow, as Tx timestamping relies on the
> completion tag.
> 
> v4 -> v5: fix spin unlock when Tx timestamp index is requested
> v3 -> v4: change timestamp filters dependent on Tx timestamp cap,
> rewrite function that extends Tx timestamp value, minor fixes
> v2 -> v3: fix minor issues, revert idpf_for_each_vport changes,
> extend idpf_ptp_set_rx_tstamp, split tstamp statistics
> v1 -> v2: add stats for timestamping, use ndo_hwtamp_get/set,
> fix minor spelling issues
> 
> Milena Olech (10):
>   idpf: add initial PTP support
>   virtchnl: add PTP virtchnl definitions
>   idpf: move virtchnl structures to the header file
>   idpf: negotiate PTP capabilities and get PTP clock
>   idpf: add mailbox access to read PTP clock time
>   idpf: add PTP clock configuration
>   idpf: add Tx timestamp capabilities negotiation
>   idpf: add Tx timestamp flows
>   idpf: add support for Rx timestamping
>   idpf: change the method for mailbox workqueue allocation
> 
>  drivers/net/ethernet/intel/idpf/Kconfig   |   1 +
>  drivers/net/ethernet/intel/idpf/Makefile  |   3 +
>  drivers/net/ethernet/intel/idpf/idpf.h|  34 +
>  .../ethernet/intel/idpf/idpf_controlq_api.h   |   3 +
>  drivers/net/ethernet/intel/idpf/idpf_dev.c|  14 +
>  .../net/ethernet/intel/idpf/idpf_ethtool.c|  70 +-
>  .../ethernet/intel/idpf/idpf_lan_pf_regs.h|   4 +
>  .../net/ethernet/intel/idpf/idpf_lan_txrx.h   |  13 +-
>  drivers/net/ethernet/intel/idpf/idpf_lib.c|  47 +
>  drivers/net/ethernet/intel/idpf/idpf_main.c   |   9 +-
>  drivers/net/ethernet/intel/idpf/idpf_ptp.c| 983 ++
>  drivers/net/ethernet/intel/idpf/idpf_ptp.h| 351 +++
>  drivers/net/ethernet/intel/idpf/idpf_txrx.c   | 169 ++-
>  drivers/net/ethernet/intel/idpf/idpf_txrx.h   |  18 +-
>  .../net/ethernet/intel/idpf/idpf_virtchnl.c   | 160 ++-
>  .../net/ethernet/intel/idpf/idpf_virtchnl.h   |  84 ++
>  .../ethernet/intel/idpf/idpf_virtchnl_ptp.c   | 677 
>  drivers/net/ethernet/intel/idpf/virtchnl2.h   | 314 +-
>  18 files changed, 2852 insertions(+), 102 deletions(-)
>  create mode 100644 drivers/net/ethernet/intel/idpf/idpf_ptp.c
>  create mode 100644 drivers/net/ethernet/intel/idpf/idpf_ptp.h
>  create mode 100644 drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c
> 
> 
> base-commit: e1e8afea623cb80941623188a8190d3ca80a6e08
> --
> 2.31.1

While testing this series, the following issue was observed: the PTP 
hardware receive filter modes do not report all the modes supported by 
the hardware.
 
Time stamping parameters for ethX:
Capabilities:
hardware-transmit
software-transmit
hardware-receive
software-receive
software-system-clock
hardware-raw-clock
PTP Hardware Clock: 2
Hardware Transmit Timestamp Modes:
off
on
Hardware Receive Filter Modes: none

Thank You 
Krishneil Singh


Re: [Intel-wired-lan] [PATCH net-next v7 0/5] net: napi: add CPU affinity to napi->config

2025-02-07 Thread Jakub Kicinski
On Tue,  4 Feb 2025 15:06:17 -0700 Ahmed Zaki wrote:
> Drivers usually need to re-apply the user-set IRQ affinity to their IRQs
> after reset. However, since there can be only one IRQ affinity notifier
> for each IRQ, registering IRQ notifiers conflicts with the ARFS rmap
> management in the core (which also registers separate IRQ affinity
> notifiers).   
> 
> Move the IRQ affinity management to the napi struct. This way we can have
> a unified IRQ notifier to re-apply the user-set affinity and also manage
> the ARFS rmaps. The first patch  moves the ARFS rmap management to CORE.
> The second patch adds the IRQ affinity mask to napi_config and re-applies
> the mask after reset. Patches 3-5 use the new API for bnxt, ice and idpf
> drivers.

Hi Ahmed!

I put together a selftest for maintaining the affinity:
https://github.com/kuba-moo/linux/commit/de7d2475750ac05b6e414d7e5201e354b05cf146

It depends on a couple of selftest infra patches (in that branch) 
which I just posted to the list. But if you'd like you can use
it against your drivers.


[Intel-wired-lan] [iwl-next v3 07/24] idpf: implement get LAN mmio memory regions

2025-02-07 Thread Tatyana Nikolova
From: Joshua Hay 

The rdma driver needs to map its own MMIO regions for the sake of
performance, meaning the idpf needs to avoid mapping those portions of
the BAR space. However, to be vendor-agnostic, the idpf cannot assume
where these regions are and must avoid mapping hard-coded regions.
Instead, the idpf will map the bare minimum needed to load and
communicate with the control plane, i.e. the mailbox registers and the
reset state registers. The idpf will then call a new virtchnl op to
fetch a list of MMIO regions that it should map. All other registers
will calculate which mapped region their address falls into.

Signed-off-by: Joshua Hay 
Signed-off-by: Tatyana Nikolova 
---

v3:
- header cleanup

 drivers/net/ethernet/intel/idpf/idpf.h|  69 +++-
 .../net/ethernet/intel/idpf/idpf_controlq.c   |  14 +-
 .../net/ethernet/intel/idpf/idpf_controlq.h   |  15 +-
 drivers/net/ethernet/intel/idpf/idpf_dev.c|  35 ++--
 drivers/net/ethernet/intel/idpf/idpf_idc.c|  26 ++-
 drivers/net/ethernet/intel/idpf/idpf_main.c   |  32 +++-
 drivers/net/ethernet/intel/idpf/idpf_mem.h|   8 +-
 drivers/net/ethernet/intel/idpf/idpf_vf_dev.c |  31 ++--
 .../net/ethernet/intel/idpf/idpf_virtchnl.c   | 149 +-
 drivers/net/ethernet/intel/idpf/virtchnl2.h   |  31 +++-
 10 files changed, 359 insertions(+), 51 deletions(-)

diff --git a/drivers/net/ethernet/intel/idpf/idpf.h 
b/drivers/net/ethernet/intel/idpf/idpf.h
index 491db5b2d79d..84aa00e788ca 100644
--- a/drivers/net/ethernet/intel/idpf/idpf.h
+++ b/drivers/net/ethernet/intel/idpf/idpf.h
@@ -192,7 +192,8 @@ struct idpf_vport_max_q {
  * @trigger_reset: Trigger a reset to occur
  */
 struct idpf_reg_ops {
-   void (*ctlq_reg_init)(struct idpf_ctlq_create_info *cq);
+   void (*ctlq_reg_init)(struct idpf_adapter *adapter,
+ struct idpf_ctlq_create_info *cq);
int (*intr_reg_init)(struct idpf_vport *vport);
void (*mb_intr_reg_init)(struct idpf_adapter *adapter);
void (*reset_reg_init)(struct idpf_adapter *adapter);
@@ -200,15 +201,29 @@ struct idpf_reg_ops {
  enum idpf_flags trig_cause);
 };
 
+#define IDPF_PF_MBX_REGION_SZ  4096
+#define IDPF_PF_RSTAT_REGION_SZ2048
+#define IDPF_VF_MBX_REGION_SZ  10240
+#define IDPF_VF_RSTAT_REGION_SZ2048
+
 /**
  * struct idpf_dev_ops - Device specific operations
  * @reg_ops: Register operations
  * @idc_init: IDC initialization
+ * @mbx_reg_start: offset to start of mailbox registers
+ * @mbx_reg_sz: size of mailbox register region
+ * @rstat_reg_start: offset to start of rstat registers
+ * @rstat_reg_sz: size of rstat register region
  */
 struct idpf_dev_ops {
struct idpf_reg_ops reg_ops;
 
int (*idc_init)(struct idpf_adapter *adapter);
+
+   resource_size_t mbx_reg_start;
+   resource_size_t mbx_reg_sz;
+   resource_size_t rstat_reg_start;
+   resource_size_t rstat_reg_sz;
 };
 
 /**
@@ -734,6 +749,35 @@ static inline u8 idpf_get_min_tx_pkt_len(struct 
idpf_adapter *adapter)
return pkt_len ? pkt_len : IDPF_TX_MIN_PKT_LEN;
 }
 
+/**
+ * idpf_get_mbx_reg_addr - Get BAR0 mailbox register address
+ * @adapter: private data struct
+ * @reg_offset: register offset value
+ *
+ * Return: BAR0 mailbox register address based on register offset.
+ */
+static inline void __iomem *idpf_get_mbx_reg_addr(struct idpf_adapter *adapter,
+ resource_size_t reg_offset)
+{
+   return adapter->hw.mbx.addr + reg_offset;
+}
+
+/**
+ * idpf_get_rstat_reg_addr - Get BAR0 rstat register address
+ * @adapter: private data struct
+ * @reg_offset: register offset value
+ *
+ * Return: BAR0 rstat register address based on register offset.
+ */
+static inline
+void __iomem *idpf_get_rstat_reg_addr(struct idpf_adapter *adapter,
+ resource_size_t reg_offset)
+{
+   reg_offset -= adapter->dev_ops.rstat_reg_start;
+
+   return adapter->hw.rstat.addr + reg_offset;
+}
+
 /**
  * idpf_get_reg_addr - Get BAR0 register address
  * @adapter: private data struct
@@ -744,7 +788,26 @@ static inline u8 idpf_get_min_tx_pkt_len(struct 
idpf_adapter *adapter)
 static inline void __iomem *idpf_get_reg_addr(struct idpf_adapter *adapter,
  resource_size_t reg_offset)
 {
-   return (void __iomem *)(adapter->hw.hw_addr + reg_offset);
+   struct idpf_hw *hw = &adapter->hw;
+
+   for (int i = 0; i < hw->num_lan_regs; i++) {
+   struct idpf_mmio_reg *region = &hw->lan_regs[i];
+
+   if (reg_offset >= region->addr_start &&
+   reg_offset < (region->addr_start + region->addr_len)) {
+   reg_offset -= region->addr_start;
+
+   return region->addr + reg_offset;
+   }
+   }
+
+   /* It's impossible to hit this case with offsets from the CP. But if we
+ 

[Intel-wired-lan] [rdma v3 23/24] RDMA/irdma: Add Push Page Support for GEN3

2025-02-07 Thread Tatyana Nikolova
From: Jay Bhat 

Implement the necessary support for enabling push on GEN3 devices.

Key Changes:
- Introduce a RDMA virtual channel operation with the Control Plane (CP)
to manage the doorbell/push page which is a privileged operation.
- Implement the MMIO mapping of push pages which adheres to the updated
  BAR layout and page indexing specific to GEN3 devices.
- Support up to 16 QPs on a single push page, given that they are tied
to the same Queue Set.
- Impose limits on the size of WQEs pushed based on the message length
constraints provided by the CP.

Signed-off-by: Jay Bhat 
Signed-off-by: Tatyana Nikolova 
---

v3:
* Populate hmc_fn_id and use_hmc_fn_id fields in
irdma_cqp_manage_push_page_info structure.
* Remove logic for push page sharing among QPs.

 drivers/infiniband/hw/irdma/ctrl.c |  1 -
 drivers/infiniband/hw/irdma/defs.h |  2 +
 drivers/infiniband/hw/irdma/irdma.h|  1 +
 drivers/infiniband/hw/irdma/type.h |  3 ++
 drivers/infiniband/hw/irdma/user.h |  1 -
 drivers/infiniband/hw/irdma/utils.c| 21 +--
 drivers/infiniband/hw/irdma/verbs.c| 51 +-
 drivers/infiniband/hw/irdma/verbs.h|  3 ++
 drivers/infiniband/hw/irdma/virtchnl.c | 40 
 drivers/infiniband/hw/irdma/virtchnl.h | 11 ++
 include/uapi/rdma/irdma-abi.h  |  3 +-
 11 files changed, 121 insertions(+), 16 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/ctrl.c 
b/drivers/infiniband/hw/irdma/ctrl.c
index 4158db00085f..a6df550eb8c8 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -6584,7 +6584,6 @@ int irdma_sc_dev_init(enum irdma_vers ver, struct 
irdma_sc_dev *dev,
dev->hw_attrs.max_hw_outbound_msg_size = IRDMA_MAX_OUTBOUND_MSG_SIZE;
dev->hw_attrs.max_mr_size = IRDMA_MAX_MR_SIZE;
dev->hw_attrs.max_hw_inbound_msg_size = IRDMA_MAX_INBOUND_MSG_SIZE;
-   dev->hw_attrs.max_hw_device_pages = IRDMA_MAX_PUSH_PAGE_COUNT;
dev->hw_attrs.uk_attrs.max_hw_inline = IRDMA_MAX_INLINE_DATA_SIZE;
dev->hw_attrs.max_hw_wqes = IRDMA_MAX_WQ_ENTRIES;
dev->hw_attrs.max_qp_wr = IRDMA_MAX_QP_WRS(IRDMA_MAX_QUANTA_PER_WR);
diff --git a/drivers/infiniband/hw/irdma/defs.h 
b/drivers/infiniband/hw/irdma/defs.h
index 983b22d7ae23..46330513085b 100644
--- a/drivers/infiniband/hw/irdma/defs.h
+++ b/drivers/infiniband/hw/irdma/defs.h
@@ -167,6 +167,8 @@ enum irdma_protocol_used {
 #define IRDMA_MAX_RQ_WQE_SHIFT_GEN1 2
 #define IRDMA_MAX_RQ_WQE_SHIFT_GEN2 3
 
+#define IRDMA_DEFAULT_MAX_PUSH_LEN 8192
+
 #define IRDMA_SQ_RSVD  258
 #define IRDMA_RQ_RSVD  1
 
diff --git a/drivers/infiniband/hw/irdma/irdma.h 
b/drivers/infiniband/hw/irdma/irdma.h
index ff938a01d70c..def6a16f5d6e 100644
--- a/drivers/infiniband/hw/irdma/irdma.h
+++ b/drivers/infiniband/hw/irdma/irdma.h
@@ -133,6 +133,7 @@ struct irdma_uk_attrs {
u32 min_hw_cq_size;
u32 max_hw_cq_size;
u32 max_hw_srq_quanta;
+   u16 max_hw_push_len;
u16 max_hw_sq_chunk;
u16 min_hw_wq_size;
u8 hw_rev;
diff --git a/drivers/infiniband/hw/irdma/type.h 
b/drivers/infiniband/hw/irdma/type.h
index 665dc74cb10a..e04d6c35cf59 100644
--- a/drivers/infiniband/hw/irdma/type.h
+++ b/drivers/infiniband/hw/irdma/type.h
@@ -1282,8 +1282,11 @@ struct irdma_qhash_table_info {
 struct irdma_cqp_manage_push_page_info {
u32 push_idx;
u16 qs_handle;
+   u16 hmc_fn_id;
u8 free_page;
u8 push_page_type;
+   u8 page_type;
+   u8 use_hmc_fn_id;
 };
 
 struct irdma_qp_flush_info {
diff --git a/drivers/infiniband/hw/irdma/user.h 
b/drivers/infiniband/hw/irdma/user.h
index ab57f689827a..47617ba285c1 100644
--- a/drivers/infiniband/hw/irdma/user.h
+++ b/drivers/infiniband/hw/irdma/user.h
@@ -182,7 +182,6 @@ enum irdma_device_caps_const {
IRDMA_MAX_SGE_RD =  13,
IRDMA_MAX_OUTBOUND_MSG_SIZE =   2147483647,
IRDMA_MAX_INBOUND_MSG_SIZE =2147483647,
-   IRDMA_MAX_PUSH_PAGE_COUNT = 1024,
IRDMA_MAX_PE_ENA_VF_COUNT = 32,
IRDMA_MAX_VF_FPM_ID =   47,
IRDMA_MAX_SQ_PAYLOAD_SIZE = 2145386496,
diff --git a/drivers/infiniband/hw/irdma/utils.c 
b/drivers/infiniband/hw/irdma/utils.c
index 552a4cf2c51b..11ceca099538 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -1085,18 +1085,29 @@ int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, 
struct irdma_sc_qp *qp)
 /**
  * irdma_dealloc_push_page - free a push page for qp
  * @rf: RDMA PCI function
- * @qp: hardware control qp
+ * @iwqp: QP pointer
  */
 static void irdma_dealloc_push_page(struct irdma_pci_f *rf,
-   struct irdma_sc_qp *qp)
+   struct irdma_qp *iwqp)
 {
struct irdma_cqp_request *cqp_request;
struct cqp_cmds_info *cqp_info;
int status;
+   struct irdm

[Intel-wired-lan] [tnguy-net-queue:main] BUILD SUCCESS 1438f5d07b9a7afb15e1d0e26df04a6fd4e56a3c

2025-02-07 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue.git main
branch HEAD: 1438f5d07b9a7afb15e1d0e26df04a6fd4e56a3c  rtnetlink: fix netns 
leak with rtnl_setlink()

elapsed time: 1135m

configs tested: 256
configs skipped: 4

The following configs have been built successfully.
More configs may be tested in the coming days.

tested configs:
alpha allnoconfiggcc-14.2.0
alphaallyesconfigclang-21
alphaallyesconfiggcc-14.2.0
alpha   defconfigclang-19
alpha   defconfiggcc-14.2.0
arc  allmodconfigclang-18
arc   allnoconfiggcc-14.2.0
arc  allyesconfigclang-18
arc defconfiggcc-14.2.0
arc haps_hs_smp_defconfigclang-19
arc   randconfig-001-20250207gcc-13.2.0
arc   randconfig-001-20250208gcc-13.2.0
arc   randconfig-002-20250207gcc-13.2.0
arc   randconfig-002-20250208gcc-13.2.0
arm  allmodconfigclang-18
arm   allnoconfiggcc-14.2.0
arm  allyesconfigclang-18
arm at91_dt_defconfigclang-21
arm defconfiggcc-14.2.0
arm lpc32xx_defconfigclang-21
arm lpc32xx_defconfiggcc-14.2.0
arm  pxa910_defconfiggcc-14.2.0
arm   randconfig-001-20250207gcc-13.2.0
arm   randconfig-001-20250208gcc-13.2.0
arm   randconfig-002-20250207gcc-13.2.0
arm   randconfig-002-20250208gcc-13.2.0
arm   randconfig-003-20250207gcc-13.2.0
arm   randconfig-003-20250208gcc-13.2.0
arm   randconfig-004-20250207gcc-13.2.0
arm   randconfig-004-20250208gcc-13.2.0
arm s3c6400_defconfigclang-19
arm wpcm450_defconfigclang-19
arm64allmodconfigclang-18
arm64 allnoconfiggcc-14.2.0
arm64   defconfiggcc-14.2.0
arm64 randconfig-001-20250207gcc-13.2.0
arm64 randconfig-001-20250208gcc-13.2.0
arm64 randconfig-002-20250207gcc-13.2.0
arm64 randconfig-002-20250208gcc-13.2.0
arm64 randconfig-003-20250207gcc-13.2.0
arm64 randconfig-003-20250208gcc-13.2.0
arm64 randconfig-004-20250207gcc-13.2.0
arm64 randconfig-004-20250208gcc-13.2.0
csky  allnoconfiggcc-14.2.0
cskydefconfigclang-21
cskydefconfiggcc-14.2.0
csky  randconfig-001-20250207gcc-14.2.0
csky  randconfig-001-20250208gcc-14.2.0
csky  randconfig-002-20250207gcc-14.2.0
csky  randconfig-002-20250208gcc-14.2.0
hexagon  alldefconfigclang-21
hexagon  allmodconfigclang-21
hexagon   allnoconfiggcc-14.2.0
hexagon  allyesconfigclang-18
hexagon  allyesconfigclang-21
hexagon defconfiggcc-14.2.0
hexagon   randconfig-001-20250207gcc-14.2.0
hexagon   randconfig-001-20250208clang-21
hexagon   randconfig-002-20250207gcc-14.2.0
hexagon   randconfig-002-20250208clang-21
i386 allmodconfigclang-19
i386  allnoconfigclang-19
i386 allyesconfigclang-19
i386buildonly-randconfig-001-20250207gcc-12
i386buildonly-randconfig-001-20250208gcc-11
i386buildonly-randconfig-002-20250207gcc-12
i386buildonly-randconfig-002-20250208clang-19
i386buildonly-randconfig-002-20250208gcc-11
i386buildonly-randconfig-003-20250207gcc-12
i386buildonly-randconfig-003-20250208gcc-11
i386buildonly-randconfig-003-20250208gcc-12
i386buildonly-randconfig-004-20250207gcc-12
i386buildonly-randconfig-004-20250208clang-19
i386buildonly-randconfig-004-20250208gcc-11
i386buildonly-randconfig-005-20250207gcc-12
i386buildonly-randconfig-005-20250208clang-19
i386buildonly-randconfig-005-20250208gcc-11
i386buildonly-randconfig-006-20250207gcc-12
i386buildonly-randconfig-006-20250208

[Intel-wired-lan] [PATCH iwl-next 0/3] ice: decouple control of SMA/U.FL/SDP pins

2025-02-07 Thread Arkadiusz Kubalewski
Previously, control of the dpll SMA/U.FL pins was partially done through
the ptp API. Decouple pin control between the two interfaces (dpll and
ptp): allow SMA/U.FL pin control through the dpll subsystem, and leave
control of the ptp-related SDP pins to the ptp subsystem.

Arkadiusz Kubalewski (1):
  ice: redesign dpll sma/u.fl pins control

Karol Kolacinski (2):
  ice: change SMA pins to SDP in PTP API
  ice: add ice driver PTP pin documentation

 .../device_drivers/ethernet/intel/ice.rst |  13 +
 drivers/net/ethernet/intel/ice/ice_dpll.c | 952 +-
 drivers/net/ethernet/intel/ice/ice_dpll.h |  23 +-
 drivers/net/ethernet/intel/ice/ice_ptp.c  | 254 +
 drivers/net/ethernet/intel/ice/ice_ptp.h  |   3 -
 5 files changed, 1011 insertions(+), 234 deletions(-)


base-commit: 233a2b1480a0bdf6b40d4debf58a07084e9921ff
prerequisite-patch-id: 2cda134043ccfc781dd595052cfc60a3e2ea48ea
prerequisite-patch-id: 62ac41823e7278621af3745a171aae07508711c8
prerequisite-patch-id: 1330728a760d99174344cb421336ae9b01e17f38
prerequisite-patch-id: ff2afa3e3a2c60a590d17a880b610e2a37e7af0c
prerequisite-patch-id: cbff95efd09cb57e17c68c464ee1e317d01cf822
prerequisite-patch-id: e5be07f7b169f2443c034f04e3d0a00a8d0a8894
prerequisite-patch-id: a5f362eec88b62ff098203469cef8534f176d2a8
prerequisite-patch-id: 545b9e38f61ccfd5b33ab9c3e3a6e7a9f899e306
prerequisite-patch-id: a74b6b981ecd8a320284454d75b1dfc9e555b5f0
prerequisite-patch-id: df0a5f503065fa5869b1c915721a54eb3c7394cb
prerequisite-patch-id: faebd604b0a6eb2a888e99b8977f803abe035abf
prerequisite-patch-id: b7543662f5225ce13a1c95749504c68ef4733aea
prerequisite-patch-id: a7297c1e743f01d118c7f77b39e5755f7a704e17
prerequisite-patch-id: 6f036cdf7bca2a272b153ecc5b3a767f41517c38
prerequisite-patch-id: bb790f877236aad43dae0bdbdceb0a3553260d10
prerequisite-patch-id: 2f53433b0d2a98cd42b18429bdbec1542b175b1f
prerequisite-patch-id: cc9bf85bb9d988d92ab6cb1524bf213ec1351032
prerequisite-patch-id: 112c048b7ae143edda05244b0d8b5ab928d3eff4
prerequisite-patch-id: 124be0607c41aebe292c7b81910857489027baf1
prerequisite-patch-id: b6b5f0e405d566879133d53c26fd998e9f330ff2
prerequisite-patch-id: 777e25e09efe2ec4863e3bebdb247bac3e037c85
prerequisite-patch-id: bf13dbef14d654b243150d4f2603eb90ae497058
prerequisite-patch-id: 76f1c5ef5dacad0600339d5cf843ca14fcfa9dde
prerequisite-patch-id: 586431a13be4f1ecf0adf450242aa7e90975d38f
prerequisite-patch-id: e5c687a47edf3659dca8519e4c5250bbea89171b
prerequisite-patch-id: 9f8081c59e275240cd76911fbede7d2737473357
prerequisite-patch-id: f4d6edba52edea1276e0095e132733f4438de720
prerequisite-patch-id: 5e7afab1204a42d90b8b6a14e3881cf1d4987954
prerequisite-patch-id: 708e14a83a03377f2909b3ce0d72d21a4619a03d
prerequisite-patch-id: ae9720262fb8d1f92b157865f02a9fc7d9aa1582
prerequisite-patch-id: 11c806ab6cc8d29c86218d5760ca22cf3ef2ae05
prerequisite-patch-id: 1aae146d6c20d41b4785d37962052a52c320ac3b
prerequisite-patch-id: 59b00a073b5055091ccf55905e746a372dfc5e8e
prerequisite-patch-id: 5b640578751b48ab50748dbe6f864ce14f1978c9
prerequisite-patch-id: 725ea892cdefd598a1841323c6e74efe160dd3fe
prerequisite-patch-id: 03bb4b3b1f37211fbcd379a19ebff5621c9d901f
prerequisite-patch-id: 877ab147dd7c2e56beeb97bc4651fef89590cc23
prerequisite-patch-id: 798f81cfb09f75af615986689658787d29427e85
prerequisite-patch-id: 4e64a22702fa030f57436da273da1093153cfa7a
prerequisite-patch-id: c8b8f75ae6c949e68a8ee0b6e7b09344a700663f
prerequisite-patch-id: 19fed1ea4aaa320e4a4e46f9c39c7e994f09c7d9
prerequisite-patch-id: 546c7611f620c90a054da039dd19cbc7339edb39
prerequisite-patch-id: 272344e3e7ca650f3833ad62ffa75aa3b080fd72
prerequisite-patch-id: b1d967b8973ec9320e239653773c7caa9d54de70
-- 
2.38.1



[Intel-wired-lan] [PATCH iwl-next 3/3] ice: add ice driver PTP pin documentation

2025-02-07 Thread Arkadiusz Kubalewski
From: Karol Kolacinski 

Add a description of PTP pins support by the adapters to ice driver
documentation.

Reviewed-by: Milena Olech 
Signed-off-by: Karol Kolacinski 
Signed-off-by: Arkadiusz Kubalewski 
---
 .../device_drivers/ethernet/intel/ice.rst   | 13 +
 1 file changed, 13 insertions(+)

diff --git a/Documentation/networking/device_drivers/ethernet/intel/ice.rst 
b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
index 3c46a48d99ba..0bca293cf9cb 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/ice.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
@@ -927,6 +927,19 @@ To enable/disable UDP Segmentation Offload, issue the 
following command::
 
   # ethtool -K  tx-udp-segmentation [off|on]
 
+PTP pin interface
+-
+All adapters support standard PTP pin interface. SDPs (Software Definable Pin)
+are single ended pins with both periodic output and external timestamp
+supported. There are also specific differential input/output pins (TIME_SYNC,
+1PPS) with only one of the functions supported.
+
+There are adapters with DPLL, where pins are connected to the DPLL instead of
+being exposed on the board. You have to be aware that in those configurations,
+only SDP pins are exposed and each pin has its own fixed direction.
+To see input signal on those PTP pins, you need to configure DPLL properly.
+Output signal is only visible on DPLL and to send it to the board SMA/U.FL 
pins,
+DPLL output pins have to be manually configured.
 
 GNSS module
 ---
-- 
2.38.1



[Intel-wired-lan] [PATCH iwl-next 1/3] ice: redesign dpll sma/u.fl pins control

2025-02-07 Thread Arkadiusz Kubalewski
DPLL-enabled E810 NIC driver provides user with list of input and output
pins. Hardware internal design impacts user control over SMA and U.FL
pins. Currently end-user view on those dpll pins doesn't provide any layer
of abstraction. On the hardware level SMA and U.FL pins are tied together
due to existence of direction control logic for each pair:
- SMA1 (bi-directional) and U.FL1 (only output)
- SMA2 (bi-directional) and U.FL2 (only input)
The user activity on each pin of the pair may impact the state of the
other.

Previously all the pins were provided to the user as is, without the
control over SMA pins direction.

Introduce a software controlled layer of abstraction over external board
pins, instead of providing the user with access to raw pins connected to
the dpll:
- new software controlled SMA and U.FL pins,
- callback operations directing user requests to corresponding hardware
  pins according to the runtime configuration,
- ability to control SMA pins direction.

Reviewed-by: Przemek Kitszel 
Signed-off-by: Arkadiusz Kubalewski 
---
 drivers/net/ethernet/intel/ice/ice_dpll.c | 952 +-
 drivers/net/ethernet/intel/ice/ice_dpll.h |  23 +-
 2 files changed, 959 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c 
b/drivers/net/ethernet/intel/ice/ice_dpll.c
index 8d806d8ad761..1af4bfff012b 100644
--- a/drivers/net/ethernet/intel/ice/ice_dpll.c
+++ b/drivers/net/ethernet/intel/ice/ice_dpll.c
@@ -11,6 +11,28 @@
 #define ICE_DPLL_RCLK_NUM_PER_PF   1
 #define ICE_DPLL_PIN_ESYNC_PULSE_HIGH_PERCENT  25
 #define ICE_DPLL_PIN_GEN_RCLK_FREQ 1953125
+#define ICE_DPLL_PIN_PRIO_OUTPUT   0xff
+#define ICE_DPLL_SW_PIN_INPUT_BASE_SFP 4
+#define ICE_DPLL_SW_PIN_INPUT_BASE_QSFP6
+#define ICE_DPLL_SW_PIN_OUTPUT_BASE0
+
+#define ICE_DPLL_PIN_SW_INPUT_ABS(in_idx) \
+   (ICE_DPLL_SW_PIN_INPUT_BASE_SFP + (in_idx))
+
+#define ICE_DPLL_PIN_SW_1_INPUT_ABS_IDX \
+   (ICE_DPLL_PIN_SW_INPUT_ABS(ICE_DPLL_PIN_SW_1_IDX))
+
+#define ICE_DPLL_PIN_SW_2_INPUT_ABS_IDX \
+   (ICE_DPLL_PIN_SW_INPUT_ABS(ICE_DPLL_PIN_SW_2_IDX))
+
+#define ICE_DPLL_PIN_SW_OUTPUT_ABS(out_idx) \
+   (ICE_DPLL_SW_PIN_OUTPUT_BASE + (out_idx))
+
+#define ICE_DPLL_PIN_SW_1_OUTPUT_ABS_IDX \
+   (ICE_DPLL_PIN_SW_OUTPUT_ABS(ICE_DPLL_PIN_SW_1_IDX))
+
+#define ICE_DPLL_PIN_SW_2_OUTPUT_ABS_IDX \
+   (ICE_DPLL_PIN_SW_OUTPUT_ABS(ICE_DPLL_PIN_SW_2_IDX))
 
 /**
  * enum ice_dpll_pin_type - enumerate ice pin types:
@@ -18,24 +40,60 @@
  * @ICE_DPLL_PIN_TYPE_INPUT: input pin
  * @ICE_DPLL_PIN_TYPE_OUTPUT: output pin
  * @ICE_DPLL_PIN_TYPE_RCLK_INPUT: recovery clock input pin
+ * @ICE_DPLL_PIN_TYPE_SOFTWARE: software controlled SMA/U.FL pins
  */
 enum ice_dpll_pin_type {
ICE_DPLL_PIN_INVALID,
ICE_DPLL_PIN_TYPE_INPUT,
ICE_DPLL_PIN_TYPE_OUTPUT,
ICE_DPLL_PIN_TYPE_RCLK_INPUT,
+   ICE_DPLL_PIN_TYPE_SOFTWARE,
 };
 
 static const char * const pin_type_name[] = {
[ICE_DPLL_PIN_TYPE_INPUT] = "input",
[ICE_DPLL_PIN_TYPE_OUTPUT] = "output",
[ICE_DPLL_PIN_TYPE_RCLK_INPUT] = "rclk-input",
+   [ICE_DPLL_PIN_TYPE_SOFTWARE] = "software",
 };
 
+static const char * const ice_dpll_sw_pin_sma[] = { "SMA1", "SMA2" };
+static const char * const ice_dpll_sw_pin_ufl[] = { "U.FL1", "U.FL2" };
+
 static const struct dpll_pin_frequency ice_esync_range[] = {
DPLL_PIN_FREQUENCY_RANGE(0, DPLL_PIN_FREQUENCY_1_HZ),
 };
 
+/**
+ * ice_dpll_is_sw_pin - check if given pin shall be controlled by SW
+ * @pf: private board structure
+ * @index: index of a pin as understood by FW
+ * @input: true for input, false for output
+ *
+ * Check if the pin shall be controlled by SW - instead of providing raw access
+ * for pin control. For E810 NIC with dpll there is additional MUX-related 
logic
+ * between SMA/U.FL pins/connectors and dpll device, best to give user access
+ * with series of wrapper functions as from user perspective they convey single
+ * functionality rather then separated pins.
+ *
+ * Return:
+ * * true - pin controlled by SW
+ * * false - pin not controlled by SW
+ */
+static bool ice_dpll_is_sw_pin(struct ice_pf *pf, u8 index, bool input)
+{
+   if (input && pf->hw.device_id == ICE_DEV_ID_E810C_QSFP)
+   index -= ICE_DPLL_SW_PIN_INPUT_BASE_QSFP -
+ICE_DPLL_SW_PIN_INPUT_BASE_SFP;
+
+   if ((input && (index == ICE_DPLL_PIN_SW_1_INPUT_ABS_IDX ||
+  index == ICE_DPLL_PIN_SW_2_INPUT_ABS_IDX)) ||
+   (!input && (index == ICE_DPLL_PIN_SW_1_OUTPUT_ABS_IDX ||
+   index == ICE_DPLL_PIN_SW_2_OUTPUT_ABS_IDX)))
+   return true;
+   return false;
+}
+
 /**
  * ice_dpll_is_reset - check if reset is in progress
  * @pf: private board structure
@@ -279,6 +337,87 @@ ice_dpll_output_frequency_get(const struct dpll_pin *pin, 
void *pin_priv,
  extack, ICE_D

[Intel-wired-lan] [PATCH iwl-next 2/3] ice: change SMA pins to SDP in PTP API

2025-02-07 Thread Arkadiusz Kubalewski
From: Karol Kolacinski 

This change aligns E810 PTP pin control to all other products.

Currently, SMA/U.FL port expanders are controlled together with SDP pins
connected to 1588 clock. To align this, separate this control by
exposing only SDP20..23 pins in PTP API on adapters with DPLL.

Clear error for all E810 on absent NVM pin section or other errors to
allow proper initialization on SMA E810 with NVM section.

Use ARRAY_SIZE for pin array instead of internal definition.

Reviewed-by: Milena Olech 
Signed-off-by: Karol Kolacinski 
Signed-off-by: Arkadiusz Kubalewski 
---
 drivers/net/ethernet/intel/ice/ice_ptp.c | 254 ---
 drivers/net/ethernet/intel/ice/ice_ptp.h |   3 -
 2 files changed, 39 insertions(+), 218 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c 
b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 1bb0033347c7..a7aa6d5fb775 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -40,21 +40,19 @@ static const struct ice_ptp_pin_desc ice_pin_desc_e810[] = {
{  ONE_PPS,   { -1,  5 }, { 0, 1 }},
 };
 
-static const char ice_pin_names_nvm[][64] = {
-   "GNSS",
-   "SMA1",
-   "U.FL1",
-   "SMA2",
-   "U.FL2",
+static const char ice_pin_names_dpll[][64] = {
+   "SDP20",
+   "SDP21",
+   "SDP22",
+   "SDP23",
 };
 
-static const struct ice_ptp_pin_desc ice_pin_desc_e810_sma[] = {
+static const struct ice_ptp_pin_desc ice_pin_desc_dpll[] = {
/* name,   gpio,   delay */
-   {  GNSS, {  1, -1 }, { 0, 0 }},
-   {  SMA1, {  1,  0 }, { 0, 1 }},
-   {  UFL1, { -1,  0 }, { 0, 1 }},
-   {  SMA2, {  3,  2 }, { 0, 1 }},
-   {  UFL2, {  3, -1 }, { 0, 0 }},
+   {  SDP0, { -1,  0 }, { 0, 1 }},
+   {  SDP1, {  1, -1 }, { 0, 0 }},
+   {  SDP2, { -1,  2 }, { 0, 1 }},
+   {  SDP3, {  3, -1 }, { 0, 0 }},
 };
 
 static struct ice_pf *ice_get_ctrl_pf(struct ice_pf *pf)
@@ -92,101 +90,6 @@ static int ice_ptp_find_pin_idx(struct ice_pf *pf, enum 
ptp_pin_function func,
return -1;
 }
 
-/**
- * ice_ptp_update_sma_data - update SMA pins data according to pins setup
- * @pf: Board private structure
- * @sma_pins: parsed SMA pins status
- * @data: SMA data to update
- */
-static void ice_ptp_update_sma_data(struct ice_pf *pf, unsigned int sma_pins[],
-   u8 *data)
-{
-   const char *state1, *state2;
-
-   /* Set the right state based on the desired configuration.
-* When bit is set, functionality is disabled.
-*/
-   *data &= ~ICE_ALL_SMA_MASK;
-   if (!sma_pins[UFL1 - 1]) {
-   if (sma_pins[SMA1 - 1] == PTP_PF_EXTTS) {
-   state1 = "SMA1 Rx, U.FL1 disabled";
-   *data |= ICE_SMA1_TX_EN;
-   } else if (sma_pins[SMA1 - 1] == PTP_PF_PEROUT) {
-   state1 = "SMA1 Tx U.FL1 disabled";
-   *data |= ICE_SMA1_DIR_EN;
-   } else {
-   state1 = "SMA1 disabled, U.FL1 disabled";
-   *data |= ICE_SMA1_MASK;
-   }
-   } else {
-   /* U.FL1 Tx will always enable SMA1 Rx */
-   state1 = "SMA1 Rx, U.FL1 Tx";
-   }
-
-   if (!sma_pins[UFL2 - 1]) {
-   if (sma_pins[SMA2 - 1] == PTP_PF_EXTTS) {
-   state2 = "SMA2 Rx, U.FL2 disabled";
-   *data |= ICE_SMA2_TX_EN | ICE_SMA2_UFL2_RX_DIS;
-   } else if (sma_pins[SMA2 - 1] == PTP_PF_PEROUT) {
-   state2 = "SMA2 Tx, U.FL2 disabled";
-   *data |= ICE_SMA2_DIR_EN | ICE_SMA2_UFL2_RX_DIS;
-   } else {
-   state2 = "SMA2 disabled, U.FL2 disabled";
-   *data |= ICE_SMA2_MASK;
-   }
-   } else {
-   if (!sma_pins[SMA2 - 1]) {
-   state2 = "SMA2 disabled, U.FL2 Rx";
-   *data |= ICE_SMA2_DIR_EN | ICE_SMA2_TX_EN;
-   } else {
-   state2 = "SMA2 Tx, U.FL2 Rx";
-   *data |= ICE_SMA2_DIR_EN;
-   }
-   }
-
-   dev_dbg(ice_pf_to_dev(pf), "%s, %s\n", state1, state2);
-}
-
-/**
- * ice_ptp_set_sma_cfg - set the configuration of the SMA control logic
- * @pf: Board private structure
- *
- * Return: 0 on success, negative error code otherwise
- */
-static int ice_ptp_set_sma_cfg(struct ice_pf *pf)
-{
-   const struct ice_ptp_pin_desc *ice_pins = pf->ptp.ice_pin_desc;
-   struct ptp_pin_desc *pins = pf->ptp.pin_desc;
-   unsigned int sma_pins[ICE_SMA_PINS_NUM] = {};
-   int err;
-   u8 data;
-
-   /* Read initial pin state value */
-   err = ice_read_sma_ctrl(&pf->hw, &data);
-   if (err)
-   return err;
-
-   /* Get SMA/U.FL pins states */
-   for (int i = 0; i < pf->ptp.info.n_pins; i++)
-   if (pins[i].func) {
-  

Re: [Intel-wired-lan] [PATCH iwl-net v2] idpf: call set_real_num_queues in idpf_open

2025-02-07 Thread Salin, Samuel



> -Original Message-
> From: Intel-wired-lan  On Behalf Of
> Joshua Hay
> Sent: Tuesday, February 4, 2025 6:08 PM
> To: intel-wired-...@lists.osuosl.org
> Cc: Samudrala, Sridhar ; Hay, Joshua A
> ; Chittim, Madhu 
> Subject: [Intel-wired-lan] [PATCH iwl-net v2] idpf: call set_real_num_queues 
> in
> idpf_open
> 
> On initial driver load, alloc_etherdev_mqs is called with whatever max queue
> values are provided by the control plane. However, if the driver is loaded on 
> a
> system where num_online_cpus() returns less than the max queues, the
> netdev will think there are more queues than are actually available. Only
> num_online_cpus() will be allocated, but
> skb_get_queue_mapping(skb) could possibly return an index beyond the
> range of allocated queues. Consequently, the packet is silently dropped and it
> appears as if TX is broken.
> 
> Set the real number of queues during open so the netdev knows how many
> queues will be allocated.
> 
> v2:
> - call set_real_num_queues in idpf_open. Previous change called
>   set_real_num_queues function in idpf_up_complete, but it is possible
>   for up_complete to be called without holding the RTNL lock. If user
>   brings up interface, then issues a reset, the init_task will call
>   idpf_vport_open->idpf_up_complete. Since this is initiated by the
>   driver, the RTNL lock is not taken.
> - adjust title to reflect new changes.
> 
> Signed-off-by: Joshua Hay 
> Fixes: 1c325aac10a8 ("idpf: configure resources for TX queues")
> Reviewed-by: Madhu Chittim 

Tested-by: Samuel Salin 


[Intel-wired-lan] igb: XDP/ZC busy polling

2025-02-07 Thread Kurt Kanzenbach
Hello Joe,

I noticed that XDP/ZC busy polling does not work anymore in combination
with igb driver. This seems to be related to commit 5ef44b3cb43b ("xsk:
Bring back busy polling support") which relies on
netif_queue_set_napi().

I see you implemented it for e1000, igc and so on. However, igb is
missing. Do you have any plans to add the missing registration to igb?
Just asking. Otherwise, I can send a patch for it.

Thanks,
Kurt


signature.asc
Description: PGP signature


Re: [Intel-wired-lan] [PATCH iwl-net] ixgbe: fix media cage present detection for E610 device

2025-02-07 Thread Simon Horman
On Thu, Feb 06, 2025 at 04:19:20PM +0100, Piotr Kwapulinski wrote:
> The commit 23c0e5a16bcc ("ixgbe: Add link management support for E610
> device") introduced incorrect checking of media cage presence for E610
> device. Fix it.
> 
> Fixes: 23c0e5a16bcc ("ixgbe: Add link management support for E610 device")
> Reported-by: Dan Carpenter 
> Closes: 
> https://lore.kernel.org/all/e7d73b32-f12a-49d1-8b60-1ef83359ec13@stanley.mountain/
> Reviewed-by: Michal Swiatkowski 
> Reviewed-by: Przemek Kitszel 
> Signed-off-by: Piotr Kwapulinski 

Reviewed-by: Simon Horman 



Re: [Intel-wired-lan] [PATCH iwl-next v1 3/3] ice: E825C PHY register cleanup

2025-02-07 Thread Simon Horman
On Thu, Feb 06, 2025 at 09:36:55AM +0100, Grzegorz Nitka wrote:
> From: Karol Kolacinski 
> 
> Minor PTP register refactor, including logical grouping E825C 1-step
> timestamping registers. Remove unused register definitions
> (PHY_REG_GPCS_BITSLIP, PHY_REG_REVISION).
> Also, apply preferred GENMASK macro (instead of ICE_M) for register
> fields definition affected by this patch.
> 
> Reviewed-by: Przemek Kitszel 
> Signed-off-by: Karol Kolacinski 
> Signed-off-by: Grzegorz Nitka 

In reference to my comment on patch 1/3, this patch is also doing several
things. But I think that is fine because: they are all cleanups; they are
somewhat related to each other; and overall the patch is still not so long.

Reviewed-by: Simon Horman 

...


[Intel-wired-lan] [iwl-next v1 2/4] ixgbe: check for MDD events

2025-02-07 Thread Michal Swiatkowski
From: Don Skidmore 

When an event is detected, it is logged and, for the time being, the
queue is immediately re-enabled. This is due to the lack of an API to
the hypervisor that would let it deal with the event as it chooses.

Reviewed-by: Przemek Kitszel 
Reviewed-by: Jedrzej Jagielski 
Reviewed-by: Marcin Szycik 
Signed-off-by: Don Skidmore 
Signed-off-by: Michal Swiatkowski 
---
 .../net/ethernet/intel/ixgbe/ixgbe_sriov.h|  1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h |  2 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  3 ++
 .../net/ethernet/intel/ixgbe/ixgbe_sriov.c| 50 +++
 4 files changed, 56 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
index 0690ecb8dfa3..bc4cab976bf9 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
@@ -15,6 +15,7 @@
 #ifdef CONFIG_PCI_IOV
 void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter);
 #endif
+bool ixgbe_check_mdd_event(struct ixgbe_adapter *adapter);
 void ixgbe_msg_task(struct ixgbe_adapter *adapter);
 int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask);
 void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index d446c375335a..aa3b498558bc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -402,6 +402,8 @@ struct ixgbe_nvm_version {
 #define IXGBE_MRCTL(_i)  (0x0F600 + ((_i) * 4))
 #define IXGBE_VMRVLAN(_i)(0x0F610 + ((_i) * 4))
 #define IXGBE_VMRVM(_i)  (0x0F630 + ((_i) * 4))
+#define IXGBE_LVMMC_RX  0x2FA8
+#define IXGBE_LVMMC_TX  0x8108
 #define IXGBE_WQBR_RX(_i)(0x2FB0 + ((_i) * 4)) /* 4 total */
 #define IXGBE_WQBR_TX(_i)(0x8130 + ((_i) * 4)) /* 4 total */
 #define IXGBE_L34T_IMIR(_i)  (0x0E800 + ((_i) * 4)) /*128 of these (0-127)*/
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 467f81239e12..3ff48207165c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7959,6 +7959,9 @@ static void ixgbe_watchdog_link_is_up(struct 
ixgbe_adapter *adapter)
netif_carrier_on(netdev);
ixgbe_check_vf_rate_limit(adapter);
 
+   if (adapter->num_vfs && hw->mac.ops.enable_mdd)
+   hw->mac.ops.enable_mdd(hw);
+
/* enable transmits */
netif_tx_wake_all_queues(adapter->netdev);
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index ccdce80edd14..c374ebd4a56b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -207,6 +207,7 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, 
unsigned int max_vfs)
 int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
 {
unsigned int num_vfs = adapter->num_vfs, vf;
+   struct ixgbe_hw *hw = &adapter->hw;
unsigned long flags;
int rss;
 
@@ -237,6 +238,9 @@ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
return 0;
 
+   if (hw->mac.ops.disable_mdd)
+   hw->mac.ops.disable_mdd(hw);
+
 #ifdef CONFIG_PCI_IOV
/*
 * If our VFs are assigned we cannot shut down SR-IOV
@@ -1353,12 +1357,58 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter 
*adapter, u32 vf)
ixgbe_write_mbx(hw, &msg, 1, vf);
 }
 
+/**
+ * ixgbe_check_mdd_event - check for MDD event on all VFs
+ * @adapter: pointer to ixgbe adapter
+ *
+ * Return: true if there is a VF on which MDD event occurred, false otherwise.
+ */
+bool ixgbe_check_mdd_event(struct ixgbe_adapter *adapter)
+{
+   struct ixgbe_hw *hw = &adapter->hw;
+   DECLARE_BITMAP(vf_bitmap, 64);
+   bool ret = false;
+   int i;
+
+   if (!hw->mac.ops.handle_mdd)
+   return false;
+
+   /* Did we have a malicious event */
+   hw->mac.ops.handle_mdd(hw, vf_bitmap);
+
+   /* Log any blocked queues and release lock */
+   for_each_set_bit(i, vf_bitmap, 64) {
+   dev_warn(&adapter->pdev->dev,
+"Malicious event on VF %d tx:%x rx:%x\n", i,
+IXGBE_READ_REG(hw, IXGBE_LVMMC_TX),
+IXGBE_READ_REG(hw, IXGBE_LVMMC_RX));
+
+   if (hw->mac.ops.restore_mdd_vf) {
+   u32 ping;
+
+   hw->mac.ops.restore_mdd_vf(hw, i);
+
+   /* get the VF to rebuild its queues */
+   adapter->vfinfo[i].clear_to_send = 0;
+   ping = IXGBE_PF_CONTROL_MSG |
+  IXGBE_VT_MSGTYPE_CTS;
+   ixgbe_write_mbx(hw, &ping, 1, i);
+   

[Intel-wired-lan] [iwl-next v1 3/4] ixgbe: add Tx hang detection unhandled MDD

2025-02-07 Thread Michal Swiatkowski
From: Slawomir Mrozowicz 

Add Tx Hang detection due to an unhandled MDD Event.

Previously, a malicious VF could disable the entire port causing
TX to hang on the E610 card.
Those events that caused the PF to freeze were not detected
as MDD events and usually required a Tx Hang watchdog timer
to catch the suspension, and perform a physical function reset.

Implement flows in the affected PF driver in such a way to check
the cause of the hang, detect it as an MDD event and log an
entry of the malicious VF that caused the Hang.

The PF blocks the malicious VF, if it continues to be the source
of several MDD events.

Reviewed-by: Przemek Kitszel 
Reviewed-by: Marcin Szycik 
Signed-off-by: Slawomir Mrozowicz 
Co-developed-by: Michal Swiatkowski 
Signed-off-by: Michal Swiatkowski 
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h  |   5 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h |  12 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c  |   3 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 212 --
 4 files changed, 210 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index e6a380d4929b..81ccb4c591ba 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -427,6 +427,10 @@ enum ixgbe_ring_f_enum {
 #define IXGBE_BAD_L2A_QUEUE3
 #define IXGBE_MAX_MACVLANS 63
 
+#define IXGBE_MAX_TX_QUEUES128
+#define IXGBE_MAX_TX_DESCRIPTORS   40
+#define IXGBE_MAX_TX_VF_HANGS  4
+
 DECLARE_STATIC_KEY_FALSE(ixgbe_xdp_locking_key);
 
 struct ixgbe_ring_feature {
@@ -798,6 +802,7 @@ struct ixgbe_adapter {
u32 timer_event_accumulator;
u32 vferr_refcount;
struct ixgbe_mac_addr *mac_table;
+   u8 tx_hang_count[IXGBE_MAX_TX_QUEUES];
struct kobject *info_kobj;
u16 lse_mask;
 #ifdef CONFIG_IXGBE_HWMON
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index aa3b498558bc..e07b56625595 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -1044,6 +1044,7 @@ struct ixgbe_nvm_version {
 #define IXGBE_GCR_EXT_VT_MODE_160x0001
 #define IXGBE_GCR_EXT_VT_MODE_320x0002
 #define IXGBE_GCR_EXT_VT_MODE_640x0003
+#define IXGBE_GCR_EXT_VT_MODE_MASK 0x0003
 #define IXGBE_GCR_EXT_SRIOV (IXGBE_GCR_EXT_MSIX_EN | \
 IXGBE_GCR_EXT_VT_MODE_64)
 
@@ -2935,6 +2936,13 @@ struct ixgbe_adv_tx_context_desc {
__le32 mss_l4len_idx;
 };
 
+enum {
+   IXGBE_VLAN_MACIP_LENS_REG   = 0,
+   IXGBE_FCEOF_SAIDX_REG   = 1,
+   IXGBE_TYPE_TUCMD_MLHL   = 2,
+   IXGBE_MSS_L4LEN_IDX = 3,
+};
+
 /* Adv Transmit Descriptor Config Masks */
 #define IXGBE_ADVTXD_DTALEN_MASK  0x /* Data buf length(bytes) */
 #define IXGBE_ADVTXD_MAC_LINKSEC  0x0004 /* Insert LinkSec */
@@ -2942,7 +2950,7 @@ struct ixgbe_adv_tx_context_desc {
 #define IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK   0x03FF /* IPSec SA index */
 #define IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK0x01FF /* IPSec ESP length */
 #define IXGBE_ADVTXD_DTYP_MASK  0x00F0 /* DTYP mask */
-#define IXGBE_ADVTXD_DTYP_CTXT  0x0020 /* Advanced Context Desc */
+#define IXGBE_ADVTXD_DTYP_CTXT 0x2 /* Advanced Context Desc */
 #define IXGBE_ADVTXD_DTYP_DATA  0x0030 /* Advanced Data Descriptor */
 #define IXGBE_ADVTXD_DCMD_EOP   IXGBE_TXD_CMD_EOP  /* End of Packet */
 #define IXGBE_ADVTXD_DCMD_IFCS  IXGBE_TXD_CMD_IFCS /* Insert FCS */
@@ -2991,6 +2999,8 @@ struct ixgbe_adv_tx_context_desc {
 #define IXGBE_ADVTXD_FCOEF_EOF_MASK  (3u << 10)  /* FC EOF index */
 #define IXGBE_ADVTXD_L4LEN_SHIFT 8  /* Adv ctxt L4LEN shift */
 #define IXGBE_ADVTXD_MSS_SHIFT   16  /* Adv ctxt MSS shift */
+#define IXGBE_ADVTXD_MSS_MASK  GENMASK(31, IXGBE_ADVTXD_MSS_SHIFT)
+#define IXGBE_ADVTXD_HEADER_LEN_MASK   GENMASK(8, 0)
 
 /* Autonegotiation advertised speeds */
 typedef u32 ixgbe_autoneg_advertised;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index 336d47ffb95a..54d75cf94cc1 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -1293,7 +1293,8 @@ void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 
vlan_macip_lens,
tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
 
/* set bits to identify this as an advanced context descriptor */
-   type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
+   type_tucmd |= IXGBE_TXD_CMD_DEXT |
+   FIELD_PREP(IXGBE_ADVTXD_DTYP_MASK, IXGBE_ADVTXD_DTYP_CTXT);
 
context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
context_desc->fceof_saidx   = cpu_to_le32(fceof_saidx);
diff --git a/drivers/net/et

[Intel-wired-lan] [iwl-next v1 0/4] ixgbe: support MDD events

2025-02-07 Thread Michal Swiatkowski
Hi,

This patchset is adding support for MDD (malicious driver detection) for
ixgbe driver. It can catch the error on VF side and reset malicious VF.

An MDD event can be triggered for example by sending from VF a TSO packet
with segment number set to 0.

Add checking for Tx hang in case of MDD is unhandled. It will prevent VF
from staying in Tx hang state.

Don Skidmore (1):
  ixgbe: check for MDD events

Paul Greenwalt (1):
  ixgbe: add MDD support

Radoslaw Tyl (1):
  ixgbe: turn off MDD while modifying SRRCTL

Slawomir Mrozowicz (1):
  ixgbe: add Tx hang detection unhandled MDD

 drivers/net/ethernet/intel/ixgbe/ixgbe.h  |   5 +
 .../net/ethernet/intel/ixgbe/ixgbe_sriov.h|   1 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h |  42 +++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h |   5 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c |   4 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c  |   3 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 224 --
 .../net/ethernet/intel/ixgbe/ixgbe_sriov.c|  50 
 drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 119 ++
 9 files changed, 430 insertions(+), 23 deletions(-)

-- 
2.42.0



[Intel-wired-lan] [iwl-next v1 4/4] ixgbe: turn off MDD while modifying SRRCTL

2025-02-07 Thread Michal Swiatkowski
From: Radoslaw Tyl 

Modifying the SRRCTL register can generate an MDD event.

Turn MDD off during SRRCTL register write to prevent generating MDD.

Fix RCT in ixgbe_set_rx_drop_en().

Reviewed-by: Marcin Szycik 
Reviewed-by: Przemek Kitszel 
Signed-off-by: Radoslaw Tyl 
Signed-off-by: Michal Swiatkowski 
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 5c1c067ffb7c..6bb2a0edf2ea 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -4100,8 +4100,12 @@ void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter)
 static void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter)
 #endif
 {
-   int i;
bool pfc_en = adapter->dcb_cfg.pfc_mode_enable;
+   struct ixgbe_hw *hw = &adapter->hw;
+   int i;
+
+   if (hw->mac.ops.disable_mdd)
+   hw->mac.ops.disable_mdd(hw);
 
if (adapter->ixgbe_ieee_pfc)
pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en);
@@ -4123,6 +4127,9 @@ static void ixgbe_set_rx_drop_en(struct ixgbe_adapter 
*adapter)
for (i = 0; i < adapter->num_rx_queues; i++)
ixgbe_disable_rx_drop(adapter, adapter->rx_ring[i]);
}
+
+   if (hw->mac.ops.enable_mdd)
+   hw->mac.ops.enable_mdd(hw);
 }
 
 #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
-- 
2.42.0



[Intel-wired-lan] [iwl-next v1 1/4] ixgbe: add MDD support

2025-02-07 Thread Michal Swiatkowski
From: Paul Greenwalt 

Add malicious driver detection. Support enabling MDD, disabling MDD,
handling a MDD event, and restoring a MDD VF.

Reviewed-by: Przemek Kitszel 
Reviewed-by: Jedrzej Jagielski 
Reviewed-by: Marcin Szycik 
Signed-off-by: Paul Greenwalt 
Signed-off-by: Michal Swiatkowski 
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_type.h |  28 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h |   5 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c |   4 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 119 ++
 4 files changed, 156 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 5fdf32d79d82..d446c375335a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -2746,6 +2746,28 @@ enum ixgbe_fdir_pballoc_type {
 #define FW_PHY_INFO_ID_HI_MASK 0xu
 #define FW_PHY_INFO_ID_LO_MASK 0xu
 
+/* There are only 3 options for VFs creation on this device:
+ * 16 VFs pool with 8 queues each
+ * 32 VFs pool with 4 queues each
+ * 64 VFs pool with 2 queues each
+ *
+ * That means reading some VF registers that map VF to queue depending on
+ * chosen option. Define values that help dealing with each scenario.
+ */
+/* Number of queues based on VFs pool */
+#define IXGBE_16VFS_QUEUES 8
+#define IXGBE_32VFS_QUEUES 4
+#define IXGBE_64VFS_QUEUES 2
+/* Mask for getting queues bits based on VFs pool */
+#define IXGBE_16VFS_BITMASKGENMASK(IXGBE_16VFS_QUEUES - 1, 0)
+#define IXGBE_32VFS_BITMASKGENMASK(IXGBE_32VFS_QUEUES - 1, 0)
+#define IXGBE_64VFS_BITMASKGENMASK(IXGBE_64VFS_QUEUES - 1, 0)
+/* Convert queue index to register number.
+ * We have 4 registers with 32 queues in each.
+ */
+#define IXGBE_QUEUES_PER_REG   32
+#define IXGBE_QUEUES_REG_AMOUNT4
+
 /* Host Interface Command Structures */
 struct ixgbe_hic_hdr {
u8 cmd;
@@ -3534,6 +3556,12 @@ struct ixgbe_mac_operations {
int (*dmac_config_tcs)(struct ixgbe_hw *hw);
int (*read_iosf_sb_reg)(struct ixgbe_hw *, u32, u32, u32 *);
int (*write_iosf_sb_reg)(struct ixgbe_hw *, u32, u32, u32);
+
+   /* MDD events */
+   void (*enable_mdd)(struct ixgbe_hw *hw);
+   void (*disable_mdd)(struct ixgbe_hw *hw);
+   void (*restore_mdd_vf)(struct ixgbe_hw *hw, u32 vf);
+   void (*handle_mdd)(struct ixgbe_hw *hw, unsigned long *vf_bitmap);
 };
 
 struct ixgbe_phy_operations {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h
index 3e4092f8da3e..2a11147fb1bc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h
@@ -17,4 +17,9 @@ void ixgbe_set_source_address_pruning_x550(struct ixgbe_hw 
*hw,
 void ixgbe_set_ethertype_anti_spoofing_x550(struct ixgbe_hw *hw,
bool enable, int vf);
 
+void ixgbe_enable_mdd_x550(struct ixgbe_hw *hw);
+void ixgbe_disable_mdd_x550(struct ixgbe_hw *hw);
+void ixgbe_restore_mdd_vf_x550(struct ixgbe_hw *hw, u32 vf);
+void ixgbe_handle_mdd_x550(struct ixgbe_hw *hw, unsigned long *vf_bitmap);
+
 #endif /* _IXGBE_X550_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c
index 683c668672d6..e67d105fd99a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c
@@ -2630,6 +2630,10 @@ static const struct ixgbe_mac_operations mac_ops_e610 = {
.prot_autoc_write   = prot_autoc_write_generic,
.setup_fc   = ixgbe_setup_fc_e610,
.fc_autoneg = ixgbe_fc_autoneg_e610,
+   .enable_mdd = ixgbe_enable_mdd_x550,
+   .disable_mdd= ixgbe_disable_mdd_x550,
+   .restore_mdd_vf = ixgbe_restore_mdd_vf_x550,
+   .handle_mdd = ixgbe_handle_mdd_x550,
 };
 
 static const struct ixgbe_phy_operations phy_ops_e610 = {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 277ceaf8a793..f148d3f29378 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -3800,6 +3800,121 @@ static int ixgbe_write_phy_reg_x550a(struct ixgbe_hw 
*hw, u32 reg_addr,
return status;
 }
 
+static void ixgbe_set_mdd_x550(struct ixgbe_hw *hw, bool ena)
+{
+   u32 reg_dma, reg_rdr;
+
+   reg_dma = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
+   reg_rdr = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
+
+   if (ena) {
+   reg_dma |= (IXGBE_DMATXCTL_MDP_EN | IXGBE_DMATXCTL_MBINTEN);
+   reg_rdr |= (IXGBE_RDRXCTL_MDP_EN | IXGBE_RDRXCTL_MBINTEN);
+   } else {
+   reg_dma &= ~(IXGBE_DMATXCTL_MDP_EN | IXG

Re: [Intel-wired-lan] [PATCH iwl-next v1 01/13] ixgbe: add initial devlink support

2025-02-07 Thread Simon Horman
On Mon, Feb 03, 2025 at 04:03:16PM +0100, Jedrzej Jagielski wrote:
> Add an initial support for devlink interface to ixgbe driver.
> 
> Similarly to i40e driver the implementation doesn't enable
> devlink to manage device-wide configuration. Devlink instance
> is created for each physical function of PCIe device.
> 
> Create separate directory for devlink related ixgbe files
> and use naming scheme similar to the one used in the ice driver.
> 
> Add a stub for Documentation, to be extended by further patches.
> 
> Reviewed-by: Mateusz Polchlopek 
> Signed-off-by: Jedrzej Jagielski 

...

> diff --git a/Documentation/networking/devlink/ixgbe.rst 
> b/Documentation/networking/devlink/ixgbe.rst
> new file mode 100644
> index ..ca920d421d42
> --- /dev/null
> +++ b/Documentation/networking/devlink/ixgbe.rst
> @@ -0,0 +1,8 @@
> +.. SPDX-License-Identifier: GPL-2.0
> +
> +
> +ixgbe devlink support
> +

nit: the '=' lines are one character too short wrt the text they decorate.

Flagged by make htmldocs.

> +
> +This document describes the devlink features implemented by the ``ixgbe``
> +device driver.

...


Re: [Intel-wired-lan] [PATCH iwl-next v1 2/3] ice: Refactor E825C PHY registers info struct

2025-02-07 Thread Simon Horman
On Thu, Feb 06, 2025 at 09:36:54AM +0100, Grzegorz Nitka wrote:
> From: Karol Kolacinski 
> 
> Simplify ice_phy_reg_info_eth56g struct definition to include base
> address for the very first quad. Use base address info and 'step'
> value to determine address for specific PHY quad.
> 
> Reviewed-by: Przemek Kitszel 
> Signed-off-by: Karol Kolacinski 
> Signed-off-by: Grzegorz Nitka 

Reviewed-by: Simon Horman 



Re: [Intel-wired-lan] [PATCH iwl-next v1 1/3] ice: Add sync delay for E825C

2025-02-07 Thread Simon Horman
On Thu, Feb 06, 2025 at 09:36:53AM +0100, Grzegorz Nitka wrote:
> From: Karol Kolacinski 
> 
> Implement setting GLTSYN_SYNC_DLAY for E825C products.
> This is the execution delay compensation of SYNC command between
> PHC and PHY.
> Also, refactor the code by changing ice_ptp_init_phc_eth56g function
> name to ice_ptp_init_phc_e825, to be consistent with the naming pattern
> for other devices.

Adding support for GLTSYN_SYNC_DLAY and the refactor seem
to be two distinct changes, albeit touching common code.

I think it would be slightly better to split this into two patches.

> Reviewed-by: Przemek Kitszel 
> Signed-off-by: Karol Kolacinski 
> Signed-off-by: Grzegorz Nitka 

...


Re: [Intel-wired-lan] [PATCH iwl-next v1 2/3] ice: Refactor E825C PHY registers info struct

2025-02-07 Thread Simon Horman
On Fri, Feb 07, 2025 at 10:03:45AM +, Simon Horman wrote:
> On Thu, Feb 06, 2025 at 09:36:54AM +0100, Grzegorz Nitka wrote:
> > From: Karol Kolacinski 
> > 
> > Simplify ice_phy_reg_info_eth56g struct definition to include base
> > address for the very first quad. Use base address info and 'step'
> > value to determine address for specific PHY quad.
> > 
> > Reviewed-by: Przemek Kitszel 
> > Signed-off-by: Karol Kolacinski 
> > Signed-off-by: Grzegorz Nitka 
> 
> Reviewed-by: Simon Horman 

Sorry, I failed to notice that the kdoc for ice_phy_reg_info_eth56g
needs to be updated to document base_addr instead of base.


Re: [Intel-wired-lan] igb: XDP/ZC busy polling

2025-02-07 Thread Joe Damato
On Fri, Feb 07, 2025 at 09:38:41AM +0100, Kurt Kanzenbach wrote:
> Hello Joe,
> 
> I noticed that XDP/ZC busy polling does not work anymore in combination
> with igb driver. This seems to be related to commit 5ef44b3cb43b ("xsk:
> Bring back busy polling support") which relies on
> netif_queue_set_napi().
> 
> I see you implemented it for e1000, igc and so on. However, igb is
> missing. Do you have any plans to add the missing registration to igb?
> Just asking. Otherwise, I can send a patch for it.

Please feel free; I don't have an igb device so I wouldn't be able
to test it, but I'd happily review it so please CC me.

BTW, I wrote a small series that updates the documentation and adds
a test for AF_XDP [1] that you may want to consider applying/running
(if it is not merged by the time you add support to igb).

[1]: https://lore.kernel.org/lkml/20250207030916.32751-1-jdam...@fastly.com/


Re: [Intel-wired-lan] [PATCH iwl-net v2] ice: health.c: fix compilation on gcc 7.5

2025-02-07 Thread Simon Horman
On Thu, Feb 06, 2025 at 11:30:23PM +0100, Przemek Kitszel wrote:
> GCC 7 is not as good as GCC 8+ in telling what is a compile-time
> const, and thus could be used for static storage.
> Fortunately keeping strings as const arrays is enough to make old
> gcc happy.
> 
> Excerpt from the report:
> My GCC is: gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0.
> 
>   CC [M]  drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.o
> drivers/net/ethernet/intel/ice/devlink/health.c:35:3: error: initializer 
> element is not constant
>ice_common_port_solutions, {ice_port_number_label}},
>^
> drivers/net/ethernet/intel/ice/devlink/health.c:35:3: note: (near 
> initialization for 'ice_health_status_lookup[0].solution')
> drivers/net/ethernet/intel/ice/devlink/health.c:35:31: error: initializer 
> element is not constant
>ice_common_port_solutions, {ice_port_number_label}},
>^
> drivers/net/ethernet/intel/ice/devlink/health.c:35:31: note: (near 
> initialization for 'ice_health_status_lookup[0].data_label[0]')
> drivers/net/ethernet/intel/ice/devlink/health.c:37:46: error: initializer 
> element is not constant
>"Change or replace the module or cable.", {ice_port_number_label}},
>   ^
> drivers/net/ethernet/intel/ice/devlink/health.c:37:46: note: (near 
> initialization for 'ice_health_status_lookup[1].data_label[0]')
> drivers/net/ethernet/intel/ice/devlink/health.c:39:3: error: initializer 
> element is not constant
>ice_common_port_solutions, {ice_port_number_label}},
>^
> 
> Fixes: 85d6164ec56d ("ice: add fw and port health reporters")
> Reported-by: Qiuxu Zhuo 
> Closes: 
> https://lore.kernel.org/netdev/cy8pr11mb7134bf7a46d71e50d25fa7a989...@cy8pr11mb7134.namprd11.prod.outlook.com
> Reviewed-by: Michal Swiatkowski 
> Suggested-by: Simon Horman 
> Signed-off-by: Przemek Kitszel 
> ---
> v2: use static const char[] instead of #define - Simon
> +added RB tag from Michal, but not adding TB tag from Qiuxu
> 
> v1:
>  
> https://lore.kernel.org/netdev/20250205104252.30464-2-przemyslaw.kits...@intel.com
> 
> CC: Kees Cook 
> CC: Jiri Slaby 

Thanks Przemek,

Testing locally gcc 7.5.0 [1] seems happy with this.

Reviewed-by: Simon Horman 

[1] https://mirrors.edge.kernel.org/pub/tools/crosstool/files/bin/x86_64/7.5.0/


Re: [Intel-wired-lan] [iwl-next v1 3/4] ixgbe: add Tx hang detection unhandled MDD

2025-02-07 Thread Simon Horman
On Fri, Feb 07, 2025 at 11:43:42AM +0100, Michal Swiatkowski wrote:
> From: Slawomir Mrozowicz 
> 
> Add Tx Hang detection due to an unhandled MDD Event.
> 
> Previously, a malicious VF could disable the entire port causing
> TX to hang on the E610 card.
> Those events that caused PF to freeze were not detected
> as an MDD event and usually required a Tx Hang watchdog timer
> to catch the suspension, and perform a physical function reset.
> 
> Implement flows in the affected PF driver in such a way to check
> the cause of the hang, detect it as an MDD event and log an
> entry of the malicious VF that caused the Hang.
> 
> The PF blocks the malicious VF, if it continues to be the source
> of several MDD events.
> 
> Reviewed-by: Przemek Kitszel 
> Reviewed-by: Marcin Szycik 
> Signed-off-by: Slawomir Mrozowicz 
> Co-developed-by: Michal Swiatkowski 
> Signed-off-by: Michal Swiatkowski 

...

> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h 
> b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
> index aa3b498558bc..e07b56625595 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
> @@ -1044,6 +1044,7 @@ struct ixgbe_nvm_version {
>  #define IXGBE_GCR_EXT_VT_MODE_160x0001
>  #define IXGBE_GCR_EXT_VT_MODE_320x0002
>  #define IXGBE_GCR_EXT_VT_MODE_640x0003
> +#define IXGBE_GCR_EXT_VT_MODE_MASK   0x0003

nit: For consistency I think spaces should be used to indent 0x0003

>  #define IXGBE_GCR_EXT_SRIOV (IXGBE_GCR_EXT_MSIX_EN | \
>IXGBE_GCR_EXT_VT_MODE_64)
>  

...

> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
> b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c

...

> +static u32 ixgbe_poll_tx_icache(struct ixgbe_hw *hw, u16 queue, u16 idx)
> +{
> + IXGBE_WRITE_REG(hw, IXGBE_TXDESCIC, queue * idx);
> + return IXGBE_READ_REG(hw, IXGBE_TXDESCIC);
> +}
> +
> +/**
> + * ixgbe_check_illegal_queue - search for queue with illegal packet
> + * @adapter: structure containing ring specific data
> + * @queue: queue index
> + *
> + * Check if tx descriptor connected with input queue
> + * contains illegal packet.
> + *
> + * Returns: true if queue contain illegal packet.
> + */
> +static bool ixgbe_check_illegal_queue(struct ixgbe_adapter *adapter,
> +   u16 queue)
> +{
> + u32 hdr_len_reg, mss_len_reg, type_reg;
> + struct ixgbe_hw *hw = &adapter->hw;
> + u32 mss_len, header_len, reg;
> +
> + for (u16 i = 0; i < IXGBE_MAX_TX_DESCRIPTORS; i++) {
> + /* HW will clear bit IXGBE_TXDESCIC_READY when address
> +  * is written to address field. HW will set this bit
> +  * when iCache read is done, and data is ready at TIC_DWx.
> +  * Set descriptor address.
> +  */
> + read_poll_timeout(ixgbe_poll_tx_icache, reg,
> +   !(reg & IXGBE_TXDESCIC_READY), 0, 0, false,
> +   hw, queue, i);
> +
> + /* read tx descriptor access registers */
> + hdr_len_reg = IXGBE_READ_REG(hw, 
> IXGBE_TIC_DW2(IXGBE_VLAN_MACIP_LENS_REG));
> + type_reg = IXGBE_READ_REG(hw, 
> IXGBE_TIC_DW2(IXGBE_TYPE_TUCMD_MLHL));
> + mss_len_reg = IXGBE_READ_REG(hw, 
> IXGBE_TIC_DW2(IXGBE_MSS_L4LEN_IDX));
> +
> + /* check if Advanced Context Descriptor */
> + if (FIELD_GET(IXGBE_ADVTXD_DTYP_MASK, type_reg) !=
> + IXGBE_ADVTXD_DTYP_CTXT)
> + continue;
> +
> + /* check for illegal MSS and Header length */
> + mss_len = FIELD_GET(IXGBE_ADVTXD_MSS_MASK, mss_len_reg);
> + header_len = FIELD_GET(IXGBE_ADVTXD_HEADER_LEN_MASK,
> +hdr_len_reg);
> + if ((mss_len + header_len) > SZ_16K) {
> + e_warn(probe,
> +"mss len + header len too long\n");

nit: The above two lines can be a single line.

> + return true;
> + }
> + }
> +
> + return false;
> +}
> +
> +/**
> + * ixgbe_handle_mdd_event - handle mdd event
> + * @adapter: structure containing ring specific data
> + * @tx_ring: tx descriptor ring to handle
> + *
> + * Reset VF driver if malicious vf detected or
> + * illegal packet in an any queue detected.
> + */
> +static void ixgbe_handle_mdd_event(struct ixgbe_adapter *adapter,
> +struct ixgbe_ring *tx_ring)
> +{
> + u16 vf, q;
> +
> + if (adapter->vfinfo && ixgbe_check_mdd_event(adapter)) {
> + /* vf mdd info and malicious vf detected */
> + if (!ixgbe_get_vf_idx(adapter, tx_ring->queue_index, &vf))
> + ixgbe_vf_handle_tx_hang(adapter, vf);
> + } else {
> + /* malicious vf not detected */
> + for (q = 0; q < IXGBE_MAX_TX_QUEUES; q++) {

Re: [Intel-wired-lan] [iwl-next v1 1/4] ixgbe: add MDD support

2025-02-07 Thread Simon Horman
On Fri, Feb 07, 2025 at 11:43:40AM +0100, Michal Swiatkowski wrote:
> From: Paul Greenwalt 
> 
> Add malicious driver detection. Support enabling MDD, disabling MDD,
> handling a MDD event, and restoring a MDD VF.
> 
> Reviewed-by: Przemek Kitszel 
> Reviewed-by: Jedrzej Jagielski 
> Reviewed-by: Marcin Szycik 
> Signed-off-by: Paul Greenwalt 
> Signed-off-by: Michal Swiatkowski 

...

> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c 
> b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c

...

> +/**
> + * ixgbe_handle_mdd_x550 - handle malicious driver detection event
> + * @hw: pointer to hardware structure
> + * @vf_bitmap: output vf bitmap of malicious vfs
> + */
> +void ixgbe_handle_mdd_x550(struct ixgbe_hw *hw, unsigned long *vf_bitmap)
> +{
> + u32 i, j, reg, q, div, vf, wqbr;
> +
> + /* figure out pool size for mapping to vf's */
> + reg = IXGBE_READ_REG(hw, IXGBE_MRQC);
> + switch (reg & IXGBE_MRQC_MRQE_MASK) {
> + case IXGBE_MRQC_VMDQRT8TCEN:
> + div = IXGBE_16VFS_QUEUES;
> + break;
> + case IXGBE_MRQC_VMDQRSS32EN:
> + case IXGBE_MRQC_VMDQRT4TCEN:
> + div = IXGBE_32VFS_QUEUES;
> + break;
> + default:
> + div = IXGBE_64VFS_QUEUES;
> + break;
> + }
> +
> + /* Read WQBR_TX and WQBR_RX and check for malicious queues */
> + for (i = 0; i < IXGBE_QUEUES_REG_AMOUNT; i++) {
> + wqbr = IXGBE_READ_REG(hw, IXGBE_WQBR_TX(i)) |
> +IXGBE_READ_REG(hw, IXGBE_WQBR_RX(i));
> + if (!wqbr)
> + continue;
> +
> + /* Get malicious queue */
> + for_each_set_bit(j, (unsigned long *)&wqbr,
> +  IXGBE_QUEUES_PER_REG) {

The type of wqbr is a u32, that is, it is 32 bits wide.
Above, its address is cast to unsigned long *.
But, unsigned long may be 64-bits wide, e.g. on x86_64.

GCC 14.2.0 EXTRA_CFLAGS=-Warray-bounds builds report this as:

In file included from ./include/linux/bitmap.h:11,
 from ./include/linux/cpumask.h:12,
 from ./arch/x86/include/asm/paravirt.h:21,
 from ./arch/x86/include/asm/cpuid.h:71,
 from ./arch/x86/include/asm/processor.h:19,
 from ./arch/x86/include/asm/cpufeature.h:5,
 from ./arch/x86/include/asm/thread_info.h:59,
 from ./include/linux/thread_info.h:60,
 from ./include/linux/uio.h:9,
 from ./include/linux/socket.h:8,
 from ./include/uapi/linux/if.h:25,
 from ./include/linux/mii.h:12,
 from ./include/uapi/linux/mdio.h:15,
 from ./include/linux/mdio.h:9,
 from drivers/net/ethernet/intel/ixgbe/ixgbe_type.h:8,
 from drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h:7,
 from drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c:4:
In function ‘find_next_bit’,
inlined from ‘ixgbe_handle_mdd_x550’ at 
drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c:3907:3:
./include/linux/find.h:65:23: error: array subscript ‘long unsigned int[0]’ is 
partly outside array bounds of ‘u32[1]’ {aka ‘unsigned int[1]’} 
[-Werror=array-bounds=]
   65 | val = *addr & GENMASK(size - 1, offset);
  |   ^

I think this can be addressed by changing the type of wqbr to unsigned long.

> + /* Get queue from bitmask */
> + q = j + (i * IXGBE_QUEUES_PER_REG);
> + /* Map queue to vf */
> + vf = q / div;
> + set_bit(vf, vf_bitmap);
> + }
> + }
> +}
> +
>  #define X550_COMMON_MAC \
>   .init_hw= &ixgbe_init_hw_generic, \
>   .start_hw   = &ixgbe_start_hw_X540, \

...


Re: [Intel-wired-lan] [PATCH iwl-net] ice: health.c: fix compilation on gcc 7.5

2025-02-07 Thread Kees Cook
On Wed, Feb 05, 2025 at 08:45:46PM +, Simon Horman wrote:
> I ran into a similar problem not so long ago and I'm wondering if
> the following, based on a suggestion by Jiri Slaby, resolves your
> problem.
> 
> diff --git a/drivers/net/ethernet/intel/ice/devlink/health.c 
> b/drivers/net/ethernet/intel/ice/devlink/health.c
> index ea40f7941259..19c3d37aa768 100644
> --- a/drivers/net/ethernet/intel/ice/devlink/health.c
> +++ b/drivers/net/ethernet/intel/ice/devlink/health.c
> @@ -25,10 +25,10 @@ struct ice_health_status {
>   * The below lookup requires to be sorted by code.
>   */
>  
> -static const char *const ice_common_port_solutions =
> +static const char ice_common_port_solutions[] =
>   "Check your cable connection. Change or replace the module or cable. 
> Manually set speed and duplex.";
> -static const char *const ice_port_number_label = "Port Number";
> -static const char *const ice_update_nvm_solution = "Update to the latest NVM 
> image.";
> +static const char ice_port_number_label[] = "Port Number";
> +static const char ice_update_nvm_solution[] = "Update to the latest NVM 
> image.";
>  
>  static const struct ice_health_status ice_health_status_lookup[] = {
>   {ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT, "An unsupported module 
> was detected.",
> 

I'd agree that would be the preferred fix. :)

-- 
Kees Cook


[Intel-wired-lan] [PATCH iwl-next v3 2/9] igc: Rename xdp_get_tx_ring() for non-xdp usage

2025-02-07 Thread Faizal Rahim
Renamed xdp_get_tx_ring() function to a more generic name for use in
upcoming frame preemption patches.

Signed-off-by: Faizal Rahim 
---
 drivers/net/ethernet/intel/igc/igc.h  |  2 +-
 drivers/net/ethernet/intel/igc/igc_main.c | 10 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h 
b/drivers/net/ethernet/intel/igc/igc.h
index b8111ad9a9a8..22ecdac26cf4 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -736,7 +736,7 @@ struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter 
*adapter,
  u32 location);
 int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule);
 void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule);
-
+struct igc_ring *igc_get_tx_ring(struct igc_adapter *adapter, int cpu);
 void igc_ptp_init(struct igc_adapter *adapter);
 void igc_ptp_reset(struct igc_adapter *adapter);
 void igc_ptp_suspend(struct igc_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c 
b/drivers/net/ethernet/intel/igc/igc_main.c
index 56a35d58e7a6..44e4f925491f 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2444,8 +2444,8 @@ static int igc_xdp_init_tx_descriptor(struct igc_ring 
*ring,
return -ENOMEM;
 }
 
-static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter,
-   int cpu)
+struct igc_ring *igc_get_tx_ring(struct igc_adapter *adapter,
+int cpu)
 {
int index = cpu;
 
@@ -2469,7 +2469,7 @@ static int igc_xdp_xmit_back(struct igc_adapter *adapter, 
struct xdp_buff *xdp)
if (unlikely(!xdpf))
return -EFAULT;
 
-   ring = igc_xdp_get_tx_ring(adapter, cpu);
+   ring = igc_get_tx_ring(adapter, cpu);
nq = txring_txq(ring);
 
__netif_tx_lock(nq, cpu);
@@ -2546,7 +2546,7 @@ static void igc_finalize_xdp(struct igc_adapter *adapter, 
int status)
struct igc_ring *ring;
 
if (status & IGC_XDP_TX) {
-   ring = igc_xdp_get_tx_ring(adapter, cpu);
+   ring = igc_get_tx_ring(adapter, cpu);
nq = txring_txq(ring);
 
__netif_tx_lock(nq, cpu);
@@ -6699,7 +6699,7 @@ static int igc_xdp_xmit(struct net_device *dev, int 
num_frames,
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
return -EINVAL;
 
-   ring = igc_xdp_get_tx_ring(adapter, cpu);
+   ring = igc_get_tx_ring(adapter, cpu);
nq = txring_txq(ring);
 
__netif_tx_lock(nq, cpu);
-- 
2.34.1



[Intel-wired-lan] [PATCH iwl-next v3 1/9] net: ethtool: mm: extract stmmac verification logic into common library

2025-02-07 Thread Faizal Rahim
From: Vladimir Oltean 

It appears that stmmac is not the only hardware which requires a
software-driven verification state machine for the MAC Merge layer.

While on the one hand it's good to encourage hardware implementations,
on the other hand it's quite difficult to tolerate multiple drivers
implementing independently fairly non-trivial logic.

Extract the hardware-independent logic from stmmac into library code and
put it in ethtool. Name the state structure "mmsv" for MAC Merge
Software Verification. Let this expose an operations structure for
executing the hardware stuff: sync hardware with the tx_active boolean
(result of verification process), enable/disable the pMAC, send mPackets,
notify library of external events (reception of mPackets), as well as
link state changes.

Note that it is assumed that the external events are received in hardirq
context. If they are not, it is probably a good idea to disable hardirqs
when calling ethtool_mmsv_event_handle(), because the library does not
do so.

Also, the MM software verification process has no business with the
tx_min_frag_size, that is all the driver's to handle.

Signed-off-by: Vladimir Oltean 
Co-developed-by: Choong Yong Liang 
Signed-off-by: Choong Yong Liang 
Co-developed-by: Faizal Rahim 
Signed-off-by: Faizal Rahim 
Tested-by: Choong Yong Liang 
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h  |  16 +-
 .../ethernet/stmicro/stmmac/stmmac_ethtool.c  |  41 +---
 .../net/ethernet/stmicro/stmmac/stmmac_fpe.c  | 174 +++---
 .../net/ethernet/stmicro/stmmac/stmmac_fpe.h  |   5 -
 .../net/ethernet/stmicro/stmmac/stmmac_main.c |   8 +-
 include/linux/ethtool.h   |  61 +
 net/ethtool/mm.c  | 222 ++
 7 files changed, 327 insertions(+), 200 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h 
b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index f05cae103d83..c9cc41af258a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -147,21 +147,9 @@ struct stmmac_channel {
 };
 
 struct stmmac_fpe_cfg {
-   /* Serialize access to MAC Merge state between ethtool requests
-* and link state updates.
-*/
-   spinlock_t lock;
-
+   struct ethtool_mmsv mmsv;
const struct stmmac_fpe_reg *reg;
-   u32 fpe_csr;/* MAC_FPE_CTRL_STS reg cache */
-
-   enum ethtool_mm_verify_status status;
-   struct timer_list verify_timer;
-   bool verify_enabled;
-   int verify_retries;
-   bool pmac_enabled;
-   u32 verify_time;
-   bool tx_enabled;
+   u32 fpe_csr;/* MAC_FPE_CTRL_STS reg cache */
 };
 
 struct stmmac_tc_entry {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c 
b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 918a32f8fda8..8e6b052e00d6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -1210,37 +1210,17 @@ static int stmmac_get_mm(struct net_device *ndev,
 struct ethtool_mm_state *state)
 {
struct stmmac_priv *priv = netdev_priv(ndev);
-   unsigned long flags;
u32 frag_size;
 
if (!stmmac_fpe_supported(priv))
return -EOPNOTSUPP;
 
-   spin_lock_irqsave(&priv->fpe_cfg.lock, flags);
+   ethtool_mmsv_get_mm(&priv->fpe_cfg.mmsv, state);
 
-   state->max_verify_time = STMMAC_FPE_MM_MAX_VERIFY_TIME_MS;
-   state->verify_enabled = priv->fpe_cfg.verify_enabled;
-   state->pmac_enabled = priv->fpe_cfg.pmac_enabled;
-   state->verify_time = priv->fpe_cfg.verify_time;
-   state->tx_enabled = priv->fpe_cfg.tx_enabled;
-   state->verify_status = priv->fpe_cfg.status;
state->rx_min_frag_size = ETH_ZLEN;
-
-   /* FPE active if common tx_enabled and
-* (verification success or disabled(forced))
-*/
-   if (state->tx_enabled &&
-   (state->verify_status == ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED ||
-state->verify_status == ETHTOOL_MM_VERIFY_STATUS_DISABLED))
-   state->tx_active = true;
-   else
-   state->tx_active = false;
-
frag_size = stmmac_fpe_get_add_frag_size(priv);
state->tx_min_frag_size = ethtool_mm_frag_size_add_to_min(frag_size);
 
-   spin_unlock_irqrestore(&priv->fpe_cfg.lock, flags);
-
return 0;
 }
 
@@ -1248,8 +1228,6 @@ static int stmmac_set_mm(struct net_device *ndev, struct 
ethtool_mm_cfg *cfg,
 struct netlink_ext_ack *extack)
 {
struct stmmac_priv *priv = netdev_priv(ndev);
-   struct stmmac_fpe_cfg *fpe_cfg = &priv->fpe_cfg;
-   unsigned long flags;
u32 frag_size;
int err;
 
@@ -1258,23 +1236,8 @@ static int stmmac_set_mm(struct net_device *ndev, struct 
ethtool_mm_cfg *cfg,
if (err)
return err;
 
-   /* Wait for the verifi

[Intel-wired-lan] [PATCH iwl-next v3 5/9] igc: Add support for frame preemption verification

2025-02-07 Thread Faizal Rahim
This patch implements the "ethtool --set-mm" callback to trigger the
frame preemption verification handshake.

Uses the MAC Merge Software Verification (mmsv) mechanism in ethtool
to perform the verification handshake for igc.
The structure fpe.mmsv is set by mmsv in ethtool and should remain
read-only for the driver.

Other mmsv callbacks:
a) configure_tx() -> not used yet at this point
   - igc lacks registers to configure FPE in the transmit direction, so
 this API is not utilized for now. A future patch will use it to
 control preemptible queue config.

b) configure_pmac() -> not used
   - this callback dynamically controls pmac_enabled at runtime. For
 example, mmsv calls configure_pmac() and disables pmac_enabled when
 the link partner goes down, even if the user previously enabled it.
 The intention is to save power but it is not feasible in igc
 because it causes an endless adapter reset loop:

 1) Board A and Board B complete the verification handshake.
Tx mode register for both boards are in TSN mode.
 2) Board B link goes down.

 On Board A:
 3) mmsv calls configure_pmac() with pmac_enabled = false.
 4) configure_pmac() in igc updates a new field based on
pmac_enabled. Driver uses this field in igc_tsn_new_flags()
to indicate that the user enabled/disabled FPE.
 5) configure_pmac() in igc calls igc_tsn_offload_apply() to check
whether an adapter reset is needed. Calls existing logic in
igc_tsn_will_tx_mode_change() and igc_tsn_new_flags().
 6) Since pmac_enabled is now disabled and no other TSN feature
is active, igc_tsn_will_tx_mode_change() evaluates to true
because Tx mode will switch from TSN to Legacy.
 7) Driver resets the adapter.
 8) Registers are set, and Tx mode switches to Legacy.
 9) When link partner is up, steps 3–8 repeat, but this time
with pmac_enabled = true, reactivating TSN.
igc_tsn_will_tx_mode_change() evaluates to true again,
since Tx mode will switch from Legacy to TSN.
10) Driver resets the adapter.
11) Rest adapter completes, registers are set, and Tx mode
switches to TSN.

On Board B:
12) Adapter reset on Board A at step 10 causes it to detect its
link partner as down.
13) Repeats steps 3–8.
14) Once reset adapter on Board A is completed at step 11, it
detects its link partner as up.
15) Repeats steps 9–11.

   - this cycle repeats indefinitely. To avoid this issue, igc only uses
 mmsv.pmac_enabled to track whether FPE is enabled or disabled.

Co-developed-by: Vinicius Costa Gomes 
Signed-off-by: Vinicius Costa Gomes 
Co-developed-by: Choong Yong Liang 
Signed-off-by: Choong Yong Liang 
Signed-off-by: Faizal Rahim 
---
 drivers/net/ethernet/intel/igc/igc.h |  12 +-
 drivers/net/ethernet/intel/igc/igc_base.h|   1 +
 drivers/net/ethernet/intel/igc/igc_defines.h |   8 +-
 drivers/net/ethernet/intel/igc/igc_ethtool.c |  21 +++
 drivers/net/ethernet/intel/igc/igc_main.c|  54 ++-
 drivers/net/ethernet/intel/igc/igc_tsn.c | 157 ++-
 drivers/net/ethernet/intel/igc/igc_tsn.h |  33 
 7 files changed, 281 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h 
b/drivers/net/ethernet/intel/igc/igc.h
index 22ecdac26cf4..705bd4739e3b 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -40,6 +40,10 @@ void igc_ethtool_set_ops(struct net_device *);
 
 #define IGC_MAX_TX_TSTAMP_REGS 4
 
+struct fpe_t {
+   struct ethtool_mmsv mmsv;
+};
+
 enum igc_mac_filter_type {
IGC_MAC_FILTER_TYPE_DST = 0,
IGC_MAC_FILTER_TYPE_SRC
@@ -332,6 +336,8 @@ struct igc_adapter {
struct timespec64 period;
} perout[IGC_N_PEROUT];
 
+   struct fpe_t fpe;
+
/* LEDs */
struct mutex led_mutex;
struct igc_led_classdev *leds;
@@ -389,10 +395,11 @@ extern char igc_driver_name[];
 #define IGC_FLAG_TSN_QBV_ENABLED   BIT(17)
 #define IGC_FLAG_TSN_QAV_ENABLED   BIT(18)
 #define IGC_FLAG_TSN_LEGACY_ENABLEDBIT(19)
+#define IGC_FLAG_TSN_PREEMPT_ENABLED   BIT(20)
 
 #define IGC_FLAG_TSN_ANY_ENABLED   \
(IGC_FLAG_TSN_QBV_ENABLED | IGC_FLAG_TSN_QAV_ENABLED |  \
-IGC_FLAG_TSN_LEGACY_ENABLED)
+IGC_FLAG_TSN_LEGACY_ENABLED | IGC_FLAG_TSN_PREEMPT_ENABLED)
 
 #define IGC_FLAG_RSS_FIELD_IPV4_UDPBIT(6)
 #define IGC_FLAG_RSS_FIELD_IPV6_UDPBIT(7)
@@ -736,7 +743,10 @@ struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter 
*adapter,
  u32 location);
 int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule);
 void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule);
+void igc_disab

[Intel-wired-lan] [PATCH iwl-next v3 3/9] igc: Optimize the TX packet buffer utilization

2025-02-07 Thread Faizal Rahim
Packet buffers (RX + TX) total 64KB. Neither RX or TX buffers can be
larger than 34KB. So divide the buffer equally, 32KB for each.

Co-developed-by: Vinicius Costa Gomes 
Signed-off-by: Vinicius Costa Gomes 
Signed-off-by: Faizal Rahim 
---
 drivers/net/ethernet/intel/igc/igc_defines.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h 
b/drivers/net/ethernet/intel/igc/igc_defines.h
index 8e449904aa7d..516ef70c98e9 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -400,7 +400,8 @@
 #define I225_TXPBSIZE_DEFAULT  0x0414 /* TXPBSIZE default */
 #define IGC_RXPBS_CFG_TS_EN0x8000 /* Timestamp in Rx buffer */
 
-#define IGC_TXPBSIZE_TSN   0x04145145 /* 5k bytes buffer for each queue */
+ /* 7KB bytes buffer for each tx queue (total 4 queues) + 4KB for BMC*/
+#define IGC_TXPBSIZE_TSN   0x041c71c7
 
 #define IGC_DTXMXPKTSZ_TSN 0x19 /* 1600 bytes of max TX DMA packet size */
 #define IGC_DTXMXPKTSZ_DEFAULT 0x98 /* 9728-byte Jumbo frames */
-- 
2.34.1



[Intel-wired-lan] [PATCH iwl-next v3 4/9] igc: Set the RX packet buffer size for TSN mode

2025-02-07 Thread Faizal Rahim
In preparation for supporting frame preemption, when entering TSN mode
set the receive packet buffer to 16KB for the Express MAC, 16KB for
the Preemptible MAC and 2KB for the BMC, according to the datasheet
section 7.1.3.2.

Co-developed-by: Vinicius Costa Gomes 
Signed-off-by: Vinicius Costa Gomes 
Signed-off-by: Faizal Rahim 
---
 drivers/net/ethernet/intel/igc/igc_defines.h |  3 +++
 drivers/net/ethernet/intel/igc/igc_tsn.c | 13 +++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h 
b/drivers/net/ethernet/intel/igc/igc_defines.h
index 516ef70c98e9..b19ac6f30dac 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -402,6 +402,9 @@
 
  /* 7KB bytes buffer for each tx queue (total 4 queues) + 4KB for BMC*/
 #define IGC_TXPBSIZE_TSN   0x041c71c7
+/* 15KB for EXP + 15KB for BE + 2KB for BMC */
+#define IGC_RXPBSIZE_TSN   0xf08f
+#define IGC_RXPBSIZE_SIZE_MASK 0x0001
 
 #define IGC_DTXMXPKTSZ_TSN 0x19 /* 1600 bytes of max TX DMA packet size */
 #define IGC_DTXMXPKTSZ_DEFAULT 0x98 /* 9728-byte Jumbo frames */
diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c 
b/drivers/net/ethernet/intel/igc/igc_tsn.c
index 1e44374ca1ff..f0213cfce07d 100644
--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
@@ -132,13 +132,17 @@ static int igc_tsn_disable_offload(struct igc_adapter 
*adapter)
 {
u16 queue_per_tc[4] = { 3, 2, 1, 0 };
struct igc_hw *hw = &adapter->hw;
-   u32 tqavctrl;
+   u32 tqavctrl, rxpbs;
int i;
 
wr32(IGC_GTXOFFSET, 0);
wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT);
 
+   rxpbs = rd32(IGC_RXPBS) & ~IGC_RXPBSIZE_SIZE_MASK;
+   rxpbs |= I225_RXPBSIZE_DEFAULT;
+   wr32(IGC_RXPBS, rxpbs);
+
if (igc_is_device_id_i226(hw))
igc_tsn_restore_retx_default(adapter);
 
@@ -194,7 +198,7 @@ static int igc_tsn_enable_offload(struct igc_adapter 
*adapter)
 {
struct igc_hw *hw = &adapter->hw;
u32 tqavctrl, baset_l, baset_h;
-   u32 sec, nsec, cycle;
+   u32 sec, nsec, cycle, rxpbs;
ktime_t base_time, systim;
int i;
 
@@ -202,6 +206,11 @@ static int igc_tsn_enable_offload(struct igc_adapter 
*adapter)
wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN);
wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN);
 
+   rxpbs = rd32(IGC_RXPBS) & ~IGC_RXPBSIZE_SIZE_MASK;
+   rxpbs |= IGC_RXPBSIZE_TSN;
+
+   wr32(IGC_RXPBS, rxpbs);
+
if (igc_is_device_id_i226(hw))
igc_tsn_set_retx_qbvfullthreshold(adapter);
 
-- 
2.34.1



[Intel-wired-lan] [PATCH iwl-next v3 0/9] igc: Add support for Frame Preemption feature in IGC

2025-02-07 Thread Faizal Rahim
Introduces support for the FPE feature in the IGC driver.

The patches aligns with the upstream FPE API:
https://patchwork.kernel.org/project/netdevbpf/cover/20230220122343.1156614-1-vladimir.olt...@nxp.com/
https://patchwork.kernel.org/project/netdevbpf/cover/20230119122705.73054-1-vladimir.olt...@nxp.com/

It builds upon earlier work:
https://patchwork.kernel.org/project/netdevbpf/cover/20220520011538.109-1-vinicius.go...@intel.com/

The patch series adds the following functionalities to the IGC driver:
a) Configure FPE using `ethtool --set-mm`.
b) Display FPE settings via `ethtool --show-mm`.
c) View FPE statistics using `ethtool --include-statistics --show-mm'.
e) Enable preemptible/express queue with `fp`:
   tc qdisc add ... root taprio \
   fp E E P P

Change Log:
v2 -> v3:
- Implement configure_tx() mmsv callback (Vladimir)
- Use static_branch_inc() and static_branch_dec() (Vladimir)
- Add adapter->fpe.mmsv.pmac_enabled as extra check (Vladimir)
- Remove unnecessary error check in igc_fpe_init_tx_descriptor() (Vladimir)
- Additional places to use FIELD_PREP() instead of manual bit manipulation 
(Vladimir)
- IGC_TXD_POPTS_SMD_V and IGC_TXD_POPTS_SMD_R type change to enum (Vladimir)
- Remove unnecessary netif_running() check in igc_fpe_xmit_frame (Vladimir)
- Rate limit print in igc_fpe_send_mpacket (Vladimir)

v1 -> v2:
- Extract the stmmac verification logic into a common library (Vladimir)
- igc to use common library for verification (Vladimir)
- Fix syntax for kernel-doc to use "Return:" (Vladimir)
- Use FIELD_GET instead of manual bit masking (Vladimir)
- Don't assign 0 to statistics counter in igc_ethtool_get_mm_stats() (Vladimir)
- Use pmac-enabled as a condition to allow MAC address value 0 (Vladimir)
- Define macro register value in increasing value order (Vladimir)
- Fix tx-min-frag-size handling for igc (Vladimir)
- Handle link state changes with verification in igc (Vladimir)
- Add static key for fast path code (Vladimir)
- rx_min_frag_size get from constant (Vladimir)

v1: 
https://patchwork.kernel.org/project/netdevbpf/cover/20241216064720.931522-1-faizal.abdul.ra...@linux.intel.com/
v2: 
https://patchwork.kernel.org/project/netdevbpf/cover/20250205100524.1138523-1-faizal.abdul.ra...@linux.intel.com/

Faizal Rahim (8):
  igc: Rename xdp_get_tx_ring() for non-xdp usage
  igc: Optimize the TX packet buffer utilization
  igc: Set the RX packet buffer size for TSN mode
  igc: Add support for frame preemption verification
  igc: Add support to set tx-min-frag-size
  igc: Add support for preemptible traffic class in taprio
  igc: Add support to get MAC Merge data via ethtool
  igc: Add support to get frame preemption statistics via ethtool

Vladimir Oltean (1):
  net: ethtool: mm: extract stmmac verification logic into common
library

 drivers/net/ethernet/intel/igc/igc.h  |  18 +-
 drivers/net/ethernet/intel/igc/igc_base.h |   1 +
 drivers/net/ethernet/intel/igc/igc_defines.h  |  16 +-
 drivers/net/ethernet/intel/igc/igc_ethtool.c  |  76 ++
 drivers/net/ethernet/intel/igc/igc_main.c | 101 +++-
 drivers/net/ethernet/intel/igc/igc_regs.h |  16 ++
 drivers/net/ethernet/intel/igc/igc_tsn.c  | 220 -
 drivers/net/ethernet/intel/igc/igc_tsn.h  |  34 +++
 drivers/net/ethernet/stmicro/stmmac/stmmac.h  |  16 +-
 .../ethernet/stmicro/stmmac/stmmac_ethtool.c  |  41 +---
 .../net/ethernet/stmicro/stmmac/stmmac_fpe.c  | 174 +++---
 .../net/ethernet/stmicro/stmmac/stmmac_fpe.h  |   5 -
 .../net/ethernet/stmicro/stmmac/stmmac_main.c |   8 +-
 include/linux/ethtool.h   |  61 +
 net/ethtool/mm.c  | 224 +-
 15 files changed, 794 insertions(+), 217 deletions(-)

--
2.34.1



[Intel-wired-lan] [PATCH iwl-next v3 7/9] igc: Add support for preemptible traffic class in taprio

2025-02-07 Thread Faizal Rahim
Set queue as preemptible or express via taprio.
This will eventually set queue-specific preemptible field in TXQCTL
register.

Implement configure_tx(), a callback triggered by mmsv, to set tx_enabled
and update preemptible queue settings. tx_enabled is a new field that
serves as a condition in igc_tsn_enable_offload() before configuring the
preemptible queue. This provides some control over FPE in TX, despite
lacking a dedicated register.

Verified that the correct preemptible hardware queue is set using the
following commands:

a) 1:1 TC-to-Queue Mapping
   $ sudo tc qdisc replace dev enp1s0 parent root handle 100 \
 taprio num_tc 4 map 3 2 1 0 3 3 3 3 3 3 3 3 3 3 3 3 \
 queues 1@0 1@1 1@2 1@3 base-time 0 sched-entry S F 10 \
 fp E E P P

b) Non-1:1 TC-to-Queue Mapping
   $ sudo tc qdisc replace  dev enp1s0 parent root handle 100 \
 taprio num_tc 3 map 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 2
 queues 2@0 1@2 1@3
 fp E E P

Co-developed-by: Vinicius Costa Gomes 
Signed-off-by: Vinicius Costa Gomes 
Signed-off-by: Faizal Rahim 
---
 drivers/net/ethernet/intel/igc/igc.h |  3 +-
 drivers/net/ethernet/intel/igc/igc_defines.h |  1 +
 drivers/net/ethernet/intel/igc/igc_main.c| 36 
 drivers/net/ethernet/intel/igc/igc_tsn.c | 17 +
 4 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h 
b/drivers/net/ethernet/intel/igc/igc.h
index 2f3662143589..59e6fca808e4 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -43,6 +43,7 @@ void igc_ethtool_set_ops(struct net_device *);
 struct fpe_t {
struct ethtool_mmsv mmsv;
u32 tx_min_frag_size;
+   bool tx_enabled;
 };
 
 enum igc_mac_filter_type {
@@ -163,7 +164,7 @@ struct igc_ring {
bool launchtime_enable; /* true if LaunchTime is enabled */
ktime_t last_tx_cycle;  /* end of the cycle with a launchtime 
transmission */
ktime_t last_ff_cycle;  /* Last cycle with an active first flag 
*/
-
+   bool preemptible;   /* True if not express */
u32 start_time;
u32 end_time;
u32 max_sdu;
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h 
b/drivers/net/ethernet/intel/igc/igc_defines.h
index 038ee89f1e08..208899e67308 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -556,6 +556,7 @@
 #define IGC_TXQCTL_QUEUE_MODE_LAUNCHT  0x0001
 #define IGC_TXQCTL_STRICT_CYCLE0x0002
 #define IGC_TXQCTL_STRICT_END  0x0004
+#define IGC_TXQCTL_PREEMPTIBLE 0x0008
 #define IGC_TXQCTL_QAV_SEL_MASK0x00C0
 #define IGC_TXQCTL_QAV_SEL_CBS00x0080
 #define IGC_TXQCTL_QAV_SEL_CBS10x00C0
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c 
b/drivers/net/ethernet/intel/igc/igc_main.c
index 7fe6875d7bf7..f15ac7565fbd 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -6258,6 +6258,39 @@ static bool is_base_time_past(ktime_t base_time, const 
struct timespec64 *now)
return timespec64_compare(now, &b) > 0;
 }
 
+static u32 igc_map_tc_to_queue(const struct igc_adapter *adapter,
+  unsigned long preemptible_tcs)
+{
+   struct net_device *dev = adapter->netdev;
+   u32 i, queue = 0;
+
+   for (i = 0; i < dev->num_tc; i++) {
+   u32 offset, count;
+
+   if (!(preemptible_tcs & BIT(i)))
+   continue;
+
+   offset = dev->tc_to_txq[i].offset;
+   count = dev->tc_to_txq[i].count;
+   queue |= GENMASK(offset + count - 1, offset);
+   }
+
+   return queue;
+}
+
+static void igc_save_preempt_queue(struct igc_adapter *adapter,
+  const struct tc_mqprio_qopt_offload *mqprio)
+{
+   u32 preemptible_queue = igc_map_tc_to_queue(adapter,
+   mqprio->preemptible_tcs);
+
+   for (int i = 0; i < adapter->num_tx_queues; i++) {
+   struct igc_ring *tx_ring = adapter->tx_ring[i];
+
+   tx_ring->preemptible = preemptible_queue & BIT(i);
+   }
+}
+
 static bool validate_schedule(struct igc_adapter *adapter,
  const struct tc_taprio_qopt_offload *qopt)
 {
@@ -6344,6 +6377,7 @@ static int igc_qbv_clear_schedule(struct igc_adapter 
*adapter)
ring->start_time = 0;
ring->end_time = NSEC_PER_SEC;
ring->max_sdu = 0;
+   ring->preemptible = false;
}
 
spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
@@ -6500,6 +6534,8 @@ static int igc_save_qbv_schedule(struct igc_adapter 
*adapter,
ring->max_sdu = 0;
}
 
+   igc_save_preempt_queue(adapter, &qopt->mqprio);
+
return 0;
 

[Intel-wired-lan] [PATCH iwl-next v3 6/9] igc: Add support to set tx-min-frag-size

2025-02-07 Thread Faizal Rahim
Add support to set tx-min-frag-size via set_mm callback in igc.
Increase the max limit of tx-min-frag-size in ethtool from 252 to 256
since i225/6 value range is 64, 128, 192 and 256.

Co-developed-by: Vinicius Costa Gomes 
Signed-off-by: Vinicius Costa Gomes 
Signed-off-by: Faizal Rahim 
---
 drivers/net/ethernet/intel/igc/igc.h |  1 +
 drivers/net/ethernet/intel/igc/igc_defines.h |  1 +
 drivers/net/ethernet/intel/igc/igc_ethtool.c |  5 +++
 drivers/net/ethernet/intel/igc/igc_tsn.c | 37 ++--
 drivers/net/ethernet/intel/igc/igc_tsn.h |  2 +-
 net/ethtool/mm.c |  2 +-
 6 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h 
b/drivers/net/ethernet/intel/igc/igc.h
index 705bd4739e3b..2f3662143589 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -42,6 +42,7 @@ void igc_ethtool_set_ops(struct net_device *);
 
 struct fpe_t {
struct ethtool_mmsv mmsv;
+   u32 tx_min_frag_size;
 };
 
 enum igc_mac_filter_type {
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h 
b/drivers/net/ethernet/intel/igc/igc_defines.h
index 22db1de02964..038ee89f1e08 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -551,6 +551,7 @@
 #define IGC_TQAVCTRL_PREEMPT_ENA   0x0002
 #define IGC_TQAVCTRL_ENHANCED_QAV  0x0008
 #define IGC_TQAVCTRL_FUTSCDDIS 0x0080
+#define IGC_TQAVCTRL_MIN_FRAG_MASK 0xC000
 
 #define IGC_TXQCTL_QUEUE_MODE_LAUNCHT  0x0001
 #define IGC_TXQCTL_STRICT_CYCLE0x0002
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c 
b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index e2a14edf7552..081e24f228b2 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1789,6 +1789,11 @@ static int igc_ethtool_set_mm(struct net_device *netdev,
struct igc_adapter *adapter = netdev_priv(netdev);
struct fpe_t *fpe = &adapter->fpe;
 
+   fpe->tx_min_frag_size = 
igc_fpe_get_supported_frag_size(cmd->tx_min_frag_size);
+   if (fpe->tx_min_frag_size != cmd->tx_min_frag_size)
+   NL_SET_ERR_MSG_MOD(extack,
+  "tx-min-frag-size value set is unsupported. 
Rounded up to supported value (64, 128, 192, 256)");
+
if (fpe->mmsv.pmac_enabled != cmd->pmac_enabled) {
if (cmd->pmac_enabled)
static_branch_inc(&igc_fpe_enabled);
diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c 
b/drivers/net/ethernet/intel/igc/igc_tsn.c
index 7b3c46993cec..d9de2cfb0c17 100644
--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
@@ -7,6 +7,12 @@
 #include "igc_hw.h"
 #include "igc_tsn.h"
 
+#define MIN_MULTPLIER_TX_MIN_FRAG  0
+#define MAX_MULTPLIER_TX_MIN_FRAG  3
+/* Frag size is based on the Section 8.12.2 of the SW User Manual */
+#define TX_MIN_FRAG_SIZE   64
+#define TX_MAX_FRAG_SIZE   (TX_MIN_FRAG_SIZE * (MAX_MULTPLIER_TX_MIN_FRAG 
+ 1))
+
 enum igc_txd_popts_type {
SMD_V = 0x01,
SMD_R = 0x02
@@ -142,6 +148,7 @@ static const struct ethtool_mmsv_ops igc_mmsv_ops = {
 
 void igc_fpe_init(struct igc_adapter *adapter)
 {
+   adapter->fpe.tx_min_frag_size = TX_MIN_FRAG_SIZE;
ethtool_mmsv_init(&adapter->fpe.mmsv, adapter->netdev, &igc_mmsv_ops);
 }
 
@@ -292,7 +299,7 @@ static int igc_tsn_disable_offload(struct igc_adapter 
*adapter)
tqavctrl = rd32(IGC_TQAVCTRL);
tqavctrl &= ~(IGC_TQAVCTRL_TRANSMIT_MODE_TSN |
  IGC_TQAVCTRL_ENHANCED_QAV | IGC_TQAVCTRL_FUTSCDDIS |
- IGC_TQAVCTRL_PREEMPT_ENA);
+ IGC_TQAVCTRL_PREEMPT_ENA | IGC_TQAVCTRL_MIN_FRAG_MASK);
 
wr32(IGC_TQAVCTRL, tqavctrl);
 
@@ -338,12 +345,34 @@ static void igc_tsn_set_retx_qbvfullthreshold(struct 
igc_adapter *adapter)
wr32(IGC_RETX_CTL, retxctl);
 }
 
+static u8 igc_fpe_get_frag_size_mult(const struct fpe_t *fpe)
+{
+   u8 mult = (fpe->tx_min_frag_size / TX_MIN_FRAG_SIZE) - 1;
+
+   return clamp_t(u8, mult, MIN_MULTPLIER_TX_MIN_FRAG,
+  MAX_MULTPLIER_TX_MIN_FRAG);
+}
+
+u32 igc_fpe_get_supported_frag_size(u32 frag_size)
+{
+   const u32 supported_sizes[] = {64, 128, 192, 256};
+
+   /* Find the smallest supported size that is >= frag_size */
+   for (int i = 0; i < ARRAY_SIZE(supported_sizes); i++) {
+   if (frag_size <= supported_sizes[i])
+   return supported_sizes[i];
+   }
+
+   return TX_MAX_FRAG_SIZE; /* Should not happen, value > 256 is blocked 
by ethtool */
+}
+
 static int igc_tsn_enable_offload(struct igc_adapter *adapter)
 {
struct igc_hw *hw = &adapter->hw;
u32 tqavctrl, baset_l, baset_h;
u32 sec, nsec, cycle, rxpbs;
ktime_t base_time, systim

[Intel-wired-lan] [PATCH iwl-next v3 8/9] igc: Add support to get MAC Merge data via ethtool

2025-02-07 Thread Faizal Rahim
Implement "ethtool --show-mm" callback for IGC.

Tested with command:
$ ethtool --show-mm enp1s0
  MAC Merge layer state for enp1s0:
  pMAC enabled: on
  TX enabled: on
  TX active: on
  TX minimum fragment size: 64
  RX minimum fragment size: 60
  Verify enabled: on
  Verify time: 128
  Max verify time: 128
  Verification status: SUCCEEDED

Verified that the fields value are retrieved correctly.

Signed-off-by: Faizal Rahim 
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 14 ++
 drivers/net/ethernet/intel/igc/igc_tsn.h |  1 +
 2 files changed, 15 insertions(+)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c 
b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 081e24f228b2..7f0052e0d50c 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1782,6 +1782,19 @@ static int igc_ethtool_set_eee(struct net_device *netdev,
return 0;
 }
 
+static int igc_ethtool_get_mm(struct net_device *netdev,
+ struct ethtool_mm_state *cmd)
+{
+   struct igc_adapter *adapter = netdev_priv(netdev);
+   struct fpe_t *fpe = &adapter->fpe;
+
+   ethtool_mmsv_get_mm(&fpe->mmsv, cmd);
+   cmd->tx_min_frag_size = fpe->tx_min_frag_size;
+   cmd->rx_min_frag_size = IGC_RX_MIN_FRAG_SIZE;
+
+   return 0;
+}
+
 static int igc_ethtool_set_mm(struct net_device *netdev,
  struct ethtool_mm_cfg *cmd,
  struct netlink_ext_ack *extack)
@@ -2093,6 +2106,7 @@ static const struct ethtool_ops igc_ethtool_ops = {
.set_rxfh   = igc_ethtool_set_rxfh,
.get_ts_info= igc_ethtool_get_ts_info,
.get_channels   = igc_ethtool_get_channels,
+   .get_mm = igc_ethtool_get_mm,
.set_mm = igc_ethtool_set_mm,
.set_channels   = igc_ethtool_set_channels,
.get_priv_flags = igc_ethtool_get_priv_flags,
diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h 
b/drivers/net/ethernet/intel/igc/igc_tsn.h
index 898c4630bc70..c82f9718cb85 100644
--- a/drivers/net/ethernet/intel/igc/igc_tsn.h
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.h
@@ -4,6 +4,7 @@
 #ifndef _IGC_TSN_H_
 #define _IGC_TSN_H_
 
+#define IGC_RX_MIN_FRAG_SIZE   60
 #define SMD_FRAME_SIZE 60
 
 DECLARE_STATIC_KEY_FALSE(igc_fpe_enabled);
-- 
2.34.1



Re: [Intel-wired-lan] [PATCH iwl-next v2 5/9] igc: Add support for frame preemption verification

2025-02-07 Thread Abdul Rahim, Faizal




On 6/2/2025 11:04 pm, Vladimir Oltean wrote:

On Thu, Feb 06, 2025 at 10:40:11PM +0800, Abdul Rahim, Faizal wrote:


Hi Vladimir,

Thanks for the quick review, appreciate your help.

On 6/2/2025 1:12 am, Vladimir Oltean wrote:

On Wed, Feb 05, 2025 at 05:05:20AM -0500, Faizal Rahim wrote:

This patch implements the "ethtool --set-mm" callback to trigger the
frame preemption verification handshake.

Uses the MAC Merge Software Verification (mmsv) mechanism in ethtool
to perform the verification handshake for igc.
The structure fpe.mmsv is set by mmsv in ethtool and should remain
read-only for the driver.

igc does not use two mmsv callbacks:
a) configure_tx()
 - igc lacks registers to configure FPE in the transmit direction.


Yes, maybe, but it's still important to handle this. It tells you when
the preemptible traffic classes should be sent as preemptible on the wire
(i.e. when the verification is either disabled, or it succeeded).

There is a selftest called manual_failed_verification() which supposedly
tests this exact condition: if verification fails, then packets sent to
TC0 are supposed to bump the eMAC's TX counters, even though TC0 is
configured as preemptible. Otherwise stated: even if the tc program says
that a certain traffic class is preemptible, you don't want to actually
send preemptible packets if you haven't verified the link partner can
handle them, since it will likely drop them on RX otherwise.


Even though fpe in tx direction isn't set in igc, it still checks
ethtool_mmsv_is_tx_active() before setting a queue as preemptible.

This is done in :
igc_tsn_enable_offload(struct igc_adapter *adapter) {
{

if (ethtool_mmsv_is_tx_active(&adapter->fpe.mmsv) &&
 ring->preemptible)
txqctl |= IGC_TXQCTL_PREEMPTIBLE;


Wouldn't this handle the situation mentioned ?
Sorry if I miss something here.


And what if tx_active becomes true after you had already configured the
queues with tc (and the above check caused IGC_TXQCTL_PREEMPTIBLE to not
be set)? Shouldn't you set IGC_TXQCTL_PREEMPTIBLE now? Isn't
ethtool_mmsv_configure_tx() exactly the function that notifies you of
changes to tx_active, and hence, aren't you interested in setting up a
callback for it?



Ahh okay, got it. I sent v3 that also included this update. Thanks!


[Intel-wired-lan] [PATCH iwl-next v3 9/9] igc: Add support to get frame preemption statistics via ethtool

2025-02-07 Thread Faizal Rahim
Implemented "ethtool --include-statistics --show-mm" callback for IGC.

Tested preemption scenario to check preemption statistics:
1) Trigger verification handshake on both boards:
$ sudo ethtool --set-mm enp1s0 pmac-enabled on
$ sudo ethtool --set-mm enp1s0 tx-enabled on
$ sudo ethtool --set-mm enp1s0 verify-enabled on
2) Set preemptible or express queue in taprio for tx board:
$ sudo tc qdisc replace dev enp1s0 parent root handle 100 taprio \
  num_tc 4 map 3 2 1 0 3 3 3 3 3 3 3 3 3 3 3 3 \
  queues 1@0 1@1 1@2 1@3 base-time 0 sched-entry S F 10 \
  fp E E P P
3) Send large size packets on preemptible queue
4) Send small size packets on express queue to preempt packets in
   preemptible queue
5) Show preemption statistics on the receiving board:
   $ ethtool --include-statistics --show-mm enp1s0
 MAC Merge layer state for enp1s0:
 pMAC enabled: on
 TX enabled: on
 TX active: on
 TX minimum fragment size: 64
 RX minimum fragment size: 60
 Verify enabled: on
 Verify time: 128
 Max verify time: 128
 Verification status: SUCCEEDED
 Statistics:
MACMergeFrameAssErrorCount: 0
MACMergeFrameSmdErrorCount: 0
MACMergeFrameAssOkCount: 511
MACMergeFragCountRx: 764
MACMergeFragCountTx: 0
MACMergeHoldCount: 0

Co-developed-by: Vinicius Costa Gomes 
Signed-off-by: Vinicius Costa Gomes 
Signed-off-by: Faizal Rahim 
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 36 
 drivers/net/ethernet/intel/igc/igc_main.c|  1 +
 drivers/net/ethernet/intel/igc/igc_regs.h| 16 +
 3 files changed, 53 insertions(+)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c 
b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 7f0052e0d50c..97a1194399b1 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1819,6 +1819,41 @@ static int igc_ethtool_set_mm(struct net_device *netdev,
return igc_tsn_offload_apply(adapter);
 }
 
+/**
+ * igc_ethtool_get_frame_ass_error - Get the frame assembly error count.
+ * @dev: Pointer to the net_device structure.
+ * Return: The count of frame assembly errors.
+ */
+static u64 igc_ethtool_get_frame_ass_error(struct net_device *dev)
+{
+   struct igc_adapter *adapter = netdev_priv(dev);
+   u32 ooo_smdc, ooo_frame_cnt, ooo_frag_cnt; /* Out of order statistics */
+   struct igc_hw *hw = &adapter->hw;
+   u32 miss_frame_frag_cnt;
+   u32 reg_value;
+
+   reg_value = rd32(IGC_PRMEXPRCNT);
+   ooo_smdc = FIELD_GET(IGC_PRMEXPRCNT_OOO_SMDC, reg_value);
+   ooo_frame_cnt = FIELD_GET(IGC_PRMEXPRCNT_OOO_FRAME_CNT, reg_value);
+   ooo_frag_cnt = FIELD_GET(IGC_PRMEXPRCNT_OOO_FRAG_CNT, reg_value);
+   miss_frame_frag_cnt = FIELD_GET(IGC_PRMEXPRCNT_MISS_FRAME_FRAG_CNT,
+   reg_value);
+
+   return ooo_smdc + ooo_frame_cnt + ooo_frag_cnt + miss_frame_frag_cnt;
+}
+
+static void igc_ethtool_get_mm_stats(struct net_device *dev,
+struct ethtool_mm_stats *stats)
+{
+   struct igc_adapter *adapter = netdev_priv(dev);
+   struct igc_hw *hw = &adapter->hw;
+
+   stats->MACMergeFrameAssErrorCount = 
igc_ethtool_get_frame_ass_error(dev);
+   stats->MACMergeFrameAssOkCount = rd32(IGC_PRMPTDRCNT);
+   stats->MACMergeFragCountRx =  rd32(IGC_PRMEVNTRCNT);
+   stats->MACMergeFragCountTx = rd32(IGC_PRMEVNTTCNT);
+}
+
 static int igc_ethtool_get_link_ksettings(struct net_device *netdev,
  struct ethtool_link_ksettings *cmd)
 {
@@ -2108,6 +2143,7 @@ static const struct ethtool_ops igc_ethtool_ops = {
.get_channels   = igc_ethtool_get_channels,
.get_mm = igc_ethtool_get_mm,
.set_mm = igc_ethtool_set_mm,
+   .get_mm_stats   = igc_ethtool_get_mm_stats,
.set_channels   = igc_ethtool_set_channels,
.get_priv_flags = igc_ethtool_get_priv_flags,
.set_priv_flags = igc_ethtool_set_priv_flags,
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c 
b/drivers/net/ethernet/intel/igc/igc_main.c
index f15ac7565fbd..cd5160315993 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -3076,6 +3076,7 @@ static bool igc_clean_tx_irq(struct igc_q_vector 
*q_vector, int napi_budget)
break;
 
if (static_branch_unlikely(&igc_fpe_enabled) &&
+   adapter->fpe.mmsv.pmac_enabled &&
igc_fpe_transmitted_smd_v(tx_desc))
ethtool_mmsv_event_handle(&adapter->fpe.mmsv,
  
ETHTOOL_MMSV_LD_SENT_VERIFY_MPACKET);
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h 
b/drivers/net/ethernet/intel/igc/igc_regs.h
index 12ddc5793651..41dbfb07eb2f 100644
---