[Intel-wired-lan] [rdma v3 17/24] RDMA/irdma: Add support for V2 HMC resource management scheme
From: Vinoth Kumar Chandra Mohan HMC resource initialization is updated to support V1 or V2 approach based on the FW capability. In the V2 approach, driver receives the assigned HMC resources count and verifies if it will fit in the given local memory. If it doesn't fit, the driver load fails. Signed-off-by: Vinoth Kumar Chandra Mohan Signed-off-by: Tatyana Nikolova --- drivers/infiniband/hw/irdma/ctrl.c | 121 - drivers/infiniband/hw/irdma/defs.h | 3 + drivers/infiniband/hw/irdma/type.h | 25 +++--- 3 files changed, 130 insertions(+), 19 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 52bb87f4b2c5..d01c55172f6a 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -2901,6 +2901,41 @@ static int irdma_sc_cq_modify(struct irdma_sc_cq *cq, return 0; } +/** + * irdma_sc_get_decoded_ird_size_gen_3 - get decoded IRD size for GEN 3 + * @ird_enc: IRD encoding + * IRD size defaults to a value of 4 in case of invalid input. 
+ */ +static u16 irdma_sc_get_decoded_ird_size_gen_3(u8 ird_enc) +{ + switch (ird_enc) { + case IRDMA_IRD_HW_SIZE_4096_GEN3: + return 4096; + case IRDMA_IRD_HW_SIZE_2048_GEN3: + return 2048; + case IRDMA_IRD_HW_SIZE_1024_GEN3: + return 1024; + case IRDMA_IRD_HW_SIZE_512_GEN3: + return 512; + case IRDMA_IRD_HW_SIZE_256_GEN3: + return 256; + case IRDMA_IRD_HW_SIZE_128_GEN3: + return 128; + case IRDMA_IRD_HW_SIZE_64_GEN3: + return 64; + case IRDMA_IRD_HW_SIZE_32_GEN3: + return 32; + case IRDMA_IRD_HW_SIZE_16_GEN3: + return 16; + case IRDMA_IRD_HW_SIZE_8_GEN3: + return 8; + case IRDMA_IRD_HW_SIZE_4_GEN3: + return 4; + default: + return 4; + } +} + /** * irdma_check_cqp_progress - check cqp processing progress * @timeout: timeout info struct @@ -3212,6 +3247,7 @@ static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, struct irdma_hmc_fpm_misc *hmc_fpm_misc) { struct irdma_hmc_obj_info *obj_info; + u8 ird_encoding; u64 temp; u32 size; u16 max_pe_sds; @@ -3287,6 +3323,14 @@ static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, hmc_fpm_misc->max_ceqs = FIELD_GET(IRDMA_QUERY_FPM_MAX_CEQS, temp); hmc_fpm_misc->ht_multiplier = FIELD_GET(IRDMA_QUERY_FPM_HTMULTIPLIER, temp); hmc_fpm_misc->timer_bucket = FIELD_GET(IRDMA_QUERY_FPM_TIMERBUCKET, temp); + if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2, + dev->feature_info[IRDMA_FTN_FLAGS])) { + ird_encoding = (u8)FIELD_GET(IRDMA_QUERY_FPM_MAX_IRD, temp); + hmc_fpm_misc->ird = + irdma_sc_get_decoded_ird_size_gen_3(ird_encoding) / 2; + dev->hw_attrs.max_hw_ird = hmc_fpm_misc->ird; + dev->hw_attrs.max_hw_ord = hmc_fpm_misc->ird; + } if (dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) return 0; irdma_sc_decode_fpm_query(buf, 96, obj_info, IRDMA_HMC_IW_FSIMC); @@ -5444,10 +5488,71 @@ static void irdma_set_host_hmc_rsrc_gen_3(struct irdma_sc_dev *dev) avail_sds -= DIV_ROUND_UP(mrwanted, MAX_MR_PER_SD); } + if (FIELD_GET(IRDMA_MANAGE_RSRC_VER2, dev->feature_info[IRDMA_FTN_FLAGS]) && + pblewanted > 
avail_sds * MAX_PBLE_PER_SD) + ibdev_dbg(to_ibdev(dev), + "HMC: Warn: Resource version 2: pble wanted = 0x%x available = 0x%x\n", + pblewanted, avail_sds * MAX_PBLE_PER_SD); + pblewanted = min(pblewanted, avail_sds * MAX_PBLE_PER_SD); hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt = pblewanted; } +/** + * irdma_verify_commit_fpm_gen_3 - verify query fpm values + * @dev: sc device struct + * @max_pages: max local memory available + * @qpwanted: number of qp's wanted + */ +static int irdma_verify_commit_fpm_gen_3(struct irdma_sc_dev *dev, +u32 max_pages, +u32 qpwanted) +{ + struct irdma_hmc_fpm_misc *hmc_fpm_misc; + u32 rrf_cnt, xf_cnt, timer_cnt, pages_needed; + struct irdma_hmc_info *hmc_info; + u32 rrffl_cnt = 0; + u32 xffl_cnt = 0; + u32 q1fl_cnt; + + hmc_info = dev->hmc_info; + hmc_fpm_misc = &dev->hmc_fpm_misc; + + rrf_cnt = roundup_pow_of_two(IRDMA_RRF_MULTIPLIER * qpwanted); + + if (hmc_info->hmc_obj[IRDMA_HMC_IW_RRFFL].max_cnt) + rrffl_cnt = + hmc_info->hmc_obj[IRDMA_HMC_IW_RRF].cnt / + hmc_fpm_misc->rrf_block_size; + + xf_cnt = roundup_pow_of_two(IRDMA_XF_MULTIPLIER * qpwanted
[Intel-wired-lan] [rdma v3 16/24] RDMA/irdma: Extend QP context programming for GEN3
From: Shiraz Saleem Extend the QP context structure with support for new fields specific to GEN3 hardware capabilities. Signed-off-by: Shiraz Saleem Signed-off-by: Tatyana Nikolova --- drivers/infiniband/hw/irdma/ctrl.c | 184 +++- drivers/infiniband/hw/irdma/defs.h | 24 +++- drivers/infiniband/hw/irdma/type.h | 4 + drivers/infiniband/hw/irdma/uda_d.h | 5 +- drivers/infiniband/hw/irdma/verbs.c | 5 + 5 files changed, 215 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 7d2ae701b8f5..52bb87f4b2c5 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -637,13 +637,14 @@ static u8 irdma_sc_get_encoded_ird_size(u16 ird_size) } /** - * irdma_sc_qp_setctx_roce - set qp's context + * irdma_sc_qp_setctx_roce_gen_2 - set qp's context * @qp: sc qp * @qp_ctx: context ptr * @info: ctx info */ -void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx, -struct irdma_qp_host_ctx_info *info) +static void irdma_sc_qp_setctx_roce_gen_2(struct irdma_sc_qp *qp, + __le64 *qp_ctx, + struct irdma_qp_host_ctx_info *info) { struct irdma_roce_offload_info *roce_info; struct irdma_udp_offload_info *udp; @@ -761,6 +762,183 @@ void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx, 8, qp_ctx, IRDMA_QP_CTX_SIZE, false); } +/** + * irdma_sc_get_encoded_ird_size_gen_3 - get encoded IRD size for GEN 3 + * @ird_size: IRD size + * The ird from the connection is rounded to a supported HW setting and then encoded + * for ird_size field of qp_ctx. Consumers are expected to provide valid ird size based + * on hardware attributes. IRD size defaults to a value of 4 in case of invalid input. + */ +static u8 irdma_sc_get_encoded_ird_size_gen_3(u16 ird_size) +{ + switch (ird_size ? 
+ roundup_pow_of_two(2 * ird_size) : 4) { + case 4096: + return IRDMA_IRD_HW_SIZE_4096_GEN3; + case 2048: + return IRDMA_IRD_HW_SIZE_2048_GEN3; + case 1024: + return IRDMA_IRD_HW_SIZE_1024_GEN3; + case 512: + return IRDMA_IRD_HW_SIZE_512_GEN3; + case 256: + return IRDMA_IRD_HW_SIZE_256_GEN3; + case 128: + return IRDMA_IRD_HW_SIZE_128_GEN3; + case 64: + return IRDMA_IRD_HW_SIZE_64_GEN3; + case 32: + return IRDMA_IRD_HW_SIZE_32_GEN3; + case 16: + return IRDMA_IRD_HW_SIZE_16_GEN3; + case 8: + return IRDMA_IRD_HW_SIZE_8_GEN3; + case 4: + default: + break; + } + + return IRDMA_IRD_HW_SIZE_4_GEN3; +} + +/** + * irdma_sc_qp_setctx_roce_gen_3 - set qp's context + * @qp: sc qp + * @qp_ctx: context ptr + * @info: ctx info + */ +static void irdma_sc_qp_setctx_roce_gen_3(struct irdma_sc_qp *qp, + __le64 *qp_ctx, + struct irdma_qp_host_ctx_info *info) +{ + struct irdma_roce_offload_info *roce_info = info->roce_info; + struct irdma_udp_offload_info *udp = info->udp_info; + u64 qw0, qw3, qw7 = 0, qw8 = 0; + u8 push_mode_en; + u32 push_idx; + + qp->user_pri = info->user_pri; + if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) { + push_mode_en = 0; + push_idx = 0; + } else { + push_mode_en = 1; + push_idx = qp->push_idx; + } + + qw0 = FIELD_PREP(IRDMAQPC_RQWQESIZE, qp->qp_uk.rq_wqe_size) | + FIELD_PREP(IRDMAQPC_RCVTPHEN, qp->rcv_tph_en) | + FIELD_PREP(IRDMAQPC_XMITTPHEN, qp->xmit_tph_en) | + FIELD_PREP(IRDMAQPC_RQTPHEN, qp->rq_tph_en) | + FIELD_PREP(IRDMAQPC_SQTPHEN, qp->sq_tph_en) | + FIELD_PREP(IRDMAQPC_PPIDX, push_idx) | + FIELD_PREP(IRDMAQPC_PMENA, push_mode_en) | + FIELD_PREP(IRDMAQPC_DC_TCP_EN, roce_info->dctcp_en) | + FIELD_PREP(IRDMAQPC_ISQP1, roce_info->is_qp1) | + FIELD_PREP(IRDMAQPC_ROCE_TVER, roce_info->roce_tver) | + FIELD_PREP(IRDMAQPC_IPV4, udp->ipv4) | + FIELD_PREP(IRDMAQPC_INSERTVLANTAG, udp->insert_vlan_tag); + set_64bit_val(qp_ctx, 0, qw0); + set_64bit_val(qp_ctx, 8, qp->sq_pa); + set_64bit_val(qp_ctx, 16, qp->rq_pa); + qw3 = FIELD_PREP(IRDMAQPC_RQSIZE, 
qp->hw_rq_size) | + FIELD_PREP(IRDMAQPC_SQSIZE, qp->hw_sq_size) | + FIELD_PREP(IRDMAQPC_TTL, udp->ttl) | + FIELD_PREP(IRDMAQPC_TOS, udp->tos) | + FIELD_PREP(IRDMAQPC_SRCPORTNUM, udp->src_port) | + FIELD_PREP(IRDMAQPC_DESTPORTNUM, udp->dst_port); + set_64bit_val(qp_ctx, 24, qw3); + set_64bit_val(qp_ctx, 32,
[Intel-wired-lan] [rdma v3 09/24] RDMA/irdma: Add GEN3 core driver support
From: Mustafa Ismail Introduce support for the GEN3 auxiliary core driver, which is responsible for initializing PCI-level RDMA resources. Facilitate host-driver communication with the device's Control Plane (CP) to discover capabilities and perform privileged operations through an RDMA-specific messaging interface built atop the IDPF mailbox and virtual channel protocol. Establish the RDMA virtual channel message interface and incorporate operations to retrieve the hardware version and discover capabilities from the CP. Additionally, set up the RDMA MMIO regions and initialize the RF structure. Signed-off-by: Mustafa Ismail Signed-off-by: Tatyana Nikolova --- v3: * Move the call to get RDMA features just after CQP is created, otherwise the feature flags are not defined before used. * Round up to power of two the resource size for Read Responses and Transmit Queue elements in irdma_set_loc_hmc_rsrc_gen_3(). * Clear the lower 2 bits of the tos field before setting the ECN code point to avoid illegal code point AE. * Handle large PD ID indices by populating IRDMA_CQPSQ_STAG_PDID_HI field with the PD ID. * Fix IRDMA_QUERY_FPM_MAX_PE_SDS field size for GEN1 and GEN2 which can't be safely extended to support GEN3. * Adjust the HW AEQ size depending on the GEN. * Make the HMC function id (hmc_fcn_id) 16 bits to enable more functions, supported by GEN3. * Fix #define values to accommodate field extensions required for GEN3. * Clean up unused #defines and variables. * Fix sparse type warnings in ig3rdma_cfg_regions(). * Fix device hmc_fn_id initialization by using the response hmc_fn_id. 
drivers/infiniband/hw/irdma/Makefile | 2 + drivers/infiniband/hw/irdma/ctrl.c | 484 +++ drivers/infiniband/hw/irdma/defs.h | 50 ++- drivers/infiniband/hw/irdma/hmc.c| 18 +- drivers/infiniband/hw/irdma/hmc.h| 19 +- drivers/infiniband/hw/irdma/hw.c | 18 +- drivers/infiniband/hw/irdma/i40iw_if.c | 1 + drivers/infiniband/hw/irdma/icrdma_if.c | 2 + drivers/infiniband/hw/irdma/ig3rdma_hw.h | 11 + drivers/infiniband/hw/irdma/ig3rdma_if.c | 171 drivers/infiniband/hw/irdma/irdma.h | 5 +- drivers/infiniband/hw/irdma/main.c | 55 +++ drivers/infiniband/hw/irdma/main.h | 4 + drivers/infiniband/hw/irdma/pble.c | 20 +- drivers/infiniband/hw/irdma/puda.h | 4 +- drivers/infiniband/hw/irdma/type.h | 67 +++- drivers/infiniband/hw/irdma/user.h | 5 +- drivers/infiniband/hw/irdma/virtchnl.c | 302 ++ drivers/infiniband/hw/irdma/virtchnl.h | 96 + 19 files changed, 1207 insertions(+), 127 deletions(-) create mode 100644 drivers/infiniband/hw/irdma/ig3rdma_hw.h create mode 100644 drivers/infiniband/hw/irdma/ig3rdma_if.c create mode 100644 drivers/infiniband/hw/irdma/virtchnl.c create mode 100644 drivers/infiniband/hw/irdma/virtchnl.h diff --git a/drivers/infiniband/hw/irdma/Makefile b/drivers/infiniband/hw/irdma/Makefile index 2522e4ca650b..3aa63b913377 100644 --- a/drivers/infiniband/hw/irdma/Makefile +++ b/drivers/infiniband/hw/irdma/Makefile @@ -13,6 +13,7 @@ irdma-objs := cm.o\ hw.o\ i40iw_hw.o \ i40iw_if.o \ + ig3rdma_if.o\ icrdma_if.o \ icrdma_hw.o \ main.o \ @@ -23,6 +24,7 @@ irdma-objs := cm.o\ uk.o\ utils.o \ verbs.o \ + virtchnl.o \ ws.o\ CFLAGS_trace.o = -I$(src) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 6aed6169c07d..2375d8dc0b01 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -1080,7 +1080,8 @@ static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev, FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID) | FIELD_PREP(IRDMA_CQPSQ_STAG_STAGLEN, info->total_len)); set_64bit_val(wqe, 16, - 
FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx)); + FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx) | + FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18)); set_64bit_val(wqe, 40, FIELD_PREP(IRDMA_CQPSQ_STAG_HMCFNIDX, info->hmc_fcn_index)); @@ -1165,6 +1166,7 @@ static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID)); set_64bit_val(wqe, 16, FIELD_PREP(IRDMA_CQPSQ_STAG_KEY, info->stag_key) | + FIELD_PREP(IRDMA_CQPSQ_STAG_PDID_HI, info->pd_id >> 18) | FIELD_PREP(IRDMA_CQPSQ_STAG_IDX, info->stag_idx)); if (!info->chunk_size) { set_64bit_val(wqe, 32, info->reg_addr_pa); @@ -1223,7 +1225,8 @@ static int irdma_sc_dealloc_stag(struct irdma
[Intel-wired-lan] [rdma v3 15/24] RDMA/irdma: Add GEN3 virtual QP1 support
From: Shiraz Saleem Add a new RDMA virtual channel op during QP1 creation that allow the Control Plane (CP) to virtualize a regular QP as QP1 on non-default RDMA capable vPorts. Additionally, the CP will return the Qsets to use on the ib_device of the vPort. Signed-off-by: Shiraz Saleem Signed-off-by: Tatyana Nikolova --- drivers/infiniband/hw/irdma/ctrl.c | 10 ++- drivers/infiniband/hw/irdma/main.h | 1 + drivers/infiniband/hw/irdma/utils.c| 30 - drivers/infiniband/hw/irdma/verbs.c| 84 -- drivers/infiniband/hw/irdma/virtchnl.c | 52 drivers/infiniband/hw/irdma/virtchnl.h | 19 ++ 6 files changed, 174 insertions(+), 22 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 8fd2882f75af..7d2ae701b8f5 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -74,6 +74,14 @@ static void irdma_set_qos_info(struct irdma_sc_vsi *vsi, { u8 i; + if (vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { + for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { + vsi->qos[i].qs_handle = vsi->dev->qos[i].qs_handle; + vsi->qos[i].valid = true; + } + + return; + } vsi->qos_rel_bw = l2p->vsi_rel_bw; vsi->qos_prio_type = l2p->vsi_prio_type; vsi->dscp_mode = l2p->dscp_mode; @@ -1877,7 +1885,7 @@ void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi, mutex_init(&vsi->qos[i].qos_mutex); INIT_LIST_HEAD(&vsi->qos[i].qplist); } - if (vsi->register_qset) { + if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2) { vsi->dev->ws_add = irdma_ws_add; vsi->dev->ws_remove = irdma_ws_remove; vsi->dev->ws_reset = irdma_ws_reset; diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index 1dab2ffba5e5..f0196aafe59b 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -260,6 +260,7 @@ struct irdma_pci_f { bool reset:1; bool rsrc_created:1; bool msix_shared:1; + bool hwqp1_rsvd:1; u8 rsrc_profile; u8 *hmc_info_mem; u8 *mem_rsrc; diff --git 
a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 8ab8af02abc9..87c88be47ee3 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -1113,6 +1113,26 @@ static void irdma_dealloc_push_page(struct irdma_pci_f *rf, irdma_put_cqp_request(&rf->cqp, cqp_request); } +static void irdma_free_gsi_qp_rsrc(struct irdma_qp *iwqp, u32 qp_num) +{ + struct irdma_device *iwdev = iwqp->iwdev; + struct irdma_pci_f *rf = iwdev->rf; + unsigned long flags; + + if (rf->sc_dev.hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3) + return; + + irdma_vchnl_req_del_vport(&rf->sc_dev, iwdev->vport_id, qp_num); + + if (qp_num == 1) { + spin_lock_irqsave(&rf->rsrc_lock, flags); + rf->hwqp1_rsvd = false; + spin_unlock_irqrestore(&rf->rsrc_lock, flags); + } else if (qp_num > 2) { + irdma_free_rsrc(rf, rf->allocated_qps, qp_num); + } +} + /** * irdma_free_qp_rsrc - free up memory resources for qp * @iwqp: qp ptr (user or kernel) @@ -1121,7 +1141,7 @@ void irdma_free_qp_rsrc(struct irdma_qp *iwqp) { struct irdma_device *iwdev = iwqp->iwdev; struct irdma_pci_f *rf = iwdev->rf; - u32 qp_num = iwqp->ibqp.qp_num; + u32 qp_num = iwqp->sc_qp.qp_uk.qp_id; irdma_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp); irdma_dealloc_push_page(rf, &iwqp->sc_qp); @@ -1131,8 +1151,12 @@ void irdma_free_qp_rsrc(struct irdma_qp *iwqp) iwqp->sc_qp.user_pri); } - if (qp_num > 2) - irdma_free_rsrc(rf, rf->allocated_qps, qp_num); + if (iwqp->ibqp.qp_type == IB_QPT_GSI) { + irdma_free_gsi_qp_rsrc(iwqp, qp_num); + } else { + if (qp_num > 2) + irdma_free_rsrc(rf, rf->allocated_qps, qp_num); + } dma_free_coherent(rf->sc_dev.hw->device, iwqp->q2_ctx_mem.size, iwqp->q2_ctx_mem.va, iwqp->q2_ctx_mem.pa); iwqp->q2_ctx_mem.va = NULL; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 2535e0f59ceb..cf5a5d28fe53 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -545,6 +545,9 @@ static int 
irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp); irdma_remove_push_mmap_entries(iwqp); + + if (iwqp->sc_qp.qp_uk.qp_id == 1) + iwdev->rf->hwqp1_rsvd = false; irdma_free_qp_rs
[Intel-wired-lan] [iwl-next v3 03/24] idpf: implement core RDMA auxiliary dev create, init, and destroy
From: Joshua Hay Add the initial idpf_idc.c file with the functions to kick off the IDC initialization, create and initialize a core RDMA auxiliary device, and destroy said device. The RDMA core has a dependency on the vports being created by the control plane before it can be initialized. Therefore, once all the vports are up after a hard reset (either during driver load or a function level reset), the core RDMA device info will be created. It is populated with the function type (as distinguished by the IDC initialization function pointer), the core idc_ops function pointers (just stubs for now), the reserved RDMA MSIX table, and various other info the core RDMA auxiliary driver will need. It is then plugged on to the bus. During a function level reset or driver unload, the device will be unplugged from the bus and destroyed. Signed-off-by: Joshua Hay Signed-off-by: Tatyana Nikolova --- v3: - Used signed ret value from ida_alloc and only assign unsigned id if no err - capitalize some abbreviations - add missing field descriptions drivers/net/ethernet/intel/idpf/Makefile | 1 + drivers/net/ethernet/intel/idpf/idpf.h| 10 + drivers/net/ethernet/intel/idpf/idpf_dev.c| 13 ++ drivers/net/ethernet/intel/idpf/idpf_idc.c| 220 ++ drivers/net/ethernet/intel/idpf/idpf_lib.c| 4 + drivers/net/ethernet/intel/idpf/idpf_vf_dev.c | 13 ++ .../net/ethernet/intel/idpf/idpf_virtchnl.c | 19 ++ .../net/ethernet/intel/idpf/idpf_virtchnl.h | 3 + 8 files changed, 283 insertions(+) create mode 100644 drivers/net/ethernet/intel/idpf/idpf_idc.c diff --git a/drivers/net/ethernet/intel/idpf/Makefile b/drivers/net/ethernet/intel/idpf/Makefile index 2ce01a0b5898..bde9c893d8a1 100644 --- a/drivers/net/ethernet/intel/idpf/Makefile +++ b/drivers/net/ethernet/intel/idpf/Makefile @@ -10,6 +10,7 @@ idpf-y := \ idpf_controlq_setup.o \ idpf_dev.o \ idpf_ethtool.o \ + idpf_idc.o \ idpf_lib.o \ idpf_main.o \ idpf_txrx.o \ diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h 
index 8ef7120e6717..64f731fe878c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -17,6 +17,7 @@ struct idpf_vport_max_q; #include #include #include +#include #include "virtchnl2.h" #include "idpf_txrx.h" @@ -202,9 +203,12 @@ struct idpf_reg_ops { /** * struct idpf_dev_ops - Device specific operations * @reg_ops: Register operations + * @idc_init: IDC initialization */ struct idpf_dev_ops { struct idpf_reg_ops reg_ops; + + int (*idc_init)(struct idpf_adapter *adapter); }; /** @@ -522,6 +526,7 @@ struct idpf_vc_xn_manager; * @caps: Negotiated capabilities with device * @vcxn_mngr: Virtchnl transaction manager * @dev_ops: See idpf_dev_ops + * @cdev_info: IDC core device info pointer * @num_vfs: Number of allocated VFs through sysfs. PF does not directly talk * to VFs but is used to initialize them * @crc_enable: Enable CRC insertion offload @@ -580,6 +585,7 @@ struct idpf_adapter { struct idpf_vc_xn_manager *vcxn_mngr; struct idpf_dev_ops dev_ops; + struct idc_rdma_core_dev_info *cdev_info; int num_vfs; bool crc_enable; bool req_tx_splitq; @@ -858,5 +864,9 @@ int idpf_sriov_configure(struct pci_dev *pdev, int num_vfs); u8 idpf_vport_get_hsplit(const struct idpf_vport *vport); bool idpf_vport_set_hsplit(const struct idpf_vport *vport, u8 val); +int idpf_idc_init(struct idpf_adapter *adapter); +int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter, + enum idc_function_type ftype); +void idpf_idc_deinit_core_aux_device(struct idc_rdma_core_dev_info *cdev_info); #endif /* !_IDPF_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_dev.c index 41e4bd49402a..351db7d5dace 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_dev.c +++ b/drivers/net/ethernet/intel/idpf/idpf_dev.c @@ -148,6 +148,17 @@ static void idpf_trigger_reset(struct idpf_adapter *adapter, idpf_get_reg_addr(adapter, PFGEN_CTRL)); } +/** + * idpf_idc_register - register for IDC callbacks + * @adapter: 
Driver specific private structure + * + * Return: 0 on success or error code on failure. + */ +static int idpf_idc_register(struct idpf_adapter *adapter) +{ + return idpf_idc_init_aux_core_dev(adapter, IDC_FUNCTION_TYPE_PF); +} + /** * idpf_reg_ops_init - Initialize register API function pointers * @adapter: Driver specific private structure @@ -168,4 +179,6 @@ static void idpf_reg_ops_init(struct idpf_adapter *adapter) void idpf_dev_ops_init(struct idpf_adapter *adapter) { idpf_reg_ops_init(adapter); + + adapter->dev_ops.idc_init = idpf_idc_register; } diff --git a/drivers/net/ethernet/intel/idpf/id
[Intel-wired-lan] [iwl-next v3 01/24] iidc/ice/irdma: Update IDC to support multiple consumers
From: Dave Ertman To support RDMA for E2000 product, the idpf driver will use the IDC interface with the irdma auxiliary driver, thus becoming a second consumer of it. This requires the IDC be updated to support multiple consumers. The use of exported symbols no longer makes sense because it will require all core drivers (ice/idpf) that can interface with irdma auxiliary driver to be loaded even if hardware is not present for those drivers. To address this, implement an ops struct that will be a universal set of naked function pointers that will be populated by each core driver for the irdma auxiliary driver to call. Also previously, the ice driver was just exporting its entire PF struct to the auxiliary driver, but since each core driver will have its own different PF struct, implement a universal struct that all core drivers can export to the auxiliary driver through the probe call. The iidc.h header file will be divided into two files. The first, idc_rdma.h, will host all of the generic header info that will be needed for RDMA support in the auxiliary device. The second, iidc_rdma.h, will contain specific elements used by Intel drivers to support RDMA. This will be primarily the implementation of a new struct that will be assigned under the new generic opaque element of idc_priv in the idc_core_dev_info struct. Update ice and irdma to conform with the new IIDC interface definitions. Signed-off-by: Dave Ertman Co-developed-by: Mustafa Ismail Signed-off-by: Mustafa Ismail Co-developed-by: Shiraz Saleem Signed-off-by: Shiraz Saleem Signed-off-by: Tatyana Nikolova --- v2: * Minor modifications, like changing EINVAL to ENODEV error codes, etc. 
drivers/infiniband/hw/irdma/main.c| 110 drivers/infiniband/hw/irdma/main.h| 3 +- drivers/infiniband/hw/irdma/osdep.h | 4 +- .../net/ethernet/intel/ice/devlink/devlink.c | 40 ++- drivers/net/ethernet/intel/ice/ice.h | 6 +- drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 46 +++- drivers/net/ethernet/intel/ice/ice_dcb_lib.h | 4 + drivers/net/ethernet/intel/ice/ice_ethtool.c | 8 +- drivers/net/ethernet/intel/ice/ice_idc.c | 255 +++--- drivers/net/ethernet/intel/ice/ice_idc_int.h | 5 +- drivers/net/ethernet/intel/ice/ice_main.c | 18 +- include/linux/net/intel/idc_rdma.h| 138 ++ include/linux/net/intel/iidc.h| 107 include/linux/net/intel/iidc_rdma.h | 67 + 14 files changed, 527 insertions(+), 284 deletions(-) create mode 100644 include/linux/net/intel/idc_rdma.h delete mode 100644 include/linux/net/intel/iidc.h create mode 100644 include/linux/net/intel/iidc_rdma.h diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 3f13200ff71b..9b6f1d8bf06a 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2015 - 2021 Intel Corporation */ #include "main.h" -#include "../../../net/ethernet/intel/ice/ice.h" MODULE_ALIAS("i40iw"); MODULE_AUTHOR("Intel Corporation, "); @@ -61,7 +60,7 @@ static void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev) } static void irdma_fill_qos_info(struct irdma_l2params *l2params, - struct iidc_qos_params *qos_info) + struct iidc_rdma_qos_params *qos_info) { int i; @@ -85,12 +84,13 @@ static void irdma_fill_qos_info(struct irdma_l2params *l2params, } } -static void irdma_iidc_event_handler(struct ice_pf *pf, struct iidc_event *event) +static void irdma_idc_event_handler(struct idc_rdma_core_dev_info *cdev_info, + struct idc_rdma_event *event) { - struct irdma_device *iwdev = dev_get_drvdata(&pf->adev->dev); + struct irdma_device *iwdev = dev_get_drvdata(&cdev_info->adev->dev); struct 
irdma_l2params l2params = {}; - if (*event->type & BIT(IIDC_EVENT_AFTER_MTU_CHANGE)) { + if (*event->type & BIT(IDC_RDMA_EVENT_AFTER_MTU_CHANGE)) { ibdev_dbg(&iwdev->ibdev, "CLNT: new MTU = %d\n", iwdev->netdev->mtu); if (iwdev->vsi.mtu != iwdev->netdev->mtu) { l2params.mtu = iwdev->netdev->mtu; @@ -98,25 +98,26 @@ static void irdma_iidc_event_handler(struct ice_pf *pf, struct iidc_event *event irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev); irdma_change_l2params(&iwdev->vsi, &l2params); } - } else if (*event->type & BIT(IIDC_EVENT_BEFORE_TC_CHANGE)) { + } else if (*event->type & BIT(IDC_RDMA_EVENT_BEFORE_TC_CHANGE)) { if (iwdev->vsi.tc_change_pending) return; irdma_prep_tc_change(iwdev); - } else if (*event->type & BIT(IIDC_EVENT_AFTER_TC_CHANGE)) { -
[Intel-wired-lan] [rdma v3 10/24] RDMA/irdma: Discover and set up GEN3 hardware register layout
From: Christopher Bednarz Discover the hardware register layout for GEN3 devices through an RDMA virtual channel operation with the Control Plane (CP). Set up the corresponding hardware attributes specific to GEN3 devices. Signed-off-by: Christopher Bednarz Signed-off-by: Tatyana Nikolova --- v3: Decrease IG3RDMA_MAX_IRD/ORD_SIZE to 64 for smaller memory requirements. drivers/infiniband/hw/irdma/Makefile | 1 + drivers/infiniband/hw/irdma/ctrl.c | 31 ++-- drivers/infiniband/hw/irdma/defs.h | 12 +- drivers/infiniband/hw/irdma/i40iw_hw.c | 2 + drivers/infiniband/hw/irdma/i40iw_hw.h | 2 + drivers/infiniband/hw/irdma/icrdma_hw.c | 3 + drivers/infiniband/hw/irdma/icrdma_hw.h | 5 +- drivers/infiniband/hw/irdma/ig3rdma_hw.c | 65 + drivers/infiniband/hw/irdma/ig3rdma_hw.h | 18 +++ drivers/infiniband/hw/irdma/irdma.h | 5 + drivers/infiniband/hw/irdma/virtchnl.c | 178 +++ drivers/infiniband/hw/irdma/virtchnl.h | 44 ++ 12 files changed, 351 insertions(+), 15 deletions(-) create mode 100644 drivers/infiniband/hw/irdma/ig3rdma_hw.c diff --git a/drivers/infiniband/hw/irdma/Makefile b/drivers/infiniband/hw/irdma/Makefile index 3aa63b913377..03ceb9e5475f 100644 --- a/drivers/infiniband/hw/irdma/Makefile +++ b/drivers/infiniband/hw/irdma/Makefile @@ -16,6 +16,7 @@ irdma-objs := cm.o\ ig3rdma_if.o\ icrdma_if.o \ icrdma_hw.o \ + ig3rdma_hw.o\ main.o \ pble.o \ puda.o \ diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 2375d8dc0b01..082aaa5fc3bc 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -5672,6 +5672,9 @@ static inline void irdma_sc_init_hw(struct irdma_sc_dev *dev) case IRDMA_GEN_2: icrdma_init_hw(dev); break; + case IRDMA_GEN_3: + ig3rdma_init_hw(dev); + break; } } @@ -5742,18 +5745,26 @@ int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev, irdma_sc_init_hw(dev); - if (irdma_wait_pe_ready(dev)) - return -ETIMEDOUT; + if (dev->privileged) { + if (irdma_wait_pe_ready(dev)) + 
return -ETIMEDOUT; - val = readl(dev->hw_regs[IRDMA_GLPCI_LBARCTRL]); - db_size = (u8)FIELD_GET(IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE, val); - if (db_size != IRDMA_PE_DB_SIZE_4M && db_size != IRDMA_PE_DB_SIZE_8M) { - ibdev_dbg(to_ibdev(dev), - "DEV: RDMA PE doorbell is not enabled in CSR val 0x%x db_size=%d\n", - val, db_size); - return -ENODEV; + val = readl(dev->hw_regs[IRDMA_GLPCI_LBARCTRL]); + db_size = (u8)FIELD_GET(IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE, val); + if (db_size != IRDMA_PE_DB_SIZE_4M && + db_size != IRDMA_PE_DB_SIZE_8M) { + ibdev_dbg(to_ibdev(dev), + "DEV: RDMA PE doorbell is not enabled in CSR val 0x%x db_size=%d\n", + val, db_size); + return -ENODEV; + } + } else { + ret_code = irdma_vchnl_req_get_reg_layout(dev); + if (ret_code) + ibdev_dbg(to_ibdev(dev), + "DEV: Get Register layout failed ret = %d\n", + ret_code); } - dev->db_addr = dev->hw->hw_addr + (uintptr_t)dev->hw_regs[IRDMA_DB_ADDR_OFFSET]; return ret_code; } diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index 7d363088b5c3..425bcd17abe9 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -115,6 +115,7 @@ enum irdma_protocol_used { #define IRDMA_FEATURE_BUF_SIZE (8 * IRDMA_MAX_FEATURES) #define ENABLE_LOC_MEM 63 +#define IRDMA_ATOMICS_ALLOWED_BIT 1 #define MAX_PBLE_PER_SD0x4 #define MAX_PBLE_SD_PER_FCN0x400 #define MAX_MR_PER_SD 0x8000 @@ -127,7 +128,7 @@ enum irdma_protocol_used { #define IRDMA_QP_SW_MAX_RQ_QUANTA 32768 #define IRDMA_MAX_QP_WRS(max_quanta_per_wr) \ ((IRDMA_QP_SW_MAX_WQ_QUANTA - IRDMA_SQ_RSVD) / (max_quanta_per_wr)) - +#define IRDMA_SRQ_MAX_QUANTA 262144 #define IRDMAQP_TERM_SEND_TERM_AND_FIN 0 #define IRDMAQP_TERM_SEND_TERM_ONLY1 #define IRDMAQP_TERM_SEND_FIN_ONLY 2 @@ -153,8 +154,13 @@ enum irdma_protocol_used { #define IRDMA_SQ_RSVD 258 #define IRDMA_RQ_RSVD 1 -#define IRDMA_FEATURE_RTS_AE 1ULL -#define IRDMA_FEATURE_CQ_RESIZE2ULL +#define IRDMA_FEATURE_RTS_AE BIT_ULL(0) +#def
[Intel-wired-lan] [rdma v3 13/24] RDMA/irdma: Add GEN3 HW statistics support
From: Krzysztof Czurylo Plug into the unified HW statistics framework by adding a hardware statistics map array for GEN3, defining the HW-specific width and location for each counter in the statistics buffer. Signed-off-by: Krzysztof Czurylo Signed-off-by: Tatyana Nikolova --- drivers/infiniband/hw/irdma/ctrl.c | 33 +-- drivers/infiniband/hw/irdma/defs.h | 2 +- drivers/infiniband/hw/irdma/ig3rdma_hw.c | 63 + drivers/infiniband/hw/irdma/type.h | 19 +++- drivers/infiniband/hw/irdma/verbs.c | 110 +-- 5 files changed, 166 insertions(+), 61 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 55080b56311b..8fd2882f75af 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -1968,7 +1968,8 @@ int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, (void *)((uintptr_t)stats_buff_mem->va + IRDMA_GATHER_STATS_BUF_SIZE); - irdma_hw_stats_start_timer(vsi); + if (vsi->dev->hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3) + irdma_hw_stats_start_timer(vsi); /* when stat allocation is not required default to fcn_id. 
*/ vsi->stats_idx = info->fcn_id; @@ -2013,7 +2014,9 @@ void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi) if (!vsi->pestat) return; - irdma_hw_stats_stop_timer(vsi); + + if (dev->hw_attrs.uk_attrs.hw_rev < IRDMA_GEN_3) + irdma_hw_stats_stop_timer(vsi); dma_free_coherent(vsi->pestat->hw->device, vsi->pestat->gather_info.stats_buff_mem.size, vsi->pestat->gather_info.stats_buff_mem.va, @@ -5929,14 +5932,26 @@ void irdma_cfg_aeq(struct irdma_sc_dev *dev, u32 idx, bool enable) */ void sc_vsi_update_stats(struct irdma_sc_vsi *vsi) { - struct irdma_gather_stats *gather_stats; - struct irdma_gather_stats *last_gather_stats; + struct irdma_dev_hw_stats *hw_stats = &vsi->pestat->hw_stats; + struct irdma_gather_stats *gather_stats = + vsi->pestat->gather_info.gather_stats_va; + struct irdma_gather_stats *last_gather_stats = + vsi->pestat->gather_info.last_gather_stats_va; + const struct irdma_hw_stat_map *map = vsi->dev->hw_stats_map; + u16 max_stat_idx = vsi->dev->hw_attrs.max_stat_idx; + u16 i; + + if (vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { + for (i = 0; i < max_stat_idx; i++) { + u16 idx = map[i].byteoff / sizeof(u64); + + hw_stats->stats_val[i] = gather_stats->val[idx]; + } + return; + } - gather_stats = vsi->pestat->gather_info.gather_stats_va; - last_gather_stats = vsi->pestat->gather_info.last_gather_stats_va; - irdma_update_stats(&vsi->pestat->hw_stats, gather_stats, - last_gather_stats, vsi->dev->hw_stats_map, - vsi->dev->hw_attrs.max_stat_idx); + irdma_update_stats(hw_stats, gather_stats, last_gather_stats, + map, max_stat_idx); } /** diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index 52ace06912eb..2fc8e3cf4395 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -415,7 +415,7 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_STATS_USE_INST BIT_ULL(61) #define IRDMA_CQPSQ_STATS_OP GENMASK_ULL(37, 32) #define IRDMA_CQPSQ_STATS_INST_INDEX GENMASK_ULL(6, 0) -#define 
IRDMA_CQPSQ_STATS_HMC_FCN_INDEX GENMASK_ULL(5, 0) +#define IRDMA_CQPSQ_STATS_HMC_FCN_INDEX GENMASK_ULL(15, 0) #define IRDMA_CQPSQ_WS_WQEVALID BIT_ULL(63) #define IRDMA_CQPSQ_WS_NODEOP GENMASK_ULL(55, 52) #define IRDMA_SD_MAX GENMASK_ULL(15, 0) diff --git a/drivers/infiniband/hw/irdma/ig3rdma_hw.c b/drivers/infiniband/hw/irdma/ig3rdma_hw.c index 1d582c50e4d2..2a3d7144c771 100644 --- a/drivers/infiniband/hw/irdma/ig3rdma_hw.c +++ b/drivers/infiniband/hw/irdma/ig3rdma_hw.c @@ -48,9 +48,70 @@ static const struct irdma_irq_ops ig3rdma_irq_ops = { .irdma_en_irq = ig3rdma_ena_irq, }; +static const struct irdma_hw_stat_map ig3rdma_hw_stat_map[] = { + [IRDMA_HW_STAT_INDEX_RXVLANERR] = { 0, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4RXOCTS] = { 8, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4RXPKTS] = { 16, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4RXDISCARD] ={ 24, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4RXTRUNC] = { 32, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4RXFRAGS] = { 40, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4RXMCOCTS] = { 48, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP4RXMCPKTS] = { 56, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP6RXOCTS] = { 64, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP6RXPKTS] = { 72, 0, 0 }, + [IRDMA_HW_STAT_INDEX_IP6RXDISCARD] ={ 80, 0, 0 },
[Intel-wired-lan] [iwl-next, rdma v3 00/24] Add RDMA support for Intel IPU E2000 (GEN3)
This patch series is based on 6.14-rc1 and includes both netdev and RDMA patches for ease of review. It can also be viewed here [1]. A shared pull request will be sent for patches 1-7 following review. The patch series introduces RDMA RoCEv2 support for the Intel Infrastructure Processing Unit (IPU) E2000 line of products, referred to as GEN3 in the irdma provider. It supports both physical and virtual functions. The irdma driver communicates with the device Control Plane (CP) to discover capabilities and perform privileged operations through an RDMA-specific messaging interface built atop the Infrastructure Data-Plane Function (IDPF) mailbox and virtchannel protocol [2]. To support RDMA for E2000 product, the idpf driver requires the use of the Inter-Driver Communication (IDC) interface which is currently already in use between ice and irdma. With a second consumer, the IDC is generalized to support multiple consumers and ice, idpf and irdma are adapted to the IDC definitions. The IPU model can host one or more logical network endpoints called vPorts per PCI function that are flexibly associated with a physical port or an internal communication port. irdma exposes a verbs device per vPort. Other key highlights of this series as it pertains to GEN3 device include: MMIO learning, RDMA capability negotiation and RDMA vectors discovery between idpf and CP PCI core device level RDMA resource initialization via a GEN3 core auxiliary driver Shared Receive Queue (SRQ) Support Atomic Operations Support (Compare and Swap and Fetch and Add) Completion Queue Element (CQE) Error and Flush Handling Push Page Support Changelog: V3 series irdma changes: * Move the call to get RDMA features just after CQP is created, otherwise the feature flags are not defined before used. * Move the check for supported atomic operations after reading the RDMA feature info to correctly enable atomics. * Round up to power of two the resource size for Read Responses and Transmit Queue elements. 
* Do not use the Work Queue element index passed in the Asynchronous Event info to get SRQ context, because it is incorrect. * Fix detection of Completion Queue (CQ) empty when 64-byte CQ elements are enabled. * Minor improvements and cleanup. V3 series idpf changes: * Reduce required minimum RDMA vectors to 2. V2 RFC series includes only idpf changes: * RDMA vector number adjustment * Fix unplugging vport auxiliary device twice * General cleanup and minor improvements V2 RFC series is at https://lwn.net/Articles/987141/. [1] https://git.kernel.org/pub/scm/linux/kernel/git/tnguy/linux.git/log/?h=idpf-rdma [2] https://elixir.bootlin.com/linux/latest/source/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c Christopher Bednarz (1): RDMA/irdma: Discover and set up GEN3 hardware register layout Dave Ertman (1): iidc/ice/irdma: Update IDC to support multiple consumers Faisal Latif (2): RDMA/irdma: Add SRQ support RDMA/irdma: Add Atomic Operations support Jay Bhat (1): RDMA/irdma: Add Push Page Support for GEN3 Joshua Hay (6): idpf: use reserved RDMA vectors from control plane idpf: implement core RDMA auxiliary dev create, init, and destroy idpf: implement RDMA vport auxiliary dev create, init, and destroy idpf: implement remaining IDC RDMA core callbacks and handlers idpf: implement IDC vport aux driver MTU change handler idpf: implement get LAN mmio memory regions Krzysztof Czurylo (2): RDMA/irdma: Add GEN3 CQP support with deferred completions RDMA/irdma: Add GEN3 HW statistics support Mustafa Ismail (3): RDMA/irdma: Refactor GEN2 auxiliary driver RDMA/irdma: Add GEN3 core driver support RDMA/irdma: Introduce GEN3 vPort driver support Shiraz Saleem (7): RDMA/irdma: Add GEN3 support for AEQ and CEQ RDMA/irdma: Add GEN3 virtual QP1 support RDMA/irdma: Extend QP context programming for GEN3 RDMA/irdma: Support 64-byte CQEs and GEN3 CQE opcode decoding RDMA/irdma: Restrict Memory Window and CQE Timestamping to GEN3 RDMA/irdma: Extend CQE Error and Flush Handling for GEN3 
Devices RDMA/irdma: Update Kconfig Vinoth Kumar Chandra Mohan (1): RDMA/irdma: Add support for V2 HMC resource management scheme drivers/infiniband/hw/irdma/Kconfig |7 +- drivers/infiniband/hw/irdma/Makefile |4 + drivers/infiniband/hw/irdma/ctrl.c| 1469 +++-- drivers/infiniband/hw/irdma/defs.h| 266 +-- drivers/infiniband/hw/irdma/hmc.c | 18 +- drivers/infiniband/hw/irdma/hmc.h | 19 +- drivers/infiniband/hw/irdma/hw.c | 357 ++-- drivers/infiniband/hw/irdma/i40iw_hw.c|2 + drivers/infiniband/hw/irdma/i40iw_hw.h|2 + drivers/infiniband/hw/irdma/i40iw_if.c|3 + drivers/infiniband/hw/irdma/icrdma_hw.c |3 + drivers/infiniband/hw/irdma/icrdma_hw.h |5 +- drivers/infiniband/hw/irdma/icrdma_if.c | 267 +++ drivers/infiniband/hw/irdma/ig3rdma_hw.c |
[Intel-wired-lan] [iwl-next v3 06/24] idpf: implement IDC vport aux driver MTU change handler
From: Joshua Hay The only event an RDMA vport aux driver cares about right now is an MTU change on its underlying vport. Implement and plumb the handler to signal the pre MTU change event and post MTU change events to the RDMA vport aux driver. Signed-off-by: Joshua Hay Signed-off-by: Tatyana Nikolova --- v3: - add missing break statement - remove unnecessary iadrv NULL check drivers/net/ethernet/intel/idpf/idpf.h | 2 ++ drivers/net/ethernet/intel/idpf/idpf_idc.c | 31 ++ drivers/net/ethernet/intel/idpf/idpf_lib.c | 11 +--- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index 9516e946781a..491db5b2d79d 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -873,5 +873,7 @@ int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter, void idpf_idc_deinit_core_aux_device(struct idc_rdma_core_dev_info *cdev_info); void idpf_idc_deinit_vport_aux_device(struct idc_rdma_vport_dev_info *vdev_info); void idpf_idc_issue_reset_event(struct idc_rdma_core_dev_info *cdev_info); +void idpf_idc_vdev_mtu_event(struct idc_rdma_vport_dev_info *vdev_info, +enum idc_rdma_event_type event_type); #endif /* !_IDPF_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c index 3dbd7e2a7e98..fb5b023557b6 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_idc.c +++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c @@ -137,6 +137,37 @@ static int idpf_idc_init_aux_vport_dev(struct idpf_vport *vport) return 0; } +/** + * idpf_idc_vdev_mtu_event - Function to handle IDC vport mtu change events + * @vdev_info: IDC vport device info pointer + * @event_type: type of event to pass to handler + */ +void idpf_idc_vdev_mtu_event(struct idc_rdma_vport_dev_info *vdev_info, +enum idc_rdma_event_type event_type) +{ + struct idc_rdma_vport_auxiliary_drv *iadrv; + struct idc_rdma_event event = { }; + struct auxiliary_device 
*adev; + + if (!vdev_info) + /* RDMA is not enabled */ + return; + + set_bit(event_type, event.type); + + device_lock(&vdev_info->adev->dev); + adev = vdev_info->adev; + if (!adev || !adev->dev.driver) + goto unlock; + iadrv = container_of(adev->dev.driver, +struct idc_rdma_vport_auxiliary_drv, +adrv.driver); + if (iadrv->event_handler) + iadrv->event_handler(vdev_info, &event); +unlock: + device_unlock(&vdev_info->adev->dev); +} + /** * idpf_core_adev_release - function to be mapped to aux dev's release op * @dev: pointer to device to free diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 88a33c8b18fe..a9bc6e0f949c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1931,6 +1931,9 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, idpf_vport_calc_num_q_desc(new_vport); break; case IDPF_SR_MTU_CHANGE: + idpf_idc_vdev_mtu_event(vport->vdev_info, + IDC_RDMA_EVENT_BEFORE_MTU_CHANGE); + break; case IDPF_SR_RSC_CHANGE: break; default: @@ -1975,9 +1978,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, if (current_state == __IDPF_VPORT_UP) err = idpf_vport_open(vport); - kfree(new_vport); - - return err; + goto free_vport; err_reset: idpf_send_add_queues_msg(vport, vport->num_txq, vport->num_complq, @@ -1990,6 +1991,10 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, free_vport: kfree(new_vport); + if (reset_cause == IDPF_SR_MTU_CHANGE) + idpf_idc_vdev_mtu_event(vport->vdev_info, + IDC_RDMA_EVENT_AFTER_MTU_CHANGE); + return err; } -- 2.37.3
[Intel-wired-lan] [iwl-next v3 05/24] idpf: implement remaining IDC RDMA core callbacks and handlers
From: Joshua Hay Implement the idpf_idc_request_reset and idpf_idc_rdma_vc_send_sync callbacks for the rdma core auxiliary driver to issue reset events to the idpf and send (synchronous) virtchnl messages to the control plane respectively. Implement and plumb the reset handler for the opposite flow as well, i.e. when the idpf is resetting and needs to notify the rdma core auxiliary driver. Signed-off-by: Joshua Hay Signed-off-by: Tatyana Nikolova --- v3: - remove unnecessary iadrv NULL check drivers/net/ethernet/intel/idpf/idpf.h| 1 + drivers/net/ethernet/intel/idpf/idpf_idc.c| 43 ++- drivers/net/ethernet/intel/idpf/idpf_lib.c| 2 + .../net/ethernet/intel/idpf/idpf_virtchnl.c | 23 +- drivers/net/ethernet/intel/idpf/virtchnl2.h | 3 +- 5 files changed, 69 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index a9c0639f0021..9516e946781a 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -872,5 +872,6 @@ int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter, enum idc_function_type ftype); void idpf_idc_deinit_core_aux_device(struct idc_rdma_core_dev_info *cdev_info); void idpf_idc_deinit_vport_aux_device(struct idc_rdma_vport_dev_info *vdev_info); +void idpf_idc_issue_reset_event(struct idc_rdma_core_dev_info *cdev_info); #endif /* !_IDPF_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c index a9049cb616a9..3dbd7e2a7e98 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_idc.c +++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c @@ -216,6 +216,38 @@ static void idpf_unplug_aux_dev(struct auxiliary_device *adev) ida_free(&idpf_idc_ida, adev->id); } +/** + * idpf_idc_issue_reset_event - Function to handle reset IDC event + * @cdev_info: IDC core device info pointer + */ +void idpf_idc_issue_reset_event(struct idc_rdma_core_dev_info *cdev_info) +{ + enum idc_rdma_event_type event_type = 
IDC_RDMA_EVENT_WARN_RESET; + struct idc_rdma_core_auxiliary_drv *iadrv; + struct idc_rdma_event event = { }; + struct auxiliary_device *adev; + + if (!cdev_info) + /* RDMA is not enabled */ + return; + + set_bit(event_type, event.type); + + device_lock(&cdev_info->adev->dev); + + adev = cdev_info->adev; + if (!adev || !adev->dev.driver) + goto unlock; + + iadrv = container_of(adev->dev.driver, +struct idc_rdma_core_auxiliary_drv, +adrv.driver); + if (iadrv->event_handler) + iadrv->event_handler(cdev_info, &event); +unlock: + device_unlock(&cdev_info->adev->dev); +} + /** * idpf_idc_vport_dev_up - called when CORE is ready for vport aux devs * @adapter: private data struct @@ -300,7 +332,16 @@ static int idpf_idc_request_reset(struct idc_rdma_core_dev_info *cdev_info, enum idc_rdma_reset_type __always_unused reset_type) { - return -EOPNOTSUPP; + struct idpf_adapter *adapter = pci_get_drvdata(cdev_info->pdev); + + if (!idpf_is_reset_in_prog(adapter)) { + set_bit(IDPF_HR_FUNC_RESET, adapter->flags); + queue_delayed_work(adapter->vc_event_wq, + &adapter->vc_event_task, + msecs_to_jiffies(10)); + } + + return 0; } /* Implemented by the Auxiliary Device and called by the Auxiliary Driver */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index a211fca9e925..88a33c8b18fe 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -1803,6 +1803,8 @@ static int idpf_init_hard_reset(struct idpf_adapter *adapter) } else if (test_and_clear_bit(IDPF_HR_FUNC_RESET, adapter->flags)) { bool is_reset = idpf_is_reset_detected(adapter); + idpf_idc_issue_reset_event(adapter->cdev_info); + idpf_set_vport_state(adapter); idpf_vc_core_deinit(adapter); if (!is_reset) diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index eaffda7a2673..bd20d7b148c2 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ 
b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -3754,5 +3754,26 @@ int idpf_idc_rdma_vc_send_sync(struct idc_rdma_core_dev_info *cdev_info, u8 *send_msg, u16 msg_size, u8 *recv_msg, u16 *recv_len) { - return -EOPNOTSUPP; + struct idpf_adapter *adapter = pci_get_drvdata(cdev_info->pdev); + struct idpf_vc_xn_params xn_params = { }; + ssize_t reply_sz; + u16 recv_size; + + if
[Intel-wired-lan] [iwl-next v3 04/24] idpf: implement RDMA vport auxiliary dev create, init, and destroy
From: Joshua Hay Implement the functions to create, initialize, and destroy an RDMA vport auxiliary device. The vport aux dev creation is dependent on the core aux device to call idpf_idc_vport_dev_ctrl to signal that it is ready for vport aux devices. Implement that core callback to either create and initialize the vport aux dev or deinitialize. Rdma vport aux dev creation is also dependent on the control plane to tell us the vport is RDMA enabled. Add a flag in the create vport message to signal individual vport RDMA capabilities. Signed-off-by: Joshua Hay Signed-off-by: Tatyana Nikolova --- v2: Guard against unplugging vport aux dev twice. This is possible if irdma is unloaded and then idpf is unloaded. irdma calls idpf_idc_vport_dev_down during its unload which calls unplug. Set the adev to NULL in dev_down, so that the following call to deinit_vport_aux_device during idpf unload will return early from unplug. v3: - Used signed ret value from ida_alloc and only assign unsigned id if no err - capitalize some abbreviations - add missing field descriptions - remove unnecessary casts drivers/net/ethernet/intel/idpf/idpf.h | 4 + drivers/net/ethernet/intel/idpf/idpf_idc.c | 178 +++- drivers/net/ethernet/intel/idpf/idpf_lib.c | 2 + drivers/net/ethernet/intel/idpf/virtchnl2.h | 13 +- 4 files changed, 194 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index 64f731fe878c..a9c0639f0021 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -275,6 +275,7 @@ struct idpf_port_stats { * group will yield total number of RX queues. * @rxq_model: Splitq queue or single queue queuing model * @rx_ptype_lkup: Lookup table for ptypes on RX + * @vdev_info: IDC vport device info pointer * @adapter: back pointer to associated adapter * @netdev: Associated net_device. Each vport should have one and only one * associated netdev. 
@@ -317,6 +318,8 @@ struct idpf_vport { u32 rxq_model; struct libeth_rx_pt *rx_ptype_lkup; + struct idc_rdma_vport_dev_info *vdev_info; + struct idpf_adapter *adapter; struct net_device *netdev; DECLARE_BITMAP(flags, IDPF_VPORT_FLAGS_NBITS); @@ -868,5 +871,6 @@ int idpf_idc_init(struct idpf_adapter *adapter); int idpf_idc_init_aux_core_dev(struct idpf_adapter *adapter, enum idc_function_type ftype); void idpf_idc_deinit_core_aux_device(struct idc_rdma_core_dev_info *cdev_info); +void idpf_idc_deinit_vport_aux_device(struct idc_rdma_vport_dev_info *vdev_info); #endif /* !_IDPF_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c index 4c7cf32d4863..a9049cb616a9 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_idc.c +++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c @@ -30,6 +30,113 @@ int idpf_idc_init(struct idpf_adapter *adapter) return err; } +/** + * idpf_vport_adev_release - function to be mapped to aux dev's release op + * @dev: pointer to device to free + */ +static void idpf_vport_adev_release(struct device *dev) +{ + struct idc_rdma_vport_auxiliary_dev *iadev; + + iadev = container_of(dev, struct idc_rdma_vport_auxiliary_dev, adev.dev); + kfree(iadev); + iadev = NULL; +} + +/* idpf_plug_vport_aux_dev - allocate and register a vport Auxiliary device + * @cdev_info: IDC core device info pointer + * @vdev_info: IDC vport device info pointer + * + * Return: 0 on success or error code on failure. 
+ */ +static int idpf_plug_vport_aux_dev(struct idc_rdma_core_dev_info *cdev_info, + struct idc_rdma_vport_dev_info *vdev_info) +{ + struct idc_rdma_vport_auxiliary_dev *iadev; + char name[IDPF_IDC_MAX_ADEV_NAME_LEN]; + struct auxiliary_device *adev; + int ret; + + iadev = kzalloc(sizeof(*iadev), GFP_KERNEL); + if (!iadev) + return -ENOMEM; + + adev = &iadev->adev; + vdev_info->adev = &iadev->adev; + iadev->vdev_info = vdev_info; + + ret = ida_alloc(&idpf_idc_ida, GFP_KERNEL); + if (ret < 0) { + pr_err("failed to allocate unique device ID for Auxiliary driver\n"); + goto err_ida_alloc; + } + adev->id = ret; + adev->dev.release = idpf_vport_adev_release; + adev->dev.parent = &cdev_info->pdev->dev; + sprintf(name, "%04x.rdma.vdev", cdev_info->pdev->vendor); + adev->name = name; + + ret = auxiliary_device_init(adev); + if (ret) + goto err_aux_dev_init; + + ret = auxiliary_device_add(adev); + if (ret) + goto err_aux_dev_add; + + return 0; + +err_aux_dev_add: + vdev_info->adev = NULL; + auxiliary_device_uninit(adev); +err_aux_dev_init: + ida_free(&idpf_idc_ida, adev->id); +err_ida_alloc: + kfre
[Intel-wired-lan] [iwl-next v3 02/24] idpf: use reserved RDMA vectors from control plane
From: Joshua Hay Fetch the number of reserved RDMA vectors from the control plane. Adjust the number of reserved LAN vectors if necessary. Adjust the minimum number of vectors the OS should reserve to include RDMA; and fail if the OS cannot reserve enough vectors for the minimum number of LAN and RDMA vectors required. Create a separate msix table for the reserved RDMA vectors, which will just get handed off to the RDMA core device to do with what it will. Signed-off-by: Joshua Hay Signed-off-by: Tatyana Nikolova --- drivers/net/ethernet/intel/idpf/idpf.h | 28 +++- drivers/net/ethernet/intel/idpf/idpf_lib.c | 74 + drivers/net/ethernet/intel/idpf/idpf_txrx.h | 1 + drivers/net/ethernet/intel/idpf/virtchnl2.h | 5 +- 4 files changed, 92 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index 66544faab710..8ef7120e6717 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -489,10 +489,11 @@ struct idpf_vc_xn_manager; * @flags: See enum idpf_flags * @reset_reg: See struct idpf_reset_reg * @hw: Device access data - * @num_req_msix: Requested number of MSIX vectors * @num_avail_msix: Available number of MSIX vectors * @num_msix_entries: Number of entries in MSIX table * @msix_entries: MSIX table + * @num_rdma_msix_entries: Available number of MSIX vectors for RDMA + * @rdma_msix_entries: RDMA MSIX table * @req_vec_chunks: Requested vector chunk data * @mb_vector: Mailbox vector data * @vector_stack: Stack to store the msix vector indexes @@ -542,10 +543,11 @@ struct idpf_adapter { DECLARE_BITMAP(flags, IDPF_FLAGS_NBITS); struct idpf_reset_reg reset_reg; struct idpf_hw hw; - u16 num_req_msix; u16 num_avail_msix; u16 num_msix_entries; struct msix_entry *msix_entries; + u16 num_rdma_msix_entries; + struct msix_entry *rdma_msix_entries; struct virtchnl2_alloc_vectors *req_vec_chunks; struct idpf_q_vector mb_vector; struct idpf_vector_lifo vector_stack; @@ 
-609,6 +611,17 @@ static inline int idpf_is_queue_model_split(u16 q_model) bool idpf_is_capability_ena(struct idpf_adapter *adapter, bool all, enum idpf_cap_field field, u64 flag); +/** + * idpf_is_rdma_cap_ena - Determine if RDMA is supported + * @adapter: private data struct + * + * Return: true if RDMA capability is enabled, false otherwise + */ +static inline bool idpf_is_rdma_cap_ena(struct idpf_adapter *adapter) +{ + return idpf_is_cap_ena(adapter, IDPF_OTHER_CAPS, VIRTCHNL2_CAP_RDMA); +} + #define IDPF_CAP_RSS (\ VIRTCHNL2_CAP_RSS_IPV4_TCP |\ VIRTCHNL2_CAP_RSS_IPV4_TCP |\ @@ -663,6 +676,17 @@ static inline u16 idpf_get_reserved_vecs(struct idpf_adapter *adapter) return le16_to_cpu(adapter->caps.num_allocated_vectors); } +/** + * idpf_get_reserved_rdma_vecs - Get reserved RDMA vectors + * @adapter: private data struct + * + * Return: number of vectors reserved for RDMA + */ +static inline u16 idpf_get_reserved_rdma_vecs(struct idpf_adapter *adapter) +{ + return le16_to_cpu(adapter->caps.num_rdma_allocated_vectors); +} + /** * idpf_get_default_vports - Get default number of vports * @adapter: private data struct diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index b4fbb99bfad2..1330be825cd0 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -87,6 +87,8 @@ void idpf_intr_rel(struct idpf_adapter *adapter) idpf_deinit_vector_stack(adapter); kfree(adapter->msix_entries); adapter->msix_entries = NULL; + kfree(adapter->rdma_msix_entries); + adapter->rdma_msix_entries = NULL; } /** @@ -314,13 +316,33 @@ int idpf_req_rel_vector_indexes(struct idpf_adapter *adapter, */ int idpf_intr_req(struct idpf_adapter *adapter) { + u16 num_lan_vecs, min_lan_vecs, num_rdma_vecs = 0, min_rdma_vecs = 0; u16 default_vports = idpf_get_default_vports(adapter); int num_q_vecs, total_vecs, num_vec_ids; int min_vectors, v_actual, err; unsigned int vector; u16 *vecids; + int i; 
total_vecs = idpf_get_reserved_vecs(adapter); + num_lan_vecs = total_vecs; + if (idpf_is_rdma_cap_ena(adapter)) { + num_rdma_vecs = idpf_get_reserved_rdma_vecs(adapter); + min_rdma_vecs = IDPF_MIN_RDMA_VEC; + + if (!num_rdma_vecs) { + /* If idpf_get_reserved_rdma_vecs is 0, vectors are +* pulled from the LAN pool. +*/ + num_rdma_vecs = min_rdma_vecs; + } else if (num_rdma_vecs < min_rdma_vecs) { + dev_err(&adapter->pdev->dev, + "Not enough vect
[Intel-wired-lan] [rdma v3 24/24] RDMA/irdma: Update Kconfig
From: Shiraz Saleem Update Kconfig to add dependency on idpf module. Additionally, add IPU E2000 to list of devices supported. Signed-off-by: Shiraz Saleem Signed-off-by: Tatyana Nikolova --- drivers/infiniband/hw/irdma/Kconfig | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/irdma/Kconfig b/drivers/infiniband/hw/irdma/Kconfig index b6f9c41bca51..f6b39f3a726e 100644 --- a/drivers/infiniband/hw/irdma/Kconfig +++ b/drivers/infiniband/hw/irdma/Kconfig @@ -4,9 +4,10 @@ config INFINIBAND_IRDMA depends on INET depends on IPV6 || !IPV6 depends on PCI - depends on ICE && I40E + depends on (IDPF || ICE) && I40E select GENERIC_ALLOCATOR select AUXILIARY_BUS help - This is an Intel(R) Ethernet Protocol Driver for RDMA driver - that support E810 (iWARP/RoCE) and X722 (iWARP) network devices. + This is an Intel(R) Ethernet Protocol Driver for RDMA that + support IPU E2000 (RoCEv2), E810 (iWARP/RoCE) and X722 (iWARP) + network devices. -- 2.37.3
[Intel-wired-lan] [rdma v3 11/24] RDMA/irdma: Add GEN3 CQP support with deferred completions
From: Krzysztof Czurylo GEN3 introduces asynchronous handling of Control QP (CQP) operations to minimize head-of-line blocking. Create the CQP using the updated GEN3- specific descriptor fields and implement the necessary support for this deferred completion mechanism. Signed-off-by: Krzysztof Czurylo Signed-off-by: Tatyana Nikolova --- drivers/infiniband/hw/irdma/ctrl.c | 253 ++- drivers/infiniband/hw/irdma/defs.h | 15 ++ drivers/infiniband/hw/irdma/hw.c | 89 -- drivers/infiniband/hw/irdma/main.h | 2 + drivers/infiniband/hw/irdma/protos.h | 1 + drivers/infiniband/hw/irdma/type.h | 43 - drivers/infiniband/hw/irdma/utils.c | 50 +- 7 files changed, 438 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 082aaa5fc3bc..96d7a5d34515 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -2741,6 +2741,89 @@ static inline void irdma_get_cqp_reg_info(struct irdma_sc_cqp *cqp, u32 *val, *error = FIELD_GET(IRDMA_CQPTAIL_CQP_OP_ERR, *val); } +/** + * irdma_sc_cqp_def_cmpl_ae_handler - remove completed requests from pending list + * @dev: sc device struct + * @info: AE entry info + * @first: true if this is the first call to this handler for given AEQE + * @scratch: (out) scratch entry pointer + * @sw_def_info: (in/out) SW ticket value for this AE + * + * In case of AE_DEF_CMPL event, this function should be called in a loop + * until it returns NULL-ptr via scratch. + * For each call, it looks for a matching CQP request on pending list, + * removes it from the list and returns the pointer to the associated scratch + * entry. + * If this is the first call to this function for given AEQE, sw_def_info + * value is not used to find matching requests. Instead, it is populated + * with the value from the first matching cqp_request on the list. + * For subsequent calls, ooo_op->sw_def_info need to match the value passed + * by a caller. 
+ * + * Return: scratch entry pointer for cqp_request to be released or NULL + * if no matching request is found. + */ +void irdma_sc_cqp_def_cmpl_ae_handler(struct irdma_sc_dev *dev, + struct irdma_aeqe_info *info, + bool first, u64 *scratch, + u32 *sw_def_info) +{ + struct irdma_ooo_cqp_op *ooo_op; + unsigned long flags; + + *scratch = 0; + + spin_lock_irqsave(&dev->cqp->ooo_list_lock, flags); + list_for_each_entry(ooo_op, &dev->cqp->ooo_pnd, list_entry) { + if (ooo_op->deferred && + ((first && ooo_op->def_info == info->def_info) || +(!first && ooo_op->sw_def_info == *sw_def_info))) { + *sw_def_info = ooo_op->sw_def_info; + *scratch = ooo_op->scratch; + + list_move(&ooo_op->list_entry, &dev->cqp->ooo_avail); + atomic64_inc(&dev->cqp->completed_ops); + + break; + } + } + spin_unlock_irqrestore(&dev->cqp->ooo_list_lock, flags); + + if (first && !*scratch) + ibdev_dbg(to_ibdev(dev), + "AEQ: deferred completion with unknown ticket: def_info 0x%x\n", + info->def_info); +} + +/** + * irdma_sc_cqp_cleanup_handler - remove requests from pending list + * @dev: sc device struct + * + * This function should be called in a loop from irdma_cleanup_pending_cqp_op. + * For each call, it returns first CQP request on pending list, removes it + * from the list and returns the pointer to the associated scratch entry. + * + * Return: scratch entry pointer for cqp_request to be released or NULL + * if pending list is empty. 
+ */ +u64 irdma_sc_cqp_cleanup_handler(struct irdma_sc_dev *dev) +{ + struct irdma_ooo_cqp_op *ooo_op; + u64 scratch = 0; + + list_for_each_entry(ooo_op, &dev->cqp->ooo_pnd, list_entry) { + scratch = ooo_op->scratch; + + list_del(&ooo_op->list_entry); + list_add(&ooo_op->list_entry, &dev->cqp->ooo_avail); + atomic64_inc(&dev->cqp->completed_ops); + + break; + } + + return scratch; +} + /** * irdma_cqp_poll_registers - poll cqp registers * @cqp: struct for cqp hw @@ -3126,6 +3209,8 @@ void irdma_sc_remove_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq) int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, struct irdma_cqp_init_info *info) { + struct irdma_ooo_cqp_op *ooo_op; + u32 num_ooo_ops; u8 hw_sq_size; if (info->sq_size > IRDMA_CQP_SW_SQSIZE_2048 || @@ -3156,17 +3241,43 @@ int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, cqp->rocev2_rto_policy = info->rocev2_rto_policy; cqp->protocol_used = info->protocol_used; memcpy(&cq
[Intel-wired-lan] [rdma v3 12/24] RDMA/irdma: Add GEN3 support for AEQ and CEQ
From: Shiraz Saleem Extend support for GEN3 devices by programming the necessary hardware IRQ registers and the updated descriptor fields for the Asynchronous Event Queue (AEQ) and Completion Event Queue (CEQ). Introduce a RDMA virtual channel operation with the Control Plane (CP) to associate interrupt vectors appropriately with AEQ and CEQ. Add new Asynchronous Event (AE) definitions specific to GEN3. Additionally, refactor the AEQ and CEQ setup into the irdma_ctrl_init_hw device control initialization routine. This completes the PCI device level initialization for RDMA in the core driver. Signed-off-by: Shiraz Saleem Signed-off-by: Tatyana Nikolova --- drivers/infiniband/hw/irdma/ctrl.c | 76 ++--- drivers/infiniband/hw/irdma/defs.h | 29 - drivers/infiniband/hw/irdma/hw.c | 130 +-- drivers/infiniband/hw/irdma/ig3rdma_hw.c | 45 drivers/infiniband/hw/irdma/irdma.h | 11 +- drivers/infiniband/hw/irdma/main.h | 6 +- drivers/infiniband/hw/irdma/type.h | 11 +- drivers/infiniband/hw/irdma/virtchnl.c | 84 +++ drivers/infiniband/hw/irdma/virtchnl.h | 19 9 files changed, 338 insertions(+), 73 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 96d7a5d34515..55080b56311b 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -2566,6 +2566,9 @@ static int irdma_sc_cq_create(struct irdma_sc_cq *cq, u64 scratch, FIELD_PREP(IRDMA_CQPSQ_CQ_LPBLSIZE, cq->pbl_chunk_size) | FIELD_PREP(IRDMA_CQPSQ_CQ_CHKOVERFLOW, check_overflow) | FIELD_PREP(IRDMA_CQPSQ_CQ_VIRTMAP, cq->virtual_map) | + FIELD_PREP(IRDMA_CQPSQ_CQ_CQID_HIGH, cq->cq_uk.cq_id >> 22) | + FIELD_PREP(IRDMA_CQPSQ_CQ_CEQID_HIGH, +(cq->ceq_id_valid ? cq->ceq_id : 0) >> 10) | FIELD_PREP(IRDMA_CQPSQ_CQ_ENCEQEMASK, cq->ceqe_mask) | FIELD_PREP(IRDMA_CQPSQ_CQ_CEQIDVALID, cq->ceq_id_valid) | FIELD_PREP(IRDMA_CQPSQ_TPHEN, cq->tph_en) | @@ -3928,7 +3931,7 @@ int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, ceq->pbl_list = (ceq->virtual_map ? 
info->pbl_list : NULL); ceq->tph_en = info->tph_en; ceq->tph_val = info->tph_val; - ceq->vsi = info->vsi; + ceq->vsi_idx = info->vsi_idx; ceq->polarity = 1; IRDMA_RING_INIT(ceq->ceq_ring, ceq->elem_cnt); ceq->dev->ceq[info->ceq_id] = ceq; @@ -3961,13 +3964,16 @@ static int irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64 scratch, (ceq->virtual_map ? ceq->first_pm_pbl_idx : 0)); set_64bit_val(wqe, 56, FIELD_PREP(IRDMA_CQPSQ_TPHVAL, ceq->tph_val) | - FIELD_PREP(IRDMA_CQPSQ_VSIIDX, ceq->vsi->vsi_idx)); + FIELD_PREP(IRDMA_CQPSQ_PASID, ceq->pasid) | + FIELD_PREP(IRDMA_CQPSQ_VSIIDX, ceq->vsi_idx)); hdr = FIELD_PREP(IRDMA_CQPSQ_CEQ_CEQID, ceq->ceq_id) | + FIELD_PREP(IRDMA_CQPSQ_CEQ_CEQID_HIGH, ceq->ceq_id >> 10) | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_CREATE_CEQ) | FIELD_PREP(IRDMA_CQPSQ_CEQ_LPBLSIZE, ceq->pbl_chunk_size) | FIELD_PREP(IRDMA_CQPSQ_CEQ_VMAP, ceq->virtual_map) | FIELD_PREP(IRDMA_CQPSQ_CEQ_ITRNOEXPIRE, ceq->itr_no_expire) | FIELD_PREP(IRDMA_CQPSQ_TPHEN, ceq->tph_en) | + FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, ceq->pasid_valid) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); dma_wmb(); /* make sure WQE is written before valid bit is set */ @@ -4022,7 +4028,7 @@ int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch) int ret_code; struct irdma_sc_dev *dev = ceq->dev; - dev->ccq->vsi = ceq->vsi; + dev->ccq->vsi_idx = ceq->vsi_idx; if (ceq->reg_cq) { ret_code = irdma_sc_add_cq_ctx(ceq, ceq->dev->ccq); if (ret_code) @@ -4055,11 +4061,14 @@ int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq) set_64bit_val(wqe, 16, ceq->elem_cnt); set_64bit_val(wqe, 48, ceq->first_pm_pbl_idx); + set_64bit_val(wqe, 56, + FIELD_PREP(IRDMA_CQPSQ_PASID, ceq->pasid)); hdr = ceq->ceq_id | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_CEQ) | FIELD_PREP(IRDMA_CQPSQ_CEQ_LPBLSIZE, ceq->pbl_chunk_size) | FIELD_PREP(IRDMA_CQPSQ_CEQ_VMAP, ceq->virtual_map) | FIELD_PREP(IRDMA_CQPSQ_TPHEN, ceq->tph_en) | + FIELD_PREP(IRDMA_CQPSQ_PASID_VALID, 
ceq->pasid_valid) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); dma_wmb(); /* make sure WQE is written before valid bit is set */ @@ -4223,10 +4232,13 @@ static int irdma_sc_aeq_create(struct irdma_sc_aeq *aeq, u64 scratch,
[Intel-wired-lan] [rdma v3 08/24] RDMA/irdma: Refactor GEN2 auxiliary driver
From: Mustafa Ismail Refactor the irdma auxiliary driver and associated interfaces out of main.c and into a standalone GEN2-specific source file and rename as gen_2 driver. This is in preparation for adding GEN3 auxiliary drivers. Each HW generation will have its own gen-specific interface file. Additionally, move the Address Handle hash table and associated locks under rf struct. This will allow GEN3 code to migrate to use it easily. Signed-off-by: Mustafa Ismail Signed-off-by: Tatyana Nikolova --- drivers/infiniband/hw/irdma/Makefile| 1 + drivers/infiniband/hw/irdma/i40iw_if.c | 2 + drivers/infiniband/hw/irdma/icrdma_if.c | 265 +++ drivers/infiniband/hw/irdma/main.c | 272 +--- drivers/infiniband/hw/irdma/main.h | 9 +- drivers/infiniband/hw/irdma/verbs.c | 16 +- 6 files changed, 290 insertions(+), 275 deletions(-) create mode 100644 drivers/infiniband/hw/irdma/icrdma_if.c diff --git a/drivers/infiniband/hw/irdma/Makefile b/drivers/infiniband/hw/irdma/Makefile index 48c3854235a0..2522e4ca650b 100644 --- a/drivers/infiniband/hw/irdma/Makefile +++ b/drivers/infiniband/hw/irdma/Makefile @@ -13,6 +13,7 @@ irdma-objs := cm.o\ hw.o\ i40iw_hw.o \ i40iw_if.o \ + icrdma_if.o \ icrdma_hw.o \ main.o \ pble.o \ diff --git a/drivers/infiniband/hw/irdma/i40iw_if.c b/drivers/infiniband/hw/irdma/i40iw_if.c index cc50a7070371..6fa807ef4545 100644 --- a/drivers/infiniband/hw/irdma/i40iw_if.c +++ b/drivers/infiniband/hw/irdma/i40iw_if.c @@ -75,6 +75,8 @@ static void i40iw_fill_device_info(struct irdma_device *iwdev, struct i40e_info struct irdma_pci_f *rf = iwdev->rf; rf->rdma_ver = IRDMA_GEN_1; + rf->sc_dev.hw = &rf->hw; + rf->sc_dev.hw_attrs.uk_attrs.hw_rev = IRDMA_GEN_1; rf->gen_ops.request_reset = i40iw_request_reset; rf->pcidev = cdev_info->pcidev; rf->pf_id = cdev_info->fid; diff --git a/drivers/infiniband/hw/irdma/icrdma_if.c b/drivers/infiniband/hw/irdma/icrdma_if.c new file mode 100644 index ..5fcbf695a1d3 --- /dev/null +++ b/drivers/infiniband/hw/irdma/icrdma_if.c @@ -0,0 
+1,265 @@ +// SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB +// /* Copyright (c) 2015 - 2024 Intel Corporation */ +#include "main.h" + +static void icrdma_prep_tc_change(struct irdma_device *iwdev) +{ + iwdev->vsi.tc_change_pending = true; + irdma_sc_suspend_resume_qps(&iwdev->vsi, IRDMA_OP_SUSPEND); + + /* Wait for all qp's to suspend */ + wait_event_timeout(iwdev->suspend_wq, + !atomic_read(&iwdev->vsi.qp_suspend_reqs), + msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS)); + irdma_ws_reset(&iwdev->vsi); +} + +static void icrdma_idc_event_handler(struct idc_rdma_core_dev_info *cdev_info, +struct idc_rdma_event *event) +{ + struct irdma_device *iwdev = dev_get_drvdata(&cdev_info->adev->dev); + struct irdma_l2params l2params = {}; + + if (*event->type & BIT(IDC_RDMA_EVENT_AFTER_MTU_CHANGE)) { + ibdev_dbg(&iwdev->ibdev, "CLNT: new MTU = %d\n", iwdev->netdev->mtu); + if (iwdev->vsi.mtu != iwdev->netdev->mtu) { + l2params.mtu = iwdev->netdev->mtu; + l2params.mtu_changed = true; + irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev); + irdma_change_l2params(&iwdev->vsi, &l2params); + } + } else if (*event->type & BIT(IDC_RDMA_EVENT_BEFORE_TC_CHANGE)) { + if (iwdev->vsi.tc_change_pending) + return; + + icrdma_prep_tc_change(iwdev); + } else if (*event->type & BIT(IDC_RDMA_EVENT_AFTER_TC_CHANGE)) { + struct iidc_rdma_priv_dev_info *idc_priv = cdev_info->idc_priv; + + if (!iwdev->vsi.tc_change_pending) + return; + + l2params.tc_changed = true; + ibdev_dbg(&iwdev->ibdev, "CLNT: TC Change\n"); + + irdma_fill_qos_info(&l2params, &idc_priv->qos_info); + if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) + iwdev->dcb_vlan_mode = + l2params.num_tc > 1 && !l2params.dscp_mode; + irdma_change_l2params(&iwdev->vsi, &l2params); + } else if (*event->type & BIT(IDC_RDMA_EVENT_CRIT_ERR)) { + ibdev_warn(&iwdev->ibdev, "ICE OICR event notification: oicr = 0x%08x\n", + event->reg); + if (event->reg & IRDMAPFINT_OICR_PE_CRITERR_M) { + u32 pe_criterr; + + pe_criterr = 
readl(iwdev->rf->sc_dev.hw_regs[IRDMA_GLPE_CRITERR]); +#define IRDMA_Q1_RESOURCE_ERR 0x0001024d + if (pe_criterr
[Intel-wired-lan] [rdma v3 18/24] RDMA/irdma: Support 64-byte CQEs and GEN3 CQE opcode decoding
From: Shiraz Saleem Introduce support for 64-byte CQEs in GEN3 devices. Additionally, implement GEN3-specific CQE opcode decoding. Signed-off-by: Shiraz Saleem Signed-off-by: Tatyana Nikolova --- v3: * Fix detection of CQ empty when avoid_mem_cflct is on. * In resize CQ, do not double the CQ size if avoid_mem_cflct is on. * Make CQ size an even number, which is a GEN3 HW requirement. drivers/infiniband/hw/irdma/main.h | 2 +- drivers/infiniband/hw/irdma/utils.c | 5 - drivers/infiniband/hw/irdma/verbs.c | 30 - drivers/infiniband/hw/irdma/verbs.h | 13 + 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index f0196aafe59b..0c7f5f730f1f 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -69,7 +69,7 @@ extern struct idc_rdma_core_auxiliary_drv icrdma_core_auxiliary_drv; #define IRDMA_MACIP_ADD1 #define IRDMA_MACIP_DELETE 2 -#define IW_CCQ_SIZE(IRDMA_CQP_SW_SQSIZE_2048 + 1) +#define IW_CCQ_SIZE(IRDMA_CQP_SW_SQSIZE_2048 + 2) #define IW_CEQ_SIZE2048 #define IW_AEQ_SIZE2048 diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 87c88be47ee3..60ef85e842d1 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -2381,7 +2381,10 @@ bool irdma_cq_empty(struct irdma_cq *iwcq) u8 polarity; ukcq = &iwcq->sc_cq.cq_uk; - cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq); + if (ukcq->avoid_mem_cflct) + cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(ukcq); + else + cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq); get_64bit_val(cqe, 24, &qword3); polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index b5fe5f2fa68b..82a7cec25b52 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1971,8 +1971,13 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries, if (!iwcq->user_mode) { 
entries++; - if (rf->sc_dev.hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) + + if (!iwcq->sc_cq.cq_uk.avoid_mem_cflct && + dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) entries *= 2; + + if (entries & 1) + entries += 1; /* cq size must be an even number */ } info.cq_size = max(entries, 4); @@ -2115,6 +2120,7 @@ static int irdma_create_cq(struct ib_cq *ibcq, unsigned long flags; int err_code; int entries = attr->cqe; + bool cqe_64byte_ena; err_code = cq_validate_flags(attr->flags, dev->hw_attrs.uk_attrs.hw_rev); if (err_code) @@ -2138,6 +2144,9 @@ static int irdma_create_cq(struct ib_cq *ibcq, info.dev = dev; ukinfo->cq_size = max(entries, 4); ukinfo->cq_id = cq_num; + cqe_64byte_ena = dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_64_BYTE_CQE ? +true : false; + ukinfo->avoid_mem_cflct = cqe_64byte_ena; iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size; if (attr->comp_vector < rf->ceqs_count) info.ceq_id = attr->comp_vector; @@ -2213,11 +,18 @@ static int irdma_create_cq(struct ib_cq *ibcq, } entries++; - if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) + if (!cqe_64byte_ena && dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) entries *= 2; + + if (entries & 1) + entries += 1; /* cq size must be an even number */ + ukinfo->cq_size = entries; - rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe); + if (cqe_64byte_ena) + rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_extended_cqe); + else + rsize = info.cq_uk_init_info.cq_size * sizeof(struct irdma_cqe); iwcq->kmem.size = ALIGN(round_up(rsize, 256), 256); iwcq->kmem.va = dma_alloc_coherent(dev->hw->device, iwcq->kmem.size, @@ -3775,8 +3791,12 @@ static void irdma_process_cqe(struct ib_wc *entry, if (cq_poll_info->q_type == IRDMA_CQE_QTYPE_SQ) { set_ib_wc_op_sq(cq_poll_info, entry); } else { - set_ib_wc_op_rq(cq_poll_info, entry, - qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM); + if (qp->dev->hw_attrs.uk_attrs.hw_rev <= IRDMA_GEN_2) + set_ib_wc_op_rq(cq_poll_info, entry, + qp->qp_uk.qp_caps & 
IRDMA_SEND_WITH_IMM ? + true : fal
[Intel-wired-lan] [rdma v3 22/24] RDMA/irdma: Extend CQE Error and Flush Handling for GEN3 Devices
From: Shiraz Saleem Enhance the CQE error and flush handling specific to GEN3 devices. Unlike GEN1/2 devices, which depend on software to generate completions in error, GEN3 devices leverage firmware to generate CQEs in error for all WQEs posted after a QP moves to an error state. Key changes include: - Updating the CQ poll logic to properly advance the CQ head in the event of a flush CQE. - Updating the flush logic for GEN3 to pass error WQE idx for SQ on an AE to flush out unprocessed WQEs in error. - Isolating the decoding of AE to flush codes into a separate routine irdma_ae_to_qp_err_code. This routine can now be leveraged to flush error CQEs on an AE and when error CQE is received for SRQ. Signed-off-by: Shiraz Saleem Signed-off-by: Tatyana Nikolova --- v3: * Use irdma_aeqe_info to update QP err_rq_idx when setting the QP flush fields. * Remove the checks which prevented the use of the delayed flush worker thread for GEN3. drivers/infiniband/hw/irdma/ctrl.c | 9 ++ drivers/infiniband/hw/irdma/defs.h | 105 +-- drivers/infiniband/hw/irdma/hw.c| 99 -- drivers/infiniband/hw/irdma/type.h | 14 +- drivers/infiniband/hw/irdma/uk.c| 39 +- drivers/infiniband/hw/irdma/user.h | 194 +++- drivers/infiniband/hw/irdma/verbs.c | 10 +- 7 files changed, 280 insertions(+), 190 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 75411c4b68fb..4158db00085f 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -2674,6 +2674,12 @@ int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, info->ae_code | FIELD_PREP(IRDMA_CQPSQ_FWQE_AESOURCE, info->ae_src) : 0; set_64bit_val(wqe, 8, temp); + if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) { + set_64bit_val(wqe, 40, + FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_SQ_IDX, info->err_sq_idx)); + set_64bit_val(wqe, 48, + FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_RQ_IDX, info->err_rq_idx)); + } hdr = qp->qp_uk.qp_id | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_FLUSH_WQES) | 
@@ -2682,6 +2688,9 @@ int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHSQ, flush_sq) | FIELD_PREP(IRDMA_CQPSQ_FWQE_FLUSHRQ, flush_rq) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); + if (cqp->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) + hdr |= FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_SQ_IDX_VALID, info->err_sq_idx_valid) | + FIELD_PREP(IRDMA_CQPSQ_FWQE_ERR_RQ_IDX_VALID, info->err_rq_idx_valid); dma_wmb(); /* make sure WQE is written before valid bit is set */ set_64bit_val(wqe, 24, hdr); diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index 3b3680816a65..983b22d7ae23 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -301,107 +301,6 @@ enum irdma_cqp_op_type { #define IRDMA_CQP_OP_GATHER_STATS 0x2e #define IRDMA_CQP_OP_UP_MAP0x2f -/* Async Events codes */ -#define IRDMA_AE_AMP_UNALLOCATED_STAG 0x0102 -#define IRDMA_AE_AMP_INVALID_STAG 0x0103 -#define IRDMA_AE_AMP_BAD_QP0x0104 -#define IRDMA_AE_AMP_BAD_PD0x0105 -#define IRDMA_AE_AMP_BAD_STAG_KEY 0x0106 -#define IRDMA_AE_AMP_BAD_STAG_INDEX0x0107 -#define IRDMA_AE_AMP_BOUNDS_VIOLATION 0x0108 -#define IRDMA_AE_AMP_RIGHTS_VIOLATION 0x0109 -#define IRDMA_AE_AMP_TO_WRAP 0x010a -#define IRDMA_AE_AMP_FASTREG_VALID_STAG 0x010c -#define IRDMA_AE_AMP_FASTREG_MW_STAG 0x010d -#define IRDMA_AE_AMP_FASTREG_INVALID_RIGHTS0x010e -#define IRDMA_AE_AMP_FASTREG_INVALID_LENGTH0x0110 -#define IRDMA_AE_AMP_INVALIDATE_SHARED 0x0111 -#define IRDMA_AE_AMP_INVALIDATE_NO_REMOTE_ACCESS_RIGHTS 0x0112 -#define IRDMA_AE_AMP_INVALIDATE_MR_WITH_BOUND_WINDOWS 0x0113 -#define IRDMA_AE_AMP_MWBIND_VALID_STAG 0x0114 -#define IRDMA_AE_AMP_MWBIND_OF_MR_STAG 0x0115 -#define IRDMA_AE_AMP_MWBIND_TO_ZERO_BASED_STAG 0x0116 -#define IRDMA_AE_AMP_MWBIND_TO_MW_STAG 0x0117 -#define IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS 0x0118 -#define IRDM
[Intel-wired-lan] [rdma v3 19/24] RDMA/irdma: Add SRQ support
From: Faisal Latif Implement verb API and UAPI changes to support SRQ functionality in GEN3 devices. Signed-off-by: Faisal Latif Signed-off-by: Tatyana Nikolova --- v3: * Clean up SRQ unrelated changes. * Do not use wqe_idx to get SRQ context in irdma_sc_get_next_aeqe() because its lower 6 bits are not correctly set. drivers/infiniband/hw/irdma/ctrl.c | 236 +- drivers/infiniband/hw/irdma/defs.h | 36 ++- drivers/infiniband/hw/irdma/hw.c| 21 +- drivers/infiniband/hw/irdma/irdma.h | 1 + drivers/infiniband/hw/irdma/main.h | 12 +- drivers/infiniband/hw/irdma/type.h | 66 drivers/infiniband/hw/irdma/uk.c| 162 +- drivers/infiniband/hw/irdma/user.h | 41 +++ drivers/infiniband/hw/irdma/utils.c | 27 ++ drivers/infiniband/hw/irdma/verbs.c | 475 +++- drivers/infiniband/hw/irdma/verbs.h | 25 ++ include/uapi/rdma/irdma-abi.h | 15 +- 12 files changed, 1103 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index d01c55172f6a..c4da8f981dac 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -412,7 +412,8 @@ int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info) pble_obj_cnt = info->pd->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if ((info->virtual_map && info->sq_pa >= pble_obj_cnt) || - (info->virtual_map && info->rq_pa >= pble_obj_cnt)) + (!info->qp_uk_init_info.srq_uk && +info->virtual_map && info->rq_pa >= pble_obj_cnt)) return -EINVAL; qp->llp_stream_handle = (void *)(-1); @@ -446,6 +447,208 @@ int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info) return 0; } +/** + * irdma_sc_srq_init - init sc_srq structure + * @srq: srq sc struct + * @info: parameters for srq init + */ +int irdma_sc_srq_init(struct irdma_sc_srq *srq, + struct irdma_srq_init_info *info) +{ + u32 srq_size_quanta; + int ret_code; + + ret_code = irdma_uk_srq_init(&srq->srq_uk, &info->srq_uk_init_info); + if (ret_code) + return ret_code; + + srq->dev = 
info->pd->dev; + srq->pd = info->pd; + srq->vsi = info->vsi; + srq->srq_pa = info->srq_pa; + srq->first_pm_pbl_idx = info->first_pm_pbl_idx; + srq->pasid = info->pasid; + srq->pasid_valid = info->pasid_valid; + srq->srq_limit = info->srq_limit; + srq->leaf_pbl_size = info->leaf_pbl_size; + srq->virtual_map = info->virtual_map; + srq->tph_en = info->tph_en; + srq->arm_limit_event = info->arm_limit_event; + srq->tph_val = info->tph_value; + srq->shadow_area_pa = info->shadow_area_pa; + + /* Smallest SRQ size is 256B i.e. 8 quanta */ + srq_size_quanta = max((u32)IRDMA_SRQ_MIN_QUANTA, + srq->srq_uk.srq_size * + srq->srq_uk.wqe_size_multiplier); + srq->hw_srq_size = irdma_get_encoded_wqe_size(srq_size_quanta, + IRDMA_QUEUE_TYPE_SRQ); + + return 0; +} + +/** + * irdma_sc_srq_create - send srq create CQP WQE + * @srq: srq sc struct + * @scratch: u64 saved to be used during cqp completion + * @post_sq: flag for cqp db to ring + */ +static int irdma_sc_srq_create(struct irdma_sc_srq *srq, u64 scratch, + bool post_sq) +{ + struct irdma_sc_cqp *cqp; + __le64 *wqe; + u64 hdr; + + cqp = srq->pd->dev->cqp; + if (srq->srq_uk.srq_id < cqp->dev->hw_attrs.min_hw_srq_id || + srq->srq_uk.srq_id > + (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_SRQ].max_cnt - 1)) + return -EINVAL; + + wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); + if (!wqe) + return -ENOMEM; + + set_64bit_val(wqe, 0, + FIELD_PREP(IRDMA_CQPSQ_SRQ_SRQ_LIMIT, srq->srq_limit) | + FIELD_PREP(IRDMA_CQPSQ_SRQ_RQSIZE, srq->hw_srq_size) | + FIELD_PREP(IRDMA_CQPSQ_SRQ_RQ_WQE_SIZE, srq->srq_uk.wqe_size)); + set_64bit_val(wqe, 8, (uintptr_t)srq); + set_64bit_val(wqe, 16, + FIELD_PREP(IRDMA_CQPSQ_SRQ_PD_ID, srq->pd->pd_id)); + set_64bit_val(wqe, 32, + FIELD_PREP(IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR, +srq->srq_pa >> +IRDMA_CQPSQ_SRQ_PHYSICAL_BUFFER_ADDR_S)); + set_64bit_val(wqe, 40, + FIELD_PREP(IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR, +srq->shadow_area_pa >> +IRDMA_CQPSQ_SRQ_DB_SHADOW_ADDR_S)); + set_64bit_val(wqe, 48, + 
FIELD_PREP(IRDMA_CQPSQ_SRQ_FIRST_PM_PBL_IDX, +srq->first_pm_pbl_idx)); + + hdr = srq->srq_uk.srq_id
[Intel-wired-lan] [rdma v3 20/24] RDMA/irdma: Restrict Memory Window and CQE Timestamping to GEN3
From: Shiraz Saleem With the deprecation of Memory Window and Timestamping support in GEN2, move these features to be exclusive to GEN3. This iteration supports only Type2 Memory Windows. Additionally, it includes the reporting of the timestamp mask and Host Channel Adapter (HCA) core clock frequency via the query device verb. Signed-off-by: Shiraz Saleem Signed-off-by: Tatyana Nikolova --- drivers/infiniband/hw/irdma/verbs.c | 42 ++--- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 4ab81bf60543..fc5b9b629a51 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -41,7 +41,8 @@ static int irdma_query_device(struct ib_device *ibdev, props->max_cq = rf->max_cq - rf->used_cqs; props->max_cqe = rf->max_cqe - 1; props->max_mr = rf->max_mr - rf->used_mrs; - props->max_mw = props->max_mr; + if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3) + props->max_mw = props->max_mr; props->max_pd = rf->max_pd - rf->used_pds; props->max_sge_rd = hw_attrs->uk_attrs.max_hw_read_sges; props->max_qp_rd_atom = hw_attrs->max_hw_ird; @@ -56,12 +57,16 @@ static int irdma_query_device(struct ib_device *ibdev, props->max_mcast_qp_attach = IRDMA_MAX_MGS_PER_CTX; props->max_total_mcast_qp_attach = rf->max_qp * IRDMA_MAX_MGS_PER_CTX; props->max_fast_reg_page_list_len = IRDMA_MAX_PAGES_PER_FMR; -#define HCA_CLOCK_TIMESTAMP_MASK 0x1 - if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_2) - props->timestamp_mask = HCA_CLOCK_TIMESTAMP_MASK; props->max_srq = rf->max_srq - rf->used_srqs; props->max_srq_wr = IRDMA_MAX_SRQ_WRS; props->max_srq_sge = hw_attrs->uk_attrs.max_hw_wq_frags; + if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3) { +#define HCA_CORE_CLOCK_KHZ 100UL + props->timestamp_mask = GENMASK(31, 0); + props->hca_core_clock = HCA_CORE_CLOCK_KHZ; + } + if (hw_attrs->uk_attrs.hw_rev >= IRDMA_GEN_3) + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B; return 0; } @@ -798,7 
+803,8 @@ static void irdma_roce_fill_and_set_qpctx_info(struct irdma_qp *iwqp, roce_info->is_qp1 = true; roce_info->rd_en = true; roce_info->wr_rdresp_en = true; - roce_info->bind_en = true; + if (dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_3) + roce_info->bind_en = true; roce_info->dcqcn_en = false; roce_info->rtomin = 5; @@ -829,7 +835,6 @@ static void irdma_iw_fill_and_set_qpctx_info(struct irdma_qp *iwqp, ether_addr_copy(iwarp_info->mac_addr, iwdev->netdev->dev_addr); iwarp_info->rd_en = true; iwarp_info->wr_rdresp_en = true; - iwarp_info->bind_en = true; iwarp_info->ecn_en = true; iwarp_info->rtomin = 5; @@ -1147,8 +1152,6 @@ static int irdma_get_ib_acc_flags(struct irdma_qp *iwqp) } if (iwqp->iwarp_info.rd_en) acc_flags |= IB_ACCESS_REMOTE_READ; - if (iwqp->iwarp_info.bind_en) - acc_flags |= IB_ACCESS_MW_BIND; } return acc_flags; } @@ -2433,8 +2436,8 @@ static int irdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) static inline int cq_validate_flags(u32 flags, u8 hw_rev) { - /* GEN1 does not support CQ create flags */ - if (hw_rev == IRDMA_GEN_1) + /* GEN1/2 does not support CQ create flags */ + if (hw_rev <= IRDMA_GEN_2) return flags ? -EOPNOTSUPP : 0; return flags & ~IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION ? -EOPNOTSUPP : 0; @@ -2660,8 +2663,9 @@ static int irdma_create_cq(struct ib_cq *ibcq, /** * irdma_get_mr_access - get hw MR access permissions from IB access flags * @access: IB access flags + * @hw_rev: Hardware version */ -static inline u16 irdma_get_mr_access(int access) +static inline u16 irdma_get_mr_access(int access, u8 hw_rev) { u16 hw_access = 0; @@ -2671,8 +2675,10 @@ static inline u16 irdma_get_mr_access(int access) IRDMA_ACCESS_FLAGS_REMOTEWRITE : 0; hw_access |= (access & IB_ACCESS_REMOTE_READ) ? IRDMA_ACCESS_FLAGS_REMOTEREAD : 0; - hw_access |= (access & IB_ACCESS_MW_BIND) ? -IRDMA_ACCESS_FLAGS_BIND_WINDOW : 0; + if (hw_rev >= IRDMA_GEN_3) { + hw_access |= (access & IB_ACCESS_MW_BIND) ? 
+IRDMA_ACCESS_FLAGS_BIND_WINDOW : 0; + } hw_access |= (access & IB_ZERO_BASED) ? IRDMA_ACCESS_FLAGS_ZERO_BASED : 0; hw_access |= IRDMA_ACCESS_FLAGS_LOCALREAD; @@ -3242,7 +3248,8 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, stag_info->stag_idx = iwmr->stag >> IRDMA_CQ
[Intel-wired-lan] [rdma v3 21/24] RDMA/irdma: Add Atomic Operations support
From: Faisal Latif Extend irdma to support atomic operations, namely Compare and Swap and Fetch and Add, for GEN3 devices. Signed-off-by: Faisal Latif Signed-off-by: Tatyana Nikolova --- v3: Check IRDMA_ATOMICS_ALLOWED_BIT after the feature info has been read from FW. drivers/infiniband/hw/irdma/ctrl.c | 11 +++ drivers/infiniband/hw/irdma/defs.h | 10 ++- drivers/infiniband/hw/irdma/ig3rdma_hw.c | 3 - drivers/infiniband/hw/irdma/type.h | 4 + drivers/infiniband/hw/irdma/uk.c | 102 +++ drivers/infiniband/hw/irdma/user.h | 27 ++ drivers/infiniband/hw/irdma/verbs.c | 38 + drivers/infiniband/hw/irdma/verbs.h | 6 ++ 8 files changed, 197 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index c4da8f981dac..75411c4b68fb 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -1110,6 +1110,8 @@ static void irdma_sc_qp_setctx_roce_gen_3(struct irdma_sc_qp *qp, FIELD_PREP(IRDMAQPC_UDPRIVCQENABLE, roce_info->udprivcq_en) | FIELD_PREP(IRDMAQPC_PRIVEN, roce_info->priv_mode_en) | + FIELD_PREP(IRDMAQPC_REMOTE_ATOMIC_EN, +info->remote_atomics_en) | FIELD_PREP(IRDMAQPC_TIMELYENABLE, roce_info->timely_en)); set_64bit_val(qp_ctx, 168, FIELD_PREP(IRDMAQPC_QPCOMPCTX, info->qp_compl_ctx)); @@ -1490,6 +1492,8 @@ static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev, FIELD_PREP(IRDMA_CQPSQ_STAG_REMACCENABLED, info->remote_access) | FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index) | FIELD_PREP(IRDMA_CQPSQ_STAG_USEPFRID, info->use_pf_rid) | + FIELD_PREP(IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN, +info->remote_atomics_en) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); dma_wmb(); /* make sure WQE is written before valid bit is set */ @@ -1582,6 +1586,8 @@ static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, FIELD_PREP(IRDMA_CQPSQ_STAG_VABASEDTO, addr_type) | FIELD_PREP(IRDMA_CQPSQ_STAG_USEHMCFNIDX, info->use_hmc_fcn_index) | FIELD_PREP(IRDMA_CQPSQ_STAG_USEPFRID, 
info->use_pf_rid) | + FIELD_PREP(IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN, +info->remote_atomics_en) | FIELD_PREP(IRDMA_CQPSQ_WQEVALID, cqp->polarity); dma_wmb(); /* make sure WQE is written before valid bit is set */ @@ -1740,6 +1746,7 @@ int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | + FIELD_PREP(IRDMAQPSQ_REMOTE_ATOMICS_EN, info->remote_atomics_en) | FIELD_PREP(IRDMAQPSQ_VALID, qp->qp_uk.swqe_polarity); dma_wmb(); /* make sure WQE is written before valid bit is set */ @@ -5542,6 +5549,10 @@ int irdma_get_rdma_features(struct irdma_sc_dev *dev) } dev->feature_info[feat_type] = temp; } + + if (dev->feature_info[IRDMA_FTN_FLAGS] & IRDMA_ATOMICS_ALLOWED_BIT) + dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_ATOMIC_OPS; + exit: dma_free_coherent(dev->hw->device, feat_buf.size, feat_buf.va, feat_buf.pa); diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index 408058b6ba55..3b3680816a65 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -189,6 +189,8 @@ enum irdma_protocol_used { #define IRDMAQP_OP_RDMA_READ_LOC_INV 0x0b #define IRDMAQP_OP_NOP 0x0c #define IRDMAQP_OP_RDMA_WRITE_SOL 0x0d +#define IRDMAQP_OP_ATOMIC_FETCH_ADD0x0f +#define IRDMAQP_OP_ATOMIC_COMPARE_SWAP_ADD 0x11 #define IRDMAQP_OP_GEN_RTS_AE 0x30 enum irdma_cqp_op_type { @@ -694,7 +696,8 @@ enum irdma_cqp_op_type { #define IRDMA_CQPSQ_STAG_USEPFRID BIT_ULL(61) #define IRDMA_CQPSQ_STAG_PBA IRDMA_CQPHC_QPCTX -#define IRDMA_CQPSQ_STAG_HMCFNIDX GENMASK_ULL(5, 0) +#define IRDMA_CQPSQ_STAG_HMCFNIDX GENMASK_ULL(15, 0) +#define IRDMA_CQPSQ_STAG_REMOTE_ATOMIC_EN BIT_ULL(61) #define IRDMA_CQPSQ_STAG_FIRSTPMPBLIDX GENMASK_ULL(27, 0) #define IRDMA_CQPSQ_QUERYSTAG_IDX IRDMA_CQPSQ_STAG_IDX @@ -981,6 +984,9 @@ enum irdma_cqp_op_type { #define IRDMAQPSQ_REMTO IRDMA_CQPHC_QPCTX +#define 
IRDMAQPSQ_STAG GENMASK_ULL(31, 0) +#define IRDMAQPSQ_REMOTE_STAG GENMASK_ULL(31, 0) + #define IRDMAQPSQ_STAGRIGHTS GENMASK_ULL(52, 48) #define IRDMAQPSQ_VABASEDTO BIT_ULL(53) #define IRDMAQPSQ_MEMWINDOWTYPE BIT_ULL(54) @@ -991,6 +997,8 @@ enu
[Intel-wired-lan] [rdma v3 14/24] RDMA/irdma: Introduce GEN3 vPort driver support
From: Mustafa Ismail In the IPU model, a function can host one or more logical network endpoints called vPorts. Each vPort may be associated with either a physical or an internal communication port, and can be RDMA capable. A vPort features a netdev and, if RDMA capable, must have an associated ib_dev. This change introduces a GEN3 auxiliary vPort driver responsible for registering a verbs device for every RDMA-capable vPort. Additionally, the UAPI is updated to prevent the binding of GEN3 devices to older user-space providers. Signed-off-by: Mustafa Ismail Signed-off-by: Tatyana Nikolova --- drivers/infiniband/hw/irdma/ig3rdma_if.c | 108 +++ drivers/infiniband/hw/irdma/main.c | 12 +++ drivers/infiniband/hw/irdma/main.h | 3 + drivers/infiniband/hw/irdma/verbs.c | 12 ++- include/uapi/rdma/irdma-abi.h| 1 + 5 files changed, 134 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ig3rdma_if.c b/drivers/infiniband/hw/irdma/ig3rdma_if.c index 9d0623467af2..2c7dc93dc576 100644 --- a/drivers/infiniband/hw/irdma/ig3rdma_if.c +++ b/drivers/infiniband/hw/irdma/ig3rdma_if.c @@ -14,6 +14,23 @@ static void ig3rdma_idc_core_event_handler(struct idc_rdma_core_dev_info *cdev_i } } +static void ig3rdma_idc_vport_event_handler(struct idc_rdma_vport_dev_info *cdev_info, + struct idc_rdma_event *event) +{ + struct irdma_device *iwdev = auxiliary_get_drvdata(cdev_info->adev); + struct irdma_l2params l2params = {}; + + if (*event->type & BIT(IDC_RDMA_EVENT_AFTER_MTU_CHANGE)) { + ibdev_dbg(&iwdev->ibdev, "CLNT: new MTU = %d\n", iwdev->netdev->mtu); + if (iwdev->vsi.mtu != iwdev->netdev->mtu) { + l2params.mtu = iwdev->netdev->mtu; + l2params.mtu_changed = true; + irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev); + irdma_change_l2params(&iwdev->vsi, &l2params); + } + } +} + static int ig3rdma_cfg_regions(struct irdma_hw *hw, struct idc_rdma_core_dev_info *cdev_info) { @@ -169,3 +186,94 @@ struct idc_rdma_core_auxiliary_drv ig3rdma_core_auxiliary_drv = { }, 
.event_handler = ig3rdma_idc_core_event_handler, }; + +static int ig3rdma_vport_probe(struct auxiliary_device *aux_dev, + const struct auxiliary_device_id *id) +{ + struct idc_rdma_vport_auxiliary_dev *idc_adev = + container_of(aux_dev, struct idc_rdma_vport_auxiliary_dev, adev); + struct auxiliary_device *aux_core_dev = idc_adev->vdev_info->core_adev; + struct irdma_pci_f *rf = auxiliary_get_drvdata(aux_core_dev); + struct iidc_rdma_qos_params qos_info = {}; + struct irdma_l2params l2params = {}; + struct irdma_device *iwdev; + int err; + + if (!rf) { + WARN_ON_ONCE(1); + return -ENOMEM; + } + iwdev = ib_alloc_device(irdma_device, ibdev); + /* Fill iwdev info */ + iwdev->is_vport = true; + iwdev->rf = rf; + iwdev->vport_id = idc_adev->vdev_info->vport_id; + iwdev->netdev = idc_adev->vdev_info->netdev; + iwdev->init_state = INITIAL_STATE; + iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT; + iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT; + iwdev->rcv_wnd = IRDMA_CM_DEFAULT_RCV_WND_SCALED; + iwdev->rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE; + iwdev->roce_mode = true; + iwdev->push_mode = true; + + l2params.mtu = iwdev->netdev->mtu; + irdma_fill_qos_info(&l2params, &qos_info); + + err = irdma_rt_init_hw(iwdev, &l2params); + if (err) + goto err_rt_init; + + err = irdma_ib_register_device(iwdev); + if (err) + goto err_ibreg; + + auxiliary_set_drvdata(aux_dev, iwdev); + + ibdev_dbg(&iwdev->ibdev, + "INIT: Gen[%d] vport[%d] probe success. 
dev_name = %s, core_dev_name = %s, netdev=%s\n", + rf->rdma_ver, idc_adev->vdev_info->vport_id, + dev_name(&aux_dev->dev), + dev_name(&idc_adev->vdev_info->core_adev->dev), + netdev_name(idc_adev->vdev_info->netdev)); + + return 0; +err_ibreg: + irdma_rt_deinit_hw(iwdev); +err_rt_init: + ib_dealloc_device(&iwdev->ibdev); + + return err; +} + +static void ig3rdma_vport_remove(struct auxiliary_device *aux_dev) +{ + struct idc_rdma_vport_auxiliary_dev *idc_adev = + container_of(aux_dev, struct idc_rdma_vport_auxiliary_dev, adev); + struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev); + + ibdev_dbg(&iwdev->ibdev, + "INIT: Gen[%d] dev_name = %s, core_dev_name = %s, netdev=%s\n", + iwdev->rf->rdma_ver, dev_name(&aux_dev->dev), + dev_name(&id
Re: [Intel-wired-lan] [PATCH v5 iwl-next 00/10] idpf: add initial PTP support
> -Original Message- > From: Intel-wired-lan On Behalf Of > Milena Olech > Sent: Friday, January 17, 2025 4:41 AM > To: intel-wired-...@lists.osuosl.org > Cc: net...@vger.kernel.org; Nguyen, Anthony L > ; Kitszel, Przemyslaw > ; Olech, Milena > Subject: [Intel-wired-lan] [PATCH v5 iwl-next 00/10] idpf: add initial PTP > support > > This patch series introduces support for Precision Time Protocol (PTP) to > Intel(R) Infrastructure Data Path Function (IDPF) driver. PTP feature is > supported when the PTP capability is negotiated with the Control > Plane (CP). IDPF creates a PTP clock and sets a set of supported > functions. > > During the PTP initialization, IDPF requests a set of PTP capabilities > and receives a writeback from the CP with the set of supported options. > These options are: > - get time of the PTP clock > - get cross timestamp > - set the time of the PTP clock > - adjust the PTP clock > - Tx timestamping > > Each feature is considered to have direct access, where the operations > on PCIe BAR registers are allowed, or the mailbox access, where the > virtchnl messages are used to perform any PTP action. Mailbox access > means that PTP requests are sent to the CP through dedicated secondary > mailbox and the CP reads/writes/modifies desired resource - PTP Clock > or Tx timestamp registers. > > Tx timestamp capabilities are negotiated only for vports that have > UPLINK_VPORT flag set by the CP. Capabilities provide information about > the number of available Tx timestamp latches, their indexes and size of > the Tx timestamp value. IDPF requests Tx timestamp by setting the > TSYN bit and the requested timestamp index in the context descriptor for > the PTP packets. When the completion tag for that packet is received, > IDPF schedules a worker to read the Tx timestamp value. > > Current implementation of the IDPF driver does not allow to get stable > Tx timestamping, when more than 1 request per 1 second is sent to the > driver. 
Debug is in progress, however PTP feature seems to be affected by > the IDPF transmit flow, as the Tx timestamping relies on the completion > tag. > > v4 -> v5: fix spin unlock when Tx timestamp index is requested > v3 -> v4: change timestamp filters dependent on Tx timestamp cap, > rewrite function that extends Tx timestamp value, minor fixes > v2 -> v3: fix minor issues, revert idpf_for_each_vport changes, > extend idpf_ptp_set_rx_tstamp, split tstamp statistics > v1 -> v2: add stats for timestamping, use ndo_hwtstamp_get/set, > fix minor spelling issues > > Milena Olech (10): > idpf: add initial PTP support > virtchnl: add PTP virtchnl definitions > idpf: move virtchnl structures to the header file > idpf: negotiate PTP capabilities and get PTP clock > idpf: add mailbox access to read PTP clock time > idpf: add PTP clock configuration > idpf: add Tx timestamp capabilities negotiation > idpf: add Tx timestamp flows > idpf: add support for Rx timestamping > idpf: change the method for mailbox workqueue allocation > > drivers/net/ethernet/intel/idpf/Kconfig | 1 + > drivers/net/ethernet/intel/idpf/Makefile | 3 + > drivers/net/ethernet/intel/idpf/idpf.h| 34 + > .../ethernet/intel/idpf/idpf_controlq_api.h | 3 + > drivers/net/ethernet/intel/idpf/idpf_dev.c| 14 + > .../net/ethernet/intel/idpf/idpf_ethtool.c| 70 +- > .../ethernet/intel/idpf/idpf_lan_pf_regs.h| 4 + > .../net/ethernet/intel/idpf/idpf_lan_txrx.h | 13 +- > drivers/net/ethernet/intel/idpf/idpf_lib.c| 47 + > drivers/net/ethernet/intel/idpf/idpf_main.c | 9 +- > drivers/net/ethernet/intel/idpf/idpf_ptp.c| 983 ++ > drivers/net/ethernet/intel/idpf/idpf_ptp.h| 351 +++ > drivers/net/ethernet/intel/idpf/idpf_txrx.c | 169 ++- > drivers/net/ethernet/intel/idpf/idpf_txrx.h | 18 +- > .../net/ethernet/intel/idpf/idpf_virtchnl.c | 160 ++- > .../net/ethernet/intel/idpf/idpf_virtchnl.h | 84 ++ > .../ethernet/intel/idpf/idpf_virtchnl_ptp.c | 677 > drivers/net/ethernet/intel/idpf/virtchnl2.h | 314 +- > 18 files changed, 2852
insertions(+), 102 deletions(-) > create mode 100644 drivers/net/ethernet/intel/idpf/idpf_ptp.c > create mode 100644 drivers/net/ethernet/intel/idpf/idpf_ptp.h > create mode 100644 drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c > > > base-commit: e1e8afea623cb80941623188a8190d3ca80a6e08 > -- > 2.31.1 On testing on this [series|patch], the following issue was observed, PTP hardware receive filter modes does not report all the supported modes from hardware. Time stamping parameters for ethX: Capabilities: hardware-transmit software-transmit hardware-receive software-receive software-system-clock hardware-raw-clock PTP Hardware Clock: 2 Hardware Transmit Timestamp Modes: off on Hardware Receive Filter Modes: none Thank You Krishneil Singh
Re: [Intel-wired-lan] [PATCH net-next v7 0/5] net: napi: add CPU affinity to napi->config
On Tue, 4 Feb 2025 15:06:17 -0700 Ahmed Zaki wrote: > Drivers usually need to re-apply the user-set IRQ affinity to their IRQs > after reset. However, since there can be only one IRQ affinity notifier > for each IRQ, registering IRQ notifiers conflicts with the ARFS rmap > management in the core (which also registers separate IRQ affinity > notifiers). > > Move the IRQ affinity management to the napi struct. This way we can have > a unified IRQ notifier to re-apply the user-set affinity and also manage > the ARFS rmaps. The first patch moves the ARFS rmap management to CORE. > The second patch adds the IRQ affinity mask to napi_config and re-applies > the mask after reset. Patches 3-5 use the new API for bnxt, ice and idpf > drivers. Hi Ahmed! I put together a selftest for maintaining the affinity: https://github.com/kuba-moo/linux/commit/de7d2475750ac05b6e414d7e5201e354b05cf146 It depends on a couple of selftest infra patches (in that branch) which I just posted to the list. But if you'd like you can use it against your drivers.
[Intel-wired-lan] [iwl-next v3 07/24] idpf: implement get LAN mmio memory regions
From: Joshua Hay The rdma driver needs to map its own mmio regions for the sake of performance, meaning the idpf needs to avoid mapping portions of the bar space. However, to be vendor agnostic, the idpf cannot assume where these are and must avoid mapping hard coded regions. Instead, the idpf will map the bare minimum to load and communicate with the control plane, i.e. the mailbox registers and the reset state registers. The idpf will then call a new virtchnl op to fetch a list of mmio regions that it should map. All other registers will calculate which region they should store their address from. Signed-off-by: Joshua Hay Signed-off-by: Tatyana Nikolova --- v3: - header cleanup drivers/net/ethernet/intel/idpf/idpf.h| 69 +++- .../net/ethernet/intel/idpf/idpf_controlq.c | 14 +- .../net/ethernet/intel/idpf/idpf_controlq.h | 15 +- drivers/net/ethernet/intel/idpf/idpf_dev.c| 35 ++-- drivers/net/ethernet/intel/idpf/idpf_idc.c| 26 ++- drivers/net/ethernet/intel/idpf/idpf_main.c | 32 +++- drivers/net/ethernet/intel/idpf/idpf_mem.h| 8 +- drivers/net/ethernet/intel/idpf/idpf_vf_dev.c | 31 ++-- .../net/ethernet/intel/idpf/idpf_virtchnl.c | 149 +- drivers/net/ethernet/intel/idpf/virtchnl2.h | 31 +++- 10 files changed, 359 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index 491db5b2d79d..84aa00e788ca 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -192,7 +192,8 @@ struct idpf_vport_max_q { * @trigger_reset: Trigger a reset to occur */ struct idpf_reg_ops { - void (*ctlq_reg_init)(struct idpf_ctlq_create_info *cq); + void (*ctlq_reg_init)(struct idpf_adapter *adapter, + struct idpf_ctlq_create_info *cq); int (*intr_reg_init)(struct idpf_vport *vport); void (*mb_intr_reg_init)(struct idpf_adapter *adapter); void (*reset_reg_init)(struct idpf_adapter *adapter); @@ -200,15 +201,29 @@ struct idpf_reg_ops { enum idpf_flags trig_cause); }; +#define 
IDPF_PF_MBX_REGION_SZ 4096 +#define IDPF_PF_RSTAT_REGION_SZ2048 +#define IDPF_VF_MBX_REGION_SZ 10240 +#define IDPF_VF_RSTAT_REGION_SZ2048 + /** * struct idpf_dev_ops - Device specific operations * @reg_ops: Register operations * @idc_init: IDC initialization + * @mbx_reg_start: offset to start of mailbox registers + * @mbx_reg_sz: size of mailbox register region + * @rstat_reg_start: offset to start of rstat registers + * @rstat_reg_sz: size of rstat register region */ struct idpf_dev_ops { struct idpf_reg_ops reg_ops; int (*idc_init)(struct idpf_adapter *adapter); + + resource_size_t mbx_reg_start; + resource_size_t mbx_reg_sz; + resource_size_t rstat_reg_start; + resource_size_t rstat_reg_sz; }; /** @@ -734,6 +749,35 @@ static inline u8 idpf_get_min_tx_pkt_len(struct idpf_adapter *adapter) return pkt_len ? pkt_len : IDPF_TX_MIN_PKT_LEN; } +/** + * idpf_get_mbx_reg_addr - Get BAR0 mailbox register address + * @adapter: private data struct + * @reg_offset: register offset value + * + * Return: BAR0 mailbox register address based on register offset. + */ +static inline void __iomem *idpf_get_mbx_reg_addr(struct idpf_adapter *adapter, + resource_size_t reg_offset) +{ + return adapter->hw.mbx.addr + reg_offset; +} + +/** + * idpf_get_rstat_reg_addr - Get BAR0 rstat register address + * @adapter: private data struct + * @reg_offset: register offset value + * + * Return: BAR0 rstat register address based on register offset. 
+ */ +static inline +void __iomem *idpf_get_rstat_reg_addr(struct idpf_adapter *adapter, + resource_size_t reg_offset) +{ + reg_offset -= adapter->dev_ops.rstat_reg_start; + + return adapter->hw.rstat.addr + reg_offset; +} + /** * idpf_get_reg_addr - Get BAR0 register address * @adapter: private data struct @@ -744,7 +788,26 @@ static inline u8 idpf_get_min_tx_pkt_len(struct idpf_adapter *adapter) static inline void __iomem *idpf_get_reg_addr(struct idpf_adapter *adapter, resource_size_t reg_offset) { - return (void __iomem *)(adapter->hw.hw_addr + reg_offset); + struct idpf_hw *hw = &adapter->hw; + + for (int i = 0; i < hw->num_lan_regs; i++) { + struct idpf_mmio_reg *region = &hw->lan_regs[i]; + + if (reg_offset >= region->addr_start && + reg_offset < (region->addr_start + region->addr_len)) { + reg_offset -= region->addr_start; + + return region->addr + reg_offset; + } + } + + /* It's impossible to hit this case with offsets from the CP. But if we +
[Intel-wired-lan] [rdma v3 23/24] RDMA/irdma: Add Push Page Support for GEN3
From: Jay Bhat Implement the necessary support for enabling push on GEN3 devices. Key Changes: - Introduce a RDMA virtual channel operation with the Control Plane (CP) to manage the doorbell/push page which is a privileged operation. - Implement the MMIO mapping of push pages which adheres to the updated BAR layout and page indexing specific to GEN3 devices. - Support up to 16 QPs on a single push page, given that they are tied to the same Queue Set. - Impose limits on the size of WQEs pushed based on the message length constraints provided by the CP. Signed-off-by: Jay Bhat Signed-off-by: Tatyana Nikolova --- v3: * Populate hmc_fn_id and use_hmc_fn_id fields in irdma_cqp_manage_push_page_info structure. * Remove logic for push page sharing among QPs. drivers/infiniband/hw/irdma/ctrl.c | 1 - drivers/infiniband/hw/irdma/defs.h | 2 + drivers/infiniband/hw/irdma/irdma.h| 1 + drivers/infiniband/hw/irdma/type.h | 3 ++ drivers/infiniband/hw/irdma/user.h | 1 - drivers/infiniband/hw/irdma/utils.c| 21 +-- drivers/infiniband/hw/irdma/verbs.c| 51 +- drivers/infiniband/hw/irdma/verbs.h| 3 ++ drivers/infiniband/hw/irdma/virtchnl.c | 40 drivers/infiniband/hw/irdma/virtchnl.h | 11 ++ include/uapi/rdma/irdma-abi.h | 3 +- 11 files changed, 121 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 4158db00085f..a6df550eb8c8 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -6584,7 +6584,6 @@ int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev, dev->hw_attrs.max_hw_outbound_msg_size = IRDMA_MAX_OUTBOUND_MSG_SIZE; dev->hw_attrs.max_mr_size = IRDMA_MAX_MR_SIZE; dev->hw_attrs.max_hw_inbound_msg_size = IRDMA_MAX_INBOUND_MSG_SIZE; - dev->hw_attrs.max_hw_device_pages = IRDMA_MAX_PUSH_PAGE_COUNT; dev->hw_attrs.uk_attrs.max_hw_inline = IRDMA_MAX_INLINE_DATA_SIZE; dev->hw_attrs.max_hw_wqes = IRDMA_MAX_WQ_ENTRIES; dev->hw_attrs.max_qp_wr = 
IRDMA_MAX_QP_WRS(IRDMA_MAX_QUANTA_PER_WR); diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index 983b22d7ae23..46330513085b 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -167,6 +167,8 @@ enum irdma_protocol_used { #define IRDMA_MAX_RQ_WQE_SHIFT_GEN1 2 #define IRDMA_MAX_RQ_WQE_SHIFT_GEN2 3 +#define IRDMA_DEFAULT_MAX_PUSH_LEN 8192 + #define IRDMA_SQ_RSVD 258 #define IRDMA_RQ_RSVD 1 diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h index ff938a01d70c..def6a16f5d6e 100644 --- a/drivers/infiniband/hw/irdma/irdma.h +++ b/drivers/infiniband/hw/irdma/irdma.h @@ -133,6 +133,7 @@ struct irdma_uk_attrs { u32 min_hw_cq_size; u32 max_hw_cq_size; u32 max_hw_srq_quanta; + u16 max_hw_push_len; u16 max_hw_sq_chunk; u16 min_hw_wq_size; u8 hw_rev; diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 665dc74cb10a..e04d6c35cf59 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -1282,8 +1282,11 @@ struct irdma_qhash_table_info { struct irdma_cqp_manage_push_page_info { u32 push_idx; u16 qs_handle; + u16 hmc_fn_id; u8 free_page; u8 push_page_type; + u8 page_type; + u8 use_hmc_fn_id; }; struct irdma_qp_flush_info { diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index ab57f689827a..47617ba285c1 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -182,7 +182,6 @@ enum irdma_device_caps_const { IRDMA_MAX_SGE_RD = 13, IRDMA_MAX_OUTBOUND_MSG_SIZE = 2147483647, IRDMA_MAX_INBOUND_MSG_SIZE =2147483647, - IRDMA_MAX_PUSH_PAGE_COUNT = 1024, IRDMA_MAX_PE_ENA_VF_COUNT = 32, IRDMA_MAX_VF_FPM_ID = 47, IRDMA_MAX_SQ_PAYLOAD_SIZE = 2145386496, diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 552a4cf2c51b..11ceca099538 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ 
b/drivers/infiniband/hw/irdma/utils.c @@ -1085,18 +1085,29 @@ int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) /** * irdma_dealloc_push_page - free a push page for qp * @rf: RDMA PCI function - * @qp: hardware control qp + * @iwqp: QP pointer */ static void irdma_dealloc_push_page(struct irdma_pci_f *rf, - struct irdma_sc_qp *qp) + struct irdma_qp *iwqp) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; int status; + struct irdm
[Intel-wired-lan] [tnguy-net-queue:main] BUILD SUCCESS 1438f5d07b9a7afb15e1d0e26df04a6fd4e56a3c
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/tnguy/net-queue.git main branch HEAD: 1438f5d07b9a7afb15e1d0e26df04a6fd4e56a3c rtnetlink: fix netns leak with rtnl_setlink() elapsed time: 1135m configs tested: 256 configs skipped: 4 The following configs have been built successfully. More configs may be tested in the coming days. tested configs: alpha allnoconfiggcc-14.2.0 alphaallyesconfigclang-21 alphaallyesconfiggcc-14.2.0 alpha defconfigclang-19 alpha defconfiggcc-14.2.0 arc allmodconfigclang-18 arc allnoconfiggcc-14.2.0 arc allyesconfigclang-18 arc defconfiggcc-14.2.0 arc haps_hs_smp_defconfigclang-19 arc randconfig-001-20250207gcc-13.2.0 arc randconfig-001-20250208gcc-13.2.0 arc randconfig-002-20250207gcc-13.2.0 arc randconfig-002-20250208gcc-13.2.0 arm allmodconfigclang-18 arm allnoconfiggcc-14.2.0 arm allyesconfigclang-18 arm at91_dt_defconfigclang-21 arm defconfiggcc-14.2.0 arm lpc32xx_defconfigclang-21 arm lpc32xx_defconfiggcc-14.2.0 arm pxa910_defconfiggcc-14.2.0 arm randconfig-001-20250207gcc-13.2.0 arm randconfig-001-20250208gcc-13.2.0 arm randconfig-002-20250207gcc-13.2.0 arm randconfig-002-20250208gcc-13.2.0 arm randconfig-003-20250207gcc-13.2.0 arm randconfig-003-20250208gcc-13.2.0 arm randconfig-004-20250207gcc-13.2.0 arm randconfig-004-20250208gcc-13.2.0 arm s3c6400_defconfigclang-19 arm wpcm450_defconfigclang-19 arm64allmodconfigclang-18 arm64 allnoconfiggcc-14.2.0 arm64 defconfiggcc-14.2.0 arm64 randconfig-001-20250207gcc-13.2.0 arm64 randconfig-001-20250208gcc-13.2.0 arm64 randconfig-002-20250207gcc-13.2.0 arm64 randconfig-002-20250208gcc-13.2.0 arm64 randconfig-003-20250207gcc-13.2.0 arm64 randconfig-003-20250208gcc-13.2.0 arm64 randconfig-004-20250207gcc-13.2.0 arm64 randconfig-004-20250208gcc-13.2.0 csky allnoconfiggcc-14.2.0 cskydefconfigclang-21 cskydefconfiggcc-14.2.0 csky randconfig-001-20250207gcc-14.2.0 csky randconfig-001-20250208gcc-14.2.0 csky randconfig-002-20250207gcc-14.2.0 csky randconfig-002-20250208gcc-14.2.0 
hexagon alldefconfigclang-21 hexagon allmodconfigclang-21 hexagon allnoconfiggcc-14.2.0 hexagon allyesconfigclang-18 hexagon allyesconfigclang-21 hexagon defconfiggcc-14.2.0 hexagon randconfig-001-20250207gcc-14.2.0 hexagon randconfig-001-20250208clang-21 hexagon randconfig-002-20250207gcc-14.2.0 hexagon randconfig-002-20250208clang-21 i386 allmodconfigclang-19 i386 allnoconfigclang-19 i386 allyesconfigclang-19 i386buildonly-randconfig-001-20250207gcc-12 i386buildonly-randconfig-001-20250208gcc-11 i386buildonly-randconfig-002-20250207gcc-12 i386buildonly-randconfig-002-20250208clang-19 i386buildonly-randconfig-002-20250208gcc-11 i386buildonly-randconfig-003-20250207gcc-12 i386buildonly-randconfig-003-20250208gcc-11 i386buildonly-randconfig-003-20250208gcc-12 i386buildonly-randconfig-004-20250207gcc-12 i386buildonly-randconfig-004-20250208clang-19 i386buildonly-randconfig-004-20250208gcc-11 i386buildonly-randconfig-005-20250207gcc-12 i386buildonly-randconfig-005-20250208clang-19 i386buildonly-randconfig-005-20250208gcc-11 i386buildonly-randconfig-006-20250207gcc-12 i386buildonly-randconfig-006-20250208
[Intel-wired-lan] [PATCH iwl-next 0/3] ice: decouple control of SMA/U.FL/SDP pins
Previously control of the dpll SMA/U.FL pins was partially done through ptp API, decouple pins control from both interfaces (dpll and ptp). Allow the SMA/U.FL pins control over a dpll subsystem, and leave ptp related SDP pins control over a ptp subsystem. Arkadiusz Kubalewski (1): ice: redesign dpll sma/u.fl pins control Karol Kolacinski (2): ice: change SMA pins to SDP in PTP API ice: add ice driver PTP pin documentation .../device_drivers/ethernet/intel/ice.rst | 13 + drivers/net/ethernet/intel/ice/ice_dpll.c | 952 +- drivers/net/ethernet/intel/ice/ice_dpll.h | 23 +- drivers/net/ethernet/intel/ice/ice_ptp.c | 254 + drivers/net/ethernet/intel/ice/ice_ptp.h | 3 - 5 files changed, 1011 insertions(+), 234 deletions(-) base-commit: 233a2b1480a0bdf6b40d4debf58a07084e9921ff prerequisite-patch-id: 2cda134043ccfc781dd595052cfc60a3e2ea48ea prerequisite-patch-id: 62ac41823e7278621af3745a171aae07508711c8 prerequisite-patch-id: 1330728a760d99174344cb421336ae9b01e17f38 prerequisite-patch-id: ff2afa3e3a2c60a590d17a880b610e2a37e7af0c prerequisite-patch-id: cbff95efd09cb57e17c68c464ee1e317d01cf822 prerequisite-patch-id: e5be07f7b169f2443c034f04e3d0a00a8d0a8894 prerequisite-patch-id: a5f362eec88b62ff098203469cef8534f176d2a8 prerequisite-patch-id: 545b9e38f61ccfd5b33ab9c3e3a6e7a9f899e306 prerequisite-patch-id: a74b6b981ecd8a320284454d75b1dfc9e555b5f0 prerequisite-patch-id: df0a5f503065fa5869b1c915721a54eb3c7394cb prerequisite-patch-id: faebd604b0a6eb2a888e99b8977f803abe035abf prerequisite-patch-id: b7543662f5225ce13a1c95749504c68ef4733aea prerequisite-patch-id: a7297c1e743f01d118c7f77b39e5755f7a704e17 prerequisite-patch-id: 6f036cdf7bca2a272b153ecc5b3a767f41517c38 prerequisite-patch-id: bb790f877236aad43dae0bdbdceb0a3553260d10 prerequisite-patch-id: 2f53433b0d2a98cd42b18429bdbec1542b175b1f prerequisite-patch-id: cc9bf85bb9d988d92ab6cb1524bf213ec1351032 prerequisite-patch-id: 112c048b7ae143edda05244b0d8b5ab928d3eff4 prerequisite-patch-id: 124be0607c41aebe292c7b81910857489027baf1 
prerequisite-patch-id: b6b5f0e405d566879133d53c26fd998e9f330ff2 prerequisite-patch-id: 777e25e09efe2ec4863e3bebdb247bac3e037c85 prerequisite-patch-id: bf13dbef14d654b243150d4f2603eb90ae497058 prerequisite-patch-id: 76f1c5ef5dacad0600339d5cf843ca14fcfa9dde prerequisite-patch-id: 586431a13be4f1ecf0adf450242aa7e90975d38f prerequisite-patch-id: e5c687a47edf3659dca8519e4c5250bbea89171b prerequisite-patch-id: 9f8081c59e275240cd76911fbede7d2737473357 prerequisite-patch-id: f4d6edba52edea1276e0095e132733f4438de720 prerequisite-patch-id: 5e7afab1204a42d90b8b6a14e3881cf1d4987954 prerequisite-patch-id: 708e14a83a03377f2909b3ce0d72d21a4619a03d prerequisite-patch-id: ae9720262fb8d1f92b157865f02a9fc7d9aa1582 prerequisite-patch-id: 11c806ab6cc8d29c86218d5760ca22cf3ef2ae05 prerequisite-patch-id: 1aae146d6c20d41b4785d37962052a52c320ac3b prerequisite-patch-id: 59b00a073b5055091ccf55905e746a372dfc5e8e prerequisite-patch-id: 5b640578751b48ab50748dbe6f864ce14f1978c9 prerequisite-patch-id: 725ea892cdefd598a1841323c6e74efe160dd3fe prerequisite-patch-id: 03bb4b3b1f37211fbcd379a19ebff5621c9d901f prerequisite-patch-id: 877ab147dd7c2e56beeb97bc4651fef89590cc23 prerequisite-patch-id: 798f81cfb09f75af615986689658787d29427e85 prerequisite-patch-id: 4e64a22702fa030f57436da273da1093153cfa7a prerequisite-patch-id: c8b8f75ae6c949e68a8ee0b6e7b09344a700663f prerequisite-patch-id: 19fed1ea4aaa320e4a4e46f9c39c7e994f09c7d9 prerequisite-patch-id: 546c7611f620c90a054da039dd19cbc7339edb39 prerequisite-patch-id: 272344e3e7ca650f3833ad62ffa75aa3b080fd72 prerequisite-patch-id: b1d967b8973ec9320e239653773c7caa9d54de70 -- 2.38.1
[Intel-wired-lan] [PATCH iwl-next 3/3] ice: add ice driver PTP pin documentation
From: Karol Kolacinski Add a description of PTP pins support by the adapters to ice driver documentation. Reviewed-by: Milena Olech Signed-off-by: Karol Kolacinski Signed-off-by: Arkadiusz Kubalewski --- .../device_drivers/ethernet/intel/ice.rst | 13 + 1 file changed, 13 insertions(+) diff --git a/Documentation/networking/device_drivers/ethernet/intel/ice.rst b/Documentation/networking/device_drivers/ethernet/intel/ice.rst index 3c46a48d99ba..0bca293cf9cb 100644 --- a/Documentation/networking/device_drivers/ethernet/intel/ice.rst +++ b/Documentation/networking/device_drivers/ethernet/intel/ice.rst @@ -927,6 +927,19 @@ To enable/disable UDP Segmentation Offload, issue the following command:: # ethtool -K tx-udp-segmentation [off|on] +PTP pin interface +- +All adapters support standard PTP pin interface. SDPs (Software Definable Pin) +are single ended pins with both periodic output and external timestamp +supported. There are also specific differential input/output pins (TIME_SYNC, +1PPS) with only one of the functions supported. + +There are adapters with DPLL, where pins are connected to the DPLL instead of +being exposed on the board. You have to be aware that in those configurations, +only SDP pins are exposed and each pin has its own fixed direction. +To see input signal on those PTP pins, you need to configure DPLL properly. +Output signal is only visible on DPLL and to send it to the board SMA/U.FL pins, +DPLL output pins have to be manually configured. GNSS module --- -- 2.38.1
[Intel-wired-lan] [PATCH iwl-next 1/3] ice: redesign dpll sma/u.fl pins control
DPLL-enabled E810 NIC driver provides user with list of input and output pins. Hardware internal design impacts user control over SMA and U.FL pins. Currently end-user view on those dpll pins doesn't provide any layer of abstraction. On the hardware level SMA and U.FL pins are tied together due to existence of direction control logic for each pair: - SMA1 (bi-directional) and U.FL1 (only output) - SMA2 (bi-directional) and U.FL2 (only input) The user activity on each pin of the pair may impact the state of the other. Previously all the pins were provided to the user as is, without the control over SMA pins direction. Introduce a software controlled layer of abstraction over external board pins, instead of providing the user with access to raw pins connected to the dpll: - new software controlled SMA and U.FL pins, - callback operations directing user requests to corresponding hardware pins according to the runtime configuration, - ability to control SMA pins direction. Reviewed-by: Przemek Kitszel Signed-off-by: Arkadiusz Kubalewski --- drivers/net/ethernet/intel/ice/ice_dpll.c | 952 +- drivers/net/ethernet/intel/ice/ice_dpll.h | 23 +- 2 files changed, 959 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c index 8d806d8ad761..1af4bfff012b 100644 --- a/drivers/net/ethernet/intel/ice/ice_dpll.c +++ b/drivers/net/ethernet/intel/ice/ice_dpll.c @@ -11,6 +11,28 @@ #define ICE_DPLL_RCLK_NUM_PER_PF 1 #define ICE_DPLL_PIN_ESYNC_PULSE_HIGH_PERCENT 25 #define ICE_DPLL_PIN_GEN_RCLK_FREQ 1953125 +#define ICE_DPLL_PIN_PRIO_OUTPUT 0xff +#define ICE_DPLL_SW_PIN_INPUT_BASE_SFP 4 +#define ICE_DPLL_SW_PIN_INPUT_BASE_QSFP6 +#define ICE_DPLL_SW_PIN_OUTPUT_BASE0 + +#define ICE_DPLL_PIN_SW_INPUT_ABS(in_idx) \ + (ICE_DPLL_SW_PIN_INPUT_BASE_SFP + (in_idx)) + +#define ICE_DPLL_PIN_SW_1_INPUT_ABS_IDX \ + (ICE_DPLL_PIN_SW_INPUT_ABS(ICE_DPLL_PIN_SW_1_IDX)) + +#define ICE_DPLL_PIN_SW_2_INPUT_ABS_IDX \ + 
(ICE_DPLL_PIN_SW_INPUT_ABS(ICE_DPLL_PIN_SW_2_IDX)) + +#define ICE_DPLL_PIN_SW_OUTPUT_ABS(out_idx) \ + (ICE_DPLL_SW_PIN_OUTPUT_BASE + (out_idx)) + +#define ICE_DPLL_PIN_SW_1_OUTPUT_ABS_IDX \ + (ICE_DPLL_PIN_SW_OUTPUT_ABS(ICE_DPLL_PIN_SW_1_IDX)) + +#define ICE_DPLL_PIN_SW_2_OUTPUT_ABS_IDX \ + (ICE_DPLL_PIN_SW_OUTPUT_ABS(ICE_DPLL_PIN_SW_2_IDX)) /** * enum ice_dpll_pin_type - enumerate ice pin types: @@ -18,24 +40,60 @@ * @ICE_DPLL_PIN_TYPE_INPUT: input pin * @ICE_DPLL_PIN_TYPE_OUTPUT: output pin * @ICE_DPLL_PIN_TYPE_RCLK_INPUT: recovery clock input pin + * @ICE_DPLL_PIN_TYPE_SOFTWARE: software controlled SMA/U.FL pins */ enum ice_dpll_pin_type { ICE_DPLL_PIN_INVALID, ICE_DPLL_PIN_TYPE_INPUT, ICE_DPLL_PIN_TYPE_OUTPUT, ICE_DPLL_PIN_TYPE_RCLK_INPUT, + ICE_DPLL_PIN_TYPE_SOFTWARE, }; static const char * const pin_type_name[] = { [ICE_DPLL_PIN_TYPE_INPUT] = "input", [ICE_DPLL_PIN_TYPE_OUTPUT] = "output", [ICE_DPLL_PIN_TYPE_RCLK_INPUT] = "rclk-input", + [ICE_DPLL_PIN_TYPE_SOFTWARE] = "software", }; +static const char * const ice_dpll_sw_pin_sma[] = { "SMA1", "SMA2" }; +static const char * const ice_dpll_sw_pin_ufl[] = { "U.FL1", "U.FL2" }; + static const struct dpll_pin_frequency ice_esync_range[] = { DPLL_PIN_FREQUENCY_RANGE(0, DPLL_PIN_FREQUENCY_1_HZ), }; +/** + * ice_dpll_is_sw_pin - check if given pin shall be controlled by SW + * @pf: private board structure + * @index: index of a pin as understood by FW + * @input: true for input, false for output + * + * Check if the pin shall be controlled by SW - instead of providing raw access + * for pin control. For E810 NIC with dpll there is additional MUX-related logic + * between SMA/U.FL pins/connectors and dpll device, best to give user access + * with series of wrapper functions as from user perspective they convey single + * functionality rather than separated pins. 
+ * + * Return: + * * true - pin controlled by SW + * * false - pin not controlled by SW + */ +static bool ice_dpll_is_sw_pin(struct ice_pf *pf, u8 index, bool input) +{ + if (input && pf->hw.device_id == ICE_DEV_ID_E810C_QSFP) + index -= ICE_DPLL_SW_PIN_INPUT_BASE_QSFP - +ICE_DPLL_SW_PIN_INPUT_BASE_SFP; + + if ((input && (index == ICE_DPLL_PIN_SW_1_INPUT_ABS_IDX || + index == ICE_DPLL_PIN_SW_2_INPUT_ABS_IDX)) || + (!input && (index == ICE_DPLL_PIN_SW_1_OUTPUT_ABS_IDX || + index == ICE_DPLL_PIN_SW_2_OUTPUT_ABS_IDX))) + return true; + return false; +} + /** * ice_dpll_is_reset - check if reset is in progress * @pf: private board structure @@ -279,6 +337,87 @@ ice_dpll_output_frequency_get(const struct dpll_pin *pin, void *pin_priv, extack, ICE_D
[Intel-wired-lan] [PATCH iwl-next 2/3] ice: change SMA pins to SDP in PTP API
From: Karol Kolacinski This change aligns E810 PTP pin control to all other products. Currently, SMA/U.FL port expanders are controlled together with SDP pins connected to 1588 clock. To align this, separate this control by exposing only SDP20..23 pins in PTP API on adapters with DPLL. Clear error for all E810 on absent NVM pin section or other errors to allow proper initialization on SMA E810 with NVM section. Use ARRAY_SIZE for pin array instead of internal definition. Reviewed-by: Milena Olech Signed-off-by: Karol Kolacinski Signed-off-by: Arkadiusz Kubalewski --- drivers/net/ethernet/intel/ice/ice_ptp.c | 254 --- drivers/net/ethernet/intel/ice/ice_ptp.h | 3 - 2 files changed, 39 insertions(+), 218 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 1bb0033347c7..a7aa6d5fb775 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -40,21 +40,19 @@ static const struct ice_ptp_pin_desc ice_pin_desc_e810[] = { { ONE_PPS, { -1, 5 }, { 0, 1 }}, }; -static const char ice_pin_names_nvm[][64] = { - "GNSS", - "SMA1", - "U.FL1", - "SMA2", - "U.FL2", +static const char ice_pin_names_dpll[][64] = { + "SDP20", + "SDP21", + "SDP22", + "SDP23", }; -static const struct ice_ptp_pin_desc ice_pin_desc_e810_sma[] = { +static const struct ice_ptp_pin_desc ice_pin_desc_dpll[] = { /* name, gpio, delay */ - { GNSS, { 1, -1 }, { 0, 0 }}, - { SMA1, { 1, 0 }, { 0, 1 }}, - { UFL1, { -1, 0 }, { 0, 1 }}, - { SMA2, { 3, 2 }, { 0, 1 }}, - { UFL2, { 3, -1 }, { 0, 0 }}, + { SDP0, { -1, 0 }, { 0, 1 }}, + { SDP1, { 1, -1 }, { 0, 0 }}, + { SDP2, { -1, 2 }, { 0, 1 }}, + { SDP3, { 3, -1 }, { 0, 0 }}, }; static struct ice_pf *ice_get_ctrl_pf(struct ice_pf *pf) @@ -92,101 +90,6 @@ static int ice_ptp_find_pin_idx(struct ice_pf *pf, enum ptp_pin_function func, return -1; } -/** - * ice_ptp_update_sma_data - update SMA pins data according to pins setup - * @pf: Board private structure - * 
@sma_pins: parsed SMA pins status - * @data: SMA data to update - */ -static void ice_ptp_update_sma_data(struct ice_pf *pf, unsigned int sma_pins[], - u8 *data) -{ - const char *state1, *state2; - - /* Set the right state based on the desired configuration. -* When bit is set, functionality is disabled. -*/ - *data &= ~ICE_ALL_SMA_MASK; - if (!sma_pins[UFL1 - 1]) { - if (sma_pins[SMA1 - 1] == PTP_PF_EXTTS) { - state1 = "SMA1 Rx, U.FL1 disabled"; - *data |= ICE_SMA1_TX_EN; - } else if (sma_pins[SMA1 - 1] == PTP_PF_PEROUT) { - state1 = "SMA1 Tx U.FL1 disabled"; - *data |= ICE_SMA1_DIR_EN; - } else { - state1 = "SMA1 disabled, U.FL1 disabled"; - *data |= ICE_SMA1_MASK; - } - } else { - /* U.FL1 Tx will always enable SMA1 Rx */ - state1 = "SMA1 Rx, U.FL1 Tx"; - } - - if (!sma_pins[UFL2 - 1]) { - if (sma_pins[SMA2 - 1] == PTP_PF_EXTTS) { - state2 = "SMA2 Rx, U.FL2 disabled"; - *data |= ICE_SMA2_TX_EN | ICE_SMA2_UFL2_RX_DIS; - } else if (sma_pins[SMA2 - 1] == PTP_PF_PEROUT) { - state2 = "SMA2 Tx, U.FL2 disabled"; - *data |= ICE_SMA2_DIR_EN | ICE_SMA2_UFL2_RX_DIS; - } else { - state2 = "SMA2 disabled, U.FL2 disabled"; - *data |= ICE_SMA2_MASK; - } - } else { - if (!sma_pins[SMA2 - 1]) { - state2 = "SMA2 disabled, U.FL2 Rx"; - *data |= ICE_SMA2_DIR_EN | ICE_SMA2_TX_EN; - } else { - state2 = "SMA2 Tx, U.FL2 Rx"; - *data |= ICE_SMA2_DIR_EN; - } - } - - dev_dbg(ice_pf_to_dev(pf), "%s, %s\n", state1, state2); -} - -/** - * ice_ptp_set_sma_cfg - set the configuration of the SMA control logic - * @pf: Board private structure - * - * Return: 0 on success, negative error code otherwise - */ -static int ice_ptp_set_sma_cfg(struct ice_pf *pf) -{ - const struct ice_ptp_pin_desc *ice_pins = pf->ptp.ice_pin_desc; - struct ptp_pin_desc *pins = pf->ptp.pin_desc; - unsigned int sma_pins[ICE_SMA_PINS_NUM] = {}; - int err; - u8 data; - - /* Read initial pin state value */ - err = ice_read_sma_ctrl(&pf->hw, &data); - if (err) - return err; - - /* Get SMA/U.FL pins states */ - for (int i = 
0; i < pf->ptp.info.n_pins; i++) - if (pins[i].func) { -
Re: [Intel-wired-lan] [PATCH iwl-net v2] idpf: call set_real_num_queues in idpf_open
> -Original Message- > From: Intel-wired-lan On Behalf Of > Joshua Hay > Sent: Tuesday, February 4, 2025 6:08 PM > To: intel-wired-...@lists.osuosl.org > Cc: Samudrala, Sridhar ; Hay, Joshua A > ; Chittim, Madhu > Subject: [Intel-wired-lan] [PATCH iwl-net v2] idpf: call set_real_num_queues > in > idpf_open > > On initial driver load, alloc_etherdev_mqs is called with whatever max queue > values are provided by the control plane. However, if the driver is loaded on > a > system where num_online_cpus() returns less than the max queues, the > netdev will think there are more queues than are actually available. Only > num_online_cpus() will be allocated, but > skb_get_queue_mapping(skb) could possibly return an index beyond the > range of allocated queues. Consequently, the packet is silently dropped and it > appears as if TX is broken. > > Set the real number of queues during open so the netdev knows how many > queues will be allocated. > > v2: > - call set_real_num_queues in idpf_open. Previous change called > set_real_num_queues function in idpf_up_complete, but it is possible > for up_complete to be called without holding the RTNL lock. If user > brings up interface, then issues a reset, the init_task will call > idpf_vport_open->idpf_up_complete. Since this is initiated by the > driver, the RTNL lock is not taken. > - adjust title to reflect new changes. > > Signed-off-by: Joshua Hay > Fixes: 1c325aac10a8 ("idpf: configure resources for TX queues") > Reviewed-by: Madhu Chittim Tested-by: Samuel Salin
[Intel-wired-lan] igb: XDP/ZC busy polling
Hello Joe, I noticed that XDP/ZC busy polling does not work anymore in combination with igb driver. This seems to be related to commit 5ef44b3cb43b ("xsk: Bring back busy polling support") which relies on netif_queue_set_napi(). I see you implemented it for e1000, igc and so on. However, igb is missing. Do you have any plans to add the missing registration to igb? Just asking. Otherwise, I can send a patch for it. Thanks, Kurt signature.asc Description: PGP signature
Re: [Intel-wired-lan] [PATCH iwl-net] ixgbe: fix media cage present detection for E610 device
On Thu, Feb 06, 2025 at 04:19:20PM +0100, Piotr Kwapulinski wrote: > The commit 23c0e5a16bcc ("ixgbe: Add link management support for E610 > device") introduced incorrect checking of media cage presence for E610 > device. Fix it. > > Fixes: 23c0e5a16bcc ("ixgbe: Add link management support for E610 device") > Reported-by: Dan Carpenter > Closes: > https://lore.kernel.org/all/e7d73b32-f12a-49d1-8b60-1ef83359ec13@stanley.mountain/ > Reviewed-by: Michal Swiatkowski > Reviewed-by: Przemek Kitszel > Signed-off-by: Piotr Kwapulinski Reviewed-by: Simon Horman
Re: [Intel-wired-lan] [PATCH iwl-next v1 3/3] ice: E825C PHY register cleanup
On Thu, Feb 06, 2025 at 09:36:55AM +0100, Grzegorz Nitka wrote: > From: Karol Kolacinski > > Minor PTP register refactor, including logical grouping E825C 1-step > timestamping registers. Remove unused register definitions > (PHY_REG_GPCS_BITSLIP, PHY_REG_REVISION). > Also, apply preferred GENMASK macro (instead of ICE_M) for register > fields definition affected by this patch. > > Reviewed-by: Przemek Kitszel > Signed-off-by: Karol Kolacinski > Signed-off-by: Grzegorz Nitka In reference to my comment on patch 1/3, this patch is also doing several things. But I think that is fine because: they are all cleanups; they are somewhat related to each other; and overall the patch is still not so long. Reviewed-by: Simon Horman ...
[Intel-wired-lan] [iwl-next v1 2/4] ixgbe: check for MDD events
From: Don Skidmore When an event is detected it is logged and, for the time being, the queue is immediately re-enabled. This is due to the lack of an API to the hypervisor so it could deal with it as it chooses. Reviewed-by: Przemek Kitszel Reviewed-by: Jedrzej Jagielski Reviewed-by: Marcin Szycik Signed-off-by: Don Skidmore Signed-off-by: Michal Swiatkowski --- .../net/ethernet/intel/ixgbe/ixgbe_sriov.h| 1 + drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 2 + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++ .../net/ethernet/intel/ixgbe/ixgbe_sriov.c| 50 +++ 4 files changed, 56 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h index 0690ecb8dfa3..bc4cab976bf9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h @@ -15,6 +15,7 @@ #ifdef CONFIG_PCI_IOV void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter); #endif +bool ixgbe_check_mdd_event(struct ixgbe_adapter *adapter); void ixgbe_msg_task(struct ixgbe_adapter *adapter); int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask); void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index d446c375335a..aa3b498558bc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -402,6 +402,8 @@ struct ixgbe_nvm_version { #define IXGBE_MRCTL(_i) (0x0F600 + ((_i) * 4)) #define IXGBE_VMRVLAN(_i)(0x0F610 + ((_i) * 4)) #define IXGBE_VMRVM(_i) (0x0F630 + ((_i) * 4)) +#define IXGBE_LVMMC_RX 0x2FA8 +#define IXGBE_LVMMC_TX 0x8108 #define IXGBE_WQBR_RX(_i)(0x2FB0 + ((_i) * 4)) /* 4 total */ #define IXGBE_WQBR_TX(_i)(0x8130 + ((_i) * 4)) /* 4 total */ #define IXGBE_L34T_IMIR(_i) (0x0E800 + ((_i) * 4)) /*128 of these (0-127)*/ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 467f81239e12..3ff48207165c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7959,6 +7959,9 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter) netif_carrier_on(netdev); ixgbe_check_vf_rate_limit(adapter); + if (adapter->num_vfs && hw->mac.ops.enable_mdd) + hw->mac.ops.enable_mdd(hw); + /* enable transmits */ netif_tx_wake_all_queues(adapter->netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index ccdce80edd14..c374ebd4a56b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -207,6 +207,7 @@ void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs) int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) { unsigned int num_vfs = adapter->num_vfs, vf; + struct ixgbe_hw *hw = &adapter->hw; unsigned long flags; int rss; @@ -237,6 +238,9 @@ int ixgbe_disable_sriov(struct ixgbe_adapter *adapter) if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)) return 0; + if (hw->mac.ops.disable_mdd) + hw->mac.ops.disable_mdd(hw); + #ifdef CONFIG_PCI_IOV /* * If our VFs are assigned we cannot shut down SR-IOV @@ -1353,12 +1357,58 @@ static void ixgbe_rcv_ack_from_vf(struct ixgbe_adapter *adapter, u32 vf) ixgbe_write_mbx(hw, &msg, 1, vf); } +/** + * ixgbe_check_mdd_event - check for MDD event on all VFs + * @adapter: pointer to ixgbe adapter + * + * Return: true if there is a VF on which MDD event occurred, false otherwise. 
+ */ +bool ixgbe_check_mdd_event(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + DECLARE_BITMAP(vf_bitmap, 64); + bool ret = false; + int i; + + if (!hw->mac.ops.handle_mdd) + return false; + + /* Did we have a malicious event */ + hw->mac.ops.handle_mdd(hw, vf_bitmap); + + /* Log any blocked queues and release lock */ + for_each_set_bit(i, vf_bitmap, 64) { + dev_warn(&adapter->pdev->dev, +"Malicious event on VF %d tx:%x rx:%x\n", i, +IXGBE_READ_REG(hw, IXGBE_LVMMC_TX), +IXGBE_READ_REG(hw, IXGBE_LVMMC_RX)); + + if (hw->mac.ops.restore_mdd_vf) { + u32 ping; + + hw->mac.ops.restore_mdd_vf(hw, i); + + /* get the VF to rebuild its queues */ + adapter->vfinfo[i].clear_to_send = 0; + ping = IXGBE_PF_CONTROL_MSG | + IXGBE_VT_MSGTYPE_CTS; + ixgbe_write_mbx(hw, &ping, 1, i); +
[Intel-wired-lan] [iwl-next v1 3/4] ixgbe: add Tx hang detection for unhandled MDD
From: Slawomir Mrozowicz Add Tx Hang detection due to an unhandled MDD Event. Previously, a malicious VF could disable the entire port causing TX to hang on the E610 card. Those events that caused PF to freeze were not detected as an MDD event and usually required a Tx Hang watchdog timer to catch the suspension, and perform a physical function reset. Implement flows in the affected PF driver in such a way to check the cause of the hang, detect it as an MDD event and log an entry of the malicious VF that caused the Hang. The PF blocks the malicious VF, if it continues to be the source of several MDD events. Reviewed-by: Przemek Kitszel Reviewed-by: Marcin Szycik Signed-off-by: Slawomir Mrozowicz Co-developed-by: Michal Swiatkowski Signed-off-by: Michal Swiatkowski --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 5 + drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 12 +- drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 3 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 212 -- 4 files changed, 210 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index e6a380d4929b..81ccb4c591ba 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -427,6 +427,10 @@ enum ixgbe_ring_f_enum { #define IXGBE_BAD_L2A_QUEUE3 #define IXGBE_MAX_MACVLANS 63 +#define IXGBE_MAX_TX_QUEUES128 +#define IXGBE_MAX_TX_DESCRIPTORS 40 +#define IXGBE_MAX_TX_VF_HANGS 4 + DECLARE_STATIC_KEY_FALSE(ixgbe_xdp_locking_key); struct ixgbe_ring_feature { @@ -798,6 +802,7 @@ struct ixgbe_adapter { u32 timer_event_accumulator; u32 vferr_refcount; struct ixgbe_mac_addr *mac_table; + u8 tx_hang_count[IXGBE_MAX_TX_QUEUES]; struct kobject *info_kobj; u16 lse_mask; #ifdef CONFIG_IXGBE_HWMON diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index aa3b498558bc..e07b56625595 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ 
b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1044,6 +1044,7 @@ struct ixgbe_nvm_version { #define IXGBE_GCR_EXT_VT_MODE_160x0001 #define IXGBE_GCR_EXT_VT_MODE_320x0002 #define IXGBE_GCR_EXT_VT_MODE_640x0003 +#define IXGBE_GCR_EXT_VT_MODE_MASK 0x0003 #define IXGBE_GCR_EXT_SRIOV (IXGBE_GCR_EXT_MSIX_EN | \ IXGBE_GCR_EXT_VT_MODE_64) @@ -2935,6 +2936,13 @@ struct ixgbe_adv_tx_context_desc { __le32 mss_l4len_idx; }; +enum { + IXGBE_VLAN_MACIP_LENS_REG = 0, + IXGBE_FCEOF_SAIDX_REG = 1, + IXGBE_TYPE_TUCMD_MLHL = 2, + IXGBE_MSS_L4LEN_IDX = 3, +}; + /* Adv Transmit Descriptor Config Masks */ #define IXGBE_ADVTXD_DTALEN_MASK 0x /* Data buf length(bytes) */ #define IXGBE_ADVTXD_MAC_LINKSEC 0x0004 /* Insert LinkSec */ @@ -2942,7 +2950,7 @@ struct ixgbe_adv_tx_context_desc { #define IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK 0x03FF /* IPSec SA index */ #define IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK0x01FF /* IPSec ESP length */ #define IXGBE_ADVTXD_DTYP_MASK 0x00F0 /* DTYP mask */ -#define IXGBE_ADVTXD_DTYP_CTXT 0x0020 /* Advanced Context Desc */ +#define IXGBE_ADVTXD_DTYP_CTXT 0x2 /* Advanced Context Desc */ #define IXGBE_ADVTXD_DTYP_DATA 0x0030 /* Advanced Data Descriptor */ #define IXGBE_ADVTXD_DCMD_EOP IXGBE_TXD_CMD_EOP /* End of Packet */ #define IXGBE_ADVTXD_DCMD_IFCS IXGBE_TXD_CMD_IFCS /* Insert FCS */ @@ -2991,6 +2999,8 @@ struct ixgbe_adv_tx_context_desc { #define IXGBE_ADVTXD_FCOEF_EOF_MASK (3u << 10) /* FC EOF index */ #define IXGBE_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ #define IXGBE_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ +#define IXGBE_ADVTXD_MSS_MASK GENMASK(31, IXGBE_ADVTXD_MSS_SHIFT) +#define IXGBE_ADVTXD_HEADER_LEN_MASK GENMASK(8, 0) /* Autonegotiation advertised speeds */ typedef u32 ixgbe_autoneg_advertised; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 336d47ffb95a..54d75cf94cc1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ 
-1293,7 +1293,8 @@ void ixgbe_tx_ctxtdesc(struct ixgbe_ring *tx_ring, u32 vlan_macip_lens, tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; /* set bits to identify this as an advanced context descriptor */ - type_tucmd |= IXGBE_TXD_CMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT; + type_tucmd |= IXGBE_TXD_CMD_DEXT | + FIELD_PREP(IXGBE_ADVTXD_DTYP_MASK, IXGBE_ADVTXD_DTYP_CTXT); context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); context_desc->fceof_saidx = cpu_to_le32(fceof_saidx); diff --git a/drivers/net/et
[Intel-wired-lan] [iwl-next v1 0/4] ixgbe: support MDD events
Hi, This patchset is adding support for MDD (malicious driver detection) for ixgbe driver. It can catch the error on VF side and reset malicious VF. An MDD event can be triggered for example by sending from VF a TSO packet with segment number set to 0. Add checking for Tx hang in case of MDD is unhandled. It will prevent VF from staying in Tx hang state. Don Skidmore (1): ixgbe: check for MDD events Paul Greenwalt (1): ixgbe: add MDD support Radoslaw Tyl (1): ixgbe: turn off MDD while modifying SRRCTL Slawomir Mrozowicz (1): ixgbe: add Tx hang detection unhandled MDD drivers/net/ethernet/intel/ixgbe/ixgbe.h | 5 + .../net/ethernet/intel/ixgbe/ixgbe_sriov.h| 1 + drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 42 +++- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h | 5 + drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c | 4 + drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 3 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 224 -- .../net/ethernet/intel/ixgbe/ixgbe_sriov.c| 50 drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 119 ++ 9 files changed, 430 insertions(+), 23 deletions(-) -- 2.42.0
[Intel-wired-lan] [iwl-next v1 4/4] ixgbe: turn off MDD while modifying SRRCTL
From: Radoslaw Tyl Modifying SRRCTL register can generate MDD event. Turn MDD off during SRRCTL register write to prevent generating MDD. Fix RCT in ixgbe_set_rx_drop_en(). Reviewed-by: Marcin Szycik Reviewed-by: Przemek Kitszel Signed-off-by: Radoslaw Tyl Signed-off-by: Michal Swiatkowski --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 5c1c067ffb7c..6bb2a0edf2ea 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4100,8 +4100,12 @@ void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter) static void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter) #endif { - int i; bool pfc_en = adapter->dcb_cfg.pfc_mode_enable; + struct ixgbe_hw *hw = &adapter->hw; + int i; + + if (hw->mac.ops.disable_mdd) + hw->mac.ops.disable_mdd(hw); if (adapter->ixgbe_ieee_pfc) pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en); @@ -4123,6 +4127,9 @@ static void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) ixgbe_disable_rx_drop(adapter, adapter->rx_ring[i]); } + + if (hw->mac.ops.enable_mdd) + hw->mac.ops.enable_mdd(hw); } #define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 -- 2.42.0
[Intel-wired-lan] [iwl-next v1 1/4] ixgbe: add MDD support
From: Paul Greenwalt Add malicious driver detection. Support enabling MDD, disabling MDD, handling a MDD event, and restoring a MDD VF. Reviewed-by: Przemek Kitszel Reviewed-by: Jedrzej Jagielski Reviewed-by: Marcin Szycik Signed-off-by: Paul Greenwalt Signed-off-by: Michal Swiatkowski --- drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 28 + drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h | 5 + drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c | 4 + drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 119 ++ 4 files changed, 156 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 5fdf32d79d82..d446c375335a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -2746,6 +2746,28 @@ enum ixgbe_fdir_pballoc_type { #define FW_PHY_INFO_ID_HI_MASK 0xu #define FW_PHY_INFO_ID_LO_MASK 0xu +/* There are only 3 options for VFs creation on this device: + * 16 VFs pool with 8 queues each + * 32 VFs pool with 4 queues each + * 64 VFs pool with 2 queues each + * + * That means reading some VF registers that map VF to queue depending on + * chosen option. Define values that help dealing with each scenario. + */ +/* Number of queues based on VFs pool */ +#define IXGBE_16VFS_QUEUES 8 +#define IXGBE_32VFS_QUEUES 4 +#define IXGBE_64VFS_QUEUES 2 +/* Mask for getting queues bits based on VFs pool */ +#define IXGBE_16VFS_BITMASKGENMASK(IXGBE_16VFS_QUEUES - 1, 0) +#define IXGBE_32VFS_BITMASKGENMASK(IXGBE_32VFS_QUEUES - 1, 0) +#define IXGBE_64VFS_BITMASKGENMASK(IXGBE_64VFS_QUEUES - 1, 0) +/* Convert queue index to register number. + * We have 4 registers with 32 queues in each. 
+ */ +#define IXGBE_QUEUES_PER_REG 32 +#define IXGBE_QUEUES_REG_AMOUNT4 + /* Host Interface Command Structures */ struct ixgbe_hic_hdr { u8 cmd; @@ -3534,6 +3556,12 @@ struct ixgbe_mac_operations { int (*dmac_config_tcs)(struct ixgbe_hw *hw); int (*read_iosf_sb_reg)(struct ixgbe_hw *, u32, u32, u32 *); int (*write_iosf_sb_reg)(struct ixgbe_hw *, u32, u32, u32); + + /* MDD events */ + void (*enable_mdd)(struct ixgbe_hw *hw); + void (*disable_mdd)(struct ixgbe_hw *hw); + void (*restore_mdd_vf)(struct ixgbe_hw *hw, u32 vf); + void (*handle_mdd)(struct ixgbe_hw *hw, unsigned long *vf_bitmap); }; struct ixgbe_phy_operations { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h index 3e4092f8da3e..2a11147fb1bc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h @@ -17,4 +17,9 @@ void ixgbe_set_source_address_pruning_x550(struct ixgbe_hw *hw, void ixgbe_set_ethertype_anti_spoofing_x550(struct ixgbe_hw *hw, bool enable, int vf); +void ixgbe_enable_mdd_x550(struct ixgbe_hw *hw); +void ixgbe_disable_mdd_x550(struct ixgbe_hw *hw); +void ixgbe_restore_mdd_vf_x550(struct ixgbe_hw *hw, u32 vf); +void ixgbe_handle_mdd_x550(struct ixgbe_hw *hw, unsigned long *vf_bitmap); + #endif /* _IXGBE_X550_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c index 683c668672d6..e67d105fd99a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c @@ -2630,6 +2630,10 @@ static const struct ixgbe_mac_operations mac_ops_e610 = { .prot_autoc_write = prot_autoc_write_generic, .setup_fc = ixgbe_setup_fc_e610, .fc_autoneg = ixgbe_fc_autoneg_e610, + .enable_mdd = ixgbe_enable_mdd_x550, + .disable_mdd= ixgbe_disable_mdd_x550, + .restore_mdd_vf = ixgbe_restore_mdd_vf_x550, + .handle_mdd = ixgbe_handle_mdd_x550, }; static const struct ixgbe_phy_operations phy_ops_e610 = { diff 
--git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 277ceaf8a793..f148d3f29378 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -3800,6 +3800,121 @@ static int ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, return status; } +static void ixgbe_set_mdd_x550(struct ixgbe_hw *hw, bool ena) +{ + u32 reg_dma, reg_rdr; + + reg_dma = IXGBE_READ_REG(hw, IXGBE_DMATXCTL); + reg_rdr = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); + + if (ena) { + reg_dma |= (IXGBE_DMATXCTL_MDP_EN | IXGBE_DMATXCTL_MBINTEN); + reg_rdr |= (IXGBE_RDRXCTL_MDP_EN | IXGBE_RDRXCTL_MBINTEN); + } else { + reg_dma &= ~(IXGBE_DMATXCTL_MDP_EN | IXG
Re: [Intel-wired-lan] [PATCH iwl-next v1 01/13] ixgbe: add initial devlink support
On Mon, Feb 03, 2025 at 04:03:16PM +0100, Jedrzej Jagielski wrote: > Add an initial support for devlink interface to ixgbe driver. > > Similarly to i40e driver the implementation doesn't enable > devlink to manage device-wide configuration. Devlink instance > is created for each physical function of PCIe device. > > Create separate directory for devlink related ixgbe files > and use naming scheme similar to the one used in the ice driver. > > Add a stub for Documentation, to be extended by further patches. > > Reviewed-by: Mateusz Polchlopek > Signed-off-by: Jedrzej Jagielski ... > diff --git a/Documentation/networking/devlink/ixgbe.rst > b/Documentation/networking/devlink/ixgbe.rst > new file mode 100644 > index ..ca920d421d42 > --- /dev/null > +++ b/Documentation/networking/devlink/ixgbe.rst > @@ -0,0 +1,8 @@ > +.. SPDX-License-Identifier: GPL-2.0 > + > + > +ixgbe devlink support > + nit: the '=' lines are one character too short wrt the text they decorate. Flagged by make htmldocs. > + > +This document describes the devlink features implemented by the ``ixgbe`` > +device driver. ...
Re: [Intel-wired-lan] [PATCH iwl-next v1 2/3] ice: Refactor E825C PHY registers info struct
On Thu, Feb 06, 2025 at 09:36:54AM +0100, Grzegorz Nitka wrote: > From: Karol Kolacinski > > Simplify ice_phy_reg_info_eth56g struct definition to include base > address for the very first quad. Use base address info and 'step' > value to determine address for specific PHY quad. > > Reviewed-by: Przemek Kitszel > Signed-off-by: Karol Kolacinski > Signed-off-by: Grzegorz Nitka Reviewed-by: Simon Horman
Re: [Intel-wired-lan] [PATCH iwl-next v1 1/3] ice: Add sync delay for E825C
On Thu, Feb 06, 2025 at 09:36:53AM +0100, Grzegorz Nitka wrote: > From: Karol Kolacinski > > Implement setting GLTSYN_SYNC_DLAY for E825C products. > This is the execution delay compensation of SYNC command between > PHC and PHY. > Also, refactor the code by changing ice_ptp_init_phc_eth56g function > name to ice_ptp_init_phc_e825, to be consistent with the naming pattern > for other devices. Adding support for GLTSYN_SYNC_DLAY and the refactor seem to be two distinct changes, albeit touching common code. I think it would be slightly better to split this into two patches. > Reviewed-by: Przemek Kitszel > Signed-off-by: Karol Kolacinski > Signed-off-by: Grzegorz Nitka ...
Re: [Intel-wired-lan] [PATCH iwl-next v1 2/3] ice: Refactor E825C PHY registers info struct
On Fri, Feb 07, 2025 at 10:03:45AM +, Simon Horman wrote: > On Thu, Feb 06, 2025 at 09:36:54AM +0100, Grzegorz Nitka wrote: > > From: Karol Kolacinski > > > > Simplify ice_phy_reg_info_eth56g struct definition to include base > > address for the very first quad. Use base address info and 'step' > > value to determine address for specific PHY quad. > > > > Reviewed-by: Przemek Kitszel > > Signed-off-by: Karol Kolacinski > > Signed-off-by: Grzegorz Nitka > > Reviewed-by: Simon Horman Sorry, I failed to notice that the kdoc for ice_phy_reg_info_eth56g needs to be updated to document base_addr instead of base.
Re: [Intel-wired-lan] igb: XDP/ZC busy polling
On Fri, Feb 07, 2025 at 09:38:41AM +0100, Kurt Kanzenbach wrote: > Hello Joe, > > I noticed that XDP/ZC busy polling does not work anymore in combination > with igb driver. This seems to be related to commit 5ef44b3cb43b ("xsk: > Bring back busy polling support") which relies on > netif_queue_set_napi(). > > I see you implemented it for e1000, igc and so on. However, igb is > missing. Do you have any plans to add the missing registration to igb? > Just asking. Otherwise, I can send a patch for it. Please feel free; I don't have an igb device so I wouldn't be able to test it, but I'd happily review it so please CC me. BTW, I wrote a small series that updates the documentation and adds a test for AF_XDP [1] that you may want to consider applying/running (if it is not merged by the time you add support to igb). [1]: https://lore.kernel.org/lkml/20250207030916.32751-1-jdam...@fastly.com/
Re: [Intel-wired-lan] [PATCH iwl-net v2] ice: health.c: fix compilation on gcc 7.5
On Thu, Feb 06, 2025 at 11:30:23PM +0100, Przemek Kitszel wrote: > GCC 7 is not as good as GCC 8+ in telling what is a compile-time > const, and thus could be used for static storage. > Fortunately keeping strings as const arrays is enough to make old > gcc happy. > > Excerpt from the report: > My GCC is: gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0. > > CC [M] drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.o > drivers/net/ethernet/intel/ice/devlink/health.c:35:3: error: initializer > element is not constant >ice_common_port_solutions, {ice_port_number_label}}, >^ > drivers/net/ethernet/intel/ice/devlink/health.c:35:3: note: (near > initialization for 'ice_health_status_lookup[0].solution') > drivers/net/ethernet/intel/ice/devlink/health.c:35:31: error: initializer > element is not constant >ice_common_port_solutions, {ice_port_number_label}}, >^ > drivers/net/ethernet/intel/ice/devlink/health.c:35:31: note: (near > initialization for 'ice_health_status_lookup[0].data_label[0]') > drivers/net/ethernet/intel/ice/devlink/health.c:37:46: error: initializer > element is not constant >"Change or replace the module or cable.", {ice_port_number_label}}, > ^ > drivers/net/ethernet/intel/ice/devlink/health.c:37:46: note: (near > initialization for 'ice_health_status_lookup[1].data_label[0]') > drivers/net/ethernet/intel/ice/devlink/health.c:39:3: error: initializer > element is not constant >ice_common_port_solutions, {ice_port_number_label}}, >^ > > Fixes: 85d6164ec56d ("ice: add fw and port health reporters") > Reported-by: Qiuxu Zhuo > Closes: > https://lore.kernel.org/netdev/cy8pr11mb7134bf7a46d71e50d25fa7a989...@cy8pr11mb7134.namprd11.prod.outlook.com > Reviewed-by: Michal Swiatkowski > Suggested-by: Simon Horman > Signed-off-by: Przemek Kitszel > --- > v2: use static const char[] instead of #define - Simon > +added RB tag from Michal, but not adding TB tag from Qiuxu > > v1: > > https://lore.kernel.org/netdev/20250205104252.30464-2-przemyslaw.kits...@intel.com > > CC: 
Kees Cook > CC: Jiri Slaby Thanks Przemek, Testing locally gcc 7.5.0 [1] seems happy with this. Reviewed-by: Simon Horman [1] https://mirrors.edge.kernel.org/pub/tools/crosstool/files/bin/x86_64/7.5.0/
Re: [Intel-wired-lan] [iwl-next v1 3/4] ixgbe: add Tx hang detection for unhandled MDD
On Fri, Feb 07, 2025 at 11:43:42AM +0100, Michal Swiatkowski wrote: > From: Slawomir Mrozowicz > > Add Tx Hang detection due to an unhandled MDD Event. > > Previously, a malicious VF could disable the entire port causing > TX to hang on the E610 card. > Those events that caused PF to freeze were not detected > as an MDD event and usually required a Tx Hang watchdog timer > to catch the suspension, and perform a physical function reset. > > Implement flows in the affected PF driver in such a way to check > the cause of the hang, detect it as an MDD event and log an > entry of the malicious VF that caused the Hang. > > The PF blocks the malicious VF, if it continues to be the source > of several MDD events. > > Reviewed-by: Przemek Kitszel > Reviewed-by: Marcin Szycik > Signed-off-by: Slawomir Mrozowicz > Co-developed-by: Michal Swiatkowski > Signed-off-by: Michal Swiatkowski ... > diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h > b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h > index aa3b498558bc..e07b56625595 100644 > --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h > +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h > @@ -1044,6 +1044,7 @@ struct ixgbe_nvm_version { > #define IXGBE_GCR_EXT_VT_MODE_160x0001 > #define IXGBE_GCR_EXT_VT_MODE_320x0002 > #define IXGBE_GCR_EXT_VT_MODE_640x0003 > +#define IXGBE_GCR_EXT_VT_MODE_MASK 0x0003 nit: For consistency I think spaces should be used to indent 0x0003 > #define IXGBE_GCR_EXT_SRIOV (IXGBE_GCR_EXT_MSIX_EN | \ >IXGBE_GCR_EXT_VT_MODE_64) > ... > diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c > b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c ... 
> +static u32 ixgbe_poll_tx_icache(struct ixgbe_hw *hw, u16 queue, u16 idx) > +{ > + IXGBE_WRITE_REG(hw, IXGBE_TXDESCIC, queue * idx); > + return IXGBE_READ_REG(hw, IXGBE_TXDESCIC); > +} > + > +/** > + * ixgbe_check_illegal_queue - search for queue with illegal packet > + * @adapter: structure containing ring specific data > + * @queue: queue index > + * > + * Check if tx descriptor connected with input queue > + * contains illegal packet. > + * > + * Returns: true if queue contain illegal packet. > + */ > +static bool ixgbe_check_illegal_queue(struct ixgbe_adapter *adapter, > + u16 queue) > +{ > + u32 hdr_len_reg, mss_len_reg, type_reg; > + struct ixgbe_hw *hw = &adapter->hw; > + u32 mss_len, header_len, reg; > + > + for (u16 i = 0; i < IXGBE_MAX_TX_DESCRIPTORS; i++) { > + /* HW will clear bit IXGBE_TXDESCIC_READY when address > + * is written to address field. HW will set this bit > + * when iCache read is done, and data is ready at TIC_DWx. > + * Set descriptor address. > + */ > + read_poll_timeout(ixgbe_poll_tx_icache, reg, > + !(reg & IXGBE_TXDESCIC_READY), 0, 0, false, > + hw, queue, i); > + > + /* read tx descriptor access registers */ > + hdr_len_reg = IXGBE_READ_REG(hw, > IXGBE_TIC_DW2(IXGBE_VLAN_MACIP_LENS_REG)); > + type_reg = IXGBE_READ_REG(hw, > IXGBE_TIC_DW2(IXGBE_TYPE_TUCMD_MLHL)); > + mss_len_reg = IXGBE_READ_REG(hw, > IXGBE_TIC_DW2(IXGBE_MSS_L4LEN_IDX)); > + > + /* check if Advanced Context Descriptor */ > + if (FIELD_GET(IXGBE_ADVTXD_DTYP_MASK, type_reg) != > + IXGBE_ADVTXD_DTYP_CTXT) > + continue; > + > + /* check for illegal MSS and Header length */ > + mss_len = FIELD_GET(IXGBE_ADVTXD_MSS_MASK, mss_len_reg); > + header_len = FIELD_GET(IXGBE_ADVTXD_HEADER_LEN_MASK, > +hdr_len_reg); > + if ((mss_len + header_len) > SZ_16K) { > + e_warn(probe, > +"mss len + header len too long\n"); nit: The above two lines can be a single line. 
> + return true; > + } > + } > + > + return false; > +} > + > +/** > + * ixgbe_handle_mdd_event - handle mdd event > + * @adapter: structure containing ring specific data > + * @tx_ring: tx descriptor ring to handle > + * > + * Reset VF driver if malicious vf detected or > + * illegal packet in an any queue detected. > + */ > +static void ixgbe_handle_mdd_event(struct ixgbe_adapter *adapter, > +struct ixgbe_ring *tx_ring) > +{ > + u16 vf, q; > + > + if (adapter->vfinfo && ixgbe_check_mdd_event(adapter)) { > + /* vf mdd info and malicious vf detected */ > + if (!ixgbe_get_vf_idx(adapter, tx_ring->queue_index, &vf)) > + ixgbe_vf_handle_tx_hang(adapter, vf); > + } else { > + /* malicious vf not detected */ > + for (q = 0; q < IXGBE_MAX_TX_QUEUES; q++) {
Re: [Intel-wired-lan] [iwl-next v1 1/4] ixgbe: add MDD support
On Fri, Feb 07, 2025 at 11:43:40AM +0100, Michal Swiatkowski wrote: > From: Paul Greenwalt > > Add malicious driver detection. Support enabling MDD, disabling MDD, > handling a MDD event, and restoring a MDD VF. > > Reviewed-by: Przemek Kitszel > Reviewed-by: Jedrzej Jagielski > Reviewed-by: Marcin Szycik > Signed-off-by: Paul Greenwalt > Signed-off-by: Michal Swiatkowski ... > diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c > b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c ... > +/** > + * ixgbe_handle_mdd_x550 - handle malicious driver detection event > + * @hw: pointer to hardware structure > + * @vf_bitmap: output vf bitmap of malicious vfs > + */ > +void ixgbe_handle_mdd_x550(struct ixgbe_hw *hw, unsigned long *vf_bitmap) > +{ > + u32 i, j, reg, q, div, vf, wqbr; > + > + /* figure out pool size for mapping to vf's */ > + reg = IXGBE_READ_REG(hw, IXGBE_MRQC); > + switch (reg & IXGBE_MRQC_MRQE_MASK) { > + case IXGBE_MRQC_VMDQRT8TCEN: > + div = IXGBE_16VFS_QUEUES; > + break; > + case IXGBE_MRQC_VMDQRSS32EN: > + case IXGBE_MRQC_VMDQRT4TCEN: > + div = IXGBE_32VFS_QUEUES; > + break; > + default: > + div = IXGBE_64VFS_QUEUES; > + break; > + } > + > + /* Read WQBR_TX and WQBR_RX and check for malicious queues */ > + for (i = 0; i < IXGBE_QUEUES_REG_AMOUNT; i++) { > + wqbr = IXGBE_READ_REG(hw, IXGBE_WQBR_TX(i)) | > +IXGBE_READ_REG(hw, IXGBE_WQBR_RX(i)); > + if (!wqbr) > + continue; > + > + /* Get malicious queue */ > + for_each_set_bit(j, (unsigned long *)&wqbr, > + IXGBE_QUEUES_PER_REG) { The type of wqbr is a u32, that is it is 32-bits wide. Above it's address is cast to unsigned long *. But, unsigned long may be 64-bits wide, e.g. on x86_64. 
GCC 14.2.0 EXTRA_CFLAGS=-Warray-bounds builds report this as: In file included from ./include/linux/bitmap.h:11, from ./include/linux/cpumask.h:12, from ./arch/x86/include/asm/paravirt.h:21, from ./arch/x86/include/asm/cpuid.h:71, from ./arch/x86/include/asm/processor.h:19, from ./arch/x86/include/asm/cpufeature.h:5, from ./arch/x86/include/asm/thread_info.h:59, from ./include/linux/thread_info.h:60, from ./include/linux/uio.h:9, from ./include/linux/socket.h:8, from ./include/uapi/linux/if.h:25, from ./include/linux/mii.h:12, from ./include/uapi/linux/mdio.h:15, from ./include/linux/mdio.h:9, from drivers/net/ethernet/intel/ixgbe/ixgbe_type.h:8, from drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h:7, from drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c:4: In function ‘find_next_bit’, inlined from ‘ixgbe_handle_mdd_x550’ at drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c:3907:3: ./include/linux/find.h:65:23: error: array subscript ‘long unsigned int[0]’ is partly outside array bounds of ‘u32[1]’ {aka ‘unsigned int[1]’} [-Werror=array-bounds=] 65 | val = *addr & GENMASK(size - 1, offset); | ^ I think this can be addressed by changing the type of wqmbr to unsigned long. > + /* Get queue from bitmask */ > + q = j + (i * IXGBE_QUEUES_PER_REG); > + /* Map queue to vf */ > + vf = q / div; > + set_bit(vf, vf_bitmap); > + } > + } > +} > + > #define X550_COMMON_MAC \ > .init_hw= &ixgbe_init_hw_generic, \ > .start_hw = &ixgbe_start_hw_X540, \ ...
Re: [Intel-wired-lan] [PATCH iwl-net] ice: health.c: fix compilation on gcc 7.5
On Wed, Feb 05, 2025 at 08:45:46PM +, Simon Horman wrote: > I ran into a similar problem not so long ago and I'm wondering if > the following, based on a suggestion by Jiri Slaby, resolves your > problem. > > diff --git a/drivers/net/ethernet/intel/ice/devlink/health.c > b/drivers/net/ethernet/intel/ice/devlink/health.c > index ea40f7941259..19c3d37aa768 100644 > --- a/drivers/net/ethernet/intel/ice/devlink/health.c > +++ b/drivers/net/ethernet/intel/ice/devlink/health.c > @@ -25,10 +25,10 @@ struct ice_health_status { > * The below lookup requires to be sorted by code. > */ > > -static const char *const ice_common_port_solutions = > +static const char ice_common_port_solutions[] = > "Check your cable connection. Change or replace the module or cable. > Manually set speed and duplex."; > -static const char *const ice_port_number_label = "Port Number"; > -static const char *const ice_update_nvm_solution = "Update to the latest NVM > image."; > +static const char ice_port_number_label[] = "Port Number"; > +static const char ice_update_nvm_solution[] = "Update to the latest NVM > image."; > > static const struct ice_health_status ice_health_status_lookup[] = { > {ICE_AQC_HEALTH_STATUS_ERR_UNKNOWN_MOD_STRICT, "An unsupported module > was detected.", > I'd agree that would be the preferred fix. :) -- Kees Cook
[Intel-wired-lan] [PATCH iwl-next v3 2/9] igc: Rename xdp_get_tx_ring() for non-xdp usage
Renamed xdp_get_tx_ring() function to a more generic name for use in upcoming frame preemption patches. Signed-off-by: Faizal Rahim --- drivers/net/ethernet/intel/igc/igc.h | 2 +- drivers/net/ethernet/intel/igc/igc_main.c | 10 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index b8111ad9a9a8..22ecdac26cf4 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -736,7 +736,7 @@ struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter, u32 location); int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule); void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule); - +struct igc_ring *igc_get_tx_ring(struct igc_adapter *adapter, int cpu); void igc_ptp_init(struct igc_adapter *adapter); void igc_ptp_reset(struct igc_adapter *adapter); void igc_ptp_suspend(struct igc_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 56a35d58e7a6..44e4f925491f 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -2444,8 +2444,8 @@ static int igc_xdp_init_tx_descriptor(struct igc_ring *ring, return -ENOMEM; } -static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter, - int cpu) +struct igc_ring *igc_get_tx_ring(struct igc_adapter *adapter, +int cpu) { int index = cpu; @@ -2469,7 +2469,7 @@ static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp) if (unlikely(!xdpf)) return -EFAULT; - ring = igc_xdp_get_tx_ring(adapter, cpu); + ring = igc_get_tx_ring(adapter, cpu); nq = txring_txq(ring); __netif_tx_lock(nq, cpu); @@ -2546,7 +2546,7 @@ static void igc_finalize_xdp(struct igc_adapter *adapter, int status) struct igc_ring *ring; if (status & IGC_XDP_TX) { - ring = igc_xdp_get_tx_ring(adapter, cpu); + ring = igc_get_tx_ring(adapter, cpu); nq = 
txring_txq(ring); __netif_tx_lock(nq, cpu); @@ -6699,7 +6699,7 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames, if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; - ring = igc_xdp_get_tx_ring(adapter, cpu); + ring = igc_get_tx_ring(adapter, cpu); nq = txring_txq(ring); __netif_tx_lock(nq, cpu); -- 2.34.1
[Intel-wired-lan] [PATCH iwl-next v3 1/9] net: ethtool: mm: extract stmmac verification logic into common library
From: Vladimir Oltean It appears that stmmac is not the only hardware which requires a software-driven verification state machine for the MAC Merge layer. While on the one hand it's good to encourage hardware implementations, on the other hand it's quite difficult to tolerate multiple drivers implementing independently fairly non-trivial logic. Extract the hardware-independent logic from stmmac into library code and put it in ethtool. Name the state structure "mmsv" for MAC Merge Software Verification. Let this expose an operations structure for executing the hardware stuff: sync hardware with the tx_active boolean (result of verification process), enable/disable the pMAC, send mPackets, notify library of external events (reception of mPackets), as well as link state changes. Note that it is assumed that the external events are received in hardirq context. If they are not, it is probably a good idea to disable hardirqs when calling ethtool_mmsv_event_handle(), because the library does not do so. Also, the MM software verification process has no business with the tx_min_frag_size, that is all the driver's to handle. 
Signed-off-by: Vladimir Oltean Co-developed-by: Choong Yong Liang Signed-off-by: Choong Yong Liang Co-developed-by: Faizal Rahim Signed-off-by: Faizal Rahim Tested-by: Choong Yong Liang --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 16 +- .../ethernet/stmicro/stmmac/stmmac_ethtool.c | 41 +--- .../net/ethernet/stmicro/stmmac/stmmac_fpe.c | 174 +++--- .../net/ethernet/stmicro/stmmac/stmmac_fpe.h | 5 - .../net/ethernet/stmicro/stmmac/stmmac_main.c | 8 +- include/linux/ethtool.h | 61 + net/ethtool/mm.c | 222 ++ 7 files changed, 327 insertions(+), 200 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index f05cae103d83..c9cc41af258a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -147,21 +147,9 @@ struct stmmac_channel { }; struct stmmac_fpe_cfg { - /* Serialize access to MAC Merge state between ethtool requests -* and link state updates. -*/ - spinlock_t lock; - + struct ethtool_mmsv mmsv; const struct stmmac_fpe_reg *reg; - u32 fpe_csr;/* MAC_FPE_CTRL_STS reg cache */ - - enum ethtool_mm_verify_status status; - struct timer_list verify_timer; - bool verify_enabled; - int verify_retries; - bool pmac_enabled; - u32 verify_time; - bool tx_enabled; + u32 fpe_csr;/* MAC_FPE_CTRL_STS reg cache */ }; struct stmmac_tc_entry { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 918a32f8fda8..8e6b052e00d6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -1210,37 +1210,17 @@ static int stmmac_get_mm(struct net_device *ndev, struct ethtool_mm_state *state) { struct stmmac_priv *priv = netdev_priv(ndev); - unsigned long flags; u32 frag_size; if (!stmmac_fpe_supported(priv)) return -EOPNOTSUPP; - spin_lock_irqsave(&priv->fpe_cfg.lock, flags); + ethtool_mmsv_get_mm(&priv->fpe_cfg.mmsv, state); - 
state->max_verify_time = STMMAC_FPE_MM_MAX_VERIFY_TIME_MS; - state->verify_enabled = priv->fpe_cfg.verify_enabled; - state->pmac_enabled = priv->fpe_cfg.pmac_enabled; - state->verify_time = priv->fpe_cfg.verify_time; - state->tx_enabled = priv->fpe_cfg.tx_enabled; - state->verify_status = priv->fpe_cfg.status; state->rx_min_frag_size = ETH_ZLEN; - - /* FPE active if common tx_enabled and -* (verification success or disabled(forced)) -*/ - if (state->tx_enabled && - (state->verify_status == ETHTOOL_MM_VERIFY_STATUS_SUCCEEDED || -state->verify_status == ETHTOOL_MM_VERIFY_STATUS_DISABLED)) - state->tx_active = true; - else - state->tx_active = false; - frag_size = stmmac_fpe_get_add_frag_size(priv); state->tx_min_frag_size = ethtool_mm_frag_size_add_to_min(frag_size); - spin_unlock_irqrestore(&priv->fpe_cfg.lock, flags); - return 0; } @@ -1248,8 +1228,6 @@ static int stmmac_set_mm(struct net_device *ndev, struct ethtool_mm_cfg *cfg, struct netlink_ext_ack *extack) { struct stmmac_priv *priv = netdev_priv(ndev); - struct stmmac_fpe_cfg *fpe_cfg = &priv->fpe_cfg; - unsigned long flags; u32 frag_size; int err; @@ -1258,23 +1236,8 @@ static int stmmac_set_mm(struct net_device *ndev, struct ethtool_mm_cfg *cfg, if (err) return err; - /* Wait for the verifi
[Intel-wired-lan] [PATCH iwl-next v3 5/9] igc: Add support for frame preemption verification
This patch implements the "ethtool --set-mm" callback to trigger the frame preemption verification handshake. Uses the MAC Merge Software Verification (mmsv) mechanism in ethtool to perform the verification handshake for igc. The structure fpe.mmsv is set by mmsv in ethtool and should remain read-only for the driver. Other mmsv callbacks: a) configure_tx() -> not used yet at this point - igc lacks registers to configure FPE in the transmit direction, so this API is not utilized for now. A future patch will use it to control preemptible queue config. b) configure_pmac() -> not used - this callback dynamically controls pmac_enabled at runtime. For example, mmsv calls configure_pmac() and disables pmac_enabled when the link partner goes down, even if the user previously enabled it. The intention is to save power but it is not feasible in igc because it causes an endless adapter reset loop: 1) Board A and Board B complete the verification handshake. Tx mode register for both boards are in TSN mode. 2) Board B link goes down. On Board A: 3) mmsv calls configure_pmac() with pmac_enabled = false. 4) configure_pmac() in igc updates a new field based on pmac_enabled. Driver uses this field in igc_tsn_new_flags() to indicate that the user enabled/disabled FPE. 5) configure_pmac() in igc calls igc_tsn_offload_apply() to check whether an adapter reset is needed. Calls existing logic in igc_tsn_will_tx_mode_change() and igc_tsn_new_flags(). 6) Since pmac_enabled is now disabled and no other TSN feature is active, igc_tsn_will_tx_mode_change() evaluates to true because Tx mode will switch from TSN to Legacy. 7) Driver resets the adapter. 8) Registers are set, and Tx mode switches to Legacy. 9) When link partner is up, steps 3–8 repeat, but this time with pmac_enabled = true, reactivating TSN. igc_tsn_will_tx_mode_change() evaluates to true again, since Tx mode will switch from Legacy to TSN. 10) Driver resets the adapter. 
11) Reset adapter completes, registers are set, and Tx mode switches to TSN. On Board B: 12) Adapter reset on Board A at step 10 causes it to detect its link partner as down. 13) Repeats steps 3–8. 14) Once reset adapter on Board A is completed at step 11, it detects its link partner as up. 15) Repeats steps 9–11. - this cycle repeats indefinitely. To avoid this issue, igc only uses mmsv.pmac_enabled to track whether FPE is enabled or disabled. Co-developed-by: Vinicius Costa Gomes Signed-off-by: Vinicius Costa Gomes Co-developed-by: Choong Yong Liang Signed-off-by: Choong Yong Liang Signed-off-by: Faizal Rahim --- drivers/net/ethernet/intel/igc/igc.h | 12 +- drivers/net/ethernet/intel/igc/igc_base.h| 1 + drivers/net/ethernet/intel/igc/igc_defines.h | 8 +- drivers/net/ethernet/intel/igc/igc_ethtool.c | 21 +++ drivers/net/ethernet/intel/igc/igc_main.c| 54 ++- drivers/net/ethernet/intel/igc/igc_tsn.c | 157 ++- drivers/net/ethernet/intel/igc/igc_tsn.h | 33 7 files changed, 281 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 22ecdac26cf4..705bd4739e3b 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -40,6 +40,10 @@ void igc_ethtool_set_ops(struct net_device *); #define IGC_MAX_TX_TSTAMP_REGS 4 +struct fpe_t { + struct ethtool_mmsv mmsv; +}; + enum igc_mac_filter_type { IGC_MAC_FILTER_TYPE_DST = 0, IGC_MAC_FILTER_TYPE_SRC @@ -332,6 +336,8 @@ struct igc_adapter { struct timespec64 period; } perout[IGC_N_PEROUT]; + struct fpe_t fpe; + /* LEDs */ struct mutex led_mutex; struct igc_led_classdev *leds; @@ -389,10 +395,11 @@ extern char igc_driver_name[]; #define IGC_FLAG_TSN_QBV_ENABLED BIT(17) #define IGC_FLAG_TSN_QAV_ENABLED BIT(18) #define IGC_FLAG_TSN_LEGACY_ENABLEDBIT(19) +#define IGC_FLAG_TSN_PREEMPT_ENABLED BIT(20) #define IGC_FLAG_TSN_ANY_ENABLED \ (IGC_FLAG_TSN_QBV_ENABLED | IGC_FLAG_TSN_QAV_ENABLED | \ -IGC_FLAG_TSN_LEGACY_ENABLED 
+IGC_FLAG_TSN_LEGACY_ENABLED | IGC_FLAG_TSN_PREEMPT_ENABLED) #define IGC_FLAG_RSS_FIELD_IPV4_UDPBIT(6) #define IGC_FLAG_RSS_FIELD_IPV6_UDPBIT(7) @@ -736,7 +743,10 @@ struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter, u32 location); int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule); void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule); +void igc_disab
[Intel-wired-lan] [PATCH iwl-next v3 3/9] igc: Optimize the TX packet buffer utilization
Packet buffers (RX + TX) total 64KB. Neither RX nor TX buffers can be larger than 34KB. So divide the buffer equally, 32KB for each. Co-developed-by: Vinicius Costa Gomes Signed-off-by: Vinicius Costa Gomes Signed-off-by: Faizal Rahim --- drivers/net/ethernet/intel/igc/igc_defines.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 8e449904aa7d..516ef70c98e9 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -400,7 +400,8 @@ #define I225_TXPBSIZE_DEFAULT 0x0414 /* TXPBSIZE default */ #define IGC_RXPBS_CFG_TS_EN0x8000 /* Timestamp in Rx buffer */ -#define IGC_TXPBSIZE_TSN 0x04145145 /* 5k bytes buffer for each queue */ + /* 7KB bytes buffer for each tx queue (total 4 queues) + 4KB for BMC*/ +#define IGC_TXPBSIZE_TSN 0x041c71c7 #define IGC_DTXMXPKTSZ_TSN 0x19 /* 1600 bytes of max TX DMA packet size */ #define IGC_DTXMXPKTSZ_DEFAULT 0x98 /* 9728-byte Jumbo frames */ -- 2.34.1
[Intel-wired-lan] [PATCH iwl-next v3 4/9] igc: Set the RX packet buffer size for TSN mode
In preparation for supporting frame preemption, when entering TSN mode set the receive packet buffer to 16KB for the Express MAC, 16KB for the Preemptible MAC and 2KB for the BMC, according to the datasheet section 7.1.3.2. Co-developed-by: Vinicius Costa Gomes Signed-off-by: Vinicius Costa Gomes Signed-off-by: Faizal Rahim --- drivers/net/ethernet/intel/igc/igc_defines.h | 3 +++ drivers/net/ethernet/intel/igc/igc_tsn.c | 13 +++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 516ef70c98e9..b19ac6f30dac 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -402,6 +402,9 @@ /* 7KB bytes buffer for each tx queue (total 4 queues) + 4KB for BMC*/ #define IGC_TXPBSIZE_TSN 0x041c71c7 +/* 15KB for EXP + 15KB for BE + 2KB for BMC */ +#define IGC_RXPBSIZE_TSN 0xf08f +#define IGC_RXPBSIZE_SIZE_MASK 0x0001 #define IGC_DTXMXPKTSZ_TSN 0x19 /* 1600 bytes of max TX DMA packet size */ #define IGC_DTXMXPKTSZ_DEFAULT 0x98 /* 9728-byte Jumbo frames */ diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 1e44374ca1ff..f0213cfce07d 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -132,13 +132,17 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) { u16 queue_per_tc[4] = { 3, 2, 1, 0 }; struct igc_hw *hw = &adapter->hw; - u32 tqavctrl; + u32 tqavctrl, rxpbs; int i; wr32(IGC_GTXOFFSET, 0); wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT); + rxpbs = rd32(IGC_RXPBS) & ~IGC_RXPBSIZE_SIZE_MASK; + rxpbs |= I225_RXPBSIZE_DEFAULT; + wr32(IGC_RXPBS, rxpbs); + if (igc_is_device_id_i226(hw)) igc_tsn_restore_retx_default(adapter); @@ -194,7 +198,7 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; u32 tqavctrl, baset_l, 
baset_h; - u32 sec, nsec, cycle; + u32 sec, nsec, cycle, rxpbs; ktime_t base_time, systim; int i; @@ -202,6 +206,11 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN); wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN); + rxpbs = rd32(IGC_RXPBS) & ~IGC_RXPBSIZE_SIZE_MASK; + rxpbs |= IGC_RXPBSIZE_TSN; + + wr32(IGC_RXPBS, rxpbs); + if (igc_is_device_id_i226(hw)) igc_tsn_set_retx_qbvfullthreshold(adapter); -- 2.34.1
[Intel-wired-lan] [PATCH iwl-next v3 0/9] igc: Add support for Frame Preemption feature in IGC
Introduces support for the FPE feature in the IGC driver. The patches aligns with the upstream FPE API: https://patchwork.kernel.org/project/netdevbpf/cover/20230220122343.1156614-1-vladimir.olt...@nxp.com/ https://patchwork.kernel.org/project/netdevbpf/cover/20230119122705.73054-1-vladimir.olt...@nxp.com/ It builds upon earlier work: https://patchwork.kernel.org/project/netdevbpf/cover/20220520011538.109-1-vinicius.go...@intel.com/ The patch series adds the following functionalities to the IGC driver: a) Configure FPE using `ethtool --set-mm`. b) Display FPE settings via `ethtool --show-mm`. c) View FPE statistics using `ethtool --include-statistics --show-mm'. e) Enable preemptible/express queue with `fp`: tc qdisc add ... root taprio \ fp E E P P Change Log: v2 -> v3: - Implement configure_tx() mmsv callback (Vladimir) - Use static_branch_inc() and static_branch_dec() (Vladimir) - Add adapter->fpe.mmsv.pmac_enabled as extra check (Vladimir) - Remove unnecessary error check in igc_fpe_init_tx_descriptor() (Vladimir) - Additional places to use FIELD_PREP() instead of manual bit manipulation (Vladimir) - IGC_TXD_POPTS_SMD_V and IGC_TXD_POPTS_SMD_R type change to enum (Vladimir) - Remove unnecessary netif_running() check in igc_fpe_xmit_frame (Vladimir) - Rate limit print in igc_fpe_send_mpacket (Vladimir) v1 -> v2: - Extract the stmmac verification logic into a common library (Vladimir) - igc to use common library for verification (Vladimir) - Fix syntax for kernel-doc to use "Return:" (Vladimir) - Use FIELD_GET instead of manual bit masking (Vladimir) - Don't assign 0 to statistics counter in igc_ethtool_get_mm_stats() (Vladimir) - Use pmac-enabled as a condition to allow MAC address value 0 (Vladimir) - Define macro register value in increasing value order (Vladimir) - Fix tx-min-frag-size handling for igc (Vladimir) - Handle link state changes with verification in igc (Vladimir) - Add static key for fast path code (Vladimir) - rx_min_frag_size get from constant 
(Vladimir) v1: https://patchwork.kernel.org/project/netdevbpf/cover/20241216064720.931522-1-faizal.abdul.ra...@linux.intel.com/ v2: https://patchwork.kernel.org/project/netdevbpf/cover/20250205100524.1138523-1-faizal.abdul.ra...@linux.intel.com/ Faizal Rahim (8): igc: Rename xdp_get_tx_ring() for non-xdp usage igc: Optimize the TX packet buffer utilization igc: Set the RX packet buffer size for TSN mode igc: Add support for frame preemption verification igc: Add support to set tx-min-frag-size igc: Add support for preemptible traffic class in taprio igc: Add support to get MAC Merge data via ethtool igc: Add support to get frame preemption statistics via ethtool Vladimir Oltean (1): net: ethtool: mm: extract stmmac verification logic into common library drivers/net/ethernet/intel/igc/igc.h | 18 +- drivers/net/ethernet/intel/igc/igc_base.h | 1 + drivers/net/ethernet/intel/igc/igc_defines.h | 16 +- drivers/net/ethernet/intel/igc/igc_ethtool.c | 76 ++ drivers/net/ethernet/intel/igc/igc_main.c | 101 +++- drivers/net/ethernet/intel/igc/igc_regs.h | 16 ++ drivers/net/ethernet/intel/igc/igc_tsn.c | 220 - drivers/net/ethernet/intel/igc/igc_tsn.h | 34 +++ drivers/net/ethernet/stmicro/stmmac/stmmac.h | 16 +- .../ethernet/stmicro/stmmac/stmmac_ethtool.c | 41 +--- .../net/ethernet/stmicro/stmmac/stmmac_fpe.c | 174 +++--- .../net/ethernet/stmicro/stmmac/stmmac_fpe.h | 5 - .../net/ethernet/stmicro/stmmac/stmmac_main.c | 8 +- include/linux/ethtool.h | 61 + net/ethtool/mm.c | 224 +- 15 files changed, 794 insertions(+), 217 deletions(-) -- 2.34.1
[Intel-wired-lan] [PATCH iwl-next v3 7/9] igc: Add support for preemptible traffic class in taprio
Set queue as preemptible or express via taprio. This will eventually set queue-specific preemptible field in TXQCTL register. Implement configure_tx(), a callback triggered by mmsv, to set tx_enabled and update preemptible queue settings. tx_enabled is a new field that serves as a condition in igc_tsn_enable_offload() before configuring the preemptible queue. This provides some control over FPE in TX, despite lacking a dedicated register. Verified that the correct preemptible hardware queue is set using the following commands: a) 1:1 TC-to-Queue Mapping $ sudo tc qdisc replace dev enp1s0 parent root handle 100 \ taprio num_tc 4 map 3 2 1 0 3 3 3 3 3 3 3 3 3 3 3 3 \ queues 1@0 1@1 1@2 1@3 base-time 0 sched-entry S F 10 \ fp E E P P b) Non-1:1 TC-to-Queue Mapping $ sudo tc qdisc replace dev enp1s0 parent root handle 100 \ taprio num_tc 3 map 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 2 queues 2@0 1@2 1@3 fp E E P Co-developed-by: Vinicius Costa Gomes Signed-off-by: Vinicius Costa Gomes Signed-off-by: Faizal Rahim --- drivers/net/ethernet/intel/igc/igc.h | 3 +- drivers/net/ethernet/intel/igc/igc_defines.h | 1 + drivers/net/ethernet/intel/igc/igc_main.c| 36 drivers/net/ethernet/intel/igc/igc_tsn.c | 17 + 4 files changed, 56 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 2f3662143589..59e6fca808e4 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -43,6 +43,7 @@ void igc_ethtool_set_ops(struct net_device *); struct fpe_t { struct ethtool_mmsv mmsv; u32 tx_min_frag_size; + bool tx_enabled; }; enum igc_mac_filter_type { @@ -163,7 +164,7 @@ struct igc_ring { bool launchtime_enable; /* true if LaunchTime is enabled */ ktime_t last_tx_cycle; /* end of the cycle with a launchtime transmission */ ktime_t last_ff_cycle; /* Last cycle with an active first flag */ - + bool preemptible; /* True if not express */ u32 start_time; u32 end_time; u32 max_sdu; diff --git 
a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 038ee89f1e08..208899e67308 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -556,6 +556,7 @@ #define IGC_TXQCTL_QUEUE_MODE_LAUNCHT 0x0001 #define IGC_TXQCTL_STRICT_CYCLE0x0002 #define IGC_TXQCTL_STRICT_END 0x0004 +#define IGC_TXQCTL_PREEMPTIBLE 0x0008 #define IGC_TXQCTL_QAV_SEL_MASK0x00C0 #define IGC_TXQCTL_QAV_SEL_CBS00x0080 #define IGC_TXQCTL_QAV_SEL_CBS10x00C0 diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 7fe6875d7bf7..f15ac7565fbd 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6258,6 +6258,39 @@ static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now) return timespec64_compare(now, &b) > 0; } +static u32 igc_map_tc_to_queue(const struct igc_adapter *adapter, + unsigned long preemptible_tcs) +{ + struct net_device *dev = adapter->netdev; + u32 i, queue = 0; + + for (i = 0; i < dev->num_tc; i++) { + u32 offset, count; + + if (!(preemptible_tcs & BIT(i))) + continue; + + offset = dev->tc_to_txq[i].offset; + count = dev->tc_to_txq[i].count; + queue |= GENMASK(offset + count - 1, offset); + } + + return queue; +} + +static void igc_save_preempt_queue(struct igc_adapter *adapter, + const struct tc_mqprio_qopt_offload *mqprio) +{ + u32 preemptible_queue = igc_map_tc_to_queue(adapter, + mqprio->preemptible_tcs); + + for (int i = 0; i < adapter->num_tx_queues; i++) { + struct igc_ring *tx_ring = adapter->tx_ring[i]; + + tx_ring->preemptible = preemptible_queue & BIT(i); + } +} + static bool validate_schedule(struct igc_adapter *adapter, const struct tc_taprio_qopt_offload *qopt) { @@ -6344,6 +6377,7 @@ static int igc_qbv_clear_schedule(struct igc_adapter *adapter) ring->start_time = 0; ring->end_time = NSEC_PER_SEC; ring->max_sdu = 0; + ring->preemptible = false; } 
spin_lock_irqsave(&adapter->qbv_tx_lock, flags); @@ -6500,6 +6534,8 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, ring->max_sdu = 0; } + igc_save_preempt_queue(adapter, &qopt->mqprio); + return 0;
[Intel-wired-lan] [PATCH iwl-next v3 6/9] igc: Add support to set tx-min-frag-size
Add support to set tx-min-frag-size via set_mm callback in igc. Increase the max limit of tx-min-frag-size in ethtool from 252 to 256 since i225/6 value range is 64, 128, 192 and 256. Co-developed-by: Vinicius Costa Gomes Signed-off-by: Vinicius Costa Gomes Signed-off-by: Faizal Rahim --- drivers/net/ethernet/intel/igc/igc.h | 1 + drivers/net/ethernet/intel/igc/igc_defines.h | 1 + drivers/net/ethernet/intel/igc/igc_ethtool.c | 5 +++ drivers/net/ethernet/intel/igc/igc_tsn.c | 37 ++-- drivers/net/ethernet/intel/igc/igc_tsn.h | 2 +- net/ethtool/mm.c | 2 +- 6 files changed, 43 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 705bd4739e3b..2f3662143589 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -42,6 +42,7 @@ void igc_ethtool_set_ops(struct net_device *); struct fpe_t { struct ethtool_mmsv mmsv; + u32 tx_min_frag_size; }; enum igc_mac_filter_type { diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 22db1de02964..038ee89f1e08 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -551,6 +551,7 @@ #define IGC_TQAVCTRL_PREEMPT_ENA 0x0002 #define IGC_TQAVCTRL_ENHANCED_QAV 0x0008 #define IGC_TQAVCTRL_FUTSCDDIS 0x0080 +#define IGC_TQAVCTRL_MIN_FRAG_MASK 0xC000 #define IGC_TXQCTL_QUEUE_MODE_LAUNCHT 0x0001 #define IGC_TXQCTL_STRICT_CYCLE0x0002 diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index e2a14edf7552..081e24f228b2 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1789,6 +1789,11 @@ static int igc_ethtool_set_mm(struct net_device *netdev, struct igc_adapter *adapter = netdev_priv(netdev); struct fpe_t *fpe = &adapter->fpe; + fpe->tx_min_frag_size = igc_fpe_get_supported_frag_size(cmd->tx_min_frag_size); + if 
(fpe->tx_min_frag_size != cmd->tx_min_frag_size) + NL_SET_ERR_MSG_MOD(extack, + "tx-min-frag-size value set is unsupported. Rounded up to supported value (64, 128, 192, 256)"); + if (fpe->mmsv.pmac_enabled != cmd->pmac_enabled) { if (cmd->pmac_enabled) static_branch_inc(&igc_fpe_enabled); diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 7b3c46993cec..d9de2cfb0c17 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -7,6 +7,12 @@ #include "igc_hw.h" #include "igc_tsn.h" +#define MIN_MULTPLIER_TX_MIN_FRAG 0 +#define MAX_MULTPLIER_TX_MIN_FRAG 3 +/* Frag size is based on the Section 8.12.2 of the SW User Manual */ +#define TX_MIN_FRAG_SIZE 64 +#define TX_MAX_FRAG_SIZE (TX_MIN_FRAG_SIZE * (MAX_MULTPLIER_TX_MIN_FRAG + 1)) + enum igc_txd_popts_type { SMD_V = 0x01, SMD_R = 0x02 @@ -142,6 +148,7 @@ static const struct ethtool_mmsv_ops igc_mmsv_ops = { void igc_fpe_init(struct igc_adapter *adapter) { + adapter->fpe.tx_min_frag_size = TX_MIN_FRAG_SIZE; ethtool_mmsv_init(&adapter->fpe.mmsv, adapter->netdev, &igc_mmsv_ops); } @@ -292,7 +299,7 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) tqavctrl = rd32(IGC_TQAVCTRL); tqavctrl &= ~(IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV | IGC_TQAVCTRL_FUTSCDDIS | - IGC_TQAVCTRL_PREEMPT_ENA); + IGC_TQAVCTRL_PREEMPT_ENA | IGC_TQAVCTRL_MIN_FRAG_MASK); wr32(IGC_TQAVCTRL, tqavctrl); @@ -338,12 +345,34 @@ static void igc_tsn_set_retx_qbvfullthreshold(struct igc_adapter *adapter) wr32(IGC_RETX_CTL, retxctl); } +static u8 igc_fpe_get_frag_size_mult(const struct fpe_t *fpe) +{ + u8 mult = (fpe->tx_min_frag_size / TX_MIN_FRAG_SIZE) - 1; + + return clamp_t(u8, mult, MIN_MULTPLIER_TX_MIN_FRAG, + MAX_MULTPLIER_TX_MIN_FRAG); +} + +u32 igc_fpe_get_supported_frag_size(u32 frag_size) +{ + const u32 supported_sizes[] = {64, 128, 192, 256}; + + /* Find the smallest supported size that is >= frag_size */ + for (int i = 
0; i < ARRAY_SIZE(supported_sizes); i++) { + if (frag_size <= supported_sizes[i]) + return supported_sizes[i]; + } + + return TX_MAX_FRAG_SIZE; /* Should not happen, value > 256 is blocked by ethtool */ +} + static int igc_tsn_enable_offload(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; u32 tqavctrl, baset_l, baset_h; u32 sec, nsec, cycle, rxpbs; ktime_t base_time, systim
[Intel-wired-lan] [PATCH iwl-next v3 8/9] igc: Add support to get MAC Merge data via ethtool
Implement "ethtool --show-mm" callback for IGC. Tested with command: $ ethtool --show-mm enp1s0. MAC Merge layer state for enp1s0: pMAC enabled: on TX enabled: on TX active: on TX minimum fragment size: 64 RX minimum fragment size: 60 Verify enabled: on Verify time: 128 Max verify time: 128 Verification status: SUCCEEDED Verified that the fields value are retrieved correctly. Signed-off-by: Faizal Rahim --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 14 ++ drivers/net/ethernet/intel/igc/igc_tsn.h | 1 + 2 files changed, 15 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 081e24f228b2..7f0052e0d50c 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1782,6 +1782,19 @@ static int igc_ethtool_set_eee(struct net_device *netdev, return 0; } +static int igc_ethtool_get_mm(struct net_device *netdev, + struct ethtool_mm_state *cmd) +{ + struct igc_adapter *adapter = netdev_priv(netdev); + struct fpe_t *fpe = &adapter->fpe; + + ethtool_mmsv_get_mm(&fpe->mmsv, cmd); + cmd->tx_min_frag_size = fpe->tx_min_frag_size; + cmd->rx_min_frag_size = IGC_RX_MIN_FRAG_SIZE; + + return 0; +} + static int igc_ethtool_set_mm(struct net_device *netdev, struct ethtool_mm_cfg *cmd, struct netlink_ext_ack *extack) @@ -2093,6 +2106,7 @@ static const struct ethtool_ops igc_ethtool_ops = { .set_rxfh = igc_ethtool_set_rxfh, .get_ts_info= igc_ethtool_get_ts_info, .get_channels = igc_ethtool_get_channels, + .get_mm = igc_ethtool_get_mm, .set_mm = igc_ethtool_set_mm, .set_channels = igc_ethtool_set_channels, .get_priv_flags = igc_ethtool_get_priv_flags, diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h b/drivers/net/ethernet/intel/igc/igc_tsn.h index 898c4630bc70..c82f9718cb85 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.h +++ b/drivers/net/ethernet/intel/igc/igc_tsn.h @@ -4,6 +4,7 @@ #ifndef _IGC_TSN_H_ #define _IGC_TSN_H_ +#define IGC_RX_MIN_FRAG_SIZE 
60 #define SMD_FRAME_SIZE 60 DECLARE_STATIC_KEY_FALSE(igc_fpe_enabled); -- 2.34.1
Re: [Intel-wired-lan] [PATCH iwl-next v2 5/9] igc: Add support for frame preemption verification
On 6/2/2025 11:04 pm, Vladimir Oltean wrote: On Thu, Feb 06, 2025 at 10:40:11PM +0800, Abdul Rahim, Faizal wrote: Hi Vladimir, Thanks for the quick review, appreciate your help. On 6/2/2025 1:12 am, Vladimir Oltean wrote: On Wed, Feb 05, 2025 at 05:05:20AM -0500, Faizal Rahim wrote: This patch implements the "ethtool --set-mm" callback to trigger the frame preemption verification handshake. Uses the MAC Merge Software Verification (mmsv) mechanism in ethtool to perform the verification handshake for igc. The structure fpe.mmsv is set by mmsv in ethtool and should remain read-only for the driver. igc does not use two mmsv callbacks: a) configure_tx() - igc lacks registers to configure FPE in the transmit direction. Yes, maybe, but it's still important to handle this. It tells you when the preemptible traffic classes should be sent as preemptible on the wire (i.e. when the verification is either disabled, or it succeeded). There is a selftest called manual_failed_verification() which supposedly tests this exact condition: if verification fails, then packets sent to TC0 are supposed to bump the eMAC's TX counters, even though TC0 is configured as preemptible. Otherwise stated: even if the tc program says that a certain traffic class is preemptible, you don't want to actually send preemptible packets if you haven't verified the link partner can handle them, since it will likely drop them on RX otherwise. Even though fpe in tx direction isn't set in igc, it still checks ethtool_mmsv_is_tx_active() before setting a queue as preemptible. This is done in : igc_tsn_enable_offload(struct igc_adapter *adapter) { { if (ethtool_mmsv_is_tx_active(&adapter->fpe.mmsv) && ring->preemptible) txqctl |= IGC_TXQCTL_PREEMPTIBLE; Wouldn't this handle the situation mentioned ? Sorry if I miss something here. And what if tx_active becomes true after you had already configured the queues with tc (and the above check caused IGC_TXQCTL_PREEMPTIBLE to not be set)? 
Shouldn't you set IGC_TXQCTL_PREEMPTIBLE now? Isn't ethtool_mmsv_configure_tx() exactly the function that notifies you of changes to tx_active, and hence, aren't you interested in setting up a callback for it? Ahh okay, got it. I sent v3 that also included this update. Thanks!
[Intel-wired-lan] [PATCH iwl-next v3 9/9] igc: Add support to get frame preemption statistics via ethtool
Implemented "ethtool --include-statistics --show-mm" callback for IGC. Tested preemption scenario to check preemption statistics: 1) Trigger verification handshake on both boards: $ sudo ethtool --set-mm enp1s0 pmac-enabled on $ sudo ethtool --set-mm enp1s0 tx-enabled on $ sudo ethtool --set-mm enp1s0 verify-enabled on 2) Set preemptible or express queue in taprio for tx board: $ sudo tc qdisc replace dev enp1s0 parent root handle 100 taprio \ num_tc 4 map 3 2 1 0 3 3 3 3 3 3 3 3 3 3 3 3 \ queues 1@0 1@1 1@2 1@3 base-time 0 sched-entry S F 10 \ fp E E P P 3) Send large size packets on preemptible queue 4) Send small size packets on express queue to preempt packets in preemptible queue 5) Show preemption statistics on the receiving board: $ ethtool --include-statistics --show-mm enp1s0 MAC Merge layer state for enp1s0: pMAC enabled: on TX enabled: on TX active: on TX minimum fragment size: 64 RX minimum fragment size: 60 Verify enabled: on Verify time: 128 Max verify time: 128 Verification status: SUCCEEDED Statistics: MACMergeFrameAssErrorCount: 0 MACMergeFrameSmdErrorCount: 0 MACMergeFrameAssOkCount: 511 MACMergeFragCountRx: 764 MACMergeFragCountTx: 0 MACMergeHoldCount: 0 Co-developed-by: Vinicius Costa Gomes Signed-off-by: Vinicius Costa Gomes Signed-off-by: Faizal Rahim --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 36 drivers/net/ethernet/intel/igc/igc_main.c| 1 + drivers/net/ethernet/intel/igc/igc_regs.h| 16 + 3 files changed, 53 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 7f0052e0d50c..97a1194399b1 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1819,6 +1819,41 @@ static int igc_ethtool_set_mm(struct net_device *netdev, return igc_tsn_offload_apply(adapter); } +/** + * igc_ethtool_get_frame_ass_error - Get the frame assembly error count. + * @dev: Pointer to the net_device structure. 
+ * Return: The count of frame assembly errors. + */ +static u64 igc_ethtool_get_frame_ass_error(struct net_device *dev) +{ + struct igc_adapter *adapter = netdev_priv(dev); + u32 ooo_smdc, ooo_frame_cnt, ooo_frag_cnt; /* Out of order statistics */ + struct igc_hw *hw = &adapter->hw; + u32 miss_frame_frag_cnt; + u32 reg_value; + + reg_value = rd32(IGC_PRMEXPRCNT); + ooo_smdc = FIELD_GET(IGC_PRMEXPRCNT_OOO_SMDC, reg_value); + ooo_frame_cnt = FIELD_GET(IGC_PRMEXPRCNT_OOO_FRAME_CNT, reg_value); + ooo_frag_cnt = FIELD_GET(IGC_PRMEXPRCNT_OOO_FRAG_CNT, reg_value); + miss_frame_frag_cnt = FIELD_GET(IGC_PRMEXPRCNT_MISS_FRAME_FRAG_CNT, + reg_value); + + return ooo_smdc + ooo_frame_cnt + ooo_frag_cnt + miss_frame_frag_cnt; +} + +static void igc_ethtool_get_mm_stats(struct net_device *dev, +struct ethtool_mm_stats *stats) +{ + struct igc_adapter *adapter = netdev_priv(dev); + struct igc_hw *hw = &adapter->hw; + + stats->MACMergeFrameAssErrorCount = igc_ethtool_get_frame_ass_error(dev); + stats->MACMergeFrameAssOkCount = rd32(IGC_PRMPTDRCNT); + stats->MACMergeFragCountRx = rd32(IGC_PRMEVNTRCNT); + stats->MACMergeFragCountTx = rd32(IGC_PRMEVNTTCNT); +} + static int igc_ethtool_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { @@ -2108,6 +2143,7 @@ static const struct ethtool_ops igc_ethtool_ops = { .get_channels = igc_ethtool_get_channels, .get_mm = igc_ethtool_get_mm, .set_mm = igc_ethtool_set_mm, + .get_mm_stats = igc_ethtool_get_mm_stats, .set_channels = igc_ethtool_set_channels, .get_priv_flags = igc_ethtool_get_priv_flags, .set_priv_flags = igc_ethtool_set_priv_flags, diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index f15ac7565fbd..cd5160315993 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -3076,6 +3076,7 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget) break; if 
(static_branch_unlikely(&igc_fpe_enabled) && + adapter->fpe.mmsv.pmac_enabled && igc_fpe_transmitted_smd_v(tx_desc)) ethtool_mmsv_event_handle(&adapter->fpe.mmsv, ETHTOOL_MMSV_LD_SENT_VERIFY_MPACKET); diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h index 12ddc5793651..41dbfb07eb2f 100644 ---