Commit [1] add support in Bluefield-3, but Bluefield-3 has different
GGA opaque structure than Bluefield-2.

This patch updates the PRM structure to include both versions, and
culculate the relevant offset for each version in control path.

Commit [1] 559014f232b4 ("compress/mlx5: add Bluefield-3 device ID")

Signed-off-by: Michael Baum <michae...@nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c  |  6 ++--
 drivers/common/mlx5/mlx5_devx_cmds.h  |  3 +-
 drivers/common/mlx5/mlx5_prm.h        | 40 +++++++++++++++---------
 drivers/compress/mlx5/mlx5_compress.c | 45 +++++++++++++++++----------
 4 files changed, 61 insertions(+), 33 deletions(-)

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c 
b/drivers/common/mlx5/mlx5_devx_cmds.c
index 59cebb530f..dfec4dcf1b 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -978,8 +978,10 @@ mlx5_devx_cmd_query_hca_attr(void *ctx,
        attr->mmo_dma_qp_en = MLX5_GET(cmd_hca_cap, hcattr, dma_mmo_qp);
        attr->mmo_compress_qp_en = MLX5_GET(cmd_hca_cap, hcattr,
                        compress_mmo_qp);
-       attr->mmo_decompress_qp_en = MLX5_GET(cmd_hca_cap, hcattr,
-                       decompress_mmo_qp);
+       attr->decomp_deflate_v1_en = MLX5_GET(cmd_hca_cap, hcattr,
+                                             decompress_deflate_v1);
+       attr->decomp_deflate_v2_en = MLX5_GET(cmd_hca_cap, hcattr,
+                                             decompress_deflate_v2);
        attr->compress_min_block_size = MLX5_GET(cmd_hca_cap, hcattr,
                                                 compress_min_block_size);
        attr->log_max_mmo_dma = MLX5_GET(cmd_hca_cap, hcattr, log_dma_mmo_size);
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h 
b/drivers/common/mlx5/mlx5_devx_cmds.h
index c94b9eac06..edb387e272 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -259,7 +259,8 @@ struct mlx5_hca_attr {
        uint32_t mmo_decompress_sq_en:1;
        uint32_t mmo_dma_qp_en:1;
        uint32_t mmo_compress_qp_en:1;
-       uint32_t mmo_decompress_qp_en:1;
+       uint32_t decomp_deflate_v1_en:1;
+       uint32_t decomp_deflate_v2_en:1;
        uint32_t mmo_regex_qp_en:1;
        uint32_t mmo_regex_sq_en:1;
        uint32_t compress_min_block_size:4;
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 2b5c43ee6e..377cbfab87 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -607,17 +607,27 @@ struct mlx5_gga_wqe {
        struct mlx5_wqe_dseg scatter;
 } __rte_packed;
 
-struct mlx5_gga_compress_opaque {
-       uint32_t syndrom;
-       uint32_t reserved0;
-       uint32_t scattered_length;
-       uint32_t gathered_length;
-       uint64_t scatter_crc;
-       uint64_t gather_crc;
-       uint32_t crc32;
-       uint32_t adler32;
-       uint8_t reserved1[216];
-} __rte_packed;
+union mlx5_gga_compress_opaque {
+       struct {
+               uint32_t syndrome;
+               uint32_t reserved0;
+               uint32_t scattered_length;
+               union {
+                       struct {
+                               uint32_t reserved1[5];
+                               uint32_t crc32;
+                               uint32_t adler32;
+                       } v1 __rte_packed;
+                       struct {
+                               uint32_t crc32;
+                               uint32_t adler32;
+                               uint32_t crc32c;
+                               uint32_t xxh32;
+                       } v2 __rte_packed;
+               };
+       } __rte_packed;
+       uint32_t data[64];
+};
 
 struct mlx5_ifc_regexp_mmo_control_bits {
        uint8_t reserved_at_31[0x2];
@@ -1463,7 +1473,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8 log_max_bsf_list_size[0x6];
        u8 umr_extended_translation_offset[0x1];
        u8 null_mkey[0x1];
-       u8 log_max_klm_list_size[0x6];
+       u8 log_maxklm_list_size[0x6];
        u8 non_wire_sq[0x1];
        u8 reserved_at_121[0x9];
        u8 log_max_ra_req_dc[0x6];
@@ -1749,8 +1759,10 @@ struct mlx5_ifc_cmd_hca_cap_bits {
        u8 dma_mmo_qp[0x1];
        u8 regexp_mmo_qp[0x1];
        u8 compress_mmo_qp[0x1];
-       u8 decompress_mmo_qp[0x1];
-       u8 reserved_at_74c[0x14];
+       u8 decompress_deflate_v1[0x1];
+       u8 reserved_at_74c[0x4];
+       u8 decompress_deflate_v2[0x1];
+       u8 reserved_at_751[0xf];
        u8 reserved_at_760[0x3];
        u8 log_max_num_header_modify_argument[0x5];
        u8 log_header_modify_argument_granularity_offset[0x4];
diff --git a/drivers/compress/mlx5/mlx5_compress.c 
b/drivers/compress/mlx5/mlx5_compress.c
index c4bf62ed41..11fad72a4f 100644
--- a/drivers/compress/mlx5/mlx5_compress.c
+++ b/drivers/compress/mlx5/mlx5_compress.c
@@ -55,6 +55,7 @@ struct mlx5_compress_priv {
        uint32_t mmo_dma_sq:1;
        uint32_t mmo_dma_qp:1;
        uint32_t log_block_sz;
+       uint32_t crc32_opaq_offs;
 };
 
 struct mlx5_compress_qp {
@@ -157,7 +158,7 @@ mlx5_compress_init_qp(struct mlx5_compress_qp *qp)
 {
        volatile struct mlx5_gga_wqe *restrict wqe =
                                    (volatile struct mlx5_gga_wqe *)qp->qp.wqes;
-       volatile struct mlx5_gga_compress_opaque *opaq = qp->opaque_mr.addr;
+       volatile union mlx5_gga_compress_opaque *opaq = qp->opaque_mr.addr;
        const uint32_t sq_ds = rte_cpu_to_be_32((qp->qp.qp->id << 8) | 4u);
        const uint32_t flags = RTE_BE32(MLX5_COMP_ALWAYS <<
                                        MLX5_COMP_MODE_OFFSET);
@@ -211,8 +212,8 @@ mlx5_compress_qp_setup(struct rte_compressdev *dev, 
uint16_t qp_id,
                goto err;
        }
        opaq_buf = rte_calloc(__func__, (size_t)1 << log_ops_n,
-                             sizeof(struct mlx5_gga_compress_opaque),
-                             sizeof(struct mlx5_gga_compress_opaque));
+                             sizeof(union mlx5_gga_compress_opaque),
+                             sizeof(union mlx5_gga_compress_opaque));
        if (opaq_buf == NULL) {
                DRV_LOG(ERR, "Failed to allocate opaque memory.");
                rte_errno = ENOMEM;
@@ -225,7 +226,7 @@ mlx5_compress_qp_setup(struct rte_compressdev *dev, 
uint16_t qp_id,
        qp->ops = (struct rte_comp_op **)RTE_ALIGN((uintptr_t)(qp + 1),
                                                   RTE_CACHE_LINE_SIZE);
        if (mlx5_common_verbs_reg_mr(priv->cdev->pd, opaq_buf, qp->entries_n *
-                                       sizeof(struct mlx5_gga_compress_opaque),
+                                       sizeof(union mlx5_gga_compress_opaque),
                                                         &qp->opaque_mr) != 0) {
                rte_free(opaq_buf);
                DRV_LOG(ERR, "Failed to register opaque MR.");
@@ -544,7 +545,7 @@ mlx5_compress_dump_err_objs(volatile uint32_t *cqe, 
volatile uint32_t *wqe,
                DRV_LOG(ERR, "%08X %08X %08X %08X", wqe[i], wqe[i + 1],
                        wqe[i + 2], wqe[i + 3]);
        DRV_LOG(ERR, "\nError opaq:");
-       for (i = 0; i < sizeof(struct mlx5_gga_compress_opaque) >> 2; i += 4)
+       for (i = 0; i < sizeof(union mlx5_gga_compress_opaque) >> 2; i += 4)
                DRV_LOG(ERR, "%08X %08X %08X %08X", opaq[i], opaq[i + 1],
                        opaq[i + 2], opaq[i + 3]);
 }
@@ -558,7 +559,7 @@ mlx5_compress_cqe_err_handle(struct mlx5_compress_qp *qp,
                                                              &qp->cq.cqes[idx];
        volatile struct mlx5_gga_wqe *wqes = (volatile struct mlx5_gga_wqe *)
                                                                    qp->qp.wqes;
-       volatile struct mlx5_gga_compress_opaque *opaq = qp->opaque_mr.addr;
+       volatile union mlx5_gga_compress_opaque *opaq = qp->opaque_mr.addr;
 
        volatile uint32_t *synd_word = RTE_PTR_ADD(cqe, 
MLX5_ERROR_CQE_SYNDROME_OFFSET);
        switch (*synd_word) {
@@ -575,7 +576,7 @@ mlx5_compress_cqe_err_handle(struct mlx5_compress_qp *qp,
        op->consumed = 0;
        op->produced = 0;
        op->output_chksum = 0;
-       op->debug_status = rte_be_to_cpu_32(opaq[idx].syndrom) |
+       op->debug_status = rte_be_to_cpu_32(opaq[idx].syndrome) |
                              ((uint64_t)rte_be_to_cpu_32(cqe->syndrome) << 32);
        mlx5_compress_dump_err_objs((volatile uint32_t *)cqe,
                                 (volatile uint32_t *)&wqes[idx],
@@ -590,13 +591,14 @@ mlx5_compress_dequeue_burst(void *queue_pair, struct 
rte_comp_op **ops,
        struct mlx5_compress_qp *qp = queue_pair;
        volatile struct mlx5_compress_xform *restrict xform;
        volatile struct mlx5_cqe *restrict cqe;
-       volatile struct mlx5_gga_compress_opaque *opaq = qp->opaque_mr.addr;
+       volatile union mlx5_gga_compress_opaque *opaq = qp->opaque_mr.addr;
        struct rte_comp_op *restrict op;
        const unsigned int cq_size = qp->entries_n;
        const unsigned int mask = cq_size - 1;
        uint32_t idx;
        uint32_t next_idx = qp->ci & mask;
        const uint16_t max = RTE_MIN((uint16_t)(qp->pi - qp->ci), nb_ops);
+       uint32_t crc32_idx = qp->priv->crc32_opaq_offs;
        uint16_t i = 0;
        int ret;
 
@@ -629,17 +631,17 @@ mlx5_compress_dequeue_burst(void *queue_pair, struct 
rte_comp_op **ops,
                        switch (xform->csum_type) {
                        case RTE_COMP_CHECKSUM_CRC32:
                                op->output_chksum = (uint64_t)rte_be_to_cpu_32
-                                                   (opaq[idx].crc32);
+                                                   (opaq[idx].data[crc32_idx]);
                                break;
                        case RTE_COMP_CHECKSUM_ADLER32:
                                op->output_chksum = (uint64_t)rte_be_to_cpu_32
-                                                   (opaq[idx].adler32);
+                                               (opaq[idx].data[crc32_idx + 1]);
                                break;
                        case RTE_COMP_CHECKSUM_CRC32_ADLER32:
                                op->output_chksum = (uint64_t)rte_be_to_cpu_32
-                                                            (opaq[idx].crc32) |
+                                                  (opaq[idx].data[crc32_idx]) |
                                                     ((uint64_t)rte_be_to_cpu_32
-                                                    (opaq[idx].adler32) << 32);
+                                        (opaq[idx].data[crc32_idx + 1]) << 32);
                                break;
                        default:
                                break;
@@ -717,15 +719,17 @@ mlx5_compress_dev_probe(struct mlx5_common_device *cdev,
                .socket_id = cdev->dev->numa_node,
        };
        const char *ibdev_name = mlx5_os_get_ctx_device_name(cdev->ctx);
+       uint32_t crc32_opaq_offset;
 
        if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
                DRV_LOG(ERR, "Non-primary process type is not supported.");
                rte_errno = ENOTSUP;
                return -rte_errno;
        }
-       if (!attr->mmo_decompress_qp_en && !attr->mmo_decompress_sq_en
-               && !attr->mmo_compress_qp_en && !attr->mmo_compress_sq_en
-               && !attr->mmo_dma_qp_en && !attr->mmo_dma_sq_en) {
+       if (!attr->decomp_deflate_v1_en && !attr->decomp_deflate_v2_en &&
+           !attr->mmo_decompress_sq_en && !attr->mmo_compress_qp_en &&
+           !attr->mmo_compress_sq_en && !attr->mmo_dma_qp_en &&
+           !attr->mmo_dma_sq_en) {
                DRV_LOG(ERR, "Not enough capabilities to support compress 
operations, maybe old FW/OFED version?");
                rte_errno = ENOTSUP;
                return -ENOTSUP;
@@ -746,11 +750,20 @@ mlx5_compress_dev_probe(struct mlx5_common_device *cdev,
        priv = compressdev->data->dev_private;
        priv->log_block_sz = devarg_prms.log_block_sz;
        priv->mmo_decomp_sq = attr->mmo_decompress_sq_en;
-       priv->mmo_decomp_qp = attr->mmo_decompress_qp_en;
+       priv->mmo_decomp_qp =
+                       attr->decomp_deflate_v1_en | attr->decomp_deflate_v2_en;
        priv->mmo_comp_sq = attr->mmo_compress_sq_en;
        priv->mmo_comp_qp = attr->mmo_compress_qp_en;
        priv->mmo_dma_sq = attr->mmo_dma_sq_en;
        priv->mmo_dma_qp = attr->mmo_dma_qp_en;
+       if (attr->decomp_deflate_v2_en)
+               crc32_opaq_offset = offsetof(union mlx5_gga_compress_opaque,
+                                            v2.crc32);
+       else
+               crc32_opaq_offset = offsetof(union mlx5_gga_compress_opaque,
+                                            v1.crc32);
+       MLX5_ASSERT((crc32_opaq_offset % 4) == 0);
+       priv->crc32_opaq_offs = crc32_opaq_offset / 4;
        priv->cdev = cdev;
        priv->compressdev = compressdev;
        priv->min_block_size = attr->compress_min_block_size;
-- 
2.25.1

Reply via email to