Add support for the LZ4 decompression algorithm to the mlx5 compress PMD.

Signed-off-by: Michael Baum <michae...@nvidia.com>
---
 doc/guides/compressdevs/features/mlx5.ini |  18 ++-
 doc/guides/compressdevs/mlx5.rst          |  49 ++++++-
 doc/guides/rel_notes/release_23_03.rst    |   4 +
 drivers/compress/mlx5/mlx5_compress.c     | 150 ++++++++++++++++++----
 4 files changed, 180 insertions(+), 41 deletions(-)

diff --git a/doc/guides/compressdevs/features/mlx5.ini b/doc/guides/compressdevs/features/mlx5.ini
index 891ce47936..28b050144a 100644
--- a/doc/guides/compressdevs/features/mlx5.ini
+++ b/doc/guides/compressdevs/features/mlx5.ini
@@ -4,10 +4,14 @@
 ; Supported features of 'MLX5' compression driver.
 ;
 [Features]
-HW Accelerated = Y
-Deflate        = Y
-Adler32        = Y
-Crc32          = Y
-Adler32&Crc32  = Y
-Fixed          = Y
-Dynamic        = Y
+HW Accelerated         = Y
+Deflate                = Y
+LZ4                    = Y
+Adler32                = Y
+Crc32                  = Y
+Adler32&Crc32          = Y
+xxHash32               = Y
+Fixed                  = Y
+Dynamic                = Y
+LZ4 Block Checksum     = Y
+LZ4 Block Independence = Y
diff --git a/doc/guides/compressdevs/mlx5.rst b/doc/guides/compressdevs/mlx5.rst
index 37839a59e3..c834025732 100644
--- a/doc/guides/compressdevs/mlx5.rst
+++ b/doc/guides/compressdevs/mlx5.rst
@@ -14,8 +14,8 @@ NVIDIA MLX5 Compress Driver
    that are now NVIDIA trademarks.
 
 The mlx5 compress driver library
-(**librte_compress_mlx5**) provides support for **NVIDIA BlueField-2**
-families of 25/50/100/200 Gb/s adapters.
+(**librte_compress_mlx5**) provides support for **NVIDIA BlueField-2** and
+**NVIDIA BlueField-3** families of 25/50/100/200 Gb/s adapters.
 
 Design
 ------
@@ -39,11 +39,27 @@ Features
 
 Compress mlx5 PMD has support for:
 
-Compression/Decompression algorithm:
+- Compression
+- Decompression
+- DMA
 
-* DEFLATE.
+Algorithms
+----------
 
-NULL algorithm for DMA operations.
+NULL algorithm
+~~~~~~~~~~~~~~
+
+The NULL algorithm performs DMA operations.
+It can be used through either a compress or a decompress operation.
+
+Shareable transformation.
+
+Checksum generation:
+
+* CRC32, Adler32 and combined checksum.
+
+DEFLATE algorithm
+~~~~~~~~~~~~~~~~~
 
 Huffman code type:
 
@@ -60,11 +76,31 @@ Checksum generation:
 
 * CRC32, Adler32 and combined checksum.
 
+LZ4 algorithm
+~~~~~~~~~~~~~
+
+Support for flags:
+
+* ``RTE_COMP_LZ4_FLAG_BLOCK_CHECKSUM``
+* ``RTE_COMP_LZ4_FLAG_BLOCK_INDEPENDENCE``
+
+Window size support:
+
+1KB, 2KB, 4KB, 8KB, 16KB and 32KB.
+
+Shareable transformation.
+
+Checksum generation:
+
+* xxHash-32 checksum.
+
 Limitations
 -----------
 
 * Scatter-Gather, SHA and Stateful are not supported.
 * Non-compressed block is not supported in compress (supported in decompress).
+* Compress operation is not supported by BlueField-3.
+* LZ4 algorithm is not supported by BlueField-2.
 
 Driver options
 --------------
@@ -75,7 +111,7 @@ for an additional list of options shared with other mlx5 drivers.
 - ``log-block-size`` parameter [int]
 
   Log of the Huffman block size in the Deflate algorithm.
-  Values from [4-15]; value x means block size is 2^x.
+  Values from [4-15]; value x means block size is 2\ :sup:`x`.
   The default value is 15.
 
 
@@ -83,6 +119,7 @@ Supported NICs
 --------------
 
 * NVIDIA\ |reg| BlueField-2 SmartNIC
+* NVIDIA\ |reg| BlueField-3 SmartNIC
 
 Prerequisites
 -------------
diff --git a/doc/guides/rel_notes/release_23_03.rst b/doc/guides/rel_notes/release_23_03.rst
index aedc5767ff..3d97a4611d 100644
--- a/doc/guides/rel_notes/release_23_03.rst
+++ b/doc/guides/rel_notes/release_23_03.rst
@@ -86,6 +86,10 @@ New Features
   * Added support for ``RTE_COMP_CHECKSUM_XXHASH32``.
   * Added support for ``lz4`` in test-compress-perf algo options.
 
+* **Updated NVIDIA mlx5 compress PMD.**
+
+  * Added LZ4 algorithm support for decompress operation.
+
 * **Allowed test single compress operation in test-compress-perf.**
 
   Enable the application options for testing only compress and only decompress.
diff --git a/drivers/compress/mlx5/mlx5_compress.c b/drivers/compress/mlx5/mlx5_compress.c
index 7841f57b9c..e33b58ab54 100644
--- a/drivers/compress/mlx5/mlx5_compress.c
+++ b/drivers/compress/mlx5/mlx5_compress.c
@@ -24,6 +24,7 @@
 #define MLX5_COMPRESS_DRIVER_NAME mlx5_compress
 #define MLX5_COMPRESS_MAX_QPS 1024
 #define MLX5_COMP_MAX_WIN_SIZE_CONF 6u
+#define MLX5_COMP_NUM_SUP_ALGO 4
 
 struct mlx5_compress_devarg_params {
        uint32_t log_block_sz;
@@ -43,6 +44,7 @@ struct mlx5_compress_priv {
        struct mlx5_common_device *cdev; /* Backend mlx5 device. */
        struct mlx5_uar uar;
        struct rte_compressdev_config dev_config;
+       struct rte_compressdev_capabilities caps[MLX5_COMP_NUM_SUP_ALGO];
        LIST_HEAD(xform_list, mlx5_compress_xform) xform_list;
        rte_spinlock_t xform_sl;
        uint32_t log_block_sz;
@@ -70,36 +72,16 @@ static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;
 
 int mlx5_compress_logtype;
 
-static const struct rte_compressdev_capabilities mlx5_caps[] = {
-       {
-               .algo = RTE_COMP_ALGO_NULL,
-               .comp_feature_flags = RTE_COMP_FF_ADLER32_CHECKSUM |
-                                     RTE_COMP_FF_CRC32_CHECKSUM |
-                                     RTE_COMP_FF_CRC32_ADLER32_CHECKSUM |
-                                     RTE_COMP_FF_SHAREABLE_PRIV_XFORM,
-       },
-       {
-               .algo = RTE_COMP_ALGO_DEFLATE,
-               .comp_feature_flags = RTE_COMP_FF_ADLER32_CHECKSUM |
-                                     RTE_COMP_FF_CRC32_CHECKSUM |
-                                     RTE_COMP_FF_CRC32_ADLER32_CHECKSUM |
-                                     RTE_COMP_FF_SHAREABLE_PRIV_XFORM |
-                                     RTE_COMP_FF_HUFFMAN_FIXED |
-                                     RTE_COMP_FF_HUFFMAN_DYNAMIC,
-               .window_size = {.min = 10, .max = 15, .increment = 1},
-       },
-       RTE_COMP_END_OF_CAPABILITIES_LIST()
-};
-
 static void
 mlx5_compress_dev_info_get(struct rte_compressdev *dev,
                           struct rte_compressdev_info *info)
 {
-       RTE_SET_USED(dev);
-       if (info != NULL) {
+       if (dev != NULL && info != NULL) {
+               struct mlx5_compress_priv *priv = dev->data->dev_private;
+
                info->max_nb_queue_pairs = MLX5_COMPRESS_MAX_QPS;
                info->feature_flags = RTE_COMPDEV_FF_HW_ACCELERATED;
-               info->capabilities = mlx5_caps;
+               info->capabilities = priv->caps;
        }
 }
 
@@ -236,6 +218,8 @@ mlx5_compress_qp_setup(struct rte_compressdev *dev, uint16_t qp_id,
        qp_attr.num_of_receive_wqes = 0;
        qp_attr.num_of_send_wqbbs = RTE_BIT32(log_ops_n);
        qp_attr.mmo = attr->mmo_compress_qp_en || attr->mmo_dma_qp_en ||
+                     attr->decomp_lz4_checksum_en ||
+                     attr->decomp_lz4_no_checksum_en ||
                      attr->decomp_deflate_v1_en || attr->decomp_deflate_v2_en;
        ret = mlx5_devx_qp_create(priv->cdev->ctx, &qp->qp,
                                        qp_attr.num_of_send_wqbbs *
@@ -280,7 +264,11 @@ mlx5_compress_xform_validate(const struct rte_comp_xform *xform,
                        return -ENOTSUP;
                } else if (!attr->mmo_compress_qp_en &&
                           !attr->mmo_compress_sq_en) {
-                       DRV_LOG(ERR, "Not enough capabilities to support 
compress operation, maybe old FW/OFED version?");
+                       DRV_LOG(ERR, "Not enough capabilities to support 
compress operation.");
+                       return -ENOTSUP;
+               }
+               if (xform->compress.algo == RTE_COMP_ALGO_LZ4) {
+                       DRV_LOG(ERR, "LZ4 compression is not supported.");
                        return -ENOTSUP;
                }
                if (xform->compress.level == RTE_COMP_LEVEL_NONE) {
@@ -291,6 +279,10 @@ mlx5_compress_xform_validate(const struct rte_comp_xform *xform,
                        DRV_LOG(ERR, "SHA is not supported.");
                        return -ENOTSUP;
                }
+               if (xform->compress.chksum == RTE_COMP_CHECKSUM_XXHASH32) {
+                       DRV_LOG(ERR, "xxHash32 checksum isn't supported in 
compress operation.");
+                       return -ENOTSUP;
+               }
                break;
        case RTE_COMP_DECOMPRESS:
                switch (xform->decompress.algo) {
@@ -307,6 +299,37 @@ mlx5_compress_xform_validate(const struct rte_comp_xform *xform,
                                DRV_LOG(ERR, "Not enough capabilities to 
support decompress DEFLATE algorithm, maybe old FW/OFED version?");
                                return -ENOTSUP;
                        }
+                       if (xform->decompress.chksum ==
+                           RTE_COMP_CHECKSUM_XXHASH32) {
+                               DRV_LOG(ERR, "DEFLATE algorithm doesn't support 
xxHash32 checksum.");
+                               return -ENOTSUP;
+                       }
+                       break;
+               case RTE_COMP_ALGO_LZ4:
+                       if (!attr->decomp_lz4_no_checksum_en &&
+                           !attr->decomp_lz4_checksum_en) {
+                               DRV_LOG(ERR, "Not enough capabilities to 
support decompress LZ4 algorithm, maybe old FW/OFED version?");
+                               return -ENOTSUP;
+                       }
+                       if (xform->decompress.lz4.flags &
+                           RTE_COMP_LZ4_FLAG_BLOCK_CHECKSUM) {
+                               if (!attr->decomp_lz4_checksum_en) {
+                                       DRV_LOG(ERR, "Not enough capabilities 
to support decompress LZ4 block with checksum param, maybe old FW/OFED 
version?");
+                                       return -ENOTSUP;
+                               }
+                       } else {
+                               if (!attr->decomp_lz4_no_checksum_en) {
+                                       DRV_LOG(ERR, "Not enough capabilities 
to support decompress LZ4 block without checksum param, maybe old FW/OFED 
version?");
+                                       return -ENOTSUP;
+                               }
+                       }
+                       if (xform->decompress.chksum !=
+                           RTE_COMP_CHECKSUM_XXHASH32 &&
+                           xform->decompress.chksum !=
+                           RTE_COMP_CHECKSUM_NONE) {
+                               DRV_LOG(ERR, "LZ4 algorithm supports only 
xxHash32 checksum.");
+                               return -ENOTSUP;
+                       }
                        break;
                default:
                        DRV_LOG(ERR, "Algorithm %u is not supported.",
@@ -383,6 +406,27 @@ mlx5_compress_xform_create(struct rte_compressdev *dev,
                case RTE_COMP_ALGO_DEFLATE:
                        xfrm->opcode += MLX5_OPC_MOD_MMO_DECOMP <<
                                                        WQE_CSEG_OPC_MOD_OFFSET;
+                       xfrm->gga_ctrl1 += WQE_GGA_DECOMP_DEFLATE <<
+                                                    WQE_GGA_DECOMP_TYPE_OFFSET;
+                       break;
+               case RTE_COMP_ALGO_LZ4:
+                       xfrm->opcode += MLX5_OPC_MOD_MMO_DECOMP <<
+                                                       WQE_CSEG_OPC_MOD_OFFSET;
+                       xfrm->gga_ctrl1 += WQE_GGA_DECOMP_LZ4 <<
+                                                    WQE_GGA_DECOMP_TYPE_OFFSET;
+                       if (xform->decompress.lz4.flags &
+                           RTE_COMP_LZ4_FLAG_BLOCK_CHECKSUM)
+                               xfrm->gga_ctrl1 +=
+                                     MLX5_GGA_DECOMP_LZ4_BLOCK_WITH_CHECKSUM <<
+                                                  WQE_GGA_DECOMP_PARAMS_OFFSET;
+                       else
+                               xfrm->gga_ctrl1 +=
+                                     MLX5_GGA_DECOMP_LZ4_BLOCK_WITHOUT_CHECKSUM
+                                               << WQE_GGA_DECOMP_PARAMS_OFFSET;
+                       if (xform->decompress.lz4.flags &
+                           RTE_COMP_LZ4_FLAG_BLOCK_INDEPENDENCE)
+                               xfrm->gga_ctrl1 += 1u <<
+                                       WQE_GGA_DECOMP_BLOCK_INDEPENDENT_OFFSET;
                        break;
                default:
                        goto err;
@@ -390,7 +434,7 @@ mlx5_compress_xform_create(struct rte_compressdev *dev,
                xfrm->csum_type = xform->decompress.chksum;
                break;
        default:
-               DRV_LOG(ERR, "Algorithm %u is not supported.", xform->type);
+               DRV_LOG(ERR, "Operation %u is not supported.", xform->type);
                goto err;
        }
        DRV_LOG(DEBUG, "New xform: gga ctrl1 = 0x%08X opcode = 0x%08X csum "
@@ -657,6 +701,10 @@ mlx5_compress_dequeue_burst(void *queue_pair, struct rte_comp_op **ops,
                                                     ((uint64_t)rte_be_to_cpu_32
                                         (opaq[idx].data[crc32_idx + 1]) << 32);
                                break;
+                       case RTE_COMP_CHECKSUM_XXHASH32:
+                               op->output_chksum = (uint64_t)rte_be_to_cpu_32
+                                                   (opaq[idx].v2.xxh32);
+                               break;
                        default:
                                break;
                        }
@@ -720,6 +768,49 @@ mlx5_compress_handle_devargs(struct mlx5_kvargs_ctrl *mkvlist,
        return 0;
 }
 
+static void
+mlx5_compress_fill_caps(struct mlx5_compress_priv *priv,
+                       const struct mlx5_hca_attr *attr)
+{
+       struct rte_compressdev_capabilities caps[] = {
+               {
+                       .algo = RTE_COMP_ALGO_NULL,
+                       .comp_feature_flags = RTE_COMP_FF_ADLER32_CHECKSUM |
+                                       RTE_COMP_FF_CRC32_CHECKSUM |
+                                       RTE_COMP_FF_CRC32_ADLER32_CHECKSUM |
+                                       RTE_COMP_FF_SHAREABLE_PRIV_XFORM,
+               },
+               {
+                       .algo = RTE_COMP_ALGO_DEFLATE,
+                       .comp_feature_flags = RTE_COMP_FF_ADLER32_CHECKSUM |
+                                       RTE_COMP_FF_CRC32_CHECKSUM |
+                                       RTE_COMP_FF_CRC32_ADLER32_CHECKSUM |
+                                       RTE_COMP_FF_SHAREABLE_PRIV_XFORM |
+                                       RTE_COMP_FF_HUFFMAN_FIXED |
+                                       RTE_COMP_FF_HUFFMAN_DYNAMIC,
+                       .window_size = {.min = 10, .max = 15, .increment = 1},
+               },
+               {
+                       .algo = RTE_COMP_ALGO_LZ4,
+                       .comp_feature_flags = RTE_COMP_FF_XXHASH32_CHECKSUM |
+                                       RTE_COMP_FF_SHAREABLE_PRIV_XFORM |
+                                       RTE_COMP_FF_LZ4_BLOCK_INDEPENDENCE,
+                       .window_size = {.min = 1, .max = 15, .increment = 1},
+               },
+               RTE_COMP_END_OF_CAPABILITIES_LIST()
+       };
+       priv->caps[0] = caps[0];
+       priv->caps[1] = caps[1];
+       if (attr->decomp_lz4_checksum_en || attr->decomp_lz4_no_checksum_en) {
+               priv->caps[2] = caps[2];
+               if (attr->decomp_lz4_checksum_en)
+                       priv->caps[2].comp_feature_flags |=
+                                       RTE_COMP_FF_LZ4_BLOCK_WITH_CHECKSUM;
+               priv->caps[3] = caps[3];
+       } else
+               priv->caps[2] = caps[3];
+}
+
 static int
 mlx5_compress_dev_probe(struct mlx5_common_device *cdev,
                        struct mlx5_kvargs_ctrl *mkvlist)
@@ -740,7 +831,8 @@ mlx5_compress_dev_probe(struct mlx5_common_device *cdev,
                rte_errno = ENOTSUP;
                return -rte_errno;
        }
-       if (!attr->decomp_deflate_v1_en && !attr->decomp_deflate_v2_en &&
+       if (!attr->decomp_lz4_checksum_en && !attr->decomp_lz4_no_checksum_en &&
+           !attr->decomp_deflate_v1_en && !attr->decomp_deflate_v2_en &&
            !attr->mmo_decompress_sq_en && !attr->mmo_compress_qp_en &&
            !attr->mmo_compress_sq_en && !attr->mmo_dma_qp_en &&
            !attr->mmo_dma_sq_en) {
@@ -763,7 +855,8 @@ mlx5_compress_dev_probe(struct mlx5_common_device *cdev,
        compressdev->feature_flags = RTE_COMPDEV_FF_HW_ACCELERATED;
        priv = compressdev->data->dev_private;
        priv->log_block_sz = devarg_prms.log_block_sz;
-       if (attr->decomp_deflate_v2_en)
+       if (attr->decomp_deflate_v2_en || attr->decomp_lz4_checksum_en ||
+           attr->decomp_lz4_no_checksum_en)
                crc32_opaq_offset = offsetof(union mlx5_gga_compress_opaque,
                                             v2.crc32);
        else
@@ -773,6 +866,7 @@ mlx5_compress_dev_probe(struct mlx5_common_device *cdev,
        priv->crc32_opaq_offs = crc32_opaq_offset / 4;
        priv->cdev = cdev;
        priv->compressdev = compressdev;
+       mlx5_compress_fill_caps(priv, attr);
        if (mlx5_devx_uar_prepare(cdev, &priv->uar) != 0) {
                rte_compressdev_pmd_destroy(priv->compressdev);
                return -1;
-- 
2.25.1

Reply via email to