Hi Shally,

Sorry for the delay - I was on sick leave.
We had some issues with the dynamic compression test, so I have blocked that test in V2.
It may be too late to add this into this release, but we've decided to send
this V2 to DPDK anyway.

My comments are inline (not all have answers so far, I'm still working on that).

> -----Original Message-----
> From: Verma, Shally [mailto:shally.ve...@cavium.com]
> Sent: Friday, October 12, 2018 12:16 PM
> To: Jozwiak, TomaszX <tomaszx.jozw...@intel.com>; dev@dpdk.org; Trahe,
> Fiona <fiona.tr...@intel.com>; akhil.go...@nxp.com; De Lara Guarch, Pablo
> <pablo.de.lara.gua...@intel.com>
> Cc: d...@dpdk.org; l...@dpdk.org; gua...@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> measurement
> 
> HI TomaszX
> 
> Sorry for delay in response. Comments inline.
> 
> >-----Original Message-----
> >From: dev <dev-boun...@dpdk.org> On Behalf Of Tomasz Jozwiak
> >Sent: 01 October 2018 18:57
> >To: dev@dpdk.org; fiona.tr...@intel.com; tomaszx.jozw...@intel.com;
> >akhil.go...@nxp.com; pablo.de.lara.gua...@intel.com
> >Cc: d...@dpdk.org; l...@dpdk.org; gua...@dpdk.org
> >Subject: [dpdk-dev] [PATCH 2/3] app/compress-perf: add performance
> >measurement
> >
> >External Email
> >
> >Added performance measurement part into compression perf. test.
> >
> >Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.gua...@intel.com>
> >Signed-off-by: Tomasz Jozwiak <tomaszx.jozw...@intel.com>
> >---
> > app/test-compress-perf/main.c | 844
> >++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 844 insertions(+)
> >
> >diff --git a/app/test-compress-perf/main.c
> >b/app/test-compress-perf/main.c index f52b98d..093dfaf 100644
> >--- a/app/test-compress-perf/main.c
> >+++ b/app/test-compress-perf/main.c
> >@@ -5,13 +5,721 @@
> > #include <rte_malloc.h>
> > #include <rte_eal.h>
> > #include <rte_log.h>
> >+#include <rte_cycles.h>
> > #include <rte_compressdev.h>
> >
> > #include "comp_perf_options.h"
> >
> >+#define NUM_MAX_XFORMS 16
> >+#define NUM_MAX_INFLIGHT_OPS 512
> >+#define EXPANSE_RATIO 1.05
> >+#define MIN_ISAL_SIZE 8
> >+
> >+#define DIV_CEIL(a, b)  ((a) / (b) + ((a) % (b) != 0))
> >+
> >+static int
> >+param_range_check(uint16_t size, const struct rte_param_log2_range
> >+*range) {
> >+       unsigned int next_size;
> >+
> >+       /* Check lower/upper bounds */
> >+       if (size < range->min)
> >+               return -1;
> >+
> >+       if (size > range->max)
> >+               return -1;
> >+
> >+       /* If range is actually only one value, size is correct */
> >+       if (range->increment == 0)
> >+               return 0;
> >+
> >+       /* Check if value is one of the supported sizes */
> >+       for (next_size = range->min; next_size <= range->max;
> >+                       next_size += range->increment)
> >+               if (size == next_size)
> >+                       return 0;
> >+
> >+       return -1;
> >+}
> >+
> >+static int
> >+comp_perf_check_capabilities(struct comp_test_data *test_data) {
> >+       const struct rte_compressdev_capabilities *cap;
> >+
> >+       cap = rte_compressdev_capability_get(test_data->cdev_id,
> >+                                            RTE_COMP_ALGO_DEFLATE);
> >+
> >+       if (cap == NULL) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Compress device does not support DEFLATE\n");
> >+               return -1;
> >+       }
> >+
> >+       uint64_t comp_flags = cap->comp_feature_flags;
> >+
> >+       /* Huffman enconding */
> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Compress device does not supported Fixed 
> >Huffman\n");
> >+               return -1;
> >+       }
> >+
> >+       if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC &&
> >+                       (comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Compress device does not supported Dynamic 
> >Huffman\n");
> >+               return -1;
> >+       }
> >+
> >+       /* Window size */
> >+       if (test_data->window_sz != -1) {
> >+               if (param_range_check(test_data->window_sz,
> >+ &cap->window_size)
> What if cap->window_size is 0 i.e. implementation default?

TJ: You probably mean cap->window_size.increment = 0 (because cap->window_size
is a structure). In that case we only check that test_data->window_sz >= min and
test_data->window_sz <= max, because increment = 0 means (based on the
compression API) that only one window_size value is supported (no range).
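
For illustration, a minimal sketch of how that plays out with the patch's
param_range_check(); the capability values below are made up:

    /* Hypothetical capability: the device reports exactly one window size
     * (2^15), so increment == 0 and only the min/max bounds are checked.
     */
    const struct rte_param_log2_range ws = { .min = 15, .max = 15, .increment = 0 };

    param_range_check(15, &ws);  /*  0 -> 15 is within [min, max] */
    param_range_check(14, &ws);  /* -1 -> below min, rejected     */
    param_range_check(16, &ws);  /* -1 -> above max, rejected     */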



> 
> >+                               < 0) {
> >+                       RTE_LOG(ERR, USER1,
> >+                               "Compress device does not support "
> >+                               "this window size\n");
> >+                       return -1;
> >+               }
> >+       } else
> >+               /* Set window size to PMD maximum if none was specified */
> >+               test_data->window_sz = cap->window_size.max;
> >+
> >+       /* Check if chained mbufs is supported */
> >+       if (test_data->max_sgl_segs > 1  &&
> >+                       (comp_flags & RTE_COMP_FF_OOP_SGL_IN_SGL_OUT) == 0) {
> >+               RTE_LOG(INFO, USER1, "Compress device does not support "
> >+                               "chained mbufs. Max SGL segments set to 
> >1\n");
> >+               test_data->max_sgl_segs = 1;
> >+       }
> >+
> >+       /* Level 0 support */
> >+       if (test_data->level.min == 0 &&
> >+                       (comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) ==
> 0) {
> >+               RTE_LOG(ERR, USER1, "Compress device does not support "
> >+                               "level 0 (no compression)\n");
> >+               return -1;
> >+       }
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+comp_perf_allocate_memory(struct comp_test_data *test_data) {
> >+       /* Number of segments for input and output
> >+        * (compression and decompression)
> >+        */
> >+       uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
> >+                       test_data->seg_sz);
> >+       test_data->comp_buf_pool =
> rte_pktmbuf_pool_create("comp_buf_pool",
> >+                               total_segs,
> >+                               0, 0, test_data->seg_sz + 
> >RTE_PKTMBUF_HEADROOM,
> >+                               rte_socket_id());
> >+       if (test_data->comp_buf_pool == NULL) {
> >+               RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
> >+               return -1;
> >+       }
> >+
> >+       test_data->decomp_buf_pool =
> rte_pktmbuf_pool_create("decomp_buf_pool",
> >+                               total_segs,
> >+                               0, 0, test_data->seg_sz + 
> >RTE_PKTMBUF_HEADROOM,
> >+                               rte_socket_id());
> >+       if (test_data->decomp_buf_pool == NULL) {
> >+               RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
> >+               return -1;
> >+       }
> >+
> >+       test_data->total_bufs = DIV_CEIL(total_segs,
> >+ test_data->max_sgl_segs);
> >+
> >+       test_data->op_pool = rte_comp_op_pool_create("op_pool",
> >+                                 test_data->total_bufs,
> >+                                 0, 0, rte_socket_id());
> >+       if (test_data->op_pool == NULL) {
> >+               RTE_LOG(ERR, USER1, "Comp op mempool could not be
> created\n");
> >+               return -1;
> >+       }
> >+
> >+       /*
> >+        * Compressed data might be a bit larger than input data,
> >+        * if data cannot be compressed
> Possible only if it's zlib format right? Or deflate as well?

TJ: This is due to the possibility of incompressible data. In that case the
compressed output can be bigger than the input, because of the framing which has
to be added to the data. Yes, it relates to zlib and deflate as well.
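
For reference, that is why the output buffer in this patch is allocated with a
small margin over the input size; restated here as a one-line sketch:

    /* Worst case (incompressible data): output = input plus framing overhead.
     * EXPANSE_RATIO (1.05) and MIN_ISAL_SIZE (8) are the constants defined
     * earlier in this file.
     */
    size_t out_buf_sz = test_data->input_data_sz * EXPANSE_RATIO + MIN_ISAL_SIZE;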

> 
> >+        */
> >+       test_data->compressed_data = rte_zmalloc_socket(NULL,
> >+                               test_data->input_data_sz * EXPANSE_RATIO
> >+                                                       + MIN_ISAL_SIZE, 0,
> >+                               rte_socket_id());
> >+       if (test_data->compressed_data == NULL) {
> >+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
> >+                               "file could not be allocated\n");
> >+               return -1;
> >+       }
> >+
> >+       test_data->decompressed_data = rte_zmalloc_socket(NULL,
> >+                               test_data->input_data_sz, 0,
> >+                               rte_socket_id());
> >+       if (test_data->decompressed_data == NULL) {
> >+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
> >+                               "file could not be allocated\n");
> >+               return -1;
> >+       }
> >+
> >+       test_data->comp_bufs = rte_zmalloc_socket(NULL,
> >+                       test_data->total_bufs * sizeof(struct rte_mbuf *),
> >+                       0, rte_socket_id());
> >+       if (test_data->comp_bufs == NULL) {
> >+               RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
> >+                               " could not be allocated\n");
> >+               return -1;
> >+       }
> >+
> >+       test_data->decomp_bufs = rte_zmalloc_socket(NULL,
> >+                       test_data->total_bufs * sizeof(struct rte_mbuf *),
> >+                       0, rte_socket_id());
> >+       if (test_data->decomp_bufs == NULL) {
> >+               RTE_LOG(ERR, USER1, "Memory to hold the decompression
> mbufs"
> >+                               " could not be allocated\n");
> >+               return -1;
> >+       }
> >+       return 0;
> >+}
> >+
> >+static int
> >+comp_perf_dump_input_data(struct comp_test_data *test_data) {
> >+       FILE *f = fopen(test_data->input_file, "r");
> >+
> >+       if (f == NULL) {
> >+               RTE_LOG(ERR, USER1, "Input file could not be opened\n");
> >+               return -1;
> >+       }
> >+
> >+       if (fseek(f, 0, SEEK_END) != 0) {
> >+               RTE_LOG(ERR, USER1, "Size of input could not be 
> >calculated\n");
> >+               goto err;
> >+       }
> >+       size_t actual_file_sz = ftell(f);
> >+       /* If extended input data size has not been set,
> >+        * input data size = file size
> >+        */
> >+
> >+       if (test_data->input_data_sz == 0)
> >+               test_data->input_data_sz = actual_file_sz;
> >+
> >+       if (fseek(f, 0, SEEK_SET) != 0) {
> >+               RTE_LOG(ERR, USER1, "Size of input could not be 
> >calculated\n");
> >+               goto err;
> >+       }
> >+
> >+       test_data->input_data = rte_zmalloc_socket(NULL,
> >+                               test_data->input_data_sz, 0,
> >+ rte_socket_id());
> >+
> >+       if (test_data->input_data == NULL) {
> >+               RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
> >+                               "file could not be allocated\n");
> >+               goto err;
> >+       }
> >+
> >+       size_t remaining_data = test_data->input_data_sz;
> >+       uint8_t *data = test_data->input_data;
> >+
> >+       while (remaining_data > 0) {
> >+               size_t data_to_read = RTE_MIN(remaining_data,
> >+ actual_file_sz);
> >+
> >+               if (fread(data, data_to_read, 1, f) != 1) {
> >+                       RTE_LOG(ERR, USER1, "Input file could not be 
> >read\n");
> >+                       goto err;
> >+               }
> >+               if (fseek(f, 0, SEEK_SET) != 0) {
> >+                       RTE_LOG(ERR, USER1,
> >+                               "Size of input could not be calculated\n");
> >+                       goto err;
> >+               }
> >+               remaining_data -= data_to_read;
> >+               data += data_to_read;
> It looks like it will run 2nd time only if input file size < input data size 
> in which
> case it will just keep filling input buffer with repeated data.
> Is that the intention here?

TJ: Yes, exactly. If test_data->input_data_sz is bigger than actual_file_sz,
then we fill the buffer with repeated data from the file until the whole buffer
is filled.
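
A standalone sketch of the same repeat-to-fill pattern, with the file I/O
stripped out (plain C, hypothetical names, not the patch code itself):

    #include <stdint.h>
    #include <string.h>

    /* Fill 'dst' (dst_sz bytes) by copying 'src' (src_sz bytes) repeatedly,
     * truncating the last copy so exactly dst_sz bytes are written.
     */
    static void
    fill_repeated(uint8_t *dst, size_t dst_sz, const uint8_t *src, size_t src_sz)
    {
            size_t remaining = dst_sz;

            while (remaining > 0) {
                    size_t chunk = remaining < src_sz ? remaining : src_sz;

                    memcpy(dst, src, chunk);
                    dst += chunk;
                    remaining -= chunk;
            }
    }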

> 
> >+       }
> >+
> >+       if (test_data->input_data_sz > actual_file_sz)
> >+               RTE_LOG(INFO, USER1,
> >+                 "%zu bytes read from file %s, extending the file %.2f 
> >times\n",
> >+                       test_data->input_data_sz, test_data->input_file,
> >+                       (double)test_data->input_data_sz/actual_file_sz);
> >+       else
> >+               RTE_LOG(INFO, USER1,
> >+                       "%zu bytes read from file %s\n",
> >+                       test_data->input_data_sz,
> >+ test_data->input_file);
> >+
> >+       fclose(f);
> >+
> >+       return 0;
> >+
> >+err:
> >+       fclose(f);
> >+       rte_free(test_data->input_data);
> >+       test_data->input_data = NULL;
> >+
> >+       return -1;
> >+}
> >+
> >+static int
> >+comp_perf_initialize_compressdev(struct comp_test_data *test_data) {
> >+       uint8_t enabled_cdev_count;
> >+       uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
> >+
> >+       enabled_cdev_count = rte_compressdev_devices_get(test_data-
> >driver_name,
> >+                       enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
> >+       if (enabled_cdev_count == 0) {
> >+               RTE_LOG(ERR, USER1, "No compress devices type %s 
> >available\n",
> >+                               test_data->driver_name);
> >+               return -EINVAL;
> >+       }
> >+
> >+       if (enabled_cdev_count > 1)
> >+               RTE_LOG(INFO, USER1,
> >+                       "Only the first compress device will be
> >+ used\n");
> >+
> >+       test_data->cdev_id = enabled_cdevs[0];
> >+
> >+       if (comp_perf_check_capabilities(test_data) < 0)
> >+               return -1;
> >+
> >+       /* Configure compressdev (one device, one queue pair) */
> >+       struct rte_compressdev_config config = {
> >+               .socket_id = rte_socket_id(),
> >+               .nb_queue_pairs = 1,
> >+               .max_nb_priv_xforms = NUM_MAX_XFORMS,
> >+               .max_nb_streams = 0
> >+       };
> >+
> >+       if (rte_compressdev_configure(test_data->cdev_id, &config) < 0) {
> >+               RTE_LOG(ERR, USER1, "Device configuration failed\n");
> >+               return -1;
> >+       }
> >+
> >+       if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
> >+                       NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
> >+               RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
> >+               return -1;
> >+       }
> >+
> >+       if (rte_compressdev_start(test_data->cdev_id) < 0) {
> >+               RTE_LOG(ERR, USER1, "Device could not be started\n");
> >+               return -1;
> >+       }
> >+
> >+       return 0;
> >+}
> >+
> >+static int
> >+prepare_bufs(struct comp_test_data *test_data) {
> >+       uint32_t remaining_data = test_data->input_data_sz;
> >+       uint8_t *input_data_ptr = test_data->input_data;
> >+       size_t data_sz;
> >+       uint8_t *data_addr;
> >+       uint32_t i, j;
> >+
> >+       for (i = 0; i < test_data->total_bufs; i++) {
> >+               /* Allocate data in input mbuf and copy data from input file 
> >*/
> >+               test_data->decomp_bufs[i] =
> >+                       rte_pktmbuf_alloc(test_data->decomp_buf_pool);
> >+               if (test_data->decomp_bufs[i] == NULL) {
> >+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
> >+                       return -1;
> >+               }
> >+
> >+               data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
> >+               data_addr = (uint8_t *) rte_pktmbuf_append(
> >+                                       test_data->decomp_bufs[i], data_sz);
> >+               if (data_addr == NULL) {
> >+                       RTE_LOG(ERR, USER1, "Could not append data\n");
> >+                       return -1;
> >+               }
> >+               rte_memcpy(data_addr, input_data_ptr, data_sz);
> >+
> >+               input_data_ptr += data_sz;
> >+               remaining_data -= data_sz;
> >+
> >+               /* Already one segment in the mbuf */
> >+               uint16_t segs_per_mbuf = 1;
> >+
> >+               /* Chain mbufs if needed for input mbufs */
> >+               while (segs_per_mbuf < test_data->max_sgl_segs
> >+                               && remaining_data > 0) {
> >+                       struct rte_mbuf *next_seg =
> >+
> >+ rte_pktmbuf_alloc(test_data->decomp_buf_pool);
> >+
> >+                       if (next_seg == NULL) {
> >+                               RTE_LOG(ERR, USER1,
> >+                                       "Could not allocate mbuf\n");
> >+                               return -1;
> >+                       }
> >+
> >+                       data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
> >+                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
> >+                               data_sz);
> >+
> >+                       if (data_addr == NULL) {
> >+                               RTE_LOG(ERR, USER1, "Could not append
> >+ data\n");
> Since a new buffer per segment is allocated, so is it possible for append to
> fail? think, this check is redundant here.

TJ: Yes, you're right, it should never fail. But I think it's good coding 
practice to add the check just in case.

> >+                               return -1;
> >+                       }
> >+
> >+                       rte_memcpy(data_addr, input_data_ptr, data_sz);
> >+                       input_data_ptr += data_sz;
> >+                       remaining_data -= data_sz;
> >+
> >+                       if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
> >+                                       next_seg) < 0) {
> >+                               RTE_LOG(ERR, USER1, "Could not chain 
> >mbufs\n");
> >+                               return -1;
> >+                       }
> >+                       segs_per_mbuf++;
> >+               }
> >+
> >+               /* Allocate data in output mbuf */
> >+               test_data->comp_bufs[i] =
> >+                       rte_pktmbuf_alloc(test_data->comp_buf_pool);
> >+               if (test_data->comp_bufs[i] == NULL) {
> >+                       RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
> >+                       return -1;
> >+               }
> >+               data_addr = (uint8_t *) rte_pktmbuf_append(
> >+                                       test_data->comp_bufs[i],
> >+                                       test_data->seg_sz);
> >+               if (data_addr == NULL) {
> >+                       RTE_LOG(ERR, USER1, "Could not append data\n");
> >+                       return -1;
> >+               }
> >+
> >+               /* Chain mbufs if needed for output mbufs */
> >+               for (j = 1; j < segs_per_mbuf; j++) {
> >+                       struct rte_mbuf *next_seg =
> >+
> >+ rte_pktmbuf_alloc(test_data->comp_buf_pool);
> >+
> >+                       if (next_seg == NULL) {
> >+                               RTE_LOG(ERR, USER1,
> >+                                       "Could not allocate mbuf\n");
> >+                               return -1;
> >+                       }
> >+
> >+                       data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
> >+                               test_data->seg_sz);
> >+
> >+                       if (data_addr == NULL) {
> >+                               RTE_LOG(ERR, USER1, "Could not append 
> >data\n");
> >+                               return -1;
> >+                       }
> >+
> >+                       if (rte_pktmbuf_chain(test_data->comp_bufs[i],
> >+                                       next_seg) < 0) {
> >+                               RTE_LOG(ERR, USER1, "Could not chain 
> >mbufs\n");
> >+                               return -1;
> >+                       }
> >+               }
> >+       }
> >+
> >+       return 0;
> >+}
> >+
> >+static void
> >+free_bufs(struct comp_test_data *test_data) {
> >+       uint32_t i;
> >+
> >+       for (i = 0; i < test_data->total_bufs; i++) {
> >+               rte_pktmbuf_free(test_data->comp_bufs[i]);
> >+               rte_pktmbuf_free(test_data->decomp_bufs[i]);
> >+       }
> >+       rte_free(test_data->comp_bufs);
> >+       rte_free(test_data->decomp_bufs); }
> >+
> >+static int
> >+main_loop(struct comp_test_data *test_data, uint8_t level,
> >+                       enum rte_comp_xform_type type,
> >+                       uint8_t *output_data_ptr,
> >+                       size_t *output_data_sz,
> >+                       unsigned int benchmarking) {
> >+       uint8_t dev_id = test_data->cdev_id;
> >+       uint32_t i, iter, num_iter;
> >+       struct rte_comp_op **ops, **deq_ops;
> >+       void *priv_xform = NULL;
> >+       struct rte_comp_xform xform;
> >+       size_t output_size = 0;
> >+       struct rte_mbuf **input_bufs, **output_bufs;
> >+       int res = 0;
> >+       int allocated = 0;
> >+
> >+       if (test_data == NULL || !test_data->burst_sz) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Unknow burst size\n");
> >+               return -1;
> >+       }
> >+
> >+       ops = rte_zmalloc_socket(NULL,
> >+               2 * test_data->total_bufs * sizeof(struct rte_comp_op *),
> >+               0, rte_socket_id());
> >+
> >+       if (ops == NULL) {
> >+               RTE_LOG(ERR, USER1,
> >+                       "Can't allocate memory for ops strucures\n");
> >+               return -1;
> >+       }
> >+
> >+       deq_ops = &ops[test_data->total_bufs];
> >+
> >+       if (type == RTE_COMP_COMPRESS) {
> >+               xform = (struct rte_comp_xform) {
> >+                       .type = RTE_COMP_COMPRESS,
> >+                       .compress = {
> >+                               .algo = RTE_COMP_ALGO_DEFLATE,
> >+                               .deflate.huffman = test_data->huffman_enc,
> >+                               .level = level,
> >+                               .window_size = test_data->window_sz,
> >+                               .chksum = RTE_COMP_CHECKSUM_NONE,
> >+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
> >+                       }
> >+               };
> >+               input_bufs = test_data->decomp_bufs;
> >+               output_bufs = test_data->comp_bufs;
> >+       } else {
> >+               xform = (struct rte_comp_xform) {
> >+                       .type = RTE_COMP_DECOMPRESS,
> >+                       .decompress = {
> >+                               .algo = RTE_COMP_ALGO_DEFLATE,
> >+                               .chksum = RTE_COMP_CHECKSUM_NONE,
> >+                               .window_size = test_data->window_sz,
> >+                               .hash_algo = RTE_COMP_HASH_ALGO_NONE
> >+                       }
> >+               };
> >+               input_bufs = test_data->comp_bufs;
> >+               output_bufs = test_data->decomp_bufs;
> >+       }
> >+
> >+       /* Create private xform */
> >+       if (rte_compressdev_private_xform_create(dev_id, &xform,
> >+                       &priv_xform) < 0) {
> >+               RTE_LOG(ERR, USER1, "Private xform could not be created\n");
> >+               res = -1;
> >+               goto end;
> >+       }
> >+
> >+       uint64_t tsc_start, tsc_end, tsc_duration;
> >+
> >+       tsc_start = tsc_end = tsc_duration = 0;
> >+       if (benchmarking) {
> >+               tsc_start = rte_rdtsc();
> >+               num_iter = test_data->num_iter;
> >+       } else
> >+               num_iter = 1;
> Looks like in same code we're doing benchmarking and functional validation.
> It can be reorganised to keep validation test separately like done in
> crypto_perf.

TJ: OK, that makes sense. However, in the interest of getting this into the 18.11
release, I'd like to defer this refactoring and the remainder of your comments
below to the next release.


Next comments - WIP


Br, Tomek
