>-----Original Message-----
>From: Jozwiak, TomaszX <tomaszx.jozw...@intel.com>
>Sent: 06 November 2018 14:19
>To: Verma, Shally <shally.ve...@cavium.com>; dev@dpdk.org; Trahe, Fiona
><fiona.tr...@intel.com>; akhil.go...@nxp.com
>Subject: RE: [PATCH v2 2/3] app/compress-perf: add performance measurement
>
>External Email
>
>> -----Original Message-----
>> From: Verma, Shally [mailto:shally.ve...@cavium.com]
>> Sent: Monday, November 5, 2018 9:57 AM
>> To: Jozwiak, TomaszX <tomaszx.jozw...@intel.com>; dev@dpdk.org; Trahe,
>> Fiona <fiona.tr...@intel.com>; akhil.go...@nxp.com
>> Subject: RE: [PATCH v2 2/3] app/compress-perf: add performance
>> measurement
>>
>>
>>
>> >-----Original Message-----
>> >From: Tomasz Jozwiak <tomaszx.jozw...@intel.com>
>> >Sent: 02 November 2018 15:14
>> >To: dev@dpdk.org; fiona.tr...@intel.com; tomaszx.jozw...@intel.com;
>> >Verma, Shally <shally.ve...@cavium.com>; akhil.go...@nxp.com
>> >Subject: [PATCH v2 2/3] app/compress-perf: add performance
>> measurement
>> >
>> >External Email
>> >
>> >Added performance measurement part into compression perf. test.
>> >
>> >Signed-off-by: De Lara Guarch, Pablo <pablo.de.lara.gua...@intel.com>
>> >Signed-off-by: Tomasz Jozwiak <tomaszx.jozw...@intel.com>
>> >---
>> > app/test-compress-perf/comp_perf_options_parse.c | 8 +-
>> > app/test-compress-perf/main.c | 886
>> ++++++++++++++++++++++-
>> > 2 files changed, 883 insertions(+), 11 deletions(-)
>> >
>> >diff --git a/app/test-compress-perf/comp_perf_options_parse.c
>> >b/app/test-compress-perf/comp_perf_options_parse.c
>> >index bef4d2f..e5da3ad 100644
>> >--- a/app/test-compress-perf/comp_perf_options_parse.c
>> >+++ b/app/test-compress-perf/comp_perf_options_parse.c
>> >@@ -431,10 +431,6 @@ parse_huffman_enc(struct comp_test_data
>> *test_data, const char *arg)
>> > {
>> > "fixed",
>> > RTE_COMP_HUFFMAN_FIXED
>> >- },
>> >- {
>> >- "dynamic",
>> >- RTE_COMP_HUFFMAN_DYNAMIC
>> > }
>> > };
>> >
>> >@@ -569,9 +565,9 @@ comp_perf_options_default(struct comp_test_data
>> *test_data)
>> > test_data->seg_sz = 2048;
>> > test_data->burst_sz = 32;
>> > test_data->pool_sz = 8192;
>> >- test_data->max_sgl_segs = UINT16_MAX;
>> >+ test_data->max_sgl_segs = 16;
>> > test_data->num_iter = 10000;
>> >- test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
>> >+ test_data->huffman_enc = RTE_COMP_HUFFMAN_FIXED;
>> > test_data->test_op = COMPRESS_DECOMPRESS;
>> > test_data->window_sz = -1;
>> > test_data->level.min = 1;
>> >diff --git a/app/test-compress-perf/main.c
>> >b/app/test-compress-perf/main.c index f52b98d..e3f4bf6 100644
>> >--- a/app/test-compress-perf/main.c
>> >+++ b/app/test-compress-perf/main.c
>> >@@ -5,14 +5,728 @@
>> > #include <rte_malloc.h>
>> > #include <rte_eal.h>
>> > #include <rte_log.h>
>> >+#include <rte_cycles.h>
>> > #include <rte_compressdev.h>
>> >
>> > #include "comp_perf_options.h"
>> >
>> >+#define NUM_MAX_XFORMS 16
>> >+#define NUM_MAX_INFLIGHT_OPS 512
>> >+#define EXPANSE_RATIO 1.05
>> >+#define MIN_ISAL_SIZE 8
>> Can we avoid ISAL-specific naming?
>
>TJ: yes true :) will be fixed in V3
>
>
>
>
>> >+
>> >+#define DIV_CEIL(a, b) ((a) / (b) + ((a) % (b) != 0))
>> >+
>> >+/* Cleanup state machine */
>> >+static enum cleanup_st {
>> >+ ST_CLEAR = 0,
>> >+ ST_TEST_DATA,
>> >+ ST_COMPDEV,
>> >+ ST_INPUT_DATA,
>> >+ ST_MEMORY_ALLOC,
>> >+ ST_PREPARE_BUF,
>> >+ ST_DURING_TEST
>> >+} cleanup = ST_CLEAR;
>> >+
>> >+static int
>> >+param_range_check(uint16_t size, const struct rte_param_log2_range
>> >+*range) {
>> >+ unsigned int next_size;
>> >+
>> >+ /* Check lower/upper bounds */
>> >+ if (size < range->min)
>> >+ return -1;
>> >+
>> >+ if (size > range->max)
>> >+ return -1;
>> >+
>> >+ /* If range is actually only one value, size is correct */
>> >+ if (range->increment == 0)
>> >+ return 0;
>> >+
>> >+ /* Check if value is one of the supported sizes */
>> >+ for (next_size = range->min; next_size <= range->max;
>> >+ next_size += range->increment)
>> >+ if (size == next_size)
>> >+ return 0;
>> >+
>> >+ return -1;
>> >+}
>> >+
>> >+static int
>> >+comp_perf_check_capabilities(struct comp_test_data *test_data) {
>> >+ const struct rte_compressdev_capabilities *cap;
>> >+
>> >+ cap = rte_compressdev_capability_get(test_data->cdev_id,
>> >+ RTE_COMP_ALGO_DEFLATE);
>> >+
>> >+ if (cap == NULL) {
>> >+ RTE_LOG(ERR, USER1,
>> >+ "Compress device does not support DEFLATE\n");
>> >+ return -1;
>> >+ }
>> >+
>> >+ uint64_t comp_flags = cap->comp_feature_flags;
>> >+
>> >+ /* Huffman enconding */
>> >+ if (test_data->huffman_enc == RTE_COMP_HUFFMAN_FIXED &&
>> >+ (comp_flags & RTE_COMP_FF_HUFFMAN_FIXED) == 0) {
>> >+ RTE_LOG(ERR, USER1,
>> >+ "Compress device does not supported Fixed
>> >Huffman\n");
>> >+ return -1;
>> >+ }
>> >+
>> >+ if (test_data->huffman_enc == RTE_COMP_HUFFMAN_DYNAMIC &&
>> >+ (comp_flags & RTE_COMP_FF_HUFFMAN_DYNAMIC) == 0) {
>> >+ RTE_LOG(ERR, USER1,
>> >+ "Compress device does not supported Dynamic
>> >Huffman\n");
>> >+ return -1;
>> >+ }
>> >+
>> >+ /* Window size */
>> >+ if (test_data->window_sz != -1) {
>> >+ if (param_range_check(test_data->window_sz, &cap-
>> >window_size)
>> >+ < 0) {
>> >+ RTE_LOG(ERR, USER1,
>> >+ "Compress device does not support "
>> >+ "this window size\n");
>> >+ return -1;
>> >+ }
>> >+ } else
>> >+ /* Set window size to PMD maximum if none was specified */
>> >+ test_data->window_sz = cap->window_size.max;
>> >+
>> >+ /* Check if chained mbufs is supported */
>> >+ if (test_data->max_sgl_segs > 1 &&
>> >+ (comp_flags & RTE_COMP_FF_OOP_SGL_IN_SGL_OUT) == 0)
>> >{
>> >+ RTE_LOG(INFO, USER1, "Compress device does not support "
>> >+ "chained mbufs. Max SGL segments set to
>> >1\n");
>> >+ test_data->max_sgl_segs = 1;
>> >+ }
>> >+
>> >+ /* Level 0 support */
>> >+ if (test_data->level.min == 0 &&
>> >+ (comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) ==
>> 0) {
>> >+ RTE_LOG(ERR, USER1, "Compress device does not support "
>> >+ "level 0 (no compression)\n");
>> >+ return -1;
>> >+ }
>> >+
>> >+ return 0;
>> >+}
>> >+
>> >+static int
>> >+comp_perf_allocate_memory(struct comp_test_data *test_data) {
>> >+ /* Number of segments for input and output
>> >+ * (compression and decompression)
>> >+ */
>> >+ uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
>> >+ test_data->seg_sz);
>> >+ test_data->comp_buf_pool =
>> rte_pktmbuf_pool_create("comp_buf_pool",
>> >+ total_segs,
>> >+ 0, 0, test_data->seg_sz +
>> >RTE_PKTMBUF_HEADROOM,
>> >+ rte_socket_id());
>> >+ if (test_data->comp_buf_pool == NULL) {
>> >+ RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
>> >+ return -1;
>> >+ }
>> >+
>> >+ cleanup = ST_MEMORY_ALLOC;
>> >+ test_data->decomp_buf_pool =
>> rte_pktmbuf_pool_create("decomp_buf_pool",
>> >+ total_segs,
>> >+ 0, 0, test_data->seg_sz +
>> >RTE_PKTMBUF_HEADROOM,
>> >+ rte_socket_id());
>> >+ if (test_data->decomp_buf_pool == NULL) {
>> >+ RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
>> >+ return -1;
>> >+ }
>> Unless I am missing it, you need to free the previously allocated memory
>> here before returning in all of the failure cases.
>
>TJ: There's only one 'freeing stack' at the end of the main application function
>to avoid double freeing resources (which was previously in
>V1).
>We have a state machine for that stuff (static enum cleanup_st) to know what
>should be freed and what has been allocated already.
>In case you mean the state machine is set just after first alloc in line 136:
>
>cleanup = ST_MEMORY_ALLOC;
>
>so we know what should be freed at the end of the application run in line 891:
>
>end:
> switch (cleanup) {
>
> case ST_DURING_TEST:
> case ST_PREPARE_BUF:
> free_bufs(test_data);
> /* fallthrough */
> case ST_MEMORY_ALLOC:
> rte_free(test_data->decomp_bufs);
Even if we are in this state, it doesn't guarantee that all of the buffers in
this state have been allocated. So shouldn't every pointer be null-checked before being freed?
Thanks
Shally
> rte_free(test_data->comp_bufs);
> rte_free(test_data->decompressed_data);
> rte_free(test_data->compressed_data);
> rte_mempool_free(test_data->op_pool);
> rte_mempool_free(test_data->decomp_buf_pool);
> rte_mempool_free(test_data->comp_buf_pool);
> /* fallthrough */
> case ST_INPUT_DATA:
> rte_free(test_data->input_data);
> /* fallthrough */
> case ST_COMPDEV:
> if (test_data->cdev_id != -1)
> rte_compressdev_stop(test_data->cdev_id);
> /* fallthrough */
> case ST_TEST_DATA:
> rte_free(test_data);
> /* fallthrough */
> case ST_CLEAR:
> default:
> i = rte_eal_cleanup();
> if (i) {
> RTE_LOG(ERR, USER1,
> "Error from rte_eal_cleanup(), %d\n", i);
> ret = i;
> }
> break;
> }
> return ret;
>
>
>
>>
>> >+
>> >+ test_data->total_bufs = DIV_CEIL(total_segs,
>> >+ test_data->max_sgl_segs);
>> >+
>> >+ test_data->op_pool = rte_comp_op_pool_create("op_pool",
>> >+ test_data->total_bufs,
>> >+ 0, 0, rte_socket_id());
>> >+ if (test_data->op_pool == NULL) {
>> >+ RTE_LOG(ERR, USER1, "Comp op mempool could not be
>> created\n");
>> >+ return -1;
>> >+ }
>> >+
>> >+ /*
>> >+ * Compressed data might be a bit larger than input data,
>> >+ * if data cannot be compressed
>> >+ */
>> >+ test_data->compressed_data = rte_zmalloc_socket(NULL,
>> >+ test_data->input_data_sz * EXPANSE_RATIO
>> >+ +
>> >+ MIN_ISAL_SIZE, 0,
>> MIN_ISAL_SIZE looks specific to the ISAL driver. If so, then is this perf app
>> specific
>> to that PMD? Or can we make it somewhat generic?
>
>TJ: True will be fixed
>
>
>>
>> >+ rte_socket_id());
>> >+ if (test_data->compressed_data == NULL) {
>> >+ RTE_LOG(ERR, USER1, "Memory to hold the data from the input
>> >"
>> >+ "file could not be allocated\n");
>> >+ return -1;
>> >+ }
>> >+
>> >+ test_data->decompressed_data = rte_zmalloc_socket(NULL,
>> >+ test_data->input_data_sz, 0,
>> >+ rte_socket_id());
>> >+ if (test_data->decompressed_data == NULL) {
>> >+ RTE_LOG(ERR, USER1, "Memory to hold the data from the input
>> >"
>> >+ "file could not be allocated\n");
>> >+ return -1;
>> >+ }
>> >+
>> >+ test_data->comp_bufs = rte_zmalloc_socket(NULL,
>> >+ test_data->total_bufs * sizeof(struct rte_mbuf *),
>> >+ 0, rte_socket_id());
>> >+ if (test_data->comp_bufs == NULL) {
>> >+ RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
>> >+ " could not be allocated\n");
>> >+ return -1;
>> >+ }
>> >+
>> >+ test_data->decomp_bufs = rte_zmalloc_socket(NULL,
>> >+ test_data->total_bufs * sizeof(struct rte_mbuf *),
>> >+ 0, rte_socket_id());
>> >+ if (test_data->decomp_bufs == NULL) {
>> >+ RTE_LOG(ERR, USER1, "Memory to hold the decompression
>> mbufs"
>> >+ " could not be allocated\n");
>> >+ return -1;
>> >+ }
>> >+ return 0;
>> >+}
>> >+
>> >+static int
>> >+comp_perf_dump_input_data(struct comp_test_data *test_data) {
>> >+ FILE *f = fopen(test_data->input_file, "r");
>> >+ int ret = -1;
>> >+
>> >+ if (f == NULL) {
>> >+ RTE_LOG(ERR, USER1, "Input file could not be opened\n");
>> >+ return -1;
>> >+ }
>> >+
>> >+ if (fseek(f, 0, SEEK_END) != 0) {
>> >+ RTE_LOG(ERR, USER1, "Size of input could not be
>> >calculated\n");
>> >+ goto end;
>> >+ }
>> >+ size_t actual_file_sz = ftell(f);
>> >+ /* If extended input data size has not been set,
>> >+ * input data size = file size
>> >+ */
>> >+
>> >+ if (test_data->input_data_sz == 0)
>> >+ test_data->input_data_sz = actual_file_sz;
>> >+
>> >+ if (fseek(f, 0, SEEK_SET) != 0) {
>> >+ RTE_LOG(ERR, USER1, "Size of input could not be
>> >calculated\n");
>> >+ goto end;
>> >+ }
>> >+
>> >+ test_data->input_data = rte_zmalloc_socket(NULL,
>> >+ test_data->input_data_sz, 0,
>> >+ rte_socket_id());
>> >+
>> >+ if (test_data->input_data == NULL) {
>> >+ RTE_LOG(ERR, USER1, "Memory to hold the data from the input
>> >"
>> >+ "file could not be allocated\n");
>> >+ goto end;
>> >+ }
>> >+
>> >+ size_t remaining_data = test_data->input_data_sz;
>> >+ uint8_t *data = test_data->input_data;
>> >+
>> >+ while (remaining_data > 0) {
>> >+ size_t data_to_read = RTE_MIN(remaining_data,
>> >+ actual_file_sz);
>> >+
>> >+ if (fread(data, data_to_read, 1, f) != 1) {
>> >+ RTE_LOG(ERR, USER1, "Input file could not be
>> >read\n");
>> >+ goto end;
>> >+ }
>> >+ if (fseek(f, 0, SEEK_SET) != 0) {
>> >+ RTE_LOG(ERR, USER1,
>> >+ "Size of input could not be calculated\n");
>> >+ goto end;
>> >+ }
>> >+ remaining_data -= data_to_read;
>> >+ data += data_to_read;
>> >+ }
>> >+
>> >+ if (test_data->input_data_sz > actual_file_sz)
>> >+ RTE_LOG(INFO, USER1,
>> >+ "%zu bytes read from file %s, extending the file %.2f
>> >times\n",
>> >+ test_data->input_data_sz, test_data->input_file,
>> >+ (double)test_data->input_data_sz/actual_file_sz);
>> >+ else
>> >+ RTE_LOG(INFO, USER1,
>> >+ "%zu bytes read from file %s\n",
>> >+ test_data->input_data_sz,
>> >+ test_data->input_file);
>> >+
>> >+ ret = 0;
>> >+
>> >+end:
>> >+ fclose(f);
>> >+ return ret;
>> >+}
>> >+
>> >+static int
>> >+comp_perf_initialize_compressdev(struct comp_test_data *test_data) {
>> >+ uint8_t enabled_cdev_count;
>> >+ uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
>> >+
>> >+ enabled_cdev_count = rte_compressdev_devices_get(test_data-
>> >driver_name,
>> >+ enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
>> >+ if (enabled_cdev_count == 0) {
>> >+ RTE_LOG(ERR, USER1, "No compress devices type %s
>> >available\n",
>> >+ test_data->driver_name);
>> >+ return -EINVAL;
>> >+ }
>> >+
>> >+ if (enabled_cdev_count > 1)
>> >+ RTE_LOG(INFO, USER1,
>> >+ "Only the first compress device will be
>> >+ used\n");
>> >+
>> >+ test_data->cdev_id = enabled_cdevs[0];
>> >+
>> >+ if (comp_perf_check_capabilities(test_data) < 0)
>> >+ return -1;
>> >+
>> >+ /* Configure compressdev (one device, one queue pair) */
>> >+ struct rte_compressdev_config config = {
>> >+ .socket_id = rte_socket_id(),
>> >+ .nb_queue_pairs = 1,
>> >+ .max_nb_priv_xforms = NUM_MAX_XFORMS,
>> >+ .max_nb_streams = 0
>> >+ };
>> >+
>> >+ if (rte_compressdev_configure(test_data->cdev_id, &config) < 0) {
>> >+ RTE_LOG(ERR, USER1, "Device configuration failed\n");
>> >+ return -1;
>> >+ }
>> >+
>> >+ if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
>> >+ NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
>> >+ RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
>> >+ return -1;
>> >+ }
>> >+
>> >+ if (rte_compressdev_start(test_data->cdev_id) < 0) {
>> >+ RTE_LOG(ERR, USER1, "Device could not be started\n");
>> >+ return -1;
>> >+ }
>> >+
>> >+ return 0;
>> >+}
>> >+
>> >+static int
>> >+prepare_bufs(struct comp_test_data *test_data) {
>> >+ uint32_t remaining_data = test_data->input_data_sz;
>> >+ uint8_t *input_data_ptr = test_data->input_data;
>> >+ size_t data_sz;
>> >+ uint8_t *data_addr;
>> >+ uint32_t i, j;
>> >+
>> >+ for (i = 0; i < test_data->total_bufs; i++) {
>> >+ /* Allocate data in input mbuf and copy data from input
>> >file */
>> >+ test_data->decomp_bufs[i] =
>> >+ rte_pktmbuf_alloc(test_data->decomp_buf_pool);
>> >+ if (test_data->decomp_bufs[i] == NULL) {
>> >+ RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
>> >+ return -1;
>> >+ }
>> >+
>> >+ cleanup = ST_PREPARE_BUF;
>> >+ data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
>> >+ data_addr = (uint8_t *) rte_pktmbuf_append(
>> >+ test_data->decomp_bufs[i], data_sz);
>> >+ if (data_addr == NULL) {
>> >+ RTE_LOG(ERR, USER1, "Could not append data\n");
>> Same here: free the allocated buffer before returning from the failure cases.
>>
>> Thanks
>> Shally
>
>Thx, Tomek