Re: [PATCH] maintainers: update email address
On 9/19/23 05:44, Chenbo Xia wrote:
> I am leaving Intel, so replace my Intel email with my personal one temporarily.
>
> Signed-off-by: Chenbo Xia
> ---
>  .mailmap    |  2 +-
>  MAINTAINERS | 12 ++--
>  2 files changed, 7 insertions(+), 7 deletions(-)

Acked-by: Maxime Coquelin

Thanks,
Maxime
Re: [RFC PATCH v4 3/4] dts: add doc generation
> diff --git a/doc/guides/conf.py b/doc/guides/conf.py > index 0f7ff5282d..737e5a5688 100644 > --- a/doc/guides/conf.py > +++ b/doc/guides/conf.py > @@ -7,10 +7,9 @@ > from sphinx import __version__ as sphinx_version > from os import listdir > from os import environ > -from os.path import basename > -from os.path import dirname > +from os.path import basename, dirname > from os.path import join as path_join > -from sys import argv, stderr > +from sys import argv, stderr, path > > import configparser > > @@ -24,6 +23,29 @@ >file=stderr) > pass > > +extensions = ['sphinx.ext.napoleon'] > + > +# Python docstring options > +autodoc_default_options = { > +'members': True, > +'member-order': 'bysource', > +'show-inheritance': True, > +} > +autodoc_typehints = 'both' > +autodoc_typehints_format = 'short' > +napoleon_numpy_docstring = False > +napoleon_attr_annotations = True > +napoleon_use_ivar = True > +napoleon_use_rtype = False > +add_module_names = False > +toc_object_entries = False > + > +# Sidebar config > +html_theme_options = { > +'collapse_navigation': False, > +'navigation_depth': -1, > +} > + Thomas, Bruce, I've added this configuration which modifies the sidebar a bit. This affects the DPDK docs so I'd like to know whether this is permissible. I think the sidebar works better this way even with DPDK docs, but that may be a personal preference. Let me know what you think. > stop_on_error = ('-W' in argv) > > project = 'Data Plane Development Kit'
[PATCH v2 1/1] app/mldev: fix check for filelist and models count
Fix incorrect check for filelist and models count. Fixes: bbd272edcb14 ("app/mldev: add ordered inferences") Fixes: f6661e6d9a3a ("app/mldev: validate model operations") Cc: sta...@dpdk.org Signed-off-by: Srikanth Yalavarthi --- app/test-mldev/ml_options.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/app/test-mldev/ml_options.c b/app/test-mldev/ml_options.c index bed06d1bf7..d068b30df5 100644 --- a/app/test-mldev/ml_options.c +++ b/app/test-mldev/ml_options.c @@ -83,14 +83,15 @@ ml_parse_models(struct ml_options *opt, const char *arg) token = strtok(models, delim); while (token != NULL) { - strlcpy(opt->filelist[opt->nb_filelist].model, token, PATH_MAX); - opt->nb_filelist++; - if (opt->nb_filelist >= ML_TEST_MAX_MODELS) { ml_err("Exceeded model count, max = %d\n", ML_TEST_MAX_MODELS); ret = -EINVAL; break; } + + strlcpy(opt->filelist[opt->nb_filelist].model, token, PATH_MAX); + opt->nb_filelist++; + token = strtok(NULL, delim); } -- 2.41.0
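For context, the pattern behind this fix in a standalone hedged sketch (names are illustrative, not the test-mldev code; snprintf stands in for strlcpy to stay libc-only): the capacity check must run before the element is written and the counter incremented, otherwise the final accepted token writes one slot past the array.

#include <errno.h>
#include <stdio.h>

#define MAX_ENTRIES 8

static char entries[MAX_ENTRIES][64];
static int nb_entries;

static int
add_entry(const char *token)
{
        /* Reject while full, before touching entries[] or the counter. */
        if (nb_entries >= MAX_ENTRIES) {
                fprintf(stderr, "Exceeded entry count, max = %d\n", MAX_ENTRIES);
                return -EINVAL;
        }

        /* Safe: nb_entries is a valid index here. */
        snprintf(entries[nb_entries], sizeof(entries[0]), "%s", token);
        nb_entries++;

        return 0;
}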
[PATCH v1 1/1] app/mldev: fix file and buffer handling
Addressed issues reported by klocwork static analysis tool. Fixes: fccf444cfe05 ("app/mldev: add function for file read") Cc: sta...@dpdk.org Signed-off-by: Srikanth Yalavarthi --- app/test-mldev/test_common.c | 1 + app/test-mldev/test_inference_common.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/app/test-mldev/test_common.c b/app/test-mldev/test_common.c index 357a85a312..90e591f205 100644 --- a/app/test-mldev/test_common.c +++ b/app/test-mldev/test_common.c @@ -34,6 +34,7 @@ ml_read_file(char *file, size_t *size, char **buffer) if (fstat(fd, &file_stat) != 0) { ml_err("fstat failed for file: %s\n", file); + close(fd); return -errno; } diff --git a/app/test-mldev/test_inference_common.c b/app/test-mldev/test_inference_common.c index 418bf38be4..05b221401b 100644 --- a/app/test-mldev/test_inference_common.c +++ b/app/test-mldev/test_inference_common.c @@ -653,6 +653,7 @@ ml_inference_iomem_setup(struct ml_test *test, struct ml_options *opt, uint16_t ml_err("Invalid input file, size = %zu (expected size = %" PRIu64 ")\n", fsize, t->model[fid].inp_dsize); ret = -EINVAL; + free(buffer); goto error; } @@ -670,6 +671,7 @@ ml_inference_iomem_setup(struct ml_test *test, struct ml_options *opt, uint16_t ml_err("Invalid reference file, size = %zu (expected size = %" PRIu64 ")\n", fsize, t->model[fid].out_dsize); ret = -EINVAL; + free(buffer); goto error; } } @@ -703,8 +705,6 @@ ml_inference_iomem_setup(struct ml_test *test, struct ml_options *opt, uint16_t t->model[fid].io_pool = NULL; } - free(buffer); - return ret; } -- 2.41.0
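A hedged, self-contained sketch of the cleanup discipline these two hunks enforce: every resource acquired before an error exit (the fd from open(), a previously allocated buffer) must be released on that path. This is illustrative libc-only code, not the driver or test code itself:

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>

static int
read_file(const char *path, size_t *size, char **buffer)
{
        struct stat st;
        char *buf;
        ssize_t n;
        int fd;

        fd = open(path, O_RDONLY);
        if (fd == -1)
                return -errno;

        if (fstat(fd, &st) != 0) {
                close(fd); /* the bug class fixed above: fd leaked on this path */
                return -errno;
        }

        buf = malloc(st.st_size);
        if (buf == NULL) {
                close(fd);
                return -ENOMEM;
        }

        n = read(fd, buf, st.st_size);
        close(fd);
        if (n != st.st_size) {
                free(buf); /* the buffer must not leak on error paths either */
                return -EIO;
        }

        *size = st.st_size;
        *buffer = buf;
        return 0;
}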
[PATCH v2 0/3] Spec changes to support multi I/O models
This series implements changes to mldev spec to extend support for ML models with multiple inputs and outputs. Changes include introduction of I/O layout to support packed and split buffers for model input and output. Extended the rte_ml_model_info structure to support multiple inputs and outputs. Updated rte_ml_op and quantize / dequantize APIs to support an array of input and output ML buffer segments. Support for batches option is dropped from test application. v2: - Minor fixes - Cleanup of application help v1: - Initial changes Srikanth Yalavarthi (3): mldev: add support for arbitrary shape dimensions mldev: introduce support for IO layout mldev: drop input and output size get APIs app/test-mldev/ml_options.c| 16 - app/test-mldev/ml_options.h| 2 - app/test-mldev/test_inference_common.c | 420 + app/test-mldev/test_inference_common.h | 6 + app/test-mldev/test_model_common.c | 6 - app/test-mldev/test_model_common.h | 1 - doc/guides/tools/testmldev.rst | 6 - drivers/ml/cnxk/cn10k_ml_dev.h | 3 + drivers/ml/cnxk/cn10k_ml_model.c | 84 +++-- drivers/ml/cnxk/cn10k_ml_model.h | 12 + drivers/ml/cnxk/cn10k_ml_ops.c | 135 +++- lib/mldev/meson.build | 2 +- lib/mldev/mldev_utils.c| 30 -- lib/mldev/mldev_utils.h| 16 - lib/mldev/rte_mldev.c | 50 +-- lib/mldev/rte_mldev.h | 201 +--- lib/mldev/rte_mldev_core.h | 68 +--- lib/mldev/version.map | 3 - 18 files changed, 506 insertions(+), 555 deletions(-) -- 2.41.0
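As a rough sketch of the packed and split I/O layouts this series introduces (the struct below is a simplified stand-in for rte_ml_buff_seg, not the spec definition):

#include <stdint.h>

struct buff_seg {
        void    *addr;   /* segment virtual address */
        uint32_t length; /* segment length in bytes */
};

/* Packed: every input lives back to back in one contiguous region. */
static void
fill_packed(struct buff_seg *seg, uint8_t *base, const uint32_t *sz, uint32_t n)
{
        uint32_t i, total = 0;

        for (i = 0; i < n; i++)
                total += sz[i];

        seg->addr = base;
        seg->length = total;
}

/* Split: one independently placed region per input. */
static void
fill_split(struct buff_seg *segs, uint8_t **bufs, const uint32_t *sz, uint32_t n)
{
        uint32_t i;

        for (i = 0; i < n; i++) {
                segs[i].addr = bufs[i];
                segs[i].length = sz[i];
        }
}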
[PATCH v2 3/3] mldev: drop input and output size get APIs
Drop support and use of ML input and output size get functions, rte_ml_io_input_size_get and rte_ml_io_output_size_get. These functions are not required, as the model buffer size can be computed from the fields of updated rte_ml_io_info structure. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_ops.c | 50 lib/mldev/rte_mldev.c | 38 - lib/mldev/rte_mldev.h | 60 -- lib/mldev/rte_mldev_core.h | 54 -- lib/mldev/version.map | 2 -- 5 files changed, 204 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index 1d72fb52a6..4abf4ae0d3 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -2110,54 +2110,6 @@ cn10k_ml_model_params_update(struct rte_ml_dev *dev, uint16_t model_id, void *bu return 0; } -static int -cn10k_ml_io_input_size_get(struct rte_ml_dev *dev, uint16_t model_id, uint32_t nb_batches, - uint64_t *input_qsize, uint64_t *input_dsize) -{ - struct cn10k_ml_model *model; - - model = dev->data->models[model_id]; - - if (model == NULL) { - plt_err("Invalid model_id = %u", model_id); - return -EINVAL; - } - - if (input_qsize != NULL) - *input_qsize = PLT_U64_CAST(model->addr.total_input_sz_q * - PLT_DIV_CEIL(nb_batches, model->batch_size)); - - if (input_dsize != NULL) - *input_dsize = PLT_U64_CAST(model->addr.total_input_sz_d * - PLT_DIV_CEIL(nb_batches, model->batch_size)); - - return 0; -} - -static int -cn10k_ml_io_output_size_get(struct rte_ml_dev *dev, uint16_t model_id, uint32_t nb_batches, - uint64_t *output_qsize, uint64_t *output_dsize) -{ - struct cn10k_ml_model *model; - - model = dev->data->models[model_id]; - - if (model == NULL) { - plt_err("Invalid model_id = %u", model_id); - return -EINVAL; - } - - if (output_qsize != NULL) - *output_qsize = PLT_U64_CAST(model->addr.total_output_sz_q * -PLT_DIV_CEIL(nb_batches, model->batch_size)); - - if (output_dsize != NULL) - *output_dsize = PLT_U64_CAST(model->addr.total_output_sz_d * -PLT_DIV_CEIL(nb_batches, model->batch_size)); - - return 0; -} - static int cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, struct rte_ml_buff_seg **dbuffer, struct rte_ml_buff_seg **qbuffer) @@ -2636,8 +2588,6 @@ struct rte_ml_dev_ops cn10k_ml_ops = { .model_params_update = cn10k_ml_model_params_update, /* I/O ops */ - .io_input_size_get = cn10k_ml_io_input_size_get, - .io_output_size_get = cn10k_ml_io_output_size_get, .io_quantize = cn10k_ml_io_quantize, .io_dequantize = cn10k_ml_io_dequantize, }; diff --git a/lib/mldev/rte_mldev.c b/lib/mldev/rte_mldev.c index 9a48ed3e94..cc5f2e0cc6 100644 --- a/lib/mldev/rte_mldev.c +++ b/lib/mldev/rte_mldev.c @@ -691,44 +691,6 @@ rte_ml_model_params_update(int16_t dev_id, uint16_t model_id, void *buffer) return (*dev->dev_ops->model_params_update)(dev, model_id, buffer); } -int -rte_ml_io_input_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches, -uint64_t *input_qsize, uint64_t *input_dsize) -{ - struct rte_ml_dev *dev; - - if (!rte_ml_dev_is_valid_dev(dev_id)) { - RTE_MLDEV_LOG(ERR, "Invalid dev_id = %d\n", dev_id); - return -EINVAL; - } - - dev = rte_ml_dev_pmd_get_dev(dev_id); - if (*dev->dev_ops->io_input_size_get == NULL) - return -ENOTSUP; - - return (*dev->dev_ops->io_input_size_get)(dev, model_id, nb_batches, input_qsize, - input_dsize); -} - -int -rte_ml_io_output_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches, - uint64_t *output_qsize, uint64_t *output_dsize) -{ - struct rte_ml_dev *dev; - - if (!rte_ml_dev_is_valid_dev(dev_id)) { - RTE_MLDEV_LOG(ERR, "Invalid dev_id = %d\n", 
dev_id); - return -EINVAL; - } - - dev = rte_ml_dev_pmd_get_dev(dev_id); - if (*dev->dev_ops->io_output_size_get == NULL) - return -ENOTSUP; - - return (*dev->dev_ops->io_output_size_get)(dev, model_id, nb_batches, output_qsize, - output_dsize); -} - int rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, struct rte_ml_buff_seg **dbuffer, struct rte_ml_buff_seg **qbuffer) diff --git a/lib/mldev/rte_mldev.h b/lib/mldev/rte_mldev.h index 316c6f
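The replacement computation the commit message above alludes to can be sketched as follows, assuming the updated rte_ml_io_info exposes an arbitrary-rank shape and a per-element size (a minimal illustration, not the library code):

#include <stdint.h>

/* Element count of a tensor with an arbitrary number of dimensions. */
static uint64_t
tensor_nb_elements(const uint32_t *shape, uint32_t nb_dims)
{
        uint64_t n = 1;
        uint32_t i;

        for (i = 0; i < nb_dims; i++)
                n *= shape[i];

        return n;
}

/* Buffer size in bytes, replacing the dropped *_size_get() helpers. */
static uint64_t
tensor_buffer_size(const uint32_t *shape, uint32_t nb_dims, uint64_t element_sz)
{
        return tensor_nb_elements(shape, nb_dims) * element_sz;
}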
[PATCH v2 1/3] mldev: add support for arbitrary shape dimensions
Updated rte_ml_io_info to support shape of arbitrary number of dimensions. Dropped use of rte_ml_io_shape and rte_ml_io_format. Introduced new fields nb_elements and size in rte_ml_io_info. Updated drivers and app/mldev to support the changes. Signed-off-by: Srikanth Yalavarthi --- app/test-mldev/test_inference_common.c | 97 +- drivers/ml/cnxk/cn10k_ml_model.c | 78 + drivers/ml/cnxk/cn10k_ml_model.h | 12 drivers/ml/cnxk/cn10k_ml_ops.c | 11 +-- lib/mldev/mldev_utils.c| 30 lib/mldev/mldev_utils.h| 16 - lib/mldev/rte_mldev.h | 59 lib/mldev/version.map | 1 - 8 files changed, 94 insertions(+), 210 deletions(-) diff --git a/app/test-mldev/test_inference_common.c b/app/test-mldev/test_inference_common.c index 05b221401b..b40519b5e3 100644 --- a/app/test-mldev/test_inference_common.c +++ b/app/test-mldev/test_inference_common.c @@ -3,6 +3,7 @@ */ #include +#include #include #include @@ -18,11 +19,6 @@ #include "ml_common.h" #include "test_inference_common.h" -#define ML_TEST_READ_TYPE(buffer, type) (*((type *)buffer)) - -#define ML_TEST_CHECK_OUTPUT(output, reference, tolerance) \ - (((float)output - (float)reference) <= (((float)reference * tolerance) / 100.0)) - #define ML_OPEN_WRITE_GET_ERR(name, buffer, size, err) \ do { \ FILE *fp = fopen(name, "w+"); \ @@ -763,9 +759,9 @@ ml_inference_validation(struct ml_test *test, struct ml_request *req) { struct test_inference *t = ml_test_priv((struct ml_test *)test); struct ml_model *model; - uint32_t nb_elements; - uint8_t *reference; - uint8_t *output; + float *reference; + float *output; + float deviation; bool match; uint32_t i; uint32_t j; @@ -777,89 +773,30 @@ ml_inference_validation(struct ml_test *test, struct ml_request *req) match = (rte_hash_crc(model->output, model->out_dsize, 0) == rte_hash_crc(model->reference, model->out_dsize, 0)); } else { - output = model->output; - reference = model->reference; + output = (float *)model->output; + reference = (float *)model->reference; i = 0; next_output: - nb_elements = - model->info.output_info[i].shape.w * model->info.output_info[i].shape.x * - model->info.output_info[i].shape.y * model->info.output_info[i].shape.z; j = 0; next_element: match = false; - switch (model->info.output_info[i].dtype) { - case RTE_ML_IO_TYPE_INT8: - if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int8_t), -ML_TEST_READ_TYPE(reference, int8_t), -t->cmn.opt->tolerance)) - match = true; - - output += sizeof(int8_t); - reference += sizeof(int8_t); - break; - case RTE_ML_IO_TYPE_UINT8: - if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint8_t), -ML_TEST_READ_TYPE(reference, uint8_t), -t->cmn.opt->tolerance)) - match = true; - - output += sizeof(float); - reference += sizeof(float); - break; - case RTE_ML_IO_TYPE_INT16: - if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int16_t), -ML_TEST_READ_TYPE(reference, int16_t), -t->cmn.opt->tolerance)) - match = true; - - output += sizeof(int16_t); - reference += sizeof(int16_t); - break; - case RTE_ML_IO_TYPE_UINT16: - if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, uint16_t), -ML_TEST_READ_TYPE(reference, uint16_t), -t->cmn.opt->tolerance)) - match = true; - - output += sizeof(uint16_t); - reference += sizeof(uint16_t); - break; - case RTE_ML_IO_TYPE_INT32: - if (ML_TEST_CHECK_OUTPUT(ML_TEST_READ_TYPE(output, int32_t), -ML_TEST_READ_TYPE(reference, int32_t), -t->cmn.opt->tolerance)) - match = true;
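The float-based validation visible in this diff replaces the per-dtype ML_TEST_CHECK_OUTPUT macro; below is a hedged sketch of a symmetric relative-tolerance check with tolerance given in percent, as in the test options (the dropped macro was one-sided, so the exact formula in the final code may differ):

#include <math.h>
#include <stdbool.h>

static bool
output_matches(float output, float reference, float tolerance)
{
        float deviation;

        if (reference == 0.0f)
                return output == 0.0f;

        /* |output - reference| within tolerance% of |reference|. */
        deviation = fabsf(output - reference) / fabsf(reference);
        return deviation <= tolerance / 100.0f;
}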
[PATCH v2 2/3] mldev: introduce support for IO layout
Introduce IO layout in ML device specification. IO layout defines the expected arrangement of model input and output buffers in the memory. Packed and Split layout support is added in the specification. Updated rte_ml_op to support array of rte_ml_buff_seg pointers to support packed and split I/O layouts. Updated ML quantize and dequantize APIs to support rte_ml_buff_seg pointer arrays. Replaced batch_size with min_batches and max_batches in rte_ml_model_info. Implement support for model IO layout in ml/cnxk driver. Updated the ML test application to support IO layout and dropped support for '--batches' in test application. Signed-off-by: Srikanth Yalavarthi --- app/test-mldev/ml_options.c| 16 -- app/test-mldev/ml_options.h| 2 - app/test-mldev/test_inference_common.c | 327 + app/test-mldev/test_inference_common.h | 6 + app/test-mldev/test_model_common.c | 6 - app/test-mldev/test_model_common.h | 1 - doc/guides/tools/testmldev.rst | 6 - drivers/ml/cnxk/cn10k_ml_dev.h | 3 + drivers/ml/cnxk/cn10k_ml_model.c | 6 +- drivers/ml/cnxk/cn10k_ml_ops.c | 74 +++--- lib/mldev/meson.build | 2 +- lib/mldev/rte_mldev.c | 12 +- lib/mldev/rte_mldev.h | 90 +-- lib/mldev/rte_mldev_core.h | 14 +- 14 files changed, 418 insertions(+), 147 deletions(-) diff --git a/app/test-mldev/ml_options.c b/app/test-mldev/ml_options.c index d068b30df5..eeaffec399 100644 --- a/app/test-mldev/ml_options.c +++ b/app/test-mldev/ml_options.c @@ -28,7 +28,6 @@ ml_options_default(struct ml_options *opt) opt->burst_size = 1; opt->queue_pairs = 1; opt->queue_size = 1; - opt->batches = 0; opt->tolerance = 0.0; opt->stats = false; opt->debug = false; @@ -213,18 +212,6 @@ ml_parse_queue_size(struct ml_options *opt, const char *arg) return ret; } -static int -ml_parse_batches(struct ml_options *opt, const char *arg) -{ - int ret; - - ret = parser_read_uint16(&opt->batches, arg); - if (ret != 0) - ml_err("Invalid option, batches = %s\n", arg); - - return ret; -} - static int ml_parse_tolerance(struct ml_options *opt, const char *arg) { @@ -255,7 +242,6 @@ ml_dump_test_options(const char *testname) "\t\t--burst_size : inference burst size\n" "\t\t--queue_pairs : number of queue pairs to create\n" "\t\t--queue_size : size of queue-pair\n" - "\t\t--batches : number of batches of input\n" "\t\t--tolerance: maximum tolerance (%%) for output validation\n" "\t\t--stats: enable reporting device and model statistics\n"); printf("\n"); @@ -287,7 +273,6 @@ static struct option lgopts[] = { {ML_BURST_SIZE, 1, 0, 0}, {ML_QUEUE_PAIRS, 1, 0, 0}, {ML_QUEUE_SIZE, 1, 0, 0}, - {ML_BATCHES, 1, 0, 0}, {ML_TOLERANCE, 1, 0, 0}, {ML_STATS, 0, 0, 0}, {ML_DEBUG, 0, 0, 0}, @@ -309,7 +294,6 @@ ml_opts_parse_long(int opt_idx, struct ml_options *opt) {ML_BURST_SIZE, ml_parse_burst_size}, {ML_QUEUE_PAIRS, ml_parse_queue_pairs}, {ML_QUEUE_SIZE, ml_parse_queue_size}, - {ML_BATCHES, ml_parse_batches}, {ML_TOLERANCE, ml_parse_tolerance}, }; diff --git a/app/test-mldev/ml_options.h b/app/test-mldev/ml_options.h index 622a4c05fc..90e22adeac 100644 --- a/app/test-mldev/ml_options.h +++ b/app/test-mldev/ml_options.h @@ -21,7 +21,6 @@ #define ML_BURST_SIZE ("burst_size") #define ML_QUEUE_PAIRS ("queue_pairs") #define ML_QUEUE_SIZE ("queue_size") -#define ML_BATCHES ("batches") #define ML_TOLERANCE ("tolerance") #define ML_STATS ("stats") #define ML_DEBUG ("debug") @@ -44,7 +43,6 @@ struct ml_options { uint16_t burst_size; uint16_t queue_pairs; uint16_t queue_size; - uint16_t batches; float tolerance; bool stats; bool debug; diff --git a/app/test-mldev/test_inference_common.c 
b/app/test-mldev/test_inference_common.c index b40519b5e3..846f71abb1 100644 --- a/app/test-mldev/test_inference_common.c +++ b/app/test-mldev/test_inference_common.c @@ -47,7 +47,10 @@ ml_enqueue_single(void *arg) uint64_t start_cycle; uint32_t burst_enq; uint32_t lcore_id; + uint64_t offset; + uint64_t bufsz; uint16_t fid; + uint32_t i; int ret; lcore_id = rte_lcore_id(); @@ -66,24 +69,64 @@ ml_enqueue_single(void *arg) if (ret != 0) goto next_model; -retry: +retry_req: ret = rte_mempool_get(t->model[fid].io_pool, (void **)&req); if (ret != 0) - goto retry; + goto retry_req; +
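A loose sketch of the layout-dependent address selection the reworked enqueue path performs (names here are hypothetical; the actual driver derives offsets from the model's I/O info):

#include <stdint.h>

enum io_layout { IO_LAYOUT_PACKED, IO_LAYOUT_SPLIT }; /* hypothetical names */

static uint8_t *
input_base(enum io_layout layout, uint8_t *pool_base, uint64_t packed_off,
           uint8_t **split_bufs, uint32_t input_idx)
{
        if (layout == IO_LAYOUT_PACKED)
                return pool_base + packed_off; /* all inputs follow contiguously */

        return split_bufs[input_idx];          /* one buffer per input */
}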
[PATCH v3] app/test: add external mbuf IPsec tests
Adding IPsec tests using external mbuf API. Signed-off-by: Tejasree Kondoj --- v3: - Rebased and fixed Intel compilation failure. v2: - Fixed compilation with ubuntu-20.04-gcc-static-i386. app/test/test_cryptodev.c| 191 ++- app/test/test_cryptodev_security_ipsec.h | 1 + 2 files changed, 189 insertions(+), 3 deletions(-) diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c index 049ee17d41..22e9800779 100644 --- a/app/test/test_cryptodev.c +++ b/app/test/test_cryptodev.c @@ -9754,6 +9754,133 @@ test_PDCP_SDAP_PROTO_decap_all(void) return (all_err == TEST_SUCCESS) ? TEST_SUCCESS : TEST_FAILED; } +static inline void +ext_mbuf_callback_fn_free(void *addr __rte_unused, void *opaque __rte_unused) +{ +} + +static inline void +ext_mbuf_memzone_free(int nb_segs) +{ + int i; + + for (i = 0; i <= nb_segs; i++) { + char mz_name[RTE_MEMZONE_NAMESIZE]; + const struct rte_memzone *memzone; + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "ext_buf_%d", i); + memzone = rte_memzone_lookup(mz_name); + if (memzone != NULL) { + rte_memzone_free(memzone); + memzone = NULL; + } + } +} + +static inline struct rte_mbuf * +ext_mbuf_create(struct rte_mempool *mbuf_pool, int pkt_len, + int nb_segs, const void *input_text) +{ + struct rte_mbuf *m = NULL, *mbuf = NULL; + size_t data_off = 0; + uint8_t *dst; + int i, size; + int t_len; + + if (pkt_len < 1) { + printf("Packet size must be 1 or more (is %d)\n", pkt_len); + return NULL; + } + + if (nb_segs < 1) { + printf("Number of segments must be 1 or more (is %d)\n", + nb_segs); + return NULL; + } + + t_len = pkt_len >= nb_segs ? pkt_len / nb_segs : 1; + size = pkt_len; + + /* Create chained mbuf_src with external buffer */ + for (i = 0; size > 0; i++) { + struct rte_mbuf_ext_shared_info *ret_shinfo = NULL; + uint16_t data_len = RTE_MIN(size, t_len); + char mz_name[RTE_MEMZONE_NAMESIZE]; + const struct rte_memzone *memzone; + void *ext_buf_addr = NULL; + rte_iova_t buf_iova; + bool freed = false; + uint16_t buf_len; + + buf_len = RTE_ALIGN_CEIL(data_len + 1024 + + sizeof(struct rte_mbuf_ext_shared_info), 8); + + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "ext_buf_%d", i); + memzone = rte_memzone_lookup(mz_name); + if (memzone != NULL && memzone->len != buf_len) { + rte_memzone_free(memzone); + memzone = NULL; + } + if (memzone == NULL) { + memzone = rte_memzone_reserve_aligned(mz_name, buf_len, SOCKET_ID_ANY, + RTE_MEMZONE_IOVA_CONTIG, RTE_CACHE_LINE_SIZE); + if (memzone == NULL) { + printf("Can't allocate memory zone %s\n", mz_name); + return NULL; + } + } + + ext_buf_addr = memzone->addr; + memcpy(ext_buf_addr, RTE_PTR_ADD(input_text, data_off), data_len); + + /* Create buffer to hold rte_mbuf header */ + m = rte_pktmbuf_alloc(mbuf_pool); + if (i == 0) + mbuf = m; + + if (m == NULL) { + printf("Cannot create segment for source mbuf"); + goto fail; + } + + /* Save shared data (like callback function) in external buffer’s end */ + ret_shinfo = rte_pktmbuf_ext_shinfo_init_helper(ext_buf_addr, &buf_len, + ext_mbuf_callback_fn_free, &freed); + if (ret_shinfo == NULL) { + printf("Shared mem initialization failed!\n"); + goto fail; + } + + buf_iova = rte_mem_virt2iova(ext_buf_addr); + + /* Attach external buffer to mbuf */ + rte_pktmbuf_attach_extbuf(m, ext_buf_addr, buf_iova, buf_len, + ret_shinfo); + if (m->ol_flags != RTE_MBUF_F_EXTERNAL) { + printf("External buffer is not attached to mbuf\n"); + goto fail; + } + + dst = (uint8_t *)rte_pktmbuf_append(m, data_len); + if (dst == NULL) { + printf("Cannot append %d bytes to the mbuf\n", data_len); + goto fail; + } + + if 
(mbuf != m) + rte_pktmbuf_chain(mbuf, m); + + size -= data_len;
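Condensed from the helper above, the core external-buffer attach sequence is roughly the following (error handling trimmed; the real test additionally backs the buffer with a memzone, chains segments, and verifies RTE_MBUF_F_EXTERNAL). ext_mbuf_callback_fn_free is the no-op free callback defined in the patch:

#include <rte_mbuf.h>
#include <rte_memory.h>

static struct rte_mbuf *
attach_one_extbuf(struct rte_mempool *mp, void *ext_buf, uint16_t buf_len)
{
        struct rte_mbuf_ext_shared_info *shinfo;
        struct rte_mbuf *m;

        m = rte_pktmbuf_alloc(mp);
        if (m == NULL)
                return NULL;

        /* Shared info (refcount + free callback) is placed at the buffer's
         * end; the helper shrinks buf_len to exclude it. */
        shinfo = rte_pktmbuf_ext_shinfo_init_helper(ext_buf, &buf_len,
                                                    ext_mbuf_callback_fn_free, NULL);
        if (shinfo == NULL) {
                rte_pktmbuf_free(m);
                return NULL;
        }

        rte_pktmbuf_attach_extbuf(m, ext_buf, rte_mem_virt2iova(ext_buf),
                                  buf_len, shinfo);
        return m;
}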
[PATCH v2 00/34] Implementation of revised ml/cnxk driver
This patch series is an implementation of the revised ml/cnxk driver to support models compiled with the TVM compiler framework. TVM models use a hybrid mode for execution, with regions of the model executing on the ML accelerator and the rest executing on CPU cores. This series of commits reorganizes the ml/cnxk driver and adds support to execute multiple regions within a TVM model. v2: - Fix xstats reporting - Fix issues reported by klocwork static analysis tool - Update external header inclusions v1: - Initial changes Anup Prabhu (1): ml/cnxk: enable fast-path ops for TVM models Prince Takkar (2): ml/cnxk: update internal TVM model info structure ml/cnxk: support quantize and dequantize callback Srikanth Yalavarthi (31): ml/cnxk: drop support for register polling ml/cnxk: drop use of RTE API for firmware read ml/cnxk: add generic cnxk device structure ml/cnxk: add generic model and layer structures ml/cnxk: add generic cnxk request structure ml/cnxk: add generic cnxk xstats structures ml/cnxk: rename cnxk ops function pointers struct ml/cnxk: update device handling functions ml/cnxk: update queue-pair handling functions ml/cnxk: update model load and unload functions ml/cnxk: update model start and stop functions ml/cnxk: update model utility functions ml/cnxk: update data quantization functions ml/cnxk: update device debug functions ml/cnxk: update device stats functions ml/cnxk: update device and model xstats functions ml/cnxk: update fast path functions ml/cnxk: move error handling to cnxk layer ml/cnxk: support config and close of tvmdp library ml/cnxk: add structures to support TVM model type ml/cnxk: add support for identify model type ml/cnxk: add support to parse TVM model objects ml/cnxk: fetch layer info and load TVM model ml/cnxk: update internal info for TVM model ml/cnxk: enable model unload in tvmdp library ml/cnxk: support start and stop for TVM models ml/cnxk: support device dump for TVM models ml/cnxk: enable reporting model runtime as xstats ml/cnxk: implement I/O alloc and free callbacks ml/cnxk: add generic ML malloc and free callback ml/cnxk: enable creation of mvtvm virtual device doc/guides/mldevs/cnxk.rst | 16 - drivers/ml/cnxk/cn10k_ml_dev.c | 477 ++--- drivers/ml/cnxk/cn10k_ml_dev.h | 457 + drivers/ml/cnxk/cn10k_ml_model.c | 383 ++-- drivers/ml/cnxk/cn10k_ml_model.h | 148 +- drivers/ml/cnxk/cn10k_ml_ocm.c | 109 +- drivers/ml/cnxk/cn10k_ml_ocm.h | 15 +- drivers/ml/cnxk/cn10k_ml_ops.c | 2915 ++ drivers/ml/cnxk/cn10k_ml_ops.h | 351 +++- drivers/ml/cnxk/cnxk_ml_dev.c| 22 + drivers/ml/cnxk/cnxk_ml_dev.h| 120 ++ drivers/ml/cnxk/cnxk_ml_io.c | 95 + drivers/ml/cnxk/cnxk_ml_io.h | 88 + drivers/ml/cnxk/cnxk_ml_model.c | 143 ++ drivers/ml/cnxk/cnxk_ml_model.h | 187 ++ drivers/ml/cnxk/cnxk_ml_ops.c| 1789 ++ drivers/ml/cnxk/cnxk_ml_ops.h| 85 + drivers/ml/cnxk/cnxk_ml_utils.c | 15 + drivers/ml/cnxk/cnxk_ml_utils.h | 17 + drivers/ml/cnxk/cnxk_ml_xstats.h | 152 ++ drivers/ml/cnxk/meson.build | 70 + drivers/ml/cnxk/mvtvm_ml_dev.c | 198 ++ drivers/ml/cnxk/mvtvm_ml_dev.h | 40 + drivers/ml/cnxk/mvtvm_ml_model.c | 322 drivers/ml/cnxk/mvtvm_ml_model.h | 88 + drivers/ml/cnxk/mvtvm_ml_ops.c | 581 ++ drivers/ml/cnxk/mvtvm_ml_ops.h | 74 + 27 files changed, 5964 insertions(+), 2993 deletions(-) create mode 100644 drivers/ml/cnxk/cnxk_ml_dev.c create mode 100644 drivers/ml/cnxk/cnxk_ml_dev.h create mode 100644 drivers/ml/cnxk/cnxk_ml_io.c create mode 100644 drivers/ml/cnxk/cnxk_ml_io.h create mode 100644 drivers/ml/cnxk/cnxk_ml_model.c create mode 100644 drivers/ml/cnxk/cnxk_ml_model.h create
mode 100644 drivers/ml/cnxk/cnxk_ml_ops.c create mode 100644 drivers/ml/cnxk/cnxk_ml_ops.h create mode 100644 drivers/ml/cnxk/cnxk_ml_utils.c create mode 100644 drivers/ml/cnxk/cnxk_ml_utils.h create mode 100644 drivers/ml/cnxk/cnxk_ml_xstats.h create mode 100644 drivers/ml/cnxk/mvtvm_ml_dev.c create mode 100644 drivers/ml/cnxk/mvtvm_ml_dev.h create mode 100644 drivers/ml/cnxk/mvtvm_ml_model.c create mode 100644 drivers/ml/cnxk/mvtvm_ml_model.h create mode 100644 drivers/ml/cnxk/mvtvm_ml_ops.c create mode 100644 drivers/ml/cnxk/mvtvm_ml_ops.h -- 2.41.0
[PATCH v2 01/34] ml/cnxk: drop support for register polling
Dropped support for device argument "poll_mem" for cnxk ML driver. Support to use registers for polling is removed and DDR addresses would be used for polling. Signed-off-by: Srikanth Yalavarthi --- Depends-on: series-29565 ("Spec changes to support multi I/O models") doc/guides/mldevs/cnxk.rst | 16 - drivers/ml/cnxk/cn10k_ml_dev.c | 36 +-- drivers/ml/cnxk/cn10k_ml_dev.h | 13 +--- drivers/ml/cnxk/cn10k_ml_ops.c | 111 - drivers/ml/cnxk/cn10k_ml_ops.h | 6 -- 5 files changed, 18 insertions(+), 164 deletions(-) diff --git a/doc/guides/mldevs/cnxk.rst b/doc/guides/mldevs/cnxk.rst index b79bc540d9..1834b1f905 100644 --- a/doc/guides/mldevs/cnxk.rst +++ b/doc/guides/mldevs/cnxk.rst @@ -180,22 +180,6 @@ Runtime Config Options in the fast path enqueue burst operation. -**Polling memory location** (default ``ddr``) - - ML cnxk driver provides the option to select the memory location to be used - for polling to check the inference request completion. - Driver supports using either the DDR address space (``ddr``) - or ML registers (``register``) as polling locations. - The parameter ``poll_mem`` is used to specify the poll location. - - For example:: - - -a :00:10.0,poll_mem="register" - - With the above configuration, ML cnxk driver is configured to use ML registers - for polling in fastpath requests. - - Debugging Options - diff --git a/drivers/ml/cnxk/cn10k_ml_dev.c b/drivers/ml/cnxk/cn10k_ml_dev.c index 983138a7f2..e3c2badcef 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.c +++ b/drivers/ml/cnxk/cn10k_ml_dev.c @@ -23,7 +23,6 @@ #define CN10K_ML_DEV_CACHE_MODEL_DATA "cache_model_data" #define CN10K_ML_OCM_ALLOC_MODE"ocm_alloc_mode" #define CN10K_ML_DEV_HW_QUEUE_LOCK "hw_queue_lock" -#define CN10K_ML_FW_POLL_MEM "poll_mem" #define CN10K_ML_OCM_PAGE_SIZE "ocm_page_size" #define CN10K_ML_FW_PATH_DEFAULT "/lib/firmware/mlip-fw.bin" @@ -32,7 +31,6 @@ #define CN10K_ML_DEV_CACHE_MODEL_DATA_DEFAULT 1 #define CN10K_ML_OCM_ALLOC_MODE_DEFAULT"lowest" #define CN10K_ML_DEV_HW_QUEUE_LOCK_DEFAULT 1 -#define CN10K_ML_FW_POLL_MEM_DEFAULT "ddr" #define CN10K_ML_OCM_PAGE_SIZE_DEFAULT 16384 /* ML firmware macros */ @@ -54,7 +52,6 @@ static const char *const valid_args[] = {CN10K_ML_FW_PATH, CN10K_ML_DEV_CACHE_MODEL_DATA, CN10K_ML_OCM_ALLOC_MODE, CN10K_ML_DEV_HW_QUEUE_LOCK, -CN10K_ML_FW_POLL_MEM, CN10K_ML_OCM_PAGE_SIZE, NULL}; @@ -103,9 +100,7 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde bool hw_queue_lock_set = false; bool ocm_page_size_set = false; char *ocm_alloc_mode = NULL; - bool poll_mem_set = false; bool fw_path_set = false; - char *poll_mem = NULL; char *fw_path = NULL; int ret = 0; bool found; @@ -189,17 +184,6 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde hw_queue_lock_set = true; } - if (rte_kvargs_count(kvlist, CN10K_ML_FW_POLL_MEM) == 1) { - ret = rte_kvargs_process(kvlist, CN10K_ML_FW_POLL_MEM, &parse_string_arg, -&poll_mem); - if (ret < 0) { - plt_err("Error processing arguments, key = %s\n", CN10K_ML_FW_POLL_MEM); - ret = -EINVAL; - goto exit; - } - poll_mem_set = true; - } - if (rte_kvargs_count(kvlist, CN10K_ML_OCM_PAGE_SIZE) == 1) { ret = rte_kvargs_process(kvlist, CN10K_ML_OCM_PAGE_SIZE, &parse_integer_arg, &mldev->ocm_page_size); @@ -280,18 +264,6 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde } plt_info("ML: %s = %d", CN10K_ML_DEV_HW_QUEUE_LOCK, mldev->hw_queue_lock); - if (!poll_mem_set) { - mldev->fw.poll_mem = CN10K_ML_FW_POLL_MEM_DEFAULT; - } else { - if (!((strcmp(poll_mem, "ddr") == 
0) || (strcmp(poll_mem, "register") == 0))) { - plt_err("Invalid argument, %s = %s\n", CN10K_ML_FW_POLL_MEM, poll_mem); - ret = -EINVAL; - goto exit; - } - mldev->fw.poll_mem = poll_mem; - } - plt_info("ML: %s = %s", CN10K_ML_FW_POLL_MEM, mldev->fw.poll_mem); - if (!ocm_page_size_set) { mldev->ocm_page_size = CN10K_ML_OCM_PAGE_SIZE_DEFAULT; } else { @@ -450,10 +422,7 @@ cn10k_ml_fw_flags_get(struct cn10k_ml_fw *fw) if (fw->report_dpe_w
[PATCH v2 02/34] ml/cnxk: drop use of RTE API for firmware read
Dropped use of the rte_firmware_read API to read the ML firmware binary. When DPDK is built with libarchive support, the RTE API treats the binary file as a compressed archive. This causes the ML firmware binary to be parsed incorrectly. Fixes: c29da752ffa8 ("ml/cnxk: support firmware load and device reset") Cc: syalavar...@marvell.com Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_dev.c | 64 +++--- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_dev.c b/drivers/ml/cnxk/cn10k_ml_dev.c index e3c2badcef..b7e6ed9a00 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.c +++ b/drivers/ml/cnxk/cn10k_ml_dev.c @@ -2,6 +2,11 @@ * Copyright (c) 2022 Marvell. */ +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> + #include #include #include @@ -61,6 +66,57 @@ static const int valid_ocm_page_size[] = {1024, 2048, 4096, 8192, 16384}; /* Dummy operations for ML device */ struct rte_ml_dev_ops ml_dev_dummy_ops = {0}; +static int +ml_read_file(const char *file, size_t *size, char **buffer) +{ + char *file_buffer = NULL; + struct stat file_stat; + char *file_map; + int ret; + int fd; + + fd = open(file, O_RDONLY); + if (fd == -1) { + plt_err("Failed to open file: %s\n", file); + return -errno; + } + + if (fstat(fd, &file_stat) != 0) { + plt_err("fstat failed for file: %s\n", file); + close(fd); + return -errno; + } + + file_buffer = rte_malloc("ml_firmware", file_stat.st_size, PLT_CACHE_LINE_SIZE); + if (file_buffer == NULL) { + plt_err("Failed to allocate memory: %s\n", file); + ret = -ENOMEM; + goto error; + } + + file_map = mmap(0, file_stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (file_map == MAP_FAILED) { + plt_err("Failed to map file: %s\n", file); + ret = -errno; + goto error; + } + + rte_memcpy(file_buffer, file_map, file_stat.st_size); + munmap(file_map, file_stat.st_size); + close(fd); + + *size = file_stat.st_size; + *buffer = file_buffer; + + return 0; + +error: + rte_free(file_buffer); + close(fd); + + return ret; +} + static int parse_string_arg(const char *key __rte_unused, const char *value, void *extra_args) { @@ -736,7 +792,7 @@ cn10k_ml_fw_load(struct cn10k_ml_dev *mldev) { const struct plt_memzone *mz; struct cn10k_ml_fw *fw; - void *fw_buffer = NULL; + char *fw_buffer = NULL; uint64_t mz_size = 0; uint64_t fw_size = 0; int ret = 0; @@ -746,7 +802,7 @@ cn10k_ml_fw_load(struct cn10k_ml_dev *mldev) if (roc_env_is_emulator() || roc_env_is_hw()) { /* Read firmware image to a buffer */ - ret = rte_firmware_read(fw->path, &fw_buffer, &fw_size); + ret = ml_read_file(fw->path, &fw_size, &fw_buffer); if ((ret < 0) || (fw_buffer == NULL)) { plt_err("Unable to read firmware data: %s\n", fw->path); return ret; } @@ -763,7 +819,7 @@ cn10k_ml_fw_load(struct cn10k_ml_dev *mldev) mz = plt_memzone_reserve_aligned(FW_MEMZONE_NAME, mz_size, 0, ML_CN10K_ALIGN_SIZE); if (mz == NULL) { plt_err("plt_memzone_reserve failed : %s", FW_MEMZONE_NAME); - free(fw_buffer); + rte_free(fw_buffer); return -ENOMEM; } fw->req = mz->addr; @@ -780,7 +836,7 @@ cn10k_ml_fw_load(struct cn10k_ml_dev *mldev) if (roc_env_is_emulator() || roc_env_is_hw()) { fw->data = PLT_PTR_ADD(mz->addr, sizeof(struct cn10k_ml_req)); ret = cn10k_ml_fw_load_cn10ka(fw, fw_buffer, fw_size); - free(fw_buffer); + rte_free(fw_buffer); } else if (roc_env_is_asim()) { fw->data = NULL; ret = cn10k_ml_fw_load_asim(fw); -- 2.41.0
[PATCH v2 04/34] ml/cnxk: add generic model and layer structures
Introduce generic cnxk model and layer structure. These structures would enable supporting models with multiple layers. A model is a collection of multiple independent layers with flow dependencies between the layers. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_dev.h | 9 +- drivers/ml/cnxk/cn10k_ml_model.c | 244 drivers/ml/cnxk/cn10k_ml_model.h | 122 ++-- drivers/ml/cnxk/cn10k_ml_ocm.c | 49 +++- drivers/ml/cnxk/cn10k_ml_ocm.h | 9 +- drivers/ml/cnxk/cn10k_ml_ops.c | 487 +-- drivers/ml/cnxk/cnxk_ml_io.h | 79 + drivers/ml/cnxk/cnxk_ml_model.c | 7 + drivers/ml/cnxk/cnxk_ml_model.h | 111 +++ drivers/ml/cnxk/meson.build | 3 + 10 files changed, 653 insertions(+), 467 deletions(-) create mode 100644 drivers/ml/cnxk/cnxk_ml_io.h create mode 100644 drivers/ml/cnxk/cnxk_ml_model.c create mode 100644 drivers/ml/cnxk/cnxk_ml_model.h diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h index f9da1548c4..99ff0a344a 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.h +++ b/drivers/ml/cnxk/cn10k_ml_dev.h @@ -9,6 +9,8 @@ #include "cn10k_ml_ocm.h" +#include "cnxk_ml_io.h" + /* Dummy Device ops */ extern struct rte_ml_dev_ops ml_dev_dummy_ops; @@ -21,9 +23,6 @@ extern struct rte_ml_dev_ops ml_dev_dummy_ops; /* Device alignment size */ #define ML_CN10K_ALIGN_SIZE 128 -/* Maximum number of models per device */ -#define ML_CN10K_MAX_MODELS 16 - /* Maximum number of queue-pairs per device, spinlock version */ #define ML_CN10K_MAX_QP_PER_DEVICE_SL 16 @@ -455,8 +454,8 @@ struct cn10k_ml_xstats { struct cn10k_ml_xstats_entry *entries; /* Store num stats and offset of the stats for each model */ - uint16_t count_per_model[ML_CN10K_MAX_MODELS]; - uint16_t offset_for_model[ML_CN10K_MAX_MODELS]; + uint16_t count_per_model[ML_CNXK_MAX_MODELS]; + uint16_t offset_for_model[ML_CNXK_MAX_MODELS]; uint16_t count_mode_device; uint16_t count_mode_model; uint16_t count; diff --git a/drivers/ml/cnxk/cn10k_ml_model.c b/drivers/ml/cnxk/cn10k_ml_model.c index d146535866..0ea6520bf7 100644 --- a/drivers/ml/cnxk/cn10k_ml_model.c +++ b/drivers/ml/cnxk/cn10k_ml_model.c @@ -11,6 +11,7 @@ #include "cn10k_ml_ocm.h" #include "cnxk_ml_dev.h" +#include "cnxk_ml_model.h" static enum rte_ml_io_type cn10k_ml_io_type_map(uint8_t type) @@ -312,19 +313,17 @@ cn10k_ml_model_metadata_update(struct cn10k_ml_model_metadata *metadata) } void -cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer, uint8_t *base_dma_addr) +cn10k_ml_layer_addr_update(struct cnxk_ml_layer *layer, uint8_t *buffer, uint8_t *base_dma_addr) { struct cn10k_ml_model_metadata *metadata; - struct cn10k_ml_model_addr *addr; + struct cn10k_ml_layer_addr *addr; size_t model_data_size; uint8_t *dma_addr_load; uint8_t *dma_addr_run; - uint8_t i; - uint8_t j; int fpos; - metadata = &model->metadata; - addr = &model->addr; + metadata = &layer->glow.metadata; + addr = &layer->glow.addr; model_data_size = metadata->init_model.file_size + metadata->main_model.file_size + metadata->finish_model.file_size + metadata->weights_bias.file_size; @@ -362,102 +361,136 @@ cn10k_ml_model_addr_update(struct cn10k_ml_model *model, uint8_t *buffer, uint8_ addr->wb_base_addr = PLT_PTR_SUB(dma_addr_load, metadata->weights_bias.mem_offset); addr->wb_load_addr = PLT_PTR_ADD(addr->wb_base_addr, metadata->weights_bias.mem_offset); rte_memcpy(addr->wb_load_addr, PLT_PTR_ADD(buffer, fpos), metadata->weights_bias.file_size); +} + +void +cn10k_ml_layer_info_update(struct cnxk_ml_layer *layer) +{ + struct cn10k_ml_model_metadata *metadata; + uint8_t i; + uint8_t 
j; + + metadata = &layer->glow.metadata; /* Inputs */ - addr->total_input_sz_d = 0; - addr->total_input_sz_q = 0; + layer->info.nb_inputs = metadata->model.num_input; + layer->info.total_input_sz_d = 0; + layer->info.total_input_sz_q = 0; for (i = 0; i < metadata->model.num_input; i++) { if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) { - addr->input[i].nb_dims = 4; - addr->input[i].shape[0] = metadata->input1[i].shape.w; - addr->input[i].shape[1] = metadata->input1[i].shape.x; - addr->input[i].shape[2] = metadata->input1[i].shape.y; - addr->input[i].shape[3] = metadata->input1[i].shape.z; - - addr->input[i].nb_elements = + strncpy(layer->info.input[i].name, (char *)metadata->input1[i].input_name, + MRVL_ML_INPUT_NAME_LEN); + layer->info.input[i].dtype = metadata->input1[i].input_type; +
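The hierarchy these structures set up can be summarized in a hedged sketch; field names follow their usage in the diff (layer->glow.metadata, layer->info.input[i]), while the layer array bound is an assumed illustration, with driver headers presumed included:

#include <stdint.h>

#define MAX_LAYERS 2 /* hypothetical bound for illustration */

/* A layer pairs generic I/O info with platform-specific (glow) state. */
struct cnxk_ml_layer {
        struct cnxk_ml_io_info info;     /* generic input/output description */
        struct cn10k_ml_layer_data glow; /* cn10k metadata, DMA addresses, requests */
};

/* A model is a collection of layers with flow dependencies between them. */
struct cnxk_ml_model {
        struct cnxk_ml_layer layer[MAX_LAYERS];
        uint16_t nb_layers;
};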
[PATCH v2 03/34] ml/cnxk: add generic cnxk device structure
Introduce generic cnxk device structure. This structure is a top level device structure for the driver, which would encapsulate the target / platform specific device structure. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_dev.c | 315 ++-- drivers/ml/cnxk/cn10k_ml_dev.h | 47 +-- drivers/ml/cnxk/cn10k_ml_model.c | 14 +- drivers/ml/cnxk/cn10k_ml_model.h | 8 +- drivers/ml/cnxk/cn10k_ml_ocm.c | 56 ++-- drivers/ml/cnxk/cn10k_ml_ops.c | 494 +-- drivers/ml/cnxk/cnxk_ml_dev.c| 11 + drivers/ml/cnxk/cnxk_ml_dev.h| 58 drivers/ml/cnxk/meson.build | 2 + 9 files changed, 562 insertions(+), 443 deletions(-) create mode 100644 drivers/ml/cnxk/cnxk_ml_dev.c create mode 100644 drivers/ml/cnxk/cnxk_ml_dev.h diff --git a/drivers/ml/cnxk/cn10k_ml_dev.c b/drivers/ml/cnxk/cn10k_ml_dev.c index b7e6ed9a00..367fb7014c 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.c +++ b/drivers/ml/cnxk/cn10k_ml_dev.c @@ -15,13 +15,15 @@ #include #include -#include - #include +#include + #include "cn10k_ml_dev.h" #include "cn10k_ml_ops.h" +#include "cnxk_ml_dev.h" + #define CN10K_ML_FW_PATH "fw_path" #define CN10K_ML_FW_ENABLE_DPE_WARNINGS "enable_dpe_warnings" #define CN10K_ML_FW_REPORT_DPE_WARNINGS "report_dpe_warnings" @@ -63,9 +65,6 @@ static const char *const valid_args[] = {CN10K_ML_FW_PATH, /* Supported OCM page sizes: 1KB, 2KB, 4KB, 8KB and 16KB */ static const int valid_ocm_page_size[] = {1024, 2048, 4096, 8192, 16384}; -/* Dummy operations for ML device */ -struct rte_ml_dev_ops ml_dev_dummy_ops = {0}; - static int ml_read_file(const char *file, size_t *size, char **buffer) { @@ -146,7 +145,7 @@ parse_integer_arg(const char *key __rte_unused, const char *value, void *extra_a } static int -cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mldev) +cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *cn10k_mldev) { bool enable_dpe_warnings_set = false; bool report_dpe_warnings_set = false; @@ -183,7 +182,7 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde if (rte_kvargs_count(kvlist, CN10K_ML_FW_ENABLE_DPE_WARNINGS) == 1) { ret = rte_kvargs_process(kvlist, CN10K_ML_FW_ENABLE_DPE_WARNINGS, -&parse_integer_arg, &mldev->fw.enable_dpe_warnings); +&parse_integer_arg, &cn10k_mldev->fw.enable_dpe_warnings); if (ret < 0) { plt_err("Error processing arguments, key = %s\n", CN10K_ML_FW_ENABLE_DPE_WARNINGS); @@ -195,7 +194,7 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde if (rte_kvargs_count(kvlist, CN10K_ML_FW_REPORT_DPE_WARNINGS) == 1) { ret = rte_kvargs_process(kvlist, CN10K_ML_FW_REPORT_DPE_WARNINGS, -&parse_integer_arg, &mldev->fw.report_dpe_warnings); +&parse_integer_arg, &cn10k_mldev->fw.report_dpe_warnings); if (ret < 0) { plt_err("Error processing arguments, key = %s\n", CN10K_ML_FW_REPORT_DPE_WARNINGS); @@ -207,7 +206,7 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde if (rte_kvargs_count(kvlist, CN10K_ML_DEV_CACHE_MODEL_DATA) == 1) { ret = rte_kvargs_process(kvlist, CN10K_ML_DEV_CACHE_MODEL_DATA, &parse_integer_arg, -&mldev->cache_model_data); +&cn10k_mldev->cache_model_data); if (ret < 0) { plt_err("Error processing arguments, key = %s\n", CN10K_ML_DEV_CACHE_MODEL_DATA); @@ -230,7 +229,7 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde if (rte_kvargs_count(kvlist, CN10K_ML_DEV_HW_QUEUE_LOCK) == 1) { ret = rte_kvargs_process(kvlist, CN10K_ML_DEV_HW_QUEUE_LOCK, &parse_integer_arg, -&mldev->hw_queue_lock); 
+&cn10k_mldev->hw_queue_lock); if (ret < 0) { plt_err("Error processing arguments, key = %s\n", CN10K_ML_DEV_HW_QUEUE_LOCK); @@ -242,7 +241,7 @@ cn10k_mldev_parse_devargs(struct rte_devargs *devargs, struct cn10k_ml_dev *mlde if (rte_kvargs_count(kvlist, CN10K_ML_OCM_PAGE_SIZE) == 1) { ret = rte_kvargs_process(kvlist, CN10K_ML_OCM_PAGE_SIZE, &parse_integer_arg, -&mldev->ocm_page_size); +&cn10k_mldev->ocm_page_size); if (ret < 0) {
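In sketch form, the encapsulation this commit introduces looks roughly like the following; the exact field set is abbreviated, the mldev backpointer is an assumption, and the state values appear later in the series as ML_CNXK_DEV_STATE_*:

/* Top-level driver device: wraps the platform-specific device. */
struct cnxk_ml_dev {
        struct rte_ml_dev *mldev;        /* assumed: associated rte_ml_dev */
        struct cn10k_ml_dev cn10k_mldev; /* target/platform device */
        int state; /* ML_CNXK_DEV_STATE_{PROBED,CONFIGURED,STARTED,CLOSED} */
};

/* Usage pattern visible throughout the series:
 *      cnxk_mldev = dev->data->dev_private;
 *      cn10k_mldev = &cnxk_mldev->cn10k_mldev;
 */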
[PATCH v2 07/34] ml/cnxk: rename cnxk ops function pointers struct
Renamed cn10k ML ops structure with cnxk prefix. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_dev.c | 2 +- drivers/ml/cnxk/cn10k_ml_ops.c | 73 +- drivers/ml/cnxk/cn10k_ml_ops.h | 34 +++- drivers/ml/cnxk/cnxk_ml_ops.c | 38 ++ drivers/ml/cnxk/cnxk_ml_ops.h | 2 + 5 files changed, 93 insertions(+), 56 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_dev.c b/drivers/ml/cnxk/cn10k_ml_dev.c index f6e05cfc47..20c114b8bf 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.c +++ b/drivers/ml/cnxk/cn10k_ml_dev.c @@ -404,7 +404,7 @@ cn10k_ml_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_de goto pmd_destroy; } - dev->dev_ops = &cn10k_ml_ops; + dev->dev_ops = &cnxk_ml_ops; } else { plt_err("CN10K ML Ops are not supported on secondary process"); dev->dev_ops = &ml_dev_dummy_ops; diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index 03a7447dc8..e6383283d3 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -123,7 +123,7 @@ cnxk_ml_qp_destroy(const struct rte_ml_dev *dev, struct cnxk_ml_qp *qp) return 0; } -static int +int cn10k_ml_dev_queue_pair_release(struct rte_ml_dev *dev, uint16_t queue_pair_id) { struct cnxk_ml_qp *qp; @@ -864,7 +864,7 @@ cn10k_ml_cache_model_data(struct rte_ml_dev *dev, uint16_t model_id) return ret; } -static int +int cn10k_ml_dev_info_get(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info) { struct cn10k_ml_dev *cn10k_mldev; @@ -892,7 +892,7 @@ cn10k_ml_dev_info_get(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info) return 0; } -static int +int cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *conf) { struct rte_ml_dev_info dev_info; @@ -1091,7 +1091,7 @@ cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *c return ret; } -static int +int cn10k_ml_dev_close(struct rte_ml_dev *dev) { struct cn10k_ml_dev *cn10k_mldev; @@ -1164,7 +1164,7 @@ cn10k_ml_dev_close(struct rte_ml_dev *dev) return rte_dev_remove(dev->device); } -static int +int cn10k_ml_dev_start(struct rte_ml_dev *dev) { struct cn10k_ml_dev *cn10k_mldev; @@ -1184,7 +1184,7 @@ cn10k_ml_dev_start(struct rte_ml_dev *dev) return 0; } -static int +int cn10k_ml_dev_stop(struct rte_ml_dev *dev) { struct cn10k_ml_dev *cn10k_mldev; @@ -1204,7 +1204,7 @@ cn10k_ml_dev_stop(struct rte_ml_dev *dev) return 0; } -static int +int cn10k_ml_dev_queue_pair_setup(struct rte_ml_dev *dev, uint16_t queue_pair_id, const struct rte_ml_dev_qp_conf *qp_conf, int socket_id) { @@ -1245,7 +1245,7 @@ cn10k_ml_dev_queue_pair_setup(struct rte_ml_dev *dev, uint16_t queue_pair_id, return 0; } -static int +int cn10k_ml_dev_stats_get(struct rte_ml_dev *dev, struct rte_ml_dev_stats *stats) { struct cnxk_ml_qp *qp; @@ -1262,7 +1262,7 @@ cn10k_ml_dev_stats_get(struct rte_ml_dev *dev, struct rte_ml_dev_stats *stats) return 0; } -static void +void cn10k_ml_dev_stats_reset(struct rte_ml_dev *dev) { struct cnxk_ml_qp *qp; @@ -1277,7 +1277,7 @@ cn10k_ml_dev_stats_reset(struct rte_ml_dev *dev) } } -static int +int cn10k_ml_dev_xstats_names_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode, int32_t model_id, struct rte_ml_dev_xstats_map *xstats_map, uint32_t size) @@ -1325,7 +1325,7 @@ cn10k_ml_dev_xstats_names_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mod return idx; } -static int +int cn10k_ml_dev_xstats_by_name_get(struct rte_ml_dev *dev, const char *name, uint16_t *stat_id, uint64_t *value) { @@ -1367,7 +1367,7 @@ cn10k_ml_dev_xstats_by_name_get(struct rte_ml_dev *dev, const char 
*name, uint16 return -EINVAL; } -static int +int cn10k_ml_dev_xstats_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode, int32_t model_id, const uint16_t stat_ids[], uint64_t values[], uint16_t nb_ids) { @@ -1431,7 +1431,7 @@ cn10k_ml_dev_xstats_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode return idx; } -static int +int cn10k_ml_dev_xstats_reset(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode, int32_t model_id, const uint16_t stat_ids[], uint16_t nb_ids) { @@ -1445,7 +1445,7 @@ cn10k_ml_dev_xstats_reset(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mo return 0; } -static int +int cn10k_ml_dev_dump(struct rte_ml_dev *dev, FILE *fp) { struct cn10k_ml_dev *cn10k_mldev; @@ -1532,7 +1532,7 @@ cn10k_ml_dev_dump(struct rte_ml_dev *dev, FILE *fp) return 0; } -static
[PATCH v2 08/34] ml/cnxk: update device handling functions
Implement CNXK wrapper functions for dev_info_get, dev_configure, dev_close, dev_start and dev_stop. The wrapper functions allocate / release common resources for the ML driver and invoke device specific functions. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_ops.c | 230 ++ drivers/ml/cnxk/cn10k_ml_ops.h | 16 +- drivers/ml/cnxk/cnxk_ml_dev.h | 3 + drivers/ml/cnxk/cnxk_ml_ops.c | 286 - drivers/ml/cnxk/cnxk_ml_ops.h | 3 + 5 files changed, 314 insertions(+), 224 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index e6383283d3..0f32f3b2bb 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -105,7 +105,7 @@ qp_memzone_name_get(char *name, int size, int dev_id, int qp_id) snprintf(name, size, "cnxk_ml_qp_mem_%u:%u", dev_id, qp_id); } -static int +int cnxk_ml_qp_destroy(const struct rte_ml_dev *dev, struct cnxk_ml_qp *qp) { const struct rte_memzone *qp_mem; @@ -865,20 +865,12 @@ cn10k_ml_cache_model_data(struct rte_ml_dev *dev, uint16_t model_id) } int -cn10k_ml_dev_info_get(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info) +cn10k_ml_dev_info_get(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_dev_info *dev_info) { struct cn10k_ml_dev *cn10k_mldev; - struct cnxk_ml_dev *cnxk_mldev; - if (dev_info == NULL) - return -EINVAL; - - cnxk_mldev = dev->data->dev_private; cn10k_mldev = &cnxk_mldev->cn10k_mldev; - memset(dev_info, 0, sizeof(struct rte_ml_dev_info)); - dev_info->driver_name = dev->device->driver->name; - dev_info->max_models = ML_CNXK_MAX_MODELS; if (cn10k_mldev->hw_queue_lock) dev_info->max_queue_pairs = ML_CN10K_MAX_QP_PER_DEVICE_SL; else @@ -893,143 +885,17 @@ cn10k_ml_dev_info_get(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info) } int -cn10k_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *conf) +cn10k_ml_dev_configure(struct cnxk_ml_dev *cnxk_mldev, const struct rte_ml_dev_config *conf) { - struct rte_ml_dev_info dev_info; struct cn10k_ml_dev *cn10k_mldev; - struct cnxk_ml_dev *cnxk_mldev; - struct cnxk_ml_model *model; struct cn10k_ml_ocm *ocm; - struct cnxk_ml_qp *qp; - uint16_t model_id; - uint32_t mz_size; uint16_t tile_id; - uint16_t qp_id; int ret; - if (dev == NULL || conf == NULL) - return -EINVAL; + RTE_SET_USED(conf); - /* Get CN10K device handle */ - cnxk_mldev = dev->data->dev_private; cn10k_mldev = &cnxk_mldev->cn10k_mldev; - cn10k_ml_dev_info_get(dev, &dev_info); - if (conf->nb_models > dev_info.max_models) { - plt_err("Invalid device config, nb_models > %u\n", dev_info.max_models); - return -EINVAL; - } - - if (conf->nb_queue_pairs > dev_info.max_queue_pairs) { - plt_err("Invalid device config, nb_queue_pairs > %u\n", dev_info.max_queue_pairs); - return -EINVAL; - } - - if (cnxk_mldev->state == ML_CNXK_DEV_STATE_PROBED) { - plt_ml_dbg("Configuring ML device, nb_queue_pairs = %u, nb_models = %u", - conf->nb_queue_pairs, conf->nb_models); - - /* Load firmware */ - ret = cn10k_ml_fw_load(cnxk_mldev); - if (ret != 0) - return ret; - } else if (cnxk_mldev->state == ML_CNXK_DEV_STATE_CONFIGURED) { - plt_ml_dbg("Re-configuring ML device, nb_queue_pairs = %u, nb_models = %u", - conf->nb_queue_pairs, conf->nb_models); - } else if (cnxk_mldev->state == ML_CNXK_DEV_STATE_STARTED) { - plt_err("Device can't be reconfigured in started state\n"); - return -ENOTSUP; - } else if (cnxk_mldev->state == ML_CNXK_DEV_STATE_CLOSED) { - plt_err("Device can't be reconfigured after close\n"); - return -ENOTSUP; - } - - /* Configure queue-pairs */ - if 
(dev->data->queue_pairs == NULL) { - mz_size = sizeof(dev->data->queue_pairs[0]) * conf->nb_queue_pairs; - dev->data->queue_pairs = - rte_zmalloc("cn10k_mldev_queue_pairs", mz_size, RTE_CACHE_LINE_SIZE); - if (dev->data->queue_pairs == NULL) { - dev->data->nb_queue_pairs = 0; - plt_err("Failed to get memory for queue_pairs, nb_queue_pairs %u", - conf->nb_queue_pairs); - return -ENOMEM; - } - } else { /* Re-configure */ - void **queue_pairs; - - /* Release all queue pairs as ML spec doesn't support queue_pair_destroy. */ - for (qp_id = 0; qp_id < dev->data->nb_
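The division of labor reads directly off the diff: the cnxk wrapper validates rte_ml_dev-level arguments and fills the common fields the cn10k function no longer sets, then delegates. Reassembled schematically (condensed, not verbatim; driver headers assumed):

#include <string.h>

static int
cnxk_ml_dev_info_get(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info)
{
        struct cnxk_ml_dev *cnxk_mldev;

        if (dev == NULL || dev_info == NULL)
                return -EINVAL;

        cnxk_mldev = dev->data->dev_private;

        /* Common fields owned by the cnxk layer. */
        memset(dev_info, 0, sizeof(*dev_info));
        dev_info->driver_name = dev->device->driver->name;
        dev_info->max_models = ML_CNXK_MAX_MODELS;

        /* Platform layer fills hardware-specific limits (queue pairs, ...). */
        return cn10k_ml_dev_info_get(cnxk_mldev, dev_info);
}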
[PATCH v2 05/34] ml/cnxk: add generic cnxk request structure
Added generic cnxk request structure. Moved common fields from cn10k structures to cnxk structure. Moved job related structures and enumerations to ops headers. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_dev.c | 70 --- drivers/ml/cnxk/cn10k_ml_dev.h | 269 + drivers/ml/cnxk/cn10k_ml_model.c | 6 +- drivers/ml/cnxk/cn10k_ml_model.h | 4 +- drivers/ml/cnxk/cn10k_ml_ops.c | 329 +-- drivers/ml/cnxk/cn10k_ml_ops.h | 296 +++ drivers/ml/cnxk/cnxk_ml_ops.c| 7 + drivers/ml/cnxk/cnxk_ml_ops.h| 63 ++ drivers/ml/cnxk/meson.build | 2 + 9 files changed, 558 insertions(+), 488 deletions(-) create mode 100644 drivers/ml/cnxk/cnxk_ml_ops.c create mode 100644 drivers/ml/cnxk/cnxk_ml_ops.h diff --git a/drivers/ml/cnxk/cn10k_ml_dev.c b/drivers/ml/cnxk/cn10k_ml_dev.c index 367fb7014c..f6e05cfc47 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.c +++ b/drivers/ml/cnxk/cn10k_ml_dev.c @@ -23,6 +23,7 @@ #include "cn10k_ml_ops.h" #include "cnxk_ml_dev.h" +#include "cnxk_ml_ops.h" #define CN10K_ML_FW_PATH "fw_path" #define CN10K_ML_FW_ENABLE_DPE_WARNINGS "enable_dpe_warnings" @@ -457,20 +458,23 @@ cn10k_ml_pci_remove(struct rte_pci_device *pci_dev) static void cn10k_ml_fw_print_info(struct cn10k_ml_fw *fw) { - plt_info("ML Firmware Version = %s", fw->req->jd.fw_load.version); - - plt_ml_dbg("Firmware capabilities = 0x%016lx", fw->req->jd.fw_load.cap.u64); - plt_ml_dbg("Version = %s", fw->req->jd.fw_load.version); - plt_ml_dbg("core0_debug_ptr = 0x%016lx", fw->req->jd.fw_load.debug.core0_debug_ptr); - plt_ml_dbg("core1_debug_ptr = 0x%016lx", fw->req->jd.fw_load.debug.core1_debug_ptr); - plt_ml_dbg("debug_buffer_size = %u bytes", fw->req->jd.fw_load.debug.debug_buffer_size); + plt_info("ML Firmware Version = %s", fw->req->cn10k_req.jd.fw_load.version); + + plt_ml_dbg("Firmware capabilities = 0x%016lx", fw->req->cn10k_req.jd.fw_load.cap.u64); + plt_ml_dbg("Version = %s", fw->req->cn10k_req.jd.fw_load.version); + plt_ml_dbg("core0_debug_ptr = 0x%016lx", + fw->req->cn10k_req.jd.fw_load.debug.core0_debug_ptr); + plt_ml_dbg("core1_debug_ptr = 0x%016lx", + fw->req->cn10k_req.jd.fw_load.debug.core1_debug_ptr); + plt_ml_dbg("debug_buffer_size = %u bytes", + fw->req->cn10k_req.jd.fw_load.debug.debug_buffer_size); plt_ml_dbg("core0_exception_buffer = 0x%016lx", - fw->req->jd.fw_load.debug.core0_exception_buffer); + fw->req->cn10k_req.jd.fw_load.debug.core0_exception_buffer); plt_ml_dbg("core1_exception_buffer = 0x%016lx", - fw->req->jd.fw_load.debug.core1_exception_buffer); + fw->req->cn10k_req.jd.fw_load.debug.core1_exception_buffer); plt_ml_dbg("exception_state_size = %u bytes", - fw->req->jd.fw_load.debug.exception_state_size); - plt_ml_dbg("flags = 0x%016lx", fw->req->jd.fw_load.flags); + fw->req->cn10k_req.jd.fw_load.debug.exception_state_size); + plt_ml_dbg("flags = 0x%016lx", fw->req->cn10k_req.jd.fw_load.flags); } uint64_t @@ -515,29 +519,30 @@ cn10k_ml_fw_load_asim(struct cn10k_ml_fw *fw) roc_ml_reg_save(&cn10k_mldev->roc, ML_MLR_BASE); /* Update FW load completion structure */ - fw->req->jd.hdr.jce.w1.u64 = PLT_U64_CAST(&fw->req->status); - fw->req->jd.hdr.job_type = ML_CN10K_JOB_TYPE_FIRMWARE_LOAD; - fw->req->jd.hdr.result = roc_ml_addr_ap2mlip(&cn10k_mldev->roc, &fw->req->result); - fw->req->jd.fw_load.flags = cn10k_ml_fw_flags_get(fw); - plt_write64(ML_CNXK_POLL_JOB_START, &fw->req->status); + fw->req->cn10k_req.jd.hdr.jce.w1.u64 = PLT_U64_CAST(&fw->req->cn10k_req.status); + fw->req->cn10k_req.jd.hdr.job_type = ML_CN10K_JOB_TYPE_FIRMWARE_LOAD; + fw->req->cn10k_req.jd.hdr.result = + 
roc_ml_addr_ap2mlip(&cn10k_mldev->roc, &fw->req->cn10k_req.result); + fw->req->cn10k_req.jd.fw_load.flags = cn10k_ml_fw_flags_get(fw); + plt_write64(ML_CNXK_POLL_JOB_START, &fw->req->cn10k_req.status); plt_wmb(); /* Enqueue FW load through scratch registers */ timeout = true; timeout_cycle = plt_tsc_cycles() + ML_CNXK_CMD_TIMEOUT * plt_tsc_hz(); - roc_ml_scratch_enqueue(&cn10k_mldev->roc, &fw->req->jd); + roc_ml_scratch_enqueue(&cn10k_mldev->roc, &fw->req->cn10k_req.jd); plt_rmb(); do { if (roc_ml_scratch_is_done_bit_set(&cn10k_mldev->roc) && - (plt_read64(&fw->req->status) == ML_CNXK_POLL_JOB_FINISH)) { + (plt_read64(&fw->req->cn10k_req.status) == ML_CNXK_POLL_JOB_FINISH)) { timeout = false; break; } } while (plt_tsc_cycles() < timeout_cycle);
[PATCH v2 06/34] ml/cnxk: add generic cnxk xstats structures
Introduced generic xstats structures and renamed cn10k xstats enumerations with cnxk prefix. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_dev.h | 86 +--- drivers/ml/cnxk/cn10k_ml_model.h | 6 +- drivers/ml/cnxk/cn10k_ml_ops.c | 169 ++- drivers/ml/cnxk/cnxk_ml_xstats.h | 128 +++ drivers/ml/cnxk/meson.build | 1 + 5 files changed, 210 insertions(+), 180 deletions(-) create mode 100644 drivers/ml/cnxk/cnxk_ml_xstats.h diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h index 1852d4f6c9..be989e0a20 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.h +++ b/drivers/ml/cnxk/cn10k_ml_dev.h @@ -10,6 +10,7 @@ #include "cn10k_ml_ocm.h" #include "cnxk_ml_io.h" +#include "cnxk_ml_xstats.h" /* Dummy Device ops */ extern struct rte_ml_dev_ops ml_dev_dummy_ops; @@ -121,89 +122,6 @@ struct cn10k_ml_fw { struct cnxk_ml_req *req; }; -/* Extended stats types enum */ -enum cn10k_ml_xstats_type { - /* Number of models loaded */ - nb_models_loaded, - - /* Number of models unloaded */ - nb_models_unloaded, - - /* Number of models started */ - nb_models_started, - - /* Number of models stopped */ - nb_models_stopped, - - /* Average inference hardware latency */ - avg_hw_latency, - - /* Minimum hardware latency */ - min_hw_latency, - - /* Maximum hardware latency */ - max_hw_latency, - - /* Average firmware latency */ - avg_fw_latency, - - /* Minimum firmware latency */ - min_fw_latency, - - /* Maximum firmware latency */ - max_fw_latency, -}; - -/* Extended stats function type enum. */ -enum cn10k_ml_xstats_fn_type { - /* Device function */ - CN10K_ML_XSTATS_FN_DEVICE, - - /* Model function */ - CN10K_ML_XSTATS_FN_MODEL, -}; - -/* Function pointer to get xstats for a type */ -typedef uint64_t (*cn10k_ml_xstats_fn)(struct rte_ml_dev *dev, uint16_t obj_idx, - enum cn10k_ml_xstats_type stat); - -/* Extended stats entry structure */ -struct cn10k_ml_xstats_entry { - /* Name-ID map */ - struct rte_ml_dev_xstats_map map; - - /* xstats mode, device or model */ - enum rte_ml_dev_xstats_mode mode; - - /* Type of xstats */ - enum cn10k_ml_xstats_type type; - - /* xstats function */ - enum cn10k_ml_xstats_fn_type fn_id; - - /* Object ID, model ID for model stat type */ - uint16_t obj_idx; - - /* Allowed to reset the stat */ - uint8_t reset_allowed; - - /* An offset to be taken away to emulate resets */ - uint64_t reset_value; -}; - -/* Extended stats data */ -struct cn10k_ml_xstats { - /* Pointer to xstats entries */ - struct cn10k_ml_xstats_entry *entries; - - /* Store num stats and offset of the stats for each model */ - uint16_t count_per_model[ML_CNXK_MAX_MODELS]; - uint16_t offset_for_model[ML_CNXK_MAX_MODELS]; - uint16_t count_mode_device; - uint16_t count_mode_model; - uint16_t count; -}; - /* Device private data */ struct cn10k_ml_dev { /* Device ROC */ @@ -216,7 +134,7 @@ struct cn10k_ml_dev { struct cn10k_ml_ocm ocm; /* Extended stats data */ - struct cn10k_ml_xstats xstats; + struct cnxk_ml_xstats xstats; /* Enable / disable model data caching */ int cache_model_data; diff --git a/drivers/ml/cnxk/cn10k_ml_model.h b/drivers/ml/cnxk/cn10k_ml_model.h index 74ada1531a..5c32f48c68 100644 --- a/drivers/ml/cnxk/cn10k_ml_model.h +++ b/drivers/ml/cnxk/cn10k_ml_model.h @@ -404,7 +404,7 @@ struct cn10k_ml_layer_addr { }; /* Model fast-path stats */ -struct cn10k_ml_layer_stats { +struct cn10k_ml_layer_xstats { /* Total hardware latency, sum of all inferences */ uint64_t hw_latency_tot; @@ -447,10 +447,10 @@ struct cn10k_ml_layer_data { struct cnxk_ml_req *req; /* Layer: Stats for burst ops 
*/ - struct cn10k_ml_layer_stats *burst_stats; + struct cn10k_ml_layer_xstats *burst_xstats; /* Layer: Stats for sync ops */ - struct cn10k_ml_layer_stats *sync_stats; + struct cn10k_ml_layer_xstats *sync_xstats; }; struct cn10k_ml_model_data { diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index 2b1fa08154..03a7447dc8 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -14,6 +14,7 @@ #include "cnxk_ml_dev.h" #include "cnxk_ml_model.h" #include "cnxk_ml_ops.h" +#include "cnxk_ml_xstats.h" /* ML model macros */ #define CN10K_ML_MODEL_MEMZONE_NAME "ml_cn10k_model_mz" @@ -429,26 +430,6 @@ cn10k_ml_prep_fp_job_descriptor(struct cn10k_ml_dev *cn10k_mldev, struct cnxk_ml req->cn10k_req.jd.model_run.num_batches = op->nb_batches; } -struct xstat_info { - char name[32]; - enum cn10k_ml_xstats_type
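[Editor's note] The new cnxk_ml_xstats.h is not shown in full above. Judging from the cn10k definitions it replaces, and from the cnxk names used later in the series, the generic layout is roughly the following sketch, with the same field set as the deleted structures (reconstructed, not verbatim from the patch):

struct cnxk_ml_xstats_entry {
        struct rte_ml_dev_xstats_map map;  /* name-ID map */
        enum rte_ml_dev_xstats_mode mode;  /* device or model mode */
        enum cnxk_ml_xstats_type type;     /* stat being tracked */
        enum cnxk_ml_xstats_fn_type fn_id; /* device or model function */
        uint16_t obj_idx;                  /* model ID for model-mode stats */
        uint8_t reset_allowed;             /* stat may be reset */
        uint64_t reset_value;              /* offset subtracted to emulate resets */
};

struct cnxk_ml_xstats {
        struct cnxk_ml_xstats_entry *entries;
        uint16_t count_per_model[ML_CNXK_MAX_MODELS];
        uint16_t offset_for_model[ML_CNXK_MAX_MODELS];
        uint16_t count_mode_device;
        uint16_t count_mode_model;
        uint16_t count;
};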
[PATCH v2 10/34] ml/cnxk: update model load and unload functions
Implemented cnxk wrapper functions to load and unload ML models. Wrapper functions would invoke the cn10k model load and unload functions. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_model.c | 239 - drivers/ml/cnxk/cn10k_ml_model.h | 25 +-- drivers/ml/cnxk/cn10k_ml_ops.c | 296 ++- drivers/ml/cnxk/cn10k_ml_ops.h | 12 +- drivers/ml/cnxk/cnxk_ml_dev.h| 15 ++ drivers/ml/cnxk/cnxk_ml_ops.c| 144 ++- drivers/ml/cnxk/cnxk_ml_ops.h| 2 + 7 files changed, 455 insertions(+), 278 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_model.c b/drivers/ml/cnxk/cn10k_ml_model.c index 2a0ae44cfd..9a336cd18f 100644 --- a/drivers/ml/cnxk/cn10k_ml_model.c +++ b/drivers/ml/cnxk/cn10k_ml_model.c @@ -6,7 +6,6 @@ #include -#include "cn10k_ml_dev.h" #include "cn10k_ml_model.h" #include "cn10k_ml_ocm.h" @@ -318,42 +317,31 @@ cn10k_ml_layer_addr_update(struct cnxk_ml_layer *layer, uint8_t *buffer, uint8_t { struct cn10k_ml_model_metadata *metadata; struct cn10k_ml_layer_addr *addr; - size_t model_data_size; uint8_t *dma_addr_load; - uint8_t *dma_addr_run; int fpos; metadata = &layer->glow.metadata; addr = &layer->glow.addr; - model_data_size = metadata->init_model.file_size + metadata->main_model.file_size + - metadata->finish_model.file_size + metadata->weights_bias.file_size; /* Base address */ addr->base_dma_addr_load = base_dma_addr; - addr->base_dma_addr_run = PLT_PTR_ADD(addr->base_dma_addr_load, model_data_size); /* Init section */ dma_addr_load = addr->base_dma_addr_load; - dma_addr_run = addr->base_dma_addr_run; fpos = sizeof(struct cn10k_ml_model_metadata); addr->init_load_addr = dma_addr_load; - addr->init_run_addr = dma_addr_run; rte_memcpy(dma_addr_load, PLT_PTR_ADD(buffer, fpos), metadata->init_model.file_size); /* Main section */ dma_addr_load += metadata->init_model.file_size; - dma_addr_run += metadata->init_model.file_size; fpos += metadata->init_model.file_size; addr->main_load_addr = dma_addr_load; - addr->main_run_addr = dma_addr_run; rte_memcpy(dma_addr_load, PLT_PTR_ADD(buffer, fpos), metadata->main_model.file_size); /* Finish section */ dma_addr_load += metadata->main_model.file_size; - dma_addr_run += metadata->main_model.file_size; fpos += metadata->main_model.file_size; addr->finish_load_addr = dma_addr_load; - addr->finish_run_addr = dma_addr_run; rte_memcpy(dma_addr_load, PLT_PTR_ADD(buffer, fpos), metadata->finish_model.file_size); /* Weights and Bias section */ @@ -365,140 +353,140 @@ cn10k_ml_layer_addr_update(struct cnxk_ml_layer *layer, uint8_t *buffer, uint8_t } void -cn10k_ml_layer_info_update(struct cnxk_ml_layer *layer) +cn10k_ml_layer_io_info_update(struct cnxk_ml_io_info *io_info, + struct cn10k_ml_model_metadata *metadata) { - struct cn10k_ml_model_metadata *metadata; uint8_t i; uint8_t j; - metadata = &layer->glow.metadata; - /* Inputs */ - layer->info.nb_inputs = metadata->model.num_input; - layer->info.total_input_sz_d = 0; - layer->info.total_input_sz_q = 0; + io_info->nb_inputs = metadata->model.num_input; + io_info->total_input_sz_d = 0; + io_info->total_input_sz_q = 0; for (i = 0; i < metadata->model.num_input; i++) { if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) { - strncpy(layer->info.input[i].name, (char *)metadata->input1[i].input_name, + strncpy(io_info->input[i].name, (char *)metadata->input1[i].input_name, MRVL_ML_INPUT_NAME_LEN); - layer->info.input[i].dtype = metadata->input1[i].input_type; - layer->info.input[i].qtype = metadata->input1[i].model_input_type; - layer->info.input[i].nb_dims = 4; - layer->info.input[i].shape[0] = 
metadata->input1[i].shape.w; - layer->info.input[i].shape[1] = metadata->input1[i].shape.x; - layer->info.input[i].shape[2] = metadata->input1[i].shape.y; - layer->info.input[i].shape[3] = metadata->input1[i].shape.z; - layer->info.input[i].nb_elements = + io_info->input[i].dtype = metadata->input1[i].input_type; + io_info->input[i].qtype = metadata->input1[i].model_input_type; + io_info->input[i].nb_dims = 4; + io_info->input[i].shape[0] = metadata->input1[i].shape.w; + io_info->input[i].shape[1] = metadata->input1[i].shape.x; + io_in
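[Editor's note] The wrapper pattern introduced here repeats across the next several patches: the rte_ml_dev ops callback lives in the cnxk layer, validates arguments, then hands off to the cn10k backend. A condensed sketch of the unload path, with state checks omitted (see the cnxk_ml_ops.c hunks for the full version):

static int
cnxk_ml_model_unload(struct rte_ml_dev *dev, uint16_t model_id)
{
        struct cnxk_ml_dev *cnxk_mldev;
        struct cnxk_ml_model *model;

        if (dev == NULL)
                return -EINVAL;

        cnxk_mldev = dev->data->dev_private;
        model = dev->data->models[model_id];
        if (model == NULL) {
                plt_err("Invalid model_id = %u", model_id);
                return -EINVAL;
        }

        return cn10k_ml_model_unload(cnxk_mldev, model);
}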
[PATCH v2 11/34] ml/cnxk: update model start and stop functions
Implemented cnxk wrapper functions to start and stop ML models. Wrapper functions would invoke the cn10k model start and stop functions. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_ocm.c | 28 ++-- drivers/ml/cnxk/cn10k_ml_ocm.h | 12 +- drivers/ml/cnxk/cn10k_ml_ops.c | 282 - drivers/ml/cnxk/cn10k_ml_ops.h | 8 +- drivers/ml/cnxk/cnxk_ml_ops.c | 48 +- drivers/ml/cnxk/cnxk_ml_ops.h | 1 + 6 files changed, 240 insertions(+), 139 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_ocm.c b/drivers/ml/cnxk/cn10k_ml_ocm.c index 5682778e87..2d900dbc78 100644 --- a/drivers/ml/cnxk/cn10k_ml_ocm.c +++ b/drivers/ml/cnxk/cn10k_ml_ocm.c @@ -217,11 +217,10 @@ cn10k_ml_ocm_tilecount(uint64_t tilemask, int *start, int *end) * scratch & WB pages and OCM allocation mode. */ int -cn10k_ml_ocm_tilemask_find(struct rte_ml_dev *dev, uint8_t num_tiles, uint16_t wb_pages, +cn10k_ml_ocm_tilemask_find(struct cnxk_ml_dev *cnxk_mldev, uint8_t num_tiles, uint16_t wb_pages, uint16_t scratch_pages, uint64_t *tilemask) { struct cn10k_ml_dev *cn10k_mldev; - struct cnxk_ml_dev *cnxk_mldev; struct cn10k_ml_ocm *ocm; uint16_t used_scratch_pages_max; @@ -240,7 +239,6 @@ cn10k_ml_ocm_tilemask_find(struct rte_ml_dev *dev, uint8_t num_tiles, uint16_t w int max_slot_sz; int page_id; - cnxk_mldev = dev->data->dev_private; cn10k_mldev = &cnxk_mldev->cn10k_mldev; ocm = &cn10k_mldev->ocm; @@ -335,12 +333,10 @@ cn10k_ml_ocm_tilemask_find(struct rte_ml_dev *dev, uint8_t num_tiles, uint16_t w } void -cn10k_ml_ocm_reserve_pages(struct rte_ml_dev *dev, uint16_t model_id, uint16_t layer_id, +cn10k_ml_ocm_reserve_pages(struct cnxk_ml_dev *cnxk_mldev, uint16_t model_id, uint16_t layer_id, uint64_t tilemask, int wb_page_start, uint16_t wb_pages, uint16_t scratch_pages) { - struct cn10k_ml_dev *cn10k_mldev; - struct cnxk_ml_dev *cnxk_mldev; struct cnxk_ml_model *model; struct cnxk_ml_layer *layer; struct cn10k_ml_ocm *ocm; @@ -353,10 +349,8 @@ cn10k_ml_ocm_reserve_pages(struct rte_ml_dev *dev, uint16_t model_id, uint16_t l int tile_id; int page_id; - cnxk_mldev = dev->data->dev_private; - cn10k_mldev = &cnxk_mldev->cn10k_mldev; - ocm = &cn10k_mldev->ocm; - model = dev->data->models[model_id]; + ocm = &cnxk_mldev->cn10k_mldev.ocm; + model = cnxk_mldev->mldev->data->models[model_id]; layer = &model->layer[layer_id]; /* Get first set bit, tile_start */ @@ -398,12 +392,10 @@ cn10k_ml_ocm_reserve_pages(struct rte_ml_dev *dev, uint16_t model_id, uint16_t l } void -cn10k_ml_ocm_free_pages(struct rte_ml_dev *dev, uint16_t model_id, uint16_t layer_id) +cn10k_ml_ocm_free_pages(struct cnxk_ml_dev *cnxk_mldev, uint16_t model_id, uint16_t layer_id) { struct cnxk_ml_model *local_model; struct cnxk_ml_layer *local_layer; - struct cn10k_ml_dev *cn10k_mldev; - struct cnxk_ml_dev *cnxk_mldev; struct cnxk_ml_model *model; struct cnxk_ml_layer *layer; struct cn10k_ml_ocm *ocm; @@ -418,10 +410,8 @@ cn10k_ml_ocm_free_pages(struct rte_ml_dev *dev, uint16_t model_id, uint16_t laye uint16_t i; uint16_t j; - cnxk_mldev = dev->data->dev_private; - cn10k_mldev = &cnxk_mldev->cn10k_mldev; - ocm = &cn10k_mldev->ocm; - model = dev->data->models[model_id]; + ocm = &cnxk_mldev->cn10k_mldev.ocm; + model = cnxk_mldev->mldev->data->models[model_id]; layer = &model->layer[layer_id]; /* Update OCM info for WB memory */ @@ -440,8 +430,8 @@ cn10k_ml_ocm_free_pages(struct rte_ml_dev *dev, uint16_t model_id, uint16_t laye /* Get max scratch pages required, excluding the current model */ scratch_resize_pages = 0; - for (i = 0; i < dev->data->nb_models; i++) { - 
local_model = dev->data->models[i]; + for (i = 0; i < cnxk_mldev->mldev->data->nb_models; i++) { + local_model = cnxk_mldev->mldev->data->models[i]; if (local_model == NULL) continue; diff --git a/drivers/ml/cnxk/cn10k_ml_ocm.h b/drivers/ml/cnxk/cn10k_ml_ocm.h index 720f8caf76..97b723a56a 100644 --- a/drivers/ml/cnxk/cn10k_ml_ocm.h +++ b/drivers/ml/cnxk/cn10k_ml_ocm.h @@ -8,6 +8,8 @@ #include #include +struct cnxk_ml_dev; + /* Number of OCM tiles. */ #define ML_CN10K_OCM_NUMTILES 0x8 @@ -75,12 +77,12 @@ struct cn10k_ml_ocm { }; int cn10k_ml_ocm_tilecount(uint64_t tilemask, int *start, int *end); -int cn10k_ml_ocm_tilemask_find(struct rte_ml_dev *dev, uint8_t num_tiles, uint16_t wb_pages, +int cn10k_ml_ocm_tilemask_find(struct cnxk_ml_dev *cnxk_mldev, uint8_t num_tiles, uint16_t wb_pages,
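[Editor's note] A side effect visible in the hunks above: the OCM helpers now take the cnxk device instead of an rte_ml_dev, so they no longer dig the private data out of the device themselves. Schematically, at a call site:

/* before: helper resolved dev->data->dev_private internally */
cn10k_ml_ocm_tilemask_find(dev, num_tiles, wb_pages, scratch_pages, &tilemask);

/* after: cnxk-layer callers pass the private device directly */
cn10k_ml_ocm_tilemask_find(cnxk_mldev, num_tiles, wb_pages, scratch_pages, &tilemask);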
[PATCH v2 09/34] ml/cnxk: update queue-pair handling functions
Added cnxk wrapper function to handle ML device queue-pairs. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_ops.c | 135 + drivers/ml/cnxk/cn10k_ml_ops.h | 7 +- drivers/ml/cnxk/cnxk_ml_ops.c | 153 - drivers/ml/cnxk/cnxk_ml_ops.h | 3 - 4 files changed, 154 insertions(+), 144 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index 0f32f3b2bb..330cb050cb 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -99,93 +99,12 @@ cn10k_ml_get_poll_ptr(struct cnxk_ml_req *req) return plt_read64(req->status); } -static void -qp_memzone_name_get(char *name, int size, int dev_id, int qp_id) -{ - snprintf(name, size, "cnxk_ml_qp_mem_%u:%u", dev_id, qp_id); -} - -int -cnxk_ml_qp_destroy(const struct rte_ml_dev *dev, struct cnxk_ml_qp *qp) -{ - const struct rte_memzone *qp_mem; - char name[RTE_MEMZONE_NAMESIZE]; - int ret; - - qp_memzone_name_get(name, RTE_MEMZONE_NAMESIZE, dev->data->dev_id, qp->id); - qp_mem = rte_memzone_lookup(name); - ret = rte_memzone_free(qp_mem); - if (ret) - return ret; - - rte_free(qp); - - return 0; -} - -int -cn10k_ml_dev_queue_pair_release(struct rte_ml_dev *dev, uint16_t queue_pair_id) -{ - struct cnxk_ml_qp *qp; - int ret; - - qp = dev->data->queue_pairs[queue_pair_id]; - if (qp == NULL) - return -EINVAL; - - ret = cnxk_ml_qp_destroy(dev, qp); - if (ret) { - plt_err("Could not destroy queue pair %u", queue_pair_id); - return ret; - } - - dev->data->queue_pairs[queue_pair_id] = NULL; - - return 0; -} - -static struct cnxk_ml_qp * -cnxk_ml_qp_create(const struct rte_ml_dev *dev, uint16_t qp_id, uint32_t nb_desc, int socket_id) +void +cn10k_ml_qp_initialize(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_qp *qp) { - const struct rte_memzone *qp_mem; - char name[RTE_MEMZONE_NAMESIZE]; - struct cnxk_ml_qp *qp; - uint32_t len; - uint8_t *va; uint64_t i; - /* Allocate queue pair */ - qp = rte_zmalloc_socket("cn10k_ml_pmd_queue_pair", sizeof(struct cnxk_ml_qp), ROC_ALIGN, - socket_id); - if (qp == NULL) { - plt_err("Could not allocate queue pair"); - return NULL; - } - - /* For request queue */ - len = nb_desc * sizeof(struct cnxk_ml_req); - qp_memzone_name_get(name, RTE_MEMZONE_NAMESIZE, dev->data->dev_id, qp_id); - qp_mem = rte_memzone_reserve_aligned( - name, len, socket_id, RTE_MEMZONE_SIZE_HINT_ONLY | RTE_MEMZONE_256MB, ROC_ALIGN); - if (qp_mem == NULL) { - plt_err("Could not reserve memzone: %s", name); - goto qp_free; - } - - va = qp_mem->addr; - memset(va, 0, len); - - /* Initialize Request queue */ - qp->id = qp_id; - qp->queue.reqs = (struct cnxk_ml_req *)va; - qp->queue.head = 0; - qp->queue.tail = 0; - qp->queue.wait_cycles = ML_CNXK_CMD_TIMEOUT * plt_tsc_hz(); - qp->nb_desc = nb_desc; - qp->stats.enqueued_count = 0; - qp->stats.dequeued_count = 0; - qp->stats.enqueue_err_count = 0; - qp->stats.dequeue_err_count = 0; + RTE_SET_USED(cnxk_mldev); /* Initialize job command */ for (i = 0; i < qp->nb_desc; i++) { @@ -193,13 +112,6 @@ cnxk_ml_qp_create(const struct rte_ml_dev *dev, uint16_t qp_id, uint32_t nb_desc qp->queue.reqs[i].cn10k_req.jcmd.w1.s.jobptr = PLT_U64_CAST(&qp->queue.reqs[i].cn10k_req.jd); } - - return qp; - -qp_free: - rte_free(qp); - - return NULL; } static void @@ -1006,47 +918,6 @@ cn10k_ml_dev_stop(struct cnxk_ml_dev *cnxk_mldev) return 0; } -int -cn10k_ml_dev_queue_pair_setup(struct rte_ml_dev *dev, uint16_t queue_pair_id, - const struct rte_ml_dev_qp_conf *qp_conf, int socket_id) -{ - struct rte_ml_dev_info dev_info; - struct cnxk_ml_qp *qp; - uint32_t nb_desc; - - if 
(queue_pair_id >= dev->data->nb_queue_pairs) { - plt_err("Queue-pair id = %u (>= max queue pairs supported, %u)\n", queue_pair_id, - dev->data->nb_queue_pairs); - return -EINVAL; - } - - if (dev->data->queue_pairs[queue_pair_id] != NULL) - cn10k_ml_dev_queue_pair_release(dev, queue_pair_id); - - cnxk_ml_dev_info_get(dev, &dev_info); - if ((qp_conf->nb_desc > dev_info.max_desc) || (qp_conf->nb_desc == 0)) { - plt_err("Could not setup queue pair for %u descriptors", qp_conf->nb_desc); - return -EINVAL; - } - plt_ml_dbg("Creating queue-pair, queue_pair_id = %u, nb_desc = %u", queue_pair_id, - qp
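[Editor's note] After this patch the generic queue-pair lifecycle (memzone reservation, ring bookkeeping) lives in cnxk_ml_ops.c, and cn10k_ml_qp_initialize() is reduced to programming the hardware job commands. A rough sketch of the resulting setup flow, using the names from the diff:

/* cnxk_ml_dev_queue_pair_setup(), simplified */
qp = cnxk_ml_qp_create(dev, queue_pair_id, nb_desc, socket_id); /* generic alloc */
if (qp == NULL)
        return -ENOMEM;

cn10k_ml_qp_initialize(cnxk_mldev, qp); /* cn10k-specific job command init */
dev->data->queue_pairs[queue_pair_id] = qp;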
[PATCH v2 14/34] ml/cnxk: update device debug functions
Added cnxk wrapper for device dump and selftest debug functions. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_model.c | 118 + drivers/ml/cnxk/cn10k_ml_model.h | 1 + drivers/ml/cnxk/cn10k_ml_ocm.c | 11 +- drivers/ml/cnxk/cn10k_ml_ocm.h | 2 +- drivers/ml/cnxk/cn10k_ml_ops.c | 176 ++- drivers/ml/cnxk/cn10k_ml_ops.h | 4 +- drivers/ml/cnxk/cnxk_ml_model.c | 33 ++ drivers/ml/cnxk/cnxk_ml_model.h | 2 + drivers/ml/cnxk/cnxk_ml_ops.c| 39 ++- drivers/ml/cnxk/cnxk_ml_utils.c | 15 +++ drivers/ml/cnxk/cnxk_ml_utils.h | 17 +++ drivers/ml/cnxk/meson.build | 2 + 12 files changed, 237 insertions(+), 183 deletions(-) create mode 100644 drivers/ml/cnxk/cnxk_ml_utils.c create mode 100644 drivers/ml/cnxk/cnxk_ml_utils.h diff --git a/drivers/ml/cnxk/cn10k_ml_model.c b/drivers/ml/cnxk/cn10k_ml_model.c index 9a336cd18f..9e92d4acf3 100644 --- a/drivers/ml/cnxk/cn10k_ml_model.c +++ b/drivers/ml/cnxk/cn10k_ml_model.c @@ -12,6 +12,7 @@ #include "cnxk_ml_dev.h" #include "cnxk_ml_model.h" #include "cnxk_ml_ops.h" +#include "cnxk_ml_utils.h" static enum rte_ml_io_type cn10k_ml_io_type_map(uint8_t type) @@ -591,3 +592,120 @@ cn10k_ml_model_info_set(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *mo rte_ml_io_type_size_get(io_info->output[i].qtype); } } + +void +cn10k_ml_layer_print(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_layer *layer, FILE *fp) +{ + struct cn10k_ml_ocm *ocm; + char str[STR_LEN]; + uint8_t i; + uint8_t j; + + ocm = &cnxk_mldev->cn10k_mldev.ocm; + + /* Print debug info */ + cnxk_ml_print_line(fp, LINE_LEN); + fprintf(fp, " Layer Information (Layer ID: %u, Name: %s)\n", + cnxk_mldev->index_map[layer->index].layer_id, layer->name); + cnxk_ml_print_line(fp, LINE_LEN); + fprintf(fp, "%*s : %u\n", FIELD_LEN, "index", layer->index); + fprintf(fp, "%*s : %s\n", FIELD_LEN, "name", layer->name); + fprintf(fp, "%*s : %u.%u.%u.%u\n", FIELD_LEN, "version", + layer->glow.metadata.model.version[0], layer->glow.metadata.model.version[1], + layer->glow.metadata.model.version[2], layer->glow.metadata.model.version[3]); + fprintf(fp, "%*s : 0x%016lx\n", FIELD_LEN, "layer", PLT_U64_CAST(layer)); + fprintf(fp, "%*s : %u\n", FIELD_LEN, "batch_size", layer->batch_size); + + /* Print model state */ + if (layer->state == ML_CNXK_LAYER_STATE_LOADED) + fprintf(fp, "%*s : %s\n", FIELD_LEN, "state", "loaded"); + if (layer->state == ML_CNXK_LAYER_STATE_JOB_ACTIVE) + fprintf(fp, "%*s : %s\n", FIELD_LEN, "state", "job_active"); + if (layer->state == ML_CNXK_LAYER_STATE_STARTED) + fprintf(fp, "%*s : %s\n", FIELD_LEN, "state", "started"); + + /* Print OCM status */ + fprintf(fp, "%*s : %" PRIu64 " bytes\n", FIELD_LEN, "wb_size", + layer->glow.metadata.model.ocm_wb_range_end - + layer->glow.metadata.model.ocm_wb_range_start + 1); + fprintf(fp, "%*s : %u\n", FIELD_LEN, "wb_pages", layer->glow.ocm_map.wb_pages); + fprintf(fp, "%*s : %" PRIu64 " bytes\n", FIELD_LEN, "scratch_size", + ocm->size_per_tile - layer->glow.metadata.model.ocm_tmp_range_floor); + fprintf(fp, "%*s : %u\n", FIELD_LEN, "scratch_pages", layer->glow.ocm_map.scratch_pages); + fprintf(fp, "%*s : %u\n", FIELD_LEN, "num_tiles", + layer->glow.metadata.model.tile_end - layer->glow.metadata.model.tile_start + 1); + + if (layer->state == ML_CNXK_LAYER_STATE_STARTED) { + fprintf(fp, "%*s : 0x%0*" PRIx64 "\n", FIELD_LEN, "tilemask", + ML_CN10K_OCM_NUMTILES / 4, layer->glow.ocm_map.tilemask); + fprintf(fp, "%*s : 0x%" PRIx64 "\n", FIELD_LEN, "ocm_wb_start", + layer->glow.ocm_map.wb_page_start * ocm->page_size); + } + + fprintf(fp, "%*s : %u\n", 
FIELD_LEN, "num_inputs", layer->glow.metadata.model.num_input); + fprintf(fp, "%*s : %u\n", FIELD_LEN, "num_outputs", layer->glow.metadata.model.num_output); + fprintf(fp, "\n"); + + cnxk_ml_print_line(fp, LINE_LEN); + fprintf(fp, "%8s %16s %12s %18s\n", "input", "input_name", "input_type", + "model_input_type"); + cnxk_ml_print_line(fp, LINE_LEN); + for (i = 0; i < layer->glow.metadata.model.num_input; i++) { + if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) { + fprintf(fp, "%8u ", i); + fprintf(fp, "%*s ", 16, layer->glow.metadata.input1[i].input_name); + rte_ml_io_type_to_str(layer->glow.metadata.input1[i].input_type, str, + STR_LEN); + fprintf(fp, "%*s ", 12, str); + rt
[PATCH v2 12/34] ml/cnxk: update model utility functions
Added cnxk wrapper function to update model params and fetch model info. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_ops.c | 38 ++- drivers/ml/cnxk/cn10k_ml_ops.h | 5 ++-- drivers/ml/cnxk/cnxk_ml_ops.c | 48 -- 3 files changed, 56 insertions(+), 35 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index e5b9837ed7..0eebefee5f 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -1839,45 +1839,23 @@ cn10k_ml_model_stop(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model) } int -cn10k_ml_model_info_get(struct rte_ml_dev *dev, uint16_t model_id, - struct rte_ml_model_info *model_info) +cn10k_ml_model_params_update(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model, +void *buffer) { - struct cnxk_ml_model *model; - - model = dev->data->models[model_id]; - - if (model == NULL) { - plt_err("Invalid model_id = %u", model_id); - return -EINVAL; - } - - rte_memcpy(model_info, model->info, sizeof(struct rte_ml_model_info)); - model_info->input_info = ((struct rte_ml_model_info *)model->info)->input_info; - model_info->output_info = ((struct rte_ml_model_info *)model->info)->output_info; - - return 0; -} - -int -cn10k_ml_model_params_update(struct rte_ml_dev *dev, uint16_t model_id, void *buffer) -{ - struct cnxk_ml_model *model; - - model = dev->data->models[model_id]; + struct cnxk_ml_layer *layer; - if (model == NULL) { - plt_err("Invalid model_id = %u", model_id); - return -EINVAL; - } + RTE_SET_USED(cnxk_mldev); if (model->state == ML_CNXK_MODEL_STATE_UNKNOWN) return -1; else if (model->state != ML_CNXK_MODEL_STATE_LOADED) return -EBUSY; + layer = &model->layer[0]; + /* Update model weights & bias */ - rte_memcpy(model->layer[0].glow.addr.wb_load_addr, buffer, - model->layer[0].glow.metadata.weights_bias.file_size); + rte_memcpy(layer->glow.addr.wb_load_addr, buffer, + layer->glow.metadata.weights_bias.file_size); return 0; } diff --git a/drivers/ml/cnxk/cn10k_ml_ops.h b/drivers/ml/cnxk/cn10k_ml_ops.h index a222a43d55..ef12069f0d 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.h +++ b/drivers/ml/cnxk/cn10k_ml_ops.h @@ -317,9 +317,8 @@ int cn10k_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_para int cn10k_ml_model_unload(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model); int cn10k_ml_model_start(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model); int cn10k_ml_model_stop(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model); -int cn10k_ml_model_info_get(struct rte_ml_dev *dev, uint16_t model_id, - struct rte_ml_model_info *model_info); -int cn10k_ml_model_params_update(struct rte_ml_dev *dev, uint16_t model_id, void *buffer); +int cn10k_ml_model_params_update(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model, +void *buffer); /* I/O ops */ int cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, diff --git a/drivers/ml/cnxk/cnxk_ml_ops.c b/drivers/ml/cnxk/cnxk_ml_ops.c index 915309168d..5ad0ea8c3c 100644 --- a/drivers/ml/cnxk/cnxk_ml_ops.c +++ b/drivers/ml/cnxk/cnxk_ml_ops.c @@ -606,6 +606,50 @@ cnxk_ml_model_stop(struct rte_ml_dev *dev, uint16_t model_id) return cn10k_ml_model_stop(cnxk_mldev, model); } +static int +cnxk_ml_model_info_get(struct rte_ml_dev *dev, uint16_t model_id, + struct rte_ml_model_info *model_info) +{ + struct rte_ml_model_info *info; + struct cnxk_ml_model *model; + + if ((dev == NULL) || (model_info == NULL)) + return -EINVAL; + + model = dev->data->models[model_id]; + if (model == NULL) { + 
plt_err("Invalid model_id = %u", model_id); + return -EINVAL; + } + + info = (struct rte_ml_model_info *)model->info; + rte_memcpy(model_info, info, sizeof(struct rte_ml_model_info)); + model_info->input_info = info->input_info; + model_info->output_info = info->output_info; + + return 0; +} + +static int +cnxk_ml_model_params_update(struct rte_ml_dev *dev, uint16_t model_id, void *buffer) +{ + struct cnxk_ml_dev *cnxk_mldev; + struct cnxk_ml_model *model; + + if ((dev == NULL) || (buffer == NULL)) + return -EINVAL; + + cnxk_mldev = dev->data->dev_private; + + model = dev->data->models[model_id]; + if (model == NULL) { + plt_err("Invalid model_id = %u", model_id); + return -EINVAL; + } + + return cn10k_ml_model_params_update(cnxk_mldev, model, buffer); +} +
[PATCH v2 15/34] ml/cnxk: update device stats functions
Added cnxk wrapper function to handle ML device stats Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_ops.c | 32 -- drivers/ml/cnxk/cn10k_ml_ops.h | 2 -- drivers/ml/cnxk/cnxk_ml_ops.c | 36 -- 3 files changed, 34 insertions(+), 36 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index c3608eec99..59cd3bb9b3 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -774,38 +774,6 @@ cn10k_ml_dev_stop(struct cnxk_ml_dev *cnxk_mldev) return 0; } -int -cn10k_ml_dev_stats_get(struct rte_ml_dev *dev, struct rte_ml_dev_stats *stats) -{ - struct cnxk_ml_qp *qp; - int qp_id; - - for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) { - qp = dev->data->queue_pairs[qp_id]; - stats->enqueued_count += qp->stats.enqueued_count; - stats->dequeued_count += qp->stats.dequeued_count; - stats->enqueue_err_count += qp->stats.enqueue_err_count; - stats->dequeue_err_count += qp->stats.dequeue_err_count; - } - - return 0; -} - -void -cn10k_ml_dev_stats_reset(struct rte_ml_dev *dev) -{ - struct cnxk_ml_qp *qp; - int qp_id; - - for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) { - qp = dev->data->queue_pairs[qp_id]; - qp->stats.enqueued_count = 0; - qp->stats.dequeued_count = 0; - qp->stats.enqueue_err_count = 0; - qp->stats.dequeue_err_count = 0; - } -} - int cn10k_ml_dev_xstats_names_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode, int32_t model_id, struct rte_ml_dev_xstats_map *xstats_map, diff --git a/drivers/ml/cnxk/cn10k_ml_ops.h b/drivers/ml/cnxk/cn10k_ml_ops.h index 5fda98ae88..47e7cb12af 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.h +++ b/drivers/ml/cnxk/cn10k_ml_ops.h @@ -298,8 +298,6 @@ int cn10k_ml_dev_stop(struct cnxk_ml_dev *cnxk_mldev); int cn10k_ml_dev_dump(struct cnxk_ml_dev *cnxk_mldev, FILE *fp); int cn10k_ml_dev_selftest(struct cnxk_ml_dev *cnxk_mldev); -int cn10k_ml_dev_stats_get(struct rte_ml_dev *dev, struct rte_ml_dev_stats *stats); -void cn10k_ml_dev_stats_reset(struct rte_ml_dev *dev); int cn10k_ml_dev_xstats_names_get(struct rte_ml_dev *dev, enum rte_ml_dev_xstats_mode mode, int32_t model_id, struct rte_ml_dev_xstats_map *xstats_map, uint32_t size); diff --git a/drivers/ml/cnxk/cnxk_ml_ops.c b/drivers/ml/cnxk/cnxk_ml_ops.c index b49ab59798..ffeb3f4452 100644 --- a/drivers/ml/cnxk/cnxk_ml_ops.c +++ b/drivers/ml/cnxk/cnxk_ml_ops.c @@ -491,6 +491,38 @@ cnxk_ml_dev_queue_pair_setup(struct rte_ml_dev *dev, uint16_t queue_pair_id, return 0; } +static int +cnxk_ml_dev_stats_get(struct rte_ml_dev *dev, struct rte_ml_dev_stats *stats) +{ + struct cnxk_ml_qp *qp; + int qp_id; + + for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) { + qp = dev->data->queue_pairs[qp_id]; + stats->enqueued_count += qp->stats.enqueued_count; + stats->dequeued_count += qp->stats.dequeued_count; + stats->enqueue_err_count += qp->stats.enqueue_err_count; + stats->dequeue_err_count += qp->stats.dequeue_err_count; + } + + return 0; +} + +static void +cnxk_ml_dev_stats_reset(struct rte_ml_dev *dev) +{ + struct cnxk_ml_qp *qp; + int qp_id; + + for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) { + qp = dev->data->queue_pairs[qp_id]; + qp->stats.enqueued_count = 0; + qp->stats.dequeued_count = 0; + qp->stats.enqueue_err_count = 0; + qp->stats.dequeue_err_count = 0; + } +} + static int cnxk_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params, uint16_t *model_id) { @@ -774,8 +806,8 @@ struct rte_ml_dev_ops cnxk_ml_ops = { .dev_queue_pair_release = cnxk_ml_dev_queue_pair_release, /* 
Stats ops */ - .dev_stats_get = cn10k_ml_dev_stats_get, - .dev_stats_reset = cn10k_ml_dev_stats_reset, + .dev_stats_get = cnxk_ml_dev_stats_get, + .dev_stats_reset = cnxk_ml_dev_stats_reset, .dev_xstats_names_get = cn10k_ml_dev_xstats_names_get, .dev_xstats_by_name_get = cn10k_ml_dev_xstats_by_name_get, .dev_xstats_get = cn10k_ml_dev_xstats_get, -- 2.41.0
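[Editor's note] Again the public API is untouched; only the aggregation across queue pairs moved. A short application-side sketch (dev_id assumed valid; the struct is zeroed first since the driver accumulates with +=):

#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include <rte_mldev.h>

static void
dump_dev_stats(int16_t dev_id)
{
        struct rte_ml_dev_stats stats;

        memset(&stats, 0, sizeof(stats));
        if (rte_ml_dev_stats_get(dev_id, &stats) != 0)
                return;

        printf("enq %" PRIu64 " deq %" PRIu64 " enq_err %" PRIu64 " deq_err %" PRIu64 "\n",
               stats.enqueued_count, stats.dequeued_count,
               stats.enqueue_err_count, stats.dequeue_err_count);

        rte_ml_dev_stats_reset(dev_id); /* clears the per-qp counters */
}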
[PATCH v2 13/34] ml/cnxk: update data quantization functions
Added cnxk wrapper functions to quantize input data and dequantize output data. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_ops.c | 164 - drivers/ml/cnxk/cn10k_ml_ops.h | 7 -- drivers/ml/cnxk/cnxk_ml_io.c | 95 +++ drivers/ml/cnxk/cnxk_ml_io.h | 3 + drivers/ml/cnxk/cnxk_ml_ops.c | 78 +++- drivers/ml/cnxk/meson.build| 1 + 6 files changed, 175 insertions(+), 173 deletions(-) create mode 100644 drivers/ml/cnxk/cnxk_ml_io.c diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index 0eebefee5f..1e6aee818c 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -1860,170 +1860,6 @@ cn10k_ml_model_params_update(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_mode return 0; } -int -cn10k_ml_io_quantize(struct rte_ml_dev *dev, uint16_t model_id, struct rte_ml_buff_seg **dbuffer, -struct rte_ml_buff_seg **qbuffer) -{ - struct cnxk_ml_model *model; - uint8_t model_input_type; - uint8_t *lcl_dbuffer; - uint8_t *lcl_qbuffer; - uint8_t input_type; - float qscale; - uint32_t i; - uint32_t j; - int ret; - - model = dev->data->models[model_id]; - - if (model == NULL) { - plt_err("Invalid model_id = %u", model_id); - return -EINVAL; - } - - lcl_dbuffer = dbuffer[0]->addr; - lcl_qbuffer = qbuffer[0]->addr; - - for (i = 0; i < model->layer[0].glow.metadata.model.num_input; i++) { - if (i < MRVL_ML_NUM_INPUT_OUTPUT_1) { - input_type = model->layer[0].glow.metadata.input1[i].input_type; - model_input_type = model->layer[0].glow.metadata.input1[i].model_input_type; - qscale = model->layer[0].glow.metadata.input1[i].qscale; - } else { - j = i - MRVL_ML_NUM_INPUT_OUTPUT_1; - input_type = model->layer[0].glow.metadata.input2[j].input_type; - model_input_type = model->layer[0].glow.metadata.input2[j].model_input_type; - qscale = model->layer[0].glow.metadata.input2[j].qscale; - } - - if (input_type == model_input_type) { - rte_memcpy(lcl_qbuffer, lcl_dbuffer, model->layer[0].info.input[i].sz_d); - } else { - switch (model->layer[0].glow.metadata.input1[i].model_input_type) { - case RTE_ML_IO_TYPE_INT8: - ret = rte_ml_io_float32_to_int8( - qscale, model->layer[0].info.input[i].nb_elements, - lcl_dbuffer, lcl_qbuffer); - break; - case RTE_ML_IO_TYPE_UINT8: - ret = rte_ml_io_float32_to_uint8( - qscale, model->layer[0].info.input[i].nb_elements, - lcl_dbuffer, lcl_qbuffer); - break; - case RTE_ML_IO_TYPE_INT16: - ret = rte_ml_io_float32_to_int16( - qscale, model->layer[0].info.input[i].nb_elements, - lcl_dbuffer, lcl_qbuffer); - break; - case RTE_ML_IO_TYPE_UINT16: - ret = rte_ml_io_float32_to_uint16( - qscale, model->layer[0].info.input[i].nb_elements, - lcl_dbuffer, lcl_qbuffer); - break; - case RTE_ML_IO_TYPE_FP16: - ret = rte_ml_io_float32_to_float16( - model->layer[0].info.input[i].nb_elements, lcl_dbuffer, - lcl_qbuffer); - break; - default: - plt_err("Unsupported model_input_type[%u] : %u", i, - model->layer[0].glow.metadata.input1[i].model_input_type); - ret = -ENOTSUP; - } - if (ret < 0) - return ret; - } - - lcl_dbuffer += model->layer[0].info.input[i].sz_d; - lcl_qbuffer += model->layer[0].info.input[i].sz_q; - } - - return 0; -} - -int -cn10k_ml_io_dequantize(struct rte_ml_dev *dev, uint16_t model_id, struct rte_ml_buff_seg **qbuffer, - struct rte_ml_buff_seg **dbuffer) -{ - struct cnxk_ml_model *model; - uint8_t model_output_type; - uint8_t *lcl_qbuffer; - uint8_t *lcl_dbuffer; -
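[Editor's note] The removed logic reappears in cnxk_ml_io.c keyed on the generic cnxk_ml_io_info instead of Glow metadata. The per-input dispatch at its core, condensed from the code above:

/* Per input: plain copy when types already match, else convert with qscale */
if (input_type == model_input_type) {
        rte_memcpy(lcl_qbuffer, lcl_dbuffer, sz_d);
} else {
        switch (model_input_type) {
        case RTE_ML_IO_TYPE_INT8:
                ret = rte_ml_io_float32_to_int8(qscale, nb_elements,
                                                lcl_dbuffer, lcl_qbuffer);
                break;
        case RTE_ML_IO_TYPE_FP16:
                ret = rte_ml_io_float32_to_float16(nb_elements, lcl_dbuffer,
                                                   lcl_qbuffer);
                break;
        /* ... UINT8 / INT16 / UINT16 cases elided ... */
        default:
                ret = -ENOTSUP;
        }
}
lcl_dbuffer += sz_d;
lcl_qbuffer += sz_q;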
[PATCH v2 16/34] ml/cnxk: update device and model xstats functions
Added cnxk wrapper function to handle ML device and model extended stats. Handling resources for the xstats is done in the cnxk layer. Introduced internal xstats group. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_dev.h | 4 - drivers/ml/cnxk/cn10k_ml_ops.c | 542 +-- drivers/ml/cnxk/cn10k_ml_ops.h | 11 - drivers/ml/cnxk/cnxk_ml_dev.h| 5 + drivers/ml/cnxk/cnxk_ml_ops.c| 540 +- drivers/ml/cnxk/cnxk_ml_xstats.h | 21 +- 6 files changed, 571 insertions(+), 552 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h index be989e0a20..bde9d08901 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.h +++ b/drivers/ml/cnxk/cn10k_ml_dev.h @@ -10,7 +10,6 @@ #include "cn10k_ml_ocm.h" #include "cnxk_ml_io.h" -#include "cnxk_ml_xstats.h" /* Dummy Device ops */ extern struct rte_ml_dev_ops ml_dev_dummy_ops; @@ -133,9 +132,6 @@ struct cn10k_ml_dev { /* OCM info */ struct cn10k_ml_ocm ocm; - /* Extended stats data */ - struct cnxk_ml_xstats xstats; - /* Enable / disable model data caching */ int cache_model_data; diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index 59cd3bb9b3..f1431b89a2 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -202,107 +202,21 @@ cn10k_ml_prep_fp_job_descriptor(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_r req->cn10k_req.jd.model_run.num_batches = op->nb_batches; } -static int -cn10k_ml_xstats_init(struct rte_ml_dev *dev) -{ - struct cn10k_ml_dev *cn10k_mldev; - struct cnxk_ml_dev *cnxk_mldev; - uint16_t nb_stats; - uint16_t stat_id; - uint16_t model; - uint16_t i; - - cnxk_mldev = dev->data->dev_private; - cn10k_mldev = &cnxk_mldev->cn10k_mldev; - - /* Allocate memory for xstats entries. Don't allocate during reconfigure */ - nb_stats = RTE_DIM(device_xstats) + ML_CNXK_MAX_MODELS * RTE_DIM(layer_xstats); - if (cn10k_mldev->xstats.entries == NULL) - cn10k_mldev->xstats.entries = rte_zmalloc( - "cn10k_ml_xstats", sizeof(struct cnxk_ml_xstats_entry) * nb_stats, - PLT_CACHE_LINE_SIZE); - - if (cn10k_mldev->xstats.entries == NULL) - return -ENOMEM; - - /* Initialize device xstats */ - stat_id = 0; - for (i = 0; i < RTE_DIM(device_xstats); i++) { - cn10k_mldev->xstats.entries[stat_id].map.id = stat_id; - snprintf(cn10k_mldev->xstats.entries[stat_id].map.name, -sizeof(cn10k_mldev->xstats.entries[stat_id].map.name), "%s", -device_xstats[i].name); - - cn10k_mldev->xstats.entries[stat_id].mode = RTE_ML_DEV_XSTATS_DEVICE; - cn10k_mldev->xstats.entries[stat_id].type = device_xstats[i].type; - cn10k_mldev->xstats.entries[stat_id].fn_id = CNXK_ML_XSTATS_FN_DEVICE; - cn10k_mldev->xstats.entries[stat_id].obj_idx = 0; - cn10k_mldev->xstats.entries[stat_id].reset_allowed = device_xstats[i].reset_allowed; - stat_id++; - } - cn10k_mldev->xstats.count_mode_device = stat_id; - - /* Initialize model xstats */ - for (model = 0; model < ML_CNXK_MAX_MODELS; model++) { - cn10k_mldev->xstats.offset_for_model[model] = stat_id; - - for (i = 0; i < RTE_DIM(layer_xstats); i++) { - cn10k_mldev->xstats.entries[stat_id].map.id = stat_id; - cn10k_mldev->xstats.entries[stat_id].mode = RTE_ML_DEV_XSTATS_MODEL; - cn10k_mldev->xstats.entries[stat_id].type = layer_xstats[i].type; - cn10k_mldev->xstats.entries[stat_id].fn_id = CNXK_ML_XSTATS_FN_MODEL; - cn10k_mldev->xstats.entries[stat_id].obj_idx = model; - cn10k_mldev->xstats.entries[stat_id].reset_allowed = - layer_xstats[i].reset_allowed; - - /* Name of xstat is updated during model load */ - snprintf(cn10k_mldev->xstats.entries[stat_id].map.name, - 
sizeof(cn10k_mldev->xstats.entries[stat_id].map.name), -"Model-%u-%s", model, layer_xstats[i].name); - - stat_id++; - } - - cn10k_mldev->xstats.count_per_model[model] = RTE_DIM(layer_xstats); - } - - cn10k_mldev->xstats.count_mode_model = stat_id - cn10k_mldev->xstats.count_mode_device; - cn10k_mldev->xstats.count = stat_id; - - return 0; -} - static void -cn10k_ml_xstats_uninit(struct rte_ml_dev *dev) +cn10k_ml_xstats_layer_name_update(struct cnxk_ml_dev *cnxk_mldev, uint16_t model_id, + uint16_t layer_id) { - struct cn10k
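[Editor's note] The init code deleted above is re-homed in cnxk_ml_ops.c, with the entries array hanging off the cnxk device (cn10k_ml_dev loses its xstats member in this patch). The device-mode portion of the moved loop, condensed and assuming the field names carry over unchanged:

for (i = 0; i < RTE_DIM(device_xstats); i++) {
        cnxk_mldev->xstats.entries[stat_id].map.id = stat_id;
        cnxk_mldev->xstats.entries[stat_id].mode = RTE_ML_DEV_XSTATS_DEVICE;
        cnxk_mldev->xstats.entries[stat_id].type = device_xstats[i].type;
        cnxk_mldev->xstats.entries[stat_id].fn_id = CNXK_ML_XSTATS_FN_DEVICE;
        cnxk_mldev->xstats.entries[stat_id].obj_idx = 0;
        cnxk_mldev->xstats.entries[stat_id].reset_allowed =
                device_xstats[i].reset_allowed;
        stat_id++;
}
cnxk_mldev->xstats.count_mode_device = stat_id;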
[PATCH v2 17/34] ml/cnxk: update fast path functions
Implemented cnxk layer fast-path functions and added support for model specific fast-path functions. CNXK layer functions would invoke model specific fast-path functions. Added support for model specific poll handling functions and updated internal inference sync function. Drop use of rte_ml_op as argument. Updated function arguments to enable the function to be used as callback by TVM HW runtime. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_dev.h | 5 - drivers/ml/cnxk/cn10k_ml_ops.c | 241 drivers/ml/cnxk/cn10k_ml_ops.h | 13 +- drivers/ml/cnxk/cnxk_ml_model.h | 14 ++ drivers/ml/cnxk/cnxk_ml_ops.c | 128 + drivers/ml/cnxk/cnxk_ml_ops.h | 7 + 6 files changed, 216 insertions(+), 192 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h index bde9d08901..94a94d996f 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.h +++ b/drivers/ml/cnxk/cn10k_ml_dev.h @@ -143,11 +143,6 @@ struct cn10k_ml_dev { /* JCMD enqueue function handler */ bool (*ml_jcmdq_enqueue)(struct roc_ml *roc_ml, struct ml_job_cmd_s *job_cmd); - - /* Poll handling function pointers */ - void (*set_poll_addr)(struct cnxk_ml_req *req); - void (*set_poll_ptr)(struct cnxk_ml_req *req); - uint64_t (*get_poll_ptr)(struct cnxk_ml_req *req); }; uint64_t cn10k_ml_fw_flags_get(struct cn10k_ml_fw *fw); diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index f1431b89a2..7d809d25ae 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -69,24 +69,12 @@ static const struct cn10k_ml_stype_db_driver { {ML_DRIVER_ERR_FW_ERROR, "UNKNOWN FIRMWARE ERROR"}, }; -static inline void +__rte_hot void cn10k_ml_set_poll_addr(struct cnxk_ml_req *req) { req->status = &req->cn10k_req.status; } -static inline void -cn10k_ml_set_poll_ptr(struct cnxk_ml_req *req) -{ - plt_write64(ML_CNXK_POLL_JOB_START, req->status); -} - -static inline uint64_t -cn10k_ml_get_poll_ptr(struct cnxk_ml_req *req) -{ - return plt_read64(req->status); -} - void cn10k_ml_qp_initialize(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_qp *qp) { @@ -181,7 +169,7 @@ cn10k_ml_prep_sp_job_descriptor(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_l static __rte_always_inline void cn10k_ml_prep_fp_job_descriptor(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_req *req, - struct rte_ml_op *op) + uint16_t index, void *input, void *output, uint16_t nb_batches) { struct cn10k_ml_dev *cn10k_mldev; @@ -189,17 +177,17 @@ cn10k_ml_prep_fp_job_descriptor(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_r req->cn10k_req.jd.hdr.jce.w0.u64 = 0; req->cn10k_req.jd.hdr.jce.w1.u64 = PLT_U64_CAST(req->status); - req->cn10k_req.jd.hdr.model_id = op->model_id; + req->cn10k_req.jd.hdr.model_id = index; req->cn10k_req.jd.hdr.job_type = ML_CN10K_JOB_TYPE_MODEL_RUN; req->cn10k_req.jd.hdr.fp_flags = ML_FLAGS_POLL_COMPL; req->cn10k_req.jd.hdr.sp_flags = 0x0; req->cn10k_req.jd.hdr.result = roc_ml_addr_ap2mlip(&cn10k_mldev->roc, &req->cn10k_req.result); req->cn10k_req.jd.model_run.input_ddr_addr = - PLT_U64_CAST(roc_ml_addr_ap2mlip(&cn10k_mldev->roc, op->input[0]->addr)); + PLT_U64_CAST(roc_ml_addr_ap2mlip(&cn10k_mldev->roc, input)); req->cn10k_req.jd.model_run.output_ddr_addr = - PLT_U64_CAST(roc_ml_addr_ap2mlip(&cn10k_mldev->roc, op->output[0]->addr)); - req->cn10k_req.jd.model_run.num_batches = op->nb_batches; + PLT_U64_CAST(roc_ml_addr_ap2mlip(&cn10k_mldev->roc, output)); + req->cn10k_req.jd.model_run.num_batches = nb_batches; } static void @@ -236,30 +224,15 @@ cn10k_ml_xstats_layer_name_update(struct cnxk_ml_dev 
*cnxk_mldev, uint16_t model static int cn10k_ml_cache_model_data(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_layer *layer) { - struct rte_ml_buff_seg seg[2]; - struct rte_ml_buff_seg *inp; - struct rte_ml_buff_seg *out; - struct rte_ml_op op; - char str[RTE_MEMZONE_NAMESIZE]; const struct plt_memzone *mz; uint64_t isize = 0; uint64_t osize = 0; int ret = 0; - uint32_t i; - - inp = &seg[0]; - out = &seg[1]; /* Create input and output buffers. */ - for (i = 0; i < layer->info.nb_inputs; i++) - isize += layer->info.input[i].sz_q; - - for (i = 0; i < layer->info.nb_outputs; i++) - osize += layer->info.output[i].sz_q; - - isize = layer->batch_size * isize; - osize = layer->batch_size * osize; + isize = layer->info.total_input_sz_q; + osize = layer->info.total_output_sz_q; snprintf(str, RTE_MEMZONE_NAMESIZE, "%s_%u", "ml_dummy_io", layer->index); mz = plt_m
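[Editor's note] The interface change that enables TVM callbacks is easiest to see at the fast-path job-descriptor call: the rte_ml_op is unpacked by the caller, so the same entry point can be driven by code that has no rte_ml_op at all. Schematically:

/* before: tied to the mldev op structure */
cn10k_ml_prep_fp_job_descriptor(cnxk_mldev, req, op);

/* after: raw buffers, callable from mldev enqueue or a TVM runtime hook */
cn10k_ml_prep_fp_job_descriptor(cnxk_mldev, req, index,
                                op->input[0]->addr, op->output[0]->addr,
                                op->nb_batches);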
[PATCH v2 19/34] ml/cnxk: support config and close of tvmdp library
Added support to configure and close TVMDP library based on ML device configuration options. Updated meson build to enable Jansson, TVM runtime, TVMDP library as build dependencies. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cnxk_ml_ops.c | 15 ++ drivers/ml/cnxk/meson.build| 50 ++ drivers/ml/cnxk/mvtvm_ml_ops.c | 42 drivers/ml/cnxk/mvtvm_ml_ops.h | 19 + 4 files changed, 126 insertions(+) create mode 100644 drivers/ml/cnxk/mvtvm_ml_ops.c create mode 100644 drivers/ml/cnxk/mvtvm_ml_ops.h diff --git a/drivers/ml/cnxk/cnxk_ml_ops.c b/drivers/ml/cnxk/cnxk_ml_ops.c index e6c67c71f5..358f16cead 100644 --- a/drivers/ml/cnxk/cnxk_ml_ops.c +++ b/drivers/ml/cnxk/cnxk_ml_ops.c @@ -9,6 +9,10 @@ #include "cn10k_ml_ops.h" +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM +#include "mvtvm_ml_ops.h" +#endif + #include "cnxk_ml_dev.h" #include "cnxk_ml_io.h" #include "cnxk_ml_model.h" @@ -625,6 +629,12 @@ cnxk_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *co goto error; } +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + ret = mvtvm_ml_dev_configure(cnxk_mldev, conf); + if (ret != 0) + goto error; +#endif + /* Set device capabilities */ cnxk_mldev->max_nb_layers = cnxk_mldev->cn10k_mldev.fw.req->cn10k_req.jd.fw_load.cap.s.max_models; @@ -685,6 +695,11 @@ cnxk_ml_dev_close(struct rte_ml_dev *dev) /* Un-initialize xstats */ cnxk_ml_xstats_uninit(cnxk_mldev); +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + if (mvtvm_ml_dev_close(cnxk_mldev) != 0) + plt_err("Failed to close MVTVM ML Device"); +#endif + if (cn10k_ml_dev_close(cnxk_mldev) != 0) plt_err("Failed to close CN10K ML Device"); diff --git a/drivers/ml/cnxk/meson.build b/drivers/ml/cnxk/meson.build index 575f08f9c0..61f7fa32af 100644 --- a/drivers/ml/cnxk/meson.build +++ b/drivers/ml/cnxk/meson.build @@ -7,6 +7,32 @@ if not is_linux or not dpdk_conf.get('RTE_ARCH_64') subdir_done() endif +enable_mvtvm = true + +if not jansson_dep.found() +message('drivers/ml/cnxk: jansson not found') +enable_mvtvm = false +endif + +if not cc.check_header('dlpack/dlpack.h') +message('drivers/ml/cnxk: dlpack.h not found') +enable_mvtvm = false +endif + +tvmrt_lib = cc.find_library('tvm_runtime', required: false) +if tvmrt_lib.found() +tvmrt_dep = declare_dependency(dependencies: tvmrt_lib) +else +message('drivers/ml/cnxk: tvm_runtime not found') +enable_mvtvm = false +endif + +tvmdp_dep = dependency('tvmdp', required: false) +if not tvmdp_dep.found() +message('drivers/ml/cnxk: tvmdp not found') +enable_mvtvm = false +endif + driver_sdk_headers = files( 'cn10k_ml_dev.h', 'cn10k_ml_ops.h', @@ -34,6 +60,30 @@ sources = files( deps += ['mldev', 'common_cnxk', 'kvargs', 'hash'] +if enable_mvtvm + +dpdk_conf.set('RTE_MLDEV_CNXK_ENABLE_MVTVM', true) + +driver_sdk_headers += files( +'mvtvm_ml_ops.h', +) + +sources += files( +'mvtvm_ml_ops.c', +) + +ext_deps += tvmrt_dep +ext_deps += tvmdp_dep +ext_deps += cc.find_library('stdc++', required: true) +ext_deps += jansson_dep + +deps += ['bus_vdev'] + +message('drivers/ml/cnxk: Enabled TVM model support') +else +message('drivers/ml/cnxk: Disabled TVM model support') +endif + require_iova_in_mbuf = false if get_option('buildtype').contains('debug') diff --git a/drivers/ml/cnxk/mvtvm_ml_ops.c b/drivers/ml/cnxk/mvtvm_ml_ops.c new file mode 100644 index 00..f2b9499cf4 --- /dev/null +++ b/drivers/ml/cnxk/mvtvm_ml_ops.c @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Marvell. 
+ */ + +#include +#include +#include +#include + +#include "mvtvm_ml_ops.h" + +#include "cnxk_ml_dev.h" + +int +mvtvm_ml_dev_configure(struct cnxk_ml_dev *cnxk_mldev, const struct rte_ml_dev_config *conf) +{ + int ret; + + RTE_SET_USED(conf); + + /* Configure TVMDP library */ + ret = tvmdp_configure(cnxk_mldev->mldev->data->nb_models, rte_get_tsc_cycles); + if (ret != 0) + plt_err("TVMDP configuration failed, error = %d\n", ret); + + return ret; +} + +int +mvtvm_ml_dev_close(struct cnxk_ml_dev *cnxk_mldev) +{ + int ret; + + RTE_SET_USED(cnxk_mldev); + + /* Close TVMDP library configuration */ + ret = tvmdp_close(); + if (ret != 0) + plt_err("TVMDP close failed, error = %d\n", ret); + + return ret; +} diff --git a/drivers/ml/cnxk/mvtvm_ml_ops.h b/drivers/ml/cnxk/mvtvm_ml_ops.h new file mode 100644 index 00..305b4681ed --- /dev/null +++ b/drivers/ml/cnxk/mvtvm_ml_ops.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Marvell. + */ + +#ifndef _MVTVM_ML_OPS_H_ +#define _MVTVM_ML_OPS_H_ + +#include + +#include + +#include + +str
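[Editor's note] The mvtvm_ml_ops.h hunk is cut off above; based on the two definitions in mvtvm_ml_ops.c, the remainder plausibly declares just the configure/close entry points (reconstructed, not verbatim):

struct cnxk_ml_dev;

int mvtvm_ml_dev_configure(struct cnxk_ml_dev *cnxk_mldev,
                           const struct rte_ml_dev_config *conf);
int mvtvm_ml_dev_close(struct cnxk_ml_dev *cnxk_mldev);

#endif /* _MVTVM_ML_OPS_H_ */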
[PATCH v2 18/34] ml/cnxk: move error handling to cnxk layer
Move error type structures to cnxk layer. cn10k layer to handle fw and hw error sub-types only. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_dev.h | 41 ++- drivers/ml/cnxk/cn10k_ml_ops.c | 93 +- drivers/ml/cnxk/cnxk_ml_dev.c | 8 +++ drivers/ml/cnxk/cnxk_ml_dev.h | 18 +++ drivers/ml/cnxk/cnxk_ml_ops.c | 2 +- 5 files changed, 78 insertions(+), 84 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h index 94a94d996f..2e7eb6c9ef 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.h +++ b/drivers/ml/cnxk/cn10k_ml_dev.h @@ -52,38 +52,27 @@ struct cnxk_ml_dev; struct cnxk_ml_req; struct cnxk_ml_qp; -/* Error types enumeration */ -enum cn10k_ml_error_etype { - /* 0x0 */ ML_ETYPE_NO_ERROR = 0, /* No error */ - /* 0x1 */ ML_ETYPE_FW_NONFATAL, /* Firmware non-fatal error */ - /* 0x2 */ ML_ETYPE_HW_NONFATAL, /* Hardware non-fatal error */ - /* 0x3 */ ML_ETYPE_HW_FATAL, /* Hardware fatal error */ - /* 0x4 */ ML_ETYPE_HW_WARNING, /* Hardware warning */ - /* 0x5 */ ML_ETYPE_DRIVER, /* Driver specific error */ - /* 0x6 */ ML_ETYPE_UNKNOWN, /* Unknown error */ -}; - /* Firmware non-fatal error sub-type */ enum cn10k_ml_error_stype_fw_nf { - /* 0x0 */ ML_FW_ERR_NOERR = 0, /* No error */ - /* 0x1 */ ML_FW_ERR_UNLOAD_ID_NOT_FOUND, /* Model ID not found during load */ - /* 0x2 */ ML_FW_ERR_LOAD_LUT_OVERFLOW, /* Lookup table overflow at load */ - /* 0x3 */ ML_FW_ERR_ID_IN_USE, /* Model ID already in use */ - /* 0x4 */ ML_FW_ERR_INVALID_TILEMASK,/* Invalid OCM tilemask */ - /* 0x5 */ ML_FW_ERR_RUN_LUT_OVERFLOW,/* Lookup table overflow at run */ - /* 0x6 */ ML_FW_ERR_RUN_ID_NOT_FOUND,/* Model ID not found during run */ - /* 0x7 */ ML_FW_ERR_COMMAND_NOTSUP, /* Unsupported command */ - /* 0x8 */ ML_FW_ERR_DDR_ADDR_RANGE, /* DDR address out of range */ - /* 0x9 */ ML_FW_ERR_NUM_BATCHES_INVALID, /* Invalid number of batches */ - /* 0xA */ ML_FW_ERR_INSSYNC_TIMEOUT, /* INS sync timeout */ + /* 0x0 */ ML_CN10K_FW_ERR_NOERR = 0, /* No error */ + /* 0x1 */ ML_CN10K_FW_ERR_UNLOAD_ID_NOT_FOUND, /* Model ID not found during load */ + /* 0x2 */ ML_CN10K_FW_ERR_LOAD_LUT_OVERFLOW, /* Lookup table overflow at load */ + /* 0x3 */ ML_CN10K_FW_ERR_ID_IN_USE, /* Model ID already in use */ + /* 0x4 */ ML_CN10K_FW_ERR_INVALID_TILEMASK,/* Invalid OCM tilemask */ + /* 0x5 */ ML_CN10K_FW_ERR_RUN_LUT_OVERFLOW,/* Lookup table overflow at run */ + /* 0x6 */ ML_CN10K_FW_ERR_RUN_ID_NOT_FOUND,/* Model ID not found during run */ + /* 0x7 */ ML_CN10K_FW_ERR_COMMAND_NOTSUP, /* Unsupported command */ + /* 0x8 */ ML_CN10K_FW_ERR_DDR_ADDR_RANGE, /* DDR address out of range */ + /* 0x9 */ ML_CN10K_FW_ERR_NUM_BATCHES_INVALID, /* Invalid number of batches */ + /* 0xA */ ML_CN10K_FW_ERR_INSSYNC_TIMEOUT, /* INS sync timeout */ }; /* Driver error sub-type */ enum cn10k_ml_error_stype_driver { - /* 0x0 */ ML_DRIVER_ERR_NOERR = 0, /* No error */ - /* 0x1 */ ML_DRIVER_ERR_UNKNOWN, /* Unable to determine error sub-type */ - /* 0x2 */ ML_DRIVER_ERR_EXCEPTION, /* Firmware exception */ - /* 0x3 */ ML_DRIVER_ERR_FW_ERROR, /* Unknown firmware error */ + /* 0x0 */ ML_CN10K_DRIVER_ERR_NOERR = 0, /* No error */ + /* 0x1 */ ML_CN10K_DRIVER_ERR_UNKNOWN, /* Unable to determine error sub-type */ + /* 0x2 */ ML_CN10K_DRIVER_ERR_EXCEPTION, /* Firmware exception */ + /* 0x3 */ ML_CN10K_DRIVER_ERR_FW_ERROR, /* Unknown firmware error */ }; /* Error structure */ diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index 7d809d25ae..daeb3b712c 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ 
b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -26,47 +26,27 @@ #define ML_FLAGS_POLL_COMPL BIT(0) #define ML_FLAGS_SSO_COMPL BIT(1) -/* Error message length */ -#define ERRMSG_LEN 32 - -/* Error type database */ -static const struct cn10k_ml_etype_db { - enum cn10k_ml_error_etype etype; - char name[ERRMSG_LEN]; -} ml_etype_db[] = { - {ML_ETYPE_NO_ERROR, "NO_ERROR"},{ML_ETYPE_FW_NONFATAL, "FW_NON_FATAL"}, - {ML_ETYPE_HW_NONFATAL, "HW_NON_FATAL"}, {ML_ETYPE_HW_FATAL, "HW_FATAL"}, - {ML_ETYPE_HW_WARNING, "HW_WARNING"},{ML_ETYPE_DRIVER, "DRIVER_ERROR"}, - {ML_ETYPE_UNKNOWN, "UNKNOWN_ERROR"}, -}; - /* Hardware non-fatal error subtype database */ -static const struct cn10k_ml_stype_db_hw_nf { - enum cn10k_ml_error_stype_fw_nf stype; - char msg[ERRMSG_LEN]; -} ml_stype_db_hw_nf[] = { - {ML_FW_ERR_NOERR, "NO ERROR"}, - {ML_FW_ERR_UNLOAD_ID_NOT_FOUND, "UNLOAD MODEL ID NOT FOUND"}, - {ML_FW_ERR_LOAD_LUT_OVERFLOW, "LOAD LUT OVERFLOW"}, - {ML_F
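[Editor's note] The 18 lines added to cnxk_ml_dev.h are not visible above. Given that the generic error-type enum is removed from cn10k_ml_dev.h, they presumably carry its cnxk-prefixed equivalent, roughly as follows (names inferred from the deleted enum, not verbatim from the patch):

enum cnxk_ml_error_etype {
        /* 0x0 */ ML_CNXK_ETYPE_NO_ERROR = 0, /* No error */
        /* 0x1 */ ML_CNXK_ETYPE_FW_NONFATAL,  /* Firmware non-fatal error */
        /* 0x2 */ ML_CNXK_ETYPE_HW_NONFATAL,  /* Hardware non-fatal error */
        /* 0x3 */ ML_CNXK_ETYPE_HW_FATAL,     /* Hardware fatal error */
        /* 0x4 */ ML_CNXK_ETYPE_HW_WARNING,   /* Hardware warning */
        /* 0x5 */ ML_CNXK_ETYPE_DRIVER,       /* Driver specific error */
        /* 0x6 */ ML_CNXK_ETYPE_UNKNOWN,      /* Unknown error */
};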
[PATCH v2 21/34] ml/cnxk: add support to identify model type
Enable support to parse model buffer to identify the model type and model sub-type. Enabled basic checks for Glow model type buffer. Signed-off-by: Srikanth Yalavarthi Signed-off-by: Anup Prabhu --- drivers/ml/cnxk/cnxk_ml_model.c | 96 drivers/ml/cnxk/cnxk_ml_model.h | 1 + drivers/ml/cnxk/cnxk_ml_ops.c| 9 +++ drivers/ml/cnxk/meson.build | 6 ++ drivers/ml/cnxk/mvtvm_ml_model.c | 11 5 files changed, 123 insertions(+) create mode 100644 drivers/ml/cnxk/mvtvm_ml_model.c diff --git a/drivers/ml/cnxk/cnxk_ml_model.c b/drivers/ml/cnxk/cnxk_ml_model.c index b069d4e3a5..746d3ca5a9 100644 --- a/drivers/ml/cnxk/cnxk_ml_model.c +++ b/drivers/ml/cnxk/cnxk_ml_model.c @@ -2,11 +2,107 @@ * Copyright (c) 2023 Marvell. */ +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM +#include +#include +#endif + +#include #include +#include "cn10k_ml_model.h" + +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM +#include "mvtvm_ml_model.h" +#endif + #include "cnxk_ml_model.h" #include "cnxk_ml_utils.h" +enum cnxk_ml_model_type +cnxk_ml_model_get_type(struct rte_ml_model_params *params) +{ + struct cn10k_ml_model_metadata_header *metadata_header; + uint32_t payload_crc32c; + uint32_t header_crc32c; + +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + struct archive_entry *entry; + struct archive *a; + uint8_t i; + int ret; + + /* Assume as archive and check for read status */ + a = archive_read_new(); + archive_read_support_filter_all(a); + archive_read_support_format_all(a); + + ret = archive_read_open_memory(a, params->addr, params->size); + if (ret == ARCHIVE_OK) + goto check_tvm; + else + goto check_glow; + +check_tvm: + bool object_found[ML_MVTVM_MODEL_OBJECT_MAX] = {false, false, false}; + + /* Parse buffer for available objects */ + while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + for (i = 0; i < ML_MVTVM_MODEL_OBJECT_MAX; i++) { + if (!object_found[i] && + (strcmp(archive_entry_pathname(entry), mvtvm_object_list[i]) == 0)) + object_found[i] = true; + } + archive_read_data_skip(a); + } + + /* Check if all objects are available */ + for (i = 0; i < ML_MVTVM_MODEL_OBJECT_MAX; i++) { + if (!object_found[i]) { + plt_err("Object %s not found in archive!\n", mvtvm_object_list[i]); + return ML_CNXK_MODEL_TYPE_INVALID; + } + } + + return ML_CNXK_MODEL_TYPE_TVM; + +check_glow: +#endif + + /* Check model magic string */ + metadata_header = (struct cn10k_ml_model_metadata_header *)params->addr; + if (strncmp((char *)metadata_header->magic, MRVL_ML_MODEL_MAGIC_STRING, 4) != 0) { + plt_err("Invalid Glow model, magic = %s", metadata_header->magic); + return ML_CNXK_MODEL_TYPE_INVALID; + } + + /* Header CRC check */ + if (metadata_header->header_crc32c != 0) { + header_crc32c = rte_hash_crc( + params->addr, + sizeof(struct cn10k_ml_model_metadata_header) - sizeof(uint32_t), 0); + + if (header_crc32c != metadata_header->header_crc32c) { + plt_err("Invalid Glow model, Header CRC mismatch"); + return ML_CNXK_MODEL_TYPE_INVALID; + } + } + + /* Payload CRC check */ + if (metadata_header->payload_crc32c != 0) { + payload_crc32c = rte_hash_crc( + PLT_PTR_ADD(params->addr, sizeof(struct cn10k_ml_model_metadata_header)), + params->size - sizeof(struct cn10k_ml_model_metadata_header), 0); + + if (payload_crc32c != metadata_header->payload_crc32c) { + plt_err("Invalid Glow model, Payload CRC mismatch"); + return ML_CNXK_MODEL_TYPE_INVALID; + } + } + + return ML_CNXK_MODEL_TYPE_GLOW; +} + void cnxk_ml_model_dump(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model, FILE *fp) { diff --git a/drivers/ml/cnxk/cnxk_ml_model.h b/drivers/ml/cnxk/cnxk_ml_model.h index 
b5d6ab2b1e..577a96dc26 100644 --- a/drivers/ml/cnxk/cnxk_ml_model.h +++ b/drivers/ml/cnxk/cnxk_ml_model.h @@ -181,6 +181,7 @@ struct cnxk_ml_model { set_poll_addr_t set_poll_addr; }; +enum cnxk_ml_model_type cnxk_ml_model_get_type(struct rte_ml_model_params *params); void cnxk_ml_model_dump(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model, FILE *fp); #endif /* _CNXK_ML_MODEL_H_ */ diff --git a/drivers/ml/cnxk/cnxk_ml_ops.c b/drivers/ml/cnxk/cnxk_ml_ops.c index a20937ea11..052c69e510 100644 --- a/drivers/ml/cnxk/cnxk_ml_ops.c +++ b/drivers/ml/cnxk/cnxk_ml_ops.c @@ -10,6 +10,7 @@ #include "cn10k_ml_ops.h" #ifde
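[Editor's note] The 9 lines added to cnxk_ml_ops.c are presumably the call site in the load path, branching on the detected type before anything is parsed; a sketch:

enum cnxk_ml_model_type type;

type = cnxk_ml_model_get_type(params);
if (type == ML_CNXK_MODEL_TYPE_INVALID) {
        plt_err("Invalid / unsupported model type");
        return -EINVAL;
}
/* type is then used to pick the Glow or TVM load path */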
[PATCH v2 20/34] ml/cnxk: add structures to support TVM model type
Introduced model type, sub-type and layer type. Added internal structures for TVM model objects. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_ocm.c | 3 ++ drivers/ml/cnxk/cn10k_ml_ops.c | 6 ++- drivers/ml/cnxk/cnxk_ml_model.h | 63 +++- drivers/ml/cnxk/cnxk_ml_ops.c| 60 +- drivers/ml/cnxk/meson.build | 1 + drivers/ml/cnxk/mvtvm_ml_model.h | 46 +++ 6 files changed, 166 insertions(+), 13 deletions(-) create mode 100644 drivers/ml/cnxk/mvtvm_ml_model.h diff --git a/drivers/ml/cnxk/cn10k_ml_ocm.c b/drivers/ml/cnxk/cn10k_ml_ocm.c index 70d207e646..a7b64ddf05 100644 --- a/drivers/ml/cnxk/cn10k_ml_ocm.c +++ b/drivers/ml/cnxk/cn10k_ml_ocm.c @@ -437,6 +437,9 @@ cn10k_ml_ocm_free_pages(struct cnxk_ml_dev *cnxk_mldev, uint16_t model_id, uint1 for (j = 0; j < local_model->nb_layers; j++) { local_layer = &local_model->layer[j]; + if (local_layer->type != ML_CNXK_LAYER_TYPE_MRVL) + continue; + if (local_layer != layer && local_layer->glow.ocm_map.ocm_reserved) { if (IS_BIT_SET(local_layer->glow.ocm_map.tilemask, tile_id)) diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index daeb3b712c..db18f32052 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -650,6 +650,9 @@ cn10k_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params * if (ret != 0) return ret; + /* Set model sub type */ + model->subtype = ML_CNXK_MODEL_SUBTYPE_GLOW_MRVL; + /* Copy metadata to internal buffer */ rte_memcpy(&model->glow.metadata, params->addr, sizeof(struct cn10k_ml_model_metadata)); cn10k_ml_model_metadata_update(&model->glow.metadata); @@ -671,6 +674,7 @@ cn10k_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params * /* Load layer and get the index */ layer = &model->layer[0]; + layer->type = ML_CNXK_LAYER_TYPE_MRVL; ret = cn10k_ml_layer_load(cnxk_mldev, model->model_id, NULL, params->addr, params->size, &layer->index); if (ret != 0) { @@ -894,7 +898,7 @@ cn10k_ml_layer_start(void *device, uint16_t model_id, const char *layer_name) if (ret < 0) { cn10k_ml_layer_stop(device, model_id, layer_name); } else { - if (cn10k_mldev->cache_model_data) + if (cn10k_mldev->cache_model_data && model->type == ML_CNXK_MODEL_TYPE_GLOW) ret = cn10k_ml_cache_model_data(cnxk_mldev, layer); } diff --git a/drivers/ml/cnxk/cnxk_ml_model.h b/drivers/ml/cnxk/cnxk_ml_model.h index f618e5aa5f..b5d6ab2b1e 100644 --- a/drivers/ml/cnxk/cnxk_ml_model.h +++ b/drivers/ml/cnxk/cnxk_ml_model.h @@ -11,6 +11,10 @@ #include "cn10k_ml_model.h" +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM +#include "mvtvm_ml_model.h" +#endif + #include "cnxk_ml_io.h" struct cnxk_ml_dev; @@ -18,6 +22,45 @@ struct cnxk_ml_model; struct cnxk_ml_qp; struct cnxk_ml_req; +/* Model type */ +enum cnxk_ml_model_type { + /* Invalid model type */ + ML_CNXK_MODEL_TYPE_INVALID, + + /* Glow compiled model, for MLIP target */ + ML_CNXK_MODEL_TYPE_GLOW, + + /* TVM compiled model, for ARM64 / ARM64 + MLIP target */ + ML_CNXK_MODEL_TYPE_TVM, +}; + +/* Model subtype */ +enum cnxk_ml_model_subtype { + /* Marvell Glow model */ + ML_CNXK_MODEL_SUBTYPE_GLOW_MRVL, + + /* TVM model with single MRVL region */ + ML_CNXK_MODEL_SUBTYPE_TVM_MRVL, + + /* TVM model with LLVM regions only */ + ML_CNXK_MODEL_SUBTYPE_TVM_LLVM, + + /* TVM hybrid model, with both MRVL and LLVM regions or (> 1) MRVL regions*/ + ML_CNXK_MODEL_SUBTYPE_TVM_HYBRID, +}; + +/* Layer type */ +enum cnxk_ml_layer_type { + /* MRVL layer, for MLIP target*/ + ML_CNXK_LAYER_TYPE_UNKNOWN = 0, + + /* MRVL layer, for MLIP target*/ + 
ML_CNXK_LAYER_TYPE_MRVL, + + /* LLVM layer, for ARM64 target*/ + ML_CNXK_LAYER_TYPE_LLVM, +}; + /* Model state */ enum cnxk_ml_model_state { /* Unknown state */ @@ -53,6 +96,9 @@ struct cnxk_ml_layer { /* Name*/ char name[RTE_ML_STR_MAX]; + /* Type */ + enum cnxk_ml_layer_type type; + /* Model handle */ struct cnxk_ml_model *model; @@ -83,14 +129,27 @@ struct cnxk_ml_model { /* Device reference */ struct cnxk_ml_dev *cnxk_mldev; + /* Type */ + enum cnxk_ml_model_type type; + + /* Model subtype */ + enum cnxk_ml_model_subtype subtype; + /* ID */ uint16_t model_id; /* Name */ char name[RTE_ML_STR_MAX]; -
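[Editor's note] These enums let common code skip regions that do not execute on the MLIP; the cn10k_ml_ocm.c hunk above is the first user. The pattern, isolated for clarity:

/* Only MRVL (MLIP-target) layers own OCM pages; LLVM layers run on ARM */
for (j = 0; j < local_model->nb_layers; j++) {
        local_layer = &local_model->layer[j];
        if (local_layer->type != ML_CNXK_LAYER_TYPE_MRVL)
                continue;
        /* ... OCM accounting for MLIP layers only ... */
}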
[PATCH v2 22/34] ml/cnxk: add support to parse TVM model objects
Added support to parse TVM model objects from the model archive buffer. Added support to check for all expected objects and copy TVM model objects to internal buffers. Signed-off-by: Srikanth Yalavarthi Signed-off-by: Anup Prabhu --- drivers/ml/cnxk/cnxk_ml_ops.c| 14 +-- drivers/ml/cnxk/mvtvm_ml_model.c | 62 +++ drivers/ml/cnxk/mvtvm_ml_model.h | 3 ++ drivers/ml/cnxk/mvtvm_ml_ops.c | 63 drivers/ml/cnxk/mvtvm_ml_ops.h | 3 ++ 5 files changed, 142 insertions(+), 3 deletions(-) diff --git a/drivers/ml/cnxk/cnxk_ml_ops.c b/drivers/ml/cnxk/cnxk_ml_ops.c index 052c69e510..8e17f597af 100644 --- a/drivers/ml/cnxk/cnxk_ml_ops.c +++ b/drivers/ml/cnxk/cnxk_ml_ops.c @@ -1149,9 +1149,17 @@ cnxk_ml_model_load(struct rte_ml_dev *dev, struct rte_ml_model_params *params, u model, PLT_ALIGN_CEIL(sizeof(struct cnxk_ml_model), dev_info.align_size)); dev->data->models[lcl_model_id] = model; - ret = cn10k_ml_model_load(cnxk_mldev, params, model); - if (ret != 0) - goto error; + if (type == ML_CNXK_MODEL_TYPE_GLOW) { + ret = cn10k_ml_model_load(cnxk_mldev, params, model); + if (ret != 0) + goto error; +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + } else { + ret = mvtvm_ml_model_load(cnxk_mldev, params, model); + if (ret != 0) + goto error; +#endif + } plt_spinlock_init(&model->lock); model->state = ML_CNXK_MODEL_STATE_LOADED; diff --git a/drivers/ml/cnxk/mvtvm_ml_model.c b/drivers/ml/cnxk/mvtvm_ml_model.c index 6462267534..425a682209 100644 --- a/drivers/ml/cnxk/mvtvm_ml_model.c +++ b/drivers/ml/cnxk/mvtvm_ml_model.c @@ -2,10 +2,72 @@ * Copyright (c) 2023 Marvell. */ +#include +#include + #include +#include + #include "mvtvm_ml_model.h" /* Objects list */ char mvtvm_object_list[ML_MVTVM_MODEL_OBJECT_MAX][RTE_ML_STR_MAX] = {"mod.so", "mod.json", "mod.params"}; + +int +mvtvm_ml_model_blob_parse(struct rte_ml_model_params *params, struct mvtvm_ml_model_object *object) +{ + bool object_found[ML_MVTVM_MODEL_OBJECT_MAX] = {false, false, false}; + struct archive_entry *entry; + struct archive *a; + uint8_t i; + int ret; + + /* Open archive */ + a = archive_read_new(); + archive_read_support_filter_all(a); + archive_read_support_format_all(a); + + ret = archive_read_open_memory(a, params->addr, params->size); + if (ret != ARCHIVE_OK) + return archive_errno(a); + + /* Read archive */ + while (archive_read_next_header(a, &entry) == ARCHIVE_OK) { + for (i = 0; i < ML_MVTVM_MODEL_OBJECT_MAX; i++) { + if (!object_found[i] && + (strcmp(archive_entry_pathname(entry), mvtvm_object_list[i]) == 0)) { + memcpy(object[i].name, mvtvm_object_list[i], RTE_ML_STR_MAX); + object[i].size = archive_entry_size(entry); + object[i].buffer = rte_malloc(NULL, object[i].size, 0); + + if (archive_read_data(a, object[i].buffer, object[i].size) != + object[i].size) { + plt_err("Failed to read object from model archive: %s", + object[i].name); + goto error; + } + object_found[i] = true; + } + } + archive_read_data_skip(a); + } + + /* Check if all objects are parsed */ + for (i = 0; i < ML_MVTVM_MODEL_OBJECT_MAX; i++) { + if (!object_found[i]) { + plt_err("Object %s not found in archive!\n", mvtvm_object_list[i]); + goto error; + } + } + return 0; + +error: + for (i = 0; i < ML_MVTVM_MODEL_OBJECT_MAX; i++) { + if (object[i].buffer != NULL) + rte_free(object[i].buffer); + } + + return -EINVAL; +} diff --git a/drivers/ml/cnxk/mvtvm_ml_model.h b/drivers/ml/cnxk/mvtvm_ml_model.h index 1f6b435be0..73a45a91d6 100644 --- a/drivers/ml/cnxk/mvtvm_ml_model.h +++ b/drivers/ml/cnxk/mvtvm_ml_model.h @@ -43,4 +43,7 @@ struct mvtvm_ml_model_data { struct cnxk_ml_io_info 
info; }; +int mvtvm_ml_model_blob_parse(struct rte_ml_model_params *params, + struct mvtvm_ml_model_object *object); + #endif /* _MVTVM_ML_MODEL_H_ */ diff --git a/drivers/ml/cnxk/mvtvm_ml_ops.c b/drivers/ml/cnxk/mvtvm_ml_ops.c index f2b9499cf4..baa9099084 100644 --- a/drivers/ml/cnxk/mvtvm_ml_ops.c +++ b/drivers/ml/cnxk/mvtvm_ml_ops.c @@
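For readers unfamiliar with libarchive, the parser above follows its standard open-from-memory pattern: iterate the archive headers, match entries by pathname, then read the entry body. Below is a minimal self-contained sketch of that same pattern (illustrative only, with simplified error handling; extract_member is not a driver symbol):

#include <archive.h>
#include <archive_entry.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>

/* Return a heap copy of the named member of an in-memory archive,
 * or NULL if it is absent or cannot be read. */
static void *
extract_member(const void *blob, size_t blob_size, const char *name,
	       size_t *out_size)
{
	struct archive_entry *entry;
	struct archive *a;
	void *buf = NULL;

	a = archive_read_new();
	archive_read_support_filter_all(a);
	archive_read_support_format_all(a);

	if (archive_read_open_memory(a, blob, blob_size) != ARCHIVE_OK)
		goto done;

	while (archive_read_next_header(a, &entry) == ARCHIVE_OK) {
		if (strcmp(archive_entry_pathname(entry), name) != 0) {
			archive_read_data_skip(a); /* skip non-matching entry body */
			continue;
		}
		*out_size = (size_t)archive_entry_size(entry);
		buf = malloc(*out_size);
		if (buf != NULL &&
		    archive_read_data(a, buf, *out_size) != (ssize_t)*out_size) {
			free(buf);
			buf = NULL;
		}
		break;
	}
done:
	archive_read_free(a);
	return buf;
}

The driver version differs in that it must collect all three expected objects (mod.so, mod.json, mod.params) in a single pass and allocates from rte_malloc() rather than the heap.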
[PATCH v2 25/34] ml/cnxk: enable model unload in tvmdp library
Enable unloading model using external tvmdp library. Updated layer unload callback to support multiple layers. Signed-off-by: Srikanth Yalavarthi Signed-off-by: Anup Prabhu --- drivers/ml/cnxk/cn10k_ml_ops.c | 20 drivers/ml/cnxk/cnxk_ml_ops.c | 9 +++-- drivers/ml/cnxk/mvtvm_ml_ops.c | 28 drivers/ml/cnxk/mvtvm_ml_ops.h | 1 + 4 files changed, 56 insertions(+), 2 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index 79217165cd..85d0a9e18b 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -725,7 +725,9 @@ cn10k_ml_layer_unload(void *device, uint16_t model_id, const char *layer_name) uint16_t layer_id = 0; int ret; +#ifndef RTE_MLDEV_CNXK_ENABLE_MVTVM PLT_SET_USED(layer_name); +#endif cnxk_mldev = (struct cnxk_ml_dev *)device; if (cnxk_mldev == NULL) { @@ -739,6 +741,24 @@ cn10k_ml_layer_unload(void *device, uint16_t model_id, const char *layer_name) return -EINVAL; } +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + if (model->type == ML_CNXK_MODEL_TYPE_TVM) { + for (layer_id = 0; layer_id < model->mvtvm.metadata.model.nb_layers; layer_id++) { + if (strcmp(model->layer[layer_id].name, layer_name) == 0) + break; + } + + if (layer_id == model->mvtvm.metadata.model.nb_layers) { + plt_err("Invalid layer name: %s", layer_name); + return -EINVAL; + } + + if (model->layer[layer_id].type != ML_CNXK_LAYER_TYPE_MRVL) { + plt_err("Invalid layer name / type: %s", layer_name); + return -EINVAL; + } + } +#endif layer = &model->layer[layer_id]; snprintf(str, RTE_MEMZONE_NAMESIZE, "%s_%u_%u", CN10K_ML_LAYER_MEMZONE_NAME, diff --git a/drivers/ml/cnxk/cnxk_ml_ops.c b/drivers/ml/cnxk/cnxk_ml_ops.c index 8e17f597af..512bac641e 100644 --- a/drivers/ml/cnxk/cnxk_ml_ops.c +++ b/drivers/ml/cnxk/cnxk_ml_ops.c @@ -1182,7 +1182,7 @@ cnxk_ml_model_unload(struct rte_ml_dev *dev, uint16_t model_id) struct cnxk_ml_model *model; char str[RTE_MEMZONE_NAMESIZE]; - int ret; + int ret = 0; if (dev == NULL) return -EINVAL; @@ -1200,7 +1200,12 @@ cnxk_ml_model_unload(struct rte_ml_dev *dev, uint16_t model_id) return -EBUSY; } - ret = cn10k_ml_model_unload(cnxk_mldev, model); + if (model->type == ML_CNXK_MODEL_TYPE_GLOW) + ret = cn10k_ml_model_unload(cnxk_mldev, model); +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + else + ret = mvtvm_ml_model_unload(cnxk_mldev, model); +#endif if (ret != 0) return ret; diff --git a/drivers/ml/cnxk/mvtvm_ml_ops.c b/drivers/ml/cnxk/mvtvm_ml_ops.c index 1d585a57ff..073773e409 100644 --- a/drivers/ml/cnxk/mvtvm_ml_ops.c +++ b/drivers/ml/cnxk/mvtvm_ml_ops.c @@ -189,3 +189,31 @@ mvtvm_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params * return ret; } + +int +mvtvm_ml_model_unload(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model) +{ + char str[RTE_MEMZONE_NAMESIZE]; + const struct plt_memzone *mz; + int ret; + + RTE_SET_USED(cnxk_mldev); + + /* Initialize model in TVMDP */ + ret = tvmdp_model_unload(model->model_id); + if (ret != 0) { + plt_err("TVMDP: Model unload failed, model_id = %u, error = %d", model->model_id, + ret); + return ret; + } + + snprintf(str, RTE_MEMZONE_NAMESIZE, "%s_%u", MVTVM_ML_MODEL_MEMZONE_NAME, model->model_id); + mz = rte_memzone_lookup(str); + if (mz == NULL) { + plt_err("Memzone lookup failed for TVM model: model_id = %u, mz = %s", + model->model_id, str); + return -EINVAL; + } + + return plt_memzone_free(mz); +} diff --git a/drivers/ml/cnxk/mvtvm_ml_ops.h b/drivers/ml/cnxk/mvtvm_ml_ops.h index 6607537599..770794fe7d 100644 --- a/drivers/ml/cnxk/mvtvm_ml_ops.h +++ 
b/drivers/ml/cnxk/mvtvm_ml_ops.h @@ -18,5 +18,6 @@ int mvtvm_ml_dev_configure(struct cnxk_ml_dev *cnxk_mldev, const struct rte_ml_d int mvtvm_ml_dev_close(struct cnxk_ml_dev *cnxk_mldev); int mvtvm_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params *params, struct cnxk_ml_model *model); +int mvtvm_ml_model_unload(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model); #endif /* _MVTVM_ML_OPS_H_ */ -- 2.41.0
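The unload path above illustrates the driver's name-keyed memzone convention: a resource reserved at load time under a deterministic name is recovered later by regenerating the name and looking it up, so no pointer has to be carried around. The plt_memzone_* calls appear in the driver as thin platform wrappers over the DPDK memzone API; here is a standalone sketch of the same pattern using the public API (the naming scheme below is invented for illustration, not the driver's):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <rte_memzone.h>

#define MODEL_MZ_FMT "ml_model_mz_%u" /* assumed naming scheme */

static int
model_mem_free(uint16_t model_id)
{
	char name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	/* Re-derive the name used at reservation time */
	snprintf(name, sizeof(name), MODEL_MZ_FMT, model_id);

	mz = rte_memzone_lookup(name);
	if (mz == NULL)
		return -EINVAL; /* nothing reserved under this name */

	return rte_memzone_free(mz);
}

Because the name is derived purely from the model id, the unload side stays stateless with respect to the allocation.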
[PATCH v2 23/34] ml/cnxk: fetch layer info and load TVM model
Added support to fetch TVM model layer information and update internal structures based on the layer information Set callback functions for layer load and unload and enable model loading using TVMDP library. Added support to fetch full metadata after model load. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_ops.c | 22 - drivers/ml/cnxk/mvtvm_ml_model.h | 2 + drivers/ml/cnxk/mvtvm_ml_ops.c | 83 3 files changed, 106 insertions(+), 1 deletion(-) diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index db18f32052..79217165cd 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -508,8 +508,10 @@ cn10k_ml_layer_load(void *device, uint16_t model_id, const char *layer_name, uin int qp_id; int ret; - PLT_SET_USED(size); +#ifndef RTE_MLDEV_CNXK_ENABLE_MVTVM PLT_SET_USED(layer_name); +#endif + PLT_SET_USED(size); cnxk_mldev = (struct cnxk_ml_dev *)device; if (cnxk_mldev == NULL) { @@ -523,6 +525,24 @@ cn10k_ml_layer_load(void *device, uint16_t model_id, const char *layer_name, uin return -EINVAL; } +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + if (model->type == ML_CNXK_MODEL_TYPE_TVM) { + for (layer_id = 0; layer_id < model->mvtvm.metadata.model.nb_layers; layer_id++) { + if (strcmp(model->layer[layer_id].name, layer_name) == 0) + break; + } + + if (layer_id == model->mvtvm.metadata.model.nb_layers) { + plt_err("Invalid layer name: %s", layer_name); + return -EINVAL; + } + + if (model->layer[layer_id].type != ML_CNXK_LAYER_TYPE_MRVL) { + plt_err("Invalid layer name / type: %s", layer_name); + return -EINVAL; + } + } +#endif layer = &model->layer[layer_id]; ret = cn10k_ml_model_metadata_check(buffer, size); diff --git a/drivers/ml/cnxk/mvtvm_ml_model.h b/drivers/ml/cnxk/mvtvm_ml_model.h index 73a45a91d6..6c38217c15 100644 --- a/drivers/ml/cnxk/mvtvm_ml_model.h +++ b/drivers/ml/cnxk/mvtvm_ml_model.h @@ -11,6 +11,8 @@ #include "cnxk_ml_io.h" +struct cnxk_ml_model; + /* Maximum number of objects per model */ #define ML_MVTVM_MODEL_OBJECT_MAX 3 diff --git a/drivers/ml/cnxk/mvtvm_ml_ops.c b/drivers/ml/cnxk/mvtvm_ml_ops.c index baa9099084..d9ec411385 100644 --- a/drivers/ml/cnxk/mvtvm_ml_ops.c +++ b/drivers/ml/cnxk/mvtvm_ml_ops.c @@ -7,6 +7,8 @@ #include #include +#include "cn10k_ml_ops.h" + #include "mvtvm_ml_model.h" #include "mvtvm_ml_ops.h" @@ -51,9 +53,13 @@ mvtvm_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params * struct cnxk_ml_model *model) { struct mvtvm_ml_model_object object[ML_MVTVM_MODEL_OBJECT_MAX]; + struct tvmrt_glow_callback *callback; char str[RTE_MEMZONE_NAMESIZE]; const struct plt_memzone *mz; size_t model_object_size = 0; + uint16_t nb_mrvl_layers; + uint16_t nb_llvm_layers; + uint8_t layer_id = 0; uint64_t mz_size = 0; int ret; @@ -101,5 +107,82 @@ mvtvm_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params * rte_memcpy(model->mvtvm.object.params.addr, object[2].buffer, object[2].size); rte_free(object[2].buffer); + /* Get metadata - stage 1 */ + ret = tvmdp_model_metadata_get_stage1(model->mvtvm.object.json.addr, + model->mvtvm.object.json.size, + &model->mvtvm.metadata); + if (ret != 0) { + plt_err("TVMDP: Failed to parse metadata - stage 1, model_id = %u, error = %d", + model->model_id, ret); + goto error; + } + + /* Set model fields */ + plt_strlcpy(model->name, model->mvtvm.metadata.model.name, TVMDP_NAME_STRLEN); + model->batch_size = 1; + model->nb_layers = model->mvtvm.metadata.model.nb_layers; + + /* Update layer info */ + nb_mrvl_layers = 0; + nb_llvm_layers = 0; + 
for (layer_id = 0; layer_id < model->mvtvm.metadata.model.nb_layers; layer_id++) { + strncpy(model->layer[layer_id].name, + model->mvtvm.metadata.model.layer[layer_id].name, TVMDP_NAME_STRLEN); + if (strcmp(model->mvtvm.metadata.model.layer[layer_id].type, "mrvl") == 0 || + strcmp(model->mvtvm.metadata.model.layer[layer_id].type, "MRVL") == 0) { + model->layer[layer_id].type = ML_CNXK_LAYER_TYPE_MRVL; + nb_mrvl_layers++; + } else if (strcmp(model->mvtvm.metadata.model.layer[layer_id].type, "llvm") == 0 || + strcmp(model->mvtvm.metadata.model.la
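The layer_name-to-layer_id resolution guarded by RTE_MLDEV_CNXK_ENABLE_MVTVM recurs almost verbatim in the layer load, unload, start and stop paths of this series. Factored out, the logic looks like the sketch below (written against the driver's internal types and plt_err(); the helper itself is hypothetical, not part of the patch):

static int
tvm_layer_id_get(struct cnxk_ml_model *model, const char *layer_name,
		 uint16_t *layer_id)
{
	uint16_t i;

	/* Match the layer by name against the TVM metadata */
	for (i = 0; i < model->mvtvm.metadata.model.nb_layers; i++) {
		if (strcmp(model->layer[i].name, layer_name) == 0)
			break;
	}

	if (i == model->mvtvm.metadata.model.nb_layers) {
		plt_err("Invalid layer name: %s", layer_name);
		return -EINVAL;
	}

	/* Only Marvell (MRVL) layers go through the CN10K hardware path */
	if (model->layer[i].type != ML_CNXK_LAYER_TYPE_MRVL) {
		plt_err("Invalid layer name / type: %s", layer_name);
		return -EINVAL;
	}

	*layer_id = i;
	return 0;
}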
[PATCH v2 26/34] ml/cnxk: support start and stop for TVM models
Added support to start and stop TVM models. TVM model start would invoke layer start for all Glow layers part of the model. TVM model stop would invoke layer stop for all Glow layers part of the model. Signed-off-by: Srikanth Yalavarthi Signed-off-by: Anup Prabhu --- drivers/ml/cnxk/cn10k_ml_ops.c | 42 +++ drivers/ml/cnxk/cnxk_ml_ops.c | 18 ++-- drivers/ml/cnxk/mvtvm_ml_ops.c | 52 ++ drivers/ml/cnxk/mvtvm_ml_ops.h | 2 ++ 4 files changed, 112 insertions(+), 2 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index 85d0a9e18b..f70383b128 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -798,7 +798,9 @@ cn10k_ml_layer_start(void *device, uint16_t model_id, const char *layer_name) bool locked; int ret = 0; +#ifndef RTE_MLDEV_CNXK_ENABLE_MVTVM PLT_SET_USED(layer_name); +#endif cnxk_mldev = (struct cnxk_ml_dev *)device; if (cnxk_mldev == NULL) { @@ -812,6 +814,25 @@ cn10k_ml_layer_start(void *device, uint16_t model_id, const char *layer_name) return -EINVAL; } +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + if (model->type == ML_CNXK_MODEL_TYPE_TVM) { + for (layer_id = 0; layer_id < model->mvtvm.metadata.model.nb_layers; layer_id++) { + if (strcmp(model->layer[layer_id].name, layer_name) == 0) + break; + } + + if (layer_id == model->mvtvm.metadata.model.nb_layers) { + plt_err("Invalid layer name: %s", layer_name); + return -EINVAL; + } + + if (model->layer[layer_id].type != ML_CNXK_LAYER_TYPE_MRVL) { + plt_err("Invalid layer name / type: %s", layer_name); + return -EINVAL; + } + } +#endif + layer = &model->layer[layer_id]; cn10k_mldev = &cnxk_mldev->cn10k_mldev; ocm = &cn10k_mldev->ocm; @@ -981,7 +1002,9 @@ cn10k_ml_layer_stop(void *device, uint16_t model_id, const char *layer_name) bool locked; int ret = 0; +#ifndef RTE_MLDEV_CNXK_ENABLE_MVTVM PLT_SET_USED(layer_name); +#endif cnxk_mldev = (struct cnxk_ml_dev *)device; if (cnxk_mldev == NULL) { @@ -995,6 +1018,25 @@ cn10k_ml_layer_stop(void *device, uint16_t model_id, const char *layer_name) return -EINVAL; } +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + if (model->type == ML_CNXK_MODEL_TYPE_TVM) { + for (layer_id = 0; layer_id < model->mvtvm.metadata.model.nb_layers; layer_id++) { + if (strcmp(model->layer[layer_id].name, layer_name) == 0) + break; + } + + if (layer_id == model->mvtvm.metadata.model.nb_layers) { + plt_err("Invalid layer name: %s", layer_name); + return -EINVAL; + } + + if (model->layer[layer_id].type != ML_CNXK_LAYER_TYPE_MRVL) { + plt_err("Invalid layer name / type: %s", layer_name); + return -EINVAL; + } + } +#endif + layer = &model->layer[layer_id]; cn10k_mldev = &cnxk_mldev->cn10k_mldev; ocm = &cn10k_mldev->ocm; diff --git a/drivers/ml/cnxk/cnxk_ml_ops.c b/drivers/ml/cnxk/cnxk_ml_ops.c index 512bac641e..1e567ad45c 100644 --- a/drivers/ml/cnxk/cnxk_ml_ops.c +++ b/drivers/ml/cnxk/cnxk_ml_ops.c @@ -1233,7 +1233,14 @@ cnxk_ml_model_start(struct rte_ml_dev *dev, uint16_t model_id) return -EINVAL; } - return cn10k_ml_model_start(cnxk_mldev, model); + if (model->type == ML_CNXK_MODEL_TYPE_GLOW) + return cn10k_ml_model_start(cnxk_mldev, model); +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + else + return mvtvm_ml_model_start(cnxk_mldev, model); +#endif + + return 0; } int @@ -1253,7 +1260,14 @@ cnxk_ml_model_stop(struct rte_ml_dev *dev, uint16_t model_id) return -EINVAL; } - return cn10k_ml_model_stop(cnxk_mldev, model); + if (model->type == ML_CNXK_MODEL_TYPE_GLOW) + return cn10k_ml_model_stop(cnxk_mldev, model); +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + else + return 
mvtvm_ml_model_stop(cnxk_mldev, model); +#endif + + return 0; } static int diff --git a/drivers/ml/cnxk/mvtvm_ml_ops.c b/drivers/ml/cnxk/mvtvm_ml_ops.c index 073773e409..4015374b0d 100644 --- a/drivers/ml/cnxk/mvtvm_ml_ops.c +++ b/drivers/ml/cnxk/mvtvm_ml_ops.c @@ -217,3 +217,55 @@ mvtvm_ml_model_unload(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *mode return plt_memzone_free(mz); } + +int +mvtvm_ml_model_start(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model) +{ + struct cnxk_ml_layer *layer; + + uint16
[PATCH v2 24/34] ml/cnxk: update internal info for TVM model
Enabled updating internal IO info structures for TVM model. Compute static fields related to the model I/O. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/mvtvm_ml_model.c | 105 +++ drivers/ml/cnxk/mvtvm_ml_model.h | 1 + drivers/ml/cnxk/mvtvm_ml_ops.c | 3 + 3 files changed, 109 insertions(+) diff --git a/drivers/ml/cnxk/mvtvm_ml_model.c b/drivers/ml/cnxk/mvtvm_ml_model.c index 425a682209..86f465a645 100644 --- a/drivers/ml/cnxk/mvtvm_ml_model.c +++ b/drivers/ml/cnxk/mvtvm_ml_model.c @@ -7,10 +7,14 @@ #include +#include + #include #include "mvtvm_ml_model.h" +#include "cnxk_ml_model.h" + /* Objects list */ char mvtvm_object_list[ML_MVTVM_MODEL_OBJECT_MAX][RTE_ML_STR_MAX] = {"mod.so", "mod.json", "mod.params"}; @@ -71,3 +75,104 @@ mvtvm_ml_model_blob_parse(struct rte_ml_model_params *params, struct mvtvm_ml_mo return -EINVAL; } + +static enum rte_ml_io_type +mvtvm_ml_io_type_map(uint8_t type) +{ + switch (type) { + case kDLInt: + return RTE_ML_IO_TYPE_INT32; + case kDLUInt: + return RTE_ML_IO_TYPE_UINT32; + case kDLFloat: + return RTE_ML_IO_TYPE_FP32; + case kDLBfloat: + return RTE_ML_IO_TYPE_BFLOAT16; + } + + return RTE_ML_IO_TYPE_UNKNOWN; +} + +void +mvtvm_ml_model_io_info_update(struct cnxk_ml_model *model) +{ + struct tvmdp_model_metadata *metadata; + int32_t i; + int32_t j; + + if (model->subtype == ML_CNXK_MODEL_SUBTYPE_TVM_MRVL) + goto tvm_mrvl_model; + + metadata = &model->mvtvm.metadata; + + /* Inputs, set for layer_id = 0 */ + model->mvtvm.info.nb_inputs = metadata->model.num_input; + model->mvtvm.info.total_input_sz_d = 0; + model->mvtvm.info.total_input_sz_q = 0; + for (i = 0; i < metadata->model.num_input; i++) { + strncpy(model->mvtvm.info.input[i].name, metadata->input[i].name, + TVMDP_NAME_STRLEN); + model->mvtvm.info.input[i].dtype = + mvtvm_ml_io_type_map(metadata->input[i].datatype.code); + model->mvtvm.info.input[i].qtype = + mvtvm_ml_io_type_map(metadata->input[i].model_datatype.code); + model->mvtvm.info.input[i].nb_dims = metadata->input[i].ndim; + + model->mvtvm.info.input[i].nb_elements = 1; + for (j = 0; j < metadata->input[i].ndim; j++) { + model->mvtvm.info.input[i].shape[j] = metadata->input[i].shape[j]; + model->mvtvm.info.input[i].nb_elements *= metadata->input[i].shape[j]; + } + + model->mvtvm.info.input[i].sz_d = + model->mvtvm.info.input[i].nb_elements * + rte_ml_io_type_size_get(model->mvtvm.info.input[i].dtype); + model->mvtvm.info.input[i].sz_q = + model->mvtvm.info.input[i].nb_elements * + rte_ml_io_type_size_get(model->mvtvm.info.input[i].qtype); + + model->mvtvm.info.total_input_sz_d += model->mvtvm.info.input[i].sz_d; + model->mvtvm.info.total_input_sz_q += model->mvtvm.info.input[i].sz_q; + + plt_ml_dbg("model_id = %u, input[%u] - sz_d = %u sz_q = %u", model->model_id, i, + model->mvtvm.info.input[i].sz_d, model->mvtvm.info.input[i].sz_q); + } + + /* Outputs, set for nb_layers - 1 */ + model->mvtvm.info.nb_outputs = metadata->model.num_output; + model->mvtvm.info.total_output_sz_d = 0; + model->mvtvm.info.total_output_sz_q = 0; + for (i = 0; i < metadata->model.num_output; i++) { + strncpy(model->mvtvm.info.output[i].name, metadata->output[i].name, + TVMDP_NAME_STRLEN); + model->mvtvm.info.output[i].dtype = + mvtvm_ml_io_type_map(metadata->output[i].datatype.code); + model->mvtvm.info.output[i].qtype = + mvtvm_ml_io_type_map(metadata->output[i].model_datatype.code); + model->mvtvm.info.output[i].nb_dims = metadata->output[i].ndim; + + model->mvtvm.info.output[i].nb_elements = 1; + for (j = 0; j < metadata->output[i].ndim; j++) { + 
model->mvtvm.info.output[i].shape[j] = metadata->output[i].shape[j]; + model->mvtvm.info.output[i].nb_elements *= metadata->output[i].shape[j]; + } + + model->mvtvm.info.output[i].sz_d = + model->mvtvm.info.output[i].nb_elements * + rte_ml_io_type_size_get(model->mvtvm.info.output[i].dtype); + model->mvtvm.info.output[i].sz_q = + model->mvtvm.info.output[i].nb_elements * + r
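The per-tensor bookkeeping above reduces to a simple rule: nb_elements is the product of the shape dimensions, and the dequantized/quantized byte sizes (sz_d/sz_q) multiply that count by the element size of dtype/qtype respectively. A self-contained numeric illustration:

#include <stdint.h>
#include <stdio.h>

static uint32_t
tensor_size_bytes(const int64_t *shape, int32_t ndim, uint32_t type_size)
{
	uint32_t nb_elements = 1;
	int32_t i;

	for (i = 0; i < ndim; i++)
		nb_elements *= (uint32_t)shape[i];

	return nb_elements * type_size;
}

int
main(void)
{
	/* e.g. an NCHW fp32 input of shape [1, 3, 224, 224] */
	const int64_t shape[] = {1, 3, 224, 224};

	/* 1 * 3 * 224 * 224 = 150528 elements -> 602112 bytes */
	printf("sz_d = %u\n", tensor_size_bytes(shape, 4, (uint32_t)sizeof(float)));
	return 0;
}

The totals (total_input_sz_d/_q and the output counterparts) are then just sums of the per-tensor sizes, which later patches in the series use to place the quantized tensors back to back in a single buffer.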
[PATCH v2 27/34] ml/cnxk: update internal TVM model info structure
From: Prince Takkar Added support to update internal model info structure for TVM models. Signed-off-by: Prince Takkar Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/mvtvm_ml_model.c | 65 drivers/ml/cnxk/mvtvm_ml_model.h | 2 + drivers/ml/cnxk/mvtvm_ml_ops.c | 3 ++ 3 files changed, 70 insertions(+) diff --git a/drivers/ml/cnxk/mvtvm_ml_model.c b/drivers/ml/cnxk/mvtvm_ml_model.c index 86f465a645..8c04d4652f 100644 --- a/drivers/ml/cnxk/mvtvm_ml_model.c +++ b/drivers/ml/cnxk/mvtvm_ml_model.c @@ -13,6 +13,7 @@ #include "mvtvm_ml_model.h" +#include "cnxk_ml_dev.h" #include "cnxk_ml_model.h" /* Objects list */ @@ -176,3 +177,67 @@ mvtvm_ml_model_io_info_update(struct cnxk_ml_model *model) tvm_mrvl_model: cn10k_ml_layer_io_info_update(&model->mvtvm.info, &model->layer[0].glow.metadata); } + +void +mvtvm_ml_model_info_set(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model) +{ + struct tvmdp_model_metadata *metadata; + struct rte_ml_model_info *info; + struct rte_ml_io_info *output; + struct rte_ml_io_info *input; + uint8_t i; + + info = PLT_PTR_CAST(model->info); + input = PLT_PTR_ADD(info, sizeof(struct rte_ml_model_info)); + output = PLT_PTR_ADD(input, ML_CNXK_MODEL_MAX_INPUT_OUTPUT * sizeof(struct rte_ml_io_info)); + + /* Reset model info */ + memset(info, 0, sizeof(struct rte_ml_model_info)); + + if (model->subtype == ML_CNXK_MODEL_SUBTYPE_TVM_MRVL) + goto tvm_mrvl_model; + + metadata = &model->mvtvm.metadata; + rte_memcpy(info->name, metadata->model.name, TVMDP_NAME_STRLEN); + snprintf(info->version, RTE_ML_STR_MAX, "%u.%u.%u.%u", metadata->model.version[0], +metadata->model.version[1], metadata->model.version[2], +metadata->model.version[3]); + info->model_id = model->model_id; + info->device_id = cnxk_mldev->mldev->data->dev_id; + info->io_layout = RTE_ML_IO_LAYOUT_SPLIT; + info->min_batches = model->batch_size; + info->max_batches = model->batch_size; + info->nb_inputs = metadata->model.num_input; + info->input_info = input; + info->nb_outputs = metadata->model.num_output; + info->output_info = output; + info->wb_size = 0; + + /* Set input info */ + for (i = 0; i < info->nb_inputs; i++) { + rte_memcpy(input[i].name, metadata->input[i].name, MRVL_ML_INPUT_NAME_LEN); + input[i].nb_dims = metadata->input[i].ndim; + input[i].shape = &model->mvtvm.info.input[i].shape[0]; + input[i].type = model->mvtvm.info.input[i].qtype; + input[i].nb_elements = model->mvtvm.info.input[i].nb_elements; + input[i].size = model->mvtvm.info.input[i].nb_elements * + rte_ml_io_type_size_get(model->mvtvm.info.input[i].qtype); + } + + /* Set output info */ + for (i = 0; i < info->nb_outputs; i++) { + rte_memcpy(output[i].name, metadata->output[i].name, MRVL_ML_OUTPUT_NAME_LEN); + output[i].nb_dims = metadata->output[i].ndim; + output[i].shape = &model->mvtvm.info.output[i].shape[0]; + output[i].type = model->mvtvm.info.output[i].qtype; + output[i].nb_elements = model->mvtvm.info.output[i].nb_elements; + output[i].size = model->mvtvm.info.output[i].nb_elements * + rte_ml_io_type_size_get(model->mvtvm.info.output[i].qtype); + } + + return; + +tvm_mrvl_model: + cn10k_ml_model_info_set(cnxk_mldev, model, &model->mvtvm.info, + &model->layer[0].glow.metadata); +} diff --git a/drivers/ml/cnxk/mvtvm_ml_model.h b/drivers/ml/cnxk/mvtvm_ml_model.h index 2b25a7b568..eef424b5c2 100644 --- a/drivers/ml/cnxk/mvtvm_ml_model.h +++ b/drivers/ml/cnxk/mvtvm_ml_model.h @@ -11,6 +11,7 @@ #include "cnxk_ml_io.h" +struct cnxk_ml_dev; struct cnxk_ml_model; /* Maximum number of objects per model */ @@ -48,5 +49,6 @@ struct 
mvtvm_ml_model_data { int mvtvm_ml_model_blob_parse(struct rte_ml_model_params *params, struct mvtvm_ml_model_object *object); void mvtvm_ml_model_io_info_update(struct cnxk_ml_model *model); +void mvtvm_ml_model_info_set(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model); #endif /* _MVTVM_ML_MODEL_H_ */ diff --git a/drivers/ml/cnxk/mvtvm_ml_ops.c b/drivers/ml/cnxk/mvtvm_ml_ops.c index 4015374b0d..213151e68b 100644 --- a/drivers/ml/cnxk/mvtvm_ml_ops.c +++ b/drivers/ml/cnxk/mvtvm_ml_ops.c @@ -182,6 +182,9 @@ mvtvm_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params * /* Update model I/O data */ mvtvm_ml_model_io_info_update(model); + /* Set model info */ + mvtvm_ml_model_info_set(cnxk_mldev, model); + return 0; error: -- 2.41.0
[PATCH v2 28/34] ml/cnxk: support device dump for TVM models
Enabled support to print TVM model layer info. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cnxk_ml_model.c | 9 - drivers/ml/cnxk/cnxk_ml_ops.c| 1 + drivers/ml/cnxk/mvtvm_ml_model.c | 59 drivers/ml/cnxk/mvtvm_ml_model.h | 2 ++ 4 files changed, 70 insertions(+), 1 deletion(-) diff --git a/drivers/ml/cnxk/cnxk_ml_model.c b/drivers/ml/cnxk/cnxk_ml_model.c index 746d3ca5a9..e63ee58ab2 100644 --- a/drivers/ml/cnxk/cnxk_ml_model.c +++ b/drivers/ml/cnxk/cnxk_ml_model.c @@ -115,6 +115,8 @@ cnxk_ml_model_dump(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model, cnxk_ml_print_line(fp, LINE_LEN); fprintf(fp, "%*s : %u\n", FIELD_LEN, "model_id", model->model_id); fprintf(fp, "%*s : %s\n", FIELD_LEN, "name", model->name); + fprintf(fp, "%*s : %d\n", FIELD_LEN, "type", model->type); + fprintf(fp, "%*s : %d\n", FIELD_LEN, "subtype", model->subtype); fprintf(fp, "%*s : 0x%016lx\n", FIELD_LEN, "model", PLT_U64_CAST(model)); fprintf(fp, "%*s : %u\n", FIELD_LEN, "batch_size", model->batch_size); fprintf(fp, "%*s : %u\n", FIELD_LEN, "nb_layers", model->nb_layers); @@ -131,6 +133,11 @@ cnxk_ml_model_dump(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model, for (layer_id = 0; layer_id < model->nb_layers; layer_id++) { layer = &model->layer[layer_id]; - cn10k_ml_layer_print(cnxk_mldev, layer, fp); + if (layer->type == ML_CNXK_LAYER_TYPE_MRVL) + cn10k_ml_layer_print(cnxk_mldev, layer, fp); +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + else + mvtvm_ml_layer_print(cnxk_mldev, layer, fp); +#endif } } diff --git a/drivers/ml/cnxk/cnxk_ml_ops.c b/drivers/ml/cnxk/cnxk_ml_ops.c index 1e567ad45c..361184620b 100644 --- a/drivers/ml/cnxk/cnxk_ml_ops.c +++ b/drivers/ml/cnxk/cnxk_ml_ops.c @@ -18,6 +18,7 @@ #include "cnxk_ml_io.h" #include "cnxk_ml_model.h" #include "cnxk_ml_ops.h" +#include "cnxk_ml_utils.h" /* ML model macros */ #define CNXK_ML_MODEL_MEMZONE_NAME "ml_cnxk_model_mz" diff --git a/drivers/ml/cnxk/mvtvm_ml_model.c b/drivers/ml/cnxk/mvtvm_ml_model.c index 8c04d4652f..7086c7a407 100644 --- a/drivers/ml/cnxk/mvtvm_ml_model.c +++ b/drivers/ml/cnxk/mvtvm_ml_model.c @@ -15,6 +15,7 @@ #include "cnxk_ml_dev.h" #include "cnxk_ml_model.h" +#include "cnxk_ml_utils.h" /* Objects list */ char mvtvm_object_list[ML_MVTVM_MODEL_OBJECT_MAX][RTE_ML_STR_MAX] = {"mod.so", "mod.json", @@ -241,3 +242,61 @@ mvtvm_ml_model_info_set(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *mo cn10k_ml_model_info_set(cnxk_mldev, model, &model->mvtvm.info, &model->layer[0].glow.metadata); } + +void +mvtvm_ml_layer_print(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_layer *layer, FILE *fp) +{ + char str[STR_LEN]; + uint8_t i; + + /* Print debug info */ + cnxk_ml_print_line(fp, LINE_LEN); + fprintf(fp, " Layer Information (Layer ID: %u, Name: %s)\n", + cnxk_mldev->index_map[layer->index].layer_id, layer->name); + cnxk_ml_print_line(fp, LINE_LEN); + fprintf(fp, "%*s : %u\n", FIELD_LEN, "layer_id", + cnxk_mldev->index_map[layer->index].layer_id); + fprintf(fp, "%*s : %s\n", FIELD_LEN, "name", layer->name); + fprintf(fp, "%*s : %d\n", FIELD_LEN, "type", layer->type); + fprintf(fp, "%*s : 0x%016lx\n", FIELD_LEN, "layer", PLT_U64_CAST(layer)); + fprintf(fp, "%*s : %u\n", FIELD_LEN, "batch_size", layer->batch_size); + + /* Print model state */ + if (layer->state == ML_CNXK_LAYER_STATE_LOADED) + fprintf(fp, "%*s : %s\n", FIELD_LEN, "state", "loaded"); + if (layer->state == ML_CNXK_LAYER_STATE_JOB_ACTIVE) + fprintf(fp, "%*s : %s\n", FIELD_LEN, "state", "job_active"); + if (layer->state == ML_CNXK_LAYER_STATE_STARTED) + fprintf(fp, 
"%*s : %s\n", FIELD_LEN, "state", "started"); + + fprintf(fp, "%*s : %u\n", FIELD_LEN, "num_inputs", layer->info.nb_inputs); + fprintf(fp, "%*s : %u\n", FIELD_LEN, "num_outputs", layer->info.nb_outputs); + fprintf(fp, "\n"); + + cnxk_ml_print_line(fp, LINE_LEN); + fprintf(fp, "%8s %16s %12s\n", "input", "input_name", "input_type"); + cnxk_ml_print_line(fp, LINE_LEN); + for (i = 0; i < layer->info.nb_inputs; i++) { + fprintf(fp, "%8u ", i); + fprintf(fp, "%*s ", 16, layer->info.input[i].name); + rte_ml_io_type_to_str(layer->info.input[i].qtype, str, STR_LEN); + fprintf(fp, "%*s ", 12, str); + } + fprintf(fp, "\n"); + cnxk_ml_print_line(fp, LINE_LEN); + fprintf(fp, "\n"); + + cnxk_ml_print_line(fp, LINE_LEN); + fprintf(fp, "%8s %16s %12s\n", "output", "output_name", "output_type"); + cnxk_ml_print_line(fp, LINE_LEN); + for
[PATCH v2 29/34] ml/cnxk: enable reporting model runtime as xstats
Added model xstats entries to compute runtime latency. Allocated internal resources for TVM model xstats. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cnxk_ml_ops.c| 200 --- drivers/ml/cnxk/cnxk_ml_ops.h| 1 + drivers/ml/cnxk/cnxk_ml_xstats.h | 7 ++ drivers/ml/cnxk/mvtvm_ml_model.h | 24 drivers/ml/cnxk/mvtvm_ml_ops.c | 24 +++- 5 files changed, 238 insertions(+), 18 deletions(-) diff --git a/drivers/ml/cnxk/cnxk_ml_ops.c b/drivers/ml/cnxk/cnxk_ml_ops.c index 361184620b..f281e6070f 100644 --- a/drivers/ml/cnxk/cnxk_ml_ops.c +++ b/drivers/ml/cnxk/cnxk_ml_ops.c @@ -146,7 +146,8 @@ cnxk_ml_xstats_init(struct cnxk_ml_dev *cnxk_mldev) /* Allocate memory for xstats entries. Don't allocate during reconfigure */ nb_stats = RTE_DIM(device_xstats) + - RTE_DIM(layer_xstats) * ML_CNXK_MAX_MODELS * ML_CNXK_MODEL_MAX_LAYERS; + RTE_DIM(layer_xstats) * ML_CNXK_MAX_MODELS * ML_CNXK_MODEL_MAX_LAYERS + + RTE_DIM(model_xstats) * ML_CNXK_MAX_MODELS; if (cnxk_mldev->xstats.entries == NULL) cnxk_mldev->xstats.entries = rte_zmalloc( "cnxk_ml_xstats", sizeof(struct cnxk_ml_xstats_entry) * nb_stats, @@ -177,6 +178,25 @@ cnxk_ml_xstats_init(struct cnxk_ml_dev *cnxk_mldev) for (model = 0; model < ML_CNXK_MAX_MODELS; model++) { cnxk_mldev->xstats.offset_for_model[model] = stat_id; + for (i = 0; i < RTE_DIM(model_xstats); i++) { + cnxk_mldev->xstats.entries[stat_id].map.id = stat_id; + cnxk_mldev->xstats.entries[stat_id].mode = RTE_ML_DEV_XSTATS_MODEL; + cnxk_mldev->xstats.entries[stat_id].group = CNXK_ML_XSTATS_GROUP_MODEL; + cnxk_mldev->xstats.entries[stat_id].type = model_xstats[i].type; + cnxk_mldev->xstats.entries[stat_id].fn_id = CNXK_ML_XSTATS_FN_MODEL; + cnxk_mldev->xstats.entries[stat_id].obj_idx = model; + cnxk_mldev->xstats.entries[stat_id].layer_id = -1; + cnxk_mldev->xstats.entries[stat_id].reset_allowed = + model_xstats[i].reset_allowed; + + /* Name of xstat is updated during model load */ + snprintf(cnxk_mldev->xstats.entries[stat_id].map.name, + sizeof(cnxk_mldev->xstats.entries[stat_id].map.name), +"Model-%u-%s", model, model_xstats[i].name); + + stat_id++; + } + for (layer = 0; layer < ML_CNXK_MODEL_MAX_LAYERS; layer++) { cnxk_mldev->xstats.offset_for_layer[model][layer] = stat_id; @@ -203,7 +223,8 @@ cnxk_ml_xstats_init(struct cnxk_ml_dev *cnxk_mldev) cnxk_mldev->xstats.count_per_layer[model][layer] = RTE_DIM(layer_xstats); } - cnxk_mldev->xstats.count_per_model[model] = RTE_DIM(layer_xstats); + cnxk_mldev->xstats.count_per_model[model] = + RTE_DIM(layer_xstats) + ML_CNXK_MODEL_MAX_LAYERS * RTE_DIM(model_xstats); } cnxk_mldev->xstats.count_mode_model = stat_id - cnxk_mldev->xstats.count_mode_device; @@ -212,6 +233,42 @@ cnxk_ml_xstats_init(struct cnxk_ml_dev *cnxk_mldev) return 0; } +void +cnxk_ml_xstats_model_name_update(struct cnxk_ml_dev *cnxk_mldev, uint16_t model_id) +{ + struct cnxk_ml_model *model; + uint16_t rclk_freq; + uint16_t sclk_freq; + uint16_t stat_id; + char suffix[8]; + uint16_t i; + + model = cnxk_mldev->mldev->data->models[model_id]; + stat_id = cnxk_mldev->xstats.offset_for_model[model_id]; + + roc_clk_freq_get(&rclk_freq, &sclk_freq); + if (sclk_freq == 0) + strcpy(suffix, "cycles"); + else + strcpy(suffix, "ns"); + + /* Update xstat name based on layer name and sclk availability */ + for (i = 0; i < RTE_DIM(model_xstats); i++) { + if (model->type == ML_CNXK_MODEL_TYPE_GLOW) + snprintf(cnxk_mldev->xstats.entries[stat_id].map.name, + sizeof(cnxk_mldev->xstats.entries[stat_id].map.name), "%s-%s-%s", +model->glow.metadata.model.name, model_xstats[i].name, suffix); +#ifdef 
RTE_MLDEV_CNXK_ENABLE_MVTVM + else + snprintf(cnxk_mldev->xstats.entries[stat_id].map.name, + sizeof(cnxk_mldev->xstats.entries[stat_id].map.name), "%s-%s-%s", +model->mvtvm.metadata.model.name, model_xstats[i].name, suffix); +#endif + + stat_id++; + } +} + static void cnxk_ml_xstats_uninit(struct cnxk_ml_dev *cnxk_mldev) { @@ -249,6 +306,9 @@ cnxk_ml_dev_xstat_get(struct
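With this patch the xstats table is laid out as: the device-level entries first; then, for each model slot, the RTE_DIM(model_xstats) model-level entries followed by ML_CNXK_MODEL_MAX_LAYERS blocks of RTE_DIM(layer_xstats) layer entries, with offset_for_model[] and offset_for_layer[][] recording the block starts. A hypothetical lookup helper showing how an entry is addressed under that layout (illustrative, not part of the patch):

/* stat_idx selects one of the RTE_DIM(model_xstats) model-level entries
 * inside the model's block. */
static struct cnxk_ml_xstats_entry *
model_xstat_entry(struct cnxk_ml_dev *cnxk_mldev, uint16_t model_id,
		  uint16_t stat_idx)
{
	uint16_t base = cnxk_mldev->xstats.offset_for_model[model_id];

	return &cnxk_mldev->xstats.entries[base + stat_idx];
}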
[PATCH v2 30/34] ml/cnxk: implement I/O alloc and free callbacks
Implemented callback functions for IO allocation and free for Glow layers. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_ops.c | 123 + drivers/ml/cnxk/cn10k_ml_ops.h | 3 + drivers/ml/cnxk/mvtvm_ml_ops.c | 2 + 3 files changed, 128 insertions(+) diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index f70383b128..23e98b96c5 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -1399,3 +1399,126 @@ cn10k_ml_inference_sync(void *device, uint16_t index, void *input, void *output, error_enqueue: return ret; } + +int +cn10k_ml_io_alloc(void *device, uint16_t model_id, const char *layer_name, uint64_t **input_qbuffer, + uint64_t **output_qbuffer) +{ + struct cnxk_ml_dev *cnxk_mldev; + struct cnxk_ml_model *model; + struct cnxk_ml_layer *layer; + + char str[RTE_MEMZONE_NAMESIZE]; + const struct plt_memzone *mz; + uint16_t layer_id = 0; + uint64_t output_size; + uint64_t input_size; + +#ifndef RTE_MLDEV_CNXK_ENABLE_MVTVM + PLT_SET_USED(layer_name); +#endif + + cnxk_mldev = (struct cnxk_ml_dev *)device; + if (cnxk_mldev == NULL) { + plt_err("Invalid device = %p", cnxk_mldev); + return -EINVAL; + } + + model = cnxk_mldev->mldev->data->models[model_id]; + if (model == NULL) { + plt_err("Invalid model_id = %u", model_id); + return -EINVAL; + } + +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + if (model->type == ML_CNXK_MODEL_TYPE_TVM) { + for (layer_id = 0; layer_id < model->mvtvm.metadata.model.nb_layers; layer_id++) { + if (strcmp(model->layer[layer_id].name, layer_name) == 0) + break; + } + + if (layer_id == model->mvtvm.metadata.model.nb_layers) { + plt_err("Invalid layer name: %s", layer_name); + return -EINVAL; + } + + if (model->layer[layer_id].type != ML_CNXK_LAYER_TYPE_MRVL) { + plt_err("Invalid layer name / type: %s", layer_name); + return -EINVAL; + } + } +#endif + + layer = &model->layer[layer_id]; + input_size = PLT_ALIGN_CEIL(layer->info.total_input_sz_q, ML_CN10K_ALIGN_SIZE); + output_size = PLT_ALIGN_CEIL(layer->info.total_output_sz_q, ML_CN10K_ALIGN_SIZE); + + sprintf(str, "cn10k_ml_io_mz_%u_%u", model_id, layer_id); + mz = plt_memzone_reserve_aligned(str, input_size + output_size, 0, ML_CN10K_ALIGN_SIZE); + if (mz == NULL) { + plt_err("io_alloc failed: Unable to allocate memory: model_id = %u, layer_name = %s", + model_id, layer_name); + return -ENOMEM; + } + + *input_qbuffer = mz->addr; + *output_qbuffer = PLT_PTR_ADD(mz->addr, input_size); + + return 0; +} + +int +cn10k_ml_io_free(void *device, uint16_t model_id, const char *layer_name) +{ + struct cnxk_ml_dev *cnxk_mldev; + struct cnxk_ml_model *model; + + char str[RTE_MEMZONE_NAMESIZE]; + const struct plt_memzone *mz; + uint16_t layer_id = 0; + +#ifndef RTE_MLDEV_CNXK_ENABLE_MVTVM + PLT_SET_USED(layer_name); +#endif + + cnxk_mldev = (struct cnxk_ml_dev *)device; + if (cnxk_mldev == NULL) { + plt_err("Invalid device = %p", cnxk_mldev); + return -EINVAL; + } + + model = cnxk_mldev->mldev->data->models[model_id]; + if (model == NULL) { + plt_err("Invalid model_id = %u", model_id); + return -EINVAL; + } + +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + if (model->type == ML_CNXK_MODEL_TYPE_TVM) { + for (layer_id = 0; layer_id < model->mvtvm.metadata.model.nb_layers; layer_id++) { + if (strcmp(model->layer[layer_id].name, layer_name) == 0) + break; + } + + if (layer_id == model->mvtvm.metadata.model.nb_layers) { + plt_err("Invalid layer name: %s", layer_name); + return -EINVAL; + } + + if (model->layer[layer_id].type != ML_CNXK_LAYER_TYPE_MRVL) { + plt_err("Invalid layer name / type: 
%s", layer_name); + return -EINVAL; + } + } +#endif + + sprintf(str, "cn10k_ml_io_mz_%u_%u", model_id, layer_id); + mz = plt_memzone_lookup(str); + if (mz == NULL) { + plt_err("io_free failed: Memzone not found: model_id = %u, layer_name = %s", + model_id, layer_name); + return -EINVAL; + } + + return plt_memzone_free(mz); +} diff --git a/drivers/ml/cnxk/cn10k_ml_ops.h b/drivers/ml/cnxk/cn10k_ml_ops.h index 3e75cae65a..05565
[PATCH v2 31/34] ml/cnxk: add generic ML malloc and free callback
Implemented generic ML malloc and free callbacks.

Signed-off-by: Srikanth Yalavarthi
---
 drivers/ml/cnxk/cn10k_ml_ops.c | 30 ++
 drivers/ml/cnxk/cn10k_ml_ops.h | 3 +++
 drivers/ml/cnxk/mvtvm_ml_ops.c | 2 ++
 3 files changed, 35 insertions(+)

diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c
index 23e98b96c5..140f7a343f 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.c
+++ b/drivers/ml/cnxk/cn10k_ml_ops.c
@@ -1522,3 +1522,33 @@ cn10k_ml_io_free(void *device, uint16_t model_id, const char *layer_name)
 	return plt_memzone_free(mz);
 }
+
+int
+cn10k_ml_malloc(const char *name, size_t size, uint32_t align, void **addr)
+{
+	const struct plt_memzone *mz;
+
+	mz = plt_memzone_reserve_aligned(name, size, 0, align);
+	if (mz == NULL) {
+		plt_err("ml_malloc failed: Unable to allocate memory: name = %s", name);
+		return -ENOMEM;
+	}
+
+	*addr = mz->addr;
+
+	return 0;
+}
+
+int
+cn10k_ml_free(const char *name)
+{
+	const struct plt_memzone *mz;
+
+	mz = plt_memzone_lookup(name);
+	if (mz == NULL) {
+		plt_err("ml_free failed: Memzone not found: name = %s", name);
+		return -EINVAL;
+	}
+
+	return plt_memzone_free(mz);
+}
diff --git a/drivers/ml/cnxk/cn10k_ml_ops.h b/drivers/ml/cnxk/cn10k_ml_ops.h
index 055651eaa2..d7df1d003a 100644
--- a/drivers/ml/cnxk/cn10k_ml_ops.h
+++ b/drivers/ml/cnxk/cn10k_ml_ops.h
@@ -332,4 +332,7 @@ int cn10k_ml_io_alloc(void *device, uint16_t model_id, const char *layer_name,
 		      uint64_t **input_qbuffer, uint64_t **output_qbuffer);
 int cn10k_ml_io_free(void *device, uint16_t model_id, const char *layer_name);
+int cn10k_ml_malloc(const char *name, size_t size, uint32_t align, void **addr);
+int cn10k_ml_free(const char *name);
+
 #endif /* _CN10K_ML_OPS_H_ */
diff --git a/drivers/ml/cnxk/mvtvm_ml_ops.c b/drivers/ml/cnxk/mvtvm_ml_ops.c
index a41ba4d343..95238d43d8 100644
--- a/drivers/ml/cnxk/mvtvm_ml_ops.c
+++ b/drivers/ml/cnxk/mvtvm_ml_ops.c
@@ -166,6 +166,8 @@ mvtvm_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params *
 		callback->tvmrt_glow_layer_unload = cn10k_ml_layer_unload;
 		callback->tvmrt_io_alloc = cn10k_ml_io_alloc;
 		callback->tvmrt_io_free = cn10k_ml_io_free;
+		callback->tvmrt_malloc = cn10k_ml_malloc;
+		callback->tvmrt_free = cn10k_ml_free;
 	} else {
 		callback = NULL;
 	}
--
2.41.0
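Usage sketch: TVMDP receives these functions as its tvmrt_malloc/tvmrt_free callbacks, so runtime scratch memory is carved out of DPDK memzones and later released purely by name (the name and size below are illustrative):

void *addr = NULL;

if (cn10k_ml_malloc("mvtvm_scratch_0", 4096, ML_CN10K_ALIGN_SIZE, &addr) == 0) {
	/* ... the TVM runtime uses the buffer ... */
	cn10k_ml_free("mvtvm_scratch_0");
}

Like cn10k_ml_io_free(), the free side relies on the name being reproducible, which is why both callbacks take a name rather than a pointer.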
[PATCH v2 32/34] ml/cnxk: support quantize and dequantize callback
From: Prince Takkar Added support for quantize and dequantize callback functions for TVM models. Signed-off-by: Prince Takkar --- drivers/ml/cnxk/mvtvm_ml_model.h | 2 + drivers/ml/cnxk/mvtvm_ml_ops.c | 127 +++ drivers/ml/cnxk/mvtvm_ml_ops.h | 4 + 3 files changed, 133 insertions(+) diff --git a/drivers/ml/cnxk/mvtvm_ml_model.h b/drivers/ml/cnxk/mvtvm_ml_model.h index d71df36f5a..57a6ce0bb1 100644 --- a/drivers/ml/cnxk/mvtvm_ml_model.h +++ b/drivers/ml/cnxk/mvtvm_ml_model.h @@ -5,6 +5,8 @@ #ifndef _MVTVM_ML_MODEL_H_ #define _MVTVM_ML_MODEL_H_ +#include + #include #include diff --git a/drivers/ml/cnxk/mvtvm_ml_ops.c b/drivers/ml/cnxk/mvtvm_ml_ops.c index 95238d43d8..5292ac97fe 100644 --- a/drivers/ml/cnxk/mvtvm_ml_ops.c +++ b/drivers/ml/cnxk/mvtvm_ml_ops.c @@ -7,6 +7,8 @@ #include #include +#include + #include "cn10k_ml_ops.h" #include "mvtvm_ml_model.h" @@ -168,6 +170,8 @@ mvtvm_ml_model_load(struct cnxk_ml_dev *cnxk_mldev, struct rte_ml_model_params * callback->tvmrt_io_free = cn10k_ml_io_free; callback->tvmrt_malloc = cn10k_ml_malloc; callback->tvmrt_free = cn10k_ml_free; + callback->tvmrt_quantize = mvtvm_ml_io_quantize; + callback->tvmrt_dequantize = mvtvm_ml_io_dequantize; } else { callback = NULL; } @@ -298,3 +302,126 @@ mvtvm_ml_model_stop(struct cnxk_ml_dev *cnxk_mldev, struct cnxk_ml_model *model) return 0; } + +int +mvtvm_ml_io_quantize(void *device, uint16_t model_id, const char *layer_name, +const DLTensor **deq_tensor, void *qbuffer) +{ + struct cnxk_ml_io_info *info = NULL; + struct cnxk_ml_dev *cnxk_mldev; + struct cnxk_ml_model *model; + uint16_t layer_id = 0; + uint8_t *lcl_dbuffer; + uint8_t *lcl_qbuffer; + uint32_t i; + int ret; + +#ifdef CNXK_ML_DEV_DEBUG + if ((device == NULL) || (deq_tensor == NULL) || (qbuffer == NULL)) + return -EINVAL; +#endif + + cnxk_mldev = (struct cnxk_ml_dev *)device; + + model = cnxk_mldev->mldev->data->models[model_id]; +#ifdef CNXK_ML_DEV_DEBUG + if (model == NULL) { + plt_err("Invalid model_id = %u", model_id); + return -EINVAL; + } +#endif + + /* Get layer id */ + for (layer_id = 0; layer_id < model->mvtvm.metadata.model.nb_layers; layer_id++) { + if (strcmp(model->layer[layer_id].name, layer_name) == 0) + break; + } + +#ifdef CNXK_ML_DEV_DEBUG + if (layer_id == model->mvtvm.metadata.model.nb_layers) { + plt_err("Invalid layer name: %s", layer_name); + return -EINVAL; + } + + if (model->layer[layer_id].type != ML_CNXK_LAYER_TYPE_MRVL) { + plt_err("Invalid layer name / type: %s", layer_name); + return -EINVAL; + } +#endif + + info = &model->layer[layer_id].info; + lcl_qbuffer = (uint8_t *)qbuffer; + + for (i = 0; i < info->nb_inputs; i++) { + lcl_dbuffer = PLT_PTR_ADD(deq_tensor[i]->data, deq_tensor[i]->byte_offset); + + ret = cnxk_ml_io_quantize_single(&info->input[i], lcl_dbuffer, lcl_qbuffer); + if (ret < 0) + return ret; + + lcl_qbuffer += info->input[i].sz_q; + } + + return 0; +} + +int +mvtvm_ml_io_dequantize(void *device, uint16_t model_id, const char *layer_name, void *qbuffer, + const DLTensor **deq_tensor) +{ + struct cnxk_ml_io_info *info = NULL; + struct cnxk_ml_dev *cnxk_mldev; + struct cnxk_ml_model *model; + uint16_t layer_id = 0; + uint8_t *lcl_dbuffer; + uint8_t *lcl_qbuffer; + uint32_t i; + int ret; + +#ifdef CNXK_ML_DEV_DEBUG + if ((device == NULL) || (deq_tensor == NULL) || (qbuffer == NULL)) + return -EINVAL; +#endif + + cnxk_mldev = (struct cnxk_ml_dev *)device; + + model = cnxk_mldev->mldev->data->models[model_id]; +#ifdef CNXK_ML_DEV_DEBUG + if (model == NULL) { + plt_err("Invalid model_id = %u", model_id); + return 
-EINVAL; + } +#endif + + for (layer_id = 0; layer_id < model->mvtvm.metadata.model.nb_layers; layer_id++) { + if (strcmp(model->layer[layer_id].name, layer_name) == 0) + break; + } + +#ifdef CNXK_ML_DEV_DEBUG + if (layer_id == model->mvtvm.metadata.model.nb_layers) { + plt_err("Invalid layer name: %s", layer_name); + return -EINVAL; + } + + if (model->layer[layer_id].type != ML_CNXK_LAYER_TYPE_MRVL) { + plt_err("Invalid layer name / type: %s", layer_name); + return -EINVAL; + } +#endif + + info = &model->layer[layer_id].info; + lcl_qbuffer = (uint8_t *)qbuffer; + + for (i = 0;
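The callbacks walk one DLTensor per model input/output; per the DLPack convention, a tensor's element storage starts byte_offset bytes past its data pointer, which is what PLT_PTR_ADD(deq_tensor[i]->data, deq_tensor[i]->byte_offset) computes above. The same addressing in standalone form:

#include <stdint.h>
#include <dlpack/dlpack.h>

/* First byte of a tensor's element storage (DLPack convention). */
static inline uint8_t *
dl_tensor_bytes(const DLTensor *t)
{
	return (uint8_t *)t->data + t->byte_offset;
}

The quantized side, by contrast, is a single flat buffer that the loop advances by sz_q (or sz_d on dequantize) per tensor, matching the back-to-back layout computed at load time.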
[PATCH v2 33/34] ml/cnxk: enable fast-path ops for TVM models
From: Anup Prabhu Enable fast-path ops support for TVM models. Models would use TVMDP library function calls to execute inference operations for Hybrid and LLVM model sub-types. For TVM MRVL model subtypes that have a single MRVL layer, the inference requests are directly enqueued to hardware by the driver. Signed-off-by: Anup Prabhu Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_ops.c | 4 - drivers/ml/cnxk/cnxk_ml_io.h | 6 ++ drivers/ml/cnxk/cnxk_ml_ops.c| 4 + drivers/ml/cnxk/cnxk_ml_ops.h| 9 +++ drivers/ml/cnxk/mvtvm_ml_model.c | 20 + drivers/ml/cnxk/mvtvm_ml_model.h | 6 ++ drivers/ml/cnxk/mvtvm_ml_ops.c | 124 +++ drivers/ml/cnxk/mvtvm_ml_ops.h | 43 +++ 8 files changed, 212 insertions(+), 4 deletions(-) diff --git a/drivers/ml/cnxk/cn10k_ml_ops.c b/drivers/ml/cnxk/cn10k_ml_ops.c index 140f7a343f..c1353fb0c8 100644 --- a/drivers/ml/cnxk/cn10k_ml_ops.c +++ b/drivers/ml/cnxk/cn10k_ml_ops.c @@ -287,10 +287,6 @@ cn10k_ml_dev_configure(struct cnxk_ml_dev *cnxk_mldev, const struct rte_ml_dev_c else cn10k_mldev->ml_jcmdq_enqueue = roc_ml_jcmdq_enqueue_lf; - cnxk_mldev->mldev->enqueue_burst = cnxk_ml_enqueue_burst; - cnxk_mldev->mldev->dequeue_burst = cnxk_ml_dequeue_burst; - cnxk_mldev->mldev->op_error_get = cn10k_ml_op_error_get; - return 0; } diff --git a/drivers/ml/cnxk/cnxk_ml_io.h b/drivers/ml/cnxk/cnxk_ml_io.h index 5de166c252..6d5d25a7c9 100644 --- a/drivers/ml/cnxk/cnxk_ml_io.h +++ b/drivers/ml/cnxk/cnxk_ml_io.h @@ -47,6 +47,12 @@ struct cnxk_ml_io { /* Scale */ float scale; + + /* Dequantized offset */ + uint32_t offset_d; + + /* Quantized offset */ + uint32_t offset_q; }; /* Model / Layer IO structure */ diff --git a/drivers/ml/cnxk/cnxk_ml_ops.c b/drivers/ml/cnxk/cnxk_ml_ops.c index f281e6070f..274d152b81 100644 --- a/drivers/ml/cnxk/cnxk_ml_ops.c +++ b/drivers/ml/cnxk/cnxk_ml_ops.c @@ -770,6 +770,10 @@ cnxk_ml_dev_configure(struct rte_ml_dev *dev, const struct rte_ml_dev_config *co cnxk_mldev->max_nb_layers = cnxk_mldev->cn10k_mldev.fw.req->cn10k_req.jd.fw_load.cap.s.max_models; + cnxk_mldev->mldev->enqueue_burst = cnxk_ml_enqueue_burst; + cnxk_mldev->mldev->dequeue_burst = cnxk_ml_dequeue_burst; + cnxk_mldev->mldev->op_error_get = cn10k_ml_op_error_get; + /* Allocate and initialize index_map */ if (cnxk_mldev->index_map == NULL) { cnxk_mldev->index_map = diff --git a/drivers/ml/cnxk/cnxk_ml_ops.h b/drivers/ml/cnxk/cnxk_ml_ops.h index 2575f4c6e1..62e2b17e35 100644 --- a/drivers/ml/cnxk/cnxk_ml_ops.h +++ b/drivers/ml/cnxk/cnxk_ml_ops.h @@ -12,12 +12,21 @@ #include "cn10k_ml_ops.h" +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM +#include "mvtvm_ml_ops.h" +#endif + /* Request structure */ struct cnxk_ml_req { /* Device specific request */ union { /* CN10K */ struct cn10k_ml_req cn10k_req; + +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + /* MVTVM */ + struct mvtvm_ml_req mvtvm_req; +#endif }; /* Address of status field */ diff --git a/drivers/ml/cnxk/mvtvm_ml_model.c b/drivers/ml/cnxk/mvtvm_ml_model.c index 7086c7a407..8af84b6972 100644 --- a/drivers/ml/cnxk/mvtvm_ml_model.c +++ b/drivers/ml/cnxk/mvtvm_ml_model.c @@ -136,6 +136,16 @@ mvtvm_ml_model_io_info_update(struct cnxk_ml_model *model) model->mvtvm.info.total_input_sz_d += model->mvtvm.info.input[i].sz_d; model->mvtvm.info.total_input_sz_q += model->mvtvm.info.input[i].sz_q; + model->mvtvm.info.input[i].offset_d = model->mvtvm.info.total_input_sz_d; + model->mvtvm.info.input[i].offset_q = model->mvtvm.info.total_input_sz_q; + + model->mvtvm.input_tensor[i].device = metadata->input[i].device; + model->mvtvm.input_tensor[i].ndim = 
metadata->input[i].ndim; + model->mvtvm.input_tensor[i].dtype = metadata->input[i].datatype; + model->mvtvm.input_tensor[i].shape = metadata->input[i].shape; + model->mvtvm.input_tensor[i].strides = NULL; + model->mvtvm.input_tensor[i].byte_offset = model->mvtvm.info.input[i].offset_q; + plt_ml_dbg("model_id = %u, input[%u] - sz_d = %u sz_q = %u", model->model_id, i, model->mvtvm.info.input[i].sz_d, model->mvtvm.info.input[i].sz_q); } @@ -169,6 +179,16 @@ mvtvm_ml_model_io_info_update(struct cnxk_ml_model *model) model->mvtvm.info.total_output_sz_d += model->mvtvm.info.output[i].sz_d; model->mvtvm.info.total_output_sz_q += model->mvtvm.info.output[i].sz_q; + model->mvtvm.info.output[i].offset_d = model->mvtvm.info.total_output_sz_d; + model->mvtvm.info.output[i].offset_q = model->mvtvm.info.total_outp
[PATCH v2 34/34] ml/cnxk: enable creation of mvtvm virtual device
Enable support to create a mvtvm virtual device on system's without a PCI based ML HW accelerator. Signed-off-by: Srikanth Yalavarthi --- drivers/ml/cnxk/cn10k_ml_dev.c | 8 ++ drivers/ml/cnxk/cn10k_ml_dev.h | 3 + drivers/ml/cnxk/cnxk_ml_dev.c | 3 + drivers/ml/cnxk/cnxk_ml_dev.h | 21 drivers/ml/cnxk/cnxk_ml_ops.c | 86 ++ drivers/ml/cnxk/meson.build| 2 + drivers/ml/cnxk/mvtvm_ml_dev.c | 198 + drivers/ml/cnxk/mvtvm_ml_dev.h | 40 +++ drivers/ml/cnxk/mvtvm_ml_ops.c | 34 +- drivers/ml/cnxk/mvtvm_ml_ops.h | 2 + 10 files changed, 372 insertions(+), 25 deletions(-) create mode 100644 drivers/ml/cnxk/mvtvm_ml_dev.c create mode 100644 drivers/ml/cnxk/mvtvm_ml_dev.h diff --git a/drivers/ml/cnxk/cn10k_ml_dev.c b/drivers/ml/cnxk/cn10k_ml_dev.c index 20c114b8bf..e6dc87e353 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.c +++ b/drivers/ml/cnxk/cn10k_ml_dev.c @@ -368,6 +368,12 @@ cn10k_ml_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_de PLT_SET_USED(pci_drv); + if (cnxk_ml_dev_initialized == 1) { + plt_err("ML CNXK device already initialized!"); + plt_err("Cannot initialize CN10K PCI dev"); + rte_exit(-EINVAL, "Invalid EAL arguments "); + } + init_params = (struct rte_ml_dev_pmd_init_params){ .socket_id = rte_socket_id(), .private_data_size = sizeof(struct cnxk_ml_dev)}; @@ -414,6 +420,8 @@ cn10k_ml_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_de dev->dequeue_burst = NULL; dev->op_error_get = NULL; + cnxk_ml_dev_initialized = 1; + cnxk_mldev->type = CNXK_ML_DEV_TYPE_PCI; cnxk_mldev->state = ML_CNXK_DEV_STATE_PROBED; return 0; diff --git a/drivers/ml/cnxk/cn10k_ml_dev.h b/drivers/ml/cnxk/cn10k_ml_dev.h index 2e7eb6c9ef..cee405f3f5 100644 --- a/drivers/ml/cnxk/cn10k_ml_dev.h +++ b/drivers/ml/cnxk/cn10k_ml_dev.h @@ -11,6 +11,9 @@ #include "cnxk_ml_io.h" +/* Device status */ +extern int cnxk_ml_dev_initialized; + /* Dummy Device ops */ extern struct rte_ml_dev_ops ml_dev_dummy_ops; diff --git a/drivers/ml/cnxk/cnxk_ml_dev.c b/drivers/ml/cnxk/cnxk_ml_dev.c index 63d1c9e417..dc4512223c 100644 --- a/drivers/ml/cnxk/cnxk_ml_dev.c +++ b/drivers/ml/cnxk/cnxk_ml_dev.c @@ -7,6 +7,9 @@ #include "cnxk_ml_dev.h" +/* Device status */ +int cnxk_ml_dev_initialized; + /* Dummy operations for ML device */ struct rte_ml_dev_ops ml_dev_dummy_ops = {0}; diff --git a/drivers/ml/cnxk/cnxk_ml_dev.h b/drivers/ml/cnxk/cnxk_ml_dev.h index 382fca64be..491c4c4aea 100644 --- a/drivers/ml/cnxk/cnxk_ml_dev.h +++ b/drivers/ml/cnxk/cnxk_ml_dev.h @@ -9,6 +9,10 @@ #include "cn10k_ml_dev.h" +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM +#include "mvtvm_ml_dev.h" +#endif + #include "cnxk_ml_xstats.h" /* ML command timeout in seconds */ @@ -34,6 +38,15 @@ struct cnxk_ml_error_db { char str[RTE_ML_STR_MAX]; }; +/* Device type */ +enum cnxk_ml_dev_type { + /* PCI based Marvell's ML HW accelerator device */ + CNXK_ML_DEV_TYPE_PCI, + + /* Generic Virtual device */ + CNXK_ML_DEV_TYPE_VDEV, +}; + /* Device configuration state enum */ enum cnxk_ml_dev_state { /* Probed and not configured */ @@ -66,6 +79,9 @@ struct cnxk_ml_dev { /* RTE device */ struct rte_ml_dev *mldev; + /* Device type */ + enum cnxk_ml_dev_type type; + /* Configuration state */ enum cnxk_ml_dev_state state; @@ -87,6 +103,11 @@ struct cnxk_ml_dev { /* CN10K device structure */ struct cn10k_ml_dev cn10k_mldev; +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + /* MVTVM device structure */ + struct mvtvm_ml_dev mvtvm_mldev; +#endif + /* Maximum number of layers */ uint64_t max_nb_layers; diff --git a/drivers/ml/cnxk/cnxk_ml_ops.c b/drivers/ml/cnxk/cnxk_ml_ops.c index 
274d152b81..9a59e3b40b 100644 --- a/drivers/ml/cnxk/cnxk_ml_ops.c +++ b/drivers/ml/cnxk/cnxk_ml_ops.c @@ -125,7 +125,8 @@ cnxk_ml_qp_create(const struct rte_ml_dev *dev, uint16_t qp_id, uint32_t nb_desc qp->stats.enqueue_err_count = 0; qp->stats.dequeue_err_count = 0; - cn10k_ml_qp_initialize(cnxk_mldev, qp); + if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_PCI) + cn10k_ml_qp_initialize(cnxk_mldev, qp); return qp; @@ -616,7 +617,14 @@ cnxk_ml_dev_info_get(struct rte_ml_dev *dev, struct rte_ml_dev_info *dev_info) dev_info->driver_name = dev->device->driver->name; dev_info->max_models = ML_CNXK_MAX_MODELS; - return cn10k_ml_dev_info_get(cnxk_mldev, dev_info); + if (cnxk_mldev->type == CNXK_ML_DEV_TYPE_PCI) + return cn10k_ml_dev_info_get(cnxk_mldev, dev_info); +#ifdef RTE_MLDEV_CNXK_ENABLE_MVTVM + else + return mvtvm_ml_dev_info_get(cnxk_mldev, dev_info); +#endif + + return 0; } static int @@ -654,9 +662
Re: [PATCH v2 2/2] eal: remove NUMFLAGS enumeration
On Wed, Sep 20, 2023 at 8:01 AM Stanisław Kardach wrote:
> On Tue, Sep 19, 2023 at 4:47 PM David Marchand wrote:
> > > Also I see you're still removing the RTE_CPUFLAG_NUMFLAGS (what I call a
> > > last element canary). Why? If you're concerned with ABI, then we're
> > > talking about an application linking dynamically with DPDK or talking via
> > > some RPC channel with another DPDK application. So clashing with this
> > > definition does not come into question. One should rather use
> > > rte_cpu_get_flag_enabled().
> > > Also if you want to introduce new features, one would add them to the
> > > rte_cpuflags headers, unless you'd like to not add those and keep an
> > > undocumented list "above" the last defined element.
> > > Could you explain a bit more your use case?
> >
> > Hey Stanislaw,
> >
> > Talking generically, one problem with such a pattern (having a LAST, or
> > MAX enum) is when an array sized with such a symbol is exposed.
> > As I mentioned in the past, this can have unwanted effects:
> > https://patchwork.dpdk.org/project/dpdk/patch/20230919140430.3251493-1-david.march...@redhat.com/

Argh... who broke copy/paste in my browser?!
Wrt MAX and arrays, I wanted to point at:
http://inbox.dpdk.org/dev/cajfav8xs5cvde2xwrtaxk5ve_piqmv5ly5tkstk3r1gourt...@mail.gmail.com/

> I agree, though I'd argue "LAST" and "MAX" semantics are a bit different.
> "LAST" delimits the known enumeration territory while "MAX" is more of a
> `constexpr` value type.
>
> > Another issue is when an existing enum meaning changes: from the
> > application PoV, the (old) MAX value is incorrect, but for the library
> > PoV, a new meaning has been associated.
> > This may trigger bugs in the application when calling a function that
> > returns such an enum and never returned this MAX value in the past.
> >
> > For at least those two reasons, removing those canary elements is
> > being done in DPDK.
> >
> > This specific removal has been announced:
> > https://patchwork.dpdk.org/project/dpdk/patch/20230919140430.3251493-1-david.march...@redhat.com/
> Thanks for pointing this out but did you mean to link to the patch again here?

Sorry, same here, bad copy/paste :-(.
The intended link is:
https://git.dpdk.org/dpdk/commit/?id=5da7c13521
The deprecation notice was badly formulated, and this patch here is consistent with it.

> > Now, practically, when I look at the cpuflags API, I don't see us
> > exposed to those two issues wrt rte_cpu_flag_t, so maybe this change
> > is unneeded.
> > But on the other hand, is it really an issue for an application to
> > lose this (internal) information?
> I doubt it; maybe it could be used as a sanity check for choosing proper
> functors in the application. Though the initial description of the reason
> behind this patch was to not break the ABI, and I don't think it does that.
> What it does is force users to use explicit CPU flag values, which is a
> good thing. Though if so, then it should be stated in the commit description.

I agree. Siva, can you work on a new revision?

Thanks.

-- David Marchand
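A toy illustration of the array-sizing hazard discussed in this thread (invented names, not DPDK code):

enum color { COLOR_RED, COLOR_GREEN, COLOR_MAX };

/* Application array sized with the canary at app compile time: 2 slots. */
static int counters[COLOR_MAX];

void
count(enum color c)
{
	counters[c]++;
}

/* If a later library release inserts COLOR_BLUE before COLOR_MAX, a library
 * call can now return COLOR_BLUE (== the old COLOR_MAX), and count() writes
 * out of bounds in an application built against the old header. */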
Re: Commit broke 32-bit testpmd app
Hi,

I tried to reproduce without success (see attached log).
I fail to reproduce because buf_iova fits into 32 bits in my case:

(gdb) p /x *tx_pkts[0]
$4 = {
  cacheline0 = 0x77b19ec0,
  buf_addr = 0x77b19f40,
  buf_iova = 0x49519f40,
  rearm_data = 0x77b19ed0,

However, looking at your report, something like this would work for you:

diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 9d4aba11a3..38efbc517a 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -124,7 +124,7 @@ virtqueue_store_flags_packed(struct vring_packed_desc *dp,
  * (virtio-pci and virtio-user).
  */
 #define VIRTIO_MBUF_ADDR(mb, vq) \
-	((uint64_t)(*(uintptr_t *)((uintptr_t)(mb) + (vq)->mbuf_addr_offset)))
+	(*(uint64_t *)((uintptr_t)(mb) + (vq)->mbuf_addr_offset))

The problem is that it would likely break Virtio-user in 32-bit mode, as this
is how it was initially implemented, and it got fixed a few years ago, as
David hinted to me:

commit 260aae9ad9621e3e758f1443abb8fcbc25ece07c
Author: Jianfeng Tan
Date: Wed Apr 19 02:30:33 2017 +0000

    net/virtio-user: fix address on 32-bit system

    virtio-user cannot work on 32-bit system as higher 32-bit of the
    addr field (64-bit) in the desc is filled with non-zero value
    which should not happen for a 32-bit system.

    In case of virtio-user, we use buf_addr of mbuf to fill the
    virtqueue desc addr. This is a regression bug. For 32-bit system,
    the first 4 bytes of mbuf is buf_addr, with following 8 bytes for
    buf_phyaddr. With below wrong definition, both buf_addr and lower
    4 bytes buf_phyaddr are obtained to fill the virtqueue desc.
    #define VIRTIO_MBUF_ADDR(mb, vq) \
            (*(uint64_t *)((uintptr_t)(mb) + (vq)->offset))

    Fixes: 25f80d108780 ("net/virtio: fix packet corruption")
    Cc: sta...@dpdk.org

    Signed-off-by: Jianfeng Tan
    Acked-by: Yuanhan Liu

If my understanding is correct, on 32 bits, when mbuf->buf_addr is used
(Virtio-user), we need to mask out the higher 4 bytes, while when using
Virtio-pci we need the full 64 bits (as the physical addresses used as IOVA
on the guest are 64 bits).

Regards,
Maxime

On 9/13/23 15:24, Roger Melton (rmelton) wrote:
+Chris Brezovec

Hi Maxime,

Chris from our team is attending the DPDK Summit in Dublin this week. If you
have some time available, we'd appreciate it if he could meet with you to
discuss the 32-bit virtio issue we are seeing.

Regards,
Roger Melton

On 9/6/23 2:57 PM, Dave Johnson (davejo) wrote:
Hi Maxime,

This email is regarding the following commit:
https://github.com/DPDK/dpdk/commit/ba55c94a7ebc386d2288d6578ed57aad6cb92657

A query had been sent previously on this topic (see below) indicating this
commit appears to have broken the 32-bit testpmd app and impacted one of our
products that runs as a 32-bit DPDK application. We consequently backed the
commit out of our product but would prefer to get a fix for it. In the
earlier exchange, you had asked if we were using virtio-pci or virtio-user
(we are using virtio-pci) and asked for logs, which Sampath provided. It's
been a while, so let me know if you need me to resend those logs or need any
other information.

FWIW, I reproduced this using testpmd and noticed that this part of the
change seems to be the interesting part (in drivers/net/virtio/virtqueue.h):

/**
 * Return the IOVA (or virtual address in case of virtio-user) of mbuf
 * data buffer.
 *
 * The address is firstly casted to the word size (sizeof(uintptr_t))
 * before casting it to uint64_t. This is to make it work with different
 * combination of word size (64 bit and 32 bit) and virtio device
 * (virtio-pci and virtio-user).
 */
#define VIRTIO_MBUF_ADDR(mb, vq) \
	((uint64_t)(*(uintptr_t *)((uintptr_t)(mb) + (vq)->mbuf_addr_offset)))

If I revert just this part of the changeset (by re-using the
VIRTIO_MBUF_ADDR to return buf_iova, which matches what it had used
previously), then 32-bit testpmd is able to receive traffic again:

#define VIRTIO_MBUF_ADDR(mb, vq) (mb->buf_iova)

Looking at the address produced by each of these, I see the address is the
same except that the casting results in the upper bits getting cleared:

Address from patch (nonworking case) = 0x58e7c900
Address using buf_iova (working case) = 0x158e7c900
::
Address from patch (nonworking case) = 0x58e7bfc0
Address using buf_iova (working case) = 0x158e7bfc0
::
Address from patch (nonworking case) = 0x58e7b680
Address using buf_iova (working case) = 0x158e7b680
::

Regards, Dave

*From: *Sampath Peechu (speechu)
*Date: *Monday, January 30, 2023 at 3:29 PM
*To: *Maxime Coquelin , chenbo@intel.com , dev@dpdk.org
*Cc: *Roger Melton (rmelton) , Malcolm Bumgardner (mbumgard)
*Subject: *Re: Commit broke 32-bit testpmd app

Hi Maxime,

Could you please let us know if you got a chance to look at the debug logs I
provided?

Thanks,
Sampath

*From: *Sampath Peechu (speec
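The truncation Dave measured can be demonstrated in isolation: on a 32-bit little-endian build, dereferencing a 64-bit field through a uintptr_t lvalue keeps only the low 32 bits (a toy sketch, not virtio code):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t iova = 0x158e7c900ULL; /* an IOVA above 4 GiB, as in the logs */

	/* What the macro effectively does when uintptr_t is 32 bits wide: */
	uint64_t addr = (uint64_t)(*(uintptr_t *)&iova);

	/* Prints 0x58e7c900 on a 32-bit little-endian build (upper bits lost),
	 * 0x158e7c900 on a 64-bit build. */
	printf("0x%" PRIx64 "\n", addr);
	return 0;
}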
Re: Re: [PATCH v1] examples/l3fwd: relax the RSS/Offload requirement
Hi Trevor, At 2023-09-18 02:04:19, "Konstantin Ananyev" wrote: 03/09/2023 05:01, Trevor Tao пишет: Now the port Rx mq_mode has been set to RTE_ETH_MQ_RX_RSS, and offload mode set to RTE_ETH_RX_OFFLOAD_CHECKSUM by default, but some hardware and/or virtual interfaces do not support the RSS and offload mode presupposed, e.g., some virtio interfaces in the cloud don't support RSS and may only partly support RTE_ETH_RX_OFFLOAD_UDP_CKSUM/ RTE_ETH_RX_OFFLOAD_TCP_CKSUM, but not RTE_ETH_RX_OFFLOAD_IPV4_CKSUM, and the error msg here: virtio_dev_configure(): RSS support requested but not supported by the device Port0 dev_configure = -95 and: Ethdev port_id=0 requested Rx offloads 0xe does not match Rx offloads capabilities 0x201d in rte_eth_dev_configure() So to enable l3fwd to run in that environment, the Rx mode requirement can be relaxed to reflect the hardware feature reality here, and l3fwd can then run smoothly. A warning msg would be provided to the user in case it happens. On the other side, the software cksum check is enabled in case the HW support is missing. Fixes: af75078fece3 ("first public release") Cc: sta...@dpdk.org I don't think there was a bug here. We are talking about changing current requirements for the app. So I am not sure it is a real fix, or that such a change can be propagated to stable releases. Trevor: I think it's not a bug fix but a feature enhancement, it would enable l3fwd to work smoothly on the HW/virtual interfaces which don't support RSS and/or cksum offloading. Yes, it seems like sort of an enhancement, while 'Fixes: ...' are for bugs only. AFAIK, only bug-fixes are taken for backporting by stable releases. That's why there seems to be no point in adding CC: sta...@dpdk.org A couple of generic things: - l3fwd doc and release notes probably need to be updated - as you are introducing 2 distinct features: no-rss and no-ipv4-cksum, it is probably better to split it into 2 different patches (in the same series). Signed-off-by: Trevor Tao --- examples/l3fwd/l3fwd.h | 12 +++- examples/l3fwd/main.c | 21 +++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index b55855c932..cc10643c4b 100644 --- a/examples/l3fwd/l3fwd.h +++ b/examples/l3fwd/l3fwd.h @@ -115,6 +115,8 @@ extern struct acl_algorithms acl_alg[]; extern uint32_t max_pkt_len; +extern struct rte_eth_conf port_conf; + /* Send burst of packets on an output interface */ static inline int send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t port) @@ -170,7 +172,15 @@ is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t link_len) return -1; /* 2. The IP checksum must be correct. */ - /* this is checked in H/W */ + /* if this is not checked in H/W, check it. */ + if ((port_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM) == 0) { Might be better to check the particular mbuf flag: if ((mbuf->ol_flags & RTE_MBUF_F_RX_IP_CKSUM_MASK) == RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN) {...} Trevor: the utility function is_valid_ipv4_pkt is just against an IPv4 pkt, and there's no mbuf information, and if needed, there would be an extra ol_flags added here to check if it was already done by the ethernet device, but look for a sample in: https://github.com/DPDK/dpdk/blob/main/examples/l3fwd-power/main.c#L487 so I think it's ok to just use the port_conf here. If you still think it's better to use m->ol_flags, please tell me. Yep, passing ol_flags, or the mbuf itself, seems like a proper way to do it. The approach taken in l3fwd-power doesn't look right to me, see below. 
+ uint16_t actual_cksum, expected_cksum; + actual_cksum = pkt->hdr_checksum; + pkt->hdr_checksum = 0; + expected_cksum = rte_ipv4_cksum(pkt); + if (actual_cksum != expected_cksum) + return -2; + } /* * 3. The IP version number must be 4. If the version number is not 4 diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c index 6063eb1399..37aec64718 100644 --- a/examples/l3fwd/main.c +++ b/examples/l3fwd/main.c @@ -117,7 +117,7 @@ static struct lcore_params * lcore_params = lcore_params_array_default; static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) / sizeof(lcore_params_array_default[0]); -static struct rte_eth_conf port_conf = { +struct rte_eth_conf port_conf = { .rxmode = { .mq_mode = RTE_ETH_MQ_RX_RSS, .offloads = RTE_ETH_RX_OFFLOAD_CHECKSUM, @@ -1257,8 +1257,12 @@ l3fwd_poll_resource_setup(void) local_port_conf.rx_adv_conf.rss_conf.rss_hf &= dev_info.flow_type_rss_offloads; - if (dev_info.max_rx_queues == 1) + /* relax the rx rss requirement */ + if (dev_info.max_rx_queues == 1 || !local_port_conf.rx_adv_conf.rss_co
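A sketch of the mbuf-flag variant suggested above, assuming is_valid_ipv4_pkt() (or a helper it calls) is given the mbuf's ol_flags; the flag names are from rte_mbuf_core.h and rte_ipv4_cksum() from rte_ip.h, and the software fallback mirrors the patch's save/zero/compute/compare sequence:

static inline int
ipv4_hdr_cksum_ok(struct rte_ipv4_hdr *pkt, uint64_t ol_flags)
{
	uint64_t f = ol_flags & RTE_MBUF_F_RX_IP_CKSUM_MASK;
	uint16_t actual, expected;

	if (f == RTE_MBUF_F_RX_IP_CKSUM_GOOD)
		return 1;	/* HW already verified the header */
	if (f == RTE_MBUF_F_RX_IP_CKSUM_BAD)
		return 0;	/* HW flagged the header as broken */

	/* UNKNOWN/NONE: verify in software, as the patch does */
	actual = pkt->hdr_checksum;
	pkt->hdr_checksum = 0;
	expected = rte_ipv4_cksum(pkt);
	pkt->hdr_checksum = actual;
	return actual == expected;
}

This keeps the per-packet decision with the packet itself, so it stays correct even when only some ports or queues have the offload enabled.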
RE: [PATCH 1/1] net/mana: add 32 bit short doorbell
> -----Original Message----- > From: Long Li > Sent: Wednesday, September 20, 2023 3:24 AM > To: Wei Hu ; dev@dpdk.org > Cc: sta...@dpdk.org; Ferruh Yigit ; Luca Boccassi > ; Kevin Traynor > Subject: RE: [PATCH 1/1] net/mana: add 32 bit short doorbell > > > > > +#ifdef RTE_ARCH_32 > > > > + uint16_t cqe_incr = > > > > +(uint16_t)rxq->gdma_cq.head_incr_to_short_db; > > > > > > How do you make sure head_incr_to_short_db doesn't overflow? > > > > > > > I have checked this with the hardware team. In my opinion it could easily > > overflow. > > The HW team seems to suggest the HW will take care of this. > > > > Thanks, > > Wei > > I'm not sure how HW can take care of this when it overflows. When it > happens, the HW will miss a doorbell and the CQ queue will get full. And > eventually you'll lose completions for TX/RX. > > In mana_alloc_and_post_rx_wqes() and mana_rx_burst(), the code has a check > for RX/TX_WQE_SHORT_DB_THRESHOLD to make sure tail_incr doesn't > overflow when ringing the doorbell. > > In gdma_poll_completion_queue(), you need to have a similar mechanism to > not overflow tail_incr when ringing the doorbell. > I am not sure what can be done here. Applications could run in poll mode without needing to ring the cq doorbell, or it could take a very long time before they change the interrupt state. What can we do when cq->head_incr_to_short_db reaches 0x in gdma_poll_completion_queue()? If it breaks out of the loop and returns, the next time it enters it may still be at 0x because it has not rung the doorbell to have it reset yet. If we just reset the value to 0 and let it keep going in the loop, it is no different than casting it to a 16-bit unsigned int, which would be done in mana_arm_cq() if it is eventually called. Anyway, ringing the cq doorbell has not been tested as the driver doesn't support interrupts. Wei
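For reference, a minimal sketch of the threshold pattern Long Li refers to, with an illustrative threshold value and a hypothetical mana_ring_short_doorbell() helper standing in for the driver's actual doorbell routine: ring the short doorbell before the 16-bit counter can wrap, then reset the counter.

#define CQ_SHORT_DB_THRESHOLD (UINT16_MAX / 2) /* illustrative value */

	/* inside the CQ polling loop, after consuming 'processed' CQEs */
	cq->head_incr_to_short_db += processed;
	if (cq->head_incr_to_short_db >= CQ_SHORT_DB_THRESHOLD) {
		/* hypothetical helper: rings the short (32-bit) CQ doorbell */
		mana_ring_short_doorbell(db_page, cq->id,
					 cq->head_incr_to_short_db);
		cq->head_incr_to_short_db = 0;
	}

Whether ringing the CQ doorbell from the poll loop is acceptable at all (given that, as noted above, applications may poll without ever arming interrupts) is exactly the open question in this thread.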
Re: [PATCH v4 02/14] app/graph: add telnet connectivity framework
On Wed, Sep 20, 2023 at 10:04:55AM +0530, Jerin Jacob wrote: > On Tue, Sep 19, 2023 at 9:35 PM wrote: > > > > From: Sunil Kumar Kori > > > > It adds a framework to initiate a telnet session with the application. > > > > Some configurations and debug commands are exposed as runtime APIs. > > Those commands can be invoked using a telnet session. > > > > The application initiates a telnet server with host address 0.0.0.0 > > and port number 8086 by default. > > > > To make it configurable, "-h" and "-p" options are provided. > > Using them the user can pass the host address and port number on which > > the application will start the telnet server. > > > > Using the same host address and port number, a telnet client can connect > > to the application. > > > > Syntax to connect with the application: > > # telnet <host> <port> > > > > Once the session is connected, a "graph> " prompt will be available. > > Example: > > # telnet 10.28.35.207 5 > > Trying 10.28.35.207... > > Connected to 10.28.35.207. > > Escape character is '^]'. > > > > Welcome! > > > > graph> > > Some improvements > 1) Please squash the 14/14 patch into 1/14. > 2) Ctrl-C doesn't work, which is a serious issue. We have to kill it > via kill -9 from a separate window. This is probably because of the > command line library. > 3) In case app launch fails due to an error, it will leave the terminal in > a bad state until the "reset" command is executed. This might also be > because of the command line library. > 4) Just a wishlist: if on the terminal console I could do a tab and get > command help, just like the testpmd console. I think that the tab completion is only available if you use cmdline_stdin_new vs the regular cmdline creation function. In the case of the telnet connection, it may work to set the telnet fd as stdin/stdout for the connection and then use cmdline_stdin_new to create the cmdline. /Bruce
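A rough sketch of Bruce's suggestion, assuming a connected telnet socket 'fd' and an existing parse context; cmdline_stdin_new() is declared in cmdline_socket.h, and the dup2() calls make the socket act as the process's stdin/stdout:

#include <unistd.h>
#include <cmdline.h>
#include <cmdline_socket.h>

static struct cmdline *
telnet_cmdline_new(cmdline_parse_ctx_t *ctx, int fd)
{
	/* make the telnet connection the process's stdin/stdout ... */
	dup2(fd, STDIN_FILENO);
	dup2(fd, STDOUT_FILENO);
	/* ... so the stdin variant (which enables the interactive
	 * features such as tab completion) can be used */
	return cmdline_stdin_new(ctx, "graph> ");
}

The alternative is cmdline_new(ctx, "graph> ", fd, fd), which keeps the real stdin/stdout intact but, per Bruce's note, would not give the interactive terminal features.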
Re: [PATCH v5 00/26] refact the nfpcore module
On 9/20/2023 2:55 AM, Chaoyong He wrote: >> On 9/19/2023 10:54 AM, Chaoyong He wrote: >>> This patch series aims to: >>> - Make the coding style satisfy with DPDK. >>> - Sync the logic with kernel driver. >>> - Make the sub-module more modular. >>> - Extend the nfp configure BAR from 8 to 24. >>> >>> --- >>> v5: >>> * Fix one bug in the logic of VF using dev module. >>> * Remove the R-b tag as the requirement of Niklas. >>> * Revise logic follow the advice of reviewer. >>> v4: >>> * Drop the commit 'using the DPDK memory management API'. >>> * Modify the commit message of 'standard the comment style'. >>> * Revise some comment logic as the advice of reviewer. >>> v3: >>> * Fix one bug in the BAR find logic. >>> * Modify more coding style. >>> v2: >>> * Fix the compile error in Fedora 37 environment. >>> --- >>> >>> Chaoyong He (26): >>> net/nfp: explicitly compare to null and 0 >>> net/nfp: unify the indent coding style >>> net/nfp: unify the type of integer variable >>> net/nfp: remove the unneeded logic >>> net/nfp: standard the local variable coding style >>> net/nfp: adjust the log statement >>> net/nfp: standard the comment style >>> net/nfp: standard the blank character >>> net/nfp: unify the guide line of header file >>> net/nfp: rename some parameter and variable >>> net/nfp: refact the hwinfo module >>> net/nfp: refact the nffw module >>> net/nfp: refact the mip module >>> net/nfp: refact the rtsym module >>> net/nfp: refact the resource module >>> net/nfp: refact the target module >>> net/nfp: add a new header file >>> net/nfp: refact the nsp module >>> net/nfp: refact the mutex module >>> net/nfp: rename data field to sync with kernel driver >>> net/nfp: add the dev module >>> net/nfp: add header file for PCIe module >>> net/nfp: refact the cppcore module >>> net/nfp: refact the PCIe module >>> net/nfp: refact the cppcore and PCIe module >>> net/nfp: extend the usage of nfp BAR from 8 to 24 >>> >> >> >> Hi Chaoyong, >> >> This set has 24 patches in the mail_list/patchwork, the last two patches are >> missing, can you please double check? > > Sorry, the send-email command failed after the 24/26 patch had been sent out, and > I did not notice that. > > $ git send-email --to dev@dpdk.org --cc oss-driv...@corigine.com > --in-reply-to 20230918024612.1600536-1-chaoyong...@corigine.com ./*.patch > > Can't locate Email/Valid.pm in @INC (you may need to install the Email::Valid > module) (@INC contains: /usr/share/perl5/vendor_perl > /usr/local/lib64/perl5/5.32 /usr/local/share/perl5/5.32 > /usr/lib64/perl5/vendor_perl /usr/lib64/perl5 /usr/share/perl5) at > /usr/libexec/git-core/git-send-email line 1138. > ...propagated at /usr/libexec/git-core/git-send-email line 1694. > > This is the first time I have met this problem; it's my fault I did not notice > it, and I am still not sure what caused it. > Same here, the error is not familiar and I am not sure about the root cause. > I re-sent the 25/26 and 26/26 patches with the same command (except the > './*.patch' part), but it seems they are not recognized as the same series in > patchwork: > Untitled series #29559 > Untitled series #29560 > > How to fix that, or should I send a v6 version for the whole series? > That is OK, I will get the missing patches manually.
RE: [PATCH v3 1/3] lib: introduce dispatcher library
> -----Original Message----- > From: Mattias Rönnblom > Sent: Tuesday, September 19, 2023 2:51 PM > To: Naga Harish K, S V ; mattias.ronnblom > ; dev@dpdk.org > Cc: Jerin Jacob ; techbo...@dpdk.org; Van Haaren, > Harry ; Nilsson, Peter > ; Heng Wang ; > Pavan Nikhilesh ; Gujjar, Abhinandan S > ; Carrillo, Erik G ; > Shijith Thotton ; Hemant Agrawal > ; Sachin Saxena ; > Liang Ma ; Mccarthy, Peter > ; Yan, Zhirun > Subject: Re: [PATCH v3 1/3] lib: introduce dispatcher library > > On 2023-09-17 18:46, Naga Harish K, S V wrote: > > > > > >> -----Original Message----- > >> From: Mattias Rönnblom > >> Sent: Monday, September 4, 2023 6:33 PM > >> To: dev@dpdk.org > >> Cc: Jerin Jacob ; techbo...@dpdk.org; Van Haaren, > >> Harry ; hof...@lysator.liu.se; Nilsson, > >> Peter ; Heng Wang > >> ; Naga Harish K, S V > >> ; Pavan Nikhilesh > >> ; Gujjar, Abhinandan S > >> ; Carrillo, Erik G > >> ; Shijith Thotton ; > >> Hemant Agrawal ; Sachin Saxena > >> ; Liang Ma ; > >> Mccarthy, Peter ; Yan, Zhirun > >> ; mattias.ronnblom > >> > >> Subject: [PATCH v3 1/3] lib: introduce dispatcher library > >> > >> The purpose of the dispatcher library is to help reduce coupling in > >> an Eventdev-based DPDK application. > >> > >> In addition, the dispatcher also provides a convenient and flexible > >> way for the application to use service cores for application-level > >> processing. > >> > >> Signed-off-by: Mattias Rönnblom > >> Tested-by: Peter Nilsson > >> Reviewed-by: Heng Wang > >> > >> -- > >> > >> PATCH v3: > >> o To underline its optional character and since it does not provide > >> hardware abstraction, the event dispatcher is now a separate > >> library. > >> o Change name from rte_event_dispatcher -> rte_dispatcher, to make it > >> shorter and to avoid the rte_event_* namespace. > >> > > > > Rte_dispatcher is basically dispatching events but it feels like the name does > not convey that. > > Also, it is like any other adapter service that can reside within the > eventdev > directory. > > > > I can see some discussion in previous threads related to the placement of > > the > dispatcher library. > > > > It is an optional eventdev application service, not enforcing this > programming model on the application. > > The documentation may need to be updated and mention that this is > optional. > > > > If any hardware comes up with the dispatcher feature, then this library may > need to be moved inside the eventdev library later. > > > > It seems to me that the deciding factor for what functionality goes into a > DPDK > library or not is not so much dependent on if it's implemented in hardware, > in software, or some combination thereof. The important thing is that the > library is able to present a coherent API to the application (or other > libraries). > > That said, as I've mentioned before, I have no strong opinion on this > subject. > What is the next step here? The response is not conclusive, as it looks like both yes and no to changing the directory structure. > > So, it makes sense to keep this optional service in the eventdev folder as > > an > optional feature. > > >> PATCH v2: > >> o Add dequeue batch count statistic. > >> o Add statistics reset function to API. > >> o Clarify MT safety guarantees (or lack thereof) in the API > >> documentation. > >> o Change loop variable type in evd_lcore_get_handler_by_id() to uint16_t, > >> to be consistent with similar loops elsewhere in the dispatcher. > >> o Fix variable names in finalizer unregister function. 
> >> > >> PATCH: > >> o Change prefix from RED to EVD, to avoid confusion with random > >> early detection. > >> > >> RFC v4: > >> o Move handlers to per-lcore data structures. > >> o Introduce mechanism which rearranges handlers so that often-used > >> handlers tend to be tried first. > >> o Terminate dispatch loop in case all events are delivered. > >> o To avoid the dispatcher's service function hogging the CPU, process > >> only one batch per call. > >> o Have service function return -EAGAIN if no work is performed. > >> o Events delivered in the process function is no longer marked 'const', > >> since modifying them may be useful for the application and cause > >> no difficulties for the dispatcher. > >> o Various minor API documentation improvements. > >> > >> RFC v3: > >> o Add stats_get() function to the version.map file. > >> --- > >> MAINTAINERS | 3 + > >> lib/dispatcher/meson.build | 17 + > >> lib/dispatcher/rte_dispatcher.c | 791 > >> lib/dispatcher/rte_dispatcher.h > | > >> 480 +++ > >> lib/dispatcher/version.map | 20 + > >> lib/meson.build | 2 + > >> 6 files changed, 1313 insertions(+) > >> create mode 100644 lib/dispatcher/meson.build create mode 100644 > >> lib/dispatcher/rte_dispatcher.c create mode 100644 > >> lib/dispatcher/rte_dispatche
RE: [RFC PATCH 0/3] add TLS record processing security offload
> -----Original Message----- > From: Anoob Joseph > Sent: Friday, August 11, 2023 8:17 AM > To: Thomas Monjalon ; Akhil Goyal > ; Jerin Jacob ; Konstantin Ananyev > > Cc: Hemant Agrawal ; dev@dpdk.org; Matz, > Olivier ; Vidya Sagar Velumuri > > Subject: [RFC PATCH 0/3] add TLS record processing security offload > > Add Transport Layer Security (TLS) and Datagram Transport Layer Security > (DTLS). The protocols provide communications privacy for L4 protocols > such as TCP & UDP. > > TLS (and DTLS) protocol is composed of two layers, > 1. TLS Record Protocol > 2. TLS Handshake Protocol > > While TLS Handshake Protocol helps in establishing security parameters > by which client and server can communicate, TLS Record Protocol provides > the connection security. TLS Record Protocol leverages symmetric > cryptographic operations such as data encryption and authentication for > providing security to the communications. > > Cryptodevs that are capable of offloading TLS Record Protocol may > perform other operations like IV generation, header insertion, atomic > sequence number updates and anti-replay window check in addition to > cryptographic transformations. > > In record write operations, message content type is a per packet field > which is used in constructing the TLS header. One session is expected > to handle all types of content types and so, 'rte_crypto_op.aux_flags' > is used for passing the same. > > The support is added for TLS 1.2, TLS 1.3 and DTLS 1.2. > > Akhil Goyal (1): > net: add headers for TLS/DTLS packets > > Anoob Joseph (2): > security: add TLS record processing > cryptodev: add details of datapath handling of TLS records Hi Folks, I've reviewed these 3 patches; they are generally fine, with two main opens: 1) The part that I do not fully understand is how the 'rte_crypto_op.aux_flags' field usage is defined, and what values to read/write there. 2) Error handling (again with aux_flags) is not well defined, and is critical to correct (high-bw/high-packet-count) usage. I do not understand how to do correct error handling today with aux_flags, so more docs/examples are required. Some detail-level comments inline in the patch files. Regards -Harry
RE: [RFC PATCH 1/3] net: add headers for TLS/DTLS packets
> -Original Message- > From: Anoob Joseph > Sent: Friday, August 11, 2023 8:17 AM > To: Thomas Monjalon ; Akhil Goyal > ; Jerin Jacob ; Konstantin Ananyev > > Cc: Hemant Agrawal ; dev@dpdk.org; Matz, > Olivier ; Vidya Sagar Velumuri > > Subject: [RFC PATCH 1/3] net: add headers for TLS/DTLS packets > > From: Akhil Goyal > > Added TLS and DTLS packet headers for L4 security applications. > > Signed-off-by: Akhil Goyal > Signed-off-by: Anoob Joseph > Signed-off-by: Vidya Sagar Velumuri > --- > doc/api/doxy-api-index.md | 2 ++ > lib/net/meson.build | 2 ++ > lib/net/rte_dtls.h| 61 +++ > lib/net/rte_tls.h | 48 ++ > 4 files changed, 113 insertions(+) > create mode 100644 lib/net/rte_dtls.h > create mode 100644 lib/net/rte_tls.h > > diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md > index fdeda13932..03e2445bb1 100644 > --- a/doc/api/doxy-api-index.md > +++ b/doc/api/doxy-api-index.md > @@ -128,6 +128,8 @@ The public API headers are grouped by topics: >[eCPRI](@ref rte_ecpri.h), >[PDCP hdr](@ref rte_pdcp_hdr.h), >[PDCP](@ref rte_pdcp.h), > + [TLS](@ref rte_tls.h), > + [DTLS](@ref rte_dtls.h), >[L2TPv2](@ref rte_l2tpv2.h), >[PPP](@ref rte_ppp.h), >[IB](@ref rte_ib.h) > diff --git a/lib/net/meson.build b/lib/net/meson.build > index b1bc27bad5..0b69138949 100644 > --- a/lib/net/meson.build > +++ b/lib/net/meson.build > @@ -5,6 +5,8 @@ headers = files( > 'rte_ip.h', > 'rte_tcp.h', > 'rte_udp.h', > +'rte_tls.h', > +'rte_dtls.h', > 'rte_esp.h', > 'rte_sctp.h', > 'rte_icmp.h', > diff --git a/lib/net/rte_dtls.h b/lib/net/rte_dtls.h > new file mode 100644 > index 00..1455c07a92 > --- /dev/null > +++ b/lib/net/rte_dtls.h > @@ -0,0 +1,61 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(C) 2023 Marvell. > + */ > + > +#ifndef RTE_DTLS_H > +#define RTE_DTLS_H > + > +/** > + * @file > + * > + * Datagram transport layer security(DTLS) related defines. > + */ > + > +#include > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#define RTE_DTLS_TYPE_INVALID0 /**< Invalid DTLS message type. */ > +#define RTE_DTLS_TYPE_CCS20 /**< Change cipher message. */ I don't see the "CCS" acronym in the RFC, suggest to make more accurate/verbose; - Rename to RTE_DTLS_TYPE_CHANGE_CIPHER (or RTE_DTLS_TYPE_CHANGE_CIPHER_SPEC if preferred) - Reword description to "change cipher" to "change cipher spec message." > +#define RTE_DTLS_TYPE_ALERT 21 /**< Alert message. */ > +#define RTE_DTLS_TYPE_HANDSHAKE 22 /**< Handshake message for > DTLS. */ > +#define RTE_DTLS_TYPE_APPDATA23 /**< DTLS application data message. > */ > +#define RTE_DTLS_TYPE_HEARTBEAT 24 /**< DTLS 1.3 heartbeat message. */ > +#define RTE_DTLS_TYPE_CIPHERTEXT_WITH_CID25 /**< DTLS 1.3 > ciphertext with CID message. */ > +#define RTE_DTLS_TYPE_ACK26 /**< DTLS 1.3 ACK message. */ > +#define RTE_DTLS_TYPE_MAX255 /**< Maximum value as DTLS > content type. */ > + > +#define RTE_DTLS_VERSION_1_2 0xFEFD /**< DTLS 1.2 version. 1's > complement of 1.2. */ > +#define RTE_DTLS_VERSION_1_3 0xFEFC /**< DTLS 1.3 version. 1's > complement of 1.3. */ > > + > +/** > + * DTLS Header > + */ > +__extension__ > +struct rte_dtls_hdr { > + /** Content type of DTLS packet. Defined as RTE_DTLS_TYPE_*. */ > + uint8_t type; > + /** DTLS Version defined as RTE_DTLS_VERSION*. */ > + rte_be16_t version; (same comment on be16_t vs struct as in TLS version below, no rework needed) > +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN > + /** The sequence number for the DTLS record. 
*/ > + uint64_t sequence_number : 48; > + /** A counter value that is incremented on every cipher state change. > */ > + uint64_t epoch : 16; > +#elif RTE_BYTE_ORDER == RTE_BIG_ENDIAN > + /** A counter value that is incremented on every cipher state change. > */ > + uint64_t epoch : 16; > + /** The sequence number for the DTLS record. */ > + uint64_t sequence_number : 48; > +#endif > + /** The length (in bytes) of the following DTLS packet. */ > + rte_be16_t length; > +} __rte_packed; > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif /* RTE_DTLS_H */ > diff --git a/lib/net/rte_tls.h b/lib/net/rte_tls.h > new file mode 100644 > index 00..d708d06014 > --- /dev/null > +++ b/lib/net/rte_tls.h > @@ -0,0 +1,48 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(C) 2023 Marvell. > + */ > + > +#ifndef RTE_TLS_H > +#define RTE_TLS_H > + > +/** > + * @file > + * > + * Transport layer security(TLS) related defines. > + */ > + > +#include > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#define RTE_TLS_TYPE_INVALID 0 /**< Invalid TLS message type. */ > +#define RTE_TLS_TYPE_CCS 20 /**< Change cipher message. */ > +#define RTE_TLS_TYPE_ALERT 21 /**< Alert message. */
RE: [RFC PATCH 2/3] security: add TLS record processing
> -Original Message- > From: Anoob Joseph > Sent: Friday, August 11, 2023 8:17 AM > To: Thomas Monjalon ; Akhil Goyal > ; Jerin Jacob ; Konstantin Ananyev > > Cc: Hemant Agrawal ; dev@dpdk.org; Matz, > Olivier ; Vidya Sagar Velumuri > > Subject: [RFC PATCH 2/3] security: add TLS record processing > > Add Transport Layer Security (TLS) and Datagram Transport Layer Security > (DTLS). The protocols provide communications privacy for L4 protocols > such as TCP & UDP. > > TLS (and DTLS) protocol is composed of two layers, > 1. TLS Record Protocol > 2. TLS Handshake Protocol > > While TLS Handshake Protocol helps in establishing security parameters > by which client and server can communicate, TLS Record Protocol provides > the connection security. TLS Record Protocol leverages symmetric > cryptographic operations such as data encryption and authentication for > providing security to the communications. > > Cryptodevs that are capable of offloading TLS Record Protocol may > perform other operations like IV generation, header insertion, atomic > sequence number updates and anti-replay window check in addition to > cryptographic transformations. > > The support is added for TLS 1.2, TLS 1.3 and DTLS 1.2. >From the code below, my understanding is that *ONLY* the record layer is being added/supported? The difference is described well above, but the intended support added is not clearly defined. Suggest reword the last line to clarify: "Support for TLS record protocol is added for TLS 1.2, TLS 1.3 and DTLS 1.2." > Signed-off-by: Akhil Goyal > Signed-off-by: Anoob Joseph > Signed-off-by: Vidya Sagar Velumuri > --- > doc/guides/prog_guide/rte_security.rst | 58 + > lib/security/rte_security.c| 4 + > lib/security/rte_security.h| 110 + > 3 files changed, 172 insertions(+) > > diff --git a/doc/guides/prog_guide/rte_security.rst > b/doc/guides/prog_guide/rte_security.rst > index 7418e35c1b..7716d7239f 100644 > --- a/doc/guides/prog_guide/rte_security.rst > +++ b/doc/guides/prog_guide/rte_security.rst > @@ -399,6 +399,64 @@ The API ``rte_security_macsec_sc_create`` returns a > handle for SC, > and this handle is set in ``rte_security_macsec_xform`` > to create a MACsec session using ``rte_security_session_create``. > > +TLS-Record Protocol > +~~~ > + > +The Transport Layer Protocol provides communications security over the > Internet. The protocol > +allows client/server applications to communicate in a way that is designed to > prevent eavesdropping, > +tampering, or message forgery. > + > +TLS protocol is composed of two layers: the TLS Record Protocol and the TLS > Handshake Protocol. At > +the lowest level, layered on top of some reliable transport protocol (e.g., > TCP), > is the TLS Record > +Protocol. The TLS Record Protocol provides connection security that has two > basic properties: > + > + - The connection is private. Symmetric cryptography is used for data > + encryption (e.g., AES, DES, etc.). The keys for this symmetric > encryption > + are generated uniquely for each connection and are based on a secret > + negotiated by another protocol (such as the TLS Handshake Protocol). > The > + Record Protocol can also be used without encryption. > + > + - The connection is reliable. Message transport includes a message > + integrity check using a keyed MAC. Secure hash functions (e.g., > + SHA-1, etc.) are used for MAC computations. 
The Record Protocol > + can operate without a MAC, but is generally only used in this mode > + while another protocol is using the Record Protocol as a transport > + for negotiating security parameters. > + > +.. code-block:: c The code block below isn't C? Is there a better code block type for a text diagram? > + Record Write Record Read > + --- > + > + TLSPlaintext TLSCiphertext > + | | > + ~ ~ > + | | > + V V > ++-|--++--|-+ > +| Seq. no generation || Seq. no generation | > ++-|--++--|-+ > + | | > ++-|--++--|-+ > +| Header insertion ||Decryption &| > ++-|--+| MAC verification | > + | +--|-+ > ++-|--+ | > +| MAC generation & |+--|-+ > +| Encryption || TLS Header removal | > ++-|--++
RE: [RFC PATCH 3/3] cryptodev: add details of datapath handling of TLS records
> -Original Message- > From: Anoob Joseph > Sent: Friday, August 11, 2023 8:17 AM > To: Thomas Monjalon ; Akhil Goyal > ; Jerin Jacob ; Konstantin Ananyev > > Cc: Hemant Agrawal ; dev@dpdk.org; Matz, > Olivier ; Vidya Sagar Velumuri > > Subject: [RFC PATCH 3/3] cryptodev: add details of datapath handling of TLS > records > > TLS/DTLS record processing requires content type to be provided per > packet (for record write operation). Extend usage of > rte_crypto_op.aux_flags for the same purpose. I understand the goal to extend the usage of the aux_flags, but I do not understand what data/structs/values I should use to set or error-check the aux-flags here. > > Signed-off-by: Akhil Goyal > Signed-off-by: Anoob Joseph > Signed-off-by: Vidya Sagar Velumuri > --- > doc/guides/prog_guide/rte_security.rst | 10 ++ > lib/cryptodev/rte_crypto.h | 6 ++ > 2 files changed, 16 insertions(+) > > diff --git a/doc/guides/prog_guide/rte_security.rst > b/doc/guides/prog_guide/rte_security.rst > index 7716d7239f..6cb69bc949 100644 > --- a/doc/guides/prog_guide/rte_security.rst > +++ b/doc/guides/prog_guide/rte_security.rst > @@ -451,6 +451,16 @@ Protocol. The TLS Record Protocol provides > connection security that has two basi >V V > TLSCiphertext TLSPlaintext > > +TLS and DTLS header formation (in record write operation) would depend on > the > +type of content. It is a per packet variable and would need to be handled by > +the same session. Application may pass this info to a cryptodev performing > +lookaside protocol offload by passing the same in > ``rte_crypto_op.aux_flags``. > + > +In record read operation, application is required to preserve any info it may > +need from the TLS/DTLS header (such as content type and sequence number) > as the > +cryptodev would remove the header and padding as part of the lookaside > protocol > +processing. > + > Supported Versions > ^^ > > diff --git a/lib/cryptodev/rte_crypto.h b/lib/cryptodev/rte_crypto.h > index 9b8d0331a4..7c12a2b705 100644 > --- a/lib/cryptodev/rte_crypto.h > +++ b/lib/cryptodev/rte_crypto.h > @@ -101,6 +101,12 @@ struct rte_crypto_op { > /**< Operation specific auxiliary/additional flags. >* These flags carry additional information from the >* operation. Processing of the same is optional. It says "processing is optional" here, but in TLS/DTLS, it is proposed that the soft-error and hard-errors are returned to the user through this struct? That is not optional, and failing to check that is a failure mode which can result in IV-reuse, and hence decryption of payload by a malicious actor? I see this part of the API as being critical to correct usage, and it does not seem well defined or clear to me at this point. If I am mis-understanding, please clarify, as likely other developers will likely mis-understand too. Example code snippets of good hardened error-handling for soft-error and hard-error would help. > + * With TLS record offload > (RTE_SECURITY_PROTOCOL_TLS_RECORD), > + * application would be required to provide the message > + * type of the input provided. The 'aux_flags' field > + * can be used for passing the same. Message types are > + * listed as RTE_TLS_TYPE_* and RTE_DTLS_TYPE_*. >*/ Same comment as above the "aux_fields can be used" string does not explain to the user *how* to use the field correctly. Examples (in rte_security.rst?) would help. > uint8_t reserved[2]; > /**< Reserved bytes to fill 64 bits for > -- > 2.25.1
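For illustration, the kind of hardened post-dequeue handling being asked for might look like the sketch below; dev_id, qp_id and BURST_SIZE are assumed to exist in the application, and since this RFC does not yet define the aux_flags error values, the soft-error check and handle_soft_error() are placeholders:

uint16_t i, nb;
struct rte_crypto_op *ops[BURST_SIZE];

nb = rte_cryptodev_dequeue_burst(dev_id, qp_id, ops, BURST_SIZE);
for (i = 0; i < nb; i++) {
	if (ops[i]->status != RTE_CRYPTO_OP_STATUS_SUCCESS) {
		/* hard error: drop the packet; depending on the cause the
		 * session may also need to be torn down / renegotiated */
		rte_pktmbuf_free(ops[i]->sym->m_src);
		rte_crypto_op_free(ops[i]);
		continue;
	}
	/* soft error (e.g. sequence number close to exhaustion), assumed
	 * to be signalled via aux_flags -- exact values TBD in this RFC */
	if (ops[i]->aux_flags != 0)
		handle_soft_error(ops[i]); /* placeholder for app policy */
	/* ...hand the packet on as usual... */
}

Skipping the aux_flags check is precisely the failure mode raised above: if a near-exhaustion warning is missed, sequence numbers (and hence IVs) can be reused.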
Re: [PATCH v3 1/3] lib: introduce dispatcher library
On Mon, Sep 18, 2023 at 5:26 AM Naga Harish K, S V wrote: > > > > > -----Original Message----- > > From: Mattias Rönnblom > > Sent: Monday, September 4, 2023 6:33 PM > > To: dev@dpdk.org > > Cc: Jerin Jacob ; techbo...@dpdk.org; Van Haaren, > > Harry ; hof...@lysator.liu.se; Nilsson, Peter > > ; Heng Wang ; > > Naga Harish K, S V ; Pavan Nikhilesh > > ; Gujjar, Abhinandan S > > ; Carrillo, Erik G ; > > Shijith Thotton ; Hemant Agrawal > > ; Sachin Saxena ; > > Liang Ma ; Mccarthy, Peter > > ; Yan, Zhirun ; > > mattias.ronnblom > > Subject: [PATCH v3 1/3] lib: introduce dispatcher library > > > > The purpose of the dispatcher library is to help reduce coupling in an > > Eventdev-based DPDK application. > > > > In addition, the dispatcher also provides a convenient and flexible way for > > the > > application to use service cores for application-level processing. > > > > Signed-off-by: Mattias Rönnblom > > Tested-by: Peter Nilsson > > Reviewed-by: Heng Wang > > > > -- > > > > PATCH v3: > > o To underline its optional character and since it does not provide > >hardware abstraction, the event dispatcher is now a separate > >library. > > o Change name from rte_event_dispatcher -> rte_dispatcher, to make it > >shorter and to avoid the rte_event_* namespace. > > > > Rte_dispatcher is basically dispatching events but it feels like the name > does not convey that. > Also, it is like any other adapter service that can reside within the > eventdev directory. > > I can see some discussion in previous threads related to the placement of the > dispatcher library. > > It is an optional eventdev application service, not enforcing this > programming model on the application. > The documentation may need to be updated and mention that this is optional. > > If any hardware comes up with the dispatcher feature, then this library may > need to be moved inside the eventdev library later. I would like to follow the YAGNI principle in the eventdev library. Even if such HW comes (I assume it will not), the interface should not look like that. None of the HW will compare a bunch of function pointers and call the callback, so the interface will look different for HW enablement. We need to model the API based on HW for device libraries, and SW libraries based on CPU modeling dynamics. Also, there is no need to tie this library/framework only to eventdev; other than using rte_event_dequeue_burst() to pull packets, it has no eventdev significance. The library's scope is just to pull packets from a source, compare them against N match callbacks, and call the respective process callback. The dispatcher source can be rte_eth_rx_burst() or a ring.
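The generic shape of that pattern (a sketch only, not the proposed library's actual API) is a table of (match, process) callback pairs applied to each dequeued event; the first match wins:

#include <stdbool.h>
#include <rte_eventdev.h>

#define BURST_SIZE 32

struct handler {
	bool (*match)(const struct rte_event *ev, void *cb_data);
	void (*process)(struct rte_event *ev, void *cb_data);
	void *cb_data;
};

static void
dispatch_burst(uint8_t dev_id, uint8_t port_id,
	       struct handler *handlers, int num_handlers)
{
	struct rte_event ev[BURST_SIZE];
	uint16_t i, n;
	int h;

	n = rte_event_dequeue_burst(dev_id, port_id, ev, BURST_SIZE, 0);
	for (i = 0; i < n; i++) {
		for (h = 0; h < num_handlers; h++) {
			if (handlers[h].match(&ev[i], handlers[h].cb_data)) {
				handlers[h].process(&ev[i], handlers[h].cb_data);
				break;
			}
		}
	}
}

As the thread notes, the same loop works unchanged if the dequeue call is replaced by rte_eth_rx_burst() or a ring dequeue.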
Re: [PATCH v16 1/8] net/ntnic: initial commit which adds register defines
On 9/19/2023 10:06 AM, Christian Koue Muf wrote: > On 9/18/23 10:34 AM, Ferruh Yigit wrote: >> On 9/15/2023 7:37 PM, Morten Brørup wrote: From: Ferruh Yigit [mailto:ferruh.yi...@amd.com] Sent: Friday, 15 September 2023 17.55 On 9/8/2023 5:07 PM, Mykola Kostenok wrote: > From: Christian Koue Muf > > The NTNIC PMD does not rely on a kernel space Napatech driver, thus > all defines related to the register layout is part of the PMD code, > which will be added in later commits. > > Signed-off-by: Christian Koue Muf > Reviewed-by: Mykola Kostenok > Hi Mykola, Christian, This PMD scares me, overall it is a big drop: "249 files changed, 87128 insertions(+)" I think it is not possible to review all in one release cycle, and it is not even possible to say if all code is used or not. I can see the code is already developed, and it is difficult to restructure developed code, but restructuring it into small pieces really helps reviews. The driver supports a good list of features; would it be possible to distribute the upstream effort across multiple releases? Starting from basic functionality and adding features gradually. The target for this release can be providing the datapath, and adding more if we have time in the release, what do you think? Also there is a large amount of base code (HAL / FPGA code); instead of adding it in bulk, the parts relevant to a feature can be added with that feature's patch. This eliminates dead code in the base code layer, and also helps the user/reviewer to understand the link between driver code and base code. >>> >>> Jumping in here with an opinion about welcoming new NIC vendors to the >>> community: >>> >>> Generally, if a NIC vendor supplies a PMD for their NIC, I expect the >>> vendor to take responsibility for the quality of the PMD, including >>> providing a maintainer and support backporting of fixes to the PMD in LTS >>> releases. This should align with the vendor's business case for upstreaming >>> their driver. >>> >>> If the vendor provides one big patch series, which may be difficult to >>> understand/review, the fallout mainly hits the vendor's customers (and thus >>> the vendor's support organization), not the community as a whole. >>> >> >> Hi Morten, >> >> I was thinking the same before making my above comment: what happens if vendors >> submit one big patch, and when a problem occurs we can ask the owner to fix it. >> Probably this makes the vendor happy and makes my life (or any other >> maintainer's life) easier; it is always easier to say yes. >> >> >> But I came up with two main reasons to ask for a rework: >> >> 1- Technically any vendor can deliver their software to their customers via >> a public git repository; they don't have to upstream to >> https://dpdk.org, >> but upstreaming has many benefits. >> >> One of those benefits is that upstreaming provides quality assurance for the >> vendor's customers (that is why customers may be asking for this, as we see >> in many cases), and this quality assurance comes from additional eyes >> reviewing the code and guiding vendors towards the DPDK quality standards (some >> vendors are already doing pretty well, but new ones sometimes require >> hand-holding). 
>> If the driver is one big patch series, it is practically not possible to review >> it; I can catch a few bits here or there, you may catch some others, but >> practically it will be merged without review, and we will fail on our >> quality assurance task. >> >> 2- Make the code more accessible to the rest of the world. >> >> When it is a big patch, the code can be functional, but lots of details, >> reasoning, and relations between components get lost, which makes it even harder >> for an external developer, like me, to understand it (I am a mere guinea pig >> here :). >> >> If a customer would like to add a feature themselves, or fix something, even >> after the vendor is no longer working on that product, the customer needs to >> understand the code or some reasoning in the code. >> Or someone may want to port the driver to Rust, or a DPDK developer may >> want to do a rework that requires updating all drivers, or a tester may >> like to analyze the code to figure out behavior differences of the devices. I >> think I have witnessed all the above cases in real life. >> >> If the driver is split into more patches, each patch is easier to understand, >> which makes the code practically more accessible to other developers who are >> not experts in the driver. >> >> >> Overall, yes, splitting the patch takes time and effort, and yes, this is an >> overhead for code that is already developed, but I think the benefit is big, so >> it is worth doing the task. >> >> >>> We, the community, should no
Re: [ADDENDUM] Technical board meeting agenda for 2023-09-20
On 9/19/2023 2:53 PM, Morten Brørup wrote: >> From: Maxime Coquelin [mailto:maxime.coque...@redhat.com] >> Sent: Tuesday, 19 September 2023 11.51 >> >> Dear community, >> >> One topic was missing from tomorrow's meeting agenda: >> >> On 9/18/23 22:20, Maxime Coquelin wrote: >>> Dear DPDK community, >>> >>> The following topics are planned to be discussed at the next Technical board >>> meeting, which will take place on Sept. 20th @3PM UTC: >> 0- Event dispatcher library inclusion >> >>> 1- Memarea library inclusion >>> 2- Power management brainstorming >>> 3- Bugzilla maintenance: request for volunteers >>> 4- Process to add new drivers > > Christian Muf from Napatech will be joining the meeting for the new driver > process discussion. > > I suppose Ferruh will be joining for this discussion too. > I explained my rationale on the mailing list; let me share the link, it may save time in the meeting: https://mails.dpdk.org/archives/dev/2023-September/276671.html >>> >>> The order in which the topics are discussed may change based on their >>> priority/urgency. Some topics might have to be postponed to the >>> following meeting if time does not allow covering them. >>> >>> If time permits, the recurring items below will be checked: >>> - Security process >>> - Community Lab >>> - Doc maintenance >>> - Bugzilla status >>> - Examples to drop or move > > Please note the meeting URL is no longer Jitsi, but LFX Zoom: > https://zoom-lfx.platform.linuxfoundation.org/meeting/96459488340?password=d808f1f6-0a28-4165-929e-5a5bcae7efeb > >
Re: [PATCH v5 00/26] refact the nfpcore module
On 9/19/2023 10:54 AM, Chaoyong He wrote: > This patch series aims to: > - Make the coding style satisfy with DPDK. > - Sync the logic with kernel driver. > - Make the sub-module more modular. > - Extend the nfp configure BAR from 8 to 24. > > --- > v5: > * Fix one bug in the logic of VF using dev module. > * Remove the R-b tag as the requirement of Niklas. > * Revise logic follow the advice of reviewer. > v4: > * Drop the commit 'using the DPDK memory management API'. > * Modify the commit message of 'standard the comment style'. > * Revise some comment logic as the advice of reviewer. > v3: > * Fix one bug in the BAR find logic. > * Modify more coding style. > v2: > * Fix the compile error in Fodora 37 environment. > --- > > Chaoyong He (26): > net/nfp: explicitly compare to null and 0 > net/nfp: unify the indent coding style > net/nfp: unify the type of integer variable > net/nfp: remove the unneeded logic > net/nfp: standard the local variable coding style > net/nfp: adjust the log statement > net/nfp: standard the comment style > net/nfp: standard the blank character > net/nfp: unify the guide line of header file > net/nfp: rename some parameter and variable > net/nfp: refact the hwinfo module > net/nfp: refact the nffw module > net/nfp: refact the mip module > net/nfp: refact the rtsym module > net/nfp: refact the resource module > net/nfp: refact the target module > net/nfp: add a new header file > net/nfp: refact the nsp module > net/nfp: refact the mutex module > net/nfp: rename data field to sync with kernel driver > net/nfp: add the dev module > net/nfp: add header file for PCIe module > net/nfp: refact the cppcore module > net/nfp: refact the PCIe module > net/nfp: refact the cppcore and PCIe module > net/nfp: extend the usage of nfp BAR from 8 to 24 > Series applied to dpdk-next-net/main, thanks. For reference following sets merged (as set split into multiple series because of send-email issues): https://patchwork.dpdk.org/project/dpdk/list/?series=29547 https://patchwork.dpdk.org/project/dpdk/list/?series=29559 https://patchwork.dpdk.org/project/dpdk/list/?series=29560
Re: [PATCH 0/1] make file prefix unit test more resilient
On Thu, Sep 14, 2023 at 12:42 PM Bruce Richardson wrote: > > When examining the IOL testing failures for patch series [1], I observed > that the failures reported were in the eal_flags_file_prefix unit test. > I was able to reproduce this on my system by passing an additional > "--on-pci" flag to the test run, since the log to the test has errors > about device availability. Adding the "no-pci" flag to the individual Something is not clear to me. While I understand that passing "no-pci" helps avoiding the issue (as described below), I have some trouble understanding this passage (above) with "--on-pci". How did you reproduce the issue? > test commands used by the unit tests fixed the issue thereafter, > allowing the test to pass in all cases for me. Therefore, I am > submitting this patch in the hopes of making the test more robust, since > the observed failures seem unrelated to the original patchset [1] I > submitted. > > [1] http://patches.dpdk.org/project/dpdk/list/?series=29406 > > Bruce Richardson (1): > app/test: skip PCI bus scan when testing prefix flags > > app/test/test_eal_flags.c | 20 ++-- > 1 file changed, 10 insertions(+), 10 deletions(-) Iiuc, the problem is that the file_prefix unit test can fail if any DPDK subsystem forgets to release some memory and some hugepages are left behind at the cleanup step. Passing --no-pci as you suggest hides issues coming from PCI drivers. This is something I tried to fix too, with https://patchwork.dpdk.org/project/dpdk/list/?series=29288 though my fix only handles a part of the issue (here, the ethdev drivers). Another way to make the file prefix more robust would be to remove the check on released memory, or move it to another test. -- David Marchand
Re: [PATCH v3] mbuf: add ESP packet type
On Mon, Aug 28, 2023 at 11:53 PM Alexander Kozyrev wrote: > > Support the IP Encapsulating Security Payload (ESP) in transport mode. As per IPSEC ESP RFC 4303, for both tunnel mode or transport mode, next proto 50, so we cannot identify a packet is for tunnel mode or transport mode by just packet parsing. Am I missing something ? > > Signed-off-by: Alexander Kozyrev > Acked-by: Morten Brørup > --- > lib/mbuf/rte_mbuf_ptype.h | 36 ++-- > 1 file changed, 30 insertions(+), 6 deletions(-) > > diff --git a/lib/mbuf/rte_mbuf_ptype.h b/lib/mbuf/rte_mbuf_ptype.h > index 17a2dd3576..cdd6fd460e 100644 > --- a/lib/mbuf/rte_mbuf_ptype.h > +++ b/lib/mbuf/rte_mbuf_ptype.h > @@ -247,7 +247,7 @@ extern "C" { > * It refers to those packets of any IP types, which can be recognized as > * fragmented. A fragmented packet cannot be recognized as any other L4 types > * (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP, > - * RTE_PTYPE_L4_NONFRAG). > + * RTE_PTYPE_L4_NONFRAG, RTE_PTYPE_L4_IGMP, RTE_PTYPE_L4_ESP). > * > * Packet format: > * <'ether type'=0x0800 > @@ -290,14 +290,15 @@ extern "C" { > * > * It refers to those packets of any IP types, while cannot be recognized as > * any of above L4 types (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, > - * RTE_PTYPE_L4_FRAG, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP). > + * RTE_PTYPE_L4_FRAG (for IPv6), RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP, > + * RTE_PTYPE_L4_IGMP (for IPv4), RTE_PTYPE_L4_ESP). > * > * Packet format: > * <'ether type'=0x0800 > - * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0, 'frag_offset'=0> > + * | 'version'=4, 'protocol'!=[1|2|6|17|50|132], 'MF'=0, 'frag_offset'=0> > * or, > * <'ether type'=0x86DD > - * | 'version'=6, 'next header'!=[6|17|44|132|1]> > + * | 'version'=6, 'next header'!=[1|6|17|44|50|132]> > */ > #define RTE_PTYPE_L4_NONFRAG0x0600 > /** > @@ -308,6 +309,17 @@ extern "C" { > * | 'version'=4, 'protocol'=2, 'MF'=0, 'frag_offset'=0> > */ > #define RTE_PTYPE_L4_IGMP 0x0700 > +/** > + * ESP (IP Encapsulating Security Payload) transport packet type. > + * > + * Packet format: > + * <'ether type'=0x0800 > + * | 'version'=4, 'protocol'=50, 'MF'=0, 'frag_offset'=0> > + * or, > + * <'ether type'=0x86DD > + * | 'version'=6, 'next header'=50> > + */ > +#define RTE_PTYPE_L4_ESP0x0800 Currently there is already a PTYPE `RTE_PTYPE_TUNNEL_ESP` being used by all drivers / ipsec-secgw to indicate ESP packet. So why is this needed ? There is also a documentation issue with `RTE_PTYPE_TUNNEL_ESP` where it indicates next-proto of 51 but it should have been 50. next-proto of 51 is for IPSEC AH. > /** > * Mask of layer 4 packet types. > * It is used for outer packet for tunneling cases. > @@ -652,12 +664,24 @@ extern "C" { > * > * Packet format (inner only): > * <'ether type'=0x0800 > - * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0, 'frag_offset'=0> > + * | 'version'=4, 'protocol'!=[1|6|17|50|132], 'MF'=0, 'frag_offset'=0> > * or, > * <'ether type'=0x86DD > - * | 'version'=6, 'next header'!=[6|17|44|132|1]> > + * | 'version'=6, 'next header'!=[1|6|17|44|50|132]> > */ > #define RTE_PTYPE_INNER_L4_NONFRAG 0x0600 > +/** > + * ESP (IP Encapsulating Security Payload) transport packet type. > + * It is used for inner packet only. > + * > + * Packet format (inner only): > + * <'ether type'=0x0800 > + * | 'version'=4, 'protocol'=50, 'MF'=0, 'frag_offset'=0> > + * or, > + * <'ether type'=0x86DD > + * | 'version'=6, 'next header'=50> > + */ > +#define RTE_PTYPE_INNER_L4_ESP 0x0800 > /** > * Mask of inner layer 4 packet types. > */ > -- > 2.18.2 >
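To make the ambiguity raised above concrete: a parser only ever sees the protocol number and the cleartext ESP header (SPI and sequence number, struct rte_esp_hdr from rte_esp.h); the field that actually distinguishes tunnel from transport mode is the next-header byte in the encrypted trailer. A minimal sketch:

#include <netinet/in.h>   /* IPPROTO_ESP == 50 */
#include <rte_ip.h>
#include <rte_esp.h>

/* Both tunnel-mode and transport-mode ESP look identical here: */
if (ipv4_hdr->next_proto_id == IPPROTO_ESP) {
	const struct rte_esp_hdr *esp =
		(const struct rte_esp_hdr *)(ipv4_hdr + 1);
	/* esp->spi and esp->seq are readable, but whether the payload is
	 * an inner IP packet (tunnel) or an L4 payload (transport) is only
	 * known after decryption, from the trailing next-header field. */
}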
Re: [PATCH v2 1/1] net/mana: enable 32 bit build for mana driver
On 9/20/2023 4:06 AM, Wei Hu wrote: >> -----Original Message----- >> From: Ferruh Yigit >> Sent: Tuesday, September 19, 2023 7:05 PM >> To: Wei Hu ; dev@dpdk.org; Long Li >> >> Cc: sta...@dpdk.org; Kevin Traynor ; Luca Boccassi >> >> Subject: Re: [PATCH v2 1/1] net/mana: enable 32 bit build for mana driver >> >> On 9/19/2023 3:44 AM, Wei Hu wrote: -----Original Message----- From: Ferruh Yigit Sent: Tuesday, September 19, 2023 1:41 AM To: Wei Hu ; dev@dpdk.org; Long Li Cc: sta...@dpdk.org; Kevin Traynor ; Luca Boccassi Subject: Re: [PATCH v2 1/1] net/mana: enable 32 bit build for mana driver On 9/13/2023 1:23 PM, Wei Hu wrote: > Enable 32 bit build on x86 Linux. Fixed build warnings and errors > when building in 32 bit. > > Cc: sta...@dpdk.org > This is not a fix, but adding 32-bit support to the driver, so not sure about backporting. cc'ed Kevin & Luca for guidance. I will drop the tag while merging unless otherwise suggested by LTS maintainers. >>> >>> Hi Ferruh, >>> >>> We have a customer who needs it to be on 22.11.x. That is why I put "Cc: >> sta...@dpdk.org" in it. >>> >> >> Got it, let's get comments from the LTS maintainers. >> > Signed-off-by: Wei Hu > Hi Wei, Patch looks good, but it basically fixes the format specifiers that would cause a build error for 32 bit. Is there any other change required to make the driver functional for 32 bit, or does it become functional with this change? And if it does, can you please confirm explicitly that the driver was tested on 32 bit with this patch? >>> >>> Another patch, "net/mana: add 32 bit short doorbell", is required to >>> make mana fully functional with 32 bit applications. This patch is to >>> fix some build-time errors and warnings when building in 32 bit. These >>> patches can be independently built and they are addressing two different >> issues. So, I put them into two independent patches. >>> >>> I have tested it on 32 bit applications. They can run with this patch. >>> Just mana would not work, as the applications still use 64-bit long doorbells, >> which do not work for 32-bit apps. >>> >> >> Thanks Wei, for the clarification. >> >> As this patch enables the 32-bit build of the driver in meson, can you please add >> a note in the commit log that the 32-bit driver is not functional yet? >> >> Or maybe the meson update can be moved to the short doorbell patch, where the driver >> becomes functional for 32 bit, and this patch can mention in the commit log that it is >> preparation for 32-bit support, what do you think? > > Thanks Ferruh. I would like to add a note in the commit log that the 32-bit driver is not fully > functional yet until short doorbell support is added. Meanwhile I will still keep > the meson update within this patch to keep it complete. Hope this works > for you. > I think it will do. Can you please send those two patches in the same patchset? That gives some context if we need to trace it back later.
Re: [PATCH 0/1] make file prefix unit test more resilient
On Wed, Sep 20, 2023 at 12:00:08PM +0200, David Marchand wrote: > On Thu, Sep 14, 2023 at 12:42 PM Bruce Richardson > wrote: > > > > When examining the IOL testing failures for patch series [1], I observed > > that the failures reported were in the eal_flags_file_prefix unit test. > > I was able to reproduce this on my system by passing an additional > > "--on-pci" flag to the test run, since the log to the test has errors > > about device availability. Adding the "no-pci" flag to the individual > > Something is not clear to me. > > While I understand that passing "no-pci" helps avoiding the issue (as > described below), I have some trouble understanding this passage > (above) with "--on-pci". That's a typo for no-pci. When I ran the test on my system with the main process using no-pci, I was able to reproduce the issue seen in the IOL lab. Otherwise I couldn't reproduce it. > How did you reproduce the issue? > > > > test commands used by the unit tests fixed the issue thereafter, > > allowing the test to pass in all cases for me. Therefore, I am > > submitting this patch in the hopes of making the test more robust, since > > the observed failures seem unrelated to the original patchset [1] I > > submitted. > > > > [1] http://patches.dpdk.org/project/dpdk/list/?series=29406 > > > > Bruce Richardson (1): > > app/test: skip PCI bus scan when testing prefix flags > > > > app/test/test_eal_flags.c | 20 ++-- > > 1 file changed, 10 insertions(+), 10 deletions(-) > > Iiuc, the problem is that the file_prefix unit test can fail if any > DPDK subsystem forgets to release some memory and some hugepages are > left behind at the cleanup step. > Passing --no-pci as you suggest hides issues coming from PCI drivers. > > This is something I tried to fix too, with > https://patchwork.dpdk.org/project/dpdk/list/?series=29288 though my > fix only handles a part of the issue (here, the ethdev drivers). > > Another way to make the file prefix more robust would be to remove the > check on released memory, or move it to another test. > I actually think the test is a good one to have. Also, taking in your patch to help with the issue is a good idea also. I'd still suggest that this patch be considered anyway, as there is no need to do PCI bus scanning as part of this test. Therefore I'd view it as a harmless addition that may help things. /Bruce
[PATCH 1/2] common/cnxk: reserve last LMT line for control ops
As rte_eth_dev_configure() can be called from any EAL or non-EAL cores. And in case of non-EAL core, LMT address will not be a valid. So, reserving last LMT line 2047 for control path specific functionality. Signed-off-by: Rahul Bhansali --- drivers/common/cnxk/roc_dev.c | 5 + drivers/common/cnxk/roc_nix_inl.c | 6 -- drivers/common/cnxk/roc_platform.c | 25 + drivers/common/cnxk/roc_platform.h | 5 + drivers/common/cnxk/version.map| 2 ++ 5 files changed, 41 insertions(+), 2 deletions(-) diff --git a/drivers/common/cnxk/roc_dev.c b/drivers/common/cnxk/roc_dev.c index 18d7981825..3815da078a 100644 --- a/drivers/common/cnxk/roc_dev.c +++ b/drivers/common/cnxk/roc_dev.c @@ -1369,6 +1369,11 @@ dev_init(struct dev *dev, struct plt_pci_device *pci_dev) if (!dev_cache_line_size_valid()) return -EFAULT; + if (!roc_plt_lmt_validate()) { + plt_err("Failed to validate LMT line"); + return -EFAULT; + } + bar2 = (uintptr_t)pci_dev->mem_resource[2].addr; bar4 = (uintptr_t)pci_dev->mem_resource[4].addr; if (bar2 == 0 || bar4 == 0) { diff --git a/drivers/common/cnxk/roc_nix_inl.c b/drivers/common/cnxk/roc_nix_inl.c index 5cb1f11f53..750fd08355 100644 --- a/drivers/common/cnxk/roc_nix_inl.c +++ b/drivers/common/cnxk/roc_nix_inl.c @@ -779,8 +779,10 @@ nix_inl_eng_caps_get(struct nix *nix) hw_res->cn10k.compcode = CPT_COMP_NOT_DONE; - /* Use this lcore's LMT line as no one else is using it */ - ROC_LMT_BASE_ID_GET(lmt_base, lmt_id); + /* Use this reserved LMT line as no one else is using it */ + lmt_id = roc_plt_control_lmt_id_get(); + lmt_base += ((uint64_t)lmt_id << ROC_LMT_LINE_SIZE_LOG2); + memcpy((void *)lmt_base, &inst, sizeof(inst)); lmt_arg = ROC_CN10K_CPT_LMT_ARG | (uint64_t)lmt_id; diff --git a/drivers/common/cnxk/roc_platform.c b/drivers/common/cnxk/roc_platform.c index f91b95ceab..ffc82720b0 100644 --- a/drivers/common/cnxk/roc_platform.c +++ b/drivers/common/cnxk/roc_platform.c @@ -21,6 +21,31 @@ roc_plt_init_cb_register(roc_plt_init_cb_t cb) return 0; } +uint16_t +roc_plt_control_lmt_id_get(void) +{ + uint32_t lcore_id = plt_lcore_id(); + if (lcore_id != LCORE_ID_ANY) + return lcore_id << ROC_LMT_LINES_PER_CORE_LOG2; + else + /* Return Last LMT ID to be use in control path functionality */ + return ROC_NUM_LMT_LINES - 1; +} + +uint16_t +roc_plt_lmt_validate(void) +{ + if (!roc_model_is_cn9k()) { + /* Last LMT line is reserved for control specific operation and can be +* use from any EAL or non EAL cores. +*/ + if ((RTE_MAX_LCORE << ROC_LMT_LINES_PER_CORE_LOG2) > + (ROC_NUM_LMT_LINES - 1)) + return 0; + } + return 1; +} + int roc_plt_init(void) { diff --git a/drivers/common/cnxk/roc_platform.h b/drivers/common/cnxk/roc_platform.h index e7a6564163..7605eed33d 100644 --- a/drivers/common/cnxk/roc_platform.h +++ b/drivers/common/cnxk/roc_platform.h @@ -314,6 +314,11 @@ extern int cnxk_logtype_ree; __rte_internal int roc_plt_init(void); +__rte_internal +uint16_t roc_plt_control_lmt_id_get(void); +__rte_internal +uint16_t roc_plt_lmt_validate(void); + /* Init callbacks */ typedef int (*roc_plt_init_cb_t)(void); int __roc_api roc_plt_init_cb_register(roc_plt_init_cb_t cb); diff --git a/drivers/common/cnxk/version.map b/drivers/common/cnxk/version.map index 43c3d9ed77..04f8fabfcb 100644 --- a/drivers/common/cnxk/version.map +++ b/drivers/common/cnxk/version.map @@ -463,6 +463,8 @@ INTERNAL { roc_ot_ipsec_outb_sa_init; roc_plt_init; roc_plt_init_cb_register; + roc_plt_lmt_validate; + roc_plt_control_lmt_id_get; roc_sso_dev_fini; roc_sso_dev_init; roc_sso_dump; -- 2.25.1
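As a worked example of the validation above, assuming (hypothetically) ROC_LMT_LINES_PER_CORE_LOG2 == 5 (32 LMT lines per core) and ROC_NUM_LMT_LINES == 2048, consistent with line 2047 being the reserved one:

/* Each lcore owns the block of lines [lcore_id << 5, (lcore_id << 5) + 31].
 * Line 2047 can only be reserved for control-path use if no per-core block
 * reaches it, i.e. RTE_MAX_LCORE << 5 must not exceed 2047, which is the
 * check in roc_plt_lmt_validate(); with these values it requires
 * RTE_MAX_LCORE <= 63 (lcore 62's block then ends at line 2015). */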
[PATCH 2/2] net/cnxk: separate callback for Rx flush on CN10k
In dev stop case, Rx packet flush callback uses LMT lines to bulk free of the meta buffers. If dev stop is called from non EAL core then LMT address will not be valid. To avoid this, A separate callback for Rx packets flush is added, which will use NPA aura free API on individual meta packets. Signed-off-by: Rahul Bhansali --- drivers/net/cnxk/cn10k_rx.h| 93 ++ drivers/net/cnxk/cn10k_rx_select.c | 10 +++- 2 files changed, 101 insertions(+), 2 deletions(-) diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h index 41d11349fd..1d7c5215a7 100644 --- a/drivers/net/cnxk/cn10k_rx.h +++ b/drivers/net/cnxk/cn10k_rx.h @@ -1007,6 +1007,99 @@ cn10k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, return nb_pkts; } +static __rte_always_inline uint16_t +cn10k_nix_flush_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, + const uint16_t flags) +{ + struct cn10k_eth_rxq *rxq = rx_queue; + const uint64_t mbuf_init = rxq->mbuf_initializer; + const void *lookup_mem = rxq->lookup_mem; + const uint64_t data_off = rxq->data_off; + struct rte_mempool *meta_pool = NULL; + const uint64_t wdata = rxq->wdata; + const uint32_t qmask = rxq->qmask; + const uintptr_t desc = rxq->desc; + uint64_t lbase = rxq->lmt_base; + uint16_t packets = 0, nb_pkts; + uint16_t lmt_id __rte_unused; + uint32_t head = rxq->head; + struct nix_cqe_hdr_s *cq; + struct rte_mbuf *mbuf; + uint64_t sa_base = 0; + uintptr_t cpth = 0; + uint8_t loff = 0; + uint64_t laddr; + + nb_pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask); + + if (flags & NIX_RX_OFFLOAD_SECURITY_F) { + sa_base = rxq->sa_base; + sa_base &= ~(ROC_NIX_INL_SA_BASE_ALIGN - 1); + ROC_LMT_BASE_ID_GET(lbase, lmt_id); + laddr = lbase; + laddr += 8; + if (flags & NIX_RX_REAS_F) + meta_pool = (struct rte_mempool *)rxq->meta_pool; + } + + while (packets < nb_pkts) { + /* Prefetch N desc ahead */ + rte_prefetch_non_temporal((void *)(desc + (CQE_SZ((head + 2) & qmask; + cq = (struct nix_cqe_hdr_s *)(desc + CQE_SZ(head)); + + mbuf = nix_get_mbuf_from_cqe(cq, data_off); + + /* Mark mempool obj as "get" as it is alloc'ed by NIX */ + RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1); + + /* Translate meta to mbuf */ + if (flags & NIX_RX_OFFLOAD_SECURITY_F) { + const uint64_t cq_w1 = *((const uint64_t *)cq + 1); + const uint64_t cq_w5 = *((const uint64_t *)cq + 5); + struct rte_mbuf *meta_buf = mbuf; + + cpth = ((uintptr_t)meta_buf + (uint16_t)data_off); + + /* Update mempool pointer for full mode pkt */ + if ((flags & NIX_RX_REAS_F) && (cq_w1 & BIT(11)) && + !((*(uint64_t *)cpth) & BIT(15))) + meta_buf->pool = meta_pool; + + mbuf = nix_sec_meta_to_mbuf_sc(cq_w1, cq_w5, sa_base, laddr, &loff, + meta_buf, data_off, flags, mbuf_init); + /* Free Meta mbuf, not use LMT line for flush as this will be called +* from non-datapath i.e. dev_stop case. 
+*/ + if (loff) { + roc_npa_aura_op_free(meta_buf->pool->pool_id, 0, +(uint64_t)meta_buf); + loff = 0; + } + } + + cn10k_nix_cqe_to_mbuf(cq, cq->tag, mbuf, lookup_mem, mbuf_init, + cpth, sa_base, flags); + cn10k_nix_mbuf_to_tstamp(mbuf, rxq->tstamp, + (flags & NIX_RX_OFFLOAD_TSTAMP_F), + (uint64_t *)((uint8_t *)mbuf + data_off)); + rx_pkts[packets++] = mbuf; + roc_prefetch_store_keep(mbuf); + head++; + head &= qmask; + } + + rxq->head = head; + rxq->available -= nb_pkts; + + /* Free all the CQs that we've processed */ + plt_write64((wdata | nb_pkts), rxq->cq_door); + + if (flags & NIX_RX_OFFLOAD_SECURITY_F) + rte_io_wmb(); + + return nb_pkts; +} + #if defined(RTE_ARCH_ARM64) static __rte_always_inline uint64_t diff --git a/drivers/net/cnxk/cn10k_rx_select.c b/drivers/net/cnxk/cn10k_rx_select.c index 1d44f2924e..6a5c34287e 100644 --- a/drivers/net/cnxk/cn10k_rx_select.c +++ b/drivers/net/cnxk/cn10k_rx_select.c @@ -22,6 +22,13 @@ pick
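The essential difference from the fast-path receive loop is how meta buffers are released: the fast path batches frees through the calling lcore's LMT line, while this flush path (which may run off an EAL core) returns each meta mbuf to its NPA aura individually. Side by side, using the names from the diff above:

    /* Fast path: batch meta frees through this lcore's LMT line. */
    ROC_LMT_BASE_ID_GET(lbase, lmt_id);
    /* ... meta pointers accumulated at laddr, flushed in bulk ... */

    /* Flush path: no valid LMT line, so free one meta packet at a time. */
    roc_npa_aura_op_free(meta_buf->pool->pool_id, 0, (uint64_t)meta_buf);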
[PATCH v2] security: hide security context
rte_security_ctx is used by all security APIs to identify which device security_op it needs to call, and hence it should be opaque to the application. It is now moved to an internal header file, and all APIs now take an opaque pointer to it. The fast path inline APIs, like set metadata, need to get flags from the security_ctx; the flags are now retrieved using inline APIs which use macros to get the offset of flags in security_ctx. Signed-off-by: Akhil Goyal --- Changes in v2: Rebased. app/test-crypto-perf/cperf_ops.c | 9 +-- app/test-crypto-perf/cperf_test_latency.c | 3 +- .../cperf_test_pmd_cyclecount.c | 8 +- app/test-crypto-perf/cperf_test_throughput.c | 9 +-- app/test-crypto-perf/cperf_test_verify.c | 4 +- app/test-security-perf/test_security_perf.c | 2 +- app/test/test_cryptodev.c | 22 ++ app/test/test_cryptodev_security_ipsec.c | 2 +- app/test/test_cryptodev_security_ipsec.h | 2 +- app/test/test_security_inline_macsec.c| 10 +-- app/test/test_security_inline_proto.c | 14 ++-- examples/ipsec-secgw/ipsec-secgw.c| 2 +- examples/ipsec-secgw/ipsec.c | 15 ++-- examples/ipsec-secgw/ipsec.h | 2 +- examples/ipsec-secgw/ipsec_worker.c | 2 +- examples/ipsec-secgw/ipsec_worker.h | 4 +- lib/security/rte_security.c | 61 +-- lib/security/rte_security.h | 77 +-- lib/security/rte_security_driver.h| 24 ++ 19 files changed, 140 insertions(+), 132 deletions(-) diff --git a/app/test-crypto-perf/cperf_ops.c b/app/test-crypto-perf/cperf_ops.c index 93b9bfb240..84945d1313 100644 --- a/app/test-crypto-perf/cperf_ops.c +++ b/app/test-crypto-perf/cperf_ops.c @@ -749,8 +749,7 @@ create_ipsec_session(struct rte_mempool *sess_mp, else sess_conf.ipsec.direction = RTE_SECURITY_IPSEC_SA_DIR_INGRESS; - struct rte_security_ctx *ctx = (struct rte_security_ctx *) - rte_cryptodev_get_sec_ctx(dev_id); + void *ctx = rte_cryptodev_get_sec_ctx(dev_id); /* Create security session */ return (void *)rte_security_session_create(ctx, &sess_conf, sess_mp); @@ -853,8 +852,7 @@ cperf_create_session(struct rte_mempool *sess_mp, .crypto_xform = &cipher_xform }; - struct rte_security_ctx *ctx = (struct rte_security_ctx *) - rte_cryptodev_get_sec_ctx(dev_id); + void *ctx = rte_cryptodev_get_sec_ctx(dev_id); /* Create security session */ return (void *)rte_security_session_create(ctx, &sess_conf, sess_mp); @@ -901,8 +899,7 @@ cperf_create_session(struct rte_mempool *sess_mp, } }, .crypto_xform = &cipher_xform }; - struct rte_security_ctx *ctx = (struct rte_security_ctx *) - rte_cryptodev_get_sec_ctx(dev_id); + void *ctx = rte_cryptodev_get_sec_ctx(dev_id); /* Create security session */ return (void *)rte_security_session_create(ctx, &sess_conf, sess_mp); diff --git a/app/test-crypto-perf/cperf_test_latency.c b/app/test-crypto-perf/cperf_test_latency.c index f1676a9aa9..484bc9eb4e 100644 --- a/app/test-crypto-perf/cperf_test_latency.c +++ b/app/test-crypto-perf/cperf_test_latency.c @@ -53,8 +53,7 @@ cperf_latency_test_free(struct cperf_latency_ctx *ctx) else if (ctx->options->op_type == CPERF_PDCP || ctx->options->op_type == CPERF_DOCSIS || ctx->options->op_type == CPERF_IPSEC) { - struct rte_security_ctx *sec_ctx = - rte_cryptodev_get_sec_ctx(ctx->dev_id); + void *sec_ctx = rte_cryptodev_get_sec_ctx(ctx->dev_id); rte_security_session_destroy(sec_ctx, ctx->sess); } #endif diff --git a/app/test-crypto-perf/cperf_test_pmd_cyclecount.c b/app/test-crypto-perf/cperf_test_pmd_cyclecount.c index 0307e82996..4a60f6d558 100644 --- a/app/test-crypto-perf/cperf_test_pmd_cyclecount.c +++ b/app/test-crypto-perf/cperf_test_pmd_cyclecount.c @@ -67,11 
+67,9 @@ cperf_pmd_cyclecount_test_free(struct cperf_pmd_cyclecount_ctx *ctx) #ifdef RTE_LIB_SECURITY if (ctx->options->op_type == CPERF_PDCP || ctx->options->op_type == CPERF_DOCSIS) { - struct rte_security_ctx *sec_ctx = - (struct rte_security_ctx *) - rte_cryptodev_get_sec_ctx(ctx->dev_id); - rte_security_session_destroy(sec_ctx, - (void *)ctx->sess); + void *sec_ctx = rte_cryptodev_ge
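The "macros to get the offset of flags" approach in the description can be pictured with a small, purely hypothetical sketch; neither the macro nor the offset below is the real rte_security definition:

    #include <stdint.h>

    /* Hypothetical: fast-path read of the flags field through an opaque
     * handle; the real offset would be derived from the internal layout. */
    #define SEC_CTX_FLAGS_OFFSET 8  /* assumption, not the actual offset */

    static inline uint32_t
    sec_ctx_flags_get(void *ctx)
    {
            return *(uint32_t *)((uint8_t *)ctx + SEC_CTX_FLAGS_OFFSET);
    }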
RE: [PATCH 1/4] security: remove redundant cast
> Subject: [PATCH 1/4] security: remove redundant cast > > The API 'rte_cryptodev_get_sec_ctx' returns void *. Type cast is not > required. > > Signed-off-by: Anoob Joseph > --- The changes in the series are part of https://patches.dpdk.org/project/dpdk/patch/20230920104000.2868230-1-gak...@marvell.com/ Hence dropping this series.
Re: [PATCH v2 1/1] net/mana: enable 32 bit build for mana driver
On 20/09/2023 11:03, Ferruh Yigit wrote: On 9/20/2023 4:06 AM, Wei Hu wrote: -Original Message- From: Ferruh Yigit Sent: Tuesday, September 19, 2023 7:05 PM To: Wei Hu ; dev@dpdk.org; Long Li Cc: sta...@dpdk.org; Kevin Traynor ; Luca Boccassi Subject: Re: [PATCH v2 1/1] net/mana: enable 32 bit build for mana driver On 9/19/2023 3:44 AM, Wei Hu wrote: -Original Message- From: Ferruh Yigit Sent: Tuesday, September 19, 2023 1:41 AM To: Wei Hu ; dev@dpdk.org; Long Li Cc: sta...@dpdk.org; Kevin Traynor ; Luca Boccassi Subject: Re: [PATCH v2 1/1] net/mana: enable 32 bit build for mana driver On 9/13/2023 1:23 PM, Wei Hu wrote: Enable 32 bit build on x86 Linux. Fixed build warnings and errors when building in 32 bit. Cc: sta...@dpdk.org This is not a fix, but adding 32bit support to the driver, so not sure about backporting. cc'ed Kevin & Luca for guidance. I will drop the tag while merging unles otherwise suggested by LTS maintainers. Hi Ferruh, We have a customer who needs it to be on 22.11.x. That is why I put "Cc: sta...@dpdk.org" in it. Got it, lets get comment from the LTS maintainers. Signed-off-by: Wei Hu Hi Wei, Patch looks good, but it basically fixes the format specifiers that will cause build error for 32 bit. Is there any other change required to make driver functional for 32 bit, or does it become functional with this change? And if it does can you please confirm explicityly that driver tested on 32 bit with this patch? Another patch, " net/mana: add 32 bit short doorbell", is required to make mana fully functional with 32 bit applications. This patch is to fix some build time errors and warnings when build in 32 bit. These patches can be independently built and they are addressing two different issues. So, I put them into two independent patches. I have tested it on 32 bit applications. They can run with this patch. Just mana would not work as the applications still use 64 bit long door bells, which do not work for 32 bit apps. Thanks Wei, for clarification. As this patch enables the 32bit build of driver in meson, can you please add comment log that 32bit driver is not functional yet? Or maybe meson update can be moved to short doorbell patch, where driver becomes functional for 32 bit, this patch can mention in commit log that is preparation for 32 bit support, what do you think? Thanks Ferruh. I would like to add comment log that 32bit driver is not fully functioning yet until short doorbell support is added. Meanwhile still keep the meson update in within this patch to keep it complete. Hope this works for you. I think it will do. Can you please send those two patches in a same patchset, that gives some context if we need to back trace it later? +1 Adding 22.11 maintainer Xueming. The main thing is that it would not cause a regression to 64 bit. That is something that would need to be tested by Microsoft.
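As noted in the thread, the patch "basically fixes the format specifiers that will cause build error for 32 bit". The usual shape of such a fix is replacing length modifiers that assume a 64-bit long with the inttypes.h macros, for example:

    #include <inttypes.h>
    #include <stdio.h>

    uint64_t db_addr = 0;  /* 'db_addr' is an illustrative variable, not from the patch */
    /* printf("db %lx\n", db_addr);   -- warns/truncates on 32-bit, where long is 32 bits */
    printf("db %" PRIx64 "\n", db_addr);  /* correct on both 32- and 64-bit targets */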
[PATCH 01/13] net/nfp: make sure header file is self-containing
Make sure the header file 'nfp_ctrl.h' is self-containing by including the 'stdint.h' header file to access the size-specific integer types. Signed-off-by: Chaoyong He --- drivers/net/nfp/nfp_ctrl.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/nfp/nfp_ctrl.h b/drivers/net/nfp/nfp_ctrl.h index 7007548447..53727992a9 100644 --- a/drivers/net/nfp/nfp_ctrl.h +++ b/drivers/net/nfp/nfp_ctrl.h @@ -6,6 +6,8 @@ #ifndef _NFP_CTRL_H_ #define _NFP_CTRL_H_ +#include <stdint.h> + /* * Configuration BAR size. * -- 2.39.1
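A self-containing header is one that compiles when included on its own; a quick check is a translation unit that includes nothing else (a hypothetical test file, not part of the patch):

    /* selftest.c: compiles only if nfp_ctrl.h pulls in all it depends on;
     * without <stdint.h>, its uint32_t/uint64_t uses would be undeclared. */
    #include "nfp_ctrl.h"

    int main(void) { return 0; }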
[PATCH 00/13] improve the modularization of NFP PMD
This patch series aims to improve the modularization of the NFP PMD through: * Make the header files self-containing by adding the correct include statements. * Try to keep the interface of modules as small as possible. * Remove the unneeded include statements to ensure clean dependency relations among modules. Chaoyong He (13): net/nfp: make sure header file is self-containing net/nfp: improve modularization of rxtx module net/nfp: improve modularization of nfd3 module net/nfp: improve modularization of nfdk module net/nfp: improve modularization of common module net/nfp: improve modularization of flower module net/nfp: improve modularization of flower representor module net/nfp: improve modularization of flower ctrl module net/nfp: improve modularization of flower cmsg module net/nfp: improve modularization of flow module net/nfp: improve modularization of meter module net/nfp: improve modularization of CPP bridge module net/nfp: cleanup the include statement of PMD drivers/net/nfp/flower/nfp_flower.c | 22 +-- drivers/net/nfp/flower/nfp_flower.h | 2 + drivers/net/nfp/flower/nfp_flower_cmsg.c | 11 +- drivers/net/nfp/flower/nfp_flower_cmsg.h | 77 +++- drivers/net/nfp/flower/nfp_flower_ctrl.c | 13 +- drivers/net/nfp/flower/nfp_flower_ctrl.h | 2 + .../net/nfp/flower/nfp_flower_representor.c | 29 +-- .../net/nfp/flower/nfp_flower_representor.h | 14 -- drivers/net/nfp/nfd3/nfp_nfd3.h | 89 + drivers/net/nfp/nfd3/nfp_nfd3_dp.c| 96 +- drivers/net/nfp/nfdk/nfp_nfdk.h | 71 +-- drivers/net/nfp/nfdk/nfp_nfdk_dp.c| 81 +++- drivers/net/nfp/nfp_common.c | 51 ++--- drivers/net/nfp/nfp_common.h | 34 +--- drivers/net/nfp/nfp_cpp_bridge.c | 14 +- drivers/net/nfp/nfp_cpp_bridge.h | 8 - drivers/net/nfp/nfp_ctrl.h| 2 + drivers/net/nfp/nfp_ethdev.c | 25 +-- drivers/net/nfp/nfp_ethdev_vf.c | 9 +- drivers/net/nfp/nfp_flow.c| 88 - drivers/net/nfp/nfp_flow.h| 78 +--- drivers/net/nfp/nfp_mtr.c | 16 +- drivers/net/nfp/nfp_mtr.h | 70 +-- drivers/net/nfp/nfp_rxtx.c| 174 +- drivers/net/nfp/nfp_rxtx.h| 168 + drivers/net/nfp/nfpcore/nfp_platform.h| 2 + 26 files changed, 579 insertions(+), 667 deletions(-) -- 2.39.1
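A convention that recurs in the diffs of this series is the include ordering of each source file: the module's own header first (which doubles as a self-containment check), then system and DPDK headers, then other project headers sorted by path. An illustrative skeleton (file names hypothetical):

    /* nfp_example.c -- include order used throughout this series */
    #include "nfp_example.h"      /* own header first: proves self-containment */

    #include <rte_mbuf.h>         /* system / DPDK headers next */

    #include "nfpcore/nfp_nsp.h"  /* then other project headers, sorted */
    #include "nfp_logs.h"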
[PATCH 03/13] net/nfp: improve modularization of nfd3 module
Make the header file self-containing by adding the correct include statement. Try to keep the API small by move the logic which need not expose from header file to source file verbatim. Also remove the unneeded header file include statement of source file. Signed-off-by: Chaoyong He --- drivers/net/nfp/nfd3/nfp_nfd3.h| 89 +-- drivers/net/nfp/nfd3/nfp_nfd3_dp.c | 96 -- 2 files changed, 92 insertions(+), 93 deletions(-) diff --git a/drivers/net/nfp/nfd3/nfp_nfd3.h b/drivers/net/nfp/nfd3/nfp_nfd3.h index 910e622fa2..7c56ca4908 100644 --- a/drivers/net/nfp/nfd3/nfp_nfd3.h +++ b/drivers/net/nfp/nfd3/nfp_nfd3.h @@ -6,20 +6,12 @@ #ifndef _NFP_NFD3_H_ #define _NFP_NFD3_H_ +#include "../nfp_rxtx.h" + /* TX descriptor format */ #define NFD3_DESC_TX_EOPRTE_BIT32(7) #define NFD3_DESC_TX_OFFSET_MASK(0x7F)/* [0,6] */ -/* Flags in the host TX descriptor */ -#define NFD3_DESC_TX_CSUM RTE_BIT32(7) -#define NFD3_DESC_TX_IP4_CSUM RTE_BIT32(6) -#define NFD3_DESC_TX_TCP_CSUM RTE_BIT32(5) -#define NFD3_DESC_TX_UDP_CSUM RTE_BIT32(4) -#define NFD3_DESC_TX_VLAN RTE_BIT32(3) -#define NFD3_DESC_TX_LSORTE_BIT32(2) -#define NFD3_DESC_TX_ENCAP RTE_BIT32(1) -#define NFD3_DESC_TX_O_IP4_CSUM RTE_BIT32(0) - #define NFD3_TX_DESC_PER_PKT 1 struct nfp_net_nfd3_tx_desc { @@ -76,83 +68,6 @@ nfp_net_nfd3_txq_full(struct nfp_net_txq *txq) return (nfp_net_nfd3_free_tx_desc(txq) < txq->tx_free_thresh); } -/* nfp_net_nfd3_tx_tso() - Set NFD3 TX descriptor for TSO */ -static inline void -nfp_net_nfd3_tx_tso(struct nfp_net_txq *txq, - struct nfp_net_nfd3_tx_desc *txd, - struct rte_mbuf *mb) -{ - uint64_t ol_flags; - struct nfp_net_hw *hw = txq->hw; - - if ((hw->cap & NFP_NET_CFG_CTRL_LSO_ANY) == 0) - goto clean_txd; - - ol_flags = mb->ol_flags; - if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0) - goto clean_txd; - - txd->l3_offset = mb->l2_len; - txd->l4_offset = mb->l2_len + mb->l3_len; - txd->lso_hdrlen = mb->l2_len + mb->l3_len + mb->l4_len; - - if ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) != 0) { - txd->l3_offset += mb->outer_l2_len + mb->outer_l3_len; - txd->l4_offset += mb->outer_l2_len + mb->outer_l3_len; - txd->lso_hdrlen += mb->outer_l2_len + mb->outer_l3_len; - } - - txd->mss = rte_cpu_to_le_16(mb->tso_segsz); - txd->flags = NFD3_DESC_TX_LSO; - - return; - -clean_txd: - txd->flags = 0; - txd->l3_offset = 0; - txd->l4_offset = 0; - txd->lso_hdrlen = 0; - txd->mss = 0; -} - -/* nfp_net_nfd3_tx_cksum() - Set TX CSUM offload flags in NFD3 TX descriptor */ -static inline void -nfp_net_nfd3_tx_cksum(struct nfp_net_txq *txq, - struct nfp_net_nfd3_tx_desc *txd, - struct rte_mbuf *mb) -{ - uint64_t ol_flags; - struct nfp_net_hw *hw = txq->hw; - - if ((hw->cap & NFP_NET_CFG_CTRL_TXCSUM) == 0) - return; - - ol_flags = mb->ol_flags; - - /* Set TCP csum offload if TSO enabled. 
*/ - if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0) - txd->flags |= NFD3_DESC_TX_TCP_CSUM; - - /* IPv6 does not need checksum */ - if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM) != 0) - txd->flags |= NFD3_DESC_TX_IP4_CSUM; - - if ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) != 0) - txd->flags |= NFD3_DESC_TX_ENCAP; - - switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) { - case RTE_MBUF_F_TX_UDP_CKSUM: - txd->flags |= NFD3_DESC_TX_UDP_CSUM; - break; - case RTE_MBUF_F_TX_TCP_CKSUM: - txd->flags |= NFD3_DESC_TX_TCP_CSUM; - break; - } - - if ((ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK)) != 0) - txd->flags |= NFD3_DESC_TX_CSUM; -} - uint32_t nfp_flower_nfd3_pkt_add_metadata(struct rte_mbuf *mbuf, uint32_t port_id); uint16_t nfp_net_nfd3_xmit_pkts_common(void *tx_queue, diff --git a/drivers/net/nfp/nfd3/nfp_nfd3_dp.c b/drivers/net/nfp/nfd3/nfp_nfd3_dp.c index 79e1e1041c..ab0747fc16 100644 --- a/drivers/net/nfp/nfd3/nfp_nfd3_dp.c +++ b/drivers/net/nfp/nfd3/nfp_nfd3_dp.c @@ -3,16 +3,100 @@ * All rights reserved. */ -#include +#include "nfp_nfd3.h" + #include #include -#include "../nfp_logs.h" -#include "../nfp_common.h" -#include "../nfp_rxtx.h" #include "../flower/nfp_flower.h" -#include "../flower/nfp_flower_cmsg.h" -#include "nfp_nfd3.h" +#include "../nfp_logs.h" + +/* Flags in the host TX descriptor */ +#define NFD3_DESC_TX_CSUM RTE_BIT32(7) +#define NFD3_DESC_TX_IP4_CSUM RTE_BIT32(6) +#define NFD3_DESC_TX_TCP_CSUM RTE_BIT32(5) +#define NFD3_DESC_TX_UDP_CSUM RTE_BIT32(4) +
[PATCH 04/13] net/nfp: improve modularization of nfdk module
Make the header file self-containing by adding the correct include statement. Try to keep the API small by move the logic which need not expose from header file to source file verbatim. Also remove the unneeded header file include statement of source file. Signed-off-by: Chaoyong He --- drivers/net/nfp/nfdk/nfp_nfdk.h| 71 +- drivers/net/nfp/nfdk/nfp_nfdk_dp.c | 82 ++ 2 files changed, 75 insertions(+), 78 deletions(-) diff --git a/drivers/net/nfp/nfdk/nfp_nfdk.h b/drivers/net/nfp/nfdk/nfp_nfdk.h index 66f020efb0..75ecb361ee 100644 --- a/drivers/net/nfp/nfdk/nfp_nfdk.h +++ b/drivers/net/nfp/nfdk/nfp_nfdk.h @@ -6,8 +6,9 @@ #ifndef _NFP_NFDK_H_ #define _NFP_NFDK_H_ +#include "../nfp_rxtx.h" + #define NFDK_TX_DESC_PER_SIMPLE_PKT 2 -#define NFDK_TX_DESC_GATHER_MAX 17 #define NFDK_TX_MAX_DATA_PER_HEAD 0x1000/* 4K */ #define NFDK_TX_MAX_DATA_PER_DESC 0x4000/* 16K */ @@ -16,7 +17,6 @@ /* The mask of 'dma_len_xx' of address descriptor */ #define NFDK_DESC_TX_DMA_LEN_HEAD 0x0FFF/* [0,11] */ #define NFDK_DESC_TX_DMA_LEN0x3FFF/* [0,13] */ -#define NFDK_DESC_TX_TYPE_HEAD 0xF000/* [12,15] */ /* The mask of upper 4 bit of first address descriptor */ #define NFDK_DESC_TX_TYPE_HEAD 0xF000/* [12,15] */ @@ -160,73 +160,6 @@ nfp_net_nfdk_txq_full(struct nfp_net_txq *txq) return (nfp_net_nfdk_free_tx_desc(txq) < txq->tx_free_thresh); } -/* nfp_net_nfdk_tx_cksum() - Set TX CSUM offload flags in TX descriptor of nfdk */ -static inline uint64_t -nfp_net_nfdk_tx_cksum(struct nfp_net_txq *txq, - struct rte_mbuf *mb, - uint64_t flags) -{ - uint64_t ol_flags; - struct nfp_net_hw *hw = txq->hw; - - if ((hw->cap & NFP_NET_CFG_CTRL_TXCSUM) == 0) - return flags; - - ol_flags = mb->ol_flags; - - /* Set TCP csum offload if TSO enabled. */ - if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0) - flags |= NFDK_DESC_TX_L4_CSUM; - - if ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) != 0) - flags |= NFDK_DESC_TX_ENCAP; - - /* IPv6 does not need checksum */ - if ((ol_flags & RTE_MBUF_F_TX_IP_CKSUM) != 0) - flags |= NFDK_DESC_TX_L3_CSUM; - - if ((ol_flags & RTE_MBUF_F_TX_L4_MASK) != 0) - flags |= NFDK_DESC_TX_L4_CSUM; - - return flags; -} - -/* nfp_net_nfdk_tx_tso() - Set TX descriptor for TSO of nfdk */ -static inline uint64_t -nfp_net_nfdk_tx_tso(struct nfp_net_txq *txq, - struct rte_mbuf *mb) -{ - uint8_t outer_len; - uint64_t ol_flags; - struct nfp_net_nfdk_tx_desc txd; - struct nfp_net_hw *hw = txq->hw; - - txd.raw = 0; - - if ((hw->cap & NFP_NET_CFG_CTRL_LSO_ANY) == 0) - return txd.raw; - - ol_flags = mb->ol_flags; - if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) == 0) - return txd.raw; - - txd.l3_offset = mb->l2_len; - txd.l4_offset = mb->l2_len + mb->l3_len; - txd.lso_meta_res = 0; - txd.mss = rte_cpu_to_le_16(mb->tso_segsz); - txd.lso_hdrlen = mb->l2_len + mb->l3_len + mb->l4_len; - txd.lso_totsegs = (mb->pkt_len + mb->tso_segsz) / mb->tso_segsz; - - if ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) != 0) { - outer_len = mb->outer_l2_len + mb->outer_l3_len; - txd.l3_offset += outer_len; - txd.l4_offset += outer_len; - txd.lso_hdrlen += outer_len; - } - - return txd.raw; -} - uint32_t nfp_flower_nfdk_pkt_add_metadata(struct rte_mbuf *mbuf, uint32_t port_id); uint16_t nfp_net_nfdk_xmit_pkts_common(void *tx_queue, diff --git a/drivers/net/nfp/nfdk/nfp_nfdk_dp.c b/drivers/net/nfp/nfdk/nfp_nfdk_dp.c index 97002eed23..a85734f121 100644 --- a/drivers/net/nfp/nfdk/nfp_nfdk_dp.c +++ b/drivers/net/nfp/nfdk/nfp_nfdk_dp.c @@ -3,19 +3,83 @@ * All rights reserved. 
*/ -#include +#include "nfp_nfdk.h" + #include #include -#include "../nfp_logs.h" -#include "../nfp_common.h" -#include "../nfp_rxtx.h" -#include "../nfpcore/nfp_mip.h" -#include "../nfpcore/nfp_platform.h" -#include "../nfpcore/nfp_rtsym.h" #include "../flower/nfp_flower.h" -#include "../flower/nfp_flower_cmsg.h" -#include "nfp_nfdk.h" +#include "../nfpcore/nfp_platform.h" +#include "../nfp_logs.h" + +#define NFDK_TX_DESC_GATHER_MAX 17 + +/* Set TX CSUM offload flags in TX descriptor of nfdk */ +static uint64_t +nfp_net_nfdk_tx_cksum(struct nfp_net_txq *txq, + struct rte_mbuf *mb, + uint64_t flags) +{ + uint64_t ol_flags; + struct nfp_net_hw *hw = txq->hw; + + if ((hw->cap & NFP_NET_CFG_CTRL_TXCSUM) == 0) + return flags; + + ol_flags = mb->ol_flags; + + /* Set TCP csum offload if TSO enabled. */ + if ((ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0) +
[PATCH 05/13] net/nfp: improve modularization of common module
Make the header file self-containing by adding the correct include statement. Try to keep the API small by move the logic which need not expose from header file to source file verbatim and remove the unused macro. Also remove the unneeded header file include statement of source file. Signed-off-by: Chaoyong He --- drivers/net/nfp/nfp_common.c | 51 +--- drivers/net/nfp/nfp_common.h | 34 2 files changed, 23 insertions(+), 62 deletions(-) diff --git a/drivers/net/nfp/nfp_common.c b/drivers/net/nfp/nfp_common.c index 160df27c94..40027dbdfc 100644 --- a/drivers/net/nfp/nfp_common.c +++ b/drivers/net/nfp/nfp_common.c @@ -5,47 +5,32 @@ * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "nfp_common.h" + #include -#include -#include -#include "nfpcore/nfp_cpp.h" -#include "nfpcore/nfp_nffw.h" -#include "nfpcore/nfp_hwinfo.h" +#include "flower/nfp_flower_representor.h" +#include "nfd3/nfp_nfd3.h" +#include "nfdk/nfp_nfdk.h" #include "nfpcore/nfp_mip.h" -#include "nfpcore/nfp_rtsym.h" #include "nfpcore/nfp_nsp.h" +#include "nfp_logs.h" -#include "flower/nfp_flower_representor.h" +#define NFP_TX_MAX_SEG UINT8_MAX +#define NFP_TX_MAX_MTU_SEG 8 -#include "nfp_common.h" -#include "nfp_ctrl.h" -#include "nfp_rxtx.h" -#include "nfp_logs.h" -#include "nfp_cpp_bridge.h" +/* + * This is used by the reconfig protocol. It sets the maximum time waiting in + * milliseconds before a reconfig timeout happens. + */ +#define NFP_NET_POLL_TIMEOUT5000 -#include "nfd3/nfp_nfd3.h" -#include "nfdk/nfp_nfdk.h" +#define NFP_NET_LINK_DOWN_CHECK_TIMEOUT 4000 /* ms */ +#define NFP_NET_LINK_UP_CHECK_TIMEOUT 1000 /* ms */ -#include -#include -#include -#include -#include -#include -#include +/* Maximum supported NFP frame size (MTU + layer 2 headers) */ +#define NFP_FRAME_SIZE_MAX10048 +#define DEFAULT_FLBUF_SIZE9216 enum nfp_xstat_group { NFP_XSTAT_GROUP_NET, diff --git a/drivers/net/nfp/nfp_common.h b/drivers/net/nfp/nfp_common.h index 36ad4e1d32..b0372c3dc6 100644 --- a/drivers/net/nfp/nfp_common.h +++ b/drivers/net/nfp/nfp_common.h @@ -6,22 +6,14 @@ #ifndef _NFP_COMMON_H_ #define _NFP_COMMON_H_ +#include +#include +#include +#include + #include "nfp_ctrl.h" #include "nfpcore/nfp_dev.h" -#define NFP_NET_PMD_VERSION "0.1" - -/* Forward declaration */ -struct nfp_net_adapter; - -#define NFP_TX_MAX_SEG UINT8_MAX -#define NFP_TX_MAX_MTU_SEG 8 - -/* Bar allocation */ -#define NFP_NET_CRTL_BAR0 -#define NFP_NET_TX_BAR 2 -#define NFP_NET_RX_BAR 2 - /* Macros for accessing the Queue Controller Peripheral 'CSRs' */ #define NFP_QCP_QUEUE_OFF(_x) ((_x) * 0x800) #define NFP_QCP_QUEUE_ADD_RPTR 0x @@ -50,27 +42,11 @@ struct nfp_net_adapter; /* Alignment for dma zones */ #define NFP_MEMZONE_ALIGN 128 -/* - * This is used by the reconfig protocol. It sets the maximum time waiting in - * milliseconds before a reconfig timeout happens. - */ -#define NFP_NET_POLL_TIMEOUT5000 - #define NFP_QCP_QUEUE_ADDR_SZ (0x800) -#define NFP_NET_LINK_DOWN_CHECK_TIMEOUT 4000 /* ms */ -#define NFP_NET_LINK_UP_CHECK_TIMEOUT 1000 /* ms */ - /* Number of supported physical ports */ #define NFP_MAX_PHYPORTS 12 -/* Maximum supported NFP frame size (MTU + layer 2 headers) */ -#define NFP_FRAME_SIZE_MAX 10048 -#define DEFAULT_FLBUF_SIZE9216 - -#include -#include - /* Firmware application ID's */ enum nfp_app_fw_id { NFP_APP_FW_CORE_NIC = 0x1, -- 2.39.1
[PATCH 06/13] net/nfp: improve modularization of flower module
Make the header file self-containing by adding the correct include statement. Also remove the unneeded header file include statement of source file. Signed-off-by: Chaoyong He --- drivers/net/nfp/flower/nfp_flower.c | 24 ++-- drivers/net/nfp/flower/nfp_flower.h | 2 ++ 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/net/nfp/flower/nfp_flower.c b/drivers/net/nfp/flower/nfp_flower.c index bbcfa6e66a..bbcbb0060b 100644 --- a/drivers/net/nfp/flower/nfp_flower.c +++ b/drivers/net/nfp/flower/nfp_flower.c @@ -3,27 +3,23 @@ * All rights reserved. */ -#include -#include -#include +#include "nfp_flower.h" + #include -#include -#include +#include +#include -#include "../nfp_common.h" -#include "../nfp_logs.h" -#include "../nfp_ctrl.h" -#include "../nfp_cpp_bridge.h" -#include "../nfp_rxtx.h" #include "../nfd3/nfp_nfd3.h" #include "../nfdk/nfp_nfdk.h" -#include "../nfpcore/nfp_mip.h" -#include "../nfpcore/nfp_rtsym.h" #include "../nfpcore/nfp_nsp.h" -#include "nfp_flower.h" +#include "../nfpcore/nfp_rtsym.h" +#include "../nfp_cpp_bridge.h" +#include "../nfp_flow.h" +#include "../nfp_logs.h" +#include "../nfp_mtr.h" +#include "nfp_flower_cmsg.h" #include "nfp_flower_ctrl.h" #include "nfp_flower_representor.h" -#include "nfp_flower_cmsg.h" #define CTRL_VNIC_NB_DESC 512 diff --git a/drivers/net/nfp/flower/nfp_flower.h b/drivers/net/nfp/flower/nfp_flower.h index e8df8b1769..244b6daa37 100644 --- a/drivers/net/nfp/flower/nfp_flower.h +++ b/drivers/net/nfp/flower/nfp_flower.h @@ -34,7 +34,9 @@ #define MAX_FLOWER_PHYPORTS 8 #define MAX_FLOWER_VFS 64 +/* Forward declaration */ struct nfp_app_fw_flower; +struct nfp_flower_representor; /* The function pointers for different NFD version */ struct nfp_flower_nfd_func { -- 2.39.1
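The forward declarations added to nfp_flower.h are the mechanism that keeps the header small: code that only passes pointers never needs the defining header. An illustrative sketch (the function is hypothetical):

    /* A forward declaration suffices for pointer-only use -- no include of
     * the header that defines the struct is needed. */
    struct nfp_flower_representor;

    int nfp_repr_example_op(struct nfp_flower_representor *repr);  /* hypothetical API */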
[PATCH 02/13] net/nfp: improve modularization of rxtx module
Make the header file self-containing by adding the correct include statement. Try to keep the API small by move the logic which need not expose from header file to source file verbatim and move the macro to the right header file. Also remove the unneeded header file include statement of source file. Signed-off-by: Chaoyong He --- drivers/net/nfp/nfdk/nfp_nfdk_dp.c | 1 + drivers/net/nfp/nfp_rxtx.c | 173 - drivers/net/nfp/nfp_rxtx.h | 168 +--- drivers/net/nfp/nfpcore/nfp_platform.h | 2 + 4 files changed, 173 insertions(+), 171 deletions(-) diff --git a/drivers/net/nfp/nfdk/nfp_nfdk_dp.c b/drivers/net/nfp/nfdk/nfp_nfdk_dp.c index 48a74d109a..97002eed23 100644 --- a/drivers/net/nfp/nfdk/nfp_nfdk_dp.c +++ b/drivers/net/nfp/nfdk/nfp_nfdk_dp.c @@ -11,6 +11,7 @@ #include "../nfp_common.h" #include "../nfp_rxtx.h" #include "../nfpcore/nfp_mip.h" +#include "../nfpcore/nfp_platform.h" #include "../nfpcore/nfp_rtsym.h" #include "../flower/nfp_flower.h" #include "../flower/nfp_flower_cmsg.h" diff --git a/drivers/net/nfp/nfp_rxtx.c b/drivers/net/nfp/nfp_rxtx.c index 1b8bb3471d..eeca193d14 100644 --- a/drivers/net/nfp/nfp_rxtx.c +++ b/drivers/net/nfp/nfp_rxtx.c @@ -5,19 +5,180 @@ * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation. */ -#include +#include "nfp_rxtx.h" + #include #include "nfp_common.h" -#include "nfp_ctrl.h" -#include "nfp_rxtx.h" -#include "nfp_logs.h" #include "nfd3/nfp_nfd3.h" #include "nfdk/nfp_nfdk.h" -#include "nfpcore/nfp_mip.h" -#include "nfpcore/nfp_rtsym.h" #include "flower/nfp_flower.h" +#include "nfp_logs.h" + +/* Maximum number of supported VLANs in parsed form packet metadata. */ +#define NFP_META_MAX_VLANS 2 + +/* + * struct nfp_meta_parsed - Record metadata parsed from packet + * + * Parsed NFP packet metadata are recorded in this struct. The content is + * read-only after it have been recorded during parsing by nfp_net_parse_meta(). + * + * @port_id: Port id value + * @hash: RSS hash value + * @hash_type: RSS hash type + * @vlan_layer: The layers of VLAN info which are passed from nic. + * Only this number of entries of the @vlan array are valid. + * + * @vlan: Holds information parses from NFP_NET_META_VLAN. The inner most vlan + *starts at position 0 and only @vlan_layer entries contain valid + *information. + * + *Currently only 2 layers of vlan are supported, + *vlan[0] - vlan strip info + *vlan[1] - qinq strip info + * + * @vlan.offload: Flag indicates whether VLAN is offloaded + * @vlan.tpid: Vlan TPID + * @vlan.tci: Vlan TCI including PCP + Priority + VID + */ +struct nfp_meta_parsed { + uint32_t port_id; + uint32_t hash; + uint8_t hash_type; + uint8_t vlan_layer; + struct { + uint8_t offload; + uint8_t tpid; + uint16_t tci; + } vlan[NFP_META_MAX_VLANS]; +}; + +/* + * The bit format and map of nfp packet type for rxd.offload_info in Rx descriptor. + * + * Bit format about nfp packet type refers to the following: + * - + *1 0 + * 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | |ol3|tunnel | l3 | l4 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * Bit map about nfp packet type refers to the following: + * + * L4: bit 0~2, used for layer 4 or inner layer 4. + * 000: NFP_NET_PTYPE_L4_NONE + * 001: NFP_NET_PTYPE_L4_TCP + * 010: NFP_NET_PTYPE_L4_UDP + * 011: NFP_NET_PTYPE_L4_FRAG + * 100: NFP_NET_PTYPE_L4_NONFRAG + * 101: NFP_NET_PTYPE_L4_ICMP + * 110: NFP_NET_PTYPE_L4_SCTP + * 111: reserved + * + * L3: bit 3~5, used for layer 3 or inner layer 3. 
+ * 000: NFP_NET_PTYPE_L3_NONE + * 001: NFP_NET_PTYPE_L3_IPV6 + * 010: NFP_NET_PTYPE_L3_IPV4 + * 011: NFP_NET_PTYPE_L3_IPV4_EXT + * 100: NFP_NET_PTYPE_L3_IPV6_EXT + * 101: NFP_NET_PTYPE_L3_IPV4_EXT_UNKNOWN + * 110: NFP_NET_PTYPE_L3_IPV6_EXT_UNKNOWN + * 111: reserved + * + * Tunnel: bit 6~9, used for tunnel. + * : NFP_NET_PTYPE_TUNNEL_NONE + * 0001: NFP_NET_PTYPE_TUNNEL_VXLAN + * 0100: NFP_NET_PTYPE_TUNNEL_NVGRE + * 0101: NFP_NET_PTYPE_TUNNEL_GENEVE + * 0010, 0011, 0110~: reserved + * + * Outer L3: bit 10~11, used for outer layer 3. + * 00: NFP_NET_PTYPE_OUTER_L3_NONE + * 01: NFP_NET_PTYPE_OUTER_L3_IPV6 + * 10: NFP_NET_PTYPE_OUTER_L3_IPV4 + * 11: reserved + * + * Reserved: bit 10~15, used for extension. + */ + +/* Mask and offset about nfp packet type based on the bit map above. */ +#define NFP_NET_PTYPE_L4_MASK 0x0007 +#define NFP_NET_PTYPE_L3_MASK 0x0038 +#define NFP_NET_PTYPE_TUNNEL_MASK 0x03c0 +#define NFP_NET_PTYPE_OUTER_L3_MASK0x0c00 + +#define NFP_NET_PTYPE_L4_OFFSET0 +#define NFP_NET_PTYPE_L3_OFFSET3 +#define NFP_NET_PTYPE_TUNNEL_OFFSET6 +#define NFP_NET_PTYPE_OUTER_
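Given the bit map documented in the comment above, a consumer recovers each field with a mask and shift; a short sketch using the masks and offsets defined in the patch ('rxd_offload_info' is an assumption standing in for the value read from the Rx descriptor):

    /* Sketch: decoding the 16-bit nfp packet-type word. */
    uint16_t ptype = rxd_offload_info;  /* assumed to come from rxd.offload_info */
    uint16_t l4  = (ptype & NFP_NET_PTYPE_L4_MASK) >> NFP_NET_PTYPE_L4_OFFSET;
    uint16_t l3  = (ptype & NFP_NET_PTYPE_L3_MASK) >> NFP_NET_PTYPE_L3_OFFSET;
    uint16_t tun = (ptype & NFP_NET_PTYPE_TUNNEL_MASK) >> NFP_NET_PTYPE_TUNNEL_OFFSET;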
[PATCH 07/13] net/nfp: improve modularization of flower representor module
Try to keep the API small by moving logic that need not be exposed from the header file into the source file verbatim. Also remove the unneeded header file include statements from the source file. Signed-off-by: Chaoyong He --- .../net/nfp/flower/nfp_flower_representor.c | 29 --- .../net/nfp/flower/nfp_flower_representor.h | 14 - 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/drivers/net/nfp/flower/nfp_flower_representor.c b/drivers/net/nfp/flower/nfp_flower_representor.c index 48c3b3f844..d4df88fb73 100644 --- a/drivers/net/nfp/flower/nfp_flower_representor.c +++ b/drivers/net/nfp/flower/nfp_flower_representor.c @@ -3,22 +3,29 @@ * All rights reserved. */ -#include -#include +#include "nfp_flower_representor.h" -#include "../nfp_common.h" -#include "../nfp_logs.h" -#include "../nfp_ctrl.h" -#include "../nfp_rxtx.h" #include "../nfd3/nfp_nfd3.h" -#include "../nfpcore/nfp_mip.h" -#include "../nfpcore/nfp_rtsym.h" #include "../nfpcore/nfp_nsp.h" -#include "nfp_flower.h" -#include "nfp_flower_representor.h" -#include "nfp_flower_ctrl.h" +#include "../nfp_flow.h" +#include "../nfp_logs.h" +#include "../nfp_mtr.h" #include "nfp_flower_cmsg.h" +/* + * enum nfp_repr_type - type of representor + * @NFP_REPR_TYPE_PHYS_PORT: external NIC port + * @NFP_REPR_TYPE_PF: physical function + * @NFP_REPR_TYPE_VF: virtual function + * @NFP_REPR_TYPE_MAX: number of representor types + */ +enum nfp_repr_type { + NFP_REPR_TYPE_PHYS_PORT, + NFP_REPR_TYPE_PF, + NFP_REPR_TYPE_VF, + NFP_REPR_TYPE_MAX, +}; + static int nfp_pf_repr_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, diff --git a/drivers/net/nfp/flower/nfp_flower_representor.h b/drivers/net/nfp/flower/nfp_flower_representor.h index 685cbe46b4..5ac5e38186 100644 --- a/drivers/net/nfp/flower/nfp_flower_representor.h +++ b/drivers/net/nfp/flower/nfp_flower_representor.h @@ -8,20 +8,6 @@ #include "nfp_flower.h" -/* - * enum nfp_repr_type - type of representor - * @NFP_REPR_TYPE_PHYS_PORT: external NIC port - * @NFP_REPR_TYPE_PF: physical function - * @NFP_REPR_TYPE_VF: virtual function - * @NFP_REPR_TYPE_MAX: number of representor types - */ -enum nfp_repr_type { - NFP_REPR_TYPE_PHYS_PORT = 0, - NFP_REPR_TYPE_PF, - NFP_REPR_TYPE_VF, - NFP_REPR_TYPE_MAX, -}; - struct nfp_flower_representor { uint16_t vf_id; uint16_t switch_domain_id; -- 2.39.1
[PATCH 08/13] net/nfp: improve modularization of flower ctrl module
Make the header file self-containing by adding the correct include statement. Also remove the unneeded header file include statement of source file. Signed-off-by: Chaoyong He --- drivers/net/nfp/flower/nfp_flower_ctrl.c | 13 + drivers/net/nfp/flower/nfp_flower_ctrl.h | 2 ++ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/net/nfp/flower/nfp_flower_ctrl.c b/drivers/net/nfp/flower/nfp_flower_ctrl.c index 4cb2c2f99e..bdb042142a 100644 --- a/drivers/net/nfp/flower/nfp_flower_ctrl.c +++ b/drivers/net/nfp/flower/nfp_flower_ctrl.c @@ -3,20 +3,17 @@ * All rights reserved. */ -#include +#include "nfp_flower_ctrl.h" + #include -#include -#include "../nfp_common.h" -#include "../nfp_logs.h" -#include "../nfp_ctrl.h" -#include "../nfp_rxtx.h" #include "../nfd3/nfp_nfd3.h" #include "../nfdk/nfp_nfdk.h" -#include "nfp_flower.h" -#include "nfp_flower_ctrl.h" +#include "../nfp_flow.h" +#include "../nfp_logs.h" #include "nfp_flower_cmsg.h" #include "nfp_flower_representor.h" +#include "nfp_mtr.h" #define MAX_PKT_BURST 32 diff --git a/drivers/net/nfp/flower/nfp_flower_ctrl.h b/drivers/net/nfp/flower/nfp_flower_ctrl.h index b7e836cf7e..f73a024266 100644 --- a/drivers/net/nfp/flower/nfp_flower_ctrl.h +++ b/drivers/net/nfp/flower/nfp_flower_ctrl.h @@ -6,6 +6,8 @@ #ifndef _NFP_FLOWER_CTRL_H_ #define _NFP_FLOWER_CTRL_H_ +#include "nfp_flower.h" + void nfp_flower_ctrl_vnic_poll(struct nfp_app_fw_flower *app_fw_flower); uint16_t nfp_flower_ctrl_vnic_xmit(struct nfp_app_fw_flower *app_fw_flower, struct rte_mbuf *mbuf); -- 2.39.1
[PATCH 09/13] net/nfp: improve modularization of flower cmsg module
Make the header file self-containing by adding the correct include statement. Try to keep the API small by move the logic which need not expose from header file to source file verbatim and remove the unused macro. Also remove the unneeded header file include statement of source file. Signed-off-by: Chaoyong He --- drivers/net/nfp/flower/nfp_flower_cmsg.c | 12 +++- drivers/net/nfp/flower/nfp_flower_cmsg.h | 81 drivers/net/nfp/nfp_flow.c | 1 + drivers/net/nfp/nfp_mtr.h| 68 +--- 4 files changed, 82 insertions(+), 80 deletions(-) diff --git a/drivers/net/nfp/flower/nfp_flower_cmsg.c b/drivers/net/nfp/flower/nfp_flower_cmsg.c index 00f94c7492..0b8feec05b 100644 --- a/drivers/net/nfp/flower/nfp_flower_cmsg.c +++ b/drivers/net/nfp/flower/nfp_flower_cmsg.c @@ -3,14 +3,20 @@ * All rights reserved. */ +#include "nfp_flower_cmsg.h" + #include "../nfpcore/nfp_nsp.h" +#include "../nfp_flow.h" #include "../nfp_logs.h" -#include "../nfp_common.h" -#include "nfp_flower.h" -#include "nfp_flower_cmsg.h" #include "nfp_flower_ctrl.h" #include "nfp_flower_representor.h" +static char* +nfp_flower_cmsg_get_data(struct rte_mbuf *m) +{ + return rte_pktmbuf_mtod(m, char *) + 4 + 4 + NFP_FLOWER_CMSG_HLEN; +} + static void * nfp_flower_cmsg_init(struct nfp_app_fw_flower *app_fw_flower, struct rte_mbuf *m, diff --git a/drivers/net/nfp/flower/nfp_flower_cmsg.h b/drivers/net/nfp/flower/nfp_flower_cmsg.h index f643d54d39..3c2b279f40 100644 --- a/drivers/net/nfp/flower/nfp_flower_cmsg.h +++ b/drivers/net/nfp/flower/nfp_flower_cmsg.h @@ -6,11 +6,7 @@ #ifndef _NFP_CMSG_H_ #define _NFP_CMSG_H_ -#include -#include - -#include "../nfp_mtr.h" -#include "../nfp_flow.h" +#include "nfp_flower.h" struct nfp_flower_cmsg_hdr { rte_be16_t pad; @@ -351,6 +347,72 @@ struct nfp_flower_stats_frame { rte_be64_t stats_cookie; }; +/** + * See RFC 2698 for more details. 
+ * Word[0](Flag options): + * [15] p(pps) 1 for pps, 0 for bps + * + * Meter control message + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +---+-+---+-+-+-+-+---+-+ + * |Reserved |p| Y |TYPE |E| TSHFV |P| PC|R| + * +---+-+---+-+-+-+-+---+-+ + * | Profile ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |Token Bucket Peak | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Token Bucket Committed| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Peak Burst Size | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Committed Burst Size | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Peak Information Rate| + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |Committed Information Rate | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +struct nfp_cfg_head { + rte_be32_t flags_opts; + rte_be32_t profile_id; +}; + +/** + * Struct nfp_profile_conf - profile config, offload to NIC + * @head:config head information + * @bkt_tkn_p: token bucket peak + * @bkt_tkn_c: token bucket committed + * @pbs: peak burst size + * @cbs: committed burst size + * @pir: peak information rate + * @cir: committed information rate + */ +struct nfp_profile_conf { + struct nfp_cfg_head head; + rte_be32_t bkt_tkn_p; + rte_be32_t bkt_tkn_c; + rte_be32_t pbs; + rte_be32_t cbs; + rte_be32_t pir; + rte_be32_t cir; +}; + +/** + * Struct nfp_mtr_stats_reply - meter stats, read from firmware + * @head: config head information + * @pass_bytes:count of passed bytes + * @pass_pkts: count of passed packets + * @drop_bytes:count of dropped bytes + * @drop_pkts: count of dropped packets + */ +struct nfp_mtr_stats_reply { + struct nfp_cfg_head head; + rte_be64_t pass_bytes; + rte_be64_t pass_pkts; + rte_be64_t drop_bytes; + rte_be64_t drop_pkts; +}; + enum nfp_flower_cmsg_port_type { NFP_FLOWER_CMSG_PORT_TYPE_UNSPEC, NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT, @@ -378,12 +440,6 @@ enum nfp_flower_cmsg_port_vnic_type { #define NFP_FLOWER_CMSG_PORT_PCIE_Q(x) ((x) & 0x3f) /* [0,5] */ #define NFP_FLOWER_CMSG_PORT_PHYS_PORT_NUM(x) ((x) & 0xff) /* [0,7] */ -static inline char* -nfp_flower_cmsg_get_data(struct rte_mbuf *m) -{ - return rte_pktmbuf_mtod(m, char *) + 4 + 4 + NFP_FLOWER_CMSG_HLEN; -} -
[PATCH 11/13] net/nfp: improve modularization of meter module
Try to keep the API small by moving logic that need not be exposed from the header file into the source file verbatim. Also remove the unneeded header file include statements from the source file. Signed-off-by: Chaoyong He --- drivers/net/nfp/nfp_mtr.c | 16 drivers/net/nfp/nfp_mtr.h | 2 -- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/nfp/nfp_mtr.c b/drivers/net/nfp/nfp_mtr.c index afc4de4cc7..255977ec22 100644 --- a/drivers/net/nfp/nfp_mtr.c +++ b/drivers/net/nfp/nfp_mtr.c @@ -3,17 +3,17 @@ * All rights reserved. */ -#include -#include -#include +#include "nfp_mtr.h" + #include +#include +#include -#include "nfp_common.h" -#include "nfp_mtr.h" -#include "nfp_logs.h" -#include "flower/nfp_flower.h" -#include "flower/nfp_flower_cmsg.h" #include "flower/nfp_flower_representor.h" +#include "nfp_logs.h" + +#define NFP_MAX_POLICY_CNT NFP_MAX_MTR_CNT +#define NFP_MAX_PROFILE_CNTNFP_MAX_MTR_CNT #define NFP_FL_QOS_PPS RTE_BIT32(15) #define NFP_FL_QOS_METERRTE_BIT32(10) diff --git a/drivers/net/nfp/nfp_mtr.h b/drivers/net/nfp/nfp_mtr.h index 4b1360cad8..7ab0324721 100644 --- a/drivers/net/nfp/nfp_mtr.h +++ b/drivers/net/nfp/nfp_mtr.h @@ -15,8 +15,6 @@ * The max count is 65536 defined by OF_METER_COUNT. */ #define NFP_MAX_MTR_CNT65536 -#define NFP_MAX_POLICY_CNT NFP_MAX_MTR_CNT -#define NFP_MAX_PROFILE_CNTNFP_MAX_MTR_CNT /** * Struct nfp_mtr_profile - meter profile, stored in driver -- 2.39.1
[PATCH 10/13] net/nfp: improve modularization of flow module
Make the header file self-containing by adding the correct include statement. Try to keep the API small by move the logic which need not expose from header file to source file verbatim and remove the unused macro. Also remove the unneeded header file include statement of source file. Signed-off-by: Chaoyong He --- drivers/net/nfp/nfp_flow.c | 88 +- drivers/net/nfp/nfp_flow.h | 78 + 2 files changed, 79 insertions(+), 87 deletions(-) diff --git a/drivers/net/nfp/nfp_flow.c b/drivers/net/nfp/nfp_flow.c index 9847eb0615..ff03bea6ce 100644 --- a/drivers/net/nfp/nfp_flow.c +++ b/drivers/net/nfp/nfp_flow.c @@ -3,24 +3,92 @@ * All rights reserved. */ +#include "nfp_flow.h" + #include #include #include -#include #include -#include "nfp_common.h" -#include "nfp_ctrl.h" -#include "nfp_flow.h" -#include "nfp_logs.h" -#include "nfp_rxtx.h" -#include "nfp_mtr.h" -#include "flower/nfp_flower.h" #include "flower/nfp_flower_cmsg.h" -#include "flower/nfp_flower_ctrl.h" #include "flower/nfp_flower_representor.h" -#include "nfpcore/nfp_mip.h" #include "nfpcore/nfp_rtsym.h" +#include "nfp_logs.h" +#include "nfp_mtr.h" + +#define NFP_FLOWER_LAYER_EXT_META RTE_BIT32(0) +#define NFP_FLOWER_LAYER_PORT RTE_BIT32(1) +#define NFP_FLOWER_LAYER_MACRTE_BIT32(2) +#define NFP_FLOWER_LAYER_TP RTE_BIT32(3) +#define NFP_FLOWER_LAYER_IPV4 RTE_BIT32(4) +#define NFP_FLOWER_LAYER_IPV6 RTE_BIT32(5) +#define NFP_FLOWER_LAYER_CT RTE_BIT32(6) +#define NFP_FLOWER_LAYER_VXLAN RTE_BIT32(7) + +#define NFP_FLOWER_LAYER2_GRE RTE_BIT32(0) +#define NFP_FLOWER_LAYER2_QINQ RTE_BIT32(4) +#define NFP_FLOWER_LAYER2_GENEVERTE_BIT32(5) +#define NFP_FLOWER_LAYER2_GENEVE_OP RTE_BIT32(6) +#define NFP_FLOWER_LAYER2_TUN_IPV6 RTE_BIT32(7) + +/* Compressed HW representation of TCP Flags */ +#define NFP_FL_TCP_FLAG_FIN RTE_BIT32(0) +#define NFP_FL_TCP_FLAG_SYN RTE_BIT32(1) +#define NFP_FL_TCP_FLAG_RST RTE_BIT32(2) +#define NFP_FL_TCP_FLAG_PSH RTE_BIT32(3) +#define NFP_FL_TCP_FLAG_URG RTE_BIT32(4) + +#define NFP_FL_META_FLAG_MANAGE_MASKRTE_BIT32(7) + +#define NFP_FLOWER_MASK_VLAN_CFIRTE_BIT32(12) + +#define NFP_MASK_TABLE_ENTRIES 1024 + +/* The maximum action list size (in bytes) supported by the NFP. 
*/ +#define NFP_FL_MAX_A_SIZ1216 + +#define NFP_FL_SC_ACT_DROP 0x8000 +#define NFP_FL_SC_ACT_USER 0x7D00 +#define NFP_FL_SC_ACT_POPV 0x6A00 +#define NFP_FL_SC_ACT_NULL 0x + +/* GRE Tunnel flags */ +#define NFP_FL_GRE_FLAG_KEY (1 << 2) + +/* Action opcodes */ +#define NFP_FL_ACTION_OPCODE_OUTPUT 0 +#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1 +#define NFP_FL_ACTION_OPCODE_POP_VLAN 2 +#define NFP_FL_ACTION_OPCODE_PUSH_MPLS 3 +#define NFP_FL_ACTION_OPCODE_POP_MPLS 4 +#define NFP_FL_ACTION_OPCODE_USERSPACE 5 +#define NFP_FL_ACTION_OPCODE_SET_TUNNEL 6 +#define NFP_FL_ACTION_OPCODE_SET_ETHERNET 7 +#define NFP_FL_ACTION_OPCODE_SET_MPLS 8 +#define NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS 9 +#define NFP_FL_ACTION_OPCODE_SET_IPV4_TTL_TOS 10 +#define NFP_FL_ACTION_OPCODE_SET_IPV6_SRC 11 +#define NFP_FL_ACTION_OPCODE_SET_IPV6_DST 12 +#define NFP_FL_ACTION_OPCODE_SET_IPV6_TC_HL_FL 13 +#define NFP_FL_ACTION_OPCODE_SET_UDP14 +#define NFP_FL_ACTION_OPCODE_SET_TCP15 +#define NFP_FL_ACTION_OPCODE_PRE_LAG16 +#define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17 +#define NFP_FL_ACTION_OPCODE_PRE_GS 18 +#define NFP_FL_ACTION_OPCODE_GS 19 +#define NFP_FL_ACTION_OPCODE_PUSH_NSH 20 +#define NFP_FL_ACTION_OPCODE_POP_NSH21 +#define NFP_FL_ACTION_OPCODE_SET_QUEUE 22 +#define NFP_FL_ACTION_OPCODE_CONNTRACK 23 +#define NFP_FL_ACTION_OPCODE_METER 24 +#define NFP_FL_ACTION_OPCODE_CT_NAT_EXT 25 +#define NFP_FL_ACTION_OPCODE_PUSH_GENEVE26 +#define NFP_FL_ACTION_OPCODE_NUM32 + +#define NFP_FL_OUT_FLAGS_LASTRTE_BIT32(15) + +/* Tunnel ports */ +#define NFP_FL_PORT_TYPE_TUN0x5000 /* * Maximum number of items in struct rte_flow_action_vxlan_encap. diff --git a/drivers/net/nfp/nfp_flow.h b/drivers/net/nfp/nfp_flow.h index 414bd4573b..7ce7f62453 100644 --- a/drivers/net/nfp/nfp_flow.h +++ b/drivers/net/nfp/nfp_flow.h @@ -6,87 +6,11 @@ #ifndef _NFP_FLOW_H_ #define _NFP_FLOW_H_ -#include -#include -#include - -#define NFP_FLOWER_LAYER_EXT_META RTE_BIT32(0) -#define NFP_FLOWER_LAYER_PORT RTE_BIT32(1) -#define NFP_FLOWER_LAYER_MACRTE_BIT32(2) -#define NFP_FLOWER_LAYER_TP RTE_BIT32(3) -#define NFP_FLOWER_LAYER_IPV4 RTE_BIT32(4) -#define NFP_FLOWER_
[PATCH 12/13] net/nfp: improve modularization of CPP bridge module
Try to keep the API small by moving logic that need not be exposed from the header file into the source file verbatim. Also remove the unneeded header file include statements from the source file. Signed-off-by: Chaoyong He --- drivers/net/nfp/nfp_cpp_bridge.c | 14 ++ drivers/net/nfp/nfp_cpp_bridge.h | 8 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/net/nfp/nfp_cpp_bridge.c b/drivers/net/nfp/nfp_cpp_bridge.c index a9998f3c08..ed9a946b0c 100644 --- a/drivers/net/nfp/nfp_cpp_bridge.c +++ b/drivers/net/nfp/nfp_cpp_bridge.c @@ -5,17 +5,23 @@ * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation. */ +#include "nfp_cpp_bridge.h" + #include #include #include #include "nfpcore/nfp_cpp.h" -#include "nfpcore/nfp_mip.h" -#include "nfpcore/nfp_nsp.h" - #include "nfp_logs.h" -#include "nfp_cpp_bridge.h" + +#define NFP_CPP_MEMIO_BOUNDARY(1 << 20) +#define NFP_BRIDGE_OP_READ20 +#define NFP_BRIDGE_OP_WRITE 30 +#define NFP_BRIDGE_OP_IOCTL 40 + +#define NFP_IOCTL 'n' +#define NFP_IOCTL_CPP_IDENTIFICATION _IOW(NFP_IOCTL, 0x8f, uint32_t) /* Prototypes */ static int nfp_cpp_bridge_serve_write(int sockfd, struct nfp_cpp *cpp); diff --git a/drivers/net/nfp/nfp_cpp_bridge.h b/drivers/net/nfp/nfp_cpp_bridge.h index 85289e158b..e6a957a090 100644 --- a/drivers/net/nfp/nfp_cpp_bridge.h +++ b/drivers/net/nfp/nfp_cpp_bridge.h @@ -10,14 +10,6 @@ #include "nfp_common.h" -#define NFP_CPP_MEMIO_BOUNDARY (1 << 20) -#define NFP_BRIDGE_OP_READ 20 -#define NFP_BRIDGE_OP_WRITE30 -#define NFP_BRIDGE_OP_IOCTL40 - -#define NFP_IOCTL 'n' -#define NFP_IOCTL_CPP_IDENTIFICATION _IOW(NFP_IOCTL, 0x8f, uint32_t) - int nfp_enable_cpp_service(struct nfp_pf_dev *pf_dev); int nfp_map_service(uint32_t service_id); -- 2.39.1
[PATCH 13/13] net/nfp: cleanup the include statement of PMD
Remove the unneeded header file include statement of PMD source files, also adjust the include statement of 'nfp_rxtx' and 'nfp_flower_cmsg' module. Signed-off-by: Chaoyong He --- drivers/net/nfp/flower/nfp_flower.c | 2 -- drivers/net/nfp/flower/nfp_flower_cmsg.c | 1 - drivers/net/nfp/flower/nfp_flower_cmsg.h | 4 +-- drivers/net/nfp/flower/nfp_flower_ctrl.c | 2 -- .../net/nfp/flower/nfp_flower_representor.c | 2 -- drivers/net/nfp/nfp_ethdev.c | 25 --- drivers/net/nfp/nfp_ethdev_vf.c | 9 +++ drivers/net/nfp/nfp_flow.c| 1 - drivers/net/nfp/nfp_rxtx.c| 1 - 9 files changed, 9 insertions(+), 38 deletions(-) diff --git a/drivers/net/nfp/flower/nfp_flower.c b/drivers/net/nfp/flower/nfp_flower.c index bbcbb0060b..1e10b38120 100644 --- a/drivers/net/nfp/flower/nfp_flower.c +++ b/drivers/net/nfp/flower/nfp_flower.c @@ -14,10 +14,8 @@ #include "../nfpcore/nfp_nsp.h" #include "../nfpcore/nfp_rtsym.h" #include "../nfp_cpp_bridge.h" -#include "../nfp_flow.h" #include "../nfp_logs.h" #include "../nfp_mtr.h" -#include "nfp_flower_cmsg.h" #include "nfp_flower_ctrl.h" #include "nfp_flower_representor.h" diff --git a/drivers/net/nfp/flower/nfp_flower_cmsg.c b/drivers/net/nfp/flower/nfp_flower_cmsg.c index 0b8feec05b..6b9532f5b6 100644 --- a/drivers/net/nfp/flower/nfp_flower_cmsg.c +++ b/drivers/net/nfp/flower/nfp_flower_cmsg.c @@ -6,7 +6,6 @@ #include "nfp_flower_cmsg.h" #include "../nfpcore/nfp_nsp.h" -#include "../nfp_flow.h" #include "../nfp_logs.h" #include "nfp_flower_ctrl.h" #include "nfp_flower_representor.h" diff --git a/drivers/net/nfp/flower/nfp_flower_cmsg.h b/drivers/net/nfp/flower/nfp_flower_cmsg.h index 3c2b279f40..9449760145 100644 --- a/drivers/net/nfp/flower/nfp_flower_cmsg.h +++ b/drivers/net/nfp/flower/nfp_flower_cmsg.h @@ -6,6 +6,7 @@ #ifndef _NFP_CMSG_H_ #define _NFP_CMSG_H_ +#include "../nfp_flow.h" #include "nfp_flower.h" struct nfp_flower_cmsg_hdr { @@ -974,9 +975,6 @@ struct nfp_fl_act_meter { rte_be32_t profile_id; }; -/* Forward declaration */ -struct nfp_fl_rule_metadata; - int nfp_flower_cmsg_mac_repr(struct nfp_app_fw_flower *app_fw_flower); int nfp_flower_cmsg_repr_reify(struct nfp_app_fw_flower *app_fw_flower, struct nfp_flower_representor *repr); diff --git a/drivers/net/nfp/flower/nfp_flower_ctrl.c b/drivers/net/nfp/flower/nfp_flower_ctrl.c index bdb042142a..c5282053cf 100644 --- a/drivers/net/nfp/flower/nfp_flower_ctrl.c +++ b/drivers/net/nfp/flower/nfp_flower_ctrl.c @@ -9,9 +9,7 @@ #include "../nfd3/nfp_nfd3.h" #include "../nfdk/nfp_nfdk.h" -#include "../nfp_flow.h" #include "../nfp_logs.h" -#include "nfp_flower_cmsg.h" #include "nfp_flower_representor.h" #include "nfp_mtr.h" diff --git a/drivers/net/nfp/flower/nfp_flower_representor.c b/drivers/net/nfp/flower/nfp_flower_representor.c index d4df88fb73..55ca3e6db0 100644 --- a/drivers/net/nfp/flower/nfp_flower_representor.c +++ b/drivers/net/nfp/flower/nfp_flower_representor.c @@ -7,10 +7,8 @@ #include "../nfd3/nfp_nfd3.h" #include "../nfpcore/nfp_nsp.h" -#include "../nfp_flow.h" #include "../nfp_logs.h" #include "../nfp_mtr.h" -#include "nfp_flower_cmsg.h" /* * enum nfp_repr_type - type of representor diff --git a/drivers/net/nfp/nfp_ethdev.c b/drivers/net/nfp/nfp_ethdev.c index b2a2cd9ed8..241595be9d 100644 --- a/drivers/net/nfp/nfp_ethdev.c +++ b/drivers/net/nfp/nfp_ethdev.c @@ -5,35 +5,20 @@ * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation. 
*/ -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include -#include "eal_firmware.h" +#include "flower/nfp_flower.h" +#include "nfd3/nfp_nfd3.h" +#include "nfdk/nfp_nfdk.h" #include "nfpcore/nfp_cpp.h" -#include "nfpcore/nfp_nffw.h" #include "nfpcore/nfp_hwinfo.h" -#include "nfpcore/nfp_mip.h" #include "nfpcore/nfp_rtsym.h" #include "nfpcore/nfp_nsp.h" #include "nfpcore/nfp6000_pcie.h" -#include "nfp_common.h" -#include "nfp_ctrl.h" -#include "nfp_rxtx.h" -#include "nfp_logs.h" #include "nfp_cpp_bridge.h" - -#include "nfd3/nfp_nfd3.h" -#include "nfdk/nfp_nfdk.h" -#include "flower/nfp_flower.h" +#include "nfp_logs.h" static int nfp_net_pf_read_mac(struct nfp_app_fw_nic *app_fw_nic, int port) diff --git a/drivers/net/nfp/nfp_ethdev_vf.c b/drivers/net/nfp/nfp_ethdev_vf.c index 1a02a857ea..0c94fc51ad 100644 --- a/drivers/net/nfp/nfp_ethdev_vf.c +++ b/drivers/net/nfp/nfp_ethdev_vf.c @@ -7,15 +7,12 @@ #include -#include "nfpcore/nfp_mip.h" -#include "nfpcore/nfp_rtsym.h" +#include "nfd3/nfp_nfd3.h" +#include "nfdk/nfp_nfdk.h" +#include "nfpcore/nfp_cpp.h" #include "nfp_common.h" -#include "nfp_ctrl.h" -#include "nfp_rxtx.h" #include "nfp_logs.h" -#include "nfd3/nfp_nfd3.h" -#include "nfdk/nfp_nfdk.h" static void nfp_netvf_read_mac(struct nfp_net_hw *hw) diff --git a/dr
RE: [RFC PATCH 2/3] security: add TLS record processing
Hi Harry, Thanks for the review. Please see inline. Thanks, Anoob > -Original Message- > From: Van Haaren, Harry > Sent: Wednesday, September 20, 2023 2:53 PM > To: Anoob Joseph ; Thomas Monjalon > ; Akhil Goyal ; Jerin Jacob > Kollanukkaran ; Konstantin Ananyev > > Cc: Hemant Agrawal ; dev@dpdk.org; Matz, > Olivier ; Vidya Sagar Velumuri > > Subject: [EXT] RE: [RFC PATCH 2/3] security: add TLS record processing > > External Email > > -- > > -Original Message- > > From: Anoob Joseph > > Sent: Friday, August 11, 2023 8:17 AM > > To: Thomas Monjalon ; Akhil Goyal > > ; Jerin Jacob ; Konstantin > > Ananyev > > Cc: Hemant Agrawal ; dev@dpdk.org; Matz, > > Olivier ; Vidya Sagar Velumuri > > > > Subject: [RFC PATCH 2/3] security: add TLS record processing > > > > Add Transport Layer Security (TLS) and Datagram Transport Layer > > Security (DTLS). The protocols provide communications privacy for L4 > > protocols such as TCP & UDP. > > > > TLS (and DTLS) protocol is composed of two layers, 1. TLS Record > > Protocol 2. TLS Handshake Protocol > > > > While TLS Handshake Protocol helps in establishing security parameters > > by which client and server can communicate, TLS Record Protocol > > provides the connection security. TLS Record Protocol leverages > > symmetric cryptographic operations such as data encryption and > > authentication for providing security to the communications. > > > > Cryptodevs that are capable of offloading TLS Record Protocol may > > perform other operations like IV generation, header insertion, atomic > > sequence number updates and anti-replay window check in addition to > > cryptographic transformations. > > > > The support is added for TLS 1.2, TLS 1.3 and DTLS 1.2. > > From the code below, my understanding is that *ONLY* the record layer is > being added/supported? The difference is described well above, but the > intended support added is not clearly defined. > > Suggest reword the last line to clarify: > "Support for TLS record protocol is added for TLS 1.2, TLS 1.3 and DTLS 1.2." [Anoob] Indeed. Will reword as suggested. > > > > Signed-off-by: Akhil Goyal > > Signed-off-by: Anoob Joseph > > Signed-off-by: Vidya Sagar Velumuri > > --- > > doc/guides/prog_guide/rte_security.rst | 58 + > > lib/security/rte_security.c| 4 + > > lib/security/rte_security.h| 110 + > > 3 files changed, 172 insertions(+) > > > > diff --git a/doc/guides/prog_guide/rte_security.rst > > b/doc/guides/prog_guide/rte_security.rst > > index 7418e35c1b..7716d7239f 100644 > > --- a/doc/guides/prog_guide/rte_security.rst > > +++ b/doc/guides/prog_guide/rte_security.rst > > @@ -399,6 +399,64 @@ The API ``rte_security_macsec_sc_create`` returns > > a handle for SC, and this handle is set in > > ``rte_security_macsec_xform`` to create a MACsec session using > > ``rte_security_session_create``. > > > > +TLS-Record Protocol > > +~~~ > > + > > +The Transport Layer Protocol provides communications security over > > +the > > Internet. The protocol > > +allows client/server applications to communicate in a way that is > > +designed to > > prevent eavesdropping, > > +tampering, or message forgery. > > + > > +TLS protocol is composed of two layers: the TLS Record Protocol and > > +the TLS > > Handshake Protocol. At > > +the lowest level, layered on top of some reliable transport protocol > > +(e.g., TCP), > > is the TLS Record > > +Protocol. The TLS Record Protocol provides connection security that > > +has two > > basic properties: > > + > > + - The connection is private. 
Symmetric cryptography is used for data > > + encryption (e.g., AES, DES, etc.). The keys for this symmetric > encryption > > + are generated uniquely for each connection and are based on a secret > > + negotiated by another protocol (such as the TLS Handshake Protocol). > The > > + Record Protocol can also be used without encryption. > > + > > + - The connection is reliable. Message transport includes a message > > + integrity check using a keyed MAC. Secure hash functions (e.g., > > + SHA-1, etc.) are used for MAC computations. The Record Protocol > > + can operate without a MAC, but is generally only used in this mode > > + while another protocol is using the Record Protocol as a transport > > + for negotiating security parameters. > > + > > +.. code-block:: c > > The code block below isn't C? Is there a better code block type for a text > diagram? [Anoob] Valid point. I was just following the general scheme followed in this file. May be, I'll introduce a .svg image for newly added code. > > > + Record Write Record Read > > + --- > > + > > + TLSPlaintext TLSCiphertext > > + |
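For readers following the record-protocol discussion above, the unit being protected is the standard TLS record, whose 5-byte header is fixed by RFC 5246; shown here only as background, not as part of the proposed API:

    /* TLS record header (RFC 5246), the framing the record protocol
     * encrypts and authenticates -- background illustration only. */
    struct tls_record_hdr {
            uint8_t    type;       /* ContentType, e.g. 23 = application_data */
            uint8_t    ver_major;  /* 3 for TLS 1.x */
            uint8_t    ver_minor;  /* 3 for TLS 1.2 */
            rte_be16_t length;     /* payload length, big endian */
    } __rte_packed;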
Re: Re: Re: [PATCH v1] examples/l3fwd: relax the RSS/Offload requirement
Hi Konstantin, Please see my comments inline Thanks. Trevor At 2023-09-20 16:04:41, "Konstantin Ananyev" wrote: > >Hi Trevor, > > >> >> At 2023-09-18 02:04:19, "Konstantin Ananyev" >> wrote: >>>03/09/2023 05:01, Trevor Tao пишет: Now the port Rx mq_mode had been set to RTE_ETH_MQ_RX_RSS, and offload mode set to RTE_ETH_RX_OFFLOAD_CHECKSUM by default, but some hardware and/or virtual interface does not support the RSS and offload mode presupposed, e.g., some virtio interfaces in the cloud don't support RSS and may only partly support RTE_ETH_RX_OFFLOAD_UDP_CKSUM/ RTE_ETH_RX_OFFLOAD_TCP_CKSUM, but not RTE_ETH_RX_OFFLOAD_IPV4_CKSUM, and the error msg here: virtio_dev_configure(): RSS support requested but not supported by the device Port0 dev_configure = -95 and: Ethdev port_id=0 requested Rx offloads 0xe does not match Rx offloads capabilities 0x201d in rte_eth_dev_configure() So to enable the l3fwd running in that environment, the Rx mode requirement can be relaxed to reflect the hardware feature reality here, and the l3fwd can run smoothly then. A warning msg would be provided to user in case it happens here. On the other side, enabling the software cksum check in case the hw support missing. Fixes: af75078fece3 ("first public release") Cc: sta...@dpdk.org >>> >>>I don't think there was abug here. >>>We are talking about changing current requirements for the app. >>>So not sure it is a real fix and that such change can be >> >>>propagated to stable releases. >> Trevor: I think it's not a bug fix but a feature enhancement, it would >> enable l3fwd to work smoothly on the HW/virtual interfaces which don't >> support RSS and/or cksum offloading. > > >Yes. it seems like sort of an enhancement. >While 'Fixes: ...' are for bugs only. >AFAIK, only bug-fixes are take for backporting by stable releases. >That's why there seems no point to add CC: sta...@dpdk.org > >Another generic things: >- l3fwd doc and release notes probably need to be updated Trevor>>I think it's ok to update the l3fwd doc and release notes, but I would like to know which part of the doc/notes is approriate to add the enhancement declaration. >- as you areintroducing 2 distinct features: no-rss and no-ipv4-cksum > it is probably better to split it into 2 different patches (in the >same series). Trevor>>I think it's ok to split it into 2 patches here in the same series, if you would like to. Thanks. > >> >> >>> Signed-off-by: Trevor Tao --- examples/l3fwd/l3fwd.h | 12 +++- examples/l3fwd/main.c | 21 +++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index b55855c932..cc10643c4b 100644 --- a/examples/l3fwd/l3fwd.h +++ b/examples/l3fwd/l3fwd.h @@ -115,6 +115,8 @@ extern struct acl_algorithms acl_alg[]; extern uint32_t max_pkt_len; +extern struct rte_eth_conf port_conf; + /* Send burst of packets on an output interface */ static inline int send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t port) @@ -170,7 +172,15 @@ is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t link_len) return -1; /* 2. The IP checksum must be correct. */ - /* this is checked in H/W */ + /* if this is not checked in H/W, check it. 
*/ + if ((port_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM) == 0) { >>> >>>Might be better to check particular mbuf flag: >>>if ((mbuf->ol_flags & RTE_MBUF_F_RX_IP_CKSUM_MASK) == >> >>>RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN) {...} >> Trevor: the utility function is_valid_ipv4_pkt is just against an IPv4 pkt, >> and there's no mbuf information, and if needed, there would be an extra >> ol_flags added here to check if it was already done by the ethernet device, >> but look for a sample in: >> https://github.com/DPDK/dpdk/blob/main/examples/l3fwd-power/main.c#L487 >> so I think it's ok to just use the port_conf here. If you still think it's >> better to use m->ol_flags, please tell me. > > >Yep, passing ol_flags, or mbuf itself seems like a proper way to do it. >Approach taken in l3fwd-power doesn't look right to me, see below. > >>> + uint16_t actual_cksum, expected_cksum; + actual_cksum = pkt->hdr_checksum; + pkt->hdr_checksum = 0; + expected_cksum = rte_ipv4_cksum(pkt); + if (actual_cksum != expected_cksum) + return -2; + } /* * 3. The IP version number must be 4. If the version number is not 4 diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c index 6063eb1399..37aec64718 100644 --- a/examples/l3fwd/main.c +++ b/examples/l3fwd/main.c @@ -117,7 +117,7 @
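For reference, a minimal sketch of the ol_flags-based variant Konstantin suggests; the extra `ol_flags` parameter and the checksum-field restore are assumptions, not part of the posted patch:

```c
#include <rte_ip.h>
#include <rte_mbuf.h>

/* Sketch only: the caller passes mbuf->ol_flags, so the software check
 * runs just for packets the HW did not verify (or flags a HW-reported
 * bad checksum directly). */
static inline int
is_valid_ipv4_pkt(struct rte_ipv4_hdr *pkt, uint32_t link_len, uint64_t ol_flags)
{
	uint64_t csum = ol_flags & RTE_MBUF_F_RX_IP_CKSUM_MASK;

	/* 1. The packet length must be large enough to hold the header. */
	if (link_len < sizeof(struct rte_ipv4_hdr))
		return -1;

	/* 2. The IP checksum must be correct. */
	if (csum == RTE_MBUF_F_RX_IP_CKSUM_BAD)
		return -2;
	if (csum == RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN) {
		/* Not checked in HW; verify in software, as in the patch. */
		uint16_t actual_cksum = pkt->hdr_checksum;

		pkt->hdr_checksum = 0;
		if (actual_cksum != rte_ipv4_cksum(pkt))
			return -2;
		pkt->hdr_checksum = actual_cksum; /* restore the header */
	}

	/* 3. ... remaining checks (version, length) stay unchanged. */
	return 0;
}
```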
[PATCH v2] testpmd: add hairpin-map parameter
Testpmd hairpin implementation always sets the next valid port to complete hairpin binding. That limits hairpin configuration options. The new parameter allows explicit selection of Rx and Tx ports and queues in hairpin configuration. The new `hairpin-map` parameter is provided with 5 parameters, separated by `:` `--hairpin-map=Rx port id:Rx queue:Tx port id:Tx queue:queues number` Testpmd operator can provide several `hairpin-map` parameters for different hairpin maps. Example: dpdk-testpmd <EAL params> -- \ <testpmd params> \ --rxq=2 --txq=2 --hairpinq=2 --hairpin-mode=0x12 \ --hairpin-map=0:2:1:2:1 \ # [1] --hairpin-map=0:3:2:2:3 # [2] Hairpin map [1] binds Rx port 0, queue 2 with Tx port 1, queue 2. Hairpin map [2] binds Rx port 0, queue 3 with Tx port 2, queue 2, Rx port 0, queue 4 with Tx port 2, queue 3, Rx port 0, queue 5 with Tx port 2, queue 4. The new `hairpin-map` parameter is optional. If omitted, testpmd will create "default" hairpin maps. Signed-off-by: Gregory Etelson --- v2: fix Windows Server 2019 compilation failure. --- app/test-pmd/parameters.c | 63 app/test-pmd/testpmd.c| 212 ++ app/test-pmd/testpmd.h| 18 +++ doc/guides/testpmd_app_ug/run_app.rst | 3 + 4 files changed, 230 insertions(+), 66 deletions(-) diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c index a9ca58339d..c6bdfdf06f 100644 --- a/app/test-pmd/parameters.c +++ b/app/test-pmd/parameters.c @@ -206,6 +206,12 @@ usage(char* progname) printf(" --hairpin-mode=0xXX: bitmask set the hairpin port mode.\n" "0x10 - explicit Tx rule, 0x02 - hairpin ports paired\n" "0x01 - hairpin ports loop, 0x00 - hairpin port self\n"); + printf(" --hairpin-map=rxpi:rxq:txpi:txq:n: hairpin map.\n" + "rxpi - Rx port index.\n" + "rxq - Rx queue.\n" + "txpi - Tx port index.\n" + "txq - Tx queue.\n" + "n- hairpin queues number.\n"); } #ifdef RTE_LIB_CMDLINE @@ -588,6 +594,55 @@ parse_link_speed(int n) return speed; } +static __rte_always_inline +char *parse_hairpin_map_entry(char *input, char **next) +{ + char *tail = strchr(input, ':'); + + if (!tail) + return NULL; + tail[0] = '\0'; + *next = tail + 1; + return input; +} + +static int +parse_hairpin_map(const char *hpmap) +{ + /* +* Testpmd hairpin map format: +* <Rx port id>:<Rx queue>:<Tx port id>:<Tx queue>:<queues number> +*/ + char *head, *next = (char *)(uintptr_t)hpmap; + struct hairpin_map *map = calloc(1, sizeof(*map)); + + if (!map) + return -ENOMEM; + + head = parse_hairpin_map_entry(next, &next); + if (!head) + goto err; + map->rx_port = atoi(head); + head = parse_hairpin_map_entry(next, &next); + if (!head) + goto err; + map->rxq_head = atoi(head); + head = parse_hairpin_map_entry(next, &next); + if (!head) + goto err; + map->tx_port = atoi(head); + head = parse_hairpin_map_entry(next, &next); + if (!head) + goto err; + map->txq_head = atoi(head); + map->qnum = atoi(next); + hairpin_add_multiport_map(map); + return 0; +err: + free(map); + return -EINVAL; +} + void launch_args_parse(int argc, char** argv) { @@ -663,6 +718,7 @@ launch_args_parse(int argc, char** argv) { "txd",1, 0, 0 }, { "hairpinq", 1, 0, 0 }, { "hairpin-mode", 1, 0, 0 }, + { "hairpin-map",1, 0, 0 }, { "burst", 1, 0, 0 }, { "flowgen-clones", 1, 0, 0 }, { "flowgen-flows", 1, 0, 0 }, @@ -,6 +1167,13 @@ launch_args_parse(int argc, char** argv) else hairpin_mode = (uint32_t)n; } + if (!strcmp(lgopts[opt_idx].name, "hairpin-map")) { + hairpin_multiport_mode = true; + ret = parse_hairpin_map(optarg); + if (ret) + rte_exit(EXIT_FAILURE, "invalid hairpin map\n"); + + } if (!strcmp(lgopts[opt_idx].name, "burst")) { n = atoi(optarg); if (n == 0) { diff --git a/app/test-pmd/testpmd.c 
b/app/test-pmd/testpmd.c index 938ca035d4..2c6975f22d 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -434,6 +434,16 @@ uint8_t clear_ptypes = true; /* Hairpin ports configuration mode. */ uint32_t hairpin_mode; +bool hairpin_multiport_mode = false; + +static LIST_HEAD(, hairpin_map) hairpin_map_he
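To make the map expansion described in the commit message concrete, here is a hedged sketch of how one `hairpin-map` entry fans out into per-queue bindings. The struct fields follow the parse code above; their types and the `bind_one()` helper are illustrative placeholders, not the actual testpmd peering code:

```c
#include <stdint.h>
#include <stdio.h>

struct hairpin_map {
	uint16_t rx_port;  /* Rx port index */
	uint16_t rxq_head; /* first Rx hairpin queue */
	uint16_t tx_port;  /* Tx port index */
	uint16_t txq_head; /* first Tx hairpin queue */
	uint16_t qnum;     /* number of hairpin queue pairs */
};

/* Placeholder for the real hairpin peering code. */
static void
bind_one(uint16_t rxp, uint16_t rxq, uint16_t txp, uint16_t txq)
{
	printf("Rx port %u queue %u <-> Tx port %u queue %u\n",
	       rxp, rxq, txp, txq);
}

/* One map entry expands into qnum consecutive Rx/Tx queue pairs, e.g.
 * --hairpin-map=0:3:2:2:3 yields (0,3)->(2,2), (0,4)->(2,3), (0,5)->(2,4),
 * matching hairpin map [2] in the commit message. */
static void
expand_hairpin_map(const struct hairpin_map *map)
{
	uint16_t i;

	for (i = 0; i < map->qnum; i++)
		bind_one(map->rx_port, map->rxq_head + i,
			 map->tx_port, map->txq_head + i);
}
```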
RE: [PATCH] eventdev/eth_rx: fix timestamp field register in mbuf
> -Original Message- > From: Rahul Bhansali > Sent: Monday, September 18, 2023 1:56 PM > To: dev@dpdk.org; Naga Harish K, S V ; Jerin > Jacob ; Kundapura, Ganapati > > Cc: Rahul Bhansali ; sta...@dpdk.org > Subject: [PATCH] eventdev/eth_rx: fix timestamp field register in mbuf > > For eventdev internal port, timestamp dynamic field registration in mbuf is > not > required as that will be done from net device. > For SW eventdev, Rx timestamp field registration will be done during Rx queue > add operation as per device capabilities and offload configuration. > > Fixes: 83ab470d1259 ("eventdev/eth_rx: use timestamp as dynamic mbuf > field") > Cc: sta...@dpdk.org > > Signed-off-by: Rahul Bhansali > --- > lib/eventdev/rte_event_eth_rx_adapter.c | 19 --- > 1 file changed, 12 insertions(+), 7 deletions(-) > > diff --git a/lib/eventdev/rte_event_eth_rx_adapter.c > b/lib/eventdev/rte_event_eth_rx_adapter.c > index 3ebfa5366d..5a5fade466 100644 > --- a/lib/eventdev/rte_event_eth_rx_adapter.c > +++ b/lib/eventdev/rte_event_eth_rx_adapter.c > @@ -2472,13 +2472,6 @@ rxa_create(uint8_t id, uint8_t dev_id, > if (conf_cb == rxa_default_conf_cb) > rx_adapter->default_cb_arg = 1; > > - if (rte_mbuf_dyn_rx_timestamp_register( > - &event_eth_rx_timestamp_dynfield_offset, > - &event_eth_rx_timestamp_dynflag) != 0) { > - RTE_EDEV_LOG_ERR("Error registering timestamp field in > mbuf\n"); > - return -rte_errno; > - } > - > rte_eventdev_trace_eth_rx_adapter_create(id, dev_id, conf_cb, > conf_arg); > return 0; > @@ -2738,6 +2731,7 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id, > 1); > } > } else { > + uint64_t dev_offloads; > rte_spinlock_lock(&rx_adapter->rx_lock); > dev_info->internal_event_port = 0; > ret = rxa_init_service(rx_adapter, id); @@ -2749,6 +2743,17 > @@ rte_event_eth_rx_adapter_queue_add(uint8_t id, > rxa_sw_adapter_queue_count(rx_adapter)); > } > rte_spinlock_unlock(&rx_adapter->rx_lock); > + > + dev_offloads = dev_info->dev->data->dev_conf.rxmode.offloads; This is a one-time operation and need not happen for every queue_add. Move this registration to the "rxa_init_service()" function which executes only once for creating the rte_service. Also, there is no need to check for offload capabilities; directly do the registration inside rxa_init_service() as done before in rxa_create. The mbuf field is global to the entire application, so its registration need not be based on ethdev offload capabilities. > + if (dev_offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) { > + if (rte_mbuf_dyn_rx_timestamp_register( > + > &event_eth_rx_timestamp_dynfield_offset, > + > &event_eth_rx_timestamp_dynflag) != 0) { > + RTE_EDEV_LOG_ERR("Error registering > timestamp field in mbuf\n"); > + return -rte_errno; > + } > + } > + > } > > rte_eventdev_trace_eth_rx_adapter_queue_add(id, eth_dev_id, > -- > 2.25.1
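A hedged sketch of the rework the review asks for; the helper name is made up and the globals are repeated here only so the fragment stands alone:

```c
#include <rte_errno.h>
#include <rte_mbuf_dyn.h>

/* Globals as in rte_event_eth_rx_adapter.c. */
static int event_eth_rx_timestamp_dynfield_offset = -1;
static uint64_t event_eth_rx_timestamp_dynflag;

/* Sketch: do the registration once, where the SW service is set up
 * (i.e. called from rxa_init_service()), unconditionally rather than
 * per queue_add and per offload flags. */
static int
rxa_timestamp_dynfield_register(void)
{
	if (rte_mbuf_dyn_rx_timestamp_register(
			&event_eth_rx_timestamp_dynfield_offset,
			&event_eth_rx_timestamp_dynflag) != 0)
		return -rte_errno;

	return 0;
}
```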
Re: [PATCH] net/nfp: fix invalid control message packets
On 7/13/2023 4:02 AM, Chaoyong He wrote: > From: Long Wu > > If we add two cards that use flower firmware into one dpdk-testpmd, > NFP PMD will print an error log. The reason is that the second card > uses the control VNIC Rx queue of the first card. > > Because rte_eth_dma_zone_reserve() will reserve a new DMA zone if > the DMA zone's name is unique. But if there is already a zone with the > same name, rte_eth_dma_zone_reserve() will return the pointer of > the previously reserved DMA zone. We try to reserve a DMA zone for each card > but we use the same name to reserve. > > We use the PCI address to give the control VNIC a unique ring name > to avoid the above situation and let each NIC's ring have its > own DMA zone. > > Fixes: 945441ebdb9c ("net/nfp: add flower ctrl VNIC") > Cc: chaoyong...@corigine.com > Cc: sta...@dpdk.org > > Signed-off-by: Long Wu > Acked-by: Ferruh Yigit Applied to dpdk-next-net/main, thanks.
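For readers outside the nfp driver, the shape of the fix is roughly the following; the function and ring-name suffix are illustrative, not the actual nfp code:

```c
#include <stdio.h>
#include <ethdev_driver.h>
#include <rte_memzone.h>

/* Illustrative only: embed the unique PCI device name in the ring name
 * so rte_eth_dma_zone_reserve() cannot hand back another card's zone
 * that was previously reserved under the same generic name. */
static const struct rte_memzone *
ctrl_ring_dma_reserve(struct rte_eth_dev *eth_dev, const char *pci_name,
		      uint16_t queue_id, size_t size, int socket_id)
{
	char ring_name[RTE_MEMZONE_NAMESIZE];

	snprintf(ring_name, sizeof(ring_name), "%s_ctrl_ring", pci_name);
	return rte_eth_dma_zone_reserve(eth_dev, ring_name, queue_id,
					size, RTE_CACHE_LINE_SIZE, socket_id);
}
```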
[PATCH 0/2] ethdev: add group set miss actions API
Introduce new group set miss actions API: rte_flow_group_set_miss_actions(). A group's miss actions are a set of actions to be performed in case of a miss on a group, i.e. when a packet didn't hit any flow rules in the group. Currently, the expected behavior in this case is undefined. In order to achieve such functionality, a user can add a flow rule that matches on all traffic with the lowest priority in the group - this is not explicit however, and can be overridden by another flow rule with a lower priority. This new API function allows a user to set a group's miss actions in an explicit way. RFC discussion: http://patches.dpdk.org/project/dpdk/patch/20230807133601.164018-1-tshmilov...@nvidia.com/ Tomer Shmilovich (2): ethdev: add group set miss actions API app/testpmd: add group set miss actions CLI commands .mailmap | 1 + app/test-pmd/cmdline_flow.c| 112 + app/test-pmd/config.c | 27 ++ app/test-pmd/testpmd.h | 2 + doc/guides/prog_guide/rte_flow.rst | 30 +++ doc/guides/rel_notes/release_23_11.rst | 5 ++ lib/ethdev/rte_flow.c | 22 + lib/ethdev/rte_flow.h | 35 lib/ethdev/rte_flow_driver.h | 7 ++ lib/ethdev/version.map | 3 + 10 files changed, 244 insertions(+) -- 2.34.1
[PATCH 2/2] app/testpmd: add group set miss actions CLI commands
Add testpmd CLI interface for the group set miss actions API: flow group 0 group_id 1 ingress set_miss_actions jump group 3 / end flow group 0 group_id 1 ingress set_miss_actions end Signed-off-by: Tomer Shmilovich Acked-by: Ori Kam --- app/test-pmd/cmdline_flow.c | 112 app/test-pmd/config.c | 27 + app/test-pmd/testpmd.h | 2 + 3 files changed, 141 insertions(+) diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c index 94827bcc4a..b3b8893e37 100644 --- a/app/test-pmd/cmdline_flow.c +++ b/app/test-pmd/cmdline_flow.c @@ -86,6 +86,7 @@ enum index { PATTERN_TEMPLATE, ACTIONS_TEMPLATE, TABLE, + FLOW_GROUP, INDIRECT_ACTION, VALIDATE, CREATE, @@ -206,6 +207,13 @@ enum index { TABLE_PATTERN_TEMPLATE, TABLE_ACTIONS_TEMPLATE, + /* Group arguments */ + GROUP_ID, + GROUP_INGRESS, + GROUP_EGRESS, + GROUP_TRANSFER, + GROUP_SET_MISS_ACTIONS, + /* Tunnel arguments. */ TUNNEL_CREATE, TUNNEL_CREATE_TYPE, @@ -1293,6 +1301,14 @@ static const enum index next_at_destroy_attr[] = { ZERO, }; +static const enum index next_group_attr[] = { + GROUP_INGRESS, + GROUP_EGRESS, + GROUP_TRANSFER, + GROUP_SET_MISS_ACTIONS, + ZERO, +}; + static const enum index next_table_subcmd[] = { TABLE_CREATE, TABLE_DESTROY, @@ -2678,6 +2694,9 @@ static int parse_push(struct context *, const struct token *, static int parse_pull(struct context *, const struct token *, const char *, unsigned int, void *, unsigned int); +static int parse_group(struct context *, const struct token *, + const char *, unsigned int, + void *, unsigned int); static int parse_tunnel(struct context *, const struct token *, const char *, unsigned int, void *, unsigned int); @@ -3021,6 +3040,7 @@ static const struct token token_list[] = { PATTERN_TEMPLATE, ACTIONS_TEMPLATE, TABLE, + FLOW_GROUP, INDIRECT_ACTION, VALIDATE, CREATE, @@ -3411,6 +3431,46 @@ static const struct token token_list[] = { .call = parse_table, }, /* Top-level command. */ + [FLOW_GROUP] = { + .name = "group", + .help = "manage flow groups", + .next = NEXT(NEXT_ENTRY(GROUP_ID), NEXT_ENTRY(COMMON_PORT_ID)), + .args = ARGS(ARGS_ENTRY(struct buffer, port)), + .call = parse_group, + }, + /* Sub-level commands. */ + [GROUP_SET_MISS_ACTIONS] = { + .name = "set_miss_actions", + .help = "set group miss actions", + .next = NEXT(next_action), + .call = parse_group, + }, + /* Group arguments */ + [GROUP_ID] = { + .name = "group_id", + .help = "group id", + .next = NEXT(next_group_attr, NEXT_ENTRY(COMMON_GROUP_ID)), + .args = ARGS(ARGS_ENTRY(struct buffer, args.vc.attr.group)), + }, + [GROUP_INGRESS] = { + .name = "ingress", + .help = "group ingress attr", + .next = NEXT(next_group_attr), + .call = parse_group, + }, + [GROUP_EGRESS] = { + .name = "egress", + .help = "group egress attr", + .next = NEXT(next_group_attr), + .call = parse_group, + }, + [GROUP_TRANSFER] = { + .name = "transfer", + .help = "group transfer attr", + .next = NEXT(next_group_attr), + .call = parse_group, + }, + /* Top-level command. */ [QUEUE] = { .name = "queue", .help = "queue a flow rule operation", @@ -10449,6 +10509,54 @@ parse_pull(struct context *ctx, const struct token *token, return len; } +static int +parse_group(struct context *ctx, const struct token *token, + const char *str, unsigned int len, + void *buf, unsigned int size) +{ + struct buffer *out = buf; + + /* Token name must match. */ + if (parse_default(ctx, token, str, len, NULL, 0) < 0) + return -1; + /* Nothing else to do if there is no buffer. 
*/ + if (!out) + return len; + if (!out->command) { + if (ctx->curr != FLOW_GROUP) + return -1; + if (sizeof(*out) > size) + return -1; + out->command = ctx->curr; + ctx->objdata = 0; + ctx->object = o
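For reference, the first CLI example above corresponds to roughly this call into the new API; this sketch is based on the rte_flow.rst example in patch 1/2, with error handling reduced to the return value:

```c
#include <rte_flow.h>

/* Equivalent of:
 *   flow group 0 group_id 1 ingress set_miss_actions jump group 3 / end */
static int
set_group_miss_jump(uint16_t port_id)
{
	struct rte_flow_group_attr attr = { .ingress = 1 };
	struct rte_flow_action actions[] = {
		/* On a miss in group 1, jump to group 3. */
		{
			.type = RTE_FLOW_ACTION_TYPE_JUMP,
			.conf = &(struct rte_flow_action_jump){ .group = 3 },
		},
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error error;

	return rte_flow_group_set_miss_actions(port_id, 1, &attr,
					       actions, &error);
}
```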
[PATCH 1/2] ethdev: add group set miss actions API
Introduce new group set miss actions API: rte_flow_group_set_miss_actions(). A group's miss actions are a set of actions to be performed in case of a miss on a group, meaning a packet didn't hit any rules in the group. This API function allows a user to set a group's miss actions. Signed-off-by: Tomer Shmilovich Acked-by: Ori Kam --- .mailmap | 1 + doc/guides/prog_guide/rte_flow.rst | 30 ++ doc/guides/rel_notes/release_23_11.rst | 5 lib/ethdev/rte_flow.c | 22 lib/ethdev/rte_flow.h | 35 ++ lib/ethdev/rte_flow_driver.h | 7 ++ lib/ethdev/version.map | 3 +++ 7 files changed, 103 insertions(+) diff --git a/.mailmap b/.mailmap index 864d33ee46..0cd6be849e 100644 --- a/.mailmap +++ b/.mailmap @@ -1411,6 +1411,7 @@ Tom Barbette Tom Crugnale Tom Millington Tom Rix +Tomer Shmilovich Tone Zhang Tonghao Zhang Tony Nguyen diff --git a/doc/guides/prog_guide/rte_flow.rst b/doc/guides/prog_guide/rte_flow.rst index 5bc998a433..590d2a770e 100644 --- a/doc/guides/prog_guide/rte_flow.rst +++ b/doc/guides/prog_guide/rte_flow.rst @@ -3758,6 +3758,36 @@ Information about the number of available resources can be retrieved via struct rte_flow_queue_info *queue_info, struct rte_flow_error *error); +Group Miss Actions +~~ + +In an application, many flow rules share common group attributes, meaning they can be grouped and +classified together. A user can explicitly specify a set of actions performed on a packet when it +did not match any flow rules in a group using the following API: + +.. code-block:: c + + int + rte_flow_group_set_miss_actions(uint16_t port_id, + uint32_t group_id, + const struct rte_flow_group_attr *attr, + const struct rte_flow_action actions[], + struct rte_flow_error *error); + +For example, to configure an RTE_FLOW_ACTION_TYPE_JUMP action as a miss action for ingress group 1: + +.. code-block:: c + + struct rte_flow_group_attr attr = {.ingress = 1}; + struct rte_flow_action act[] = { + /* Setting miss actions to jump to group 3 */ + [0] = {.type = RTE_FLOW_ACTION_TYPE_JUMP, + .conf = &(struct rte_flow_action_jump){.group = 3}}, + [1] = {.type = RTE_FLOW_ACTION_TYPE_END}, + }; + struct rte_flow_error err; + rte_flow_group_set_miss_actions(port, 1, &attr, act, &err); + Flow templates ~~ diff --git a/doc/guides/rel_notes/release_23_11.rst b/doc/guides/rel_notes/release_23_11.rst index 333e1d95a2..da0ddc2078 100644 --- a/doc/guides/rel_notes/release_23_11.rst +++ b/doc/guides/rel_notes/release_23_11.rst @@ -41,6 +41,11 @@ DPDK Release 23.11 New Features +* **Added flow group set miss actions.** + Introduced ``rte_flow_group_set_miss_actions()`` API to explicitly set a group's miss actions, + which are the actions to be performed on packets that didn't match any of the flow rules + in the group. + .. This section should contain new features added in this release. 
Sample format: diff --git a/lib/ethdev/rte_flow.c b/lib/ethdev/rte_flow.c index 271d854f78..a98d87265f 100644 --- a/lib/ethdev/rte_flow.c +++ b/lib/ethdev/rte_flow.c @@ -1973,6 +1973,28 @@ rte_flow_template_table_destroy(uint16_t port_id, NULL, rte_strerror(ENOTSUP)); } +int +rte_flow_group_set_miss_actions(uint16_t port_id, + uint32_t group_id, + const struct rte_flow_group_attr *attr, + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (unlikely(!ops)) + return -rte_errno; + if (likely(!!ops->group_set_miss_actions)) { + return flow_err(port_id, + ops->group_set_miss_actions(dev, group_id, attr, actions, error), + error); + } + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOTSUP)); +} + struct rte_flow * rte_flow_async_create(uint16_t port_id, uint32_t queue_id, diff --git a/lib/ethdev/rte_flow.h b/lib/ethdev/rte_flow.h index 2ebb76dbc0..82548a8b93 100644 --- a/lib/ethdev/rte_flow.h +++ b/lib/ethdev/rte_flow.h @@ -129,6 +129,12 @@ struct rte_flow_attr { uint32_t reserved:29; /**< Reserved, must be zero. */ }; +struct rte_flow_group_attr { + uint32_t i
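To show where the new callback lands on the driver side, here is a hedged sketch of a PMD hook matching the ops call in rte_flow.c above; the function body is a placeholder, not any real driver's code:

```c
#include <rte_flow_driver.h>

/* Sketch: per-PMD implementation registered through struct rte_flow_ops. */
static int
pmd_group_set_miss_actions(struct rte_eth_dev *dev,
			   uint32_t group_id,
			   const struct rte_flow_group_attr *attr,
			   const struct rte_flow_action actions[],
			   struct rte_flow_error *error)
{
	/* Translate 'actions' into device objects attached to 'group_id';
	 * an empty list (END only) would restore the default miss behavior. */
	(void)dev; (void)group_id; (void)attr; (void)actions; (void)error;
	return 0;
}

static const struct rte_flow_ops pmd_flow_ops = {
	/* ... other callbacks ... */
	.group_set_miss_actions = pmd_group_set_miss_actions,
};
```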
[PATCH] net/virtio: fix descriptors buffer addresses on 32 bits builds
With Virtio-user, the Virtio descriptor buffer address is the virtual address of the mbuf's buffer. On 32 bits builds, it is expected to be 32 bits. With Virtio-PCI, the Virtio descriptor buffer address is the physical address of the mbuf's buffer. On 32 bits builds running on 64 bits kernel, it is expected to be up to 64 bits. This patch introduces a new mask field in virtqueue's struct to filter out the upper 4 bytes of the address only when necessary. An optimization is introduced for 64 bits builds to remove the masking, as the address is always 64 bits wide. Fixes: ba55c94a7ebc ("net/virtio: revert forcing IOVA as VA mode for virtio-user") Cc: sta...@dpdk.org Reported-by: Sampath Peechu Signed-off-by: Maxime Coquelin --- drivers/net/virtio/virtqueue.c | 2 ++ drivers/net/virtio/virtqueue.h | 18 ++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/net/virtio/virtqueue.c b/drivers/net/virtio/virtqueue.c index 1d836f2530..6f419665f1 100644 --- a/drivers/net/virtio/virtqueue.c +++ b/drivers/net/virtio/virtqueue.c @@ -469,9 +469,11 @@ virtqueue_alloc(struct virtio_hw *hw, uint16_t index, uint16_t num, int type, if (hw->use_va) { vq->vq_ring_mem = (uintptr_t)mz->addr; vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_addr); + vq->mbuf_addr_mask = UINTPTR_MAX; } else { vq->vq_ring_mem = mz->iova; vq->mbuf_addr_offset = offsetof(struct rte_mbuf, buf_iova); + vq->mbuf_addr_mask = UINT64_MAX; } PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64, vq->vq_ring_mem); diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h index 9d4aba11a3..c1cb941c43 100644 --- a/drivers/net/virtio/virtqueue.h +++ b/drivers/net/virtio/virtqueue.h @@ -114,17 +114,26 @@ virtqueue_store_flags_packed(struct vring_packed_desc *dp, #define VIRTQUEUE_MAX_NAME_SZ 32 +#ifdef RTE_ARCH_32 +#define VIRTIO_MBUF_ADDR_MASK(vq) ((vq)->mbuf_addr_mask) +#else +#define VIRTIO_MBUF_ADDR_MASK(vq) UINT64_MAX +#endif + /** * Return the IOVA (or virtual address in case of virtio-user) of mbuf * data buffer. * * The address is firstly casted to the word size (sizeof(uintptr_t)) - * before casting it to uint64_t. This is to make it work with different - * combination of word size (64 bit and 32 bit) and virtio device - * (virtio-pci and virtio-user). + * before casting it to uint64_t. It is then masked with the expected + * address length (64 bits for virtio-pci, word size for virtio-user). + * + * This is to make it work with different combination of word size (64 + * bit and 32 bit) and virtio device (virtio-pci and virtio-user). */ #define VIRTIO_MBUF_ADDR(mb, vq) \ - ((uint64_t)(*(uintptr_t *)((uintptr_t)(mb) + (vq)->mbuf_addr_offset))) + ((*(uint64_t *)((uintptr_t)(mb) + (vq)->mbuf_addr_offset)) & \ + VIRTIO_MBUF_ADDR_MASK(vq)) /** * Return the physical address (or virtual address in case of @@ -194,6 +203,7 @@ struct virtqueue { void *vq_ring_virt_mem; /**< linear address of vring*/ unsigned int vq_ring_size; uint16_t mbuf_addr_offset; + uint64_t mbuf_addr_mask; union { struct virtnet_rx rxq; -- 2.41.0
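A short illustration of why the mask is needed on 32-bit builds. It assumes the mbuf layout where the 8-byte buf_iova directly follows the 4-byte buf_addr on a little-endian 32-bit build; the helper is commentary, not code from the patch:

```c
#include <stdint.h>

/* Emulates the 64-bit load + mask done by VIRTIO_MBUF_ADDR(): 'base'
 * stands for the mbuf, 'offset' for vq->mbuf_addr_offset and 'mask' for
 * vq->mbuf_addr_mask (UINTPTR_MAX for virtio-user, UINT64_MAX for
 * virtio-pci). */
static inline uint64_t
desc_addr(const void *base, uint16_t offset, uint64_t mask)
{
	/* On a 32-bit build pointing at buf_addr, an unmasked 64-bit load
	 * also picks up the low half of the adjacent buf_iova field;
	 * masking with 0xffffffff keeps only the virtual address. For
	 * virtio-pci the offset points at buf_iova and the UINT64_MAX mask
	 * preserves the full 64-bit IOVA. */
	return (*(const uint64_t *)((uintptr_t)base + offset)) & mask;
}
```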
Re: Commit broke 32-bit testpmd app
On 9/20/23 09:35, Maxime Coquelin wrote: Hi, I tried to reproduce without success (see attached log). I fail to reproduce because buf_iova fits into 32 bits in my case: (gdb) p /x *tx_pkts[0] $4 = { cacheline0 = 0x77b19ec0, buf_addr = 0x77b19f40, buf_iova = 0x49519f40, rearm_data = 0x77b19ed0, However, looking at your report, something like this would work for you: diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h index 9d4aba11a3..38efbc517a 100644 --- a/drivers/net/virtio/virtqueue.h +++ b/drivers/net/virtio/virtqueue.h @@ -124,7 +124,7 @@ virtqueue_store_flags_packed(struct vring_packed_desc *dp, * (virtio-pci and virtio-user). */ #define VIRTIO_MBUF_ADDR(mb, vq) \ - ((uint64_t)(*(uintptr_t *)((uintptr_t)(mb) + (vq)->mbuf_addr_offset))) + (*(uint64_t *)((uintptr_t)(mb) + (vq)->mbuf_addr_offset)) The problem is that it would likely break Virtio-user in 32 bits mode, as this is how it was initially implemented, and got fixed a few years ago, as David hinted to me: commit 260aae9ad9621e3e758f1443abb8fcbc25ece07c Author: Jianfeng Tan Date: Wed Apr 19 02:30:33 2017 + net/virtio-user: fix address on 32-bit system virtio-user cannot work on 32-bit system as higher 32-bit of the addr field (64-bit) in the desc is filled with non-zero value which should not happen for a 32-bit system. In case of virtio-user, we use buf_addr of mbuf to fill the virtqueue desc addr. This is a regression bug. For 32-bit system, the first 4 bytes of mbuf is buf_addr, with following 8 bytes for buf_phyaddr. With below wrong definition, both buf_addr and lower 4 bytes buf_phyaddr are obtained to fill the virtqueue desc. #define VIRTIO_MBUF_ADDR(mb, vq) \ (*(uint64_t *)((uintptr_t)(mb) + (vq)->offset)) Fixes: 25f80d108780 ("net/virtio: fix packet corruption") Cc: sta...@dpdk.org Signed-off-by: Jianfeng Tan Acked-by: Yuanhan Liu If my understanding is correct, on 32 bits, when mbuf->buf_addr is used (Virtio-user), we need to mask out the higher 4 bytes, while when using Virtio-pci we need the full 64 bits (as the physical addresses used as IOVA on the guest are 64 bits). I posted a fix aiming at making it work for both Virtio-user and Virtio-PCI 32 bits builds while not impacting 64 bits performance. Could you please have a try and report feedback by replying to the patch? Regards, Maxime Regards, Maxime On 9/13/23 15:24, Roger Melton (rmelton) wrote: +Chris Brezovec Hi Maxime, Chris from our team is attending the DPDK Summit in Dublin this week. If you have some time available, we'd appreciate it if he could meet with you to discuss the 32bit virtio issue we are seeing. Regards, Roger Melton On 9/6/23 2:57 PM, Dave Johnson (davejo) wrote: Hi Maxime, This email is regarding the following commit: https://github.com/DPDK/dpdk/commit/ba55c94a7ebc386d2288d6578ed57aad6cb92657 A query had been sent previously on this topic (see below) indicating this commit appears to have broken the 32-bit testpmd app and impacted one of our products that runs as a 32-bit DPDK application. We consequently backed the commit out of our product but would prefer to get a fix for it. In the earlier exchange, you had asked if we were using virtio-pci or virtio-user (we are using virtio-pci) and asked for logs which Sampath provided. It's been a while, so let me know if you need me to resend those logs or need any other information. 
FWIW, I reproduced this using testpmd and noticed that this part of the change seems to be the interesting part (in drivers/net/virtio/virtqueue.h): /** * Return the IOVA (or virtual address in case of virtio-user) of mbuf * data buffer. * * The address is firstly casted to the word size (sizeof(uintptr_t)) * before casting it to uint64_t. This is to make it work with different * combination of word size (64 bit and 32 bit) and virtio device * (virtio-pci and virtio-user). */ #define VIRTIO_MBUF_ADDR(mb, vq) \ ((uint64_t)(*(uintptr_t *)((uintptr_t)(mb) + (vq)->mbuf_addr_offset)) If I revert just this part of the changeset (by re-using the VIRTIO_MBUF_ADDR to return buf_iova which matches what it had used previously), then 32-bit testpmd is able to receive traffic again: #define VIRTIO_MBUF_ADDR(mb, vq) (mb->buf_iova) Looking at the address produced by each of these, I see the address is the same except that the casting results in the upper bits getting cleared: Address from patch (nonworking case) = 0x58e7c900 Address using buf_iova (working case) = 0x158e7c900 :: Address from patch (nonworking case) = 0x58e7bfc0 Address using buf_iova (working case) = 0x158e7bfc0 :: Address from patch (nonworking case) = 0x58e7b680 Address using buf_iova (working case) = 0x158e7b680 :: Regards, Dave *From: *Sampath Peechu (speechu) *Date: *Monday, January 30, 2023 at 3:29 PM *To: *Maxime Coquelin , che
Re: [PATCH v12 1/4] ethdev: add API for mbufs recycle mode
On 8/24/2023 8:36 AM, Feifei Wang wrote: > Add 'rte_eth_recycle_rx_queue_info_get' and 'rte_eth_recycle_mbufs' > APIs to recycle used mbufs from a transmit queue of an Ethernet device, > and move these mbufs into a mbuf ring for a receive queue of an Ethernet > device. This can bypass mempool 'put/get' operations hence saving CPU > cycles. > > For each recycling mbufs, the rte_eth_recycle_mbufs() function performs > the following operations: > - Copy used *rte_mbuf* buffer pointers from Tx mbuf ring into Rx mbuf > ring. > - Replenish the Rx descriptors with the recycling *rte_mbuf* mbufs freed > from the Tx mbuf ring. > > Suggested-by: Honnappa Nagarahalli > Suggested-by: Ruifeng Wang > Signed-off-by: Feifei Wang > Reviewed-by: Ruifeng Wang > Reviewed-by: Honnappa Nagarahalli > Acked-by: Morten Brørup > Acked-by: Konstantin Ananyev > Acked-by: Ferruh Yigit
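A hedged sketch of how the two APIs fit into a forwarding loop; the port/queue ids and burst size are placeholders, not from the patch:

```c
#include <rte_ethdev.h>

/* Sketch: recycle used Tx-side mbufs straight into the Rx mbuf ring,
 * then do normal I/O forwarding, bypassing the mempool put/get path. */
static void
recycle_fwd_loop(uint16_t rx_port, uint16_t rx_queue,
		 uint16_t tx_port, uint16_t tx_queue)
{
	struct rte_eth_recycle_rxq_info recycle_rxq_info;
	struct rte_mbuf *pkts[32];
	uint16_t nb_rx;

	/* Query the Rx queue's mbuf ring once the rxq/txq pairing is known;
	 * the pairing may be changed at run time by repeating this call. */
	rte_eth_recycle_rx_queue_info_get(rx_port, rx_queue,
					  &recycle_rxq_info);

	for (;;) {
		rte_eth_recycle_mbufs(rx_port, rx_queue, tx_port, tx_queue,
				      &recycle_rxq_info);
		nb_rx = rte_eth_rx_burst(rx_port, rx_queue, pkts, 32);
		rte_eth_tx_burst(tx_port, tx_queue, pkts, nb_rx);
	}
}
```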
Re: [PATCH v12 4/4] app/testpmd: add recycle mbufs engine
On 8/24/2023 8:36 AM, Feifei Wang wrote: > Add recycle mbufs engine for testpmd. This engine forward pkts with > I/O forward mode. But enable mbufs recycle feature to recycle used > txq mbufs for rxq mbuf ring, which can bypass mempool path and save > CPU cycles. > > Suggested-by: Jerin Jacob > Replacing with Jerin's primary email in .mailmap
Re: [PATCH v12 0/4] Recycle mbufs from Tx queue into Rx queue
On 8/24/2023 8:36 AM, Feifei Wang wrote: > Currently, the transmit side frees the buffers into the lcore cache and > the receive side allocates buffers from the lcore cache. The transmit > side typically frees 32 buffers resulting in 32*8=256B of stores to > lcore cache. The receive side allocates 32 buffers and stores them in > the receive side software ring, resulting in 32*8=256B of stores and > 256B of load from the lcore cache. > > This patch proposes a mechanism to avoid freeing to/allocating from > the lcore cache. i.e. the receive side will free the buffers from > transmit side directly into its software ring. This will avoid the 256B > of loads and stores introduced by the lcore cache. It also frees up the > cache lines used by the lcore cache. And we can call this mode as mbufs > recycle mode. > > In the latest version, mbufs recycle mode is packaged as a separate API. > This allows for the users to change rxq/txq pairing in real time in data > plane, > according to the analysis of the packet flow by the application, for example: > --- > Step 1: upper application analyse the flow direction > Step 2: recycle_rxq_info = rte_eth_recycle_rx_queue_info_get(rx_portid, > rx_queueid) > Step 3: rte_eth_recycle_mbufs(rx_portid, rx_queueid, tx_portid, tx_queueid, > recycle_rxq_info); > Step 4: rte_eth_rx_burst(rx_portid,rx_queueid); > Step 5: rte_eth_tx_burst(tx_portid,tx_queueid); > --- > Above can support user to change rxq/txq pairing at run-time and user does > not need to > know the direction of flow in advance. This can effectively expand mbufs > recycle mode's > use scenarios. > > Furthermore, mbufs recycle mode is no longer limited to the same pmd, > it can support moving mbufs between different vendor pmds, even can put the > mbufs > anywhere into your Rx mbuf ring as long as the address of the mbuf ring can > be provided. > In the latest version, we enable mbufs recycle mode in i40e pmd and ixgbe > pmd, and also try to > use i40e driver in Rx, ixgbe driver in Tx, and then achieve 7-9% performance > improvement > by mbufs recycle mode. > > Difference between mbuf recycle, ZC API used in mempool and general path > For general path: > Rx: 32 pkts memcpy from mempool cache to rx_sw_ring > Tx: 32 pkts memcpy from tx_sw_ring to temporary variable + 32 > pkts memcpy from temporary variable to mempool cache > For ZC API used in mempool: > Rx: 32 pkts memcpy from mempool cache to rx_sw_ring > Tx: 32 pkts memcpy from tx_sw_ring to zero-copy mempool cache > Refer link: > http://patches.dpdk.org/project/dpdk/patch/20230221055205.22984-2-kamalakshitha.alig...@arm.com/ > For mbufs recycle: > Rx/Tx: 32 pkts memcpy from tx_sw_ring to rx_sw_ring > Thus we can see in the one loop, compared to general path, mbufs recycle mode > reduces 32+32=64 pkts memcpy; > Compared to ZC API used in mempool, we can see mbufs recycle mode reduce 32 > pkts memcpy in each loop. > So, mbufs recycle has its own benefits. > > Testing status: > (1) dpdk l3fwd test with multiple drivers: > port 0: 82599 NIC port 1: XL710 NIC > - > Without fast free With fast free > Thunderx2: +7.53%+13.54% > - > > (2) dpdk l3fwd test with same driver: > port 0 && 1: XL710 NIC > - > Without fast free With fast free > Ampere altra: +12.61% +11.42% > n1sdp:+8.30% +3.85% > x86-sse: +8.43% +3.72% > - > > (3) Performance comparison with ZC_mempool used > port 0 && 1: XL710 NIC > with fast free > - > With recycle buffer With zc_mempool > Ampere altra: 11.42% 3.54% > - > > Furthermore, we add recycle_mbuf engine in testpmd. 
Because the XL710 NIC has an > I/O bottleneck in testpmd on ampere altra, we can not see throughput change > compared with I/O fwd engine. However, using record cmd in testpmd: > '$set record-burst-stats on' > we can see the ratio of 'Rx/Tx burst size of 32' is reduced. This > indicates mbufs recycle can save CPU cycles. > > V2: > 1. Use data-plane API to enable direct-rearm (Konstantin, Honnappa) > 2. Add 'txq_data_get' API to get txq info for Rx (Konstantin) > 3. Use input parameter to enable direct rearm in l3fwd (Konstantin) > 4. Add condition detection for direct rearm API (Morten, Andrew Rybchenko) > > V3: > 1. Separate Rx and Tx operation wi
Re: [PATCH v16 1/8] net/ntnic: initial commit which adds register defines
Hello, 19/09/2023 11:06, Christian Koue Muf: > On 9/18/23 10:34 AM, Ferruh Yigit wrote: > >On 9/15/2023 7:37 PM, Morten Brørup wrote: > >>> From: Ferruh Yigit [mailto:ferruh.yi...@amd.com] > >>> Sent: Friday, 15 September 2023 17.55 > >>> > >>> On 9/8/2023 5:07 PM, Mykola Kostenok wrote: > From: Christian Koue Muf > > The NTNIC PMD does not rely on a kernel space Napatech driver, thus > all defines related to the register layout are part of the PMD code, > which will be added in later commits. > > Signed-off-by: Christian Koue Muf > Reviewed-by: Mykola Kostenok > > >>> > >>> Hi Mykola, Christian, > >>> > >>> This PMD scares me, overall it is a big drop: > >>> "249 files changed, 87128 insertions(+)" > >>> > >>> I think it is not possible to review all in one release cycle, and it > >>> is not even possible to say if all code is used or not. > >>> > >>> I can see code is already developed, and it is difficult to > >>> restructure developed code, but restructure it into small pieces > >>> really helps for reviews. > >>> > >>> > >>> Driver supports a good list of features, can it be possible to > >>> distribute upstream effort into multiple releases. > >>> Starting from basic functionality and add features gradually. > >>> Target for this release can be providing datapath, and add more if we > >>> have time in the release, what do you think? I was expecting to get only Rx/Tx in this release, not really more. I agree it may be interesting to discuss some design and check whether we need more features in ethdev as part of the driver upstreaming process. > >>> Also there are large amount of base code (HAL / FPGA code), instead > >>> of adding them as a bulk, relevant ones with a feature can be added > >>> with the feature patch, this eliminates dead code in the base code > >>> layer, also helps user/review to understand the link between driver > >>> code and base code. Yes it would be interesting to see what is really needed for the basic initialization and what is linked to a specific offload or configuration feature. As a maintainer, I have to do some changes across all drivers sometimes, and I use git blame a lot to understand why something was added. > >> Jumping in here with an opinion about welcoming new NIC vendors to the > >> community: > >> > >> Generally, if a NIC vendor supplies a PMD for their NIC, I expect the > >> vendor to take responsibility for the quality of the PMD, including > >> providing a maintainer and support backporting of fixes to the PMD in LTS > >> releases. This should align with the vendor's business case for > >> upstreaming their driver. > >> > >> If the vendor provides one big patch series, which may be difficult to > >> understand/review, the fallout mainly hits the vendor's customers (and > >> thus the vendor's support organization), not the community as a whole. > >> > > > >Hi Morten, > > > >I was thinking the same before making my above comment, what happens if vendors > >submit as one big patch and when a problem occurs we can ask owner to fix. > >Probably this makes vendor happy and makes my life (or any other > >maintainer's life) easier, it is always easier to say yes. 
> > > >But I come up with two main reasons to ask for a rework: > > > >1- Technically any vendor can deliver their software to their customers via > >a public git repository, they don't have to upstream to > >https://dpdk.org, > >but upstreaming has many benefits. > > > >One of those benefits is upstreaming provides a quality assurance for > >vendor's customers (that is why customer can be asking for this, as we are > >having in many cases), and this quality assurance comes from additional eyes > >reviewing the code and guiding vendors for the DPDK quality standards (some > >vendors already doing pretty good, but new ones sometimes require > >hand-holding). > > > >If driver is one big patch series, it is practically not possible to review > >it, I can catch a few bits here or there, you may catch some others, but > >practically it will be merged without review, and we will fail on our > >quality assurance task. > > > >2- Make code more accessible to the rest of the world. > > > >When it is a big patch, code can be functional but lots of details, > >reasoning, relation between components gets lost, which makes it even harder > >for an external developer, like me, to understand it (I am a mere guinea pig > >here :). > > > >If a customer would like to add a feature themselves, or fix something, even > >after the vendor is no longer working on that product, customer needs to > >understand the code or some reasoning in the code. > >Or if someone wants to backport the driver to rust, or a DPDK developer > >wants to do a rework that requires updating all driv
[PATCH v2 00/13] crypto/dpaax_sec: misc enhancements
v2: compilation fixes This series includes misc enhancements in dpaax_sec drivers. - improving the IPsec protocol offload features - enhancing PDCP protocol processing - code optimization and cleanup Apeksha Gupta (1): crypto/dpaa2_sec: enhance dpaa FD FL FMT offset set Gagandeep Singh (3): common/dpaax: update IPsec base descriptor length common/dpaax: change mode to wait in shared desc crypto/dpaax_sec: set the authdata in non-auth case Hemant Agrawal (8): crypto/dpaa2_sec: supporting null cipher and auth crypto/dpaa_sec: supporting null cipher and auth crypto/dpaa2_sec: support copy df and dscp in proto offload crypto/dpaa2_sec: increase the anti replay window size crypto/dpaa2_sec: enable esn support crypto/dpaa2_sec: add NAT-T support in IPsec offload crypto/dpaa2_sec: add support to set df and diffserv crypto/dpaax_sec: enable sha224-hmac support for IPsec Vanshika Shukla (1): crypto/dpaa2_sec: initialize the pdcp alg to null drivers/common/dpaax/caamflib/desc.h | 5 +- drivers/common/dpaax/caamflib/desc/ipsec.h| 9 +- drivers/common/dpaax/caamflib/desc/pdcp.h | 82 +++--- .../common/dpaax/caamflib/rta/protocol_cmd.h | 5 +- .../dpaax/caamflib/rta/sec_run_time_asm.h | 2 +- drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c | 245 +++--- drivers/crypto/dpaa2_sec/dpaa2_sec_priv.h | 64 - drivers/crypto/dpaa2_sec/dpaa2_sec_raw_dp.c | 47 +--- drivers/crypto/dpaa_sec/dpaa_sec.c| 15 +- drivers/crypto/dpaa_sec/dpaa_sec.h| 42 ++- drivers/net/dpaa2/dpaa2_rxtx.c| 3 +- 11 files changed, 326 insertions(+), 193 deletions(-) -- 2.17.1
[PATCH v2 01/13] common/dpaax: update IPsec base descriptor length
From: Gagandeep Singh If all the keys are inlined, the descriptor would be 32 + 20 = 52 words, which is the size of the current shared descriptor created. So 32 * CAAM_CMD_SZ is the value that must be passed to rta_inline_query() for its "sd_base_len" parameter, and drivers are using the IPSEC_AUTH_VAR_AES_DEC_BASE_DESC_LEN value to pass as first argument to rta_inline_query(). So, the value of IPSEC_AUTH_VAR_AES_DEC_BASE_DESC_LEN must be updated to 32 * CAAM_CMD_SZ. Signed-off-by: Franck LENORMAND Signed-off-by: Gagandeep Singh --- drivers/common/dpaax/caamflib/desc/ipsec.h | 4 ++-- drivers/common/dpaax/caamflib/rta/sec_run_time_asm.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/common/dpaax/caamflib/desc/ipsec.h b/drivers/common/dpaax/caamflib/desc/ipsec.h index 8ec6aac915..14e80baf77 100644 --- a/drivers/common/dpaax/caamflib/desc/ipsec.h +++ b/drivers/common/dpaax/caamflib/desc/ipsec.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0) * * Copyright 2008-2016 Freescale Semiconductor Inc. - * Copyright 2016,2019-2020 NXP + * Copyright 2016,2019-2022 NXP * */ @@ -1380,7 +1380,7 @@ cnstr_shdsc_ipsec_new_decap(uint32_t *descbuf, bool ps, * layers to determine whether keys can be inlined or not. To be used as first * parameter of rta_inline_query(). */ -#define IPSEC_AUTH_VAR_BASE_DESC_LEN (27 * CAAM_CMD_SZ) +#define IPSEC_AUTH_VAR_BASE_DESC_LEN (31 * CAAM_CMD_SZ) /** * IPSEC_AUTH_VAR_AES_DEC_BASE_DESC_LEN - IPsec AES decap shared descriptor diff --git a/drivers/common/dpaax/caamflib/rta/sec_run_time_asm.h b/drivers/common/dpaax/caamflib/rta/sec_run_time_asm.h index f40eaadea3..5c2efeb2c5 100644 --- a/drivers/common/dpaax/caamflib/rta/sec_run_time_asm.h +++ b/drivers/common/dpaax/caamflib/rta/sec_run_time_asm.h @@ -413,7 +413,7 @@ rta_program_finalize(struct program *program) { /* Descriptor is usually not allowed to go beyond 64 words size */ if (program->current_pc > MAX_CAAM_DESCSIZE) - pr_warn("Descriptor Size exceeded max limit of 64 words\n"); + pr_debug("Descriptor Size exceeded max limit of 64 words"); /* Descriptor is erroneous */ if (program->first_error_pc) { -- 2.17.1
[PATCH v2 03/13] crypto/dpaa2_sec: initialize the pdcp alg to null
From: Vanshika Shukla This patch initializes the pdcp alg to null. Signed-off-by: Vanshika Shukla --- drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c b/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c index f9eba4a7bd..3ceb886ddb 100644 --- a/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c +++ b/drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2016 Freescale Semiconductor, Inc. All rights reserved. - * Copyright 2016-2022 NXP + * Copyright 2016-2023 NXP * */ @@ -3512,6 +3512,7 @@ dpaa2_sec_set_pdcp_session(struct rte_cryptodev *dev, session->auth_key.data = NULL; session->auth_key.length = 0; session->auth_alg = 0; + authdata.algtype = PDCP_AUTH_TYPE_NULL; } authdata.key = (size_t)session->auth_key.data; authdata.keylen = session->auth_key.length; -- 2.17.1
[PATCH v2 02/13] common/dpaax: change mode to wait in shared desc
From: Gagandeep Singh In case of protocol based offload, it is better to wait before the share descriptor complete the execution. Simultaneous sharing may cause issues. Signed-off-by: Gagandeep Singh --- drivers/common/dpaax/caamflib/desc/pdcp.h | 82 +++ 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/drivers/common/dpaax/caamflib/desc/pdcp.h b/drivers/common/dpaax/caamflib/desc/pdcp.h index 289ee2a7d5..7d16c66d79 100644 --- a/drivers/common/dpaax/caamflib/desc/pdcp.h +++ b/drivers/common/dpaax/caamflib/desc/pdcp.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: BSD-3-Clause or GPL-2.0+ * Copyright 2008-2013 Freescale Semiconductor, Inc. - * Copyright 2019-2022 NXP + * Copyright 2019-2023 NXP */ #ifndef __DESC_PDCP_H__ @@ -2338,27 +2338,27 @@ cnstr_shdsc_pdcp_c_plane_encap(uint32_t *descbuf, desc_share[PDCP_CIPHER_TYPE_INVALID][PDCP_AUTH_TYPE_INVALID] = { { /* NULL */ SHR_WAIT, /* NULL */ - SHR_ALWAYS, /* SNOW f9 */ - SHR_ALWAYS, /* AES CMAC */ - SHR_ALWAYS /* ZUC-I */ + SHR_WAIT, /* SNOW f9 */ + SHR_WAIT, /* AES CMAC */ + SHR_WAIT/* ZUC-I */ }, { /* SNOW f8 */ - SHR_ALWAYS, /* NULL */ - SHR_ALWAYS, /* SNOW f9 */ + SHR_WAIT, /* NULL */ + SHR_WAIT, /* SNOW f9 */ SHR_WAIT, /* AES CMAC */ SHR_WAIT/* ZUC-I */ }, { /* AES CTR */ - SHR_ALWAYS, /* NULL */ - SHR_ALWAYS, /* SNOW f9 */ - SHR_ALWAYS, /* AES CMAC */ + SHR_WAIT, /* NULL */ + SHR_WAIT, /* SNOW f9 */ + SHR_WAIT, /* AES CMAC */ SHR_WAIT/* ZUC-I */ }, { /* ZUC-E */ - SHR_ALWAYS, /* NULL */ + SHR_WAIT, /* NULL */ SHR_WAIT, /* SNOW f9 */ SHR_WAIT, /* AES CMAC */ - SHR_ALWAYS /* ZUC-I */ + SHR_WAIT/* ZUC-I */ }, }; enum pdb_type_e pdb_type; @@ -2478,27 +2478,27 @@ cnstr_shdsc_pdcp_c_plane_decap(uint32_t *descbuf, desc_share[PDCP_CIPHER_TYPE_INVALID][PDCP_AUTH_TYPE_INVALID] = { { /* NULL */ SHR_WAIT, /* NULL */ - SHR_ALWAYS, /* SNOW f9 */ - SHR_ALWAYS, /* AES CMAC */ - SHR_ALWAYS /* ZUC-I */ + SHR_WAIT, /* SNOW f9 */ + SHR_WAIT, /* AES CMAC */ + SHR_WAIT/* ZUC-I */ }, { /* SNOW f8 */ - SHR_ALWAYS, /* NULL */ - SHR_ALWAYS, /* SNOW f9 */ + SHR_WAIT, /* NULL */ + SHR_WAIT, /* SNOW f9 */ SHR_WAIT, /* AES CMAC */ SHR_WAIT/* ZUC-I */ }, { /* AES CTR */ - SHR_ALWAYS, /* NULL */ - SHR_ALWAYS, /* SNOW f9 */ - SHR_ALWAYS, /* AES CMAC */ + SHR_WAIT, /* NULL */ + SHR_WAIT, /* SNOW f9 */ + SHR_WAIT, /* AES CMAC */ SHR_WAIT/* ZUC-I */ }, { /* ZUC-E */ - SHR_ALWAYS, /* NULL */ + SHR_WAIT, /* NULL */ SHR_WAIT, /* SNOW f9 */ SHR_WAIT, /* AES CMAC */ - SHR_ALWAYS /* ZUC-I */ + SHR_WAIT/* ZUC-I */ }, }; enum pdb_type_e pdb_type; @@ -2643,24 +2643,24 @@ cnstr_shdsc_pdcp_u_plane_encap(uint32_t *descbuf, desc_share[PDCP_CIPHER_TYPE_INVALID][PDCP_AUTH_TYPE_INVALID] = { { /* NULL */ SHR_WAIT, /* NULL */ - SHR_ALWAYS, /* SNOW f9 */ - SHR_ALWAYS, /* AES CMAC */ - SHR_ALWAYS /* ZUC-I */ + SHR_WAIT, /* SNOW f9 */ + SHR_WAIT, /* AES CMAC */ + SHR_WAIT/* ZUC-I */ }, { /* SNOW f8 */ -