[dpdk-dev] [PATCH 0/4] Chained Mbufs support in SW PMDs
This patch set adds scatter-gather list support to the software crypto PMDs.
As of now an application needs to provide a contiguous block of memory for
each mbuf, which is not always possible. Hence the need to support chains of
smaller mbufs that can be used together. The work involves:

 a) mbuf functions to coalesce an mbuf chain into a single mbuf,
 b) code in each software poll mode driver to detect chained mbufs and
    coalesce them before performing crypto,
 c) relevant unit tests covering the new functionality.

Tomasz Kulasek (4):
  rte_mbuf: add rte_pktmbuf_coalesce
  test: add rte_pktmbuf_coalesce unit tests
  crypto: add sgl support for sw PMDs
  test: add sgl unit tests for crypto devices

 app/test/test_cryptodev.c                  | 456
 app/test/test_cryptodev.h                  | 111 +++
 app/test/test_cryptodev_aes_test_vectors.h |  32 +-
 app/test/test_cryptodev_blockcipher.c      | 170 +++
 app/test/test_cryptodev_blockcipher.h      |   1 +
 app/test/test_mbuf.c                       | 134
 drivers/crypto/aesni_gcm/aesni_gcm_pmd.c   |  14 +
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c |  19 +-
 drivers/crypto/kasumi/rte_kasumi_pmd.c     |  13 +
 drivers/crypto/null/null_crypto_pmd.c      |   3 +-
 drivers/crypto/openssl/rte_openssl_pmd.c   |  11 +
 drivers/crypto/snow3g/rte_snow3g_pmd.c     |  15 +
 drivers/crypto/zuc/rte_zuc_pmd.c           |  13 +
 lib/librte_cryptodev/rte_cryptodev.c       |   4 +-
 lib/librte_cryptodev/rte_cryptodev.h       |   3 +-
 lib/librte_mbuf/rte_mbuf.h                 |  34 +++
 16 files changed, 966 insertions(+), 67 deletions(-)

-- 
1.7.9.5
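For illustration only (not part of the series): a minimal sketch of how an
application could build such a chain from smaller mbufs using the existing
mbuf API. The mempool `pool`, the `data` pointer and the (non-zero) segment
size `seg_len` are hypothetical caller-supplied values.

#include <rte_common.h>
#include <rte_mbuf.h>
#include <rte_memcpy.h>

/*
 * Build a chained (multi-segment) mbuf carrying `len` bytes of `data`,
 * using several small mbufs from `pool`. Illustration only; on error the
 * chain built so far is freed.
 */
static struct rte_mbuf *
build_chained_mbuf(struct rte_mempool *pool, const uint8_t *data,
		uint32_t len, uint16_t seg_len)
{
	struct rte_mbuf *head = NULL, *seg;
	uint32_t off = 0;

	while (off < len) {
		uint32_t copy = RTE_MIN((uint32_t)seg_len, len - off);
		char *dst;

		seg = rte_pktmbuf_alloc(pool);
		if (seg == NULL)
			goto fail;

		dst = rte_pktmbuf_append(seg, (uint16_t)copy);
		if (dst == NULL) {
			rte_pktmbuf_free(seg);
			goto fail;
		}
		rte_memcpy(dst, data + off, copy);

		if (head == NULL)
			head = seg;
		else if (rte_pktmbuf_chain(head, seg) != 0) {
			rte_pktmbuf_free(seg);
			goto fail;
		}
		off += copy;
	}
	return head;

fail:
	rte_pktmbuf_free(head);	/* NULL-safe, frees the whole chain */
	return NULL;
}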
[dpdk-dev] [PATCH 2/4] test: add rte_pktmbuf_coalesce unit tests
This patch tests rte_pktmbuf_coalesce functionality: 1) Creates banch of segmented mbufs with different size and number of segments. 2) Generates pkt_len bytes of random data. 3) Fills noncontigouos mbuf with randomly generated data. 4) Uses rte_pktmbuf_coalesce to coalesce segmented buffer into one contiguous. 5) Verifies data in destination buffer. Signed-off-by: Tomasz Kulasek --- app/test/test_mbuf.c | 134 ++ 1 file changed, 134 insertions(+) diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c index c0823ea..e3c7657 100644 --- a/app/test/test_mbuf.c +++ b/app/test/test_mbuf.c @@ -930,6 +930,135 @@ return 0; } +static int +test_mbuf_coalesce(int pkt_len, int nb_segs) { + + struct rte_mbuf *m = NULL, *mbuf_src = NULL, *mbuf_dst = NULL; + uint8_t data[pkt_len], *src, *dst; + int data_len = 0; + int i, size; + int t_len; + + if (pkt_len < 1) { + printf("Packet size must be 1 or more (is %d)\n", pkt_len); + return -1; + } + + if (nb_segs < 1) { + printf("Number of segments must be 1 or more (is %d)\n", + nb_segs); + return -1; + } + + /* Setup buffer */ + for (i = 0; i < pkt_len; i++) + data[i] = (uint8_t) rte_rand(); + + t_len = pkt_len >= nb_segs ? pkt_len / nb_segs : 1; + src = data; + size = pkt_len; + + /* Create chained mbuf_src and fill it generated data */ + for (i = 0; size > 0; i++) { + + m = rte_pktmbuf_alloc(pktmbuf_pool); + if (i == 0) + mbuf_src = m; + + if (!m) { + printf("Cannot create segment for source mbuf"); + goto fail; + } + + /* Make sure if tailroom is zeroed */ + memset(rte_pktmbuf_mtod(m, uint8_t *), 0, + rte_pktmbuf_tailroom(m)); + + data_len = size > t_len ? t_len : size; + dst = (uint8_t *)rte_pktmbuf_append(m, data_len); + if (!dst) { + printf("Cannot append %d bytes to the mbuf\n", + data_len); + goto fail; + } + + rte_memcpy(dst, src, data_len); + src += data_len; + + if (mbuf_src != m) + rte_pktmbuf_chain(mbuf_src, m); + + size -= data_len; + + } + + /* Create destination buffer to store coalesced data */ + mbuf_dst = rte_pktmbuf_alloc(pktmbuf_pool); + if (!mbuf_dst) { + printf("Cannot create destination buffer\n"); + goto fail; + } + + dst = (uint8_t *)rte_pktmbuf_append(m, rte_pktmbuf_pkt_len(mbuf_dst)); + + if (rte_pktmbuf_coalesce(mbuf_dst, mbuf_src)) { + printf("Mbuf coalesce failed\n"); + goto fail; + } + + if (!rte_pktmbuf_is_contiguous(mbuf_dst)) { + printf("Destination buffer should be contiguous\n"); + goto fail; + } + + dst = rte_pktmbuf_mtod(mbuf_dst, uint8_t *); + + if (memcmp(dst, data, rte_pktmbuf_pkt_len(mbuf_src))) { + printf("Incorrect data in coalesced mbuf\n"); + goto fail; + } + + if (mbuf_src) + rte_pktmbuf_free(mbuf_src); + if (mbuf_dst) + rte_pktmbuf_free(mbuf_dst); + return 0; + +fail: + if (mbuf_src) + rte_pktmbuf_free(mbuf_src); + if (mbuf_dst) + rte_pktmbuf_free(mbuf_dst); + return -1; +} + +static int +test_mbuf_coalesce_check(void) +{ + struct test_mbuf_array { + int size; + int nb_segs; + } mbuf_array[5] = { + { 128, 2 }, + { 64, 64 }, + { 512, 10 }, + { 250, 11 }, + { 123, 8 }, + }; + unsigned i; + + printf("Test mbuf coalesce API\n"); + + for (i = 0; i < RTE_DIM(mbuf_array); i++) + if (test_mbuf_coalesce(mbuf_array[i].size, + mbuf_array[i].nb_segs)) { + printf("Test failed for %d, %d\n", mbuf_array[i].size, + mbuf_array[i].nb_segs); + return -1; + } + + return 0; +} static int test_mbuf(void) @@ -1023,6 +1152,11 @@ printf("test_failing_mbuf_sanity_check() failed\n"); return -1; } + + if (test_mbuf_coalesce_check() < 0) { + printf("test_mbuf_coalesce_check() failed\n"); + return -1; + } return 0; } -- 1.7.9.5
[dpdk-dev] [PATCH 1/4] rte_mbuf: add rte_pktmbuf_coalesce
This patch adds the function rte_pktmbuf_coalesce to let crypto PMDs coalesce
a chained mbuf before a crypto operation and extend their capabilities to
support segmented mbufs when the device cannot handle them natively.

Signed-off-by: Tomasz Kulasek
---
 lib/librte_mbuf/rte_mbuf.h | 34 ++
 1 file changed, 34 insertions(+)

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index ead7c6e..f048681 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -1647,6 +1647,40 @@ static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail
 }
 
 /**
+ * Coalesce data from an mbuf into a contiguous buffer.
+ *
+ * @param mbuf_dst
+ *   Contiguous destination mbuf
+ * @param mbuf_src
+ *   Uncontiguous source mbuf
+ *
+ * @return
+ *   - 0, on success
+ *   - -EINVAL, on error
+ */
+
+#include
+
+static inline int
+rte_pktmbuf_coalesce(struct rte_mbuf *mbuf_dst, struct rte_mbuf *mbuf_src)
+{
+	char *dst;
+
+	if (!rte_pktmbuf_is_contiguous(mbuf_dst) ||
+			rte_pktmbuf_data_len(mbuf_dst) >=
+			rte_pktmbuf_pkt_len(mbuf_src))
+		return -EINVAL;
+
+	dst = rte_pktmbuf_mtod(mbuf_dst, char *);
+
+	if (!__rte_pktmbuf_read(mbuf_src, 0, rte_pktmbuf_pkt_len(mbuf_src),
+			dst))
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
  * Dump an mbuf structure to a file.
  *
  * Dump all fields for the given packet mbuf and all its associated
-- 
1.7.9.5
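As a usage note (illustrative, not from the patch): the helper above is built
on the mbuf read path, so a caller that only needs a flat copy of a segmented
packet can also use rte_pktmbuf_read(), the public counterpart of the
__rte_pktmbuf_read() call used in the implementation. A minimal sketch,
assuming `flat` is a caller-provided buffer of at least `flat_len` bytes:

#include <stdint.h>
#include <rte_mbuf.h>

/*
 * Copy a whole, possibly segmented, packet into a flat buffer with
 * rte_pktmbuf_read(). Sketch only.
 */
static int
read_whole_packet(const struct rte_mbuf *m, uint8_t *flat, uint32_t flat_len)
{
	uint32_t pkt_len = rte_pktmbuf_pkt_len(m);

	if (pkt_len > flat_len)
		return -1;

	/* Returns a pointer either into the first segment (already
	 * contiguous) or to `flat` after copying segment by segment. */
	if (rte_pktmbuf_read(m, 0, pkt_len, flat) == NULL)
		return -1;

	return 0;
}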
[dpdk-dev] [PATCH 3/4] crypto: add sgl support for sw PMDs
This patch introduces RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER feature flag informing that selected crypto device supports segmented mbufs natively and doesn't need to be coalesced before crypto operation. While using segmented buffers in crypto devices may have unpredictable results, for PMDs which doesn't support it natively, additional check is made for debug compilation. Signed-off-by: Tomasz Kulasek --- drivers/crypto/aesni_gcm/aesni_gcm_pmd.c | 14 ++ drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 19 --- drivers/crypto/kasumi/rte_kasumi_pmd.c | 13 + drivers/crypto/null/null_crypto_pmd.c |3 ++- drivers/crypto/openssl/rte_openssl_pmd.c | 11 +++ drivers/crypto/snow3g/rte_snow3g_pmd.c | 15 +++ drivers/crypto/zuc/rte_zuc_pmd.c | 13 + lib/librte_cryptodev/rte_cryptodev.c |4 ++-- lib/librte_cryptodev/rte_cryptodev.h |3 ++- 9 files changed, 88 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c b/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c index dba5e15..1a6120c 100644 --- a/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c +++ b/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c @@ -375,6 +375,20 @@ break; } +#ifdef RTE_LIBRTE_PMD_AESNI_GCM_DEBUG + if (!rte_pktmbuf_is_contiguous(ops[i]->sym->m_src) || + (ops[i]->sym->m_dst != NULL && + !rte_pktmbuf_is_contiguous( + ops[i]->sym->m_dst))) { + ops[i]->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; + GCM_LOG_ERR("PMD supports only contiguous mbufs, " + "op (%p) provides noncontiguous mbuf as " + "source/destination buffer.\n", ops[i]); + qp->qp_stats.enqueue_err_count++; + break; + } +#endif + retval = process_gcm_crypto_op(qp, ops[i]->sym, sess); if (retval < 0) { ops[i]->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; diff --git a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c index f07cd07..b5e115e 100644 --- a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c +++ b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c @@ -529,15 +529,28 @@ int i, processed_jobs = 0; for (i = 0; i < nb_ops; i++) { -#ifdef RTE_LIBRTE_AESNI_MB_DEBUG - if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC)) { +#ifdef RTE_LIBRTE_PMD_AESNI_MB_DEBUG + if (unlikely(ops[i]->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC)) { MB_LOG_ERR("PMD only supports symmetric crypto " "operation requests, op (%p) is not a " - "symmetric operation.", op); + "symmetric operation.", ops[i]); + qp->stats.enqueue_err_count++; + goto flush_jobs; + } + + if (!rte_pktmbuf_is_contiguous(ops[i]->sym->m_src) || + (ops[i]->sym->m_dst != NULL && + !rte_pktmbuf_is_contiguous( + ops[i]->sym->m_dst))) { + MB_LOG_ERR("PMD supports only contiguous mbufs, " + "op (%p) provides noncontiguous mbuf as " + "source/destination buffer.\n", ops[i]); + ops[i]->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; qp->stats.enqueue_err_count++; goto flush_jobs; } #endif + sess = get_session(qp, ops[i]); if (unlikely(sess == NULL)) { qp->stats.enqueue_err_count++; diff --git a/drivers/crypto/kasumi/rte_kasumi_pmd.c b/drivers/crypto/kasumi/rte_kasumi_pmd.c index b119da2..4bdd7bb 100644 --- a/drivers/crypto/kasumi/rte_kasumi_pmd.c +++ b/drivers/crypto/kasumi/rte_kasumi_pmd.c @@ -455,6 +455,19 @@ for (i = 0; i < nb_ops; i++) { curr_c_op = ops[i]; +#ifdef RTE_LIBRTE_PMD_KASUMI_DEBUG + if (!rte_pktmbuf_is_contiguous(curr_c_op->sym->m_src) || + (curr_c_op->sym->m_dst != NULL && + !rte_pktmbuf_is_contiguous( + curr_c_op->sym->m_dst))) { + KASUMI_LOG_ERR("PMD supports only contiguous mbufs, " + "op (%p) pr
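For illustration (not part of the patch): with the new flag an application
can decide up front whether a chained mbuf may be passed to the crypto device
as-is, or whether it has to be coalesced first with the helper added earlier
in this series. A minimal sketch, assuming `dev_id` and `m` come from the
caller:

#include <stdint.h>
#include <rte_cryptodev.h>
#include <rte_mbuf.h>

/*
 * Return non-zero when the mbuf can be handed to the crypto device as
 * provided: a single-segment mbuf is always fine, a chained one only when
 * the device advertises native scatter-gather support.
 */
static int
crypto_dev_accepts_mbuf(uint8_t dev_id, const struct rte_mbuf *m)
{
	struct rte_cryptodev_info info;

	rte_cryptodev_info_get(dev_id, &info);

	return rte_pktmbuf_is_contiguous(m) ||
		(info.feature_flags & RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER) != 0;
}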
[dpdk-dev] [PATCH 4/4] test: add sgl unit tests for crypto devices
This patch provides unit tests for set of cipher/hash combinations covering currently implemented crypto PMD's and allowing to verify scatter gather support. Signed-off-by: Daniel Mrzyglod Signed-off-by: Tomasz Kulasek --- app/test/test_cryptodev.c | 456 app/test/test_cryptodev.h | 111 +++ app/test/test_cryptodev_aes_test_vectors.h | 32 +- app/test/test_cryptodev_blockcipher.c | 170 +++ app/test/test_cryptodev_blockcipher.h |1 + 5 files changed, 710 insertions(+), 60 deletions(-) diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c index 872f8b4..613dae9 100644 --- a/app/test/test_cryptodev.c +++ b/app/test/test_cryptodev.c @@ -1688,6 +1688,10 @@ struct crypto_unittest_params { TEST_ASSERT_NOT_NULL(sym_op->cipher.iv.data, "no room to prepend iv"); + /* For OOP operation both buffers must have the same size */ + if (ut_params->obuf) + rte_pktmbuf_prepend(ut_params->obuf, iv_pad_len); + memset(sym_op->cipher.iv.data, 0, iv_pad_len); sym_op->cipher.iv.phys_addr = rte_pktmbuf_mtophys(ut_params->ibuf); sym_op->cipher.iv.length = iv_pad_len; @@ -2509,6 +2513,84 @@ struct crypto_unittest_params { } static int +test_kasumi_encryption_sgl(const struct kasumi_test_data *tdata) +{ + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unittest_params *ut_params = &unittest_params; + + int retval; + + unsigned plaintext_pad_len; + unsigned plaintext_len; + + uint8_t buffer[1]; + const uint8_t *ciphertext; + + struct rte_cryptodev_info dev_info; + + rte_cryptodev_info_get(ts_params->valid_devs[0], &dev_info); + if (!(dev_info.feature_flags & RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER)) { + printf("Device doesn't support scatter-gather. " + "Test Skipped.\n"); + return 0; + } + + /* Create KASUMI session */ + retval = create_wireless_algo_cipher_session(ts_params->valid_devs[0], + RTE_CRYPTO_CIPHER_OP_ENCRYPT, + RTE_CRYPTO_CIPHER_KASUMI_F8, + tdata->key.data, tdata->key.len); + if (retval < 0) + return retval; + + plaintext_len = ceil_byte_length(tdata->plaintext.len); + + + /* Append data which is padded to a multiple */ + /* of the algorithms block size */ + plaintext_pad_len = RTE_ALIGN_CEIL(plaintext_len, 8); + + ut_params->ibuf = create_segmented_mbuf(ts_params->mbuf_pool, + plaintext_pad_len, 10); + + pktmbuf_write(ut_params->ibuf, 0, plaintext_len, tdata->plaintext.data); + + /* Create KASUMI operation */ + retval = create_wireless_algo_cipher_operation(tdata->iv.data, + tdata->iv.len, + tdata->plaintext.len, + tdata->validCipherOffsetLenInBits.len, + RTE_CRYPTO_CIPHER_KASUMI_F8); + if (retval < 0) + return retval; + + ut_params->op = process_crypto_request(ts_params->valid_devs[0], + ut_params->op); + TEST_ASSERT_NOT_NULL(ut_params->op, "failed to retrieve obuf"); + + ut_params->obuf = ut_params->op->sym->m_dst; + + if (ut_params->obuf) + ciphertext = rte_pktmbuf_read(ut_params->obuf, tdata->iv.len, + plaintext_len, buffer); + else + ciphertext = rte_pktmbuf_read(ut_params->ibuf, tdata->iv.len, + plaintext_len, buffer); + + /* Validate obuf */ + TEST_HEXDUMP(stdout, "ciphertext:", ciphertext, plaintext_len); + + /* Validate obuf */ + TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( + ciphertext, + tdata->ciphertext.data, + tdata->validCipherLenInBits.len, + "KASUMI Ciphertext data not as expected"); + return 0; +} + + +static int test_kasumi_encryption_oop(const struct kasumi_test_data *tdata) { struct crypto_testsuite_params *ts_params = &testsuite_params; @@ -2577,6 +2659,81 @@ struct crypto_unittest_params { } static int +test_kasumi_encryption_oop_sgl(const struct 
kasumi_test_data *tdata) +{ + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unittest_params *ut_params = &unittest_params; + + int retval; + unsigned plaintext_pad_len; +
[dpdk-dev] [PATCH v4] rte_mbuf: add rte_pktmbuf_linearize
This patch adds function rte_pktmbuf_linearize to let crypto PMD coalesce chained mbuf before crypto operation and extend their capabilities to support segmented mbufs when device cannot handle them natively. Included unit tests for rte_pktmbuf_linearize functionality: 1) Creates banch of segmented mbufs with different size and number of segments. 2) Generates pkt_len bytes of random data. 3) Fills noncontigouos mbuf with randomly generated data. 4) Uses rte_pktmbuf_linearize to coalesce segmented buffer into one contiguous. 5) Verifies data in linearized buffer. Dependencies: This patch is rebased to the dpdk-next-crypto and should be applied before "Chained Mbufs support in SW PMDs" patchset. changes in v4: - separated from "Chained Mbufs support in SW PMDs" patch set for better reviewing, - merged "rte_pktmbuf_linearize" implementation with unit tests, changes in v3: - rebased to dpdk-next-crypto changes in v2: - rte_pktmbuf_coalesce replaced with rte_pktmbuf_linearize Cc: Pablo de Lara Cc: Olivier Matz Signed-off-by: Tomasz Kulasek --- app/test/test_mbuf.c | 123 lib/librte_mbuf/rte_mbuf.h | 56 2 files changed, 179 insertions(+) diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c index c0823ea..39577e7 100644 --- a/app/test/test_mbuf.c +++ b/app/test/test_mbuf.c @@ -930,6 +930,124 @@ return 0; } +static int +test_mbuf_linearize(int pkt_len, int nb_segs) { + + struct rte_mbuf *m = NULL, *mbuf_src = NULL; + uint8_t data[pkt_len], *src, *dst; + int data_len = 0; + int i, size; + int t_len; + + if (pkt_len < 1) { + printf("Packet size must be 1 or more (is %d)\n", pkt_len); + return -1; + } + + if (nb_segs < 1) { + printf("Number of segments must be 1 or more (is %d)\n", + nb_segs); + return -1; + } + + /* Setup buffer */ + for (i = 0; i < pkt_len; i++) + data[i] = (uint8_t) rte_rand(); + + t_len = pkt_len >= nb_segs ? pkt_len / nb_segs : 1; + src = data; + size = pkt_len; + + /* Create chained mbuf_src and fill it generated data */ + for (i = 0; size > 0; i++) { + + m = rte_pktmbuf_alloc(pktmbuf_pool); + if (i == 0) + mbuf_src = m; + + if (!m) { + printf("Cannot create segment for source mbuf"); + goto fail; + } + + /* Make sure if tailroom is zeroed */ + memset(rte_pktmbuf_mtod(m, uint8_t *), 0, + rte_pktmbuf_tailroom(m)); + + data_len = size > t_len ? 
t_len : size; + dst = (uint8_t *)rte_pktmbuf_append(m, data_len); + if (!dst) { + printf("Cannot append %d bytes to the mbuf\n", + data_len); + goto fail; + } + + rte_memcpy(dst, src, data_len); + src += data_len; + + if (mbuf_src != m) + rte_pktmbuf_chain(mbuf_src, m); + + size -= data_len; + + } + + /* Create destination buffer to store coalesced data */ + if (rte_pktmbuf_linearize(mbuf_src)) { + printf("Mbuf linearization failed\n"); + goto fail; + } + + if (!rte_pktmbuf_is_contiguous(mbuf_src)) { + printf("Source buffer should be contiguous after " + "linearization\n"); + goto fail; + } + + src = rte_pktmbuf_mtod(mbuf_src, uint8_t *); + + if (memcmp(src, data, rte_pktmbuf_pkt_len(mbuf_src))) { + printf("Incorrect data in coalesced mbuf\n"); + goto fail; + } + + if (mbuf_src) + rte_pktmbuf_free(mbuf_src); + return 0; + +fail: + if (mbuf_src) + rte_pktmbuf_free(mbuf_src); + return -1; +} + +static int +test_mbuf_linearize_check(void) +{ + struct test_mbuf_array { + int size; + int nb_segs; + } mbuf_array[5] = { + { 128, 1 }, + { 64, 64 }, + { 512, 10 }, + { 250, 11 }, + { 123, 8 }, + }; + unsigned int i; + + printf("Test mbuf linearize API\n"); + + for (i = 0; i < RTE_DIM(mbuf_array); i++) + if (test_mbuf_linearize(mbuf_array[i].size, + mbuf_array[i].nb_segs)) { + printf("Test failed for %d, %d\n", mbuf_array[i].size, +
[dpdk-dev] [PATCH v4 1/3] crypto: add sgl support in sw PMDs
This patch introduces RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER feature flag informing that selected crypto device supports segmented mbufs natively and doesn't need to be coalesced before crypto operation. While using segmented buffers in crypto devices may have unpredictable results, for PMDs which doesn't support it natively, additional check is made for debug compilation. Signed-off-by: Tomasz Kulasek --- drivers/crypto/aesni_gcm/aesni_gcm_pmd.c | 14 ++ drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 19 --- drivers/crypto/kasumi/rte_kasumi_pmd.c | 13 + drivers/crypto/null/null_crypto_pmd.c |3 ++- drivers/crypto/snow3g/rte_snow3g_pmd.c | 15 +++ drivers/crypto/zuc/rte_zuc_pmd.c | 13 + lib/librte_cryptodev/rte_cryptodev.c |4 ++-- lib/librte_cryptodev/rte_cryptodev.h |2 ++ 8 files changed, 77 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c b/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c index dba5e15..1a6120c 100644 --- a/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c +++ b/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c @@ -375,6 +375,20 @@ break; } +#ifdef RTE_LIBRTE_PMD_AESNI_GCM_DEBUG + if (!rte_pktmbuf_is_contiguous(ops[i]->sym->m_src) || + (ops[i]->sym->m_dst != NULL && + !rte_pktmbuf_is_contiguous( + ops[i]->sym->m_dst))) { + ops[i]->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; + GCM_LOG_ERR("PMD supports only contiguous mbufs, " + "op (%p) provides noncontiguous mbuf as " + "source/destination buffer.\n", ops[i]); + qp->qp_stats.enqueue_err_count++; + break; + } +#endif + retval = process_gcm_crypto_op(qp, ops[i]->sym, sess); if (retval < 0) { ops[i]->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; diff --git a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c index 6d27d75..25f681b 100644 --- a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c +++ b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c @@ -571,15 +571,28 @@ int i, processed_jobs = 0; for (i = 0; i < nb_ops; i++) { -#ifdef RTE_LIBRTE_AESNI_MB_DEBUG - if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC)) { +#ifdef RTE_LIBRTE_PMD_AESNI_MB_DEBUG + if (unlikely(ops[i]->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC)) { MB_LOG_ERR("PMD only supports symmetric crypto " "operation requests, op (%p) is not a " - "symmetric operation.", op); + "symmetric operation.", ops[i]); + qp->stats.enqueue_err_count++; + goto flush_jobs; + } + + if (!rte_pktmbuf_is_contiguous(ops[i]->sym->m_src) || + (ops[i]->sym->m_dst != NULL && + !rte_pktmbuf_is_contiguous( + ops[i]->sym->m_dst))) { + MB_LOG_ERR("PMD supports only contiguous mbufs, " + "op (%p) provides noncontiguous mbuf as " + "source/destination buffer.\n", ops[i]); + ops[i]->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; qp->stats.enqueue_err_count++; goto flush_jobs; } #endif + sess = get_session(qp, ops[i]); if (unlikely(sess == NULL)) { qp->stats.enqueue_err_count++; diff --git a/drivers/crypto/kasumi/rte_kasumi_pmd.c b/drivers/crypto/kasumi/rte_kasumi_pmd.c index b119da2..4bdd7bb 100644 --- a/drivers/crypto/kasumi/rte_kasumi_pmd.c +++ b/drivers/crypto/kasumi/rte_kasumi_pmd.c @@ -455,6 +455,19 @@ for (i = 0; i < nb_ops; i++) { curr_c_op = ops[i]; +#ifdef RTE_LIBRTE_PMD_KASUMI_DEBUG + if (!rte_pktmbuf_is_contiguous(curr_c_op->sym->m_src) || + (curr_c_op->sym->m_dst != NULL && + !rte_pktmbuf_is_contiguous( + curr_c_op->sym->m_dst))) { + KASUMI_LOG_ERR("PMD supports only contiguous mbufs, " + "op (%p) provides noncontiguous mbuf as " + "s
[dpdk-dev] [PATCH v4 0/3] Chained Mbufs support in SW PMDs
This patch set adds scatter-gather list support to the software crypto PMDs.
As of now an application needs to provide a contiguous block of memory for
each mbuf, which is not always possible. Hence the need to support chains of
smaller mbufs that can be used together. The work involves:

 a) mbuf functions to coalesce an mbuf chain into a single mbuf,
 b) code in each software poll mode driver to detect chained mbufs and
    coalesce them before performing crypto,
 c) relevant unit tests covering the new functionality.

Known limitations for the openssl PMD:
--------------------------------------
Since the libcrypto library expects a contiguous destination buffer for the
output of cipher operations, the openssl PMD is limited the same way and
requires a contiguous destination mbuf.

Dependencies:
 - "rte_mbuf: add rte_pktmbuf_linearize" was separated from this patch set
   and should be applied before this one.
 - This patch set shares some unit tests with the SGL implementation for QAT
   (already merged in dpdk-next-crypto), so it should be applied on top of
   it, and after applying the fix "app/test: fix aad padding size in SGL
   operation" by Arek Kusztal.

changes in v4:
 - separated the "rte_pktmbuf_linearize" implementation from this patch set
   and sent it as a new patch for easier review,

changes in v3:
 - rebased to dpdk-next-crypto,
 - reused the tests for AES GCM SGL support in openssl from "app/test: add
   SGL tests to cryptodev QAT suite",

changes in v2:
 - added support for sgl in the openssl PMD,
 - rte_pktmbuf_coalesce replaced with rte_pktmbuf_linearize,
 - extended the aes gcm test vector data from 60 to 2048 bytes,

Tomasz Kulasek (3):
  crypto: add sgl support in sw PMDs
  crypto: add sgl support in openssl PMD
  test: add sgl unit tests for crypto devices

 app/test/test_cryptodev.c                  | 386 ++-
 app/test/test_cryptodev.h                  | 139 +++
 app/test/test_cryptodev_aes_test_vectors.h |  52 +++
 app/test/test_cryptodev_blockcipher.c      | 180 +
 app/test/test_cryptodev_blockcipher.h      |   1 +
 app/test/test_cryptodev_gcm_test_vectors.h | 553
 doc/guides/cryptodevs/openssl.rst          |   3 +-
 drivers/crypto/aesni_gcm/aesni_gcm_pmd.c   |  14 +
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c |  19 +-
 drivers/crypto/kasumi/rte_kasumi_pmd.c     |  13 +
 drivers/crypto/null/null_crypto_pmd.c      |   3 +-
 drivers/crypto/openssl/rte_openssl_pmd.c   | 329 +
 drivers/crypto/snow3g/rte_snow3g_pmd.c     |  15 +
 drivers/crypto/zuc/rte_zuc_pmd.c           |  13 +
 lib/librte_cryptodev/rte_cryptodev.c       |   4 +-
 lib/librte_cryptodev/rte_cryptodev.h       |   2 +
 16 files changed, 1577 insertions(+), 149 deletions(-)

-- 
1.7.9.5
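A usage sketch for the openssl limitation noted above (illustrative, not from
the series): for an out-of-place operation the application can keep the
source mbuf chained and allocate a single-segment destination mbuf sized to
the whole packet. It assumes the mempool's data room is large enough to hold
the packet in one segment.

#include <stdint.h>
#include <rte_mbuf.h>

/*
 * Allocate a contiguous (single-segment) destination mbuf able to hold the
 * whole source packet, as the openssl PMD requires for its output buffer.
 */
static struct rte_mbuf *
alloc_contiguous_dst(struct rte_mempool *pool, const struct rte_mbuf *src)
{
	uint32_t len = rte_pktmbuf_pkt_len(src);
	struct rte_mbuf *dst;

	if (len > UINT16_MAX)
		return NULL;

	dst = rte_pktmbuf_alloc(pool);
	if (dst == NULL)
		return NULL;

	/* Fails (returns NULL) when the pool's data room is too small to
	 * hold the packet in one segment. */
	if (rte_pktmbuf_append(dst, (uint16_t)len) == NULL) {
		rte_pktmbuf_free(dst);
		return NULL;
	}
	return dst;
}

The crypto operation would then use the chained mbuf as sym->m_src and the
contiguous one as sym->m_dst.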
[dpdk-dev] [PATCH v4 2/3] crypto: add sgl support in openssl PMD
Signed-off-by: Tomasz Kulasek --- doc/guides/cryptodevs/openssl.rst|3 +- drivers/crypto/openssl/rte_openssl_pmd.c | 329 +++--- 2 files changed, 259 insertions(+), 73 deletions(-) diff --git a/doc/guides/cryptodevs/openssl.rst b/doc/guides/cryptodevs/openssl.rst index d2b5906..d0b1eeb 100644 --- a/doc/guides/cryptodevs/openssl.rst +++ b/doc/guides/cryptodevs/openssl.rst @@ -112,6 +112,7 @@ Limitations --- * Maximum number of sessions is 2048. -* Chained mbufs are not supported. +* Chained mbufs are supported only for source mbuf (destination must be + contiguous). * Hash only is not supported for GCM and GMAC. * Cipher only is not supported for GCM and GMAC. diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c index 832ea1d..e466c79 100644 --- a/drivers/crypto/openssl/rte_openssl_pmd.c +++ b/drivers/crypto/openssl/rte_openssl_pmd.c @@ -484,24 +484,112 @@ * Process Operations *-- */ +static inline int +process_openssl_encryption_update(struct rte_mbuf *mbuf_src, int offset, + uint8_t **dst, int srclen, EVP_CIPHER_CTX *ctx) +{ + struct rte_mbuf *m; + int dstlen; + int l, n = srclen; + uint8_t *src; + + for (m = mbuf_src; m != NULL && offset > rte_pktmbuf_data_len(m); + m = m->next) + offset -= rte_pktmbuf_data_len(m); + + if (m == 0) + return -1; + + src = rte_pktmbuf_mtod_offset(m, uint8_t *, offset); + + l = rte_pktmbuf_data_len(m) - offset; + if (srclen <= l) { + if (EVP_EncryptUpdate(ctx, *dst, &dstlen, src, srclen) <= 0) + return -1; + *dst += l; + return 0; + } + + if (EVP_EncryptUpdate(ctx, *dst, &dstlen, src, l) <= 0) + return -1; + + *dst += dstlen; + n -= l; + + for (m = m->next; (m != NULL) && (n > 0); m = m->next) { + src = rte_pktmbuf_mtod(m, uint8_t *); + l = rte_pktmbuf_data_len(m) < n ? rte_pktmbuf_data_len(m) : n; + if (EVP_EncryptUpdate(ctx, *dst, &dstlen, src, l) <= 0) + return -1; + *dst += dstlen; + n -= l; + } + + return 0; +} + +static inline int +process_openssl_decryption_update(struct rte_mbuf *mbuf_src, int offset, + uint8_t **dst, int srclen, EVP_CIPHER_CTX *ctx) +{ + struct rte_mbuf *m; + int dstlen; + int l, n = srclen; + uint8_t *src; + + for (m = mbuf_src; m != NULL && offset > rte_pktmbuf_data_len(m); + m = m->next) + offset -= rte_pktmbuf_data_len(m); + + if (m == 0) + return -1; + + src = rte_pktmbuf_mtod_offset(m, uint8_t *, offset); + + l = rte_pktmbuf_data_len(m) - offset; + if (srclen <= l) { + if (EVP_DecryptUpdate(ctx, *dst, &dstlen, src, srclen) <= 0) + return -1; + *dst += l; + return 0; + } + + if (EVP_DecryptUpdate(ctx, *dst, &dstlen, src, l) <= 0) + return -1; + + *dst += dstlen; + n -= l; + + for (m = m->next; (m != NULL) && (n > 0); m = m->next) { + src = rte_pktmbuf_mtod(m, uint8_t *); + l = rte_pktmbuf_data_len(m) < n ? 
rte_pktmbuf_data_len(m) : n; + if (EVP_DecryptUpdate(ctx, *dst, &dstlen, src, l) <= 0) + return -1; + *dst += dstlen; + n -= l; + } + + return 0; +} /** Process standard openssl cipher encryption */ static int -process_openssl_cipher_encrypt(uint8_t *src, uint8_t *dst, - uint8_t *iv, uint8_t *key, int srclen, +process_openssl_cipher_encrypt(struct rte_mbuf *mbuf_src, uint8_t *dst, + int offset, uint8_t *iv, uint8_t *key, int srclen, EVP_CIPHER_CTX *ctx, const EVP_CIPHER *algo) { - int dstlen, totlen; + int totlen; if (EVP_EncryptInit_ex(ctx, algo, NULL, key, iv) <= 0) goto process_cipher_encrypt_err; EVP_CIPHER_CTX_set_padding(ctx, 0); - if (EVP_EncryptUpdate(ctx, dst, &dstlen, src, srclen) <= 0) + if (process_openssl_encryption_update(mbuf_src, offset, &dst, + srclen, ctx)) goto process_cipher_encrypt_err; - if (EVP_EncryptFinal_ex(ctx, dst + dstlen, &totlen) <= 0) + if (EVP_EncryptFinal_ex(ctx, dst, &totlen) <= 0) goto process_cipher_encrypt_err; return 0; @@ -513,11 +601,11 @@ /** Process standard openssl cipher decryption */ static int -process_openssl_cipher_decrypt(uint8_t *s
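The two update helpers above share the same per-segment pattern: skip
`offset` bytes into the chain, then feed each segment to EVP_*Update(). A
generic, self-contained sketch of that pattern follows (the per-segment
"work" is just a byte sum here, so the example is independent of OpenSSL):

#include <stdint.h>
#include <rte_common.h>
#include <rte_mbuf.h>

/*
 * Skip `offset` bytes into a (possibly chained) mbuf, then visit `len`
 * bytes segment by segment. Returns -1 if the chain is too short.
 */
static int
walk_mbuf_chain(const struct rte_mbuf *m, uint32_t offset, uint32_t len,
		uint32_t *sum)
{
	/* Find the segment containing `offset`. */
	while (m != NULL && offset >= rte_pktmbuf_data_len(m)) {
		offset -= rte_pktmbuf_data_len(m);
		m = m->next;
	}

	while (m != NULL && len > 0) {
		uint32_t l = RTE_MIN(len,
				(uint32_t)(rte_pktmbuf_data_len(m) - offset));
		const uint8_t *src = rte_pktmbuf_mtod_offset(m,
				const uint8_t *, offset);
		uint32_t i;

		for (i = 0; i < l; i++)
			*sum += src[i];

		len -= l;
		offset = 0;	/* later segments start at byte 0 */
		m = m->next;
	}

	return len == 0 ? 0 : -1;
}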
[dpdk-dev] [PATCH v4 3/3] test: add sgl unit tests for crypto devices
This patch provides unit tests for set of cipher/hash combinations covering currently implemented crypto PMD's and allowing to verify scatter gather support. Signed-off-by: Daniel Mrzyglod Signed-off-by: Tomasz Kulasek --- app/test/test_cryptodev.c | 386 ++- app/test/test_cryptodev.h | 139 +++ app/test/test_cryptodev_aes_test_vectors.h | 52 +++ app/test/test_cryptodev_blockcipher.c | 180 + app/test/test_cryptodev_blockcipher.h |1 + app/test/test_cryptodev_gcm_test_vectors.h | 553 6 files changed, 1241 insertions(+), 70 deletions(-) diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c index 3eaf1b7..4c9a54f 100644 --- a/app/test/test_cryptodev.c +++ b/app/test/test_cryptodev.c @@ -1736,6 +1736,10 @@ struct crypto_unittest_params { TEST_ASSERT_NOT_NULL(sym_op->cipher.iv.data, "no room to prepend iv"); + /* For OOP operation both buffers must have the same size */ + if (ut_params->obuf) + rte_pktmbuf_prepend(ut_params->obuf, iv_pad_len); + memset(sym_op->cipher.iv.data, 0, iv_pad_len); sym_op->cipher.iv.phys_addr = rte_pktmbuf_mtophys(ut_params->ibuf); sym_op->cipher.iv.length = iv_pad_len; @@ -2557,6 +2561,83 @@ struct crypto_unittest_params { } static int +test_kasumi_encryption_sgl(const struct kasumi_test_data *tdata) +{ + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unittest_params *ut_params = &unittest_params; + + int retval; + + unsigned int plaintext_pad_len; + unsigned int plaintext_len; + + uint8_t buffer[1]; + const uint8_t *ciphertext; + + struct rte_cryptodev_info dev_info; + + rte_cryptodev_info_get(ts_params->valid_devs[0], &dev_info); + if (!(dev_info.feature_flags & RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER)) { + printf("Device doesn't support scatter-gather. " + "Test Skipped.\n"); + return 0; + } + + /* Create KASUMI session */ + retval = create_wireless_algo_cipher_session(ts_params->valid_devs[0], + RTE_CRYPTO_CIPHER_OP_ENCRYPT, + RTE_CRYPTO_CIPHER_KASUMI_F8, + tdata->key.data, tdata->key.len); + if (retval < 0) + return retval; + + plaintext_len = ceil_byte_length(tdata->plaintext.len); + + + /* Append data which is padded to a multiple */ + /* of the algorithms block size */ + plaintext_pad_len = RTE_ALIGN_CEIL(plaintext_len, 8); + + ut_params->ibuf = create_segmented_mbuf(ts_params->mbuf_pool, + plaintext_pad_len, 10); + + pktmbuf_write(ut_params->ibuf, 0, plaintext_len, tdata->plaintext.data); + + /* Create KASUMI operation */ + retval = create_wireless_algo_cipher_operation(tdata->iv.data, + tdata->iv.len, + tdata->plaintext.len, + tdata->validCipherOffsetLenInBits.len, + RTE_CRYPTO_CIPHER_KASUMI_F8); + if (retval < 0) + return retval; + + ut_params->op = process_crypto_request(ts_params->valid_devs[0], + ut_params->op); + TEST_ASSERT_NOT_NULL(ut_params->op, "failed to retrieve obuf"); + + ut_params->obuf = ut_params->op->sym->m_dst; + + if (ut_params->obuf) + ciphertext = rte_pktmbuf_read(ut_params->obuf, tdata->iv.len, + plaintext_len, buffer); + else + ciphertext = rte_pktmbuf_read(ut_params->ibuf, tdata->iv.len, + plaintext_len, buffer); + + /* Validate obuf */ + TEST_HEXDUMP(stdout, "ciphertext:", ciphertext, plaintext_len); + + /* Validate obuf */ + TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( + ciphertext, + tdata->ciphertext.data, + tdata->validCipherLenInBits.len, + "KASUMI Ciphertext data not as expected"); + return 0; +} + +static int test_kasumi_encryption_oop(const struct kasumi_test_data *tdata) { struct crypto_testsuite_params *ts_params = &testsuite_params; @@ -2625,6 +2706,81 @@ struct crypto_unittest_params { } 
static int +test_kasumi_encryption_oop_sgl(const struct kasumi_test_data *tdata) +{ + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unittest_params *ut_params = &unittest
[dpdk-dev] [PATCH v5] mbuf: add a function to linearize a packet
This patch adds function rte_pktmbuf_linearize to let crypto PMD coalesce chained mbuf before crypto operation and extend their capabilities to support segmented mbufs when device cannot handle them natively. Included unit tests for rte_pktmbuf_linearize functionality: 1) Creates banch of segmented mbufs with different size and number of segments. 2) Fills noncontigouos mbuf with sequential values. 3) Uses rte_pktmbuf_linearize to coalesce segmented buffer into one contiguous. 4) Verifies data in linearized buffer. Dependencies: This patch is rebased to the dpdk-next-crypto and should be applied before "Chained Mbufs support in SW PMDs" patchset. changes in v5: - name of patch changed, - improved coding style, changes in v4: - separated from "Chained Mbufs support in SW PMDs" patch set for better reviewing, - merged "rte_pktmbuf_linearize" implementation with unit tests, changes in v3: - rebased to dpdk-next-crypto changes in v2: - rte_pktmbuf_coalesce replaced with rte_pktmbuf_linearize Cc: Pablo de Lara Cc: Olivier Matz Signed-off-by: Tomasz Kulasek --- app/test/test_mbuf.c | 123 lib/librte_mbuf/rte_mbuf.h | 51 ++ 2 files changed, 174 insertions(+) diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c index c0823ea..a2e9bc6 100644 --- a/app/test/test_mbuf.c +++ b/app/test/test_mbuf.c @@ -930,6 +930,124 @@ return 0; } +static int +test_mbuf_linearize(int pkt_len, int nb_segs) { + + struct rte_mbuf *m = NULL, *mbuf = NULL; + uint8_t *data; + int data_len = 0; + int remain; + int seg, seg_len; + int i; + + if (pkt_len < 1) { + printf("Packet size must be 1 or more (is %d)\n", pkt_len); + return -1; + } + + if (nb_segs < 1) { + printf("Number of segments must be 1 or more (is %d)\n", + nb_segs); + return -1; + } + + seg_len = pkt_len / nb_segs; + if (seg_len == 0) + seg_len = 1; + + remain = pkt_len; + + /* Create chained mbuf_src and fill it generated data */ + for (seg = 0; remain > 0; seg++) { + + m = rte_pktmbuf_alloc(pktmbuf_pool); + if (m == NULL) { + printf("Cannot create segment for source mbuf"); + goto fail; + } + + /* Make sure if tailroom is zeroed */ + memset(rte_pktmbuf_mtod(m, uint8_t *), 0, + rte_pktmbuf_tailroom(m)); + + data_len = remain; + if (data_len > seg_len) + data_len = seg_len; + + data = (uint8_t *)rte_pktmbuf_append(m, data_len); + if (data == NULL) { + printf("Cannot append %d bytes to the mbuf\n", + data_len); + goto fail; + } + + for (i = 0; i < data_len; i++) + data[i] = (seg * seg_len + i) % 0x0ff; + + if (seg == 0) + mbuf = m; + else + rte_pktmbuf_chain(mbuf, m); + + remain -= data_len; + } + + /* Create destination buffer to store coalesced data */ + if (rte_pktmbuf_linearize(mbuf)) { + printf("Mbuf linearization failed\n"); + goto fail; + } + + if (!rte_pktmbuf_is_contiguous(mbuf)) { + printf("Source buffer should be contiguous after " + "linearization\n"); + goto fail; + } + + data = rte_pktmbuf_mtod(mbuf, uint8_t *); + + for (i = 0; i < pkt_len; i++) + if (data[i] != (i % 0x0ff)) { + printf("Incorrect data in linearized mbuf\n"); + goto fail; + } + + rte_pktmbuf_free(mbuf); + return 0; + +fail: + if (mbuf) + rte_pktmbuf_free(mbuf); + return -1; +} + +static int +test_mbuf_linearize_check(void) +{ + struct test_mbuf_array { + int size; + int nb_segs; + } mbuf_array[] = { + { 128, 1 }, + { 64, 64 }, + { 512, 10 }, + { 250, 11 }, + { 123, 8 }, + }; + unsigned int i; + + printf("Test mbuf linearize API\n"); + + for (i = 0; i < RTE_DIM(mbuf_array); i++) + if (test_mbuf_linearize(mbuf_array[i].size, + mbuf_array[i].nb_segs)) { + printf("Test failed for %d, 
%d\n", mbuf_array[i].size, +
[dpdk-dev] [PATCH v5 2/3] crypto: add sgl support in openssl PMD
Signed-off-by: Tomasz Kulasek --- doc/guides/cryptodevs/openssl.rst|3 +- drivers/crypto/openssl/rte_openssl_pmd.c | 329 +++--- 2 files changed, 259 insertions(+), 73 deletions(-) diff --git a/doc/guides/cryptodevs/openssl.rst b/doc/guides/cryptodevs/openssl.rst index f1c39ba..f6ed6ea 100644 --- a/doc/guides/cryptodevs/openssl.rst +++ b/doc/guides/cryptodevs/openssl.rst @@ -112,6 +112,7 @@ Limitations --- * Maximum number of sessions is 2048. -* Chained mbufs are not supported. +* Chained mbufs are supported only for source mbuf (destination must be + contiguous). * Hash only is not supported for GCM and GMAC. * Cipher only is not supported for GCM and GMAC. diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c index 312154a..426e407 100644 --- a/drivers/crypto/openssl/rte_openssl_pmd.c +++ b/drivers/crypto/openssl/rte_openssl_pmd.c @@ -484,24 +484,112 @@ * Process Operations *-- */ +static inline int +process_openssl_encryption_update(struct rte_mbuf *mbuf_src, int offset, + uint8_t **dst, int srclen, EVP_CIPHER_CTX *ctx) +{ + struct rte_mbuf *m; + int dstlen; + int l, n = srclen; + uint8_t *src; + + for (m = mbuf_src; m != NULL && offset > rte_pktmbuf_data_len(m); + m = m->next) + offset -= rte_pktmbuf_data_len(m); + + if (m == 0) + return -1; + + src = rte_pktmbuf_mtod_offset(m, uint8_t *, offset); + + l = rte_pktmbuf_data_len(m) - offset; + if (srclen <= l) { + if (EVP_EncryptUpdate(ctx, *dst, &dstlen, src, srclen) <= 0) + return -1; + *dst += l; + return 0; + } + + if (EVP_EncryptUpdate(ctx, *dst, &dstlen, src, l) <= 0) + return -1; + + *dst += dstlen; + n -= l; + + for (m = m->next; (m != NULL) && (n > 0); m = m->next) { + src = rte_pktmbuf_mtod(m, uint8_t *); + l = rte_pktmbuf_data_len(m) < n ? rte_pktmbuf_data_len(m) : n; + if (EVP_EncryptUpdate(ctx, *dst, &dstlen, src, l) <= 0) + return -1; + *dst += dstlen; + n -= l; + } + + return 0; +} + +static inline int +process_openssl_decryption_update(struct rte_mbuf *mbuf_src, int offset, + uint8_t **dst, int srclen, EVP_CIPHER_CTX *ctx) +{ + struct rte_mbuf *m; + int dstlen; + int l, n = srclen; + uint8_t *src; + + for (m = mbuf_src; m != NULL && offset > rte_pktmbuf_data_len(m); + m = m->next) + offset -= rte_pktmbuf_data_len(m); + + if (m == 0) + return -1; + + src = rte_pktmbuf_mtod_offset(m, uint8_t *, offset); + + l = rte_pktmbuf_data_len(m) - offset; + if (srclen <= l) { + if (EVP_DecryptUpdate(ctx, *dst, &dstlen, src, srclen) <= 0) + return -1; + *dst += l; + return 0; + } + + if (EVP_DecryptUpdate(ctx, *dst, &dstlen, src, l) <= 0) + return -1; + + *dst += dstlen; + n -= l; + + for (m = m->next; (m != NULL) && (n > 0); m = m->next) { + src = rte_pktmbuf_mtod(m, uint8_t *); + l = rte_pktmbuf_data_len(m) < n ? 
rte_pktmbuf_data_len(m) : n; + if (EVP_DecryptUpdate(ctx, *dst, &dstlen, src, l) <= 0) + return -1; + *dst += dstlen; + n -= l; + } + + return 0; +} /** Process standard openssl cipher encryption */ static int -process_openssl_cipher_encrypt(uint8_t *src, uint8_t *dst, - uint8_t *iv, uint8_t *key, int srclen, +process_openssl_cipher_encrypt(struct rte_mbuf *mbuf_src, uint8_t *dst, + int offset, uint8_t *iv, uint8_t *key, int srclen, EVP_CIPHER_CTX *ctx, const EVP_CIPHER *algo) { - int dstlen, totlen; + int totlen; if (EVP_EncryptInit_ex(ctx, algo, NULL, key, iv) <= 0) goto process_cipher_encrypt_err; EVP_CIPHER_CTX_set_padding(ctx, 0); - if (EVP_EncryptUpdate(ctx, dst, &dstlen, src, srclen) <= 0) + if (process_openssl_encryption_update(mbuf_src, offset, &dst, + srclen, ctx)) goto process_cipher_encrypt_err; - if (EVP_EncryptFinal_ex(ctx, dst + dstlen, &totlen) <= 0) + if (EVP_EncryptFinal_ex(ctx, dst, &totlen) <= 0) goto process_cipher_encrypt_err; return 0; @@ -513,23 +601,23 @@ /** Process standard openssl cipher decryption */ static int -process_openssl_cipher_decrypt(uint8_t *s
[dpdk-dev] [PATCH v5 1/3] crypto: add sgl support in sw PMDs
This patch introduces RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER feature flag informing that selected crypto device supports segmented mbufs natively and doesn't need to be coalesced before crypto operation. While using segmented buffers in crypto devices may have unpredictable results, for PMDs which doesn't support it natively, additional check is made for debug compilation. Signed-off-by: Tomasz Kulasek --- drivers/crypto/aesni_gcm/aesni_gcm_pmd.c | 14 ++ drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 19 --- drivers/crypto/kasumi/rte_kasumi_pmd.c | 13 + drivers/crypto/null/null_crypto_pmd.c |3 ++- drivers/crypto/snow3g/rte_snow3g_pmd.c | 15 +++ drivers/crypto/zuc/rte_zuc_pmd.c | 13 + lib/librte_cryptodev/rte_cryptodev.c |4 ++-- lib/librte_cryptodev/rte_cryptodev.h |2 ++ 8 files changed, 77 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c b/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c index af3d60f..5af22f7 100644 --- a/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c +++ b/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c @@ -377,6 +377,20 @@ break; } +#ifdef RTE_LIBRTE_PMD_AESNI_GCM_DEBUG + if (!rte_pktmbuf_is_contiguous(ops[i]->sym->m_src) || + (ops[i]->sym->m_dst != NULL && + !rte_pktmbuf_is_contiguous( + ops[i]->sym->m_dst))) { + ops[i]->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; + GCM_LOG_ERR("PMD supports only contiguous mbufs, " + "op (%p) provides noncontiguous mbuf as " + "source/destination buffer.\n", ops[i]); + qp->qp_stats.enqueue_err_count++; + break; + } +#endif + retval = process_gcm_crypto_op(qp, ops[i]->sym, sess); if (retval < 0) { ops[i]->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; diff --git a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c index 6d27d75..25f681b 100644 --- a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c +++ b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c @@ -571,15 +571,28 @@ int i, processed_jobs = 0; for (i = 0; i < nb_ops; i++) { -#ifdef RTE_LIBRTE_AESNI_MB_DEBUG - if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC)) { +#ifdef RTE_LIBRTE_PMD_AESNI_MB_DEBUG + if (unlikely(ops[i]->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC)) { MB_LOG_ERR("PMD only supports symmetric crypto " "operation requests, op (%p) is not a " - "symmetric operation.", op); + "symmetric operation.", ops[i]); + qp->stats.enqueue_err_count++; + goto flush_jobs; + } + + if (!rte_pktmbuf_is_contiguous(ops[i]->sym->m_src) || + (ops[i]->sym->m_dst != NULL && + !rte_pktmbuf_is_contiguous( + ops[i]->sym->m_dst))) { + MB_LOG_ERR("PMD supports only contiguous mbufs, " + "op (%p) provides noncontiguous mbuf as " + "source/destination buffer.\n", ops[i]); + ops[i]->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; qp->stats.enqueue_err_count++; goto flush_jobs; } #endif + sess = get_session(qp, ops[i]); if (unlikely(sess == NULL)) { qp->stats.enqueue_err_count++; diff --git a/drivers/crypto/kasumi/rte_kasumi_pmd.c b/drivers/crypto/kasumi/rte_kasumi_pmd.c index b119da2..4bdd7bb 100644 --- a/drivers/crypto/kasumi/rte_kasumi_pmd.c +++ b/drivers/crypto/kasumi/rte_kasumi_pmd.c @@ -455,6 +455,19 @@ for (i = 0; i < nb_ops; i++) { curr_c_op = ops[i]; +#ifdef RTE_LIBRTE_PMD_KASUMI_DEBUG + if (!rte_pktmbuf_is_contiguous(curr_c_op->sym->m_src) || + (curr_c_op->sym->m_dst != NULL && + !rte_pktmbuf_is_contiguous( + curr_c_op->sym->m_dst))) { + KASUMI_LOG_ERR("PMD supports only contiguous mbufs, " + "op (%p) provides noncontiguous mbuf as " + "s
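For completeness (illustrative, not from the patch): a PMD that does handle
chained mbufs natively would advertise the new flag the same way the existing
cryptodev feature flags are set at device init, e.g. in a hypothetical
driver:

#include <rte_cryptodev.h>
#include <rte_cryptodev_pmd.h>

/*
 * Hypothetical PMD init fragment: advertise native scatter-gather support
 * together with the other feature flags, so that rte_cryptodev_info_get()
 * reports it to the application.
 */
static void
example_pmd_set_feature_flags(struct rte_cryptodev *dev)
{
	dev->feature_flags = RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO |
			RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING |
			RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER;
}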
[dpdk-dev] [PATCH v5 3/3] test: add sgl unit tests for crypto devices
This patch provides unit tests for set of cipher/hash combinations covering currently implemented crypto PMD's and allowing to verify scatter gather support. Signed-off-by: Daniel Mrzyglod Signed-off-by: Tomasz Kulasek --- app/test/test_cryptodev.c | 386 ++- app/test/test_cryptodev.h | 138 +++ app/test/test_cryptodev_aes_test_vectors.h | 52 +++ app/test/test_cryptodev_blockcipher.c | 87 ++--- app/test/test_cryptodev_blockcipher.h |1 + app/test/test_cryptodev_gcm_test_vectors.h | 553 6 files changed, 1168 insertions(+), 49 deletions(-) diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c index 3eaf1b7..5786fde 100644 --- a/app/test/test_cryptodev.c +++ b/app/test/test_cryptodev.c @@ -1736,6 +1736,10 @@ struct crypto_unittest_params { TEST_ASSERT_NOT_NULL(sym_op->cipher.iv.data, "no room to prepend iv"); + /* For OOP operation both buffers must have the same size */ + if (ut_params->obuf) + rte_pktmbuf_prepend(ut_params->obuf, iv_pad_len); + memset(sym_op->cipher.iv.data, 0, iv_pad_len); sym_op->cipher.iv.phys_addr = rte_pktmbuf_mtophys(ut_params->ibuf); sym_op->cipher.iv.length = iv_pad_len; @@ -2557,6 +2561,83 @@ struct crypto_unittest_params { } static int +test_kasumi_encryption_sgl(const struct kasumi_test_data *tdata) +{ + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unittest_params *ut_params = &unittest_params; + + int retval; + + unsigned int plaintext_pad_len; + unsigned int plaintext_len; + + uint8_t buffer[1]; + const uint8_t *ciphertext; + + struct rte_cryptodev_info dev_info; + + rte_cryptodev_info_get(ts_params->valid_devs[0], &dev_info); + if (!(dev_info.feature_flags & RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER)) { + printf("Device doesn't support scatter-gather. " + "Test Skipped.\n"); + return 0; + } + + /* Create KASUMI session */ + retval = create_wireless_algo_cipher_session(ts_params->valid_devs[0], + RTE_CRYPTO_CIPHER_OP_ENCRYPT, + RTE_CRYPTO_CIPHER_KASUMI_F8, + tdata->key.data, tdata->key.len); + if (retval < 0) + return retval; + + plaintext_len = ceil_byte_length(tdata->plaintext.len); + + + /* Append data which is padded to a multiple */ + /* of the algorithms block size */ + plaintext_pad_len = RTE_ALIGN_CEIL(plaintext_len, 8); + + ut_params->ibuf = create_segmented_mbuf(ts_params->mbuf_pool, + plaintext_pad_len, 10, 0); + + pktmbuf_write(ut_params->ibuf, 0, plaintext_len, tdata->plaintext.data); + + /* Create KASUMI operation */ + retval = create_wireless_algo_cipher_operation(tdata->iv.data, + tdata->iv.len, + tdata->plaintext.len, + tdata->validCipherOffsetLenInBits.len, + RTE_CRYPTO_CIPHER_KASUMI_F8); + if (retval < 0) + return retval; + + ut_params->op = process_crypto_request(ts_params->valid_devs[0], + ut_params->op); + TEST_ASSERT_NOT_NULL(ut_params->op, "failed to retrieve obuf"); + + ut_params->obuf = ut_params->op->sym->m_dst; + + if (ut_params->obuf) + ciphertext = rte_pktmbuf_read(ut_params->obuf, tdata->iv.len, + plaintext_len, buffer); + else + ciphertext = rte_pktmbuf_read(ut_params->ibuf, tdata->iv.len, + plaintext_len, buffer); + + /* Validate obuf */ + TEST_HEXDUMP(stdout, "ciphertext:", ciphertext, plaintext_len); + + /* Validate obuf */ + TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( + ciphertext, + tdata->ciphertext.data, + tdata->validCipherLenInBits.len, + "KASUMI Ciphertext data not as expected"); + return 0; +} + +static int test_kasumi_encryption_oop(const struct kasumi_test_data *tdata) { struct crypto_testsuite_params *ts_params = &testsuite_params; @@ -2625,6 +2706,81 @@ struct crypto_unittest_params { 
} static int +test_kasumi_encryption_oop_sgl(const struct kasumi_test_data *tdata) +{ + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unittest_params *ut_params = &unittest
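The tests above rely on the create_segmented_mbuf() and pktmbuf_write()
helpers from app/test/test_cryptodev.h, whose bodies are not shown in this
excerpt. A possible shape of pktmbuf_write() (an assumption for illustration;
the real helper may differ) is to scatter the test vector across the
segments:

#include <stdint.h>
#include <rte_common.h>
#include <rte_mbuf.h>
#include <rte_memcpy.h>

/*
 * Scatter `len` bytes of `src` into a segmented mbuf starting at `offset`.
 * Returns -1 if the chain is shorter than offset + len.
 */
static int
pktmbuf_write_sketch(struct rte_mbuf *m, uint32_t offset, uint32_t len,
		const uint8_t *src)
{
	while (m != NULL && offset >= rte_pktmbuf_data_len(m)) {
		offset -= rte_pktmbuf_data_len(m);
		m = m->next;
	}

	while (m != NULL && len > 0) {
		uint32_t l = RTE_MIN(len,
				(uint32_t)(rte_pktmbuf_data_len(m) - offset));

		rte_memcpy(rte_pktmbuf_mtod_offset(m, uint8_t *, offset),
				src, l);
		src += l;
		len -= l;
		offset = 0;
		m = m->next;
	}

	return len == 0 ? 0 : -1;
}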
[dpdk-dev] [PATCH v5 0/3] Chained Mbufs support in SW PMDs
This patch set adds scatter-gather list support to the software crypto PMDs.
As of now an application needs to provide a contiguous block of memory for
each mbuf, which is not always possible. Hence the need to support chains of
smaller mbufs that can be used together. The work involves:

 a) mbuf functions to coalesce an mbuf chain into a single mbuf,
 b) code in each software poll mode driver to detect chained mbufs and
    coalesce them before performing crypto,
 c) relevant unit tests covering the new functionality.

Known limitations for the openssl PMD:
--------------------------------------
Since the libcrypto library expects a contiguous destination buffer for the
output of cipher operations, the openssl PMD is limited the same way and
requires a contiguous destination mbuf.

changes in v5:
 - rebased to the master of dpdk-next-crypto,

changes in v4:
 - separated the "rte_pktmbuf_linearize" implementation from this patch set
   and sent it as a new patch for easier review,

changes in v3:
 - rebased to dpdk-next-crypto,
 - reused the tests for AES GCM SGL support in openssl from "app/test: add
   SGL tests to cryptodev QAT suite",

changes in v2:
 - added support for sgl in the openssl PMD,
 - rte_pktmbuf_coalesce replaced with rte_pktmbuf_linearize,
 - extended the aes gcm test vector data from 60 to 2048 bytes,

Tomasz Kulasek (3):
  crypto: add sgl support in sw PMDs
  crypto: add sgl support in openssl PMD
  test: add sgl unit tests for crypto devices

 app/test/test_cryptodev.c                  | 386 ++-
 app/test/test_cryptodev.h                  | 138 +++
 app/test/test_cryptodev_aes_test_vectors.h |  52 +++
 app/test/test_cryptodev_blockcipher.c      |  90 ++---
 app/test/test_cryptodev_blockcipher.h      |   1 +
 app/test/test_cryptodev_gcm_test_vectors.h | 553
 doc/guides/cryptodevs/openssl.rst          |   3 +-
 drivers/crypto/aesni_gcm/aesni_gcm_pmd.c   |  14 +
 drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c |  19 +-
 drivers/crypto/kasumi/rte_kasumi_pmd.c     |  13 +
 drivers/crypto/null/null_crypto_pmd.c      |   3 +-
 drivers/crypto/openssl/rte_openssl_pmd.c   | 329 +
 drivers/crypto/snow3g/rte_snow3g_pmd.c     |  15 +
 drivers/crypto/zuc/rte_zuc_pmd.c           |  13 +
 lib/librte_cryptodev/rte_cryptodev.c       |   4 +-
 lib/librte_cryptodev/rte_cryptodev.h       |   2 +
 16 files changed, 1507 insertions(+), 128 deletions(-)

-- 
1.7.9.5
[dpdk-dev] [PATCH v4 0/6] add Tx preparation
As discussed in this thread:

    http://dpdk.org/ml/archives/dev/2015-September/023603.html

different NIC models, depending on the HW offloads requested, may impose
different requirements on the packets to be transmitted, in terms of:

 - the maximum number of fragments allowed per packet,
 - the maximum number of fragments per TSO segment,
 - the way the pseudo-header checksum should be pre-calculated,
 - L3/L4 header field filling,
 - etc.

MOTIVATION:
-----------
1) Some work cannot (and should not) be done in rte_eth_tx_burst. However,
   this work is sometimes required, and today it is left to the application.

2) Different hardware may have different requirements for TX offloads; only
   a subset may be supported, and so on.

3) Some parameters (e.g. the number of segments in the ixgbe driver) may
   hang the device. These parameters vary between devices. For example, i40e
   HW allows 8 fragments per packet, but that is after TSO segmentation,
   while ixgbe has a 38-fragment pre-TSO limit.

4) Packet fields may require different initialization (e.g. pseudo-header
   checksum precalculation, sometimes done differently depending on the
   packet type, and so on). Today the application needs to take care of it.

5) Using an additional API (rte_eth_tx_prep) before rte_eth_tx_burst lets
   the application prepare the packet burst in a form acceptable to the
   specific device.

6) Additional checks may be done in debug mode, keeping the tx_burst
   implementation clean.

PROPOSAL:
---------
To help the user deal with all these varieties we propose to:

1) Introduce an rte_eth_tx_prep() function to do the necessary preparation
   of a packet burst so it can be safely transmitted on the device with the
   desired HW offloads (set/reset checksum fields according to the hardware
   requirements) and to check HW constraints (number of segments per packet,
   etc).

   While the limitations and requirements may differ between devices, this
   requires extending the rte_eth_dev structure with a new function pointer,
   "tx_pkt_prep", which can be implemented by the driver to prepare and
   verify packets in a device-specific way before the burst, and which
   should prevent the application from sending malformed packets.

2) Introduce new fields in rte_eth_desc_lim: nb_seg_max and nb_mtu_seg_max,
   providing information about the maximum number of segments per TSO and
   non-TSO packet acceptable by the device. This information helps the
   application avoid creating malformed packets.

APPLICATION (EXAMPLE OF USE):
-----------------------------
1) The application initializes the burst of packets to send and sets the
   required TX offload flags and fields, like l2_len, l3_len, l4_len and
   tso_segsz.

2) The application passes the burst to rte_eth_tx_prep to check the
   conditions required to send the packets through the NIC.

3) The result of rte_eth_tx_prep can be used to send the valid packets
   and/or to restore the invalid ones if the function fails, e.g.

	for (i = 0; i < nb_pkts; i++) {

		/* initialize or process packet */

		bufs[i]->tso_segsz = 800;
		bufs[i]->ol_flags = PKT_TX_TCP_SEG | PKT_TX_IPV4
				| PKT_TX_IP_CKSUM;
		bufs[i]->l2_len = sizeof(struct ether_hdr);
		bufs[i]->l3_len = sizeof(struct ipv4_hdr);
		bufs[i]->l4_len = sizeof(struct tcp_hdr);
	}

	/* Prepare burst of TX packets */
	nb_prep = rte_eth_tx_prep(port, 0, bufs, nb_pkts);

	if (nb_prep < nb_pkts) {
		printf("tx_prep failed\n");

		/* nb_prep indicates the first invalid packet.
		 * rte_eth_tx_prep can be run again on the remaining
		 * packets to find more. */
	}

	/* Send burst of TX packets */
	nb_tx = rte_eth_tx_burst(port, 0, bufs, nb_prep);

	/* Free any unsent packets. */

v4 changes:
 - tx_prep is now set to the default behavior (NULL) for the simple/vector
   paths in the fm10k, i40e and ixgbe drivers, where Tx offloads are
   intentionally not available, to increase performance,

v3 changes:
 - reworked the csum testpmd engine instead of adding a new one,
 - fixed the checksum initialization procedure to also include outer
   checksum offloads,
 - some minor formatting fixes and optimizations,

v2 changes:
 - rte_eth_tx_prep() returns the number of packets when the device does not
   support the tx_prep functionality,
 - introduced CONFIG_RTE_ETHDEV_TX_PREP to allow turning tx_prep off,

Tomasz Kulasek (6):
  ethdev: add Tx preparation
  e1000: add Tx preparation
  fm10k: add Tx preparation
  i40e: add Tx preparation
  ixgbe: add Tx preparation
  testpmd: use Tx preparation in csum engine

 app/test-pmd/csumonly.c          |  97 +++
 config/common_base               |   1 +
 drivers/net/e1000/e1000_ethdev.h |  11
 drivers/net/e1000/em_ethdev.c    |   5 +-
 drivers/net/e1000/em_rxtx.c      |  48 +-
 drivers/net/e1000/igb_ethdev.c   |   4 ++
 drivers/net/e1000/
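For illustration (not part of the series): the new descriptor limits are
meant to be read through rte_eth_dev_info_get(), so the application can
reject an over-fragmented packet before it ever reaches rte_eth_tx_prep() or
rte_eth_tx_burst(). A sketch, assuming the nb_seg_max/nb_mtu_seg_max fields
proposed above:

#include <stdint.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Check a packet's segment count against the device's limits. */
static int
check_tx_seg_limits(uint8_t port_id, const struct rte_mbuf *m)
{
	struct rte_eth_dev_info dev_info;
	uint16_t limit;

	rte_eth_dev_info_get(port_id, &dev_info);

	/* TSO packets may use up to nb_seg_max segments, non-TSO packets
	 * only nb_mtu_seg_max. */
	limit = (m->ol_flags & PKT_TX_TCP_SEG) ?
			dev_info.tx_desc_lim.nb_seg_max :
			dev_info.tx_desc_lim.nb_mtu_seg_max;

	return m->nb_segs <= limit ? 0 : -1;
}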
[dpdk-dev] [PATCH v4 1/6] ethdev: add Tx preparation
Added API for `rte_eth_tx_prep` uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) Added fields to the `struct rte_eth_desc_lim`: uint16_t nb_seg_max; /**< Max number of segments per whole packet. */ uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ Created `rte_pkt.h` header with common used functions: int rte_validate_tx_offload(struct rte_mbuf *m) to validate general requirements for tx offload in packet such a flag completness. In current implementation this function is called optionaly when RTE_LIBRTE_ETHDEV_DEBUG is enabled. int rte_phdr_cksum_fix(struct rte_mbuf *m) to fix pseudo header checksum for TSO and non-TSO tcp/udp packets before hardware tx checksum offload. - for non-TSO tcp/udp packets full pseudo-header checksum is counted and set. - for TSO the IP payload length is not included. Signed-off-by: Tomasz Kulasek --- config/common_base|1 + lib/librte_ether/rte_ethdev.h | 85 ++ lib/librte_mbuf/rte_mbuf.h|8 +++ lib/librte_net/Makefile |2 +- lib/librte_net/rte_pkt.h | 133 + 5 files changed, 228 insertions(+), 1 deletion(-) create mode 100644 lib/librte_net/rte_pkt.h diff --git a/config/common_base b/config/common_base index 7830535..7ada9e0 100644 --- a/config/common_base +++ b/config/common_base @@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024 CONFIG_RTE_LIBRTE_IEEE1588=n CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y +CONFIG_RTE_ETHDEV_TX_PREP=y # # Support NIC bypass logic diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 96575e8..6594544 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -182,6 +182,7 @@ extern "C" { #include #include #include +#include #include "rte_ether.h" #include "rte_eth_ctrl.h" #include "rte_dev_info.h" @@ -699,6 +700,8 @@ struct rte_eth_desc_lim { uint16_t nb_max; /**< Max allowed number of descriptors. */ uint16_t nb_min; /**< Min allowed number of descriptors. */ uint16_t nb_align; /**< Number of descriptors should be aligned to. */ + uint16_t nb_seg_max; /**< Max number of segments per whole packet. */ + uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ }; /** @@ -1184,6 +1187,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq, uint16_t nb_pkts); /**< @internal Send output packets on a transmit queue of an Ethernet device. */ +typedef uint16_t (*eth_tx_prep_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */ + typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf); /**< @internal Get current flow control parameter on an Ethernet device */ @@ -1629,6 +1637,7 @@ enum rte_eth_dev_type { struct rte_eth_dev { eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ + eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */ struct rte_eth_dev_data *data; /**< Pointer to device data */ const struct eth_driver *driver;/**< Driver for this device */ const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ @@ -2837,6 +2846,82 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id, return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts); } +/** + * Process a burst of output packets on a transmit queue of an Ethernet device. 
+ * + * The rte_eth_tx_prep() function is invoked to prepare output packets to be + * transmitted on the output queue *queue_id* of the Ethernet device designated + * by its *port_id*. + * The *nb_pkts* parameter is the number of packets to be prepared which are + * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them + * allocated from a pool created with rte_pktmbuf_pool_create(). + * For each packet to send, the rte_eth_tx_prep() function performs + * the following operations: + * + * - Check if packet meets devices requirements for tx offloads. + * + * - Check limitations about number of segments. + * + * - Check additional requirements when debug is enabled. + * + * - Update and/or reset required checksums when tx offload is set for packet. + * + * The rte_eth_tx_prep() function returns the number of packets ready to be + * sent. A return value equal to *nb_pkts* means that all packets are
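As background for the rte_phdr_cksum_fix() description above (a simplified
sketch, not the actual rte_pkt.h code): for an IPv4/TCP packet the fix boils
down to writing the pseudo-header checksum produced by the existing
rte_ipv4_phdr_cksum() helper, which already leaves out the L4 payload length
when PKT_TX_TCP_SEG is set, matching the TSO rule quoted in the commit
message. l2_len/l3_len are expected to be set by the caller.

#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_mbuf.h>

/* IPv4/TCP-only illustration of the pseudo-header checksum fix. */
static inline void
phdr_cksum_fix_ipv4_tcp(struct rte_mbuf *m)
{
	struct ipv4_hdr *ipv4_hdr = rte_pktmbuf_mtod_offset(m,
			struct ipv4_hdr *, m->l2_len);
	struct tcp_hdr *tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
			m->l3_len);

	tcp_hdr->cksum = rte_ipv4_phdr_cksum(ipv4_hdr, m->ol_flags);
}

Per the description above, the real helper also covers UDP; the IPv6 case
would use rte_ipv6_phdr_cksum() analogously.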
[dpdk-dev] [PATCH v4 3/6] fm10k: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/fm10k/fm10k.h|6 + drivers/net/fm10k/fm10k_ethdev.c |5 drivers/net/fm10k/fm10k_rxtx.c | 50 +- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h index 05aa1a2..c6fed21 100644 --- a/drivers/net/fm10k/fm10k.h +++ b/drivers/net/fm10k/fm10k.h @@ -69,6 +69,9 @@ #define FM10K_MAX_RX_DESC (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc)) #define FM10K_MAX_TX_DESC (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc)) +#define FM10K_TX_MAX_SEG UINT8_MAX +#define FM10K_TX_MAX_MTU_SEG UINT8_MAX + /* * byte aligment for HW RX data buffer * Datasheet requires RX buffer addresses shall either be 512-byte aligned or @@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset); uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq); int fm10k_rx_vec_condition_check(struct rte_eth_dev *); void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq); diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index 0ecc167..8dacba7 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -1441,6 +1441,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev, .nb_max = FM10K_MAX_TX_DESC, .nb_min = FM10K_MIN_TX_DESC, .nb_align = FM10K_MULT_TX_DESC, + .nb_seg_max = FM10K_TX_MAX_SEG, + .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G | @@ -2749,8 +2751,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev) fm10k_txq_vec_setup(txq); } dev->tx_pkt_burst = fm10k_xmit_pkts_vec; + dev->tx_pkt_prep = NULL; } else { dev->tx_pkt_burst = fm10k_xmit_pkts; + dev->tx_pkt_prep = fm10k_prep_pkts; PMD_INIT_LOG(DEBUG, "Use regular Tx func"); } } @@ -2829,6 +2833,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev) dev->dev_ops = &fm10k_eth_dev_ops; dev->rx_pkt_burst = &fm10k_recv_pkts; dev->tx_pkt_burst = &fm10k_xmit_pkts; + dev->tx_pkt_prep = &fm10k_prep_pkts; /* only initialize in the primary process */ if (rte_eal_process_type() != RTE_PROC_PRIMARY) diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c index 5b2d04b..fa5bf9c 100644 --- a/drivers/net/fm10k/fm10k_rxtx.c +++ b/drivers/net/fm10k/fm10k_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -35,6 +35,7 @@ #include #include +#include #include "fm10k.h" #include "base/fm10k_type.h" @@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd) } #endif +#define FM10K_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT |\ + PKT_TX_IP_CKSUM |\ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#define FM10K_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK) + /* @note: When this function is changed, make corresponding change to * fm10k_dev_supported_ptypes_get() */ @@ -583,3 +593,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, return count; } + +uint16_t +fm10k_prep_pkts(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if ((m->ol_flags & PKT_TX_TCP_SEG) && + (m->tso_segsz < FM10K_TSO_MINMSS)) { + rte_errno = -EINVAL; + return i; + } + + if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -EINVAL; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} -- 1.7.9.5
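fm10k_prep_pkts() rejects TSO requests whose MSS is below FM10K_TSO_MINMSS and any offload bit outside FM10K_TX_OFFLOAD_MASK. For illustration only (m is the mbuf being prepared; the header sizes and MSS value are examples, not requirements of this patch), a TSO packet that passes those checks would typically be set up like this before the burst:

    /* Request IPv4 header checksum offload plus TCP segmentation. */
    m->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_TCP_SEG;
    m->l2_len = sizeof(struct ether_hdr);
    m->l3_len = sizeof(struct ipv4_hdr);
    m->l4_len = sizeof(struct tcp_hdr);
    m->tso_segsz = 1448;    /* must be at least FM10K_TSO_MINMSS */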
[dpdk-dev] [PATCH v4 2/6] e1000: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/e1000/e1000_ethdev.h | 11 drivers/net/e1000/em_ethdev.c|5 +++- drivers/net/e1000/em_rxtx.c | 48 ++- drivers/net/e1000/igb_ethdev.c |4 +++ drivers/net/e1000/igb_rxtx.c | 52 +- 5 files changed, 117 insertions(+), 3 deletions(-) diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h index 6c25c8d..bd0f277 100644 --- a/drivers/net/e1000/e1000_ethdev.h +++ b/drivers/net/e1000/e1000_ethdev.h @@ -138,6 +138,11 @@ #define E1000_MISC_VEC_ID RTE_INTR_VEC_ZERO_OFFSET #define E1000_RX_VEC_START RTE_INTR_VEC_RXTX_OFFSET +#define IGB_TX_MAX_SEG UINT8_MAX +#define IGB_TX_MAX_MTU_SEG UINT8_MAX +#define EM_TX_MAX_SEG UINT8_MAX +#define EM_TX_MAX_MTU_SEG UINT8_MAX + /* structure for interrupt relative data */ struct e1000_interrupt { uint32_t flags; @@ -315,6 +320,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev); uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); @@ -376,6 +384,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev); uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c index c5bf294..46515d4 100644 --- a/drivers/net/e1000/em_ethdev.c +++ b/drivers/net/e1000/em_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -300,6 +300,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = ð_em_ops; eth_dev->rx_pkt_burst = (eth_rx_burst_t)ð_em_recv_pkts; eth_dev->tx_pkt_burst = (eth_tx_burst_t)ð_em_xmit_pkts; + eth_dev->tx_pkt_prep = (eth_tx_prep_t)ð_em_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -1073,6 +1074,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_max = E1000_MAX_RING_DESC, .nb_min = E1000_MIN_RING_DESC, .nb_align = EM_TXD_ALIGN, + .nb_seg_max = EM_TX_MAX_SEG, + .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M | diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c index 41f51c0..44009d6 100644 --- a/drivers/net/e1000/em_rxtx.c +++ b/drivers/net/e1000/em_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -66,6 +66,7 @@ #include #include #include +#include #include #include "e1000_logs.h" @@ -77,6 +78,14 @@ #define E1000_RXDCTL_GRAN 0x0100 /* RXDCTL Granularity */ +#define E1000_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_VLAN_PKT) + +#define E1000_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK) + /** * Structure associated with each descriptor of the RX ring of a RX queue. 
*/ @@ -618,6 +627,43 @@ end_of_tx: /* * + * TX prep functions + * + **/ +uint16_t +eth_em_prep_pkts(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -EINVAL; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (
[dpdk-dev] [PATCH v4 4/6] i40e: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/i40e/i40e_ethdev.c |3 ++ drivers/net/i40e/i40e_rxtx.c | 72 +++- drivers/net/i40e/i40e_rxtx.h |8 + 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index b04c833..c1ee7e6 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -948,6 +948,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) dev->dev_ops = &i40e_eth_dev_ops; dev->rx_pkt_burst = i40e_recv_pkts; dev->tx_pkt_burst = i40e_xmit_pkts; + dev->tx_pkt_prep = i40e_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -2614,6 +2615,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_max = I40E_MAX_RING_DESC, .nb_min = I40E_MIN_RING_DESC, .nb_align = I40E_ALIGN_RING_DESC, + .nb_seg_max = I40E_TX_MAX_SEG, + .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG, }; if (pf->flags & I40E_FLAG_VMDQ) { diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 554d167..bb69175 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,6 +50,8 @@ #include #include #include +#include +#include #include "i40e_logs.h" #include "base/i40e_prototype.h" @@ -79,6 +81,17 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define I40E_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_OUTER_IP_CKSUM | \ + PKT_TX_TCP_SEG |\ + PKT_TX_QINQ_PKT | \ + PKT_TX_VLAN_PKT) + +#define I40E_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK) + static uint16_t i40e_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); @@ -1930,6 +1943,61 @@ i40e_xmit_pkts_simple(void *tx_queue, return nb_tx; } +/* + * + * TX prep functions + * + **/ +uint16_t +i40e_prep_pkts(void *tx_queue __rte_unused, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** +* m->nb_segs is uint8_t, so m->nb_segs is always less than +* I40E_TX_MAX_SEG. +* We check only a condition for m->nb_segs > I40E_TX_MAX_MTU_SEG. +*/ + if (!(ol_flags & PKT_TX_TCP_SEG)) { + if (m->nb_segs > I40E_TX_MAX_MTU_SEG) { + rte_errno = -1; + return i; + } + } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) || + (m->tso_segsz > I40E_MAX_TSO_MSS)) { + /* MSS outside the range (256B - 9674B) are considered malicious */ + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -EINVAL; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + return i; +} + /* * Find the VSI the queue belongs to. 'queue_idx' is the queue index * application used, which assume having sequential ones. But from driver's @@ -3271,9 +3339,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev) PMD_INIT_LOG(DEBUG, "Simple tx finally be used."); dev->tx_pkt_burst = i40e_xmit_pkts_simple; } + dev->tx_pkt_prep = NULL;
[dpdk-dev] [PATCH v4 5/6] ixgbe: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/ixgbe/ixgbe_ethdev.c |3 ++ drivers/net/ixgbe/ixgbe_ethdev.h |5 +++- drivers/net/ixgbe/ixgbe_rxtx.c | 56 +- drivers/net/ixgbe/ixgbe_rxtx.h |2 ++ 4 files changed, 64 insertions(+), 2 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index 73a406b..fa6f045 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -515,6 +515,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = { .nb_max = IXGBE_MAX_RING_DESC, .nb_min = IXGBE_MIN_RING_DESC, .nb_align = IXGBE_TXD_ALIGN, + .nb_seg_max = IXGBE_TX_MAX_SEG, + .nb_mtu_seg_max = IXGBE_TX_MAX_SEG, }; static const struct eth_dev_ops ixgbe_eth_dev_ops = { @@ -1101,6 +1103,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = &ixgbe_eth_dev_ops; eth_dev->rx_pkt_burst = &ixgbe_recv_pkts; eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts; + eth_dev->tx_pkt_prep = &ixgbe_prep_pkts; /* * For secondary processes, we don't initialise any further as primary diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h index 4ff6338..e229cf5 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.h +++ b/drivers/net/ixgbe/ixgbe_ethdev.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf); diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c index 8b99282..a0caa74 100644 --- a/drivers/net/ixgbe/ixgbe_rxtx.c +++ b/drivers/net/ixgbe/ixgbe_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * Copyright 2014 6WIND S.A. * All rights reserved. 
* @@ -70,6 +70,7 @@ #include #include #include +#include #include "ixgbe_logs.h" #include "base/ixgbe_api.h" @@ -87,6 +88,9 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK) + #if 1 #define RTE_PMD_USE_PREFETCH #endif @@ -905,6 +909,54 @@ end_of_tx: /* * + * TX prep functions + * + **/ +uint16_t +ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + /** +* Check if packet meets requirements for number of segments +* +* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and non-TSO +*/ + + if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) { + rte_errno = -EINVAL; + return i; + } + + if (m->ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -EINVAL; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + +/* + * * RX functions * **/ @@ -2280,6 +2332,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq) if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) { PMD_INIT_LOG(DEBUG, "Using simple tx code path"); + dev->tx_pkt_
[dpdk-dev] [PATCH v4 6/6] testpmd: use Tx preparation in csum engine
Signed-off-by: Tomasz Kulasek --- app/test-pmd/csumonly.c | 97 ++- 1 file changed, 54 insertions(+), 43 deletions(-) diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index 21cb78f..8fcf814 100644 --- a/app/test-pmd/csumonly.c +++ b/app/test-pmd/csumonly.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * Copyright 2014 6WIND S.A. * All rights reserved. * @@ -110,15 +110,6 @@ struct simple_gre_hdr { } __attribute__((__packed__)); static uint16_t -get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags) -{ - if (ethertype == _htons(ETHER_TYPE_IPv4)) - return rte_ipv4_phdr_cksum(l3_hdr, ol_flags); - else /* assume ethertype == ETHER_TYPE_IPv6 */ - return rte_ipv6_phdr_cksum(l3_hdr, ol_flags); -} - -static uint16_t get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype) { if (ethertype == _htons(ETHER_TYPE_IPv4)) @@ -368,11 +359,9 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info, /* do not recalculate udp cksum if it was 0 */ if (udp_hdr->dgram_cksum != 0) { udp_hdr->dgram_cksum = 0; - if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) { + if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) ol_flags |= PKT_TX_UDP_CKSUM; - udp_hdr->dgram_cksum = get_psd_sum(l3_hdr, - info->ethertype, ol_flags); - } else { + else { udp_hdr->dgram_cksum = get_udptcp_checksum(l3_hdr, udp_hdr, info->ethertype); @@ -381,15 +370,11 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info, } else if (info->l4_proto == IPPROTO_TCP) { tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len); tcp_hdr->cksum = 0; - if (info->tso_segsz != 0) { + if (info->tso_segsz != 0) ol_flags |= PKT_TX_TCP_SEG; - tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, - ol_flags); - } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) { + else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) ol_flags |= PKT_TX_TCP_CKSUM; - tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, - ol_flags); - } else { + else { tcp_hdr->cksum = get_udptcp_checksum(l3_hdr, tcp_hdr, info->ethertype); @@ -639,7 +624,8 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */ uint16_t nb_rx; uint16_t nb_tx; - uint16_t i; + uint16_t nb_prep; + uint16_t i, n; uint64_t ol_flags; uint16_t testpmd_ol_flags; uint32_t retry; @@ -847,31 +833,56 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) printf("\n"); } } - nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx); - /* -* Retry if necessary -*/ - if (unlikely(nb_tx < nb_rx) && fs->retry_enabled) { - retry = 0; - while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) { - rte_delay_us(burst_tx_delay_time); - nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue, - &pkts_burst[nb_tx], nb_rx - nb_tx); + + n = 0; + + do { + nb_prep = rte_eth_tx_prep(fs->tx_port, fs->tx_queue, &pkts_burst[n], + nb_rx - n); + + if (nb_prep != nb_rx - n) { + printf("Preparing packet burst to transmit failed: %s\n", + rte_strerror(rte_errno)); + /* Drop malicious packet */ + rte_pktmbuf_free(pkts_burst[n + nb_prep]); + fs->fwd_dropped++; + } + + nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, &pkts_burst[n], + nb_prep); + + /* +* Retry if necessary +*/ + if (unlikely(nb_t
[dpdk-dev] [PATCH v13 0/7] add Tx preparation
eaders are in the first data segment and can be safetly modified - moved rte_validate_tx_offload to rte_mbuf - moved rte_phdr_cksum_fix to rte_net.h - removed rte_pkt.h new file as useless v6 changes: - added performance impact test results to the patch description v5 changes: - rebased csum engine modification - added information to the csum engine about performance tests - some performance improvements v4 changes: - tx_prep is now set to default behavior (NULL) for simple/vector path in fm10k, i40e and ixgbe drivers to increase performance, when Tx offloads are not intentionally available v3 changes: - reworked csum testpmd engine instead adding new one, - fixed checksum initialization procedure to include also outer checksum offloads, - some minor formattings and optimalizations v2 changes: - rte_eth_tx_prep() returns number of packets when device doesn't support tx_prep functionality, - introduced CONFIG_RTE_ETHDEV_TX_PREP allowing to turn off tx_prep Ananyev, Konstantin (1): vmxnet3: add Tx preparation Tomasz Kulasek (6): ethdev: add Tx preparation e1000: add Tx preparation fm10k: add Tx preparation i40e: add Tx preparation ixgbe: add Tx preparation testpmd: use Tx preparation in csum engine app/test-pmd/cmdline.c | 49 app/test-pmd/csumonly.c | 33 ++-- app/test-pmd/testpmd.c |5 ++ app/test-pmd/testpmd.h |2 + config/common_base |9 +++ doc/guides/testpmd_app_ug/testpmd_funcs.rst | 13 +++ drivers/net/e1000/e1000_ethdev.h| 11 +++ drivers/net/e1000/em_ethdev.c |5 +- drivers/net/e1000/em_rxtx.c | 48 ++- drivers/net/e1000/igb_ethdev.c |4 + drivers/net/e1000/igb_rxtx.c| 52 +++- drivers/net/fm10k/fm10k.h |6 ++ drivers/net/fm10k/fm10k_ethdev.c|5 ++ drivers/net/fm10k/fm10k_rxtx.c | 50 +++- drivers/net/i40e/i40e_ethdev.c |3 + drivers/net/i40e/i40e_rxtx.c| 74 - drivers/net/i40e/i40e_rxtx.h|8 ++ drivers/net/ixgbe/ixgbe_ethdev.c|3 + drivers/net/ixgbe/ixgbe_ethdev.h|5 +- drivers/net/ixgbe/ixgbe_rxtx.c | 56 + drivers/net/ixgbe/ixgbe_rxtx.h |2 + drivers/net/vmxnet3/vmxnet3_ethdev.c|4 + drivers/net/vmxnet3/vmxnet3_ethdev.h|2 + drivers/net/vmxnet3/vmxnet3_rxtx.c | 57 + lib/librte_ether/rte_ethdev.h | 115 +++ lib/librte_mbuf/rte_mbuf.h | 64 +++ lib/librte_net/rte_net.h| 85 27 files changed, 757 insertions(+), 13 deletions(-) -- 1.7.9.5
[dpdk-dev] [PATCH v13 1/7] ethdev: add Tx preparation
Added API for `rte_eth_tx_prepare` uint16_t rte_eth_tx_prepare(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) Added fields to the `struct rte_eth_desc_lim`: uint16_t nb_seg_max; /**< Max number of segments per whole packet. */ uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ Added functions: int rte_validate_tx_offload(struct rte_mbuf *m) to validate general requirements for tx offload set in mbuf of packet such a flag completness. In current implementation this function is called optionaly when RTE_LIBRTE_ETHDEV_DEBUG is enabled. int rte_net_intel_cksum_prepare(struct rte_mbuf *m) to fix pseudo header checksum for TSO and non-TSO tcp/udp packets before hardware tx checksum offload. - for non-TSO tcp/udp packets full pseudo-header checksum is counted and set. - for TSO the IP payload length is not included. PERFORMANCE TESTS - This feature was tested with modified csum engine from test-pmd. The packet checksum preparation was moved from application to Tx preparation step placed before burst. We may expect some overhead costs caused by: 1) using additional callback before burst, 2) rescanning burst, 3) additional condition checking (packet validation), 4) worse optimization (e.g. packet data access, etc.) We tested it using ixgbe Tx preparation implementation with some parts disabled to have comparable information about the impact of different parts of implementation. IMPACT: 1) For unimplemented Tx preparation callback the performance impact is negligible, 2) For packet condition check without checksum modifications (nb_segs, available offloads, etc.) is 14626628/14252168 (~2.62% drop), 3) Full support in ixgbe driver (point 2 + packet checksum initialization) is 14060924/13588094 (~3.48% drop) Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev Acked-by: Olivier Matz --- config/common_base|9 lib/librte_ether/rte_ethdev.h | 115 + lib/librte_mbuf/rte_mbuf.h| 64 +++ lib/librte_net/rte_net.h | 85 ++ 4 files changed, 273 insertions(+) diff --git a/config/common_base b/config/common_base index 652a839..2c5352e 100644 --- a/config/common_base +++ b/config/common_base @@ -123,6 +123,15 @@ CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y # +# Use real NOOP to turn off TX preparation stage +# +# While the behaviour of ``rte_ethdev_tx_prepare`` may change after turning on +# real NOOP, this configuration shouldn't be never enabled globaly, and can be +# used in appropriate target configuration file with a following restrictions +# +CONFIG_RTE_ETHDEV_TX_PREPARE_NOOP=n + +# # Support NIC bypass logic # CONFIG_RTE_NIC_BYPASS=n diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 9678179..b3052db 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -182,6 +182,7 @@ #include #include #include +#include #include "rte_ether.h" #include "rte_eth_ctrl.h" #include "rte_dev_info.h" @@ -702,6 +703,8 @@ struct rte_eth_desc_lim { uint16_t nb_max; /**< Max allowed number of descriptors. */ uint16_t nb_min; /**< Min allowed number of descriptors. */ uint16_t nb_align; /**< Number of descriptors should be aligned to. */ + uint16_t nb_seg_max; /**< Max number of segments per whole packet. */ + uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ }; /** @@ -1191,6 +1194,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq, uint16_t nb_pkts); /**< @internal Send output packets on a transmit queue of an Ethernet device. 
*/ +typedef uint16_t (*eth_tx_prep_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */ + typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf); /**< @internal Get current flow control parameter on an Ethernet device */ @@ -1625,6 +1633,7 @@ struct rte_eth_rxtx_callback { struct rte_eth_dev { eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ + eth_tx_prep_t tx_pkt_prepare; /**< Pointer to PMD transmit prepare function. */ struct rte_eth_dev_data *data; /**< Pointer to device data */ const struct eth_driver *driver;/**< Driver for this device */ const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ @@ -2819,6 +2828,112 @@ int
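Note that a NULL tx_pkt_prepare is not an error: when a PMD leaves the callback unset, as the simple/vector Tx paths in the driver patches do, rte_eth_tx_prepare() reports the whole burst as ready. Ignoring the debug-time port and queue checks, the generic dispatch behaves roughly as in this simplified sketch:

    static inline uint16_t
    tx_prepare_sketch(uint8_t port_id, uint16_t queue_id,
            struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
    {
        struct rte_eth_dev *dev = &rte_eth_devices[port_id];

        /* No prepare callback registered: accept the whole burst as-is. */
        if (dev->tx_pkt_prepare == NULL)
            return nb_pkts;

        return (*dev->tx_pkt_prepare)(dev->data->tx_queues[queue_id],
                tx_pkts, nb_pkts);
    }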
[dpdk-dev] [PATCH v13 2/7] e1000: add Tx preparation
Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev --- drivers/net/e1000/e1000_ethdev.h | 11 drivers/net/e1000/em_ethdev.c|5 +++- drivers/net/e1000/em_rxtx.c | 48 ++- drivers/net/e1000/igb_ethdev.c |4 +++ drivers/net/e1000/igb_rxtx.c | 52 +- 5 files changed, 117 insertions(+), 3 deletions(-) diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h index 6c25c8d..bd0f277 100644 --- a/drivers/net/e1000/e1000_ethdev.h +++ b/drivers/net/e1000/e1000_ethdev.h @@ -138,6 +138,11 @@ #define E1000_MISC_VEC_ID RTE_INTR_VEC_ZERO_OFFSET #define E1000_RX_VEC_START RTE_INTR_VEC_RXTX_OFFSET +#define IGB_TX_MAX_SEG UINT8_MAX +#define IGB_TX_MAX_MTU_SEG UINT8_MAX +#define EM_TX_MAX_SEG UINT8_MAX +#define EM_TX_MAX_MTU_SEG UINT8_MAX + /* structure for interrupt relative data */ struct e1000_interrupt { uint32_t flags; @@ -315,6 +320,9 @@ int eth_igb_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); @@ -376,6 +384,9 @@ int eth_em_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c index aee3d34..a004ee9 100644 --- a/drivers/net/e1000/em_ethdev.c +++ b/drivers/net/e1000/em_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -300,6 +300,7 @@ static int eth_em_set_mc_addr_list(struct rte_eth_dev *dev, eth_dev->dev_ops = ð_em_ops; eth_dev->rx_pkt_burst = (eth_rx_burst_t)ð_em_recv_pkts; eth_dev->tx_pkt_burst = (eth_tx_burst_t)ð_em_xmit_pkts; + eth_dev->tx_pkt_prepare = (eth_tx_prep_t)ð_em_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -1079,6 +1080,8 @@ static int eth_em_set_mc_addr_list(struct rte_eth_dev *dev, .nb_max = E1000_MAX_RING_DESC, .nb_min = E1000_MIN_RING_DESC, .nb_align = EM_TXD_ALIGN, + .nb_seg_max = EM_TX_MAX_SEG, + .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M | diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c index 41f51c0..7e271ad 100644 --- a/drivers/net/e1000/em_rxtx.c +++ b/drivers/net/e1000/em_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -66,6 +66,7 @@ #include #include #include +#include #include #include "e1000_logs.h" @@ -77,6 +78,14 @@ #define E1000_RXDCTL_GRAN 0x0100 /* RXDCTL Granularity */ +#define E1000_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_VLAN_PKT) + +#define E1000_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK) + /** * Structure associated with each descriptor of the RX ring of a RX queue. */ @@ -618,6 +627,43 @@ struct em_tx_queue { /* * + * TX prep functions + * + **/ +uint16_t +eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; +
[dpdk-dev] [PATCH v13 3/7] fm10k: add Tx preparation
Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev --- drivers/net/fm10k/fm10k.h|6 + drivers/net/fm10k/fm10k_ethdev.c |5 drivers/net/fm10k/fm10k_rxtx.c | 50 +- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h index 05aa1a2..c6fed21 100644 --- a/drivers/net/fm10k/fm10k.h +++ b/drivers/net/fm10k/fm10k.h @@ -69,6 +69,9 @@ #define FM10K_MAX_RX_DESC (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc)) #define FM10K_MAX_TX_DESC (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc)) +#define FM10K_TX_MAX_SEG UINT8_MAX +#define FM10K_TX_MAX_MTU_SEG UINT8_MAX + /* * byte aligment for HW RX data buffer * Datasheet requires RX buffer addresses shall either be 512-byte aligned or @@ -356,6 +359,9 @@ uint16_t fm10k_recv_scattered_pkts(void *rx_queue, uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq); int fm10k_rx_vec_condition_check(struct rte_eth_dev *); void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq); diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index 923690c..a116822 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -1447,6 +1447,8 @@ static int fm10k_xstats_get_names(__rte_unused struct rte_eth_dev *dev, .nb_max = FM10K_MAX_TX_DESC, .nb_min = FM10K_MIN_TX_DESC, .nb_align = FM10K_MULT_TX_DESC, + .nb_seg_max = FM10K_TX_MAX_SEG, + .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G | @@ -2755,8 +2757,10 @@ static void __attribute__((cold)) fm10k_txq_vec_setup(txq); } dev->tx_pkt_burst = fm10k_xmit_pkts_vec; + dev->tx_pkt_prepare = NULL; } else { dev->tx_pkt_burst = fm10k_xmit_pkts; + dev->tx_pkt_prepare = fm10k_prep_pkts; PMD_INIT_LOG(DEBUG, "Use regular Tx func"); } } @@ -2835,6 +2839,7 @@ static void __attribute__((cold)) dev->dev_ops = &fm10k_eth_dev_ops; dev->rx_pkt_burst = &fm10k_recv_pkts; dev->tx_pkt_burst = &fm10k_xmit_pkts; + dev->tx_pkt_prepare = &fm10k_prep_pkts; /* only initialize in the primary process */ if (rte_eal_process_type() != RTE_PROC_PRIMARY) diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c index 32cc7ff..144e5e6 100644 --- a/drivers/net/fm10k/fm10k_rxtx.c +++ b/drivers/net/fm10k/fm10k_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -35,6 +35,7 @@ #include #include +#include #include "fm10k.h" #include "base/fm10k_type.h" @@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd) } #endif +#define FM10K_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT |\ + PKT_TX_IP_CKSUM |\ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#define FM10K_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK) + /* @note: When this function is changed, make corresponding change to * fm10k_dev_supported_ptypes_get() */ @@ -597,3 +607,41 @@ static inline void tx_xmit_pkt(struct fm10k_tx_queue *q, struct rte_mbuf *mb) return count; } + +uint16_t +fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if ((m->ol_flags & PKT_TX_TCP_SEG) && + (m->tso_segsz < FM10K_TSO_MINMSS)) { + rte_errno = -EINVAL; + return i; + } + + if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} -- 1.7.9.5
[dpdk-dev] [PATCH v13 4/7] i40e: add Tx preparation
Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev --- drivers/net/i40e/i40e_ethdev.c |3 ++ drivers/net/i40e/i40e_rxtx.c | 74 +++- drivers/net/i40e/i40e_rxtx.h |8 + 3 files changed, 84 insertions(+), 1 deletion(-) diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index 67778ba..5761357 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -943,6 +943,7 @@ static inline void i40e_GLQF_reg_init(struct i40e_hw *hw) dev->dev_ops = &i40e_eth_dev_ops; dev->rx_pkt_burst = i40e_recv_pkts; dev->tx_pkt_burst = i40e_xmit_pkts; + dev->tx_pkt_prepare = i40e_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -2645,6 +2646,8 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev, .nb_max = I40E_MAX_RING_DESC, .nb_min = I40E_MIN_RING_DESC, .nb_align = I40E_ALIGN_RING_DESC, + .nb_seg_max = I40E_TX_MAX_SEG, + .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG, }; if (pf->flags & I40E_FLAG_VMDQ) { diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 7ae7d9f..d248396 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,6 +50,8 @@ #include #include #include +#include +#include #include "i40e_logs.h" #include "base/i40e_prototype.h" @@ -79,6 +81,17 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define I40E_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_OUTER_IP_CKSUM | \ + PKT_TX_TCP_SEG |\ + PKT_TX_QINQ_PKT | \ + PKT_TX_VLAN_PKT) + +#define I40E_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK) + static uint16_t i40e_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); @@ -1411,6 +1424,63 @@ static inline int __attribute__((always_inline)) return nb_tx; } +/* + * + * TX prep functions + * + **/ +uint16_t +i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** +* m->nb_segs is uint8_t, so m->nb_segs is always less than +* I40E_TX_MAX_SEG. +* We check only a condition for m->nb_segs > I40E_TX_MAX_MTU_SEG. +*/ + if (!(ol_flags & PKT_TX_TCP_SEG)) { + if (m->nb_segs > I40E_TX_MAX_MTU_SEG) { + rte_errno = -EINVAL; + return i; + } + } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) || + (m->tso_segsz > I40E_MAX_TSO_MSS)) { + /* MSS outside the range (256B - 9674B) are considered +* malicious +*/ + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + return i; +} + /* * Find the VSI the queue belongs to. 'queue_idx' is the queue index * application used, which assume having sequential ones. But from driver's @@ -2763,9 +2833,11 @@ void __attribute__((cold)) PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
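The nb_seg_max / nb_mtu_seg_max limits filled in above are visible to applications through rte_eth_dev_info_get(), so a sender can size its mbuf chains before the prepare callback ever has to reject them. A hedged sketch (handle_oversegmented() is a hypothetical application helper):

    struct rte_eth_dev_info dev_info;

    rte_eth_dev_info_get(port_id, &dev_info);

    /* A non-TSO packet must fit within the per-MTU segment limit. */
    if (!(m->ol_flags & PKT_TX_TCP_SEG) &&
        m->nb_segs > dev_info.tx_desc_lim.nb_mtu_seg_max)
        handle_oversegmented(m);    /* e.g. coalesce or drop the chain */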
[dpdk-dev] [PATCH v13 5/7] ixgbe: add Tx preparation
Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev --- drivers/net/ixgbe/ixgbe_ethdev.c |3 ++ drivers/net/ixgbe/ixgbe_ethdev.h |5 +++- drivers/net/ixgbe/ixgbe_rxtx.c | 56 ++ drivers/net/ixgbe/ixgbe_rxtx.h |2 ++ 4 files changed, 65 insertions(+), 1 deletion(-) diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index edc9b22..a75f59d 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -517,6 +517,8 @@ static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev, .nb_max = IXGBE_MAX_RING_DESC, .nb_min = IXGBE_MIN_RING_DESC, .nb_align = IXGBE_TXD_ALIGN, + .nb_seg_max = IXGBE_TX_MAX_SEG, + .nb_mtu_seg_max = IXGBE_TX_MAX_SEG, }; static const struct eth_dev_ops ixgbe_eth_dev_ops = { @@ -1103,6 +1105,7 @@ struct rte_ixgbe_xstats_name_off { eth_dev->dev_ops = &ixgbe_eth_dev_ops; eth_dev->rx_pkt_burst = &ixgbe_recv_pkts; eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts; + eth_dev->tx_pkt_prepare = &ixgbe_prep_pkts; /* * For secondary processes, we don't initialise any further as primary diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h index 4ff6338..e229cf5 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.h +++ b/drivers/net/ixgbe/ixgbe_ethdev.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf); diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c index b2d9f45..dbe83e7 100644 --- a/drivers/net/ixgbe/ixgbe_rxtx.c +++ b/drivers/net/ixgbe/ixgbe_rxtx.c @@ -70,6 +70,7 @@ #include #include #include +#include #include "ixgbe_logs.h" #include "base/ixgbe_api.h" @@ -87,6 +88,9 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK) + #if 1 #define RTE_PMD_USE_PREFETCH #endif @@ -905,6 +909,56 @@ static inline int __attribute__((always_inline)) /* * + * TX prep functions + * + **/ +uint16_t +ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** +* Check if packet meets requirements for number of segments +* +* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and non-TSO +*/ + + if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) { + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + +/* + * * RX functions * **/ @@ -2282,6 +2336,7 @@ void __attribute__((cold)) if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) { 
PMD_INIT_LOG(DEBUG, "Using simple tx code path"); + dev->tx_pkt_prepare = NULL; #ifdef RTE_IXGBE_INC_VECTOR if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&
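A worked example of the segment rule enforced above: taking the comment's fixed budget of 40 descriptors, a queue configured with tx_thresh.wthresh = 4 leaves 40 - 4 = 36 usable descriptors, so any mbuf chain with nb_segs above 36 is rejected here (rte_errno is set to -EINVAL), for TSO and non-TSO packets alike.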
[dpdk-dev] [PATCH v13 6/7] vmxnet3: add Tx preparation
From: "Ananyev, Konstantin" Signed-off-by: Konstantin Ananyev --- drivers/net/vmxnet3/vmxnet3_ethdev.c |4 +++ drivers/net/vmxnet3/vmxnet3_ethdev.h |2 ++ drivers/net/vmxnet3/vmxnet3_rxtx.c | 57 ++ 3 files changed, 63 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c index 8bb13e5..f85be91 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c @@ -237,6 +237,7 @@ static void vmxnet3_mac_addr_set(struct rte_eth_dev *dev, eth_dev->dev_ops = &vmxnet3_eth_dev_ops; eth_dev->rx_pkt_burst = &vmxnet3_recv_pkts; eth_dev->tx_pkt_burst = &vmxnet3_xmit_pkts; + eth_dev->tx_pkt_prepare = vmxnet3_prep_pkts; pci_dev = eth_dev->pci_dev; /* @@ -326,6 +327,7 @@ static void vmxnet3_mac_addr_set(struct rte_eth_dev *dev, eth_dev->dev_ops = NULL; eth_dev->rx_pkt_burst = NULL; eth_dev->tx_pkt_burst = NULL; + eth_dev->tx_pkt_prepare = NULL; rte_free(eth_dev->data->mac_addrs); eth_dev->data->mac_addrs = NULL; @@ -728,6 +730,8 @@ static void vmxnet3_mac_addr_set(struct rte_eth_dev *dev, .nb_max = VMXNET3_TX_RING_MAX_SIZE, .nb_min = VMXNET3_DEF_TX_RING_SIZE, .nb_align = 1, + .nb_seg_max = UINT8_MAX, + .nb_mtu_seg_max = VMXNET3_MAX_TXD_PER_PKT, }; dev_info->rx_offload_capa = diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.h b/drivers/net/vmxnet3/vmxnet3_ethdev.h index 7d3b11e..469db71 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethdev.h +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.h @@ -171,5 +171,7 @@ uint16_t vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); uint16_t vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t vmxnet3_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); #endif /* _VMXNET3_ETHDEV_H_ */ diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c index b109168..0c35738 100644 --- a/drivers/net/vmxnet3/vmxnet3_rxtx.c +++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c @@ -69,6 +69,7 @@ #include #include #include +#include #include "base/vmxnet3_defs.h" #include "vmxnet3_ring.h" @@ -76,6 +77,14 @@ #include "vmxnet3_logs.h" #include "vmxnet3_ethdev.h" +#defineVMXNET3_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT | \ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#defineVMXNET3_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK) + static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2}; static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t); @@ -350,6 +359,54 @@ } uint16_t +vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int32_t ret; + uint32_t i; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i != nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /* +* Non-TSO packet cannot occupy more than +* VMXNET3_MAX_TXD_PER_PKT TX descriptors. +*/ + if ((ol_flags & PKT_TX_TCP_SEG) == 0 && + m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) { + rte_errno = -EINVAL; + return i; + } + + /* check that only supported TX offloads are requested. 
*/ + if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 || + (ol_flags & PKT_TX_L4_MASK) == + PKT_TX_SCTP_CKSUM) { + rte_errno = -EINVAL; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + +uint16_t vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { -- 1.7.9.5
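One subtlety in this callback: PKT_TX_SCTP_CKSUM is one of the values encoded in the multi-bit PKT_TX_L4_MASK field, so the XOR-based NOTSUP mask alone cannot exclude it while still allowing TCP and UDP checksum requests; that is why the explicit equality test against PKT_TX_SCTP_CKSUM is needed in addition to the mask check.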
[dpdk-dev] [PATCH v13 7/7] testpmd: use Tx preparation in csum engine
Added "csum txprep (on|off)" command which allows to switch to the tx path using Tx preparation API. By default unchanged implementation is used. Using Tx preparation path, pseudo header calculation for udp/tcp/tso packets from application, and used Tx preparation API for packet preparation and verification. Adding additional step to the csum engine costs about 3-4% of performance drop, on my setup with ixgbe driver. It's caused mostly by the need of reaccessing and modification of packet data. Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev --- app/test-pmd/cmdline.c | 49 +++ app/test-pmd/csumonly.c | 33 ++ app/test-pmd/testpmd.c |5 +++ app/test-pmd/testpmd.h |2 ++ doc/guides/testpmd_app_ug/testpmd_funcs.rst | 13 +++ 5 files changed, 95 insertions(+), 7 deletions(-) diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index d03a592..499a00b 100644 --- a/app/test-pmd/cmdline.c +++ b/app/test-pmd/cmdline.c @@ -366,6 +366,10 @@ static void cmd_help_long_parsed(void *parsed_result, "csum show (port_id)\n" "Display tx checksum offload configuration\n\n" + "csum txprep (on|off)" + "Enable tx preparation path in csum forward engine" + "\n\n" + "tso set (segsize) (portid)\n" "Enable TCP Segmentation Offload in csum forward" " engine.\n" @@ -3528,6 +3532,50 @@ struct cmd_csum_tunnel_result { }, }; +/* Enable/disable tx preparation path */ +struct cmd_csum_txprep_result { + cmdline_fixed_string_t csum; + cmdline_fixed_string_t parse; + cmdline_fixed_string_t onoff; +}; + +static void +cmd_csum_txprep_parsed(void *parsed_result, + __attribute__((unused)) struct cmdline *cl, + __attribute__((unused)) void *data) +{ + struct cmd_csum_txprep_result *res = parsed_result; + + if (!strcmp(res->onoff, "on")) + tx_prepare = 1; + else + tx_prepare = 0; + +} + +cmdline_parse_token_string_t cmd_csum_txprep_csum = + TOKEN_STRING_INITIALIZER(struct cmd_csum_txprep_result, + csum, "csum"); +cmdline_parse_token_string_t cmd_csum_txprep_parse = + TOKEN_STRING_INITIALIZER(struct cmd_csum_txprep_result, + parse, "txprep"); +cmdline_parse_token_string_t cmd_csum_txprep_onoff = + TOKEN_STRING_INITIALIZER(struct cmd_csum_txprep_result, + onoff, "on#off"); + +cmdline_parse_inst_t cmd_csum_txprep = { + .f = cmd_csum_txprep_parsed, + .data = NULL, + .help_str = "csum txprep on|off: Enable/Disable tx preparation path " + "for csum engine", + .tokens = { + (void *)&cmd_csum_txprep_csum, + (void *)&cmd_csum_txprep_parse, + (void *)&cmd_csum_txprep_onoff, + NULL, + }, +}; + /* *** ENABLE HARDWARE SEGMENTATION IN TX NON-TUNNELED PACKETS *** */ struct cmd_tso_set_result { cmdline_fixed_string_t tso; @@ -11518,6 +11566,7 @@ struct cmd_set_vf_mac_addr_result { (cmdline_parse_inst_t *)&cmd_csum_set, (cmdline_parse_inst_t *)&cmd_csum_show, (cmdline_parse_inst_t *)&cmd_csum_tunnel, + (cmdline_parse_inst_t *)&cmd_csum_txprep, (cmdline_parse_inst_t *)&cmd_tso_set, (cmdline_parse_inst_t *)&cmd_tso_show, (cmdline_parse_inst_t *)&cmd_tunnel_tso_set, diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index 57e6ae2..3afa9ab 100644 --- a/app/test-pmd/csumonly.c +++ b/app/test-pmd/csumonly.c @@ -372,8 +372,10 @@ struct simple_gre_hdr { udp_hdr->dgram_cksum = 0; if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) { ol_flags |= PKT_TX_UDP_CKSUM; - udp_hdr->dgram_cksum = get_psd_sum(l3_hdr, - info->ethertype, ol_flags); + if (!tx_prepare) + udp_hdr->dgram_cksum = get_psd_sum( + l3_hdr, info->ethertype, + ol_flags); } else { udp_hdr->dgram_cksum = get_udptcp_checksum(l3_hdr, udp_hdr, @@ -385,12 +387,15 @@ struct 
simple_gre_hdr { tcp
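For completeness, the new command slots into the existing csum forward engine workflow; a typical interactive sequence that exercises the Tx preparation path could look like this (the port number is an example):

    testpmd> set fwd csum
    testpmd> csum set tcp hw 0
    testpmd> csum txprep on
    testpmd> start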
[dpdk-dev] [PATCH] performance-thread: add software packet type parsing
Last changes in Niantic and Fortville NIC drivers causes that vector Rx path is chosen by default in l3fwd-thread application. This path doesn't support propagation of hw packet type recognition to the packet_type field in mbuf, and packets cannot be classified properly. The approach to solve this problem is similar to the commit: 71a7e2424e07 ("examples/l3fwd: fix using packet type blindly"). To use sw packet analizer, new command line option "--parse-ptype" is introduced. Signed-off-by: Tomasz Kulasek --- doc/guides/sample_app_ug/performance_thread.rst |4 + examples/performance-thread/l3fwd-thread/main.c | 100 ++- 2 files changed, 101 insertions(+), 3 deletions(-) diff --git a/doc/guides/sample_app_ug/performance_thread.rst b/doc/guides/sample_app_ug/performance_thread.rst index d7d9b08..0a655af 100644 --- a/doc/guides/sample_app_ug/performance_thread.rst +++ b/doc/guides/sample_app_ug/performance_thread.rst @@ -107,6 +107,7 @@ The application has a number of command line options:: --tx(lcore,thread)[,(lcore,thread)] [--enable-jumbo] [--max-pkt-len PKTLEN]] [--no-numa] [--hash-entry-num] [--ipv6] [--no-lthreads] [--stat-lcore lcore] +[--parse-ptype] Where: @@ -142,6 +143,9 @@ Where: * ``--stat-lcore``: optional, run CPU load stats collector on the specified lcore. +* ``--parse-ptype:`` optional, set to use software to analyze packet type. + Without this option, hardware will check the packet type. + The parameters of the ``--rx`` and ``--tx`` options are: * ``--rx`` parameters diff --git a/examples/performance-thread/l3fwd-thread/main.c b/examples/performance-thread/l3fwd-thread/main.c index fdc90b2..336f88c 100644 --- a/examples/performance-thread/l3fwd-thread/main.c +++ b/examples/performance-thread/l3fwd-thread/main.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -90,6 +90,64 @@ #define APP_LOOKUP_METHOD APP_LOOKUP_LPM #endif +static int +check_ptype(int portid) +{ + int i, ret; + int ipv4 = 0, ipv6 = 0; + + ret = rte_eth_dev_get_supported_ptypes(portid, RTE_PTYPE_L3_MASK, NULL, + 0); + if (ret <= 0) + return 0; + + uint32_t ptypes[ret]; + + ret = rte_eth_dev_get_supported_ptypes(portid, RTE_PTYPE_L3_MASK, + ptypes, ret); + for (i = 0; i < ret; ++i) { + if (ptypes[i] & RTE_PTYPE_L3_IPV4) + ipv4 = 1; + if (ptypes[i] & RTE_PTYPE_L3_IPV6) + ipv6 = 1; + } + + if (ipv4 && ipv6) + return 1; + + return 0; +} + +static inline void +parse_ptype(struct rte_mbuf *m) +{ + struct ether_hdr *eth_hdr; + uint32_t packet_type = RTE_PTYPE_UNKNOWN; + uint16_t ether_type; + + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + ether_type = eth_hdr->ether_type; + if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) + packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; + else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) + packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; + + m->packet_type = packet_type; +} + +static uint16_t +cb_parse_ptype(__rte_unused uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkts[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, __rte_unused void *user_param) +{ + unsigned int i; + + for (i = 0; i < nb_pkts; i++) + parse_ptype(pkts[i]); + + return nb_pkts; +} + /* * When set to zero, simple forwaring path is eanbled. * When set to one, optimized forwarding path is enabled. 
@@ -170,8 +228,9 @@ /* mask of enabled ports */ static uint32_t enabled_port_mask; -static int promiscuous_on; /**< $et in promiscuous mode off by default. */ +static int promiscuous_on; /**< Set in promiscuous mode off by default. */ static int numa_on = 1;/**< NUMA is enabled by default. */ +static int parse_ptype_on; #if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) static int ipv6; /**< ipv6 is false by default. */ @@ -2610,6 +2669,7 @@ static inline __attribute__((always_inline)) uint16_t " [--rx (port,queue,lcore,thread)[,(port,queue,lcore,thread]]" " [--tx (lcore,thread)[,(lcore,thread]]" " [--enable-jumbo [--max-pkt-len PKTLEN]]\n" + " [--parse-ptype]\n\n" " -p PORTMASK: hexadecimal bitmask of ports to configure\n" " -P : enable promiscuous mode\n" " -
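cb_parse_ptype() has the rte_rx_callback_fn signature, so when --parse-ptype is given the application can attach it to every Rx queue it configures. A hedged sketch of that registration (portid/queueid are placeholders and the error handling is illustrative):

    if (parse_ptype_on) {
        if (rte_eth_add_rx_callback(portid, queueid,
                cb_parse_ptype, NULL) == NULL)
            rte_exit(EXIT_FAILURE,
                "Failed to add rx callback: port=%d\n", portid);
    }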
[dpdk-dev] [PATCH v14 0/8] add Tx preparation
eth_tx_prep after queue_id check v9 changes: - fixed headers structure fragmentation check - moved fragmentation check into rte_validate_tx_offload() v8 changes: - mbuf argument in rte_validate_tx_offload declared as const v7 changes: - comments reworded/added - changed errno values returned from Tx prep API - added check in rte_phdr_cksum_fix if headers are in the first data segment and can be safetly modified - moved rte_validate_tx_offload to rte_mbuf - moved rte_phdr_cksum_fix to rte_net.h - removed rte_pkt.h new file as useless v6 changes: - added performance impact test results to the patch description v5 changes: - rebased csum engine modification - added information to the csum engine about performance tests - some performance improvements v4 changes: - tx_prep is now set to default behavior (NULL) for simple/vector path in fm10k, i40e and ixgbe drivers to increase performance, when Tx offloads are not intentionally available v3 changes: - reworked csum testpmd engine instead adding new one, - fixed checksum initialization procedure to include also outer checksum offloads, - some minor formattings and optimalizations v2 changes: - rte_eth_tx_prep() returns number of packets when device doesn't support tx_prep functionality, - introduced CONFIG_RTE_ETHDEV_TX_PREP allowing to turn off tx_prep Konstantin Ananyev (2): ena: add Tx preparation vmxnet3: add Tx preparation Tomasz Kulasek (6): ethdev: add Tx preparation e1000: add Tx preparation fm10k: add Tx preparation i40e: add Tx preparation ixgbe: add Tx preparation testpmd: use Tx preparation in csum engine app/test-pmd/csumonly.c | 37 +-- app/test-pmd/testpmd.c |5 ++ app/test-pmd/testpmd.h |2 + config/common_base |9 +++ drivers/net/e1000/e1000_ethdev.h | 11 drivers/net/e1000/em_ethdev.c|5 +- drivers/net/e1000/em_rxtx.c | 48 +- drivers/net/e1000/igb_ethdev.c |4 ++ drivers/net/e1000/igb_rxtx.c | 53 +++- drivers/net/ena/ena_ethdev.c | 51 +++ drivers/net/fm10k/fm10k.h|6 ++ drivers/net/fm10k/fm10k_ethdev.c |5 ++ drivers/net/fm10k/fm10k_rxtx.c | 50 ++- drivers/net/i40e/i40e_ethdev.c |3 + drivers/net/i40e/i40e_rxtx.c | 74 +- drivers/net/i40e/i40e_rxtx.h |8 +++ drivers/net/ixgbe/ixgbe_ethdev.c |3 + drivers/net/ixgbe/ixgbe_ethdev.h |5 +- drivers/net/ixgbe/ixgbe_rxtx.c | 57 + drivers/net/ixgbe/ixgbe_rxtx.h |2 + drivers/net/vmxnet3/vmxnet3_ethdev.c |6 ++ drivers/net/vmxnet3/vmxnet3_ethdev.h |2 + drivers/net/vmxnet3/vmxnet3_rxtx.c | 56 + lib/librte_ether/rte_ethdev.h| 115 ++ lib/librte_mbuf/rte_mbuf.h | 64 +++ lib/librte_net/rte_net.h | 110 26 files changed, 764 insertions(+), 27 deletions(-) -- 1.7.9.5
[dpdk-dev] [PATCH v14 1/8] ethdev: add Tx preparation
Added API for `rte_eth_tx_prepare` uint16_t rte_eth_tx_prepare(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) Added fields to the `struct rte_eth_desc_lim`: uint16_t nb_seg_max; /**< Max number of segments per whole packet. */ uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ Added functions: int rte_validate_tx_offload(struct rte_mbuf *m) to validate general requirements for tx offload set in mbuf of packet such a flag completness. In current implementation this function is called optionaly when RTE_LIBRTE_ETHDEV_DEBUG is enabled. int rte_net_intel_cksum_prepare(struct rte_mbuf *m) to prepare pseudo header checksum for TSO and non-TSO tcp/udp packets before hardware tx checksum offload. - for non-TSO tcp/udp packets full pseudo-header checksum is counted and set. - for TSO the IP payload length is not included. int rte_net_intel_cksum_flags_prepare(struct rte_mbuf *m, uint64_t ol_flags) this function uses same logic as rte_net_intel_cksum_prepare, but allows application to choose which offloads should be taken into account, if full preparation is not required. PERFORMANCE TESTS - This feature was tested with modified csum engine from test-pmd. The packet checksum preparation was moved from application to Tx preparation step placed before burst. We may expect some overhead costs caused by: 1) using additional callback before burst, 2) rescanning burst, 3) additional condition checking (packet validation), 4) worse optimization (e.g. packet data access, etc.) We tested it using ixgbe Tx preparation implementation with some parts disabled to have comparable information about the impact of different parts of implementation. IMPACT: 1) For unimplemented Tx preparation callback the performance impact is negligible, 2) For packet condition check without checksum modifications (nb_segs, available offloads, etc.) is 14626628/14252168 (~2.62% drop), 3) Full support in ixgbe driver (point 2 + packet checksum initialization) is 14060924/13588094 (~3.48% drop) Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev Acked-by: Olivier Matz --- config/common_base|9 lib/librte_ether/rte_ethdev.h | 115 + lib/librte_mbuf/rte_mbuf.h| 64 +++ lib/librte_net/rte_net.h | 110 +++ 4 files changed, 298 insertions(+) diff --git a/config/common_base b/config/common_base index edb6a54..92c413a 100644 --- a/config/common_base +++ b/config/common_base @@ -123,6 +123,15 @@ CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y # +# Use real NOOP to turn off TX preparation stage +# +# While the behaviour of ``rte_ethdev_tx_prepare`` may change after turning on +# real NOOP, this configuration shouldn't be never enabled globaly, and can be +# used in appropriate target configuration file with a following restrictions +# +CONFIG_RTE_ETHDEV_TX_PREPARE_NOOP=n + +# # Support NIC bypass logic # CONFIG_RTE_NIC_BYPASS=n diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 52119af..10be095 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -182,6 +182,7 @@ #include #include #include +#include #include "rte_ether.h" #include "rte_eth_ctrl.h" #include "rte_dev_info.h" @@ -702,6 +703,8 @@ struct rte_eth_desc_lim { uint16_t nb_max; /**< Max allowed number of descriptors. */ uint16_t nb_min; /**< Min allowed number of descriptors. */ uint16_t nb_align; /**< Number of descriptors should be aligned to. */ + uint16_t nb_seg_max; /**< Max number of segments per whole packet. 
*/ + uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ }; /** @@ -1191,6 +1194,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq, uint16_t nb_pkts); /**< @internal Send output packets on a transmit queue of an Ethernet device. */ +typedef uint16_t (*eth_tx_prep_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */ + typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf); /**< @internal Get current flow control parameter on an Ethernet device */ @@ -1625,6 +1633,7 @@ struct rte_eth_rxtx_callback { struct rte_eth_dev { eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ + eth_tx_prep_t tx_pkt_prepare; /**< Pointer to PMD transmit prepare function. */ struct rt
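For readers of the archive, a minimal application-side sketch of how the new call is meant to be used: the burst is prepared immediately before it is transmitted. The error-handling policy (freeing the packets that failed preparation) is an assumption, not part of the patch; the API signatures are the ones introduced above.

#include <stdio.h>
#include <rte_ethdev.h>
#include <rte_errno.h>
#include <rte_mbuf.h>

/* Prepare and send one burst; returns the number of packets transmitted. */
static uint16_t
send_burst(uint8_t port_id, uint16_t queue_id,
	   struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t nb_prep, nb_tx, i;

	/* Validate offload flags and fix pseudo-header checksums for the PMD. */
	nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
	if (nb_prep != nb_pkts) {
		/* pkts[nb_prep] is the first packet that failed preparation. */
		printf("tx_prepare stopped at %u: %s\n",
			nb_prep, rte_strerror(rte_errno));
		for (i = nb_prep; i < nb_pkts; i++)
			rte_pktmbuf_free(pkts[i]);
	}

	nb_tx = rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
	return nb_tx;
}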
[dpdk-dev] [PATCH v14 2/8] e1000: add Tx preparation
Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev --- drivers/net/e1000/e1000_ethdev.h | 11 drivers/net/e1000/em_ethdev.c|5 +++- drivers/net/e1000/em_rxtx.c | 48 +- drivers/net/e1000/igb_ethdev.c |4 +++ drivers/net/e1000/igb_rxtx.c | 53 +- 5 files changed, 118 insertions(+), 3 deletions(-) diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h index 6c25c8d..bd0f277 100644 --- a/drivers/net/e1000/e1000_ethdev.h +++ b/drivers/net/e1000/e1000_ethdev.h @@ -138,6 +138,11 @@ #define E1000_MISC_VEC_ID RTE_INTR_VEC_ZERO_OFFSET #define E1000_RX_VEC_START RTE_INTR_VEC_RXTX_OFFSET +#define IGB_TX_MAX_SEG UINT8_MAX +#define IGB_TX_MAX_MTU_SEG UINT8_MAX +#define EM_TX_MAX_SEG UINT8_MAX +#define EM_TX_MAX_MTU_SEG UINT8_MAX + /* structure for interrupt relative data */ struct e1000_interrupt { uint32_t flags; @@ -315,6 +320,9 @@ int eth_igb_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); @@ -376,6 +384,9 @@ int eth_em_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c index 866a5cf..00d5996 100644 --- a/drivers/net/e1000/em_ethdev.c +++ b/drivers/net/e1000/em_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -300,6 +300,7 @@ static int eth_em_set_mc_addr_list(struct rte_eth_dev *dev, eth_dev->dev_ops = ð_em_ops; eth_dev->rx_pkt_burst = (eth_rx_burst_t)ð_em_recv_pkts; eth_dev->tx_pkt_burst = (eth_tx_burst_t)ð_em_xmit_pkts; + eth_dev->tx_pkt_prepare = (eth_tx_prep_t)ð_em_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -1079,6 +1080,8 @@ static int eth_em_set_mc_addr_list(struct rte_eth_dev *dev, .nb_max = E1000_MAX_RING_DESC, .nb_min = E1000_MIN_RING_DESC, .nb_align = EM_TXD_ALIGN, + .nb_seg_max = EM_TX_MAX_SEG, + .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M | diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c index 41f51c0..7e271ad 100644 --- a/drivers/net/e1000/em_rxtx.c +++ b/drivers/net/e1000/em_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -66,6 +66,7 @@ #include #include #include +#include #include #include "e1000_logs.h" @@ -77,6 +78,14 @@ #define E1000_RXDCTL_GRAN 0x0100 /* RXDCTL Granularity */ +#define E1000_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_VLAN_PKT) + +#define E1000_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK) + /** * Structure associated with each descriptor of the RX ring of a RX queue. */ @@ -618,6 +627,43 @@ struct em_tx_queue { /* * + * TX prep functions + * + **/ +uint16_t +eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; +
[dpdk-dev] [PATCH v14 4/8] i40e: add Tx preparation
Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev --- drivers/net/i40e/i40e_ethdev.c |3 ++ drivers/net/i40e/i40e_rxtx.c | 74 +++- drivers/net/i40e/i40e_rxtx.h |8 + 3 files changed, 84 insertions(+), 1 deletion(-) diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index b0c0fbf..0e20178 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -944,6 +944,7 @@ static inline void i40e_GLQF_reg_init(struct i40e_hw *hw) dev->dev_ops = &i40e_eth_dev_ops; dev->rx_pkt_burst = i40e_recv_pkts; dev->tx_pkt_burst = i40e_xmit_pkts; + dev->tx_pkt_prepare = i40e_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -2646,6 +2647,8 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev, .nb_max = I40E_MAX_RING_DESC, .nb_min = I40E_MIN_RING_DESC, .nb_align = I40E_ALIGN_RING_DESC, + .nb_seg_max = I40E_TX_MAX_SEG, + .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG, }; if (pf->flags & I40E_FLAG_VMDQ) { diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 7ae7d9f..1c9a6c8 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,6 +50,8 @@ #include #include #include +#include +#include #include "i40e_logs.h" #include "base/i40e_prototype.h" @@ -79,6 +81,17 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define I40E_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_OUTER_IP_CKSUM | \ + PKT_TX_TCP_SEG |\ + PKT_TX_QINQ_PKT | \ + PKT_TX_VLAN_PKT) + +#define I40E_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK) + static uint16_t i40e_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); @@ -1411,6 +1424,63 @@ static inline int __attribute__((always_inline)) return nb_tx; } +/* + * + * TX prep functions + * + **/ +uint16_t +i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** +* m->nb_segs is uint8_t, so nb_segs is always less than +* I40E_TX_MAX_SEG. +* We check only a condition for nb_segs > I40E_TX_MAX_MTU_SEG. +*/ + if (!(ol_flags & PKT_TX_TCP_SEG)) { + if (m->nb_segs > I40E_TX_MAX_MTU_SEG) { + rte_errno = -EINVAL; + return i; + } + } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) || + (m->tso_segsz > I40E_MAX_TSO_MSS)) { + /* MSS outside the range (256B - 9674B) are considered +* malicious +*/ + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + return i; +} + /* * Find the VSI the queue belongs to. 'queue_idx' is the queue index * application used, which assume having sequential ones. But from driver's @@ -2763,9 +2833,11 @@ void __attribute__((cold)) PMD_INIT_LOG(DEBUG, "Simple tx finally be used."); dev
[dpdk-dev] [PATCH v14 3/8] fm10k: add Tx preparation
Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev --- drivers/net/fm10k/fm10k.h|6 + drivers/net/fm10k/fm10k_ethdev.c |5 drivers/net/fm10k/fm10k_rxtx.c | 50 +- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h index 05aa1a2..c6fed21 100644 --- a/drivers/net/fm10k/fm10k.h +++ b/drivers/net/fm10k/fm10k.h @@ -69,6 +69,9 @@ #define FM10K_MAX_RX_DESC (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc)) #define FM10K_MAX_TX_DESC (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc)) +#define FM10K_TX_MAX_SEG UINT8_MAX +#define FM10K_TX_MAX_MTU_SEG UINT8_MAX + /* * byte aligment for HW RX data buffer * Datasheet requires RX buffer addresses shall either be 512-byte aligned or @@ -356,6 +359,9 @@ uint16_t fm10k_recv_scattered_pkts(void *rx_queue, uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq); int fm10k_rx_vec_condition_check(struct rte_eth_dev *); void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq); diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index fe74f6d..6648468 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -1447,6 +1447,8 @@ static int fm10k_xstats_get_names(__rte_unused struct rte_eth_dev *dev, .nb_max = FM10K_MAX_TX_DESC, .nb_min = FM10K_MIN_TX_DESC, .nb_align = FM10K_MULT_TX_DESC, + .nb_seg_max = FM10K_TX_MAX_SEG, + .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G | @@ -2755,8 +2757,10 @@ static void __attribute__((cold)) fm10k_txq_vec_setup(txq); } dev->tx_pkt_burst = fm10k_xmit_pkts_vec; + dev->tx_pkt_prepare = NULL; } else { dev->tx_pkt_burst = fm10k_xmit_pkts; + dev->tx_pkt_prepare = fm10k_prep_pkts; PMD_INIT_LOG(DEBUG, "Use regular Tx func"); } } @@ -2835,6 +2839,7 @@ static void __attribute__((cold)) dev->dev_ops = &fm10k_eth_dev_ops; dev->rx_pkt_burst = &fm10k_recv_pkts; dev->tx_pkt_burst = &fm10k_xmit_pkts; + dev->tx_pkt_prepare = &fm10k_prep_pkts; /* only initialize in the primary process */ if (rte_eal_process_type() != RTE_PROC_PRIMARY) diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c index 32cc7ff..144e5e6 100644 --- a/drivers/net/fm10k/fm10k_rxtx.c +++ b/drivers/net/fm10k/fm10k_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -35,6 +35,7 @@ #include #include +#include #include "fm10k.h" #include "base/fm10k_type.h" @@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd) } #endif +#define FM10K_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT |\ + PKT_TX_IP_CKSUM |\ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#define FM10K_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK) + /* @note: When this function is changed, make corresponding change to * fm10k_dev_supported_ptypes_get() */ @@ -597,3 +607,41 @@ static inline void tx_xmit_pkt(struct fm10k_tx_queue *q, struct rte_mbuf *mb) return count; } + +uint16_t +fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if ((m->ol_flags & PKT_TX_TCP_SEG) && + (m->tso_segsz < FM10K_TSO_MINMSS)) { + rte_errno = -EINVAL; + return i; + } + + if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} -- 1.7.9.5
[dpdk-dev] [PATCH v14 6/8] vmxnet3: add Tx preparation
From: "Ananyev, Konstantin" Signed-off-by: Konstantin Ananyev --- drivers/net/vmxnet3/vmxnet3_ethdev.c |6 drivers/net/vmxnet3/vmxnet3_ethdev.h |2 ++ drivers/net/vmxnet3/vmxnet3_rxtx.c | 56 ++ 3 files changed, 64 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c index 93c9ac9..e31896f 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c @@ -69,6 +69,8 @@ #define PROCESS_SYS_EVENTS 0 +#defineVMXNET3_TX_MAX_SEG UINT8_MAX + static int eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev); static int eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev); static int vmxnet3_dev_configure(struct rte_eth_dev *dev); @@ -237,6 +239,7 @@ static void vmxnet3_mac_addr_set(struct rte_eth_dev *dev, eth_dev->dev_ops = &vmxnet3_eth_dev_ops; eth_dev->rx_pkt_burst = &vmxnet3_recv_pkts; eth_dev->tx_pkt_burst = &vmxnet3_xmit_pkts; + eth_dev->tx_pkt_prepare = vmxnet3_prep_pkts; pci_dev = eth_dev->pci_dev; /* @@ -326,6 +329,7 @@ static void vmxnet3_mac_addr_set(struct rte_eth_dev *dev, eth_dev->dev_ops = NULL; eth_dev->rx_pkt_burst = NULL; eth_dev->tx_pkt_burst = NULL; + eth_dev->tx_pkt_prepare = NULL; rte_free(eth_dev->data->mac_addrs); eth_dev->data->mac_addrs = NULL; @@ -728,6 +732,8 @@ static void vmxnet3_mac_addr_set(struct rte_eth_dev *dev, .nb_max = VMXNET3_TX_RING_MAX_SIZE, .nb_min = VMXNET3_DEF_TX_RING_SIZE, .nb_align = 1, + .nb_seg_max = VMXNET3_TX_MAX_SEG, + .nb_mtu_seg_max = VMXNET3_MAX_TXD_PER_PKT, }; dev_info->rx_offload_capa = diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.h b/drivers/net/vmxnet3/vmxnet3_ethdev.h index 7d3b11e..469db71 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethdev.h +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.h @@ -171,5 +171,7 @@ uint16_t vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); uint16_t vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t vmxnet3_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); #endif /* _VMXNET3_ETHDEV_H_ */ diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c index b109168..3651369 100644 --- a/drivers/net/vmxnet3/vmxnet3_rxtx.c +++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c @@ -69,6 +69,7 @@ #include #include #include +#include #include "base/vmxnet3_defs.h" #include "vmxnet3_ring.h" @@ -76,6 +77,14 @@ #include "vmxnet3_logs.h" #include "vmxnet3_ethdev.h" +#defineVMXNET3_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT | \ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#defineVMXNET3_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK) + static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2}; static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t); @@ -350,6 +359,53 @@ } uint16_t +vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int32_t ret; + uint32_t i; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i != nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /* Non-TSO packet cannot occupy more than +* VMXNET3_MAX_TXD_PER_PKT TX descriptors. +*/ + if ((ol_flags & PKT_TX_TCP_SEG) == 0 && + m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) { + rte_errno = -EINVAL; + return i; + } + + /* check that only supported TX offloads are requested. 
*/ + if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 || + (ol_flags & PKT_TX_L4_MASK) == + PKT_TX_SCTP_CKSUM) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + +uint16_t vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { -- 1.7.9.5
[dpdk-dev] [PATCH v14 5/8] ixgbe: add Tx preparation
Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev --- drivers/net/ixgbe/ixgbe_ethdev.c |3 ++ drivers/net/ixgbe/ixgbe_ethdev.h |5 +++- drivers/net/ixgbe/ixgbe_rxtx.c | 57 ++ drivers/net/ixgbe/ixgbe_rxtx.h |2 ++ 4 files changed, 66 insertions(+), 1 deletion(-) diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index baffc71..d726a2b 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -517,6 +517,8 @@ static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev, .nb_max = IXGBE_MAX_RING_DESC, .nb_min = IXGBE_MIN_RING_DESC, .nb_align = IXGBE_TXD_ALIGN, + .nb_seg_max = IXGBE_TX_MAX_SEG, + .nb_mtu_seg_max = IXGBE_TX_MAX_SEG, }; static const struct eth_dev_ops ixgbe_eth_dev_ops = { @@ -1103,6 +1105,7 @@ struct rte_ixgbe_xstats_name_off { eth_dev->dev_ops = &ixgbe_eth_dev_ops; eth_dev->rx_pkt_burst = &ixgbe_recv_pkts; eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts; + eth_dev->tx_pkt_prepare = &ixgbe_prep_pkts; /* * For secondary processes, we don't initialise any further as primary diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h index 4ff6338..e229cf5 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.h +++ b/drivers/net/ixgbe/ixgbe_ethdev.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf); diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c index b2d9f45..0bbc583 100644 --- a/drivers/net/ixgbe/ixgbe_rxtx.c +++ b/drivers/net/ixgbe/ixgbe_rxtx.c @@ -70,6 +70,7 @@ #include #include #include +#include #include "ixgbe_logs.h" #include "base/ixgbe_api.h" @@ -87,6 +88,9 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK) + #if 1 #define RTE_PMD_USE_PREFETCH #endif @@ -905,6 +909,57 @@ static inline int __attribute__((always_inline)) /* * + * TX prep functions + * + **/ +uint16_t +ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** +* Check if packet meets requirements for number of segments +* +* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and +* non-TSO +*/ + + if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) { + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + +/* + * * RX functions * **/ @@ -2282,6 +2337,7 @@ void __attribute__((cold)) if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) { 
PMD_INIT_LOG(DEBUG, "Using simple tx code path"); + dev->tx_pkt_prepare = NULL; #ifdef RTE_IXGBE_INC_VECTOR if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &
[dpdk-dev] [PATCH v14 8/8] testpmd: use Tx preparation in csum engine
Since all current drivers supports Tx preparation API, it is used in csum forwarding engine by default for all drivers. Adding additional step to the csum engine costs about 3-4% of performance drop, on my setup with ixgbe driver. It's caused mostly by the need of reaccessing and modification of packet data. Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev --- app/test-pmd/csumonly.c | 37 - app/test-pmd/testpmd.c |5 + app/test-pmd/testpmd.h |2 ++ 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index 57e6ae2..806f957 100644 --- a/app/test-pmd/csumonly.c +++ b/app/test-pmd/csumonly.c @@ -112,15 +112,6 @@ struct simple_gre_hdr { } __attribute__((__packed__)); static uint16_t -get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags) -{ - if (ethertype == _htons(ETHER_TYPE_IPv4)) - return rte_ipv4_phdr_cksum(l3_hdr, ol_flags); - else /* assume ethertype == ETHER_TYPE_IPv6 */ - return rte_ipv6_phdr_cksum(l3_hdr, ol_flags); -} - -static uint16_t get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype) { if (ethertype == _htons(ETHER_TYPE_IPv4)) @@ -370,11 +361,9 @@ struct simple_gre_hdr { /* do not recalculate udp cksum if it was 0 */ if (udp_hdr->dgram_cksum != 0) { udp_hdr->dgram_cksum = 0; - if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) { + if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) ol_flags |= PKT_TX_UDP_CKSUM; - udp_hdr->dgram_cksum = get_psd_sum(l3_hdr, - info->ethertype, ol_flags); - } else { + else { udp_hdr->dgram_cksum = get_udptcp_checksum(l3_hdr, udp_hdr, info->ethertype); @@ -383,15 +372,11 @@ struct simple_gre_hdr { } else if (info->l4_proto == IPPROTO_TCP) { tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len); tcp_hdr->cksum = 0; - if (tso_segsz) { + if (tso_segsz) ol_flags |= PKT_TX_TCP_SEG; - tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, - ol_flags); - } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) { + else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) ol_flags |= PKT_TX_TCP_CKSUM; - tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, - ol_flags); - } else { + else { tcp_hdr->cksum = get_udptcp_checksum(l3_hdr, tcp_hdr, info->ethertype); @@ -648,6 +633,7 @@ struct simple_gre_hdr { void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */ uint16_t nb_rx; uint16_t nb_tx; + uint16_t nb_prep; uint16_t i; uint64_t rx_ol_flags, tx_ol_flags; uint16_t testpmd_ol_flags; @@ -857,7 +843,16 @@ struct simple_gre_hdr { printf("\n"); } } - nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx); + + nb_prep = rte_eth_tx_prepare(fs->tx_port, fs->tx_queue, + pkts_burst, nb_rx); + if (nb_prep != nb_rx) + printf("Preparing packet burst to transmit failed: %s\n", + rte_strerror(rte_errno)); + + nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, + nb_prep); + /* * Retry if necessary */ diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index a0332c2..634f10b 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -180,6 +180,11 @@ struct fwd_engine * fwd_engines[] = { enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF; /**< Split policy for packets to TX. */ +/* + * Enable Tx preparation path in the "csum" engine. + */ +uint8_t tx_prepare; + uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */ uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. 
*/ diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index 9c1e703..488a6e1 100644 --- a/app/test-pmd/testpmd.h +++ b/app/test-pmd/testpmd.h @@ -383,6 +383,8 @@ enum tx_pkt_split { extern enum tx_pkt_split tx_pkt_split; +extern uint8_t tx_prepare; + extern uint16_t nb_pkt_per_burst; extern uint16_t mb_mempool_cache; extern int8_t rx_pthresh; -- 1.7.9.5
[dpdk-dev] [PATCH v14 7/8] ena: add Tx preparation
From: Konstantin Ananyev Signed-off-by: Konstantin Ananyev --- drivers/net/ena/ena_ethdev.c | 51 ++ 1 file changed, 51 insertions(+) diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c index 555fb31..51af723 100644 --- a/drivers/net/ena/ena_ethdev.c +++ b/drivers/net/ena/ena_ethdev.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "ena_ethdev.h" #include "ena_logs.h" @@ -168,6 +169,14 @@ struct ena_stats { #define PCI_DEVICE_ID_ENA_VF 0xEC20 #define PCI_DEVICE_ID_ENA_LLQ_VF 0xEC21 +#defineENA_TX_OFFLOAD_MASK (\ + PKT_TX_L4_MASK | \ + PKT_TX_IP_CKSUM |\ + PKT_TX_TCP_SEG) + +#defineENA_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK) + static struct rte_pci_id pci_id_ena_map[] = { { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) }, { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_LLQ_VF) }, @@ -179,6 +188,8 @@ static int ena_device_init(struct ena_com_dev *ena_dev, static int ena_dev_configure(struct rte_eth_dev *dev); static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +static uint16_t eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); static int ena_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, unsigned int socket_id, const struct rte_eth_txconf *tx_conf); @@ -1272,6 +1283,7 @@ static int eth_ena_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = &ena_dev_ops; eth_dev->rx_pkt_burst = ð_ena_recv_pkts; eth_dev->tx_pkt_burst = ð_ena_xmit_pkts; + eth_dev->tx_pkt_prepare = ð_ena_prep_pkts; adapter->rte_eth_dev_data = eth_dev->data; adapter->rte_dev = eth_dev; @@ -1570,6 +1582,45 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, return recv_idx; } +static uint16_t +eth_ena_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int32_t ret; + uint32_t i; + struct rte_mbuf *m; + uint64_t ol_flags; + + for (i = 0; i != nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) != 0 || + (ol_flags & PKT_TX_L4_MASK) == + PKT_TX_SCTP_CKSUM) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + /* ENA doesn't need different phdr cskum for TSO */ + ret = rte_net_intel_cksum_flags_prepare(m, + ol_flags & ~PKT_TX_TCP_SEG); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { -- 1.7.9.5
[dpdk-dev] [PATCH v15 0/8] add Tx preparation
r packet processing v11 changed: - updated comments - added information to the API description about packet data requirements/limitations. v10 changes: - moved drivers tx calback check in rte_eth_tx_prep after queue_id check v9 changes: - fixed headers structure fragmentation check - moved fragmentation check into rte_validate_tx_offload() v8 changes: - mbuf argument in rte_validate_tx_offload declared as const v7 changes: - comments reworded/added - changed errno values returned from Tx prep API - added check in rte_phdr_cksum_fix if headers are in the first data segment and can be safetly modified - moved rte_validate_tx_offload to rte_mbuf - moved rte_phdr_cksum_fix to rte_net.h - removed rte_pkt.h new file as useless v6 changes: - added performance impact test results to the patch description v5 changes: - rebased csum engine modification - added information to the csum engine about performance tests - some performance improvements v4 changes: - tx_prep is now set to default behavior (NULL) for simple/vector path in fm10k, i40e and ixgbe drivers to increase performance, when Tx offloads are not intentionally available v3 changes: - reworked csum testpmd engine instead adding new one, - fixed checksum initialization procedure to include also outer checksum offloads, - some minor formattings and optimalizations v2 changes: - rte_eth_tx_prep() returns number of packets when device doesn't support tx_prep functionality, - introduced CONFIG_RTE_ETHDEV_TX_PREP allowing to turn off tx_prep Konstantin Ananyev (2): ena: add Tx preparation vmxnet3: add Tx preparation Tomasz Kulasek (6): ethdev: add Tx preparation e1000: add Tx preparation fm10k: add Tx preparation i40e: add Tx preparation ixgbe: add Tx preparation testpmd: use Tx preparation in csum engine app/test-pmd/csumonly.c | 37 - app/test-pmd/testpmd.c |5 ++ config/common_base |8 ++ drivers/net/e1000/e1000_ethdev.h | 11 +++ drivers/net/e1000/em_ethdev.c|5 +- drivers/net/e1000/em_rxtx.c | 48 +++- drivers/net/e1000/igb_ethdev.c |4 + drivers/net/e1000/igb_rxtx.c | 53 - drivers/net/ena/ena_ethdev.c | 51 + drivers/net/fm10k/fm10k.h|6 ++ drivers/net/fm10k/fm10k_ethdev.c |5 ++ drivers/net/fm10k/fm10k_rxtx.c | 50 +++- drivers/net/i40e/i40e_ethdev.c |3 + drivers/net/i40e/i40e_rxtx.c | 74 +- drivers/net/i40e/i40e_rxtx.h |8 ++ drivers/net/ixgbe/ixgbe_ethdev.c |3 + drivers/net/ixgbe/ixgbe_ethdev.h |5 +- drivers/net/ixgbe/ixgbe_rxtx.c | 57 ++ drivers/net/ixgbe/ixgbe_rxtx.h |2 + drivers/net/vmxnet3/vmxnet3_ethdev.c |6 ++ drivers/net/vmxnet3/vmxnet3_ethdev.h |2 + drivers/net/vmxnet3/vmxnet3_rxtx.c | 56 ++ lib/librte_ether/rte_ethdev.h| 139 ++ lib/librte_mbuf/rte_mbuf.h | 64 lib/librte_net/rte_net.h | 110 +++ 25 files changed, 785 insertions(+), 27 deletions(-) -- 1.7.9.5
[dpdk-dev] [PATCH v15 1/8] ethdev: add Tx preparation
Added API for `rte_eth_tx_prepare` uint16_t rte_eth_tx_prepare(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) Added fields to the `struct rte_eth_desc_lim`: uint16_t nb_seg_max; /**< Max number of segments per whole packet. */ uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ These fields can be used to create valid packets according to the following rules: * For a non-TSO packet, a single transmit packet may span up to "nb_mtu_seg_max" buffers. * For a TSO packet the total number of data descriptors is "nb_seg_max", and each segment within the TSO may span up to "nb_mtu_seg_max". Added functions: int rte_validate_tx_offload(struct rte_mbuf *m) to validate general requirements for the tx offload flags set in the mbuf of a packet, such as flag completeness. In the current implementation this function is called optionally, when RTE_LIBRTE_ETHDEV_DEBUG is enabled. int rte_net_intel_cksum_prepare(struct rte_mbuf *m) to prepare the pseudo-header checksum for TSO and non-TSO tcp/udp packets before hardware tx checksum offload. - for non-TSO tcp/udp packets the full pseudo-header checksum is computed and set. - for TSO the IP payload length is not included. int rte_net_intel_cksum_flags_prepare(struct rte_mbuf *m, uint64_t ol_flags) this function uses the same logic as rte_net_intel_cksum_prepare, but allows the application to choose which offloads should be taken into account if full preparation is not required. PERFORMANCE TESTS - This feature was tested with the modified csum engine from test-pmd. The packet checksum preparation was moved from the application to the Tx preparation step placed before the burst. We may expect some overhead costs caused by: 1) using an additional callback before the burst, 2) rescanning the burst, 3) additional condition checking (packet validation), 4) worse optimization (e.g. packet data access, etc.) We tested it using the ixgbe Tx preparation implementation with some parts disabled to get comparable information about the impact of different parts of the implementation. IMPACT: 1) For an unimplemented Tx preparation callback the performance impact is negligible, 2) For the packet condition check without checksum modifications (nb_segs, available offloads, etc.) it is 14626628/14252168 (~2.62% drop), 3) Full support in the ixgbe driver (point 2 + packet checksum initialization) is 14060924/13588094 (~3.48% drop) Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev Acked-by: Olivier Matz --- config/common_base|8 +++ lib/librte_ether/rte_ethdev.h | 139 + lib/librte_mbuf/rte_mbuf.h| 64 +++ lib/librte_net/rte_net.h | 110 4 files changed, 321 insertions(+) diff --git a/config/common_base b/config/common_base index edb6a54..8e9dcfa 100644 --- a/config/common_base +++ b/config/common_base @@ -123,6 +123,14 @@ CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y # +# Turn off Tx preparation stage +# +# Warning: rte_ethdev_tx_prepare() can be safely disabled only if using a +# driver which does not implement any Tx preparation. +# +CONFIG_RTE_ETHDEV_TX_PREPARE_NOOP=n + +# # Support NIC bypass logic # CONFIG_RTE_NIC_BYPASS=n diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 52119af..86c16e0 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -182,6 +182,7 @@ #include #include #include +#include #include "rte_ether.h" #include "rte_eth_ctrl.h" #include "rte_dev_info.h" @@ -702,6 +703,29 @@ struct rte_eth_desc_lim { uint16_t nb_max; /**< Max allowed number of descriptors. 
*/ uint16_t nb_min; /**< Min allowed number of descriptors. */ uint16_t nb_align; /**< Number of descriptors should be aligned to. */ + + /** +* Max allowed number of segments per whole packet. +* +* - For TSO packet this is the total number of data descriptors allowed +* by device. +* +* @see nb_mtu_seg_max +*/ + uint16_t nb_seg_max; + + /** +* Max number of segments per one MTU. +* +* - For non-TSO packet, this is the maximum allowed number of segments +* in a single transmit packet. +* +* - For TSO packet each segment within the TSO may span up to this +* value. +* +* @see nb_seg_max +*/ + uint16_t nb_mtu_seg_max; }; /** @@ -1191,6 +1215,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq, uint16_t nb_pkts); /**< @internal Send output packets on a transmit queue of an Ethernet device. */ +typedef uint16_t (*eth_tx_prep_t)(void *txq, +
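To make the new limits concrete, here is a small hedged sketch of how an application could sanity-check a packet against the limits advertised by the driver before handing it over; it only covers the simple part of the rules above (total segment count), not the per-MTU window check for TSO, and the function name is made up.

#include <stdbool.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

static bool
pkt_fits_tx_limits(uint8_t port_id, const struct rte_mbuf *m)
{
	struct rte_eth_dev_info dev_info;

	rte_eth_dev_info_get(port_id, &dev_info);

	if (m->ol_flags & PKT_TX_TCP_SEG)
		/* TSO: total number of data descriptors is bounded by nb_seg_max. */
		return m->nb_segs <= dev_info.tx_desc_lim.nb_seg_max;

	/* Non-TSO: a single packet may span up to nb_mtu_seg_max buffers. */
	return m->nb_segs <= dev_info.tx_desc_lim.nb_mtu_seg_max;
}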
[dpdk-dev] [PATCH v15 3/8] fm10k: add Tx preparation
Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev --- drivers/net/fm10k/fm10k.h|6 + drivers/net/fm10k/fm10k_ethdev.c |5 drivers/net/fm10k/fm10k_rxtx.c | 50 +- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h index 05aa1a2..c6fed21 100644 --- a/drivers/net/fm10k/fm10k.h +++ b/drivers/net/fm10k/fm10k.h @@ -69,6 +69,9 @@ #define FM10K_MAX_RX_DESC (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc)) #define FM10K_MAX_TX_DESC (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc)) +#define FM10K_TX_MAX_SEG UINT8_MAX +#define FM10K_TX_MAX_MTU_SEG UINT8_MAX + /* * byte aligment for HW RX data buffer * Datasheet requires RX buffer addresses shall either be 512-byte aligned or @@ -356,6 +359,9 @@ uint16_t fm10k_recv_scattered_pkts(void *rx_queue, uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq); int fm10k_rx_vec_condition_check(struct rte_eth_dev *); void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq); diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index fe74f6d..6648468 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -1447,6 +1447,8 @@ static int fm10k_xstats_get_names(__rte_unused struct rte_eth_dev *dev, .nb_max = FM10K_MAX_TX_DESC, .nb_min = FM10K_MIN_TX_DESC, .nb_align = FM10K_MULT_TX_DESC, + .nb_seg_max = FM10K_TX_MAX_SEG, + .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G | @@ -2755,8 +2757,10 @@ static void __attribute__((cold)) fm10k_txq_vec_setup(txq); } dev->tx_pkt_burst = fm10k_xmit_pkts_vec; + dev->tx_pkt_prepare = NULL; } else { dev->tx_pkt_burst = fm10k_xmit_pkts; + dev->tx_pkt_prepare = fm10k_prep_pkts; PMD_INIT_LOG(DEBUG, "Use regular Tx func"); } } @@ -2835,6 +2839,7 @@ static void __attribute__((cold)) dev->dev_ops = &fm10k_eth_dev_ops; dev->rx_pkt_burst = &fm10k_recv_pkts; dev->tx_pkt_burst = &fm10k_xmit_pkts; + dev->tx_pkt_prepare = &fm10k_prep_pkts; /* only initialize in the primary process */ if (rte_eal_process_type() != RTE_PROC_PRIMARY) diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c index 32cc7ff..144e5e6 100644 --- a/drivers/net/fm10k/fm10k_rxtx.c +++ b/drivers/net/fm10k/fm10k_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -35,6 +35,7 @@ #include #include +#include #include "fm10k.h" #include "base/fm10k_type.h" @@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd) } #endif +#define FM10K_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT |\ + PKT_TX_IP_CKSUM |\ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#define FM10K_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK) + /* @note: When this function is changed, make corresponding change to * fm10k_dev_supported_ptypes_get() */ @@ -597,3 +607,41 @@ static inline void tx_xmit_pkt(struct fm10k_tx_queue *q, struct rte_mbuf *mb) return count; } + +uint16_t +fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if ((m->ol_flags & PKT_TX_TCP_SEG) && + (m->tso_segsz < FM10K_TSO_MINMSS)) { + rte_errno = -EINVAL; + return i; + } + + if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} -- 1.7.9.5
[dpdk-dev] [PATCH v15 2/8] e1000: add Tx preparation
Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev --- drivers/net/e1000/e1000_ethdev.h | 11 drivers/net/e1000/em_ethdev.c|5 +++- drivers/net/e1000/em_rxtx.c | 48 +- drivers/net/e1000/igb_ethdev.c |4 +++ drivers/net/e1000/igb_rxtx.c | 53 +- 5 files changed, 118 insertions(+), 3 deletions(-) diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h index 6c25c8d..bd0f277 100644 --- a/drivers/net/e1000/e1000_ethdev.h +++ b/drivers/net/e1000/e1000_ethdev.h @@ -138,6 +138,11 @@ #define E1000_MISC_VEC_ID RTE_INTR_VEC_ZERO_OFFSET #define E1000_RX_VEC_START RTE_INTR_VEC_RXTX_OFFSET +#define IGB_TX_MAX_SEG UINT8_MAX +#define IGB_TX_MAX_MTU_SEG UINT8_MAX +#define EM_TX_MAX_SEG UINT8_MAX +#define EM_TX_MAX_MTU_SEG UINT8_MAX + /* structure for interrupt relative data */ struct e1000_interrupt { uint32_t flags; @@ -315,6 +320,9 @@ int eth_igb_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); @@ -376,6 +384,9 @@ int eth_em_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c index 866a5cf..00d5996 100644 --- a/drivers/net/e1000/em_ethdev.c +++ b/drivers/net/e1000/em_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -300,6 +300,7 @@ static int eth_em_set_mc_addr_list(struct rte_eth_dev *dev, eth_dev->dev_ops = ð_em_ops; eth_dev->rx_pkt_burst = (eth_rx_burst_t)ð_em_recv_pkts; eth_dev->tx_pkt_burst = (eth_tx_burst_t)ð_em_xmit_pkts; + eth_dev->tx_pkt_prepare = (eth_tx_prep_t)ð_em_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -1079,6 +1080,8 @@ static int eth_em_set_mc_addr_list(struct rte_eth_dev *dev, .nb_max = E1000_MAX_RING_DESC, .nb_min = E1000_MIN_RING_DESC, .nb_align = EM_TXD_ALIGN, + .nb_seg_max = EM_TX_MAX_SEG, + .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M | diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c index 41f51c0..7e271ad 100644 --- a/drivers/net/e1000/em_rxtx.c +++ b/drivers/net/e1000/em_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -66,6 +66,7 @@ #include #include #include +#include #include #include "e1000_logs.h" @@ -77,6 +78,14 @@ #define E1000_RXDCTL_GRAN 0x0100 /* RXDCTL Granularity */ +#define E1000_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_VLAN_PKT) + +#define E1000_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK) + /** * Structure associated with each descriptor of the RX ring of a RX queue. */ @@ -618,6 +627,43 @@ struct em_tx_queue { /* * + * TX prep functions + * + **/ +uint16_t +eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; +
[dpdk-dev] [PATCH v15 4/8] i40e: add Tx preparation
Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev --- drivers/net/i40e/i40e_ethdev.c |3 ++ drivers/net/i40e/i40e_rxtx.c | 74 +++- drivers/net/i40e/i40e_rxtx.h |8 + 3 files changed, 84 insertions(+), 1 deletion(-) diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index b0c0fbf..0e20178 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -944,6 +944,7 @@ static inline void i40e_GLQF_reg_init(struct i40e_hw *hw) dev->dev_ops = &i40e_eth_dev_ops; dev->rx_pkt_burst = i40e_recv_pkts; dev->tx_pkt_burst = i40e_xmit_pkts; + dev->tx_pkt_prepare = i40e_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -2646,6 +2647,8 @@ static int i40e_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev, .nb_max = I40E_MAX_RING_DESC, .nb_min = I40E_MIN_RING_DESC, .nb_align = I40E_ALIGN_RING_DESC, + .nb_seg_max = I40E_TX_MAX_SEG, + .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG, }; if (pf->flags & I40E_FLAG_VMDQ) { diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 7ae7d9f..1c9a6c8 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,6 +50,8 @@ #include #include #include +#include +#include #include "i40e_logs.h" #include "base/i40e_prototype.h" @@ -79,6 +81,17 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define I40E_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_OUTER_IP_CKSUM | \ + PKT_TX_TCP_SEG |\ + PKT_TX_QINQ_PKT | \ + PKT_TX_VLAN_PKT) + +#define I40E_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK) + static uint16_t i40e_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); @@ -1411,6 +1424,63 @@ static inline int __attribute__((always_inline)) return nb_tx; } +/* + * + * TX prep functions + * + **/ +uint16_t +i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** +* m->nb_segs is uint8_t, so nb_segs is always less than +* I40E_TX_MAX_SEG. +* We check only a condition for nb_segs > I40E_TX_MAX_MTU_SEG. +*/ + if (!(ol_flags & PKT_TX_TCP_SEG)) { + if (m->nb_segs > I40E_TX_MAX_MTU_SEG) { + rte_errno = -EINVAL; + return i; + } + } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) || + (m->tso_segsz > I40E_MAX_TSO_MSS)) { + /* MSS outside the range (256B - 9674B) are considered +* malicious +*/ + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + return i; +} + /* * Find the VSI the queue belongs to. 'queue_idx' is the queue index * application used, which assume having sequential ones. But from driver's @@ -2763,9 +2833,11 @@ void __attribute__((cold)) PMD_INIT_LOG(DEBUG, "Simple tx finally be used."); dev
[dpdk-dev] [PATCH v15 5/8] ixgbe: add Tx preparation
Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev --- drivers/net/ixgbe/ixgbe_ethdev.c |3 ++ drivers/net/ixgbe/ixgbe_ethdev.h |5 +++- drivers/net/ixgbe/ixgbe_rxtx.c | 57 ++ drivers/net/ixgbe/ixgbe_rxtx.h |2 ++ 4 files changed, 66 insertions(+), 1 deletion(-) diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index baffc71..d726a2b 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -517,6 +517,8 @@ static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev, .nb_max = IXGBE_MAX_RING_DESC, .nb_min = IXGBE_MIN_RING_DESC, .nb_align = IXGBE_TXD_ALIGN, + .nb_seg_max = IXGBE_TX_MAX_SEG, + .nb_mtu_seg_max = IXGBE_TX_MAX_SEG, }; static const struct eth_dev_ops ixgbe_eth_dev_ops = { @@ -1103,6 +1105,7 @@ struct rte_ixgbe_xstats_name_off { eth_dev->dev_ops = &ixgbe_eth_dev_ops; eth_dev->rx_pkt_burst = &ixgbe_recv_pkts; eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts; + eth_dev->tx_pkt_prepare = &ixgbe_prep_pkts; /* * For secondary processes, we don't initialise any further as primary diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h index 4ff6338..e229cf5 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.h +++ b/drivers/net/ixgbe/ixgbe_ethdev.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf); diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c index b2d9f45..0bbc583 100644 --- a/drivers/net/ixgbe/ixgbe_rxtx.c +++ b/drivers/net/ixgbe/ixgbe_rxtx.c @@ -70,6 +70,7 @@ #include #include #include +#include #include "ixgbe_logs.h" #include "base/ixgbe_api.h" @@ -87,6 +88,9 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK) + #if 1 #define RTE_PMD_USE_PREFETCH #endif @@ -905,6 +909,57 @@ static inline int __attribute__((always_inline)) /* * + * TX prep functions + * + **/ +uint16_t +ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** +* Check if packet meets requirements for number of segments +* +* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and +* non-TSO +*/ + + if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) { + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + +/* + * * RX functions * **/ @@ -2282,6 +2337,7 @@ void __attribute__((cold)) if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) { 
PMD_INIT_LOG(DEBUG, "Using simple tx code path"); + dev->tx_pkt_prepare = NULL; #ifdef RTE_IXGBE_INC_VECTOR if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &
[dpdk-dev] [PATCH v15 6/8] vmxnet3: add Tx preparation
From: "Ananyev, Konstantin" Signed-off-by: Konstantin Ananyev Acked-by: Yong Wang --- drivers/net/vmxnet3/vmxnet3_ethdev.c |6 drivers/net/vmxnet3/vmxnet3_ethdev.h |2 ++ drivers/net/vmxnet3/vmxnet3_rxtx.c | 56 ++ 3 files changed, 64 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c index 93c9ac9..e31896f 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c @@ -69,6 +69,8 @@ #define PROCESS_SYS_EVENTS 0 +#defineVMXNET3_TX_MAX_SEG UINT8_MAX + static int eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev); static int eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev); static int vmxnet3_dev_configure(struct rte_eth_dev *dev); @@ -237,6 +239,7 @@ static void vmxnet3_mac_addr_set(struct rte_eth_dev *dev, eth_dev->dev_ops = &vmxnet3_eth_dev_ops; eth_dev->rx_pkt_burst = &vmxnet3_recv_pkts; eth_dev->tx_pkt_burst = &vmxnet3_xmit_pkts; + eth_dev->tx_pkt_prepare = vmxnet3_prep_pkts; pci_dev = eth_dev->pci_dev; /* @@ -326,6 +329,7 @@ static void vmxnet3_mac_addr_set(struct rte_eth_dev *dev, eth_dev->dev_ops = NULL; eth_dev->rx_pkt_burst = NULL; eth_dev->tx_pkt_burst = NULL; + eth_dev->tx_pkt_prepare = NULL; rte_free(eth_dev->data->mac_addrs); eth_dev->data->mac_addrs = NULL; @@ -728,6 +732,8 @@ static void vmxnet3_mac_addr_set(struct rte_eth_dev *dev, .nb_max = VMXNET3_TX_RING_MAX_SIZE, .nb_min = VMXNET3_DEF_TX_RING_SIZE, .nb_align = 1, + .nb_seg_max = VMXNET3_TX_MAX_SEG, + .nb_mtu_seg_max = VMXNET3_MAX_TXD_PER_PKT, }; dev_info->rx_offload_capa = diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.h b/drivers/net/vmxnet3/vmxnet3_ethdev.h index 7d3b11e..469db71 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethdev.h +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.h @@ -171,5 +171,7 @@ uint16_t vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); uint16_t vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t vmxnet3_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); #endif /* _VMXNET3_ETHDEV_H_ */ diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c index b109168..3651369 100644 --- a/drivers/net/vmxnet3/vmxnet3_rxtx.c +++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c @@ -69,6 +69,7 @@ #include #include #include +#include #include "base/vmxnet3_defs.h" #include "vmxnet3_ring.h" @@ -76,6 +77,14 @@ #include "vmxnet3_logs.h" #include "vmxnet3_ethdev.h" +#defineVMXNET3_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT | \ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#defineVMXNET3_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK) + static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2}; static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t); @@ -350,6 +359,53 @@ } uint16_t +vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int32_t ret; + uint32_t i; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i != nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /* Non-TSO packet cannot occupy more than +* VMXNET3_MAX_TXD_PER_PKT TX descriptors. +*/ + if ((ol_flags & PKT_TX_TCP_SEG) == 0 && + m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) { + rte_errno = -EINVAL; + return i; + } + + /* check that only supported TX offloads are requested. 
*/ + if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 || + (ol_flags & PKT_TX_L4_MASK) == + PKT_TX_SCTP_CKSUM) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + +uint16_t vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { -- 1.7.9.5
[dpdk-dev] [PATCH v15 7/8] ena: add Tx preparation
From: Konstantin Ananyev Signed-off-by: Konstantin Ananyev --- drivers/net/ena/ena_ethdev.c | 51 ++ 1 file changed, 51 insertions(+) diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c index 555fb31..51af723 100644 --- a/drivers/net/ena/ena_ethdev.c +++ b/drivers/net/ena/ena_ethdev.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "ena_ethdev.h" #include "ena_logs.h" @@ -168,6 +169,14 @@ struct ena_stats { #define PCI_DEVICE_ID_ENA_VF 0xEC20 #define PCI_DEVICE_ID_ENA_LLQ_VF 0xEC21 +#defineENA_TX_OFFLOAD_MASK (\ + PKT_TX_L4_MASK | \ + PKT_TX_IP_CKSUM |\ + PKT_TX_TCP_SEG) + +#defineENA_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ ENA_TX_OFFLOAD_MASK) + static struct rte_pci_id pci_id_ena_map[] = { { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_VF) }, { RTE_PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_ENA_LLQ_VF) }, @@ -179,6 +188,8 @@ static int ena_device_init(struct ena_com_dev *ena_dev, static int ena_dev_configure(struct rte_eth_dev *dev); static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +static uint16_t eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); static int ena_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, unsigned int socket_id, const struct rte_eth_txconf *tx_conf); @@ -1272,6 +1283,7 @@ static int eth_ena_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = &ena_dev_ops; eth_dev->rx_pkt_burst = ð_ena_recv_pkts; eth_dev->tx_pkt_burst = ð_ena_xmit_pkts; + eth_dev->tx_pkt_prepare = ð_ena_prep_pkts; adapter->rte_eth_dev_data = eth_dev->data; adapter->rte_dev = eth_dev; @@ -1570,6 +1582,45 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, return recv_idx; } +static uint16_t +eth_ena_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int32_t ret; + uint32_t i; + struct rte_mbuf *m; + uint64_t ol_flags; + + for (i = 0; i != nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) != 0 || + (ol_flags & PKT_TX_L4_MASK) == + PKT_TX_SCTP_CKSUM) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + /* ENA doesn't need different phdr cskum for TSO */ + ret = rte_net_intel_cksum_flags_prepare(m, + ol_flags & ~PKT_TX_TCP_SEG); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { -- 1.7.9.5
[dpdk-dev] [PATCH v15 8/8] testpmd: use Tx preparation in csum engine
Since all current drivers supports Tx preparation API, it is used in csum forwarding engine by default for all drivers. Adding additional step to the csum engine costs about 3-4% of performance drop, on my setup with ixgbe driver. It's caused mostly by the need of reaccessing and modification of packet data. Signed-off-by: Tomasz Kulasek Acked-by: Konstantin Ananyev --- app/test-pmd/csumonly.c | 37 - app/test-pmd/testpmd.c |5 + 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index 57e6ae2..806f957 100644 --- a/app/test-pmd/csumonly.c +++ b/app/test-pmd/csumonly.c @@ -112,15 +112,6 @@ struct simple_gre_hdr { } __attribute__((__packed__)); static uint16_t -get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags) -{ - if (ethertype == _htons(ETHER_TYPE_IPv4)) - return rte_ipv4_phdr_cksum(l3_hdr, ol_flags); - else /* assume ethertype == ETHER_TYPE_IPv6 */ - return rte_ipv6_phdr_cksum(l3_hdr, ol_flags); -} - -static uint16_t get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype) { if (ethertype == _htons(ETHER_TYPE_IPv4)) @@ -370,11 +361,9 @@ struct simple_gre_hdr { /* do not recalculate udp cksum if it was 0 */ if (udp_hdr->dgram_cksum != 0) { udp_hdr->dgram_cksum = 0; - if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) { + if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) ol_flags |= PKT_TX_UDP_CKSUM; - udp_hdr->dgram_cksum = get_psd_sum(l3_hdr, - info->ethertype, ol_flags); - } else { + else { udp_hdr->dgram_cksum = get_udptcp_checksum(l3_hdr, udp_hdr, info->ethertype); @@ -383,15 +372,11 @@ struct simple_gre_hdr { } else if (info->l4_proto == IPPROTO_TCP) { tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len); tcp_hdr->cksum = 0; - if (tso_segsz) { + if (tso_segsz) ol_flags |= PKT_TX_TCP_SEG; - tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, - ol_flags); - } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) { + else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) ol_flags |= PKT_TX_TCP_CKSUM; - tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, - ol_flags); - } else { + else { tcp_hdr->cksum = get_udptcp_checksum(l3_hdr, tcp_hdr, info->ethertype); @@ -648,6 +633,7 @@ struct simple_gre_hdr { void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */ uint16_t nb_rx; uint16_t nb_tx; + uint16_t nb_prep; uint16_t i; uint64_t rx_ol_flags, tx_ol_flags; uint16_t testpmd_ol_flags; @@ -857,7 +843,16 @@ struct simple_gre_hdr { printf("\n"); } } - nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx); + + nb_prep = rte_eth_tx_prepare(fs->tx_port, fs->tx_queue, + pkts_burst, nb_rx); + if (nb_prep != nb_rx) + printf("Preparing packet burst to transmit failed: %s\n", + rte_strerror(rte_errno)); + + nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, + nb_prep); + /* * Retry if necessary */ diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index a0332c2..634f10b 100644 --- a/app/test-pmd/testpmd.c +++ b/app/test-pmd/testpmd.c @@ -180,6 +180,11 @@ struct fwd_engine * fwd_engines[] = { enum tx_pkt_split tx_pkt_split = TX_PKT_SPLIT_OFF; /**< Split policy for packets to TX. */ +/* + * Enable Tx preparation path in the "csum" engine. + */ +uint8_t tx_prepare; + uint16_t nb_pkt_per_burst = DEF_PKT_BURST; /**< Number of packets per burst. */ uint16_t mb_mempool_cache = DEF_MBUF_CACHE; /**< Size of mbuf mempool cache. */ -- 1.7.9.5
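For context on why get_psd_sum() could be removed from csumonly.c: the pseudo-header checksum it used to seed into the L4 checksum field is now filled in by the Tx preparation stage. Below is a rough, hedged sketch of that per-packet fixup for an IPv4/TCP packet, not code from the patch; the function name is invented and the header offsets are taken from the mbuf metadata.

#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_mbuf.h>

/* Seed the TCP checksum with the IPv4 pseudo-header sum so that hardware
 * checksum offload (or TSO) can finish the computation. */
static void
seed_ipv4_tcp_phdr_cksum(struct rte_mbuf *m, uint64_t ol_flags)
{
	struct ipv4_hdr *ip;
	struct tcp_hdr *tcp;

	ip = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
	tcp = (struct tcp_hdr *)((char *)ip + m->l3_len);

	ip->hdr_checksum = 0;
	tcp->cksum = rte_ipv4_phdr_cksum(ip, ol_flags);
}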
[dpdk-dev] [PATCH v2 0/5] Chained Mbufs support in SW PMDs
This patch set adds support of scattered-gather list for SW PMDs. As of now, application needs to reserve continuous block of memory for mbufs which is not always the case. Hence needed to support chaining of mbufs which are smaller in size but can be used if chained. Above work involves: a) Create mbuf functions to coalesce mbuf chains into a single mbuf. b) For each software poll mode driver code to detect chained mbufs support and coalesce these before preforming crypto. c) Add relevant unit tests to test the functionality. Known limitations for openssl PMD: While libcrypto library expects continuous destination buffer for output of cipher operations, implementation of openssl PMD is limited the same way, and requires contigous destination mbuf. changes in v2: - add support for sgl in openssl PMD - rte_pktmbuf_coalesce replaced with rte_pktmbuf_linearize - extended test vector data for aes gcm from 60 to 2048 bytes Tomasz Kulasek (5): rte_mbuf: add rte_pktmbuf_linearize test: add rte_pktmbuf_linearize unit tests crypto: add sgl support in sw PMDs crypto: add sgl support in openssl PMD test: add sgl unit tests for crypto devices app/test/test_cryptodev.c | 589 +++- app/test/test_cryptodev.h | 139 +++ app/test/test_cryptodev_aes_test_vectors.h | 54 ++- app/test/test_cryptodev_blockcipher.c | 181 ++--- app/test/test_cryptodev_blockcipher.h |3 +- app/test/test_cryptodev_des_test_vectors.h | 29 ++ app/test/test_cryptodev_gcm_test_vectors.h | 557 +- app/test/test_mbuf.c | 123 ++ doc/guides/cryptodevs/openssl.rst |2 +- drivers/crypto/aesni_gcm/aesni_gcm_pmd.c | 14 + drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 19 +- drivers/crypto/kasumi/rte_kasumi_pmd.c | 13 + drivers/crypto/null/null_crypto_pmd.c |3 +- drivers/crypto/openssl/rte_openssl_pmd.c | 329 drivers/crypto/snow3g/rte_snow3g_pmd.c | 15 + drivers/crypto/zuc/rte_zuc_pmd.c | 13 + lib/librte_cryptodev/rte_cryptodev.c |4 +- lib/librte_cryptodev/rte_cryptodev.h |3 +- lib/librte_mbuf/rte_mbuf.h | 56 +++ 19 files changed, 1991 insertions(+), 155 deletions(-) -- 1.7.9.5
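Taken together, points (a) and (b) amount to the usage pattern sketched below once the series is applied. The helper name and its placement are illustrative only; the feature flag itself is introduced in patch 3/5:

    /* Sketch: keep chained source mbufs only for devices that advertise native
     * scatter-gather support; otherwise flatten them before the operation.
     * rte_pktmbuf_linearize() is a no-op for a single-segment mbuf and returns
     * -1 if the head segment's tailroom cannot absorb the rest of the chain. */
    #include <rte_crypto.h>
    #include <rte_cryptodev.h>
    #include <rte_mbuf.h>

    static int
    ensure_flat_for_device(uint8_t dev_id, struct rte_crypto_op *op)
    {
            struct rte_cryptodev_info info;

            rte_cryptodev_info_get(dev_id, &info);
            if (info.feature_flags & RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER)
                    return 0;               /* device walks segments natively */
            if (rte_pktmbuf_is_contiguous(op->sym->m_src))
                    return 0;               /* already contiguous */
            return rte_pktmbuf_linearize(op->sym->m_src);
    }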
[dpdk-dev] [PATCH v2 1/5] rte_mbuf: add rte_pktmbuf_linearize
This patch adds function rte_pktmbuf_linearize to let crypto PMD coalesce chained mbuf before crypto operation and extend their capabilities to support segmented mbufs when device cannot handle them natively. Signed-off-by: Tomasz Kulasek --- lib/librte_mbuf/rte_mbuf.h | 56 1 file changed, 56 insertions(+) diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index ead7c6e..b11a31d 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -1647,6 +1647,62 @@ static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail } /** + * Linearize data in mbuf. + * + * This function coalesce mbuf merging data in the first segment, unchaining + * rest, and then frees them. + * + * All operations are done in-place, so the structure of incoming mbuf + * is changed. + * + * @param mbuf + * mbuf to linearize + * @return + * - 0, on success + * - -1, on error + */ +static inline int +rte_pktmbuf_linearize(struct rte_mbuf *mbuf) +{ + int l, n; + struct rte_mbuf *m; + struct rte_mbuf *m_next; + char *buffer; + + if (rte_pktmbuf_is_contiguous(mbuf)) + return 0; + + /* Extend first segment to the total packet length +*/ + n = rte_pktmbuf_pkt_len(mbuf) - rte_pktmbuf_data_len(mbuf); + + if (unlikely(n > rte_pktmbuf_tailroom(mbuf))) + return -1; + + buffer = rte_pktmbuf_mtod_offset(mbuf, char *, mbuf->data_len); + mbuf->data_len = (uint16_t)(mbuf->pkt_len); + + /* Append data from next segments to the first one +*/ + m = mbuf->next; + while (m != NULL) { + m_next = m->next; + + l = rte_pktmbuf_data_len(m); + rte_memcpy(buffer, rte_pktmbuf_mtod(m, char *), l); + buffer += l; + + rte_pktmbuf_free_seg(m); + m = m_next; + } + + mbuf->next = NULL; + mbuf->nb_segs = 1; + + return 0; +} + +/** * Dump an mbuf structure to a file. * * Dump all fields for the given packet mbuf and all its associated -- 1.7.9.5
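A minimal caller-side sketch of the new API, assuming the head segment was allocated with enough tailroom for the whole packet:

    /* Sketch: coalesce a chained mbuf in place before handing it to code that
     * expects contiguous data. On failure the chain is left unmodified (the
     * tailroom check happens before anything is touched), so the mbuf can
     * still be freed or handled another way. */
    if (rte_pktmbuf_linearize(pkt) != 0) {
            /* head segment lacks tailroom for the trailing segments */
            rte_pktmbuf_free(pkt);
            return -1;
    }
    /* the whole payload is now reachable through one pointer */
    uint8_t *payload = rte_pktmbuf_mtod(pkt, uint8_t *);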
[dpdk-dev] [PATCH v2 2/5] test: add rte_pktmbuf_linearize unit tests
This patch tests rte_pktmbuf_coalesce functionality: 1) Creates banch of segmented mbufs with different size and number of segments. 2) Generates pkt_len bytes of random data. 3) Fills noncontigouos mbuf with randomly generated data. 4) Uses rte_pktmbuf_linearize to coalesce segmented buffer into one contiguous. 5) Verifies data in linearized buffer. Signed-off-by: Tomasz Kulasek --- app/test/test_mbuf.c | 123 ++ 1 file changed, 123 insertions(+) diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c index c0823ea..39577e7 100644 --- a/app/test/test_mbuf.c +++ b/app/test/test_mbuf.c @@ -930,6 +930,124 @@ return 0; } +static int +test_mbuf_linearize(int pkt_len, int nb_segs) { + + struct rte_mbuf *m = NULL, *mbuf_src = NULL; + uint8_t data[pkt_len], *src, *dst; + int data_len = 0; + int i, size; + int t_len; + + if (pkt_len < 1) { + printf("Packet size must be 1 or more (is %d)\n", pkt_len); + return -1; + } + + if (nb_segs < 1) { + printf("Number of segments must be 1 or more (is %d)\n", + nb_segs); + return -1; + } + + /* Setup buffer */ + for (i = 0; i < pkt_len; i++) + data[i] = (uint8_t) rte_rand(); + + t_len = pkt_len >= nb_segs ? pkt_len / nb_segs : 1; + src = data; + size = pkt_len; + + /* Create chained mbuf_src and fill it generated data */ + for (i = 0; size > 0; i++) { + + m = rte_pktmbuf_alloc(pktmbuf_pool); + if (i == 0) + mbuf_src = m; + + if (!m) { + printf("Cannot create segment for source mbuf"); + goto fail; + } + + /* Make sure if tailroom is zeroed */ + memset(rte_pktmbuf_mtod(m, uint8_t *), 0, + rte_pktmbuf_tailroom(m)); + + data_len = size > t_len ? t_len : size; + dst = (uint8_t *)rte_pktmbuf_append(m, data_len); + if (!dst) { + printf("Cannot append %d bytes to the mbuf\n", + data_len); + goto fail; + } + + rte_memcpy(dst, src, data_len); + src += data_len; + + if (mbuf_src != m) + rte_pktmbuf_chain(mbuf_src, m); + + size -= data_len; + + } + + /* Create destination buffer to store coalesced data */ + if (rte_pktmbuf_linearize(mbuf_src)) { + printf("Mbuf linearization failed\n"); + goto fail; + } + + if (!rte_pktmbuf_is_contiguous(mbuf_src)) { + printf("Source buffer should be contiguous after " + "linearization\n"); + goto fail; + } + + src = rte_pktmbuf_mtod(mbuf_src, uint8_t *); + + if (memcmp(src, data, rte_pktmbuf_pkt_len(mbuf_src))) { + printf("Incorrect data in coalesced mbuf\n"); + goto fail; + } + + if (mbuf_src) + rte_pktmbuf_free(mbuf_src); + return 0; + +fail: + if (mbuf_src) + rte_pktmbuf_free(mbuf_src); + return -1; +} + +static int +test_mbuf_linearize_check(void) +{ + struct test_mbuf_array { + int size; + int nb_segs; + } mbuf_array[5] = { + { 128, 1 }, + { 64, 64 }, + { 512, 10 }, + { 250, 11 }, + { 123, 8 }, + }; + unsigned int i; + + printf("Test mbuf linearize API\n"); + + for (i = 0; i < RTE_DIM(mbuf_array); i++) + if (test_mbuf_linearize(mbuf_array[i].size, + mbuf_array[i].nb_segs)) { + printf("Test failed for %d, %d\n", mbuf_array[i].size, + mbuf_array[i].nb_segs); + return -1; + } + + return 0; +} static int test_mbuf(void) @@ -1023,6 +1141,11 @@ printf("test_failing_mbuf_sanity_check() failed\n"); return -1; } + + if (test_mbuf_linearize_check() < 0) { + printf("test_mbuf_linearize_check() failed\n"); + return -1; + } return 0; } -- 1.7.9.5
[dpdk-dev] [PATCH v2 3/5] crypto: add sgl support in sw PMDs
This patch introduces RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER feature flag informing that selected crypto device supports segmented mbufs natively and doesn't need to be coalesced before crypto operation. While using segmented buffers in crypto devices may have unpredictable results, for PMDs which doesn't support it natively, additional check is made for debug compilation. Signed-off-by: Tomasz Kulasek --- drivers/crypto/aesni_gcm/aesni_gcm_pmd.c | 14 ++ drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 19 --- drivers/crypto/kasumi/rte_kasumi_pmd.c | 13 + drivers/crypto/null/null_crypto_pmd.c |3 ++- drivers/crypto/snow3g/rte_snow3g_pmd.c | 15 +++ drivers/crypto/zuc/rte_zuc_pmd.c | 13 + lib/librte_cryptodev/rte_cryptodev.c |4 ++-- lib/librte_cryptodev/rte_cryptodev.h |3 ++- 8 files changed, 77 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c b/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c index dba5e15..1a6120c 100644 --- a/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c +++ b/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c @@ -375,6 +375,20 @@ break; } +#ifdef RTE_LIBRTE_PMD_AESNI_GCM_DEBUG + if (!rte_pktmbuf_is_contiguous(ops[i]->sym->m_src) || + (ops[i]->sym->m_dst != NULL && + !rte_pktmbuf_is_contiguous( + ops[i]->sym->m_dst))) { + ops[i]->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; + GCM_LOG_ERR("PMD supports only contiguous mbufs, " + "op (%p) provides noncontiguous mbuf as " + "source/destination buffer.\n", ops[i]); + qp->qp_stats.enqueue_err_count++; + break; + } +#endif + retval = process_gcm_crypto_op(qp, ops[i]->sym, sess); if (retval < 0) { ops[i]->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; diff --git a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c index f07cd07..b5e115e 100644 --- a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c +++ b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c @@ -529,15 +529,28 @@ int i, processed_jobs = 0; for (i = 0; i < nb_ops; i++) { -#ifdef RTE_LIBRTE_AESNI_MB_DEBUG - if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC)) { +#ifdef RTE_LIBRTE_PMD_AESNI_MB_DEBUG + if (unlikely(ops[i]->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC)) { MB_LOG_ERR("PMD only supports symmetric crypto " "operation requests, op (%p) is not a " - "symmetric operation.", op); + "symmetric operation.", ops[i]); + qp->stats.enqueue_err_count++; + goto flush_jobs; + } + + if (!rte_pktmbuf_is_contiguous(ops[i]->sym->m_src) || + (ops[i]->sym->m_dst != NULL && + !rte_pktmbuf_is_contiguous( + ops[i]->sym->m_dst))) { + MB_LOG_ERR("PMD supports only contiguous mbufs, " + "op (%p) provides noncontiguous mbuf as " + "source/destination buffer.\n", ops[i]); + ops[i]->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; qp->stats.enqueue_err_count++; goto flush_jobs; } #endif + sess = get_session(qp, ops[i]); if (unlikely(sess == NULL)) { qp->stats.enqueue_err_count++; diff --git a/drivers/crypto/kasumi/rte_kasumi_pmd.c b/drivers/crypto/kasumi/rte_kasumi_pmd.c index b119da2..4bdd7bb 100644 --- a/drivers/crypto/kasumi/rte_kasumi_pmd.c +++ b/drivers/crypto/kasumi/rte_kasumi_pmd.c @@ -455,6 +455,19 @@ for (i = 0; i < nb_ops; i++) { curr_c_op = ops[i]; +#ifdef RTE_LIBRTE_PMD_KASUMI_DEBUG + if (!rte_pktmbuf_is_contiguous(curr_c_op->sym->m_src) || + (curr_c_op->sym->m_dst != NULL && + !rte_pktmbuf_is_contiguous( + curr_c_op->sym->m_dst))) { + KASUMI_LOG_ERR("PMD supports only contiguous mbufs, " + "op (%p) provides noncontiguous mbuf as " + "s
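For reference, a PMD that does handle chained mbufs natively advertises it through the new bit when it registers its capabilities; the placement below is illustrative, not copied from any of the drivers touched here:

    /* Sketch: advertise native scatter-gather support at device create time,
     * alongside the flags the PMD already sets. */
    dev->feature_flags = RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO |
                         RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER;

PMDs that do not set the flag get the contiguity guard shown in the hunks above, compiled in only for debug builds.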
[dpdk-dev] [PATCH v2 4/5] crypto: add sgl support in openssl PMD
Previous implementation uses EVP_EncryptUpdate() on whole source buffer limiting its usage to the contiguous buffers. This implementation calls EVP_EncryptUpdate() on each segment for cipher operations in continous mode, before finalization allowing to provide chained mbuf as a source. However, libcrypto library expects continuous destination buffer for output of cipher operations and implementation of openssl PMD is limited the same way, requiring contiguous destination mbuf. Signed-off-by: Tomasz Kulasek --- doc/guides/cryptodevs/openssl.rst|2 +- drivers/crypto/openssl/rte_openssl_pmd.c | 329 +++--- 2 files changed, 258 insertions(+), 73 deletions(-) diff --git a/doc/guides/cryptodevs/openssl.rst b/doc/guides/cryptodevs/openssl.rst index d2b5906..05e6d67 100644 --- a/doc/guides/cryptodevs/openssl.rst +++ b/doc/guides/cryptodevs/openssl.rst @@ -112,6 +112,6 @@ Limitations --- * Maximum number of sessions is 2048. -* Chained mbufs are not supported. +* Chained mbufs are supported only for source mbuf (destination must be contiguous). * Hash only is not supported for GCM and GMAC. * Cipher only is not supported for GCM and GMAC. diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c index 5f8fa33..22639b4 100644 --- a/drivers/crypto/openssl/rte_openssl_pmd.c +++ b/drivers/crypto/openssl/rte_openssl_pmd.c @@ -484,22 +484,110 @@ * Process Operations *-- */ +static inline int +process_openssl_encryption_update(struct rte_mbuf *mbuf_src, int offset, + uint8_t **dst, int srclen, EVP_CIPHER_CTX *ctx) +{ + struct rte_mbuf *m; + int dstlen; + int l, n = srclen; + uint8_t *src; + + for (m = mbuf_src; m != NULL && offset > rte_pktmbuf_data_len(m); + m = m->next) + offset -= rte_pktmbuf_data_len(m); + + if (m == 0) + return -1; + + src = rte_pktmbuf_mtod_offset(m, uint8_t *, offset); + + l = rte_pktmbuf_data_len(m) - offset; + if (srclen <= l) { + if (EVP_EncryptUpdate(ctx, *dst, &dstlen, src, srclen) <= 0) + return -1; + *dst += l; + return 0; + } + + if (EVP_EncryptUpdate(ctx, *dst, &dstlen, src, l) <= 0) + return -1; + + *dst += dstlen; + n -= l; + + for (m = m->next; (m != NULL) && (n > 0); m = m->next) { + src = rte_pktmbuf_mtod(m, uint8_t *); + l = rte_pktmbuf_data_len(m) < n ? rte_pktmbuf_data_len(m) : n; + if (EVP_EncryptUpdate(ctx, *dst, &dstlen, src, l) <= 0) + return -1; + *dst += dstlen; + n -= l; + } + + return 0; +} + +static inline int +process_openssl_decryption_update(struct rte_mbuf *mbuf_src, int offset, + uint8_t **dst, int srclen, EVP_CIPHER_CTX *ctx) +{ + struct rte_mbuf *m; + int dstlen; + int l, n = srclen; + uint8_t *src; + + for (m = mbuf_src; m != NULL && offset > rte_pktmbuf_data_len(m); + m = m->next) + offset -= rte_pktmbuf_data_len(m); + + if (m == 0) + return -1; + + src = rte_pktmbuf_mtod_offset(m, uint8_t *, offset); + + l = rte_pktmbuf_data_len(m) - offset; + if (srclen <= l) { + if (EVP_DecryptUpdate(ctx, *dst, &dstlen, src, srclen) <= 0) + return -1; + *dst += l; + return 0; + } + + if (EVP_DecryptUpdate(ctx, *dst, &dstlen, src, l) <= 0) + return -1; + + *dst += dstlen; + n -= l; + + for (m = m->next; (m != NULL) && (n > 0); m = m->next) { + src = rte_pktmbuf_mtod(m, uint8_t *); + l = rte_pktmbuf_data_len(m) < n ? 
rte_pktmbuf_data_len(m) : n; + if (EVP_DecryptUpdate(ctx, *dst, &dstlen, src, l) <= 0) + return -1; + *dst += dstlen; + n -= l; + } + + return 0; +} /** Process standard openssl cipher encryption */ static int -process_openssl_cipher_encrypt(uint8_t *src, uint8_t *dst, - uint8_t *iv, uint8_t *key, int srclen, +process_openssl_cipher_encrypt(struct rte_mbuf *mbuf_src, uint8_t *dst, + int offset, uint8_t *iv, uint8_t *key, int srclen, EVP_CIPHER_CTX *ctx, const EVP_CIPHER *algo) { - int dstlen, totlen; + int totlen; if (EVP_EncryptInit_ex(ctx, algo, NULL, key, iv) <= 0) goto process_cipher_encrypt_err; - if (EVP_EncryptUpdate(ctx, dst, &dstlen, src, srclen) <= 0) + if (process_openssl_encryption_upda
[dpdk-dev] [PATCH v2 5/5] test: add sgl unit tests for crypto devices
This patch provides unit tests for set of cipher/hash combinations covering currently implemented crypto PMD's and allowing to verify scatter gather support. Signed-off-by: Tomasz Kulasek --- app/test/test_cryptodev.c | 589 +++- app/test/test_cryptodev.h | 139 +++ app/test/test_cryptodev_aes_test_vectors.h | 54 ++- app/test/test_cryptodev_blockcipher.c | 181 ++--- app/test/test_cryptodev_blockcipher.h |3 +- app/test/test_cryptodev_des_test_vectors.h | 29 ++ app/test/test_cryptodev_gcm_test_vectors.h | 557 +- 7 files changed, 1477 insertions(+), 75 deletions(-) diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c index 872f8b4..e419f80 100644 --- a/app/test/test_cryptodev.c +++ b/app/test/test_cryptodev.c @@ -1688,6 +1688,10 @@ struct crypto_unittest_params { TEST_ASSERT_NOT_NULL(sym_op->cipher.iv.data, "no room to prepend iv"); + /* For OOP operation both buffers must have the same size */ + if (ut_params->obuf) + rte_pktmbuf_prepend(ut_params->obuf, iv_pad_len); + memset(sym_op->cipher.iv.data, 0, iv_pad_len); sym_op->cipher.iv.phys_addr = rte_pktmbuf_mtophys(ut_params->ibuf); sym_op->cipher.iv.length = iv_pad_len; @@ -2509,6 +2513,84 @@ struct crypto_unittest_params { } static int +test_kasumi_encryption_sgl(const struct kasumi_test_data *tdata) +{ + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unittest_params *ut_params = &unittest_params; + + int retval; + + unsigned int plaintext_pad_len; + unsigned int plaintext_len; + + uint8_t buffer[1]; + const uint8_t *ciphertext; + + struct rte_cryptodev_info dev_info; + + rte_cryptodev_info_get(ts_params->valid_devs[0], &dev_info); + if (!(dev_info.feature_flags & RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER)) { + printf("Device doesn't support scatter-gather. " + "Test Skipped.\n"); + return 0; + } + + /* Create KASUMI session */ + retval = create_wireless_algo_cipher_session(ts_params->valid_devs[0], + RTE_CRYPTO_CIPHER_OP_ENCRYPT, + RTE_CRYPTO_CIPHER_KASUMI_F8, + tdata->key.data, tdata->key.len); + if (retval < 0) + return retval; + + plaintext_len = ceil_byte_length(tdata->plaintext.len); + + + /* Append data which is padded to a multiple */ + /* of the algorithms block size */ + plaintext_pad_len = RTE_ALIGN_CEIL(plaintext_len, 8); + + ut_params->ibuf = create_segmented_mbuf(ts_params->mbuf_pool, + plaintext_pad_len, 10); + + pktmbuf_write(ut_params->ibuf, 0, plaintext_len, tdata->plaintext.data); + + /* Create KASUMI operation */ + retval = create_wireless_algo_cipher_operation(tdata->iv.data, + tdata->iv.len, + tdata->plaintext.len, + tdata->validCipherOffsetLenInBits.len, + RTE_CRYPTO_CIPHER_KASUMI_F8); + if (retval < 0) + return retval; + + ut_params->op = process_crypto_request(ts_params->valid_devs[0], + ut_params->op); + TEST_ASSERT_NOT_NULL(ut_params->op, "failed to retrieve obuf"); + + ut_params->obuf = ut_params->op->sym->m_dst; + + if (ut_params->obuf) + ciphertext = rte_pktmbuf_read(ut_params->obuf, tdata->iv.len, + plaintext_len, buffer); + else + ciphertext = rte_pktmbuf_read(ut_params->ibuf, tdata->iv.len, + plaintext_len, buffer); + + /* Validate obuf */ + TEST_HEXDUMP(stdout, "ciphertext:", ciphertext, plaintext_len); + + /* Validate obuf */ + TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( + ciphertext, + tdata->ciphertext.data, + tdata->validCipherLenInBits.len, + "KASUMI Ciphertext data not as expected"); + return 0; +} + + +static int test_kasumi_encryption_oop(const struct kasumi_test_data *tdata) { struct crypto_testsuite_params *ts_params = &testsuite_params; @@ -2577,6 +2659,81 @@ struct 
crypto_unittest_params { } static int +test_kasumi_encryption_oop_sgl(const struct kasumi_test_data *tdata) +{ + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unit
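The tests above rely on helpers such as create_segmented_mbuf() and pktmbuf_write() added elsewhere in this patch. The sketch below is not that helper, only an illustration of the general shape such a chain builder takes:

    /* Sketch (hypothetical helper): split roughly len bytes of room across
     * about nb_segs chained mbufs from pool, so a test can fill the chain and
     * pass it to a PMD advertising scatter-gather support. */
    #include <rte_common.h>
    #include <rte_mbuf.h>

    static struct rte_mbuf *
    build_segmented_mbuf(struct rte_mempool *pool, uint16_t len,
                    uint16_t nb_segs)
    {
            struct rte_mbuf *head = NULL, *m;
            uint16_t seg_len;

            if (len == 0 || nb_segs == 0)
                    return NULL;
            seg_len = (len + nb_segs - 1) / nb_segs;        /* ceiling */

            while (len > 0) {
                    uint16_t l = RTE_MIN(seg_len, len);

                    m = rte_pktmbuf_alloc(pool);
                    if (m == NULL)
                            goto fail;
                    if (rte_pktmbuf_append(m, l) == NULL ||
                        (head != NULL && rte_pktmbuf_chain(head, m) != 0)) {
                            rte_pktmbuf_free(m);    /* not chained yet */
                            goto fail;
                    }
                    if (head == NULL)
                            head = m;
                    len -= l;
            }
            return head;
    fail:
            if (head != NULL)
                    rte_pktmbuf_free(head);
            return NULL;
    }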
[dpdk-dev] [PATCH v3 2/5] test: add rte_pktmbuf_linearize unit tests
This patch tests rte_pktmbuf_linearize functionality: 1) Creates banch of segmented mbufs with different size and number of segments. 2) Generates pkt_len bytes of random data. 3) Fills noncontigouos mbuf with randomly generated data. 4) Uses rte_pktmbuf_linearize to coalesce segmented buffer into one contiguous. 5) Verifies data in linearized buffer. Signed-off-by: Tomasz Kulasek --- app/test/test_mbuf.c | 123 ++ 1 file changed, 123 insertions(+) diff --git a/app/test/test_mbuf.c b/app/test/test_mbuf.c index c0823ea..39577e7 100644 --- a/app/test/test_mbuf.c +++ b/app/test/test_mbuf.c @@ -930,6 +930,124 @@ return 0; } +static int +test_mbuf_linearize(int pkt_len, int nb_segs) { + + struct rte_mbuf *m = NULL, *mbuf_src = NULL; + uint8_t data[pkt_len], *src, *dst; + int data_len = 0; + int i, size; + int t_len; + + if (pkt_len < 1) { + printf("Packet size must be 1 or more (is %d)\n", pkt_len); + return -1; + } + + if (nb_segs < 1) { + printf("Number of segments must be 1 or more (is %d)\n", + nb_segs); + return -1; + } + + /* Setup buffer */ + for (i = 0; i < pkt_len; i++) + data[i] = (uint8_t) rte_rand(); + + t_len = pkt_len >= nb_segs ? pkt_len / nb_segs : 1; + src = data; + size = pkt_len; + + /* Create chained mbuf_src and fill it generated data */ + for (i = 0; size > 0; i++) { + + m = rte_pktmbuf_alloc(pktmbuf_pool); + if (i == 0) + mbuf_src = m; + + if (!m) { + printf("Cannot create segment for source mbuf"); + goto fail; + } + + /* Make sure if tailroom is zeroed */ + memset(rte_pktmbuf_mtod(m, uint8_t *), 0, + rte_pktmbuf_tailroom(m)); + + data_len = size > t_len ? t_len : size; + dst = (uint8_t *)rte_pktmbuf_append(m, data_len); + if (!dst) { + printf("Cannot append %d bytes to the mbuf\n", + data_len); + goto fail; + } + + rte_memcpy(dst, src, data_len); + src += data_len; + + if (mbuf_src != m) + rte_pktmbuf_chain(mbuf_src, m); + + size -= data_len; + + } + + /* Create destination buffer to store coalesced data */ + if (rte_pktmbuf_linearize(mbuf_src)) { + printf("Mbuf linearization failed\n"); + goto fail; + } + + if (!rte_pktmbuf_is_contiguous(mbuf_src)) { + printf("Source buffer should be contiguous after " + "linearization\n"); + goto fail; + } + + src = rte_pktmbuf_mtod(mbuf_src, uint8_t *); + + if (memcmp(src, data, rte_pktmbuf_pkt_len(mbuf_src))) { + printf("Incorrect data in coalesced mbuf\n"); + goto fail; + } + + if (mbuf_src) + rte_pktmbuf_free(mbuf_src); + return 0; + +fail: + if (mbuf_src) + rte_pktmbuf_free(mbuf_src); + return -1; +} + +static int +test_mbuf_linearize_check(void) +{ + struct test_mbuf_array { + int size; + int nb_segs; + } mbuf_array[5] = { + { 128, 1 }, + { 64, 64 }, + { 512, 10 }, + { 250, 11 }, + { 123, 8 }, + }; + unsigned int i; + + printf("Test mbuf linearize API\n"); + + for (i = 0; i < RTE_DIM(mbuf_array); i++) + if (test_mbuf_linearize(mbuf_array[i].size, + mbuf_array[i].nb_segs)) { + printf("Test failed for %d, %d\n", mbuf_array[i].size, + mbuf_array[i].nb_segs); + return -1; + } + + return 0; +} static int test_mbuf(void) @@ -1023,6 +1141,11 @@ printf("test_failing_mbuf_sanity_check() failed\n"); return -1; } + + if (test_mbuf_linearize_check() < 0) { + printf("test_mbuf_linearize_check() failed\n"); + return -1; + } return 0; } -- 1.7.9.5
[dpdk-dev] [PATCH v3 0/5] Chained Mbufs support in SW PMDs
This patch set adds support of scattered-gather list for SW PMDs. As of now, application needs to reserve continuous block of memory for mbufs which is not always the case. Hence needed to support chaining of mbufs which are smaller in size but can be used if chained. Above work involves: a) Create mbuf functions to coalesce mbuf chains into a single mbuf. b) For each software poll mode driver code to detect chained mbufs support and coalesce these before preforming crypto. c) Add relevant unit tests to test the functionality. Known limitations for openssl PMD: -- While libcrypto library expects continuous destination buffer for output of cipher operations, implementation of openssl PMD is limited the same way, and requires contigous destination mbuf. Dependencies: - This patch set shares some unit tests with SGL implementation for QAT (already merged in dpdk-next-crypto) and should be applied on top of it, and after applying fix "app/test: fix aad padding size in SGL operation" by Arek Kusztal. changes in v3: - rebased to dpdk-next-crypto - reused tests for AES GCM SGL support in opensll from "app/test: add SGL tests to cryptodev QAT suite" changes in v2: - add support for sgl in openssl PMD - rte_pktmbuf_coalesce replaced with rte_pktmbuf_linearize - extended test vector data for aes gcm from 60 to 2048 bytes Tomasz Kulasek (5): rte_mbuf: add rte_pktmbuf_linearize test: add rte_pktmbuf_linearize unit tests crypto: add sgl support in sw PMDs crypto: add sgl support in openssl PMD test: add sgl unit tests for crypto devices app/test/test_cryptodev.c | 386 ++- app/test/test_cryptodev.h | 139 +++ app/test/test_cryptodev_aes_test_vectors.h | 52 +++ app/test/test_cryptodev_blockcipher.c | 180 + app/test/test_cryptodev_blockcipher.h |1 + app/test/test_cryptodev_gcm_test_vectors.h | 553 app/test/test_mbuf.c | 123 +++ doc/guides/cryptodevs/openssl.rst |3 +- drivers/crypto/aesni_gcm/aesni_gcm_pmd.c | 14 + drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 19 +- drivers/crypto/kasumi/rte_kasumi_pmd.c | 13 + drivers/crypto/null/null_crypto_pmd.c |3 +- drivers/crypto/openssl/rte_openssl_pmd.c | 329 + drivers/crypto/snow3g/rte_snow3g_pmd.c | 15 + drivers/crypto/zuc/rte_zuc_pmd.c | 13 + lib/librte_cryptodev/rte_cryptodev.c |4 +- lib/librte_cryptodev/rte_cryptodev.h |2 + lib/librte_mbuf/rte_mbuf.h | 56 +++ 18 files changed, 1756 insertions(+), 149 deletions(-) -- 1.7.9.5
[dpdk-dev] [PATCH v3 3/5] crypto: add sgl support in sw PMDs
This patch introduces RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER feature flag informing that selected crypto device supports segmented mbufs natively and doesn't need to be coalesced before crypto operation. While using segmented buffers in crypto devices may have unpredictable results, for PMDs which doesn't support it natively, additional check is made for debug compilation. Signed-off-by: Tomasz Kulasek --- drivers/crypto/aesni_gcm/aesni_gcm_pmd.c | 14 ++ drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c | 19 --- drivers/crypto/kasumi/rte_kasumi_pmd.c | 13 + drivers/crypto/null/null_crypto_pmd.c |3 ++- drivers/crypto/snow3g/rte_snow3g_pmd.c | 15 +++ drivers/crypto/zuc/rte_zuc_pmd.c | 13 + lib/librte_cryptodev/rte_cryptodev.c |4 ++-- lib/librte_cryptodev/rte_cryptodev.h |2 ++ 8 files changed, 77 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c b/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c index dba5e15..1a6120c 100644 --- a/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c +++ b/drivers/crypto/aesni_gcm/aesni_gcm_pmd.c @@ -375,6 +375,20 @@ break; } +#ifdef RTE_LIBRTE_PMD_AESNI_GCM_DEBUG + if (!rte_pktmbuf_is_contiguous(ops[i]->sym->m_src) || + (ops[i]->sym->m_dst != NULL && + !rte_pktmbuf_is_contiguous( + ops[i]->sym->m_dst))) { + ops[i]->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; + GCM_LOG_ERR("PMD supports only contiguous mbufs, " + "op (%p) provides noncontiguous mbuf as " + "source/destination buffer.\n", ops[i]); + qp->qp_stats.enqueue_err_count++; + break; + } +#endif + retval = process_gcm_crypto_op(qp, ops[i]->sym, sess); if (retval < 0) { ops[i]->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; diff --git a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c index 6d27d75..25f681b 100644 --- a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c +++ b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c @@ -571,15 +571,28 @@ int i, processed_jobs = 0; for (i = 0; i < nb_ops; i++) { -#ifdef RTE_LIBRTE_AESNI_MB_DEBUG - if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC)) { +#ifdef RTE_LIBRTE_PMD_AESNI_MB_DEBUG + if (unlikely(ops[i]->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC)) { MB_LOG_ERR("PMD only supports symmetric crypto " "operation requests, op (%p) is not a " - "symmetric operation.", op); + "symmetric operation.", ops[i]); + qp->stats.enqueue_err_count++; + goto flush_jobs; + } + + if (!rte_pktmbuf_is_contiguous(ops[i]->sym->m_src) || + (ops[i]->sym->m_dst != NULL && + !rte_pktmbuf_is_contiguous( + ops[i]->sym->m_dst))) { + MB_LOG_ERR("PMD supports only contiguous mbufs, " + "op (%p) provides noncontiguous mbuf as " + "source/destination buffer.\n", ops[i]); + ops[i]->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS; qp->stats.enqueue_err_count++; goto flush_jobs; } #endif + sess = get_session(qp, ops[i]); if (unlikely(sess == NULL)) { qp->stats.enqueue_err_count++; diff --git a/drivers/crypto/kasumi/rte_kasumi_pmd.c b/drivers/crypto/kasumi/rte_kasumi_pmd.c index b119da2..4bdd7bb 100644 --- a/drivers/crypto/kasumi/rte_kasumi_pmd.c +++ b/drivers/crypto/kasumi/rte_kasumi_pmd.c @@ -455,6 +455,19 @@ for (i = 0; i < nb_ops; i++) { curr_c_op = ops[i]; +#ifdef RTE_LIBRTE_PMD_KASUMI_DEBUG + if (!rte_pktmbuf_is_contiguous(curr_c_op->sym->m_src) || + (curr_c_op->sym->m_dst != NULL && + !rte_pktmbuf_is_contiguous( + curr_c_op->sym->m_dst))) { + KASUMI_LOG_ERR("PMD supports only contiguous mbufs, " + "op (%p) provides noncontiguous mbuf as " + "s
[dpdk-dev] [PATCH v3 1/5] rte_mbuf: add rte_pktmbuf_linearize
This patch adds function rte_pktmbuf_linearize to let crypto PMD coalesce chained mbuf before crypto operation and extend their capabilities to support segmented mbufs when device cannot handle them natively. Signed-off-by: Tomasz Kulasek --- lib/librte_mbuf/rte_mbuf.h | 56 1 file changed, 56 insertions(+) diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index ead7c6e..b11a31d 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -1647,6 +1647,62 @@ static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail } /** + * Linearize data in mbuf. + * + * This function coalesce mbuf merging data in the first segment, unchaining + * rest, and then frees them. + * + * All operations are done in-place, so the structure of incoming mbuf + * is changed. + * + * @param mbuf + * mbuf to linearize + * @return + * - 0, on success + * - -1, on error + */ +static inline int +rte_pktmbuf_linearize(struct rte_mbuf *mbuf) +{ + int l, n; + struct rte_mbuf *m; + struct rte_mbuf *m_next; + char *buffer; + + if (rte_pktmbuf_is_contiguous(mbuf)) + return 0; + + /* Extend first segment to the total packet length +*/ + n = rte_pktmbuf_pkt_len(mbuf) - rte_pktmbuf_data_len(mbuf); + + if (unlikely(n > rte_pktmbuf_tailroom(mbuf))) + return -1; + + buffer = rte_pktmbuf_mtod_offset(mbuf, char *, mbuf->data_len); + mbuf->data_len = (uint16_t)(mbuf->pkt_len); + + /* Append data from next segments to the first one +*/ + m = mbuf->next; + while (m != NULL) { + m_next = m->next; + + l = rte_pktmbuf_data_len(m); + rte_memcpy(buffer, rte_pktmbuf_mtod(m, char *), l); + buffer += l; + + rte_pktmbuf_free_seg(m); + m = m_next; + } + + mbuf->next = NULL; + mbuf->nb_segs = 1; + + return 0; +} + +/** * Dump an mbuf structure to a file. * * Dump all fields for the given packet mbuf and all its associated -- 1.7.9.5
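One operational note that applies to the v2 version as well: linearization only succeeds when the head segment's tailroom can hold every byte of the trailing segments, so callers who depend on it should size the mbuf data room for the largest packet they intend to flatten. A hedged sketch, with placeholder pool name and constants:

    /* Sketch: a pool whose data room covers MAX_LIN_PKT_LEN guarantees that
     * any packet up to that size fits into its head segment after
     * rte_pktmbuf_linearize(). NB_MBUFS / MAX_LIN_PKT_LEN are illustrative. */
    #include <rte_lcore.h>
    #include <rte_mbuf.h>

    #define NB_MBUFS        8192
    #define MAX_LIN_PKT_LEN 2048

    static struct rte_mempool *
    create_linearize_pool(void)
    {
            return rte_pktmbuf_pool_create("linear_pool", NB_MBUFS,
                            256 /* cache */, 0 /* priv size */,
                            RTE_PKTMBUF_HEADROOM + MAX_LIN_PKT_LEN,
                            rte_socket_id());
    }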
[dpdk-dev] [PATCH v3 4/5] crypto: add sgl support in openssl PMD
Previous implementation uses EVP_EncryptUpdate() on whole source buffer limiting its usage to the contiguous buffers. This implementation calls EVP_EncryptUpdate() on each segment for cipher operations in continuous mode, before finalization allowing to provide chained mbuf as a source. However, libcrypto library expects continuous destination buffer for output of cipher operations and implementation of openssl PMD is limited the same way, requiring contiguous destination mbuf. Signed-off-by: Tomasz Kulasek --- doc/guides/cryptodevs/openssl.rst|3 +- drivers/crypto/openssl/rte_openssl_pmd.c | 329 +++--- 2 files changed, 259 insertions(+), 73 deletions(-) diff --git a/doc/guides/cryptodevs/openssl.rst b/doc/guides/cryptodevs/openssl.rst index d2b5906..d0b1eeb 100644 --- a/doc/guides/cryptodevs/openssl.rst +++ b/doc/guides/cryptodevs/openssl.rst @@ -112,6 +112,7 @@ Limitations --- * Maximum number of sessions is 2048. -* Chained mbufs are not supported. +* Chained mbufs are supported only for source mbuf (destination must be + contiguous). * Hash only is not supported for GCM and GMAC. * Cipher only is not supported for GCM and GMAC. diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c index 832ea1d..e466c79 100644 --- a/drivers/crypto/openssl/rte_openssl_pmd.c +++ b/drivers/crypto/openssl/rte_openssl_pmd.c @@ -484,24 +484,112 @@ * Process Operations *-- */ +static inline int +process_openssl_encryption_update(struct rte_mbuf *mbuf_src, int offset, + uint8_t **dst, int srclen, EVP_CIPHER_CTX *ctx) +{ + struct rte_mbuf *m; + int dstlen; + int l, n = srclen; + uint8_t *src; + + for (m = mbuf_src; m != NULL && offset > rte_pktmbuf_data_len(m); + m = m->next) + offset -= rte_pktmbuf_data_len(m); + + if (m == 0) + return -1; + + src = rte_pktmbuf_mtod_offset(m, uint8_t *, offset); + + l = rte_pktmbuf_data_len(m) - offset; + if (srclen <= l) { + if (EVP_EncryptUpdate(ctx, *dst, &dstlen, src, srclen) <= 0) + return -1; + *dst += l; + return 0; + } + + if (EVP_EncryptUpdate(ctx, *dst, &dstlen, src, l) <= 0) + return -1; + + *dst += dstlen; + n -= l; + + for (m = m->next; (m != NULL) && (n > 0); m = m->next) { + src = rte_pktmbuf_mtod(m, uint8_t *); + l = rte_pktmbuf_data_len(m) < n ? rte_pktmbuf_data_len(m) : n; + if (EVP_EncryptUpdate(ctx, *dst, &dstlen, src, l) <= 0) + return -1; + *dst += dstlen; + n -= l; + } + + return 0; +} + +static inline int +process_openssl_decryption_update(struct rte_mbuf *mbuf_src, int offset, + uint8_t **dst, int srclen, EVP_CIPHER_CTX *ctx) +{ + struct rte_mbuf *m; + int dstlen; + int l, n = srclen; + uint8_t *src; + + for (m = mbuf_src; m != NULL && offset > rte_pktmbuf_data_len(m); + m = m->next) + offset -= rte_pktmbuf_data_len(m); + + if (m == 0) + return -1; + + src = rte_pktmbuf_mtod_offset(m, uint8_t *, offset); + + l = rte_pktmbuf_data_len(m) - offset; + if (srclen <= l) { + if (EVP_DecryptUpdate(ctx, *dst, &dstlen, src, srclen) <= 0) + return -1; + *dst += l; + return 0; + } + + if (EVP_DecryptUpdate(ctx, *dst, &dstlen, src, l) <= 0) + return -1; + + *dst += dstlen; + n -= l; + + for (m = m->next; (m != NULL) && (n > 0); m = m->next) { + src = rte_pktmbuf_mtod(m, uint8_t *); + l = rte_pktmbuf_data_len(m) < n ? 
rte_pktmbuf_data_len(m) : n; + if (EVP_DecryptUpdate(ctx, *dst, &dstlen, src, l) <= 0) + return -1; + *dst += dstlen; + n -= l; + } + + return 0; +} /** Process standard openssl cipher encryption */ static int -process_openssl_cipher_encrypt(uint8_t *src, uint8_t *dst, - uint8_t *iv, uint8_t *key, int srclen, +process_openssl_cipher_encrypt(struct rte_mbuf *mbuf_src, uint8_t *dst, + int offset, uint8_t *iv, uint8_t *key, int srclen, EVP_CIPHER_CTX *ctx, const EVP_CIPHER *algo) { - int dstlen, totlen; + int totlen; if (EVP_EncryptInit_ex(ctx, algo, NULL, key, iv) <= 0) goto process_cipher_encrypt_err; EVP_CIPHER_CTX_set_padding(ctx, 0); - if (EVP_EncryptUpdate(ctx, dst, &dstlen, src, srclen) <= 0) +
[dpdk-dev] [PATCH v3 5/5] test: add sgl unit tests for crypto devices
This patch provides unit tests for set of cipher/hash combinations covering currently implemented crypto PMD's and allowing to verify scatter gather support. Signed-off-by: Daniel Mrzyglod Signed-off-by: Tomasz Kulasek --- app/test/test_cryptodev.c | 386 ++- app/test/test_cryptodev.h | 139 +++ app/test/test_cryptodev_aes_test_vectors.h | 52 +++ app/test/test_cryptodev_blockcipher.c | 180 + app/test/test_cryptodev_blockcipher.h |1 + app/test/test_cryptodev_gcm_test_vectors.h | 553 6 files changed, 1241 insertions(+), 70 deletions(-) diff --git a/app/test/test_cryptodev.c b/app/test/test_cryptodev.c index 3eaf1b7..4c9a54f 100644 --- a/app/test/test_cryptodev.c +++ b/app/test/test_cryptodev.c @@ -1736,6 +1736,10 @@ struct crypto_unittest_params { TEST_ASSERT_NOT_NULL(sym_op->cipher.iv.data, "no room to prepend iv"); + /* For OOP operation both buffers must have the same size */ + if (ut_params->obuf) + rte_pktmbuf_prepend(ut_params->obuf, iv_pad_len); + memset(sym_op->cipher.iv.data, 0, iv_pad_len); sym_op->cipher.iv.phys_addr = rte_pktmbuf_mtophys(ut_params->ibuf); sym_op->cipher.iv.length = iv_pad_len; @@ -2557,6 +2561,83 @@ struct crypto_unittest_params { } static int +test_kasumi_encryption_sgl(const struct kasumi_test_data *tdata) +{ + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unittest_params *ut_params = &unittest_params; + + int retval; + + unsigned int plaintext_pad_len; + unsigned int plaintext_len; + + uint8_t buffer[1]; + const uint8_t *ciphertext; + + struct rte_cryptodev_info dev_info; + + rte_cryptodev_info_get(ts_params->valid_devs[0], &dev_info); + if (!(dev_info.feature_flags & RTE_CRYPTODEV_FF_MBUF_SCATTER_GATHER)) { + printf("Device doesn't support scatter-gather. " + "Test Skipped.\n"); + return 0; + } + + /* Create KASUMI session */ + retval = create_wireless_algo_cipher_session(ts_params->valid_devs[0], + RTE_CRYPTO_CIPHER_OP_ENCRYPT, + RTE_CRYPTO_CIPHER_KASUMI_F8, + tdata->key.data, tdata->key.len); + if (retval < 0) + return retval; + + plaintext_len = ceil_byte_length(tdata->plaintext.len); + + + /* Append data which is padded to a multiple */ + /* of the algorithms block size */ + plaintext_pad_len = RTE_ALIGN_CEIL(plaintext_len, 8); + + ut_params->ibuf = create_segmented_mbuf(ts_params->mbuf_pool, + plaintext_pad_len, 10); + + pktmbuf_write(ut_params->ibuf, 0, plaintext_len, tdata->plaintext.data); + + /* Create KASUMI operation */ + retval = create_wireless_algo_cipher_operation(tdata->iv.data, + tdata->iv.len, + tdata->plaintext.len, + tdata->validCipherOffsetLenInBits.len, + RTE_CRYPTO_CIPHER_KASUMI_F8); + if (retval < 0) + return retval; + + ut_params->op = process_crypto_request(ts_params->valid_devs[0], + ut_params->op); + TEST_ASSERT_NOT_NULL(ut_params->op, "failed to retrieve obuf"); + + ut_params->obuf = ut_params->op->sym->m_dst; + + if (ut_params->obuf) + ciphertext = rte_pktmbuf_read(ut_params->obuf, tdata->iv.len, + plaintext_len, buffer); + else + ciphertext = rte_pktmbuf_read(ut_params->ibuf, tdata->iv.len, + plaintext_len, buffer); + + /* Validate obuf */ + TEST_HEXDUMP(stdout, "ciphertext:", ciphertext, plaintext_len); + + /* Validate obuf */ + TEST_ASSERT_BUFFERS_ARE_EQUAL_BIT( + ciphertext, + tdata->ciphertext.data, + tdata->validCipherLenInBits.len, + "KASUMI Ciphertext data not as expected"); + return 0; +} + +static int test_kasumi_encryption_oop(const struct kasumi_test_data *tdata) { struct crypto_testsuite_params *ts_params = &testsuite_params; @@ -2625,6 +2706,81 @@ struct crypto_unittest_params { } 
static int +test_kasumi_encryption_oop_sgl(const struct kasumi_test_data *tdata) +{ + struct crypto_testsuite_params *ts_params = &testsuite_params; + struct crypto_unittest_params *ut_params = &unittest
[dpdk-dev] [PATCH] test: fix virtual device name not set
The device name in the device structure of the virtual device used by link_bonding_autotest is not set, which causes a segmentation fault when rte_eth_dev_allocated() is called. Fixes: a1e7c17555e8 ("ethdev: use device name from device structure") Signed-off-by: Tomasz Kulasek --- test/test/virtual_pmd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test/virtual_pmd.c b/test/test/virtual_pmd.c index 8e698bc..9d46ad5 100644 --- a/test/test/virtual_pmd.c +++ b/test/test/virtual_pmd.c @@ -565,6 +565,7 @@ static void virtual_ethdev_stop(struct rte_eth_dev *eth_dev __rte_unused) goto err; pci_dev->device.numa_node = socket_id; + pci_dev->device.name = eth_dev->data->name; pci_drv->driver.name = virtual_ethdev_driver_name; pci_drv->id_table = id_table; -- 1.9.1
[dpdk-dev] [PATCH] test: fix bonded device name
Bonded device names must start with the "net_bonding" prefix. Fixes: 9bf4901d1a11 ("bus/vdev: remove probe with driver name option") Signed-off-by: Tomasz Kulasek --- test/test/test_link_bonding.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test/test_link_bonding.c b/test/test/test_link_bonding.c index aa2a1a2..828f7f7 100644 --- a/test/test/test_link_bonding.c +++ b/test/test/test_link_bonding.c @@ -83,7 +83,7 @@ #define MAX_PKT_BURST (512) #define DEF_PKT_BURST (16) -#define BONDED_DEV_NAME("unit_test_bond_dev") +#define BONDED_DEV_NAME("net_bonding_ut") #define INVALID_SOCKET_ID (-1) #define INVALID_PORT_ID(-1) @@ -939,7 +939,7 @@ struct rte_fdir_conf fdir_conf = { /* * 1. a - Create / configure bonded / slave ethdevs */ - bonded_port_id = rte_eth_bond_create("ethdev_bond_mac_ass_test", + bonded_port_id = rte_eth_bond_create("net_bonding_mac_ass_test", BONDING_MODE_ACTIVE_BACKUP, rte_socket_id()); TEST_ASSERT(bonded_port_id > 0, "failed to create bonded device"); -- 1.9.1
[dpdk-dev] [PATCH] bonding: fix segfault when primary slave set
rte_eth_bond_primary_set() segfaults for an invalid port. This patch moves the port validity check before the internal data is used. Fixes: 4c42498d916d ("net/bonding: allow slaves to also be bonded devices") Signed-off-by: Tomasz Kulasek --- drivers/net/bonding/rte_eth_bond_api.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/rte_eth_bond_api.c b/drivers/net/bonding/rte_eth_bond_api.c index 824ab4f..de1d9e0 100644 --- a/drivers/net/bonding/rte_eth_bond_api.c +++ b/drivers/net/bonding/rte_eth_bond_api.c @@ -514,15 +514,14 @@ { struct bond_dev_private *internals; - internals = rte_eth_devices[bonded_port_id].data->dev_private; - if (valid_bonded_port_id(bonded_port_id) != 0) return -1; + internals = rte_eth_devices[bonded_port_id].data->dev_private; + if (valid_slave_port_id(slave_port_id, internals->mode) != 0) return -1; - internals->user_defined_primary_port = 1; internals->primary_port = slave_port_id; -- 1.9.1
[dpdk-dev] [PATCH] bonding: fix link status interrupt when down
RTE_ETH_EVENT_INTR_LSC callbacks are not invoked when all slaves go down in bond_ethdev_lsc_event_callback. As a result, the link status change of the bonded device is not propagated up. Fixes: deba8a2f8b0b ("net/bonding: fix link properties management") Signed-off-by: Tomasz Kulasek --- drivers/net/bonding/rte_eth_bond_pmd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c index 8f9a860..3ee70ba 100644 --- a/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/drivers/net/bonding/rte_eth_bond_pmd.c @@ -2535,6 +2535,9 @@ struct bwg_slave { /* Remove from active slave list */ deactivate_slave(bonded_eth_dev, port_id); + if (internals->active_slave_count < 1) + lsc_flag = 1; + /* Update primary id, take first active slave from list or if none * available set to -1 */ if (port_id == internals->current_primary_port) { -- 1.9.1
[dpdk-dev] [PATCH] bonding: fix wrong slaves capacity check
For fortville NIC bond_ethdev_8023ad_flow_verify fails when action queue index indicates unavailable queue before slaves configuration. This fix verifies flow settings for queue 0, which is always available, and checks if slaves max queue number capacity meets requirements. Fixes: 112891cd27e5 ("net/bonding: add dedicated HW queues for LACP control") Signed-off-by: Tomasz Kulasek --- drivers/net/bonding/rte_eth_bond_pmd.c | 26 +++--- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c index 8f9a860..ab5ebe0 100644 --- a/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/drivers/net/bonding/rte_eth_bond_pmd.c @@ -175,12 +175,13 @@ const struct rte_flow_attr flow_attr_8023ad = { int bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev, uint8_t slave_port) { + struct rte_eth_dev_info slave_info; struct rte_flow_error error; struct bond_dev_private *internals = (struct bond_dev_private *) (bond_dev->data->dev_private); - struct rte_flow_action_queue lacp_queue_conf = { - .index = internals->mode4.dedicated_queues.rx_qid, + const struct rte_flow_action_queue lacp_queue_conf = { + .index = 0, }; const struct rte_flow_action actions[] = { @@ -195,8 +196,22 @@ bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev, int ret = rte_flow_validate(slave_port, &flow_attr_8023ad, flow_item_8023ad, actions, &error); - if (ret < 0) + if (ret < 0) { + RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_verify: %s " + "(slave_port=%d queue_id=%d)", + error.message, slave_port, + internals->mode4.dedicated_queues.rx_qid); + return -1; + } + + rte_eth_dev_info_get(slave_port, &slave_info); + if ((slave_info.max_rx_queues < bond_dev->data->nb_rx_queues) || + (slave_info.max_tx_queues < bond_dev->data->nb_tx_queues)) { + RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_verify: Slave %d" + " capabilities doesn't allow to allocate " + "additional queues", slave_port); return -1; + } return 0; } @@ -206,7 +221,7 @@ bond_8023ad_slow_pkt_hw_filter_supported(uint8_t port_id) { struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id]; struct bond_dev_private *internals = (struct bond_dev_private *) (bond_dev->data->dev_private); - struct rte_eth_dev_info bond_info, slave_info; + struct rte_eth_dev_info bond_info; uint8_t idx; /* Verify if all slaves in bonding supports flow director and */ @@ -217,9 +232,6 @@ bond_8023ad_slow_pkt_hw_filter_supported(uint8_t port_id) { internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues; for (idx = 0; idx < internals->slave_count; idx++) { - rte_eth_dev_info_get(internals->slaves[idx].port_id, - &slave_info); - if (bond_ethdev_8023ad_flow_verify(bond_dev, internals->slaves[idx].port_id) != 0) return -1; -- 2.7.4
[dpdk-dev] [PATCH] app/crypto-perf: fix uninitialized values for null operations
Some values are uninitialized for "cipher null" and "auth null" operations. It may cause unpredictable results for some crypto pmd drivers, or even segmentation fault. This patch sets values for null operations to zero. Fixes: f8be1786b1b8 ("app/crypto-perf: introduce performance test application") Signed-off-by: Tomasz Kulasek --- app/test-crypto-perf/cperf_ops.c | 16 app/test-crypto-perf/cperf_test_vectors.c | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/app/test-crypto-perf/cperf_ops.c b/app/test-crypto-perf/cperf_ops.c index b8c0398..1795a37 100644 --- a/app/test-crypto-perf/cperf_ops.c +++ b/app/test-crypto-perf/cperf_ops.c @@ -333,6 +333,9 @@ test_vector->cipher_key.data; cipher_xform.cipher.key.length = test_vector->cipher_key.length; + } else { + cipher_xform.cipher.key.data = NULL; + cipher_xform.cipher.key.length = 0; } /* create crypto session */ sess = rte_cryptodev_sym_session_create(dev_id, &cipher_xform); @@ -354,6 +357,11 @@ auth_xform.auth.key.length = test_vector->auth_key.length; auth_xform.auth.key.data = test_vector->auth_key.data; + } else { + auth_xform.auth.digest_length = 0; + auth_xform.auth.add_auth_data_length = 0; + auth_xform.auth.key.length = 0; + auth_xform.auth.key.data = NULL; } /* create crypto session */ sess = rte_cryptodev_sym_session_create(dev_id, &auth_xform); @@ -378,6 +386,9 @@ test_vector->cipher_key.data; cipher_xform.cipher.key.length = test_vector->cipher_key.length; + } else { + cipher_xform.cipher.key.data = NULL; + cipher_xform.cipher.key.length = 0; } /* @@ -404,6 +415,11 @@ auth_xform.auth.key.data = test_vector->auth_key.data; } + } else { + auth_xform.auth.digest_length = 0; + auth_xform.auth.add_auth_data_length = 0; + auth_xform.auth.key.length = 0; + auth_xform.auth.key.data = NULL; } /* create crypto session for aes gcm */ diff --git a/app/test-crypto-perf/cperf_test_vectors.c b/app/test-crypto-perf/cperf_test_vectors.c index 6307f25..f7b3aa9 100644 --- a/app/test-crypto-perf/cperf_test_vectors.c +++ b/app/test-crypto-perf/cperf_test_vectors.c @@ -406,7 +406,7 @@ struct cperf_test_vector* options->op_type == CPERF_AUTH_THEN_CIPHER || options->op_type == CPERF_AEAD) { if (options->cipher_algo == RTE_CRYPTO_CIPHER_NULL) { - t_vec->cipher_key.length = -1; + t_vec->cipher_key.length = 0; t_vec->ciphertext.data = plaintext; t_vec->cipher_key.data = NULL; t_vec->iv.data = NULL; -- 1.9.1
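The root cause here is that a "null" transform left with stale values (for example a key length of -1) can be read back by a PMD as a nonsensical, very large size. A hedged sketch of a fully zeroed NULL-cipher transform, with field values chosen only for illustration:

    /* Sketch: a NULL-cipher transform carrying an explicitly empty key,
     * matching what the fix above writes into the xform for null operations. */
    struct rte_crypto_sym_xform cipher_xform = {
            .type = RTE_CRYPTO_SYM_XFORM_CIPHER,
            .next = NULL,
            .cipher = {
                    .op = RTE_CRYPTO_CIPHER_OP_ENCRYPT,
                    .algo = RTE_CRYPTO_CIPHER_NULL,
                    .key = { .data = NULL, .length = 0 },
            },
    };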
[dpdk-dev] [PATCH v7 0/6] add Tx preparation
>From 35b09a978d244092337b6f46fd1309f8c733bb6b Mon Sep 17 00:00:00 2001 From: Tomasz Kulasek Date: Fri, 14 Oct 2016 16:10:35 +0200 Subject: [PATCH v6 0/6] add Tx preparation As discussed in that thread: http://dpdk.org/ml/archives/dev/2015-September/023603.html Different NIC models depending on HW offload requested might impose different requirements on packets to be TX-ed in terms of: - Max number of fragments per packet allowed - Max number of fragments per TSO segments - The way pseudo-header checksum should be pre-calculated - L3/L4 header fields filling - etc. MOTIVATION: --- 1) Some work cannot (and didn't should) be done in rte_eth_tx_burst. However, this work is sometimes required, and now, it's an application issue. 2) Different hardware may have different requirements for TX offloads, other subset can be supported and so on. 3) Some parameters (e.g. number of segments in ixgbe driver) may hung device. These parameters may be vary for different devices. For example i40e HW allows 8 fragments per packet, but that is after TSO segmentation. While ixgbe has a 38-fragment pre-TSO limit. 4) Fields in packet may require different initialization (like e.g. will require pseudo-header checksum precalculation, sometimes in a different way depending on packet type, and so on). Now application needs to care about it. 5) Using additional API (rte_eth_tx_prep) before rte_eth_tx_burst let to prepare packet burst in acceptable form for specific device. 6) Some additional checks may be done in debug mode keeping tx_burst implementation clean. PROPOSAL: - To help user to deal with all these varieties we propose to: 1) Introduce rte_eth_tx_prep() function to do necessary preparations of packet burst to be safely transmitted on device for desired HW offloads (set/reset checksum field according to the hardware requirements) and check HW constraints (number of segments per packet, etc). While the limitations and requirements may differ for devices, it requires to extend rte_eth_dev structure with new function pointer "tx_pkt_prep" which can be implemented in the driver to prepare and verify packets, in devices specific way, before burst, what should to prevent application to send malformed packets. 2) Also new fields will be introduced in rte_eth_desc_lim: nb_seg_max and nb_mtu_seg_max, providing an information about max segments in TSO and non-TSO packets acceptable by device. This information is useful for application to not create/limit malicious packet. APPLICATION (CASE OF USE): -- 1) Application should to initialize burst of packets to send, set required tx offload flags and required fields, like l2_len, l3_len, l4_len, and tso_segsz 2) Application passes burst to the rte_eth_tx_prep to check conditions required to send packets through the NIC. 3) The result of rte_eth_tx_prep can be used to send valid packets and/or restore invalid if function fails. e.g. for (i = 0; i < nb_pkts; i++) { /* initialize or process packet */ bufs[i]->tso_segsz = 800; bufs[i]->ol_flags = PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_IP_CKSUM; bufs[i]->l2_len = sizeof(struct ether_hdr); bufs[i]->l3_len = sizeof(struct ipv4_hdr); bufs[i]->l4_len = sizeof(struct tcp_hdr); } /* Prepare burst of TX packets */ nb_prep = rte_eth_tx_prep(port, 0, bufs, nb_pkts); if (nb_prep < nb_pkts) { printf("tx_prep failed\n"); /* nb_prep indicates here first invalid packet. rte_eth_tx_prep * can be used on remaining packets to find another ones. 
*/ } /* Send burst of TX packets */ nb_tx = rte_eth_tx_burst(port, 0, bufs, nb_prep); /* Free any unsent packets. */ v7 changes: - comments reworded/added - changed errno values returned from Tx prep API - added check in rte_phdr_cksum_fix if headers are in the first data segment and can be safetly modified - moved rte_validate_tx_offload to rte_mbuf - moved rte_phdr_cksum_fix to rte_net.h - removed rte_pkt.h new file as useless v5 changes: - rebased csum engine modification - added information to the csum engine about performance tests - some performance improvements v4 changes: - tx_prep is now set to default behavior (NULL) for simple/vector path in fm10k, i40e and ixgbe drivers to increase performance, when Tx offloads are not intentionally available v3 changes: - reworked csum testpmd engine instead adding new one, - fixed checksum initialization procedure to include also outer checksum offloads, - some minor formattings and optimalizations v2 changes: - rte_eth_tx_prep() returns number of packet
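The comment in the example above ("rte_eth_tx_prep can be used on remaining packets") hints at a drop-and-continue loop. A hedged sketch of one way to write it; handling of a full Tx queue and of mbufs left unsent by rte_eth_tx_burst() is omitted:

    /* Sketch: transmit the validated prefix, drop the first offending packet,
     * then keep preparing the remainder of the burst so a single malformed
     * packet does not block the rest. */
    uint16_t idx = 0, sent = 0;

    while (idx < nb_pkts) {
            uint16_t nb_ok = rte_eth_tx_prep(port, 0, &bufs[idx],
                                             nb_pkts - idx);

            sent += rte_eth_tx_burst(port, 0, &bufs[idx], nb_ok);
            idx += nb_ok;
            if (idx < nb_pkts) {
                    rte_pktmbuf_free(bufs[idx]);    /* drop the offender */
                    idx++;
            }
    }
    /* sent now holds the total number of packets handed to the hardware */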
[dpdk-dev] [PATCH v7 1/6] ethdev: add Tx preparation
Added API for `rte_eth_tx_prep` uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) Added fields to the `struct rte_eth_desc_lim`: uint16_t nb_seg_max; /**< Max number of segments per whole packet. */ uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ Added functions: int rte_validate_tx_offload(struct rte_mbuf *m) to validate general requirements for tx offload set in mbuf of packet such a flag completness. In current implementation this function is called optionaly when RTE_LIBRTE_ETHDEV_DEBUG is enabled. int rte_phdr_cksum_fix(struct rte_mbuf *m) to fix pseudo header checksum for TSO and non-TSO tcp/udp packets before hardware tx checksum offload. - for non-TSO tcp/udp packets full pseudo-header checksum is counted and set. - for TSO the IP payload length is not included. PERFORMANCE TESTS - This feature was tested with modified csum engine from test-pmd. The packet checksum preparation was moved from application to Tx preparation step placed before burst. We may expect some overhead costs caused by: 1) using additional callback before burst, 2) rescanning burst, 3) additional condition checking (packet validation), 4) worse optimization (e.g. packet data access, etc.) We tested it using ixgbe Tx preparation implementation with some parts disabled to have comparable information about the impact of diferent parts of implementation. IMPACT: 1) For unimplemented Tx preparation callback the performance impact is negligible, 2) For packet condition check without checksum modifications (nb_segs, available offloads, etc.) is 14626628/14252168 (~2.62% drop), 3) Full support in ixgbe driver (point 2 + packet checksum initialization) is 14060924/13588094 (~3.48% drop) Signed-off-by: Tomasz Kulasek --- config/common_base|1 + lib/librte_ether/rte_ethdev.h | 97 + lib/librte_mbuf/rte_mbuf.h| 57 lib/librte_net/rte_net.h | 90 ++ 4 files changed, 245 insertions(+) diff --git a/config/common_base b/config/common_base index c7fd3db..619284b 100644 --- a/config/common_base +++ b/config/common_base @@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024 CONFIG_RTE_LIBRTE_IEEE1588=n CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y +CONFIG_RTE_ETHDEV_TX_PREP=y # # Support NIC bypass logic diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 38641e8..d548d48 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -182,6 +182,7 @@ extern "C" { #include #include #include +#include #include "rte_ether.h" #include "rte_eth_ctrl.h" #include "rte_dev_info.h" @@ -699,6 +700,8 @@ struct rte_eth_desc_lim { uint16_t nb_max; /**< Max allowed number of descriptors. */ uint16_t nb_min; /**< Min allowed number of descriptors. */ uint16_t nb_align; /**< Number of descriptors should be aligned to. */ + uint16_t nb_seg_max; /**< Max number of segments per whole packet. */ + uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ }; /** @@ -1188,6 +1191,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq, uint16_t nb_pkts); /**< @internal Send output packets on a transmit queue of an Ethernet device. */ +typedef uint16_t (*eth_tx_prep_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Prepare output packets on a transmit queue of an Ethernet device. 
*/ + typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf); /**< @internal Get current flow control parameter on an Ethernet device */ @@ -1622,6 +1630,7 @@ struct rte_eth_rxtx_callback { struct rte_eth_dev { eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ + eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */ struct rte_eth_dev_data *data; /**< Pointer to device data */ const struct eth_driver *driver;/**< Driver for this device */ const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ @@ -2816,6 +2825,94 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id, return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts); } +/** + * Process a burst of output packets on a transmit queue of an Ethernet device. + * + * The rte_eth_tx_prep() function is invoked to prepare output packets to be + *
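The pseudo-header fix described above can be pictured for the simplest case, an IPv4/TCP packet with hardware TCP checksum requested. The sketch below is not the rte_net.h helper itself, only the core idea; rte_ipv4_phdr_cksum() already applies the TSO rule of leaving the payload length out:

    /* Sketch: write the pseudo-header checksum into the TCP header so the NIC
     * only has to fold in the payload. m is the mbuf being prepared, and
     * m->l2_len / m->l3_len must already be set by the application, as the
     * Tx preparation contract requires. */
    #include <rte_ip.h>
    #include <rte_mbuf.h>
    #include <rte_tcp.h>

    struct ipv4_hdr *ip = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
                                                  m->l2_len);
    struct tcp_hdr *tcp = (struct tcp_hdr *)((char *)ip + m->l3_len);

    tcp->cksum = rte_ipv4_phdr_cksum(ip, m->ol_flags);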
[dpdk-dev] [PATCH v7 2/6] e1000: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/e1000/e1000_ethdev.h | 11 drivers/net/e1000/em_ethdev.c|5 +++- drivers/net/e1000/em_rxtx.c | 48 ++- drivers/net/e1000/igb_ethdev.c |4 +++ drivers/net/e1000/igb_rxtx.c | 52 +- 5 files changed, 117 insertions(+), 3 deletions(-) diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h index 6c25c8d..bd0f277 100644 --- a/drivers/net/e1000/e1000_ethdev.h +++ b/drivers/net/e1000/e1000_ethdev.h @@ -138,6 +138,11 @@ #define E1000_MISC_VEC_ID RTE_INTR_VEC_ZERO_OFFSET #define E1000_RX_VEC_START RTE_INTR_VEC_RXTX_OFFSET +#define IGB_TX_MAX_SEG UINT8_MAX +#define IGB_TX_MAX_MTU_SEG UINT8_MAX +#define EM_TX_MAX_SEG UINT8_MAX +#define EM_TX_MAX_MTU_SEG UINT8_MAX + /* structure for interrupt relative data */ struct e1000_interrupt { uint32_t flags; @@ -315,6 +320,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev); uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); @@ -376,6 +384,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev); uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c index 7cf5f0c..17b45cb 100644 --- a/drivers/net/e1000/em_ethdev.c +++ b/drivers/net/e1000/em_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -300,6 +300,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = ð_em_ops; eth_dev->rx_pkt_burst = (eth_rx_burst_t)ð_em_recv_pkts; eth_dev->tx_pkt_burst = (eth_tx_burst_t)ð_em_xmit_pkts; + eth_dev->tx_pkt_prep = (eth_tx_prep_t)ð_em_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -1067,6 +1068,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_max = E1000_MAX_RING_DESC, .nb_min = E1000_MIN_RING_DESC, .nb_align = EM_TXD_ALIGN, + .nb_seg_max = EM_TX_MAX_SEG, + .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M | diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c index 41f51c0..5bd3c99 100644 --- a/drivers/net/e1000/em_rxtx.c +++ b/drivers/net/e1000/em_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -66,6 +66,7 @@ #include #include #include +#include #include #include "e1000_logs.h" @@ -77,6 +78,14 @@ #define E1000_RXDCTL_GRAN 0x0100 /* RXDCTL Granularity */ +#define E1000_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_VLAN_PKT) + +#define E1000_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK) + /** * Structure associated with each descriptor of the RX ring of a RX queue. 
*/ @@ -618,6 +627,43 @@ end_of_tx: /* * + * TX prep functions + * + **/ +uint16_t +eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (
[dpdk-dev] [PATCH v7 3/6] fm10k: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/fm10k/fm10k.h|6 + drivers/net/fm10k/fm10k_ethdev.c |5 drivers/net/fm10k/fm10k_rxtx.c | 50 +- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h index 05aa1a2..c6fed21 100644 --- a/drivers/net/fm10k/fm10k.h +++ b/drivers/net/fm10k/fm10k.h @@ -69,6 +69,9 @@ #define FM10K_MAX_RX_DESC (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc)) #define FM10K_MAX_TX_DESC (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc)) +#define FM10K_TX_MAX_SEG UINT8_MAX +#define FM10K_TX_MAX_MTU_SEG UINT8_MAX + /* * byte aligment for HW RX data buffer * Datasheet requires RX buffer addresses shall either be 512-byte aligned or @@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset); uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq); int fm10k_rx_vec_condition_check(struct rte_eth_dev *); void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq); diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index c804436..dffb6d1 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -1446,6 +1446,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev, .nb_max = FM10K_MAX_TX_DESC, .nb_min = FM10K_MIN_TX_DESC, .nb_align = FM10K_MULT_TX_DESC, + .nb_seg_max = FM10K_TX_MAX_SEG, + .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G | @@ -2754,8 +2756,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev) fm10k_txq_vec_setup(txq); } dev->tx_pkt_burst = fm10k_xmit_pkts_vec; + dev->tx_pkt_prep = NULL; } else { dev->tx_pkt_burst = fm10k_xmit_pkts; + dev->tx_pkt_prep = fm10k_prep_pkts; PMD_INIT_LOG(DEBUG, "Use regular Tx func"); } } @@ -2834,6 +2838,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev) dev->dev_ops = &fm10k_eth_dev_ops; dev->rx_pkt_burst = &fm10k_recv_pkts; dev->tx_pkt_burst = &fm10k_xmit_pkts; + dev->tx_pkt_prep = &fm10k_prep_pkts; /* only initialize in the primary process */ if (rte_eal_process_type() != RTE_PROC_PRIMARY) diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c index 32cc7ff..5fc4d5a 100644 --- a/drivers/net/fm10k/fm10k_rxtx.c +++ b/drivers/net/fm10k/fm10k_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -35,6 +35,7 @@ #include #include +#include #include "fm10k.h" #include "base/fm10k_type.h" @@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd) } #endif +#define FM10K_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT |\ + PKT_TX_IP_CKSUM |\ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#define FM10K_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK) + /* @note: When this function is changed, make corresponding change to * fm10k_dev_supported_ptypes_get() */ @@ -597,3 +607,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, return count; } + +uint16_t +fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if ((m->ol_flags & PKT_TX_TCP_SEG) && + (m->tso_segsz < FM10K_TSO_MINMSS)) { + rte_errno = -EINVAL; + return i; + } + + if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} -- 1.7.9.5
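A note on the FM10K_TX_OFFLOAD_NOTSUP_MASK idiom used above (and repeated in the e1000, i40e and ixgbe patches): the driver lists the offload flags its hardware supports and XORs that subset with the global PKT_TX_OFFLOAD_MASK; because the supported set is a strict subset, the XOR yields exactly the unsupported bits, so a single AND against ol_flags is enough to reject a packet. The following standalone sketch demonstrates the arithmetic with made-up bit values, not the real PKT_TX_* definitions:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for a few PKT_TX_* bits; the real values live in
 * rte_mbuf.h and differ from these. */
#define DEMO_TX_IP_CKSUM	(1ULL << 0)
#define DEMO_TX_L4_MASK		(3ULL << 1)	/* two-bit field: none/TCP/UDP/SCTP */
#define DEMO_TX_TCP_SEG		(1ULL << 3)
#define DEMO_TX_VLAN_PKT	(1ULL << 4)
#define DEMO_TX_OUTER_IP_CKSUM	(1ULL << 5)

/* Everything an application may request... */
#define DEMO_TX_OFFLOAD_MASK \
	(DEMO_TX_IP_CKSUM | DEMO_TX_L4_MASK | DEMO_TX_TCP_SEG | \
	 DEMO_TX_VLAN_PKT | DEMO_TX_OUTER_IP_CKSUM)

/* ...versus what this (imaginary) device supports. */
#define DEMO_DEV_SUPPORTED_MASK \
	(DEMO_TX_IP_CKSUM | DEMO_TX_L4_MASK | DEMO_TX_VLAN_PKT)

/* Supported mask is a subset of the full mask, so XOR == "full minus supported". */
#define DEMO_DEV_NOTSUP_MASK \
	(DEMO_TX_OFFLOAD_MASK ^ DEMO_DEV_SUPPORTED_MASK)

int main(void)
{
	uint64_t ol_flags = DEMO_TX_IP_CKSUM | DEMO_TX_TCP_SEG; /* app asks for TSO */

	if (ol_flags & DEMO_DEV_NOTSUP_MASK)
		printf("offload request rejected: unsupported bits 0x%llx\n",
		       (unsigned long long)(ol_flags & DEMO_DEV_NOTSUP_MASK));
	else
		printf("offload request accepted\n");
	return 0;
}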
[dpdk-dev] [PATCH v7 4/6] i40e: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/i40e/i40e_ethdev.c |3 ++ drivers/net/i40e/i40e_rxtx.c | 72 +++- drivers/net/i40e/i40e_rxtx.h |8 + 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index 5af0e43..dab0d48 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -936,6 +936,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) dev->dev_ops = &i40e_eth_dev_ops; dev->rx_pkt_burst = i40e_recv_pkts; dev->tx_pkt_burst = i40e_xmit_pkts; + dev->tx_pkt_prep = i40e_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -2629,6 +2630,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_max = I40E_MAX_RING_DESC, .nb_min = I40E_MIN_RING_DESC, .nb_align = I40E_ALIGN_RING_DESC, + .nb_seg_max = I40E_TX_MAX_SEG, + .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG, }; if (pf->flags & I40E_FLAG_VMDQ) { diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 7ae7d9f..7f6d3d8 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,6 +50,8 @@ #include #include #include +#include +#include #include "i40e_logs.h" #include "base/i40e_prototype.h" @@ -79,6 +81,17 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define I40E_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_OUTER_IP_CKSUM | \ + PKT_TX_TCP_SEG |\ + PKT_TX_QINQ_PKT | \ + PKT_TX_VLAN_PKT) + +#define I40E_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK) + static uint16_t i40e_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); @@ -1411,6 +1424,61 @@ i40e_xmit_pkts_simple(void *tx_queue, return nb_tx; } +/* + * + * TX prep functions + * + **/ +uint16_t +i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** +* m->nb_segs is uint8_t, so m->nb_segs is always less than +* I40E_TX_MAX_SEG. +* We check only a condition for m->nb_segs > I40E_TX_MAX_MTU_SEG. +*/ + if (!(ol_flags & PKT_TX_TCP_SEG)) { + if (m->nb_segs > I40E_TX_MAX_MTU_SEG) { + rte_errno = -EINVAL; + return i; + } + } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) || + (m->tso_segsz > I40E_MAX_TSO_MSS)) { + /* MSS outside the range (256B - 9674B) are considered malicious */ + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + return i; +} + /* * Find the VSI the queue belongs to. 'queue_idx' is the queue index * application used, which assume having sequential ones. But from driver's @@ -2763,9 +2831,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev) PMD_INIT_LOG(DEBUG, "Simple tx finally be used."); dev->tx_pkt_burst = i40e_xmit_pkts_simple; } + dev->tx_pkt_prep = NULL;
[dpdk-dev] [PATCH v7 5/6] ixgbe: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/ixgbe/ixgbe_ethdev.c |3 ++ drivers/net/ixgbe/ixgbe_ethdev.h |5 +++- drivers/net/ixgbe/ixgbe_rxtx.c | 58 +- drivers/net/ixgbe/ixgbe_rxtx.h |2 ++ 4 files changed, 66 insertions(+), 2 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index 4ca5747..4c6a8e1 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -517,6 +517,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = { .nb_max = IXGBE_MAX_RING_DESC, .nb_min = IXGBE_MIN_RING_DESC, .nb_align = IXGBE_TXD_ALIGN, + .nb_seg_max = IXGBE_TX_MAX_SEG, + .nb_mtu_seg_max = IXGBE_TX_MAX_SEG, }; static const struct eth_dev_ops ixgbe_eth_dev_ops = { @@ -1103,6 +1105,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = &ixgbe_eth_dev_ops; eth_dev->rx_pkt_burst = &ixgbe_recv_pkts; eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts; + eth_dev->tx_pkt_prep = &ixgbe_prep_pkts; /* * For secondary processes, we don't initialise any further as primary diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h index 4ff6338..e229cf5 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.h +++ b/drivers/net/ixgbe/ixgbe_ethdev.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf); diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c index 2ce8234..031414c 100644 --- a/drivers/net/ixgbe/ixgbe_rxtx.c +++ b/drivers/net/ixgbe/ixgbe_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * Copyright 2014 6WIND S.A. * All rights reserved. 
* @@ -70,6 +70,7 @@ #include #include #include +#include #include "ixgbe_logs.h" #include "base/ixgbe_api.h" @@ -87,6 +88,9 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK) + #if 1 #define RTE_PMD_USE_PREFETCH #endif @@ -905,6 +909,56 @@ end_of_tx: /* * + * TX prep functions + * + **/ +uint16_t +ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** +* Check if packet meets requirements for number of segments +* +* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and non-TSO +*/ + + if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) { + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + +/* + * * RX functions * **/ @@ -2282,6 +2336,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq) if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) { PMD_INIT_LOG(DEBUG, &
[dpdk-dev] [PATCH v7 6/6] testpmd: use Tx preparation in csum engine
Removed pseudo header calculation for udp/tcp/tso packets from application and used Tx preparation API for packet preparation and verification. Adding aditional step to the csum engine costs about 3-4% of performance drop, on my setup with ixgbe driver. It's caused mostly by the need of reaccessing and modification of packet data. Signed-off-by: Tomasz Kulasek --- app/test-pmd/csumonly.c | 36 +--- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index 57e6ae2..6f33ae9 100644 --- a/app/test-pmd/csumonly.c +++ b/app/test-pmd/csumonly.c @@ -112,15 +112,6 @@ struct simple_gre_hdr { } __attribute__((__packed__)); static uint16_t -get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags) -{ - if (ethertype == _htons(ETHER_TYPE_IPv4)) - return rte_ipv4_phdr_cksum(l3_hdr, ol_flags); - else /* assume ethertype == ETHER_TYPE_IPv6 */ - return rte_ipv6_phdr_cksum(l3_hdr, ol_flags); -} - -static uint16_t get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype) { if (ethertype == _htons(ETHER_TYPE_IPv4)) @@ -370,32 +361,24 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info, /* do not recalculate udp cksum if it was 0 */ if (udp_hdr->dgram_cksum != 0) { udp_hdr->dgram_cksum = 0; - if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) { + if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) ol_flags |= PKT_TX_UDP_CKSUM; - udp_hdr->dgram_cksum = get_psd_sum(l3_hdr, - info->ethertype, ol_flags); - } else { + else udp_hdr->dgram_cksum = get_udptcp_checksum(l3_hdr, udp_hdr, info->ethertype); - } } } else if (info->l4_proto == IPPROTO_TCP) { tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len); tcp_hdr->cksum = 0; - if (tso_segsz) { + if (tso_segsz) ol_flags |= PKT_TX_TCP_SEG; - tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, - ol_flags); - } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) { + else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) ol_flags |= PKT_TX_TCP_CKSUM; - tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, - ol_flags); - } else { + else tcp_hdr->cksum = get_udptcp_checksum(l3_hdr, tcp_hdr, info->ethertype); - } } else if (info->l4_proto == IPPROTO_SCTP) { sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len); sctp_hdr->cksum = 0; @@ -648,6 +631,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */ uint16_t nb_rx; uint16_t nb_tx; + uint16_t nb_prep; uint16_t i; uint64_t rx_ol_flags, tx_ol_flags; uint16_t testpmd_ol_flags; @@ -857,7 +841,13 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) printf("\n"); } } - nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx); + nb_prep = rte_eth_tx_prep(fs->tx_port, fs->tx_queue, pkts_burst, + nb_rx); + if (nb_prep != nb_rx) + printf("Preparing packet burst to transmit failed: %s\n", + rte_strerror(rte_errno)); + + nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_prep); /* * Retry if necessary */ -- 1.7.9.5
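The csum engine above only reports the failure and transmits the already-prepared prefix of the burst. As the cover letter notes, rte_eth_tx_prep() can be re-applied to the remaining packets to find further offenders; a hedged sketch of that recovery pattern (the helper name is illustrative, and it simply drops each rejected mbuf) could look like this:

#include <string.h>

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Sketch only: assumes the rte_eth_tx_prep() API introduced by this series.
 * Prepares a burst for transmission, dropping any mbuf the driver rejects
 * so that the rest of the burst can still be sent. */
static uint16_t
prep_burst_drop_bad(uint8_t port, uint16_t queue,
		struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t nb_ok = 0;

	while (nb_ok < nb_pkts) {
		uint16_t done = rte_eth_tx_prep(port, queue,
				pkts + nb_ok, nb_pkts - nb_ok);

		nb_ok += done;
		if (nb_ok == nb_pkts)
			break;

		/* pkts[nb_ok] is the first rejected packet (rte_errno says
		 * why): drop it, close the gap and prepare the remainder. */
		rte_pktmbuf_free(pkts[nb_ok]);
		memmove(&pkts[nb_ok], &pkts[nb_ok + 1],
				(nb_pkts - nb_ok - 1) * sizeof(pkts[0]));
		nb_pkts--;
	}

	return nb_ok; /* number of packets now safe to pass to tx_burst */
}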
[dpdk-dev] [PATCH v8 0/6] add Tx preparation
>From 35b09a978d244092337b6f46fd1309f8c733bb6b Mon Sep 17 00:00:00 2001 From: Tomasz Kulasek Date: Fri, 14 Oct 2016 16:10:35 +0200 Subject: [PATCH v6 0/6] add Tx preparation As discussed in that thread: http://dpdk.org/ml/archives/dev/2015-September/023603.html Different NIC models depending on HW offload requested might impose different requirements on packets to be TX-ed in terms of: - Max number of fragments per packet allowed - Max number of fragments per TSO segments - The way pseudo-header checksum should be pre-calculated - L3/L4 header fields filling - etc. MOTIVATION: --- 1) Some work cannot (and didn't should) be done in rte_eth_tx_burst. However, this work is sometimes required, and now, it's an application issue. 2) Different hardware may have different requirements for TX offloads, other subset can be supported and so on. 3) Some parameters (e.g. number of segments in ixgbe driver) may hung device. These parameters may be vary for different devices. For example i40e HW allows 8 fragments per packet, but that is after TSO segmentation. While ixgbe has a 38-fragment pre-TSO limit. 4) Fields in packet may require different initialization (like e.g. will require pseudo-header checksum precalculation, sometimes in a different way depending on packet type, and so on). Now application needs to care about it. 5) Using additional API (rte_eth_tx_prep) before rte_eth_tx_burst let to prepare packet burst in acceptable form for specific device. 6) Some additional checks may be done in debug mode keeping tx_burst implementation clean. PROPOSAL: - To help user to deal with all these varieties we propose to: 1) Introduce rte_eth_tx_prep() function to do necessary preparations of packet burst to be safely transmitted on device for desired HW offloads (set/reset checksum field according to the hardware requirements) and check HW constraints (number of segments per packet, etc). While the limitations and requirements may differ for devices, it requires to extend rte_eth_dev structure with new function pointer "tx_pkt_prep" which can be implemented in the driver to prepare and verify packets, in devices specific way, before burst, what should to prevent application to send malformed packets. 2) Also new fields will be introduced in rte_eth_desc_lim: nb_seg_max and nb_mtu_seg_max, providing an information about max segments in TSO and non-TSO packets acceptable by device. This information is useful for application to not create/limit malicious packet. APPLICATION (CASE OF USE): -- 1) Application should to initialize burst of packets to send, set required tx offload flags and required fields, like l2_len, l3_len, l4_len, and tso_segsz 2) Application passes burst to the rte_eth_tx_prep to check conditions required to send packets through the NIC. 3) The result of rte_eth_tx_prep can be used to send valid packets and/or restore invalid if function fails. e.g. for (i = 0; i < nb_pkts; i++) { /* initialize or process packet */ bufs[i]->tso_segsz = 800; bufs[i]->ol_flags = PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_IP_CKSUM; bufs[i]->l2_len = sizeof(struct ether_hdr); bufs[i]->l3_len = sizeof(struct ipv4_hdr); bufs[i]->l4_len = sizeof(struct tcp_hdr); } /* Prepare burst of TX packets */ nb_prep = rte_eth_tx_prep(port, 0, bufs, nb_pkts); if (nb_prep < nb_pkts) { printf("tx_prep failed\n"); /* nb_prep indicates here first invalid packet. rte_eth_tx_prep * can be used on remaining packets to find another ones. 
*/
}

/* Send burst of TX packets */
nb_tx = rte_eth_tx_burst(port, 0, bufs, nb_prep);

/* Free any unsent packets. */

v8 changes:
 - mbuf argument in rte_validate_tx_offload declared as const

v7 changes:
 - comments reworded/added
 - changed errno values returned from Tx prep API
 - added check in rte_phdr_cksum_fix if headers are in the first data segment and can be safely modified
 - moved rte_validate_tx_offload to rte_mbuf
 - moved rte_phdr_cksum_fix to rte_net.h
 - removed rte_pkt.h new file as useless

v5 changes:
 - rebased csum engine modification
 - added information to the csum engine about performance tests
 - some performance improvements

v4 changes:
 - tx_prep is now set to default behavior (NULL) for the simple/vector path in the fm10k, i40e and ixgbe drivers to increase performance when Tx offloads are not intentionally available

v3 changes:
 - reworked the csum testpmd engine instead of adding a new one,
 - fixed the checksum initialization procedure to also include outer checksum offloads,
 - some minor formatting and optimization changes
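Putting the cover letter's fragments together, a minimal Tx path built on the new API might look as follows. This is a sketch under the assumptions stated above (port already configured and started, TSO over IPv4 requested); send_burst() and its parameters are illustrative, not part of the patch set:

#include <stdio.h>

#include <rte_errno.h>
#include <rte_ethdev.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_mbuf.h>
#include <rte_tcp.h>

/* Illustrative helper: prepare and send one burst, freeing whatever could
 * not be prepared or transmitted. */
static void
send_burst(uint8_t port, uint16_t queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	uint16_t i, nb_prep, nb_tx;

	for (i = 0; i < nb_pkts; i++) {
		/* Offload request: TSO over IPv4 with IP checksum offload. */
		bufs[i]->ol_flags = PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_IP_CKSUM;
		bufs[i]->tso_segsz = 800;
		bufs[i]->l2_len = sizeof(struct ether_hdr);
		bufs[i]->l3_len = sizeof(struct ipv4_hdr);
		bufs[i]->l4_len = sizeof(struct tcp_hdr);
	}

	/* Fix checksums / validate constraints for this port's hardware. */
	nb_prep = rte_eth_tx_prep(port, queue, bufs, nb_pkts);
	if (nb_prep < nb_pkts)
		printf("tx_prep: packet %u rejected: %s\n",
				nb_prep, rte_strerror(rte_errno));

	/* Transmit only the successfully prepared prefix of the burst. */
	nb_tx = rte_eth_tx_burst(port, queue, bufs, nb_prep);

	/* Free everything that was neither prepared nor transmitted. */
	for (i = nb_tx; i < nb_pkts; i++)
		rte_pktmbuf_free(bufs[i]);
}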
[dpdk-dev] [PATCH v8 1/6] ethdev: add Tx preparation
Added API for `rte_eth_tx_prep` uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) Added fields to the `struct rte_eth_desc_lim`: uint16_t nb_seg_max; /**< Max number of segments per whole packet. */ uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ Added functions: int rte_validate_tx_offload(const struct rte_mbuf *m) to validate general requirements for tx offload set in mbuf of packet such a flag completness. In current implementation this function is called optionaly when RTE_LIBRTE_ETHDEV_DEBUG is enabled. int rte_phdr_cksum_fix(struct rte_mbuf *m) to fix pseudo header checksum for TSO and non-TSO tcp/udp packets before hardware tx checksum offload. - for non-TSO tcp/udp packets full pseudo-header checksum is counted and set. - for TSO the IP payload length is not included. PERFORMANCE TESTS - This feature was tested with modified csum engine from test-pmd. The packet checksum preparation was moved from application to Tx preparation step placed before burst. We may expect some overhead costs caused by: 1) using additional callback before burst, 2) rescanning burst, 3) additional condition checking (packet validation), 4) worse optimization (e.g. packet data access, etc.) We tested it using ixgbe Tx preparation implementation with some parts disabled to have comparable information about the impact of diferent parts of implementation. IMPACT: 1) For unimplemented Tx preparation callback the performance impact is negligible, 2) For packet condition check without checksum modifications (nb_segs, available offloads, etc.) is 14626628/14252168 (~2.62% drop), 3) Full support in ixgbe driver (point 2 + packet checksum initialization) is 14060924/13588094 (~3.48% drop) Signed-off-by: Tomasz Kulasek --- config/common_base|1 + lib/librte_ether/rte_ethdev.h | 97 + lib/librte_mbuf/rte_mbuf.h| 56 lib/librte_net/rte_net.h | 90 ++ 4 files changed, 244 insertions(+) diff --git a/config/common_base b/config/common_base index c7fd3db..619284b 100644 --- a/config/common_base +++ b/config/common_base @@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024 CONFIG_RTE_LIBRTE_IEEE1588=n CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y +CONFIG_RTE_ETHDEV_TX_PREP=y # # Support NIC bypass logic diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 38641e8..d548d48 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -182,6 +182,7 @@ extern "C" { #include #include #include +#include #include "rte_ether.h" #include "rte_eth_ctrl.h" #include "rte_dev_info.h" @@ -699,6 +700,8 @@ struct rte_eth_desc_lim { uint16_t nb_max; /**< Max allowed number of descriptors. */ uint16_t nb_min; /**< Min allowed number of descriptors. */ uint16_t nb_align; /**< Number of descriptors should be aligned to. */ + uint16_t nb_seg_max; /**< Max number of segments per whole packet. */ + uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ }; /** @@ -1188,6 +1191,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq, uint16_t nb_pkts); /**< @internal Send output packets on a transmit queue of an Ethernet device. */ +typedef uint16_t (*eth_tx_prep_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Prepare output packets on a transmit queue of an Ethernet device. 
*/ + typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf); /**< @internal Get current flow control parameter on an Ethernet device */ @@ -1622,6 +1630,7 @@ struct rte_eth_rxtx_callback { struct rte_eth_dev { eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ + eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */ struct rte_eth_dev_data *data; /**< Pointer to device data */ const struct eth_driver *driver;/**< Driver for this device */ const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ @@ -2816,6 +2825,94 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id, return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts); } +/** + * Process a burst of output packets on a transmit queue of an Ethernet device. + * + * The rte_eth_tx_prep() function is invoked to prepare output packets
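The body of the new rte_eth_tx_prep() inline is not fully quoted in this excerpt. Based on the description (a tx_pkt_prep function pointer next to tx_pkt_burst, negligible cost for drivers that do not implement it, and CONFIG_RTE_ETHDEV_TX_PREP to compile the feature out), the dispatch plausibly reduces to something like the sketch below; treat it as an illustration of the mechanism rather than the literal header code:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Sketch of how the rte_eth_tx_prep() wrapper plausibly dispatches to the
 * driver callback; the literal rte_ethdev.h code may differ. */
static inline uint16_t
tx_prep_dispatch_sketch(uint8_t port_id, uint16_t queue_id,
		struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct rte_eth_dev *dev = &rte_eth_devices[port_id];

	/* Drivers that need no preparation (e.g. the simple/vector Tx paths
	 * in fm10k, i40e and ixgbe set tx_pkt_prep to NULL) report the whole
	 * burst as ready without touching any packet. */
	if (dev->tx_pkt_prep == NULL)
		return nb_pkts;

	return (*dev->tx_pkt_prep)(dev->data->tx_queues[queue_id],
			tx_pkts, nb_pkts);
}

With CONFIG_RTE_ETHDEV_TX_PREP=n the wrapper would presumably collapse to returning nb_pkts unconditionally, which is consistent with the "negligible impact" result reported for the unimplemented case.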
[dpdk-dev] [PATCH v8 2/6] e1000: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/e1000/e1000_ethdev.h | 11 drivers/net/e1000/em_ethdev.c|5 +++- drivers/net/e1000/em_rxtx.c | 48 ++- drivers/net/e1000/igb_ethdev.c |4 +++ drivers/net/e1000/igb_rxtx.c | 52 +- 5 files changed, 117 insertions(+), 3 deletions(-) diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h index 6c25c8d..bd0f277 100644 --- a/drivers/net/e1000/e1000_ethdev.h +++ b/drivers/net/e1000/e1000_ethdev.h @@ -138,6 +138,11 @@ #define E1000_MISC_VEC_ID RTE_INTR_VEC_ZERO_OFFSET #define E1000_RX_VEC_START RTE_INTR_VEC_RXTX_OFFSET +#define IGB_TX_MAX_SEG UINT8_MAX +#define IGB_TX_MAX_MTU_SEG UINT8_MAX +#define EM_TX_MAX_SEG UINT8_MAX +#define EM_TX_MAX_MTU_SEG UINT8_MAX + /* structure for interrupt relative data */ struct e1000_interrupt { uint32_t flags; @@ -315,6 +320,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev); uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); @@ -376,6 +384,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev); uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c index 7cf5f0c..17b45cb 100644 --- a/drivers/net/e1000/em_ethdev.c +++ b/drivers/net/e1000/em_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -300,6 +300,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = ð_em_ops; eth_dev->rx_pkt_burst = (eth_rx_burst_t)ð_em_recv_pkts; eth_dev->tx_pkt_burst = (eth_tx_burst_t)ð_em_xmit_pkts; + eth_dev->tx_pkt_prep = (eth_tx_prep_t)ð_em_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -1067,6 +1068,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_max = E1000_MAX_RING_DESC, .nb_min = E1000_MIN_RING_DESC, .nb_align = EM_TXD_ALIGN, + .nb_seg_max = EM_TX_MAX_SEG, + .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M | diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c index 41f51c0..5bd3c99 100644 --- a/drivers/net/e1000/em_rxtx.c +++ b/drivers/net/e1000/em_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -66,6 +66,7 @@ #include #include #include +#include #include #include "e1000_logs.h" @@ -77,6 +78,14 @@ #define E1000_RXDCTL_GRAN 0x0100 /* RXDCTL Granularity */ +#define E1000_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_VLAN_PKT) + +#define E1000_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK) + /** * Structure associated with each descriptor of the RX ring of a RX queue. 
*/ @@ -618,6 +627,43 @@ end_of_tx: /* * + * TX prep functions + * + **/ +uint16_t +eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (
[dpdk-dev] [PATCH v8 3/6] fm10k: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/fm10k/fm10k.h|6 + drivers/net/fm10k/fm10k_ethdev.c |5 drivers/net/fm10k/fm10k_rxtx.c | 50 +- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h index 05aa1a2..c6fed21 100644 --- a/drivers/net/fm10k/fm10k.h +++ b/drivers/net/fm10k/fm10k.h @@ -69,6 +69,9 @@ #define FM10K_MAX_RX_DESC (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc)) #define FM10K_MAX_TX_DESC (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc)) +#define FM10K_TX_MAX_SEG UINT8_MAX +#define FM10K_TX_MAX_MTU_SEG UINT8_MAX + /* * byte aligment for HW RX data buffer * Datasheet requires RX buffer addresses shall either be 512-byte aligned or @@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset); uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq); int fm10k_rx_vec_condition_check(struct rte_eth_dev *); void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq); diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index c804436..dffb6d1 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -1446,6 +1446,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev, .nb_max = FM10K_MAX_TX_DESC, .nb_min = FM10K_MIN_TX_DESC, .nb_align = FM10K_MULT_TX_DESC, + .nb_seg_max = FM10K_TX_MAX_SEG, + .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G | @@ -2754,8 +2756,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev) fm10k_txq_vec_setup(txq); } dev->tx_pkt_burst = fm10k_xmit_pkts_vec; + dev->tx_pkt_prep = NULL; } else { dev->tx_pkt_burst = fm10k_xmit_pkts; + dev->tx_pkt_prep = fm10k_prep_pkts; PMD_INIT_LOG(DEBUG, "Use regular Tx func"); } } @@ -2834,6 +2838,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev) dev->dev_ops = &fm10k_eth_dev_ops; dev->rx_pkt_burst = &fm10k_recv_pkts; dev->tx_pkt_burst = &fm10k_xmit_pkts; + dev->tx_pkt_prep = &fm10k_prep_pkts; /* only initialize in the primary process */ if (rte_eal_process_type() != RTE_PROC_PRIMARY) diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c index 32cc7ff..5fc4d5a 100644 --- a/drivers/net/fm10k/fm10k_rxtx.c +++ b/drivers/net/fm10k/fm10k_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -35,6 +35,7 @@ #include #include +#include #include "fm10k.h" #include "base/fm10k_type.h" @@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd) } #endif +#define FM10K_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT |\ + PKT_TX_IP_CKSUM |\ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#define FM10K_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK) + /* @note: When this function is changed, make corresponding change to * fm10k_dev_supported_ptypes_get() */ @@ -597,3 +607,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, return count; } + +uint16_t +fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if ((m->ol_flags & PKT_TX_TCP_SEG) && + (m->tso_segsz < FM10K_TSO_MINMSS)) { + rte_errno = -EINVAL; + return i; + } + + if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} -- 1.7.9.5
[dpdk-dev] [PATCH v8 4/6] i40e: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/i40e/i40e_ethdev.c |3 ++ drivers/net/i40e/i40e_rxtx.c | 72 +++- drivers/net/i40e/i40e_rxtx.h |8 + 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index 5af0e43..dab0d48 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -936,6 +936,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) dev->dev_ops = &i40e_eth_dev_ops; dev->rx_pkt_burst = i40e_recv_pkts; dev->tx_pkt_burst = i40e_xmit_pkts; + dev->tx_pkt_prep = i40e_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -2629,6 +2630,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_max = I40E_MAX_RING_DESC, .nb_min = I40E_MIN_RING_DESC, .nb_align = I40E_ALIGN_RING_DESC, + .nb_seg_max = I40E_TX_MAX_SEG, + .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG, }; if (pf->flags & I40E_FLAG_VMDQ) { diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 7ae7d9f..7f6d3d8 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,6 +50,8 @@ #include #include #include +#include +#include #include "i40e_logs.h" #include "base/i40e_prototype.h" @@ -79,6 +81,17 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define I40E_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_OUTER_IP_CKSUM | \ + PKT_TX_TCP_SEG |\ + PKT_TX_QINQ_PKT | \ + PKT_TX_VLAN_PKT) + +#define I40E_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK) + static uint16_t i40e_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); @@ -1411,6 +1424,61 @@ i40e_xmit_pkts_simple(void *tx_queue, return nb_tx; } +/* + * + * TX prep functions + * + **/ +uint16_t +i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** +* m->nb_segs is uint8_t, so m->nb_segs is always less than +* I40E_TX_MAX_SEG. +* We check only a condition for m->nb_segs > I40E_TX_MAX_MTU_SEG. +*/ + if (!(ol_flags & PKT_TX_TCP_SEG)) { + if (m->nb_segs > I40E_TX_MAX_MTU_SEG) { + rte_errno = -EINVAL; + return i; + } + } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) || + (m->tso_segsz > I40E_MAX_TSO_MSS)) { + /* MSS outside the range (256B - 9674B) are considered malicious */ + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + return i; +} + /* * Find the VSI the queue belongs to. 'queue_idx' is the queue index * application used, which assume having sequential ones. But from driver's @@ -2763,9 +2831,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev) PMD_INIT_LOG(DEBUG, "Simple tx finally be used."); dev->tx_pkt_burst = i40e_xmit_pkts_simple; } + dev->tx_pkt_prep = NULL;
[dpdk-dev] [PATCH v8 5/6] ixgbe: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/ixgbe/ixgbe_ethdev.c |3 ++ drivers/net/ixgbe/ixgbe_ethdev.h |5 +++- drivers/net/ixgbe/ixgbe_rxtx.c | 58 +- drivers/net/ixgbe/ixgbe_rxtx.h |2 ++ 4 files changed, 66 insertions(+), 2 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index 4ca5747..4c6a8e1 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -517,6 +517,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = { .nb_max = IXGBE_MAX_RING_DESC, .nb_min = IXGBE_MIN_RING_DESC, .nb_align = IXGBE_TXD_ALIGN, + .nb_seg_max = IXGBE_TX_MAX_SEG, + .nb_mtu_seg_max = IXGBE_TX_MAX_SEG, }; static const struct eth_dev_ops ixgbe_eth_dev_ops = { @@ -1103,6 +1105,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = &ixgbe_eth_dev_ops; eth_dev->rx_pkt_burst = &ixgbe_recv_pkts; eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts; + eth_dev->tx_pkt_prep = &ixgbe_prep_pkts; /* * For secondary processes, we don't initialise any further as primary diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h index 4ff6338..e229cf5 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.h +++ b/drivers/net/ixgbe/ixgbe_ethdev.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf); diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c index 2ce8234..031414c 100644 --- a/drivers/net/ixgbe/ixgbe_rxtx.c +++ b/drivers/net/ixgbe/ixgbe_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * Copyright 2014 6WIND S.A. * All rights reserved. 
* @@ -70,6 +70,7 @@ #include #include #include +#include #include "ixgbe_logs.h" #include "base/ixgbe_api.h" @@ -87,6 +88,9 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK) + #if 1 #define RTE_PMD_USE_PREFETCH #endif @@ -905,6 +909,56 @@ end_of_tx: /* * + * TX prep functions + * + **/ +uint16_t +ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** +* Check if packet meets requirements for number of segments +* +* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and non-TSO +*/ + + if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) { + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + +/* + * * RX functions * **/ @@ -2282,6 +2336,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq) if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) { PMD_INIT_LOG(DEBUG, &
[dpdk-dev] [PATCH v8 6/6] testpmd: use Tx preparation in csum engine
Removed pseudo header calculation for udp/tcp/tso packets from application and used Tx preparation API for packet preparation and verification. Adding aditional step to the csum engine costs about 3-4% of performance drop, on my setup with ixgbe driver. It's caused mostly by the need of reaccessing and modification of packet data. Signed-off-by: Tomasz Kulasek --- app/test-pmd/csumonly.c | 36 +--- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index 57e6ae2..6f33ae9 100644 --- a/app/test-pmd/csumonly.c +++ b/app/test-pmd/csumonly.c @@ -112,15 +112,6 @@ struct simple_gre_hdr { } __attribute__((__packed__)); static uint16_t -get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags) -{ - if (ethertype == _htons(ETHER_TYPE_IPv4)) - return rte_ipv4_phdr_cksum(l3_hdr, ol_flags); - else /* assume ethertype == ETHER_TYPE_IPv6 */ - return rte_ipv6_phdr_cksum(l3_hdr, ol_flags); -} - -static uint16_t get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype) { if (ethertype == _htons(ETHER_TYPE_IPv4)) @@ -370,32 +361,24 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info, /* do not recalculate udp cksum if it was 0 */ if (udp_hdr->dgram_cksum != 0) { udp_hdr->dgram_cksum = 0; - if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) { + if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) ol_flags |= PKT_TX_UDP_CKSUM; - udp_hdr->dgram_cksum = get_psd_sum(l3_hdr, - info->ethertype, ol_flags); - } else { + else udp_hdr->dgram_cksum = get_udptcp_checksum(l3_hdr, udp_hdr, info->ethertype); - } } } else if (info->l4_proto == IPPROTO_TCP) { tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len); tcp_hdr->cksum = 0; - if (tso_segsz) { + if (tso_segsz) ol_flags |= PKT_TX_TCP_SEG; - tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, - ol_flags); - } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) { + else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) ol_flags |= PKT_TX_TCP_CKSUM; - tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, - ol_flags); - } else { + else tcp_hdr->cksum = get_udptcp_checksum(l3_hdr, tcp_hdr, info->ethertype); - } } else if (info->l4_proto == IPPROTO_SCTP) { sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len); sctp_hdr->cksum = 0; @@ -648,6 +631,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */ uint16_t nb_rx; uint16_t nb_tx; + uint16_t nb_prep; uint16_t i; uint64_t rx_ol_flags, tx_ol_flags; uint16_t testpmd_ol_flags; @@ -857,7 +841,13 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) printf("\n"); } } - nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx); + nb_prep = rte_eth_tx_prep(fs->tx_port, fs->tx_queue, pkts_burst, + nb_rx); + if (nb_prep != nb_rx) + printf("Preparing packet burst to transmit failed: %s\n", + rte_strerror(rte_errno)); + + nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_prep); /* * Retry if necessary */ -- 1.7.9.5
[dpdk-dev] [PATCH v9 0/6] add Tx preparation
>From 35b09a978d244092337b6f46fd1309f8c733bb6b Mon Sep 17 00:00:00 2001 From: Tomasz Kulasek Date: Fri, 14 Oct 2016 16:10:35 +0200 Subject: [PATCH v6 0/6] add Tx preparation As discussed in that thread: http://dpdk.org/ml/archives/dev/2015-September/023603.html Different NIC models depending on HW offload requested might impose different requirements on packets to be TX-ed in terms of: - Max number of fragments per packet allowed - Max number of fragments per TSO segments - The way pseudo-header checksum should be pre-calculated - L3/L4 header fields filling - etc. MOTIVATION: --- 1) Some work cannot (and didn't should) be done in rte_eth_tx_burst. However, this work is sometimes required, and now, it's an application issue. 2) Different hardware may have different requirements for TX offloads, other subset can be supported and so on. 3) Some parameters (e.g. number of segments in ixgbe driver) may hung device. These parameters may be vary for different devices. For example i40e HW allows 8 fragments per packet, but that is after TSO segmentation. While ixgbe has a 38-fragment pre-TSO limit. 4) Fields in packet may require different initialization (like e.g. will require pseudo-header checksum precalculation, sometimes in a different way depending on packet type, and so on). Now application needs to care about it. 5) Using additional API (rte_eth_tx_prep) before rte_eth_tx_burst let to prepare packet burst in acceptable form for specific device. 6) Some additional checks may be done in debug mode keeping tx_burst implementation clean. PROPOSAL: - To help user to deal with all these varieties we propose to: 1) Introduce rte_eth_tx_prep() function to do necessary preparations of packet burst to be safely transmitted on device for desired HW offloads (set/reset checksum field according to the hardware requirements) and check HW constraints (number of segments per packet, etc). While the limitations and requirements may differ for devices, it requires to extend rte_eth_dev structure with new function pointer "tx_pkt_prep" which can be implemented in the driver to prepare and verify packets, in devices specific way, before burst, what should to prevent application to send malformed packets. 2) Also new fields will be introduced in rte_eth_desc_lim: nb_seg_max and nb_mtu_seg_max, providing an information about max segments in TSO and non-TSO packets acceptable by device. This information is useful for application to not create/limit malicious packet. APPLICATION (CASE OF USE): -- 1) Application should to initialize burst of packets to send, set required tx offload flags and required fields, like l2_len, l3_len, l4_len, and tso_segsz 2) Application passes burst to the rte_eth_tx_prep to check conditions required to send packets through the NIC. 3) The result of rte_eth_tx_prep can be used to send valid packets and/or restore invalid if function fails. e.g. for (i = 0; i < nb_pkts; i++) { /* initialize or process packet */ bufs[i]->tso_segsz = 800; bufs[i]->ol_flags = PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_IP_CKSUM; bufs[i]->l2_len = sizeof(struct ether_hdr); bufs[i]->l3_len = sizeof(struct ipv4_hdr); bufs[i]->l4_len = sizeof(struct tcp_hdr); } /* Prepare burst of TX packets */ nb_prep = rte_eth_tx_prep(port, 0, bufs, nb_pkts); if (nb_prep < nb_pkts) { printf("tx_prep failed\n"); /* nb_prep indicates here first invalid packet. rte_eth_tx_prep * can be used on remaining packets to find another ones. 
*/
}

/* Send burst of TX packets */
nb_tx = rte_eth_tx_burst(port, 0, bufs, nb_prep);

/* Free any unsent packets. */

v9 changes:
 - fixed headers structure fragmentation check
 - moved fragmentation check into rte_validate_tx_offload()

v8 changes:
 - mbuf argument in rte_validate_tx_offload declared as const

v7 changes:
 - comments reworded/added
 - changed errno values returned from Tx prep API
 - added check in rte_phdr_cksum_fix if headers are in the first data segment and can be safely modified
 - moved rte_validate_tx_offload to rte_mbuf
 - moved rte_phdr_cksum_fix to rte_net.h
 - removed rte_pkt.h new file as useless

v6 changes:
 - added performance impact test results to the patch description

v5 changes:
 - rebased csum engine modification
 - added information to the csum engine about performance tests
 - some performance improvements

v4 changes:
 - tx_prep is now set to default behavior (NULL) for the simple/vector path in the fm10k, i40e and ixgbe drivers to increase performance when Tx offloads are not intentionally available
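The nb_seg_max and nb_mtu_seg_max limits introduced in rte_eth_desc_lim also let an application avoid building mbuf chains the device would reject. A small sketch of such a pre-check, covering only the segment-count limits (offload flags still go through rte_eth_tx_prep()); the helper name is illustrative:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Return 0 if the mbuf chain respects the port's advertised Tx segment
 * limits, -1 otherwise. Sketch only. */
static int
check_tx_seg_limits(uint8_t port_id, const struct rte_mbuf *m)
{
	struct rte_eth_dev_info dev_info;

	rte_eth_dev_info_get(port_id, &dev_info);

	if (m->ol_flags & PKT_TX_TCP_SEG) {
		/* TSO packet: bounded by the per-packet segment limit. */
		if (m->nb_segs > dev_info.tx_desc_lim.nb_seg_max)
			return -1;
	} else {
		/* Non-TSO packet: bounded by the per-MTU segment limit. */
		if (m->nb_segs > dev_info.tx_desc_lim.nb_mtu_seg_max)
			return -1;
	}
	return 0;
}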
[dpdk-dev] [PATCH v9 1/6] ethdev: add Tx preparation
Added API for `rte_eth_tx_prep` uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) Added fields to the `struct rte_eth_desc_lim`: uint16_t nb_seg_max; /**< Max number of segments per whole packet. */ uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ Added functions: int rte_validate_tx_offload(struct rte_mbuf *m) to validate general requirements for tx offload set in mbuf of packet such a flag completness. In current implementation this function is called optionaly when RTE_LIBRTE_ETHDEV_DEBUG is enabled. int rte_phdr_cksum_fix(struct rte_mbuf *m) to fix pseudo header checksum for TSO and non-TSO tcp/udp packets before hardware tx checksum offload. - for non-TSO tcp/udp packets full pseudo-header checksum is counted and set. - for TSO the IP payload length is not included. PERFORMANCE TESTS - This feature was tested with modified csum engine from test-pmd. The packet checksum preparation was moved from application to Tx preparation step placed before burst. We may expect some overhead costs caused by: 1) using additional callback before burst, 2) rescanning burst, 3) additional condition checking (packet validation), 4) worse optimization (e.g. packet data access, etc.) We tested it using ixgbe Tx preparation implementation with some parts disabled to have comparable information about the impact of different parts of implementation. IMPACT: 1) For unimplemented Tx preparation callback the performance impact is negligible, 2) For packet condition check without checksum modifications (nb_segs, available offloads, etc.) is 14626628/14252168 (~2.62% drop), 3) Full support in ixgbe driver (point 2 + packet checksum initialization) is 14060924/13588094 (~3.48% drop) Signed-off-by: Tomasz Kulasek --- config/common_base|1 + lib/librte_ether/rte_ethdev.h | 97 + lib/librte_mbuf/rte_mbuf.h| 64 +++ lib/librte_net/rte_net.h | 85 4 files changed, 247 insertions(+) diff --git a/config/common_base b/config/common_base index c7fd3db..619284b 100644 --- a/config/common_base +++ b/config/common_base @@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024 CONFIG_RTE_LIBRTE_IEEE1588=n CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y +CONFIG_RTE_ETHDEV_TX_PREP=y # # Support NIC bypass logic diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 38641e8..d548d48 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -182,6 +182,7 @@ extern "C" { #include #include #include +#include #include "rte_ether.h" #include "rte_eth_ctrl.h" #include "rte_dev_info.h" @@ -699,6 +700,8 @@ struct rte_eth_desc_lim { uint16_t nb_max; /**< Max allowed number of descriptors. */ uint16_t nb_min; /**< Min allowed number of descriptors. */ uint16_t nb_align; /**< Number of descriptors should be aligned to. */ + uint16_t nb_seg_max; /**< Max number of segments per whole packet. */ + uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ }; /** @@ -1188,6 +1191,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq, uint16_t nb_pkts); /**< @internal Send output packets on a transmit queue of an Ethernet device. */ +typedef uint16_t (*eth_tx_prep_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Prepare output packets on a transmit queue of an Ethernet device. 
*/ + typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf); /**< @internal Get current flow control parameter on an Ethernet device */ @@ -1622,6 +1630,7 @@ struct rte_eth_rxtx_callback { struct rte_eth_dev { eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ + eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */ struct rte_eth_dev_data *data; /**< Pointer to device data */ const struct eth_driver *driver;/**< Driver for this device */ const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ @@ -2816,6 +2825,94 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id, return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts); } +/** + * Process a burst of output packets on a transmit queue of an Ethernet device. + * + * The rte_eth_tx_prep() function is invoked to prepare output packets to be + *
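The rte_phdr_cksum_fix() body is not shown in this excerpt. Its described behavior matches what the testpmd get_psd_sum() helper used to do per packet: write the pseudo-header checksum into the L4 header, with the payload length left out for TSO. A hedged sketch of the IPv4/TCP case only, built on the existing rte_ipv4_phdr_cksum() helper; the real function also covers IPv6 and UDP and first verifies the header layout:

#include <errno.h>

#include <rte_ip.h>
#include <rte_mbuf.h>
#include <rte_tcp.h>

/* Sketch of the pseudo-header fix-up for an IPv4/TCP packet whose headers
 * sit in the first segment. */
static int
phdr_cksum_fix_ipv4_tcp(struct rte_mbuf *m)
{
	struct ipv4_hdr *ip_hdr;
	struct tcp_hdr *tcp_hdr;

	if (!(m->ol_flags & PKT_TX_IPV4))
		return -EINVAL;

	ip_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
	tcp_hdr = (struct tcp_hdr *)((char *)ip_hdr + m->l3_len);

	/* For plain checksum offload the pseudo-header sum covers the full
	 * TCP length; for TSO (PKT_TX_TCP_SEG) rte_ipv4_phdr_cksum() leaves
	 * the payload length out, since hardware fills it per segment. */
	tcp_hdr->cksum = rte_ipv4_phdr_cksum(ip_hdr, m->ol_flags);
	return 0;
}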
[dpdk-dev] [PATCH v9 2/6] e1000: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/e1000/e1000_ethdev.h | 11 drivers/net/e1000/em_ethdev.c|5 +++- drivers/net/e1000/em_rxtx.c | 48 ++- drivers/net/e1000/igb_ethdev.c |4 +++ drivers/net/e1000/igb_rxtx.c | 52 +- 5 files changed, 117 insertions(+), 3 deletions(-) diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h index 6c25c8d..bd0f277 100644 --- a/drivers/net/e1000/e1000_ethdev.h +++ b/drivers/net/e1000/e1000_ethdev.h @@ -138,6 +138,11 @@ #define E1000_MISC_VEC_ID RTE_INTR_VEC_ZERO_OFFSET #define E1000_RX_VEC_START RTE_INTR_VEC_RXTX_OFFSET +#define IGB_TX_MAX_SEG UINT8_MAX +#define IGB_TX_MAX_MTU_SEG UINT8_MAX +#define EM_TX_MAX_SEG UINT8_MAX +#define EM_TX_MAX_MTU_SEG UINT8_MAX + /* structure for interrupt relative data */ struct e1000_interrupt { uint32_t flags; @@ -315,6 +320,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev); uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); @@ -376,6 +384,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev); uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c index 7cf5f0c..17b45cb 100644 --- a/drivers/net/e1000/em_ethdev.c +++ b/drivers/net/e1000/em_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -300,6 +300,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = ð_em_ops; eth_dev->rx_pkt_burst = (eth_rx_burst_t)ð_em_recv_pkts; eth_dev->tx_pkt_burst = (eth_tx_burst_t)ð_em_xmit_pkts; + eth_dev->tx_pkt_prep = (eth_tx_prep_t)ð_em_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -1067,6 +1068,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_max = E1000_MAX_RING_DESC, .nb_min = E1000_MIN_RING_DESC, .nb_align = EM_TXD_ALIGN, + .nb_seg_max = EM_TX_MAX_SEG, + .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M | diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c index 41f51c0..5bd3c99 100644 --- a/drivers/net/e1000/em_rxtx.c +++ b/drivers/net/e1000/em_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -66,6 +66,7 @@ #include #include #include +#include #include #include "e1000_logs.h" @@ -77,6 +78,14 @@ #define E1000_RXDCTL_GRAN 0x0100 /* RXDCTL Granularity */ +#define E1000_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_VLAN_PKT) + +#define E1000_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK) + /** * Structure associated with each descriptor of the RX ring of a RX queue. 
*/ @@ -618,6 +627,43 @@ end_of_tx: /* * + * TX prep functions + * + **/ +uint16_t +eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (
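rte_validate_tx_offload(), called above under RTE_LIBRTE_ETHDEV_DEBUG, checks flag completeness and, as of v9, that the headers to be modified sit in the first data segment. A hedged sketch of the kind of checks involved; the actual function lives in rte_mbuf.h and may differ in detail:

#include <errno.h>

#include <rte_mbuf.h>

/* Sketch: generic sanity checks on Tx offload requests, roughly in the
 * spirit of rte_validate_tx_offload(). */
static int
validate_tx_offload_sketch(const struct rte_mbuf *m)
{
	uint64_t ol_flags = m->ol_flags;

	if (!(ol_flags & PKT_TX_OFFLOAD_MASK))
		return 0;	/* nothing requested, nothing to check */

	/* L4 checksum or TSO needs the IP version flag to be set too. */
	if ((ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG)) &&
			!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)))
		return -EINVAL;

	/* TSO additionally needs a non-zero MSS and, for IPv4 packets,
	 * the IP checksum flag. */
	if ((ol_flags & PKT_TX_TCP_SEG) &&
			(m->tso_segsz == 0 ||
			 ((ol_flags & PKT_TX_IPV4) &&
			  !(ol_flags & PKT_TX_IP_CKSUM))))
		return -EINVAL;

	/* Headers that will be rewritten must sit in the first segment. */
	if (m->l2_len + m->l3_len + m->l4_len > m->data_len)
		return -EINVAL;

	return 0;
}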
[dpdk-dev] [PATCH v9 3/6] fm10k: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/fm10k/fm10k.h|6 + drivers/net/fm10k/fm10k_ethdev.c |5 drivers/net/fm10k/fm10k_rxtx.c | 50 +- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h index 05aa1a2..c6fed21 100644 --- a/drivers/net/fm10k/fm10k.h +++ b/drivers/net/fm10k/fm10k.h @@ -69,6 +69,9 @@ #define FM10K_MAX_RX_DESC (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc)) #define FM10K_MAX_TX_DESC (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc)) +#define FM10K_TX_MAX_SEG UINT8_MAX +#define FM10K_TX_MAX_MTU_SEG UINT8_MAX + /* * byte aligment for HW RX data buffer * Datasheet requires RX buffer addresses shall either be 512-byte aligned or @@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset); uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq); int fm10k_rx_vec_condition_check(struct rte_eth_dev *); void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq); diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index c804436..dffb6d1 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -1446,6 +1446,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev, .nb_max = FM10K_MAX_TX_DESC, .nb_min = FM10K_MIN_TX_DESC, .nb_align = FM10K_MULT_TX_DESC, + .nb_seg_max = FM10K_TX_MAX_SEG, + .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G | @@ -2754,8 +2756,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev) fm10k_txq_vec_setup(txq); } dev->tx_pkt_burst = fm10k_xmit_pkts_vec; + dev->tx_pkt_prep = NULL; } else { dev->tx_pkt_burst = fm10k_xmit_pkts; + dev->tx_pkt_prep = fm10k_prep_pkts; PMD_INIT_LOG(DEBUG, "Use regular Tx func"); } } @@ -2834,6 +2838,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev) dev->dev_ops = &fm10k_eth_dev_ops; dev->rx_pkt_burst = &fm10k_recv_pkts; dev->tx_pkt_burst = &fm10k_xmit_pkts; + dev->tx_pkt_prep = &fm10k_prep_pkts; /* only initialize in the primary process */ if (rte_eal_process_type() != RTE_PROC_PRIMARY) diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c index 32cc7ff..5fc4d5a 100644 --- a/drivers/net/fm10k/fm10k_rxtx.c +++ b/drivers/net/fm10k/fm10k_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -35,6 +35,7 @@ #include #include +#include #include "fm10k.h" #include "base/fm10k_type.h" @@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd) } #endif +#define FM10K_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT |\ + PKT_TX_IP_CKSUM |\ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#define FM10K_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK) + /* @note: When this function is changed, make corresponding change to * fm10k_dev_supported_ptypes_get() */ @@ -597,3 +607,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, return count; } + +uint16_t +fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if ((m->ol_flags & PKT_TX_TCP_SEG) && + (m->tso_segsz < FM10K_TSO_MINMSS)) { + rte_errno = -EINVAL; + return i; + } + + if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} -- 1.7.9.5
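The FM10K_TX_OFFLOAD_NOTSUP_MASK construction above generalises: XOR-ing the driver's supported flag set against the global PKT_TX_OFFLOAD_MASK leaves exactly the bits the PMD cannot handle, so a single AND per packet detects an unsupported request. A minimal illustration follows; the helper name is mine and it assumes the supported mask is a subset of PKT_TX_OFFLOAD_MASK.

    #include <rte_mbuf.h>

    /* Returns non-zero when every offload requested in ol_flags is covered
     * by supported_mask; mirrors the NOTSUP-mask test used by the PMDs. */
    static inline int
    tx_offloads_supported(uint64_t ol_flags, uint64_t supported_mask)
    {
            uint64_t notsup_mask = PKT_TX_OFFLOAD_MASK ^ supported_mask;

            return (ol_flags & notsup_mask) == 0;
    }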
[dpdk-dev] [PATCH v9 4/6] i40e: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/i40e/i40e_ethdev.c |3 ++ drivers/net/i40e/i40e_rxtx.c | 72 +++- drivers/net/i40e/i40e_rxtx.h |8 + 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index 5af0e43..dab0d48 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -936,6 +936,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) dev->dev_ops = &i40e_eth_dev_ops; dev->rx_pkt_burst = i40e_recv_pkts; dev->tx_pkt_burst = i40e_xmit_pkts; + dev->tx_pkt_prep = i40e_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -2629,6 +2630,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_max = I40E_MAX_RING_DESC, .nb_min = I40E_MIN_RING_DESC, .nb_align = I40E_ALIGN_RING_DESC, + .nb_seg_max = I40E_TX_MAX_SEG, + .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG, }; if (pf->flags & I40E_FLAG_VMDQ) { diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 7ae7d9f..7f6d3d8 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,6 +50,8 @@ #include #include #include +#include +#include #include "i40e_logs.h" #include "base/i40e_prototype.h" @@ -79,6 +81,17 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define I40E_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_OUTER_IP_CKSUM | \ + PKT_TX_TCP_SEG |\ + PKT_TX_QINQ_PKT | \ + PKT_TX_VLAN_PKT) + +#define I40E_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK) + static uint16_t i40e_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); @@ -1411,6 +1424,61 @@ i40e_xmit_pkts_simple(void *tx_queue, return nb_tx; } +/* + * + * TX prep functions + * + **/ +uint16_t +i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** +* m->nb_segs is uint8_t, so m->nb_segs is always less than +* I40E_TX_MAX_SEG. +* We check only a condition for m->nb_segs > I40E_TX_MAX_MTU_SEG. +*/ + if (!(ol_flags & PKT_TX_TCP_SEG)) { + if (m->nb_segs > I40E_TX_MAX_MTU_SEG) { + rte_errno = -EINVAL; + return i; + } + } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) || + (m->tso_segsz > I40E_MAX_TSO_MSS)) { + /* MSS outside the range (256B - 9674B) are considered malicious */ + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + return i; +} + /* * Find the VSI the queue belongs to. 'queue_idx' is the queue index * application used, which assume having sequential ones. But from driver's @@ -2763,9 +2831,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev) PMD_INIT_LOG(DEBUG, "Simple tx finally be used."); dev->tx_pkt_burst = i40e_xmit_pkts_simple; } + dev->tx_pkt_prep = NULL;
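For the TSO branch of the check above to pass, the application has to fill in the mbuf metadata consistently before handing the packet to tx_prep. A hedged example of the application side is sketched below; the header lengths are illustrative values for untagged IPv4/TCP, and depending on the packet type flags such as PKT_TX_IPV4 and PKT_TX_IP_CKSUM are typically set as well.

    #include <rte_mbuf.h>

    static void
    request_tso(struct rte_mbuf *m)
    {
            m->ol_flags |= PKT_TX_TCP_SEG;
            m->l2_len = 14;         /* Ethernet header */
            m->l3_len = 20;         /* IPv4 header without options */
            m->l4_len = 20;         /* TCP header without options */
            m->tso_segsz = 1448;    /* inside the 256B-9674B window checked above */
    }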
[dpdk-dev] [PATCH v9 5/6] ixgbe: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/ixgbe/ixgbe_ethdev.c |3 ++ drivers/net/ixgbe/ixgbe_ethdev.h |5 +++- drivers/net/ixgbe/ixgbe_rxtx.c | 58 +- drivers/net/ixgbe/ixgbe_rxtx.h |2 ++ 4 files changed, 66 insertions(+), 2 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index 4ca5747..4c6a8e1 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -517,6 +517,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = { .nb_max = IXGBE_MAX_RING_DESC, .nb_min = IXGBE_MIN_RING_DESC, .nb_align = IXGBE_TXD_ALIGN, + .nb_seg_max = IXGBE_TX_MAX_SEG, + .nb_mtu_seg_max = IXGBE_TX_MAX_SEG, }; static const struct eth_dev_ops ixgbe_eth_dev_ops = { @@ -1103,6 +1105,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = &ixgbe_eth_dev_ops; eth_dev->rx_pkt_burst = &ixgbe_recv_pkts; eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts; + eth_dev->tx_pkt_prep = &ixgbe_prep_pkts; /* * For secondary processes, we don't initialise any further as primary diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h index 4ff6338..e229cf5 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.h +++ b/drivers/net/ixgbe/ixgbe_ethdev.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf); diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c index 2ce8234..031414c 100644 --- a/drivers/net/ixgbe/ixgbe_rxtx.c +++ b/drivers/net/ixgbe/ixgbe_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * Copyright 2014 6WIND S.A. * All rights reserved. 
* @@ -70,6 +70,7 @@ #include #include #include +#include #include "ixgbe_logs.h" #include "base/ixgbe_api.h" @@ -87,6 +88,9 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK) + #if 1 #define RTE_PMD_USE_PREFETCH #endif @@ -905,6 +909,56 @@ end_of_tx: /* * + * TX prep functions + * + **/ +uint16_t +ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** +* Check if packet meets requirements for number of segments +* +* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and non-TSO +*/ + + if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) { + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + +/* + * * RX functions * **/ @@ -2282,6 +2336,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq) if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) { PMD_INIT_LOG(DEBUG, &
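Unlike the other drivers, ixgbe_prep_pkts() really needs the queue pointer: the usable segment count per packet depends on the queue's write-back threshold, as the note in the code states (40 - WTHRESH). A caller that wants to pre-check its mbuf chains could derive the same budget from the queue configuration; this is a sketch only, the constant 40 comes from that note and the helper name is mine.

    #include <rte_ethdev.h>

    static inline uint16_t
    ixgbe_seg_budget(const struct rte_eth_txconf *txconf)
    {
            /* IXGBE_TX_MAX_SEG (40) minus the configured WTHRESH */
            return 40 - txconf->tx_thresh.wthresh;
    }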
[dpdk-dev] [PATCH v9 6/6] testpmd: use Tx preparation in csum engine
Removed pseudo header calculation for udp/tcp/tso packets from application and used Tx preparation API for packet preparation and verification. Adding additional step to the csum engine costs about 3-4% of performance drop, on my setup with ixgbe driver. It's caused mostly by the need of reaccessing and modification of packet data. Signed-off-by: Tomasz Kulasek --- app/test-pmd/csumonly.c | 36 +--- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index 57e6ae2..6f33ae9 100644 --- a/app/test-pmd/csumonly.c +++ b/app/test-pmd/csumonly.c @@ -112,15 +112,6 @@ struct simple_gre_hdr { } __attribute__((__packed__)); static uint16_t -get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags) -{ - if (ethertype == _htons(ETHER_TYPE_IPv4)) - return rte_ipv4_phdr_cksum(l3_hdr, ol_flags); - else /* assume ethertype == ETHER_TYPE_IPv6 */ - return rte_ipv6_phdr_cksum(l3_hdr, ol_flags); -} - -static uint16_t get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype) { if (ethertype == _htons(ETHER_TYPE_IPv4)) @@ -370,32 +361,24 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info, /* do not recalculate udp cksum if it was 0 */ if (udp_hdr->dgram_cksum != 0) { udp_hdr->dgram_cksum = 0; - if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) { + if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) ol_flags |= PKT_TX_UDP_CKSUM; - udp_hdr->dgram_cksum = get_psd_sum(l3_hdr, - info->ethertype, ol_flags); - } else { + else udp_hdr->dgram_cksum = get_udptcp_checksum(l3_hdr, udp_hdr, info->ethertype); - } } } else if (info->l4_proto == IPPROTO_TCP) { tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len); tcp_hdr->cksum = 0; - if (tso_segsz) { + if (tso_segsz) ol_flags |= PKT_TX_TCP_SEG; - tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, - ol_flags); - } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) { + else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) ol_flags |= PKT_TX_TCP_CKSUM; - tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, - ol_flags); - } else { + else tcp_hdr->cksum = get_udptcp_checksum(l3_hdr, tcp_hdr, info->ethertype); - } } else if (info->l4_proto == IPPROTO_SCTP) { sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len); sctp_hdr->cksum = 0; @@ -648,6 +631,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */ uint16_t nb_rx; uint16_t nb_tx; + uint16_t nb_prep; uint16_t i; uint64_t rx_ol_flags, tx_ol_flags; uint16_t testpmd_ol_flags; @@ -857,7 +841,13 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) printf("\n"); } } - nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx); + nb_prep = rte_eth_tx_prep(fs->tx_port, fs->tx_queue, pkts_burst, + nb_rx); + if (nb_prep != nb_rx) + printf("Preparing packet burst to transmit failed: %s\n", + rte_strerror(rte_errno)); + + nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_prep); /* * Retry if necessary */ -- 1.7.9.5
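The resulting call sequence in the forwarding loop is the part worth reusing in other applications: run the burst through rte_eth_tx_prep() first, transmit only the prepared prefix, and inspect rte_errno when the two counts differ. A minimal sketch is below; testpmd only prints the error, while a real application might drop pkts[nb_prep] and retry with the remainder.

    #include <stdio.h>
    #include <rte_ethdev.h>
    #include <rte_errno.h>
    #include <rte_mbuf.h>

    static uint16_t
    send_prepared_burst(uint8_t port_id, uint16_t queue_id,
                    struct rte_mbuf **pkts, uint16_t nb_rx)
    {
            uint16_t nb_prep = rte_eth_tx_prep(port_id, queue_id, pkts, nb_rx);

            if (nb_prep != nb_rx)
                    /* pkts[nb_prep] is the first packet that failed validation */
                    printf("tx_prep failed: %s\n", rte_strerror(rte_errno));

            return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
    }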
[dpdk-dev] [PATCH v10 0/6] add Tx preparation
ksum initialization procedure to include also outer checksum offloads, - some minor formatting and optimization fixes v2 changes: - rte_eth_tx_prep() returns number of packets when device doesn't support tx_prep functionality, - introduced CONFIG_RTE_ETHDEV_TX_PREP allowing tx_prep to be turned off Tomasz Kulasek (6): ethdev: add Tx preparation e1000: add Tx preparation fm10k: add Tx preparation i40e: add Tx preparation ixgbe: add Tx preparation testpmd: use Tx preparation in csum engine app/test-pmd/csumonly.c | 36 ++ config/common_base |1 + drivers/net/e1000/e1000_ethdev.h | 11 + drivers/net/e1000/em_ethdev.c|5 +++- drivers/net/e1000/em_rxtx.c | 48 ++- drivers/net/e1000/igb_ethdev.c |4 ++ drivers/net/e1000/igb_rxtx.c | 52 - drivers/net/fm10k/fm10k.h|6 +++ drivers/net/fm10k/fm10k_ethdev.c |5 ++ drivers/net/fm10k/fm10k_rxtx.c | 50 +++- drivers/net/i40e/i40e_ethdev.c |3 ++ drivers/net/i40e/i40e_rxtx.c | 72 +++- drivers/net/i40e/i40e_rxtx.h |8 drivers/net/ixgbe/ixgbe_ethdev.c |3 ++ drivers/net/ixgbe/ixgbe_ethdev.h |5 +- drivers/net/ixgbe/ixgbe_rxtx.c | 58 ++- drivers/net/ixgbe/ixgbe_rxtx.h |2 + lib/librte_ether/rte_ethdev.h| 96 ++ lib/librte_mbuf/rte_mbuf.h | 64 + lib/librte_net/rte_net.h | 85 + 20 files changed, 584 insertions(+), 30 deletions(-) -- 1.7.9.5
[dpdk-dev] [PATCH v10 1/6] ethdev: add Tx preparation
Added API for `rte_eth_tx_prep` uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) Added fields to the `struct rte_eth_desc_lim`: uint16_t nb_seg_max; /**< Max number of segments per whole packet. */ uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ Added functions: int rte_validate_tx_offload(struct rte_mbuf *m) to validate general requirements for the tx offloads set in the mbuf of a packet, such as flag completeness. In the current implementation this function is called optionally when RTE_LIBRTE_ETHDEV_DEBUG is enabled. int rte_phdr_cksum_fix(struct rte_mbuf *m) to fix the pseudo header checksum for TSO and non-TSO tcp/udp packets before hardware tx checksum offload. - for non-TSO tcp/udp packets the full pseudo-header checksum is computed and set. - for TSO the IP payload length is not included. PERFORMANCE TESTS - This feature was tested with the modified csum engine from test-pmd. The packet checksum preparation was moved from the application to the Tx preparation step placed before the burst. We may expect some overhead costs caused by: 1) using an additional callback before the burst, 2) rescanning the burst, 3) additional condition checking (packet validation), 4) worse optimization (e.g. packet data access, etc.) We tested it using the ixgbe Tx preparation implementation with some parts disabled to get comparable information about the impact of different parts of the implementation. IMPACT: 1) For an unimplemented Tx preparation callback the performance impact is negligible, 2) For the packet condition check without checksum modifications (nb_segs, available offloads, etc.) it is 14626628/14252168 (~2.62% drop), 3) For full support in the ixgbe driver (point 2 + packet checksum initialization) it is 14060924/13588094 (~3.48% drop) Signed-off-by: Tomasz Kulasek --- config/common_base|1 + lib/librte_ether/rte_ethdev.h | 96 + lib/librte_mbuf/rte_mbuf.h| 64 + lib/librte_net/rte_net.h | 85 4 files changed, 246 insertions(+) diff --git a/config/common_base b/config/common_base index c7fd3db..619284b 100644 --- a/config/common_base +++ b/config/common_base @@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024 CONFIG_RTE_LIBRTE_IEEE1588=n CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y +CONFIG_RTE_ETHDEV_TX_PREP=y # # Support NIC bypass logic diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 38641e8..c4a8ccd 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -182,6 +182,7 @@ extern "C" { #include #include #include +#include #include "rte_ether.h" #include "rte_eth_ctrl.h" #include "rte_dev_info.h" @@ -699,6 +700,8 @@ struct rte_eth_desc_lim { uint16_t nb_max; /**< Max allowed number of descriptors. */ uint16_t nb_min; /**< Min allowed number of descriptors. */ uint16_t nb_align; /**< Number of descriptors should be aligned to. */ + uint16_t nb_seg_max; /**< Max number of segments per whole packet. */ + uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ }; /** @@ -1188,6 +1191,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq, uint16_t nb_pkts); /**< @internal Send output packets on a transmit queue of an Ethernet device. */ +typedef uint16_t (*eth_tx_prep_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Prepare output packets on a transmit queue of an Ethernet device. 
*/ + typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf); /**< @internal Get current flow control parameter on an Ethernet device */ @@ -1622,6 +1630,7 @@ struct rte_eth_rxtx_callback { struct rte_eth_dev { eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ + eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */ struct rte_eth_dev_data *data; /**< Pointer to device data */ const struct eth_driver *driver;/**< Driver for this device */ const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ @@ -2816,6 +2825,93 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id, return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts); } +/** + * Process a burst of output packets on a transmit queue of an Ethernet device. + * + * The rte_eth_tx_prep() function is invoked to prepare output packets to be + *
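The body of the rte_eth_tx_prep() wrapper is truncated in this archive. Judging from the rte_eth_tx_burst() wrapper quoted just above it and the v2 changelog note (return nb_pkts when the device has no tx_prep support), a plausible shape is the following sketch, not a verbatim copy of the patch:

    static inline uint16_t
    rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
                    struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
    {
            struct rte_eth_dev *dev = &rte_eth_devices[port_id];

            if (!dev->tx_pkt_prep)
                    /* PMD has nothing to check or fix for this queue */
                    return nb_pkts;

            return (*dev->tx_pkt_prep)(dev->data->tx_queues[queue_id],
                            tx_pkts, nb_pkts);
    }

Drivers with a vector Tx path (fm10k, i40e in this series) deliberately leave tx_pkt_prep NULL, so the fallback branch is what an application hits there.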
[dpdk-dev] [PATCH v10 2/6] e1000: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/e1000/e1000_ethdev.h | 11 drivers/net/e1000/em_ethdev.c|5 +++- drivers/net/e1000/em_rxtx.c | 48 ++- drivers/net/e1000/igb_ethdev.c |4 +++ drivers/net/e1000/igb_rxtx.c | 52 +- 5 files changed, 117 insertions(+), 3 deletions(-) diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h index 6c25c8d..bd0f277 100644 --- a/drivers/net/e1000/e1000_ethdev.h +++ b/drivers/net/e1000/e1000_ethdev.h @@ -138,6 +138,11 @@ #define E1000_MISC_VEC_ID RTE_INTR_VEC_ZERO_OFFSET #define E1000_RX_VEC_START RTE_INTR_VEC_RXTX_OFFSET +#define IGB_TX_MAX_SEG UINT8_MAX +#define IGB_TX_MAX_MTU_SEG UINT8_MAX +#define EM_TX_MAX_SEG UINT8_MAX +#define EM_TX_MAX_MTU_SEG UINT8_MAX + /* structure for interrupt relative data */ struct e1000_interrupt { uint32_t flags; @@ -315,6 +320,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev); uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); @@ -376,6 +384,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev); uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c index 7cf5f0c..17b45cb 100644 --- a/drivers/net/e1000/em_ethdev.c +++ b/drivers/net/e1000/em_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -300,6 +300,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = ð_em_ops; eth_dev->rx_pkt_burst = (eth_rx_burst_t)ð_em_recv_pkts; eth_dev->tx_pkt_burst = (eth_tx_burst_t)ð_em_xmit_pkts; + eth_dev->tx_pkt_prep = (eth_tx_prep_t)ð_em_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -1067,6 +1068,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_max = E1000_MAX_RING_DESC, .nb_min = E1000_MIN_RING_DESC, .nb_align = EM_TXD_ALIGN, + .nb_seg_max = EM_TX_MAX_SEG, + .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M | diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c index 41f51c0..5bd3c99 100644 --- a/drivers/net/e1000/em_rxtx.c +++ b/drivers/net/e1000/em_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -66,6 +66,7 @@ #include #include #include +#include #include #include "e1000_logs.h" @@ -77,6 +78,14 @@ #define E1000_RXDCTL_GRAN 0x0100 /* RXDCTL Granularity */ +#define E1000_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_VLAN_PKT) + +#define E1000_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK) + /** * Structure associated with each descriptor of the RX ring of a RX queue. 
*/ @@ -618,6 +627,43 @@ end_of_tx: /* * + * TX prep functions + * + **/ +uint16_t +eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (
[dpdk-dev] [PATCH v10 3/6] fm10k: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/fm10k/fm10k.h|6 + drivers/net/fm10k/fm10k_ethdev.c |5 drivers/net/fm10k/fm10k_rxtx.c | 50 +- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h index 05aa1a2..c6fed21 100644 --- a/drivers/net/fm10k/fm10k.h +++ b/drivers/net/fm10k/fm10k.h @@ -69,6 +69,9 @@ #define FM10K_MAX_RX_DESC (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc)) #define FM10K_MAX_TX_DESC (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc)) +#define FM10K_TX_MAX_SEG UINT8_MAX +#define FM10K_TX_MAX_MTU_SEG UINT8_MAX + /* * byte aligment for HW RX data buffer * Datasheet requires RX buffer addresses shall either be 512-byte aligned or @@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset); uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq); int fm10k_rx_vec_condition_check(struct rte_eth_dev *); void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq); diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index c804436..dffb6d1 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -1446,6 +1446,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev, .nb_max = FM10K_MAX_TX_DESC, .nb_min = FM10K_MIN_TX_DESC, .nb_align = FM10K_MULT_TX_DESC, + .nb_seg_max = FM10K_TX_MAX_SEG, + .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G | @@ -2754,8 +2756,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev) fm10k_txq_vec_setup(txq); } dev->tx_pkt_burst = fm10k_xmit_pkts_vec; + dev->tx_pkt_prep = NULL; } else { dev->tx_pkt_burst = fm10k_xmit_pkts; + dev->tx_pkt_prep = fm10k_prep_pkts; PMD_INIT_LOG(DEBUG, "Use regular Tx func"); } } @@ -2834,6 +2838,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev) dev->dev_ops = &fm10k_eth_dev_ops; dev->rx_pkt_burst = &fm10k_recv_pkts; dev->tx_pkt_burst = &fm10k_xmit_pkts; + dev->tx_pkt_prep = &fm10k_prep_pkts; /* only initialize in the primary process */ if (rte_eal_process_type() != RTE_PROC_PRIMARY) diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c index 32cc7ff..5fc4d5a 100644 --- a/drivers/net/fm10k/fm10k_rxtx.c +++ b/drivers/net/fm10k/fm10k_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -35,6 +35,7 @@ #include #include +#include #include "fm10k.h" #include "base/fm10k_type.h" @@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd) } #endif +#define FM10K_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT |\ + PKT_TX_IP_CKSUM |\ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#define FM10K_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK) + /* @note: When this function is changed, make corresponding change to * fm10k_dev_supported_ptypes_get() */ @@ -597,3 +607,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, return count; } + +uint16_t +fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if ((m->ol_flags & PKT_TX_TCP_SEG) && + (m->tso_segsz < FM10K_TSO_MINMSS)) { + rte_errno = -EINVAL; + return i; + } + + if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} -- 1.7.9.5
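The nb_seg_max/nb_mtu_seg_max limits advertised here are what an application should consult before building long mbuf chains for a port. A short example of reading them back through the standard dev_info query:

    #include <rte_ethdev.h>

    static uint16_t
    query_tx_chain_limit(uint8_t port_id)
    {
            struct rte_eth_dev_info dev_info;

            rte_eth_dev_info_get(port_id, &dev_info);
            /* nb_mtu_seg_max bounds one non-TSO packet's chain length,
             * nb_seg_max the whole (possibly TSO) packet */
            return dev_info.tx_desc_lim.nb_mtu_seg_max;
    }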
[dpdk-dev] [PATCH v10 4/6] i40e: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/i40e/i40e_ethdev.c |3 ++ drivers/net/i40e/i40e_rxtx.c | 72 +++- drivers/net/i40e/i40e_rxtx.h |8 + 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index 5af0e43..dab0d48 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -936,6 +936,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) dev->dev_ops = &i40e_eth_dev_ops; dev->rx_pkt_burst = i40e_recv_pkts; dev->tx_pkt_burst = i40e_xmit_pkts; + dev->tx_pkt_prep = i40e_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -2629,6 +2630,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_max = I40E_MAX_RING_DESC, .nb_min = I40E_MIN_RING_DESC, .nb_align = I40E_ALIGN_RING_DESC, + .nb_seg_max = I40E_TX_MAX_SEG, + .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG, }; if (pf->flags & I40E_FLAG_VMDQ) { diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 7ae7d9f..7f6d3d8 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,6 +50,8 @@ #include #include #include +#include +#include #include "i40e_logs.h" #include "base/i40e_prototype.h" @@ -79,6 +81,17 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define I40E_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_OUTER_IP_CKSUM | \ + PKT_TX_TCP_SEG |\ + PKT_TX_QINQ_PKT | \ + PKT_TX_VLAN_PKT) + +#define I40E_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK) + static uint16_t i40e_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); @@ -1411,6 +1424,61 @@ i40e_xmit_pkts_simple(void *tx_queue, return nb_tx; } +/* + * + * TX prep functions + * + **/ +uint16_t +i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** +* m->nb_segs is uint8_t, so m->nb_segs is always less than +* I40E_TX_MAX_SEG. +* We check only a condition for m->nb_segs > I40E_TX_MAX_MTU_SEG. +*/ + if (!(ol_flags & PKT_TX_TCP_SEG)) { + if (m->nb_segs > I40E_TX_MAX_MTU_SEG) { + rte_errno = -EINVAL; + return i; + } + } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) || + (m->tso_segsz > I40E_MAX_TSO_MSS)) { + /* MSS outside the range (256B - 9674B) are considered malicious */ + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + return i; +} + /* * Find the VSI the queue belongs to. 'queue_idx' is the queue index * application used, which assume having sequential ones. But from driver's @@ -2763,9 +2831,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev) PMD_INIT_LOG(DEBUG, "Simple tx finally be used."); dev->tx_pkt_burst = i40e_xmit_pkts_simple; } + dev->tx_pkt_prep = NULL;
[dpdk-dev] [PATCH v10 5/6] ixgbe: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/ixgbe/ixgbe_ethdev.c |3 ++ drivers/net/ixgbe/ixgbe_ethdev.h |5 +++- drivers/net/ixgbe/ixgbe_rxtx.c | 58 +- drivers/net/ixgbe/ixgbe_rxtx.h |2 ++ 4 files changed, 66 insertions(+), 2 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index 4ca5747..4c6a8e1 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -517,6 +517,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = { .nb_max = IXGBE_MAX_RING_DESC, .nb_min = IXGBE_MIN_RING_DESC, .nb_align = IXGBE_TXD_ALIGN, + .nb_seg_max = IXGBE_TX_MAX_SEG, + .nb_mtu_seg_max = IXGBE_TX_MAX_SEG, }; static const struct eth_dev_ops ixgbe_eth_dev_ops = { @@ -1103,6 +1105,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = &ixgbe_eth_dev_ops; eth_dev->rx_pkt_burst = &ixgbe_recv_pkts; eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts; + eth_dev->tx_pkt_prep = &ixgbe_prep_pkts; /* * For secondary processes, we don't initialise any further as primary diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h index 4ff6338..e229cf5 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.h +++ b/drivers/net/ixgbe/ixgbe_ethdev.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -396,6 +396,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf); diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c index 2ce8234..031414c 100644 --- a/drivers/net/ixgbe/ixgbe_rxtx.c +++ b/drivers/net/ixgbe/ixgbe_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * Copyright 2014 6WIND S.A. * All rights reserved. 
* @@ -70,6 +70,7 @@ #include #include #include +#include #include "ixgbe_logs.h" #include "base/ixgbe_api.h" @@ -87,6 +88,9 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK) + #if 1 #define RTE_PMD_USE_PREFETCH #endif @@ -905,6 +909,56 @@ end_of_tx: /* * + * TX prep functions + * + **/ +uint16_t +ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** +* Check if packet meets requirements for number of segments +* +* NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and non-TSO +*/ + + if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) { + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + +/* + * * RX functions * **/ @@ -2282,6 +2336,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq) if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) { PMD_INIT_LOG(DEBUG, &
[dpdk-dev] [PATCH v10 6/6] testpmd: use Tx preparation in csum engine
Removed pseudo header calculation for udp/tcp/tso packets from application and used Tx preparation API for packet preparation and verification. Adding additional step to the csum engine costs about 3-4% of performance drop, on my setup with ixgbe driver. It's caused mostly by the need of reaccessing and modification of packet data. Signed-off-by: Tomasz Kulasek --- app/test-pmd/csumonly.c | 36 +--- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index 57e6ae2..6f33ae9 100644 --- a/app/test-pmd/csumonly.c +++ b/app/test-pmd/csumonly.c @@ -112,15 +112,6 @@ struct simple_gre_hdr { } __attribute__((__packed__)); static uint16_t -get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags) -{ - if (ethertype == _htons(ETHER_TYPE_IPv4)) - return rte_ipv4_phdr_cksum(l3_hdr, ol_flags); - else /* assume ethertype == ETHER_TYPE_IPv6 */ - return rte_ipv6_phdr_cksum(l3_hdr, ol_flags); -} - -static uint16_t get_udptcp_checksum(void *l3_hdr, void *l4_hdr, uint16_t ethertype) { if (ethertype == _htons(ETHER_TYPE_IPv4)) @@ -370,32 +361,24 @@ process_inner_cksums(void *l3_hdr, const struct testpmd_offload_info *info, /* do not recalculate udp cksum if it was 0 */ if (udp_hdr->dgram_cksum != 0) { udp_hdr->dgram_cksum = 0; - if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) { + if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_UDP_CKSUM) ol_flags |= PKT_TX_UDP_CKSUM; - udp_hdr->dgram_cksum = get_psd_sum(l3_hdr, - info->ethertype, ol_flags); - } else { + else udp_hdr->dgram_cksum = get_udptcp_checksum(l3_hdr, udp_hdr, info->ethertype); - } } } else if (info->l4_proto == IPPROTO_TCP) { tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len); tcp_hdr->cksum = 0; - if (tso_segsz) { + if (tso_segsz) ol_flags |= PKT_TX_TCP_SEG; - tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, - ol_flags); - } else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) { + else if (testpmd_ol_flags & TESTPMD_TX_OFFLOAD_TCP_CKSUM) ol_flags |= PKT_TX_TCP_CKSUM; - tcp_hdr->cksum = get_psd_sum(l3_hdr, info->ethertype, - ol_flags); - } else { + else tcp_hdr->cksum = get_udptcp_checksum(l3_hdr, tcp_hdr, info->ethertype); - } } else if (info->l4_proto == IPPROTO_SCTP) { sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len); sctp_hdr->cksum = 0; @@ -648,6 +631,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) void *l3_hdr = NULL, *outer_l3_hdr = NULL; /* can be IPv4 or IPv6 */ uint16_t nb_rx; uint16_t nb_tx; + uint16_t nb_prep; uint16_t i; uint64_t rx_ol_flags, tx_ol_flags; uint16_t testpmd_ol_flags; @@ -857,7 +841,13 @@ pkt_burst_checksum_forward(struct fwd_stream *fs) printf("\n"); } } - nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_rx); + nb_prep = rte_eth_tx_prep(fs->tx_port, fs->tx_queue, pkts_burst, + nb_rx); + if (nb_prep != nb_rx) + printf("Preparing packet burst to transmit failed: %s\n", + rte_strerror(rte_errno)); + + nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_prep); /* * Retry if necessary */ -- 1.7.9.5
[dpdk-dev] [PATCH v11 0/6] add Tx preparation
ffloads are not intentionally available v3 changes: - reworked the testpmd csum engine instead of adding a new one, - fixed checksum initialization procedure to include also outer checksum offloads, - some minor formatting and optimization fixes v2 changes: - rte_eth_tx_prep() returns number of packets when device doesn't support tx_prep functionality, - introduced CONFIG_RTE_ETHDEV_TX_PREP allowing tx_prep to be turned off Tomasz Kulasek (6): ethdev: add Tx preparation e1000: add Tx preparation fm10k: add Tx preparation i40e: add Tx preparation ixgbe: add Tx preparation testpmd: use Tx preparation in csum engine app/test-pmd/csumonly.c | 36 + config/common_base |1 + drivers/net/e1000/e1000_ethdev.h | 11 drivers/net/e1000/em_ethdev.c|5 +- drivers/net/e1000/em_rxtx.c | 48 +- drivers/net/e1000/igb_ethdev.c |4 ++ drivers/net/e1000/igb_rxtx.c | 52 ++- drivers/net/fm10k/fm10k.h|6 +++ drivers/net/fm10k/fm10k_ethdev.c |5 ++ drivers/net/fm10k/fm10k_rxtx.c | 50 +- drivers/net/i40e/i40e_ethdev.c |3 ++ drivers/net/i40e/i40e_rxtx.c | 72 +- drivers/net/i40e/i40e_rxtx.h |8 +++ drivers/net/ixgbe/ixgbe_ethdev.c |3 ++ drivers/net/ixgbe/ixgbe_ethdev.h |5 +- drivers/net/ixgbe/ixgbe_rxtx.c | 58 - drivers/net/ixgbe/ixgbe_rxtx.h |2 + lib/librte_ether/rte_ethdev.h| 103 ++ lib/librte_mbuf/rte_mbuf.h | 64 +++ lib/librte_net/rte_net.h | 85 +++ 20 files changed, 591 insertions(+), 30 deletions(-) -- 1.7.9.5
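CONFIG_RTE_ETHDEV_TX_PREP=n is meant to compile the feature out entirely. The exact mechanism is not visible in this excerpt, but given the note that rte_eth_tx_prep() must still report every packet as prepared when the functionality is unavailable, the disabled build presumably reduces to a no-op stub along these lines (the macro name follows the usual CONFIG_RTE_* convention; this is an assumption, not quoted from the patch):

    #ifndef RTE_ETHDEV_TX_PREP
    static inline uint16_t
    rte_eth_tx_prep(uint8_t port_id __rte_unused, uint16_t queue_id __rte_unused,
                    struct rte_mbuf **tx_pkts __rte_unused, uint16_t nb_pkts)
    {
            return nb_pkts; /* callers proceed as if all packets were prepared */
    }
    #endif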
[dpdk-dev] [PATCH v11 1/6] ethdev: add Tx preparation
Added API for `rte_eth_tx_prep` uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) Added fields to the `struct rte_eth_desc_lim`: uint16_t nb_seg_max; /**< Max number of segments per whole packet. */ uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ Added functions: int rte_validate_tx_offload(struct rte_mbuf *m) to validate general requirements for the tx offloads set in the mbuf of a packet, such as flag completeness. In the current implementation this function is called optionally when RTE_LIBRTE_ETHDEV_DEBUG is enabled. int rte_phdr_cksum_fix(struct rte_mbuf *m) to fix the pseudo header checksum for TSO and non-TSO tcp/udp packets before hardware tx checksum offload. - for non-TSO tcp/udp packets the full pseudo-header checksum is computed and set. - for TSO the IP payload length is not included. PERFORMANCE TESTS - This feature was tested with the modified csum engine from test-pmd. The packet checksum preparation was moved from the application to the Tx preparation step placed before the burst. We may expect some overhead costs caused by: 1) using an additional callback before the burst, 2) rescanning the burst, 3) additional condition checking (packet validation), 4) worse optimization (e.g. packet data access, etc.) We tested it using the ixgbe Tx preparation implementation with some parts disabled to get comparable information about the impact of different parts of the implementation. IMPACT: 1) For an unimplemented Tx preparation callback the performance impact is negligible, 2) For the packet condition check without checksum modifications (nb_segs, available offloads, etc.) it is 14626628/14252168 (~2.62% drop), 3) For full support in the ixgbe driver (point 2 + packet checksum initialization) it is 14060924/13588094 (~3.48% drop) Signed-off-by: Tomasz Kulasek --- config/common_base|1 + lib/librte_ether/rte_ethdev.h | 103 + lib/librte_mbuf/rte_mbuf.h| 64 + lib/librte_net/rte_net.h | 85 ++ 4 files changed, 253 insertions(+) diff --git a/config/common_base b/config/common_base index c7fd3db..619284b 100644 --- a/config/common_base +++ b/config/common_base @@ -120,6 +120,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024 CONFIG_RTE_LIBRTE_IEEE1588=n CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y +CONFIG_RTE_ETHDEV_TX_PREP=y # # Support NIC bypass logic diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 38641e8..cf6f68e 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -182,6 +182,7 @@ extern "C" { #include #include #include +#include #include "rte_ether.h" #include "rte_eth_ctrl.h" #include "rte_dev_info.h" @@ -699,6 +700,8 @@ struct rte_eth_desc_lim { uint16_t nb_max; /**< Max allowed number of descriptors. */ uint16_t nb_min; /**< Min allowed number of descriptors. */ uint16_t nb_align; /**< Number of descriptors should be aligned to. */ + uint16_t nb_seg_max; /**< Max number of segments per whole packet. */ + uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */ }; /** @@ -1188,6 +1191,11 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq, uint16_t nb_pkts); /**< @internal Send output packets on a transmit queue of an Ethernet device. */ +typedef uint16_t (*eth_tx_prep_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Prepare output packets on a transmit queue of an Ethernet device. 
*/ + typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf); /**< @internal Get current flow control parameter on an Ethernet device */ @@ -1622,6 +1630,7 @@ struct rte_eth_rxtx_callback { struct rte_eth_dev { eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ + eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */ struct rte_eth_dev_data *data; /**< Pointer to device data */ const struct eth_driver *driver;/**< Driver for this device */ const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ @@ -2816,6 +2825,100 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id, return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts); } +/** + * Process a burst of output packets on a transmit queue of an Ethernet device. + * + * The rte_eth_tx_prep() function is invoked to prepare output packets to be + *
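What rte_phdr_cksum_fix() does in the common case is exactly what the csum engine in patch 6/6 stops doing by hand: seed the L4 checksum field with the pseudo-header checksum so the NIC can finish the rest. A conceptual sketch for a non-TSO IPv4/TCP packet follows; this illustrates the idea and is not the library source.

    #include <rte_ip.h>
    #include <rte_tcp.h>
    #include <rte_mbuf.h>

    static void
    seed_ipv4_tcp_phdr_cksum(struct rte_mbuf *m)
    {
            struct ipv4_hdr *ip = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
                            m->l2_len);
            struct tcp_hdr *tcp = (struct tcp_hdr *)((char *)ip + m->l3_len);

            /* for TSO the IP payload length would be left out of the sum,
             * as the commit message above notes */
            tcp->cksum = rte_ipv4_phdr_cksum(ip, m->ol_flags);
    }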
[dpdk-dev] [PATCH v11 2/6] e1000: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/e1000/e1000_ethdev.h | 11 drivers/net/e1000/em_ethdev.c|5 +++- drivers/net/e1000/em_rxtx.c | 48 ++- drivers/net/e1000/igb_ethdev.c |4 +++ drivers/net/e1000/igb_rxtx.c | 52 +- 5 files changed, 117 insertions(+), 3 deletions(-) diff --git a/drivers/net/e1000/e1000_ethdev.h b/drivers/net/e1000/e1000_ethdev.h index 6c25c8d..bd0f277 100644 --- a/drivers/net/e1000/e1000_ethdev.h +++ b/drivers/net/e1000/e1000_ethdev.h @@ -138,6 +138,11 @@ #define E1000_MISC_VEC_ID RTE_INTR_VEC_ZERO_OFFSET #define E1000_RX_VEC_START RTE_INTR_VEC_RXTX_OFFSET +#define IGB_TX_MAX_SEG UINT8_MAX +#define IGB_TX_MAX_MTU_SEG UINT8_MAX +#define EM_TX_MAX_SEG UINT8_MAX +#define EM_TX_MAX_MTU_SEG UINT8_MAX + /* structure for interrupt relative data */ struct e1000_interrupt { uint32_t flags; @@ -315,6 +320,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev); uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); @@ -376,6 +384,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev); uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); diff --git a/drivers/net/e1000/em_ethdev.c b/drivers/net/e1000/em_ethdev.c index 7cf5f0c..17b45cb 100644 --- a/drivers/net/e1000/em_ethdev.c +++ b/drivers/net/e1000/em_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -300,6 +300,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = ð_em_ops; eth_dev->rx_pkt_burst = (eth_rx_burst_t)ð_em_recv_pkts; eth_dev->tx_pkt_burst = (eth_tx_burst_t)ð_em_xmit_pkts; + eth_dev->tx_pkt_prep = (eth_tx_prep_t)ð_em_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -1067,6 +1068,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_max = E1000_MAX_RING_DESC, .nb_min = E1000_MIN_RING_DESC, .nb_align = EM_TXD_ALIGN, + .nb_seg_max = EM_TX_MAX_SEG, + .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M | diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c index 41f51c0..5bd3c99 100644 --- a/drivers/net/e1000/em_rxtx.c +++ b/drivers/net/e1000/em_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -66,6 +66,7 @@ #include #include #include +#include #include #include "e1000_logs.h" @@ -77,6 +78,14 @@ #define E1000_RXDCTL_GRAN 0x0100 /* RXDCTL Granularity */ +#define E1000_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_VLAN_PKT) + +#define E1000_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK) + /** * Structure associated with each descriptor of the RX ring of a RX queue. 
*/ @@ -618,6 +627,43 @@ end_of_tx: /* * + * TX prep functions + * + **/ +uint16_t +eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (
[dpdk-dev] [PATCH v11 3/6] fm10k: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/fm10k/fm10k.h|6 + drivers/net/fm10k/fm10k_ethdev.c |5 drivers/net/fm10k/fm10k_rxtx.c | 50 +- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h index 05aa1a2..c6fed21 100644 --- a/drivers/net/fm10k/fm10k.h +++ b/drivers/net/fm10k/fm10k.h @@ -69,6 +69,9 @@ #define FM10K_MAX_RX_DESC (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc)) #define FM10K_MAX_TX_DESC (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc)) +#define FM10K_TX_MAX_SEG UINT8_MAX +#define FM10K_TX_MAX_MTU_SEG UINT8_MAX + /* * byte aligment for HW RX data buffer * Datasheet requires RX buffer addresses shall either be 512-byte aligned or @@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset); uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq); int fm10k_rx_vec_condition_check(struct rte_eth_dev *); void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq); diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index c804436..dffb6d1 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -1446,6 +1446,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev, .nb_max = FM10K_MAX_TX_DESC, .nb_min = FM10K_MIN_TX_DESC, .nb_align = FM10K_MULT_TX_DESC, + .nb_seg_max = FM10K_TX_MAX_SEG, + .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G | @@ -2754,8 +2756,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev) fm10k_txq_vec_setup(txq); } dev->tx_pkt_burst = fm10k_xmit_pkts_vec; + dev->tx_pkt_prep = NULL; } else { dev->tx_pkt_burst = fm10k_xmit_pkts; + dev->tx_pkt_prep = fm10k_prep_pkts; PMD_INIT_LOG(DEBUG, "Use regular Tx func"); } } @@ -2834,6 +2838,7 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev) dev->dev_ops = &fm10k_eth_dev_ops; dev->rx_pkt_burst = &fm10k_recv_pkts; dev->tx_pkt_burst = &fm10k_xmit_pkts; + dev->tx_pkt_prep = &fm10k_prep_pkts; /* only initialize in the primary process */ if (rte_eal_process_type() != RTE_PROC_PRIMARY) diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c index 32cc7ff..5fc4d5a 100644 --- a/drivers/net/fm10k/fm10k_rxtx.c +++ b/drivers/net/fm10k/fm10k_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -35,6 +35,7 @@ #include #include +#include #include "fm10k.h" #include "base/fm10k_type.h" @@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd) } #endif +#define FM10K_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT |\ + PKT_TX_IP_CKSUM |\ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#define FM10K_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK) + /* @note: When this function is changed, make corresponding change to * fm10k_dev_supported_ptypes_get() */ @@ -597,3 +607,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, return count; } + +uint16_t +fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if ((m->ol_flags & PKT_TX_TCP_SEG) && + (m->tso_segsz < FM10K_TSO_MINMSS)) { + rte_errno = -EINVAL; + return i; + } + + if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} -- 1.7.9.5
[dpdk-dev] [PATCH v11 4/6] i40e: add Tx preparation
Signed-off-by: Tomasz Kulasek --- drivers/net/i40e/i40e_ethdev.c |3 ++ drivers/net/i40e/i40e_rxtx.c | 72 +++- drivers/net/i40e/i40e_rxtx.h |8 + 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index 5af0e43..dab0d48 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -936,6 +936,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) dev->dev_ops = &i40e_eth_dev_ops; dev->rx_pkt_burst = i40e_recv_pkts; dev->tx_pkt_burst = i40e_xmit_pkts; + dev->tx_pkt_prep = i40e_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -2629,6 +2630,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_max = I40E_MAX_RING_DESC, .nb_min = I40E_MIN_RING_DESC, .nb_align = I40E_ALIGN_RING_DESC, + .nb_seg_max = I40E_TX_MAX_SEG, + .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG, }; if (pf->flags & I40E_FLAG_VMDQ) { diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 7ae7d9f..7f6d3d8 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -50,6 +50,8 @@ #include #include #include +#include +#include #include "i40e_logs.h" #include "base/i40e_prototype.h" @@ -79,6 +81,17 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define I40E_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK |\ + PKT_TX_OUTER_IP_CKSUM | \ + PKT_TX_TCP_SEG |\ + PKT_TX_QINQ_PKT | \ + PKT_TX_VLAN_PKT) + +#define I40E_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK) + static uint16_t i40e_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); @@ -1411,6 +1424,61 @@ i40e_xmit_pkts_simple(void *tx_queue, return nb_tx; } +/* + * + * TX prep functions + * + **/ +uint16_t +i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** +* m->nb_segs is uint8_t, so m->nb_segs is always less than +* I40E_TX_MAX_SEG. +* We check only a condition for m->nb_segs > I40E_TX_MAX_MTU_SEG. +*/ + if (!(ol_flags & PKT_TX_TCP_SEG)) { + if (m->nb_segs > I40E_TX_MAX_MTU_SEG) { + rte_errno = -EINVAL; + return i; + } + } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) || + (m->tso_segsz > I40E_MAX_TSO_MSS)) { + /* MSS outside the range (256B - 9674B) are considered malicious */ + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_phdr_cksum_fix(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + return i; +} + /* * Find the VSI the queue belongs to. 'queue_idx' is the queue index * application used, which assume having sequential ones. But from driver's @@ -2763,9 +2831,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev) PMD_INIT_LOG(DEBUG, "Simple tx finally be used."); dev->tx_pkt_burst = i40e_xmit_pkts_simple; } + dev->tx_pkt_prep = NULL;