[dpdk-dev] [PATCH v3] crypto/ccp: enable IOMMU for CCP
From: Amaranath Somalapuram

The CCP PMD uses the vdev framework, and the vdev framework does not
support IOMMU. Add custom IOMMU support for the AMD CCP driver.

Signed-off-by: Amaranath Somalapuram
---
 drivers/crypto/ccp/ccp_crypto.c  | 114 ---
 drivers/crypto/ccp/ccp_dev.c     |  54 +++
 drivers/crypto/ccp/ccp_pci.c     |   1 +
 drivers/crypto/ccp/rte_ccp_pmd.c |   3 +
 4 files changed, 104 insertions(+), 68 deletions(-)

diff --git a/drivers/crypto/ccp/ccp_crypto.c b/drivers/crypto/ccp/ccp_crypto.c
index db3fb6eff..f37d35f18 100644
--- a/drivers/crypto/ccp/ccp_crypto.c
+++ b/drivers/crypto/ccp/ccp_crypto.c
@@ -31,8 +31,10 @@
 #include
 #include
 
+extern int iommu_mode;
+void *sha_ctx;
 /* SHA initial context values */
-static uint32_t ccp_sha1_init[SHA_COMMON_DIGEST_SIZE / sizeof(uint32_t)] = {
+uint32_t ccp_sha1_init[SHA_COMMON_DIGEST_SIZE / sizeof(uint32_t)] = {
 	SHA1_H4, SHA1_H3,
 	SHA1_H2, SHA1_H1,
 	SHA1_H0, 0x0U,
@@ -744,8 +746,13 @@ ccp_configure_session_cipher(struct ccp_session *sess,
 		CCP_LOG_ERR("Invalid CCP Engine");
 		return -ENOTSUP;
 	}
-	sess->cipher.nonce_phys = rte_mem_virt2phy(sess->cipher.nonce);
-	sess->cipher.key_phys = rte_mem_virt2phy(sess->cipher.key_ccp);
+	if (iommu_mode == 2) {
+		sess->cipher.nonce_phys = rte_mem_virt2iova(sess->cipher.nonce);
+		sess->cipher.key_phys = rte_mem_virt2iova(sess->cipher.key_ccp);
+	} else {
+		sess->cipher.nonce_phys = rte_mem_virt2phy(sess->cipher.nonce);
+		sess->cipher.key_phys = rte_mem_virt2phy(sess->cipher.key_ccp);
+	}
 	return 0;
 }
@@ -784,6 +791,7 @@ ccp_configure_session_auth(struct ccp_session *sess,
 		sess->auth.ctx = (void *)ccp_sha1_init;
 		sess->auth.ctx_len = CCP_SB_BYTES;
 		sess->auth.offset = CCP_SB_BYTES - SHA1_DIGEST_SIZE;
+		rte_memcpy(sha_ctx, sess->auth.ctx, SHA_COMMON_DIGEST_SIZE);
 		break;
 	case RTE_CRYPTO_AUTH_SHA1_HMAC:
 		if (sess->auth_opt) {
@@ -822,6 +830,7 @@ ccp_configure_session_auth(struct ccp_session *sess,
 		sess->auth.ctx = (void *)ccp_sha224_init;
 		sess->auth.ctx_len = CCP_SB_BYTES;
 		sess->auth.offset = CCP_SB_BYTES - SHA224_DIGEST_SIZE;
+		rte_memcpy(sha_ctx, sess->auth.ctx, SHA256_DIGEST_SIZE);
 		break;
 	case RTE_CRYPTO_AUTH_SHA224_HMAC:
 		if (sess->auth_opt) {
@@ -884,6 +893,7 @@ ccp_configure_session_auth(struct ccp_session *sess,
 		sess->auth.ctx = (void *)ccp_sha256_init;
 		sess->auth.ctx_len = CCP_SB_BYTES;
 		sess->auth.offset = CCP_SB_BYTES - SHA256_DIGEST_SIZE;
+		rte_memcpy(sha_ctx, sess->auth.ctx, SHA256_DIGEST_SIZE);
 		break;
 	case RTE_CRYPTO_AUTH_SHA256_HMAC:
 		if (sess->auth_opt) {
@@ -946,6 +956,7 @@ ccp_configure_session_auth(struct ccp_session *sess,
 		sess->auth.ctx = (void *)ccp_sha384_init;
 		sess->auth.ctx_len = CCP_SB_BYTES << 1;
 		sess->auth.offset = (CCP_SB_BYTES << 1) - SHA384_DIGEST_SIZE;
+		rte_memcpy(sha_ctx, sess->auth.ctx, SHA512_DIGEST_SIZE);
 		break;
 	case RTE_CRYPTO_AUTH_SHA384_HMAC:
 		if (sess->auth_opt) {
@@ -1010,6 +1021,7 @@ ccp_configure_session_auth(struct ccp_session *sess,
 		sess->auth.ctx = (void *)ccp_sha512_init;
 		sess->auth.ctx_len = CCP_SB_BYTES << 1;
 		sess->auth.offset = (CCP_SB_BYTES << 1) - SHA512_DIGEST_SIZE;
+		rte_memcpy(sha_ctx, sess->auth.ctx, SHA512_DIGEST_SIZE);
 		break;
 	case RTE_CRYPTO_AUTH_SHA512_HMAC:
 		if (sess->auth_opt) {
@@ -1159,8 +1171,13 @@ ccp_configure_session_aead(struct ccp_session *sess,
 		CCP_LOG_ERR("Unsupported aead algo");
 		return -ENOTSUP;
 	}
-	sess->cipher.nonce_phys = rte_mem_virt2phy(sess->cipher.nonce);
-	sess->cipher.key_phys = rte_mem_virt2phy(sess->cipher.key_ccp);
+	if (iommu_mode == 2) {
+		sess->cipher.nonce_phys = rte_mem_virt2iova(sess->cipher.nonce);
+		sess->cipher.key_phys = rte_mem_virt2iova(sess->cipher.key_ccp);
+	} else {
+		sess->cipher.nonce_phys = rte_mem_virt2phy(sess->cipher.nonce);
+		sess->cipher.key_phys = rte_mem_virt2phy(sess->cipher.key_ccp);
+	}
 	return 0;
 }
@@ -1575,11 +1592,16 @@ ccp_perform_hmac(struct rte_crypto_op *op,
 			op->sym->auth.data.offset);
 	append_ptr = (void *)rte_pktmbuf_append(op->sym->m_src,
 			session->auth.ctx_len);
-	dest_addr = (phys_addr_t)rte_mem_virt2phy(append_ptr);
+	if (iommu_mode == 2) {
+		dest_addr = (phys_addr_t)
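For readers following the patch, the recurring change is the choice of address
translation for buffers handed to the CCP hardware: when an IOMMU is in use
(the patch keys this off its driver-private iommu_mode variable, where the
value 2 presumably mirrors IOVA-as-VA), the address comes from
rte_mem_virt2iova(); otherwise rte_mem_virt2phy() is used as before. A minimal
sketch of that decision, using rte_eal_iova_mode() instead of the driver's
flag; the helper name is illustrative and not part of the patch:

#include <rte_eal.h>
#include <rte_memory.h>

/* Illustrative only: pick the DMA-able address the same way the patch does,
 * keyed off the EAL IOVA mode rather than the driver's iommu_mode variable. */
static inline rte_iova_t
ccp_dma_addr_sketch(const void *vaddr)
{
	if (rte_eal_iova_mode() == RTE_IOVA_VA)
		return rte_mem_virt2iova(vaddr);	/* IOMMU present: IOVA == VA */

	return rte_mem_virt2phy(vaddr);			/* no IOMMU: physical address */
}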
[dpdk-dev] [PATCH v4 1/2] examples/vhost: add ioat ring space count and check
Add an ioat ring space count and check: if the ioat ring space is not
enough for the next async vhost packet enqueue, just return instead of
enqueuing, to prevent enqueue failures.

Signed-off-by: Cheng Jiang
---
 examples/vhost/ioat.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 71d8a1f1f..b0b04aa45 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -17,6 +17,7 @@ struct packet_tracker {
 	unsigned short next_read;
 	unsigned short next_write;
 	unsigned short last_remain;
+	unsigned short ioat_space;
 };
 
 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
@@ -113,7 +114,7 @@ open_ioat(const char *value)
 			goto out;
 		}
 		rte_rawdev_start(dev_id);
-
+		cb_tracker[dev_id].ioat_space = IOAT_RING_SIZE;
 		dma_info->nr++;
 		i++;
 	}
@@ -140,13 +141,9 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 			src = descs[i_desc].src;
 			dst = descs[i_desc].dst;
 			i_seg = 0;
+			if (cb_tracker[dev_id].ioat_space < src->nr_segs)
+				break;
 			while (i_seg < src->nr_segs) {
-				/*
-				 * TODO: Assuming that the ring space of the
-				 * IOAT device is large enough, so there is no
-				 * error here, and the actual error handling
-				 * will be added later.
-				 */
 				rte_ioat_enqueue_copy(dev_id,
 					(uintptr_t)(src->iov[i_seg].iov_base)
 						+ src->offset,
@@ -158,7 +155,8 @@ ioat_transfer_data_cb(int vid, uint16_t queue_id,
 				i_seg++;
 			}
 			write &= mask;
-			cb_tracker[dev_id].size_track[write] = i_seg;
+			cb_tracker[dev_id].size_track[write] = src->nr_segs;
+			cb_tracker[dev_id].ioat_space -= src->nr_segs;
 			write++;
 		}
 	} else {
@@ -186,6 +184,7 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
 		int dev_id = dma_bind[vid].dmas[queue_id * 2
 				+ VIRTIO_RXQ].dev_id;
 		n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump);
+		cb_tracker[dev_id].ioat_space += n_seg;
 		n_seg += cb_tracker[dev_id].last_remain;
 		if (!n_seg)
 			return 0;
-- 
2.29.2
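The accounting added above amounts to a per-device credit counter: initialize
it to the ring size when the rawdev is started, refuse a packet whose segment
count exceeds the remaining credits, subtract the segments actually enqueued,
and add back whatever the completion callback reaps. A stand-alone sketch of
that bookkeeping with illustrative names (struct ring_credits and the
credits_* functions are not part of the sample):

#include <stdbool.h>
#include <stdint.h>

#define RING_SIZE 4096	/* stands in for IOAT_RING_SIZE */

struct ring_credits {
	uint16_t space;	/* free descriptor slots left on the DMA ring */
};

/* Called once after the rawdev is started. */
static void credits_init(struct ring_credits *c)
{
	c->space = RING_SIZE;
}

/* Before enqueuing a packet made of nr_segs copies: take the credits,
 * or tell the caller to stop and retry later. */
static bool credits_take(struct ring_credits *c, uint16_t nr_segs)
{
	if (c->space < nr_segs)
		return false;	/* ring full: break out, as the patch does */
	c->space -= nr_segs;
	return true;
}

/* After polling completions: give back one credit per completed copy. */
static void credits_put(struct ring_credits *c, uint16_t n_done)
{
	c->space += n_done;
}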
[dpdk-dev] [PATCH v4 0/2] examples/vhost: sample code refactor
Refactor the vhost sample code: add an ioat ring space count and check
in the ioat callback, optimize the vhost data path for batch enqueue,
replace rte_atomicNN_xxx with atomic_XXX, and refactor the vhost async
data path.
---
v4:
 * improved code structure
 * improved vhost enqueue buffer memory allocation
 * cleaned up some code
v3:
 * added some variable initialization
 * cleaned up some code
v2:
 * optimized patch structure
 * optimized git log
 * replaced rte_atomicNN_xxx with atomic_XXX

Cheng Jiang (2):
  examples/vhost: add ioat ring space count and check
  examples/vhost: refactor vhost data path

 examples/vhost/ioat.c |  15 ++--
 examples/vhost/main.c | 202 +++---
 examples/vhost/main.h |   7 +-
 3 files changed, 161 insertions(+), 63 deletions(-)

-- 
2.29.2
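The "replace rte_atomicNN_xxx with atomic_XXX" item refers to moving the
per-device statistics counters from DPDK's rte_atomic64_t helpers to C11
<stdatomic.h> operations, roughly as sketched below (simplified field and
function names, not the exact sample code):

#include <stdatomic.h>
#include <stdint.h>

/* Before: rte_atomic64_t counters updated with rte_atomic64_inc() and
 * rte_atomic64_add(). After: plain C11 atomic counters. */
struct rx_stats {
	atomic_int_least64_t rx_total_atomic;
	atomic_int_least64_t rx_atomic;
};

static inline void
stats_update(struct rx_stats *s, uint16_t burst_size, uint16_t enqueued)
{
	atomic_fetch_add(&s->rx_total_atomic, burst_size);
	atomic_fetch_add(&s->rx_atomic, enqueued);
}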
[dpdk-dev] [PATCH v4 2/2] examples/vhost: refactor vhost data path
Change the vm2vm data path to batch enqueue for better performance.
Support the latest async vhost API, refactor the vhost async data path,
replace rte_atomicNN_xxx with atomic_XXX, and clean up some code.

Signed-off-by: Cheng Jiang
---
 examples/vhost/main.c | 202 +++---
 examples/vhost/main.h |   7 +-
 2 files changed, 154 insertions(+), 55 deletions(-)

diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 8d8c3038b..3ea12a474 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -179,9 +179,18 @@ struct mbuf_table {
 	struct rte_mbuf *m_table[MAX_PKT_BURST];
 };
 
+struct vhost_bufftable {
+	uint32_t len;
+	uint64_t pre_tsc;
+	struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
 /* TX queue for each data core. */
 struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
 
+/* TX queue for each vhost device. */
+struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * MAX_VHOST_DEVICE];
+
 #define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
 				 / US_PER_S * BURST_TX_DRAIN_US)
 #define VLAN_HLEN	4
@@ -804,39 +813,114 @@ unlink_vmdq(struct vhost_dev *vdev)
 	}
 }
 
+static inline void
+free_pkts(struct rte_mbuf **pkts, uint16_t n)
+{
+	while (n--)
+		rte_pktmbuf_free(pkts[n]);
+}
+
 static __rte_always_inline void
-virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
+complete_async_pkts(struct vhost_dev *vdev)
+{
+	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
+	uint16_t complete_count;
+
+	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
+					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
+	if (complete_count) {
+		atomic_fetch_sub(&vdev->nr_async_pkts, complete_count);
+		free_pkts(p_cpl, complete_count);
+	}
+}
+
+static __rte_always_inline void
+sync_virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
 	    struct rte_mbuf *m)
 {
 	uint16_t ret;
-	struct rte_mbuf *m_cpl[1];
 
 	if (builtin_net_driver) {
 		ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
-	} else if (async_vhost_driver) {
-		ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
-						&m, 1);
-
-		if (likely(ret))
-			dst_vdev->nr_async_pkts++;
-
-		while (likely(dst_vdev->nr_async_pkts)) {
-			if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
-					VIRTIO_RXQ, m_cpl, 1))
-				dst_vdev->nr_async_pkts--;
-		}
 	} else {
 		ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
 	}
 
 	if (enable_stats) {
-		rte_atomic64_inc(&dst_vdev->stats.rx_total_atomic);
-		rte_atomic64_add(&dst_vdev->stats.rx_atomic, ret);
+		atomic_fetch_add(&dst_vdev->stats.rx_total_atomic, 1);
+		atomic_fetch_add(&dst_vdev->stats.rx_atomic, ret);
 		src_vdev->stats.tx_total++;
 		src_vdev->stats.tx += ret;
 	}
 }
 
+static __rte_always_inline void
+drain_vhost(struct vhost_dev *vdev)
+{
+	uint16_t ret;
+	uint64_t queue_id = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
+	uint16_t nr_xmit = vhost_txbuff[queue_id]->len;
+	struct rte_mbuf **m = vhost_txbuff[queue_id]->m_table;
+
+	if (builtin_net_driver) {
+		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
+	} else if (async_vhost_driver) {
+		uint32_t cpu_cpl_nr = 0;
+		uint16_t enqueue_fail = 0;
+		struct rte_mbuf *m_cpu_cpl[nr_xmit];
+
+		complete_async_pkts(vdev);
+		ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+					m, nr_xmit, m_cpu_cpl, &cpu_cpl_nr);
+		atomic_fetch_add(&vdev->nr_async_pkts, ret - cpu_cpl_nr);
+
+		if (cpu_cpl_nr)
+			free_pkts(m_cpu_cpl, cpu_cpl_nr);
+
+		enqueue_fail = nr_xmit - ret;
+		if (enqueue_fail)
+			free_pkts(&m[ret], nr_xmit - ret);
+	} else {
+		ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
+						m, nr_xmit);
+	}
+
+	if (enable_stats) {
+		atomic_fetch_add(&vdev->stats.rx_total_atomic, nr_xmit);
+		atomic_fetch_add(&vdev->stats.rx_atomic, ret);
+	}
+
+	if (!async_vhost_driver)
+		free_pkts(m, nr_xmit);
+}
+
+static __rte_always_inline void
+drain_vhost_table(void)
+{
+	const uint16_t lcore_id = rte_lcore_id();
+	struct vhost_bufftable *vhost_txq;
+	struct vhost_dev *vdev;
+	uint64_t cur_tsc;
+
+	TAILQ_FOREACH(vdev, &lcore_info[lcore_id].vdev_list, lcor
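The batching scheme introduced above works as follows: each (lcore, vhost
device) pair owns one vhost_bufftable entry, indexed as
lcore_id * MAX_VHOST_DEVICE + vid. Packets destined for a device are staged in
that buffer and only submitted via drain_vhost() once MAX_PKT_BURST packets
have accumulated, or once MBUF_TABLE_DRAIN_TSC has elapsed since the last
flush (handled by drain_vhost_table(), whose body is cut off above). A
condensed sketch of the staging decision, reusing the definitions from the
diff; the control flow is simplified and this is not the exact sample code:

/* Simplified: stage one mbuf for vdev on the current lcore and flush when
 * the buffer is full. Timeout-based flushing of partially filled buffers is
 * left to drain_vhost_table(). */
static void
buffer_packet(struct vhost_dev *vdev, struct rte_mbuf *m)
{
	uint64_t idx = rte_lcore_id() * MAX_VHOST_DEVICE + vdev->vid;
	struct vhost_bufftable *txq = vhost_txbuff[idx];

	txq->m_table[txq->len++] = m;

	if (txq->len == MAX_PKT_BURST) {
		drain_vhost(vdev);		/* submit the whole burst at once */
		txq->len = 0;
		txq->pre_tsc = rte_rdtsc();	/* restart the drain timer */
	}
}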
Re: [dpdk-dev] [PATCH] app/testpmd: fix start index for showing FEC array
+dev@dpdk.org which seems to have been dropped by accident.

On Friday, December 12/25/20, 2020 at 09:03:43 +0800, Min Hu (Connor) wrote:
> 
> 
> On 2020/12/24 19:25, Rahul Lakkireddy wrote:
> >On Thursday, December 12/24/20, 2020 at 17:36:27 +0800, Min Hu (Connor)
> >wrote:
> >>
> >>
> >>On 2020/12/23 20:31, Rahul Lakkireddy wrote:
> >>>On Monday, December 12/21/20, 2020 at 17:07:21 +0800, Min Hu (Connor)
> >>>wrote:
> 
> 
> On 2020/12/21 6:47, Rahul Lakkireddy wrote:
> >From: Karra Satwik
> >
> >Start from index 0 when going through the FEC array. This will allow
> >"off" to get printed for RTE_ETH_FEC_NOFEC mode.
> >
> >Fixes: b19da32e3151 ("app/testpmd: add FEC command")
> >Cc: sta...@dpdk.org
> >
> >Signed-off-by: Karra Satwik
> >Signed-off-by: Rahul Lakkireddy
> >---
> >  app/test-pmd/config.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> >diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
> >index 3f6c8642b..a6a5baa4e 100644
> >--- a/app/test-pmd/config.c
> >+++ b/app/test-pmd/config.c
> >@@ -3701,7 +3701,7 @@ show_fec_capability(unsigned int num, struct
> >rte_eth_fec_capa *speed_fec_capa)
> >  		printf("%s : ",
> >
> >  			rte_eth_link_speed_to_str(speed_fec_capa[i].speed));
> >-		for (j = RTE_ETH_FEC_AUTO; j < RTE_DIM(fec_mode_name);
> >j++) {
> >+		for (j = 0; j < RTE_DIM(fec_mode_name); j++) {
> As RTE_ETH_FEC_NOFEC is a mode which every device has, we think it
> should not be regarded as a "capability".
> Thanks.
> 
> >>>
> >>>We had gotten several requests asking if the device supported turning
> >>>FEC "off" because it was not listed in capabilities. Hence, the
> >>>motivation for this patch to explicitly show that "off" is
> >>>supported.
> >>
> >>Hi, we have referred to other NICs in kernel driver mode; it shows like
> >>this:
> >>
> >>[root@centos197-test_dpdk]$ethtool --show-fec eth9
> >>FEC parameters for eth9:
> >>Configured FEC encodings: Auto BaseR
> >>Active FEC encoding: Off
> >>
> >>Here, "Configured FEC encodings" means the capability it supports; it
> >>does not include "off", although it can be configured using "off".
> >>Thanks.
> >>
> >
> >It is the same with our own card too using our kernel driver, and we
> >have gotten the same questions in the past with our kernel driver
> >too.
> >
> ># ethtool --show-fec enp2s0f4
> >FEC parameters for enp2s0f4:
> >Configured FEC encodings: Auto BaseR RS
> >Active FEC encoding: RS
> >
> >We don't have any strong opinion on this. We just wanted to let
> >testpmd show the 'off' cap since it's also an available option.
> >If the intention is confusing, then sure, we will drop the patch.
> >Let us know your feedback.
> Hi, I've got your opinion, but I think the display about FEC in DPDK
> testpmd had better be in accordance with that in kernel ethtool.
> If the two are different, it may confuse users. Thanks.
> So, what about everybody? Any opinion will be welcome.
> 
> 
> >>
> >>
> >>>
> >>>
> >  			if (RTE_ETH_FEC_MODE_TO_CAPA(j) &
> >  			    speed_fec_capa[i].capa)
> >  				printf("%s ", fec_mode_name[j].name);
> >
> >>>.
> >>>
> >.
> >
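For reference, the reason the start index matters: RTE_ETH_FEC_NOFEC is the
first enumerator (value 0), so RTE_ETH_FEC_MODE_TO_CAPA(RTE_ETH_FEC_NOFEC) is
bit 0 of the capability mask, and a loop starting at RTE_ETH_FEC_AUTO
(value 1) can never report "off". A self-contained sketch of the capability
walk, with local stand-ins for the ethdev enum and testpmd's fec_mode_name
table (the capability mask below is made up for illustration):

#include <stdint.h>
#include <stdio.h>

/* Values mirror enum rte_eth_fec_mode and RTE_ETH_FEC_MODE_TO_CAPA(). */
enum { FEC_NOFEC, FEC_AUTO, FEC_BASER, FEC_RS };
#define FEC_MODE_CAPA(x) (1U << (x))

static const char *const fec_name[] = { "off", "auto", "baser", "rs" };

int main(void)
{
	/* Hypothetical capability mask: off, auto and rs supported. */
	uint32_t capa = FEC_MODE_CAPA(FEC_NOFEC) | FEC_MODE_CAPA(FEC_AUTO) |
			FEC_MODE_CAPA(FEC_RS);
	unsigned int j;

	/* Starting at 0 (not FEC_AUTO) lets "off" be reported. */
	for (j = 0; j < sizeof(fec_name) / sizeof(fec_name[0]); j++)
		if (FEC_MODE_CAPA(j) & capa)
			printf("%s ", fec_name[j]);
	printf("\n");	/* prints: off auto rs */
	return 0;
}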