From: Amaranath Somalapuram <asoma...@amd.com>

The ccp driver was scheduling only one CCP in a single burst, so the
effective throughput was limited to the performance of a single CCP.
Scheduling multiple CCPs within one burst increases throughput.
This change divides the enqueued packets equally among the available CCPs.

Cc: sta...@dpdk.org
Signed-off-by: Amaranath Somalapuram <asoma...@amd.com>
---
 drivers/crypto/ccp/ccp_crypto.c      | 22 +++++++----
 drivers/crypto/ccp/ccp_crypto.h      |  7 +++-
 drivers/crypto/ccp/ccp_pmd_private.h |  2 +
 drivers/crypto/ccp/rte_ccp_pmd.c     | 57 +++++++++++++++++++---------
 4 files changed, 62 insertions(+), 26 deletions(-)

diff --git a/drivers/crypto/ccp/ccp_crypto.c b/drivers/crypto/ccp/ccp_crypto.c
index 8862a1a84..23694bac6 100644
--- a/drivers/crypto/ccp/ccp_crypto.c
+++ b/drivers/crypto/ccp/ccp_crypto.c
@@ -2803,7 +2803,9 @@ process_ops_to_enqueue(struct ccp_qp *qp,
 		       struct rte_crypto_op **op,
 		       struct ccp_queue *cmd_q,
 		       uint16_t nb_ops,
-		       int slots_req)
+		       uint16_t total_nb_ops,
+		       int slots_req,
+		       uint16_t b_idx)
 {
 	int i, result = 0;
 	struct ccp_batch_info *b_info;
@@ -2824,6 +2826,7 @@ process_ops_to_enqueue(struct ccp_qp *qp,
 
 	/* populate batch info necessary for dequeue */
 	b_info->op_idx = 0;
+	b_info->b_idx = 0;
 	b_info->lsb_buf_idx = 0;
 	b_info->desccnt = 0;
 	b_info->cmd_q = cmd_q;
@@ -2839,7 +2842,7 @@ process_ops_to_enqueue(struct ccp_qp *qp,
 	b_info->head_offset = (uint32_t)(cmd_q->qbase_phys_addr +
 					 cmd_q->qidx * Q_DESC_SIZE);
 
-	for (i = 0; i < nb_ops; i++) {
+	for (i = b_idx; i < (nb_ops+b_idx); i++) {
 		session = (struct ccp_session *)get_sym_session_private_data(
 						 op[i]->sym->session,
 						 ccp_cryptodev_driver_id);
@@ -2891,6 +2894,8 @@ process_ops_to_enqueue(struct ccp_qp *qp,
 	}
 
 	b_info->opcnt = i;
+	b_info->b_idx = b_idx;
+	b_info->total_nb_ops = total_nb_ops;
 	b_info->tail_offset = (uint32_t)(cmd_q->qbase_phys_addr +
 					 cmd_q->qidx * Q_DESC_SIZE);
 
@@ -2905,7 +2910,7 @@ process_ops_to_enqueue(struct ccp_qp *qp,
 	rte_ring_enqueue(qp->processed_pkts, (void *)b_info);
 
 	EVP_MD_CTX_destroy(auth_ctx);
-	return i;
+	return i-b_idx;
 }
 
 static inline void ccp_auth_dq_prepare(struct rte_crypto_op *op)
@@ -2990,8 +2995,8 @@ ccp_prepare_ops(struct ccp_qp *qp,
 	}
 
 	min_ops = RTE_MIN(nb_ops, b_info->opcnt);
-	for (i = 0; i < min_ops; i++) {
-		op_d[i] = b_info->op[b_info->op_idx++];
+	for (i = b_info->b_idx; i < min_ops; i++) {
+		op_d[i] = b_info->op[b_info->b_idx + b_info->op_idx++];
 		session = (struct ccp_session *)get_sym_session_private_data(
 						 op_d[i]->sym->session,
 						 ccp_cryptodev_driver_id);
@@ -3032,7 +3037,8 @@ ccp_prepare_ops(struct ccp_qp *qp,
 int
 process_ops_to_dequeue(struct ccp_qp *qp,
 		       struct rte_crypto_op **op,
-		       uint16_t nb_ops)
+		       uint16_t nb_ops,
+		       uint16_t *total_nb_ops)
 {
 	struct ccp_batch_info *b_info;
 	uint32_t cur_head_offset;
@@ -3047,6 +3053,7 @@ process_ops_to_dequeue(struct ccp_qp *qp,
 
 	if (b_info->auth_ctr == b_info->opcnt)
 		goto success;
+	*total_nb_ops = b_info->total_nb_ops;
 	cur_head_offset = CCP_READ_REG(b_info->cmd_q->reg_base,
 				       CMD_Q_HEAD_LO_BASE);
 
@@ -3056,7 +3063,7 @@ process_ops_to_dequeue(struct ccp_qp *qp,
 			qp->b_info = b_info;
 			return 0;
 		}
-	} else {
+	} else if (b_info->tail_offset != b_info->head_offset) {
 		if ((cur_head_offset >= b_info->head_offset) ||
 		    (cur_head_offset < b_info->tail_offset)) {
 			qp->b_info = b_info;
@@ -3066,6 +3073,7 @@ process_ops_to_dequeue(struct ccp_qp *qp,
 	}
 
 success:
+	*total_nb_ops = b_info->total_nb_ops;
 	nb_ops = ccp_prepare_ops(qp, op, b_info, nb_ops);
 	rte_atomic64_add(&b_info->cmd_q->free_slots, b_info->desccnt);
 	b_info->desccnt = 0;
diff --git a/drivers/crypto/ccp/ccp_crypto.h b/drivers/crypto/ccp/ccp_crypto.h
index 882b398ac..8e6d03efc 100644
--- a/drivers/crypto/ccp/ccp_crypto.h
+++ b/drivers/crypto/ccp/ccp_crypto.h
@@ -353,7 +353,9 @@ int process_ops_to_enqueue(struct ccp_qp *qp,
 			   struct rte_crypto_op **op,
 			   struct ccp_queue *cmd_q,
 			   uint16_t nb_ops,
-			   int slots_req);
+			   uint16_t total_nb_ops,
+			   int slots_req,
+			   uint16_t b_idx);
 
 /**
  * process crypto ops to be dequeued
@@ -365,7 +367,8 @@ int process_ops_to_enqueue(struct ccp_qp *qp,
  */
 int process_ops_to_dequeue(struct ccp_qp *qp,
 			   struct rte_crypto_op **op,
-			   uint16_t nb_ops);
+			   uint16_t nb_ops,
+			   uint16_t *total_nb_ops);
 
 /**
diff --git a/drivers/crypto/ccp/ccp_pmd_private.h b/drivers/crypto/ccp/ccp_pmd_private.h
index 781050c31..1c4118ee3 100644
--- a/drivers/crypto/ccp/ccp_pmd_private.h
+++ b/drivers/crypto/ccp/ccp_pmd_private.h
@@ -50,8 +50,10 @@ struct ccp_batch_info {
 	struct rte_crypto_op *op[CCP_MAX_BURST];
 	/**< optable populated at enque time from app*/
 	int op_idx;
+	uint16_t b_idx;
 	struct ccp_queue *cmd_q;
 	uint16_t opcnt;
+	uint16_t total_nb_ops;
 	/**< no. of crypto ops in batch*/
 	int desccnt;
 	/**< no. of ccp queue descriptors*/
diff --git a/drivers/crypto/ccp/rte_ccp_pmd.c b/drivers/crypto/ccp/rte_ccp_pmd.c
index a182c6a52..4807b580e 100644
--- a/drivers/crypto/ccp/rte_ccp_pmd.c
+++ b/drivers/crypto/ccp/rte_ccp_pmd.c
@@ -23,6 +23,7 @@ static unsigned int ccp_pmd_init_done;
 uint8_t ccp_cryptodev_driver_id;
 extern void *sha_ctx;
+uint8_t cryptodev_cnt;
 
 struct ccp_pmd_init_params {
 	struct rte_cryptodev_pmd_init_params def_p;
@@ -202,30 +203,45 @@ ccp_pmd_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops,
 	struct ccp_queue *cmd_q;
 	struct rte_cryptodev *dev = qp->dev;
 	uint16_t i, enq_cnt = 0, slots_req = 0;
+	uint16_t tmp_ops = nb_ops, b_idx, cur_ops = 0;
 
 	if (nb_ops == 0)
 		return 0;
 
 	if (unlikely(rte_ring_full(qp->processed_pkts) != 0))
 		return 0;
+	if (tmp_ops >= cryptodev_cnt)
+		cur_ops = nb_ops / cryptodev_cnt + (nb_ops)%cryptodev_cnt;
+	else
+		cur_ops = tmp_ops;
+	while (tmp_ops) {
+		b_idx = nb_ops - tmp_ops;
+		slots_req = 0;
+		if (cur_ops <= tmp_ops) {
+			tmp_ops -= cur_ops;
+		} else {
+			cur_ops = tmp_ops;
+			tmp_ops = 0;
+		}
+		for (i = 0; i < cur_ops; i++) {
+			sess = get_ccp_session(qp, ops[i + b_idx]);
+			if (unlikely(sess == NULL) && (i == 0)) {
+				qp->qp_stats.enqueue_err_count++;
+				return 0;
+			} else if (sess == NULL) {
+				cur_ops = i;
+				break;
+			}
+			slots_req += ccp_compute_slot_count(sess);
+		}
 
-	for (i = 0; i < nb_ops; i++) {
-		sess = get_ccp_session(qp, ops[i]);
-		if (unlikely(sess == NULL) && (i == 0)) {
-			qp->qp_stats.enqueue_err_count++;
+		cmd_q = ccp_allot_queue(dev, slots_req);
+		if (unlikely(cmd_q == NULL))
 			return 0;
-		} else if (sess == NULL) {
-			nb_ops = i;
-			break;
-		}
-		slots_req += ccp_compute_slot_count(sess);
+		enq_cnt += process_ops_to_enqueue(qp, ops, cmd_q, cur_ops,
+				nb_ops, slots_req, b_idx);
+		i++;
 	}
-
-	cmd_q = ccp_allot_queue(dev, slots_req);
-	if (unlikely(cmd_q == NULL))
-		return 0;
-
-	enq_cnt = process_ops_to_enqueue(qp, ops, cmd_q, nb_ops, slots_req);
 	qp->qp_stats.enqueued_count += enq_cnt;
 	return enq_cnt;
 }
@@ -235,9 +251,16 @@ ccp_pmd_dequeue_burst(void *queue_pair, struct rte_crypto_op **ops,
 		uint16_t nb_ops)
 {
 	struct ccp_qp *qp = queue_pair;
-	uint16_t nb_dequeued = 0, i;
+	uint16_t nb_dequeued = 0, i, total_nb_ops;
+
+	nb_dequeued = process_ops_to_dequeue(qp, ops, nb_ops, &total_nb_ops);
 
-	nb_dequeued = process_ops_to_dequeue(qp, ops, nb_ops);
+	if (total_nb_ops) {
+		while (nb_dequeued != total_nb_ops) {
+			nb_dequeued = process_ops_to_dequeue(qp,
+					ops, nb_ops, &total_nb_ops);
+		}
+	}
 
 	/* Free session if a session-less crypto op */
 	for (i = 0; i < nb_dequeued; i++)
-- 
2.17.1
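
For context, the sketch below (illustrative only, not part of the patch) shows the
chunking arithmetic the reworked ccp_pmd_enqueue_burst() applies: a burst of nb_ops
operations is cut into chunks of nb_ops / cryptodev_cnt + nb_ops % cryptodev_cnt,
each submitted starting at offset b_idx. The split_burst() helper and the printf
driver are hypothetical; in the driver each chunk is handed to ccp_allot_queue() and
process_ops_to_enqueue() instead of being printed.

/*
 * Standalone sketch of the burst-splitting rule; not part of the patch.
 * split_burst() and main() are made up for illustration only.
 */
#include <stdint.h>
#include <stdio.h>

static void
split_burst(uint16_t nb_ops, uint8_t cryptodev_cnt)
{
	uint16_t tmp_ops = nb_ops, cur_ops, b_idx;

	/* Chunk size: quotient plus remainder, as in ccp_pmd_enqueue_burst() */
	if (tmp_ops >= cryptodev_cnt)
		cur_ops = nb_ops / cryptodev_cnt + nb_ops % cryptodev_cnt;
	else
		cur_ops = tmp_ops;

	while (tmp_ops) {
		b_idx = nb_ops - tmp_ops;	/* offset of this chunk in ops[] */
		if (cur_ops <= tmp_ops) {
			tmp_ops -= cur_ops;
		} else {
			cur_ops = tmp_ops;	/* final, shorter chunk */
			tmp_ops = 0;
		}
		printf("chunk at ops[%u], %u ops\n",
		       (unsigned int)b_idx, (unsigned int)cur_ops);
	}
}

int
main(void)
{
	split_burst(32, 4);	/* four chunks of 8 ops */
	split_burst(30, 4);	/* chunks of 9, 9, 9 and 3 ops */
	return 0;
}

Because the remainder is folded into every chunk rather than distributed one op per
device, a burst that does not divide evenly ends with one shorter chunk, as the
second call above shows.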