Don't write the CSR tail until we have processed enough TX descriptors. To avoid crypto operations sitting in the TX ring indefinitely, a "force write" threshold is used: - on TX, tail write coalescing does not occur while the number of inflight requests is below the force write threshold - on RX, if the number of inflight requests has dropped to or below the force write threshold and there are enqueued requests whose tail has not yet been written to the CSR, write the tail so they are submitted for processing.
Signed-off-by: Anatoly Burakov <anatoly.bura...@intel.com> --- v2: fixed commit message doc/guides/rel_notes/release_17_11.rst | 1 + drivers/crypto/qat/qat_crypto.c | 41 ++++++++++++++++++++++++---------- drivers/crypto/qat/qat_crypto.h | 7 ++++++ 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/doc/guides/rel_notes/release_17_11.rst b/doc/guides/rel_notes/release_17_11.rst index 0b77095..f0d3960 100644 --- a/doc/guides/rel_notes/release_17_11.rst +++ b/doc/guides/rel_notes/release_17_11.rst @@ -47,6 +47,7 @@ New Features * Removed atomics from the internal queue pair structure. * Coalesce writes to HEAD CSR on response processing. + * Coalesce writes to TAIL CSR on request processing. Resolved Issues diff --git a/drivers/crypto/qat/qat_crypto.c b/drivers/crypto/qat/qat_crypto.c index 1656e0f..a2b202f 100644 --- a/drivers/crypto/qat/qat_crypto.c +++ b/drivers/crypto/qat/qat_crypto.c @@ -921,6 +921,14 @@ qat_bpicipher_postprocess(struct qat_session *ctx, return sym_op->cipher.data.length - last_block_len; } +static inline void +txq_write_tail(struct qat_qp *qp, struct qat_queue *q) { + WRITE_CSR_RING_TAIL(qp->mmap_bar_addr, q->hw_bundle_number, + q->hw_queue_number, q->tail); + q->nb_pending_requests = 0; + q->csr_tail = q->tail; +} + uint16_t qat_pmd_enqueue_op_burst(void *qp, struct rte_crypto_op **ops, uint16_t nb_ops) @@ -973,10 +981,13 @@ qat_pmd_enqueue_op_burst(void *qp, struct rte_crypto_op **ops, cur_op++; } kick_tail: - WRITE_CSR_RING_TAIL(tmp_qp->mmap_bar_addr, queue->hw_bundle_number, - queue->hw_queue_number, tail); queue->tail = tail; tmp_qp->stats.enqueued_count += nb_ops_sent; + queue->nb_pending_requests += nb_ops_sent; + if (tmp_qp->inflights16 < QAT_CSR_TAIL_FORCE_WRITE_THRESH || + queue->nb_pending_requests > QAT_CSR_TAIL_WRITE_THRESH) { + txq_write_tail(tmp_qp, queue); + } return nb_ops_sent; } @@ -1011,17 +1022,18 @@ uint16_t qat_pmd_dequeue_op_burst(void *qp, struct rte_crypto_op **ops, uint16_t nb_ops) { - struct qat_queue 
*queue; + struct qat_queue *rx_queue, *tx_queue; struct qat_qp *tmp_qp = (struct qat_qp *)qp; uint32_t msg_counter = 0; struct rte_crypto_op *rx_op; struct icp_qat_fw_comn_resp *resp_msg; uint32_t head; - queue = &(tmp_qp->rx_q); - head = queue->head; + rx_queue = &(tmp_qp->rx_q); + tx_queue = &(tmp_qp->tx_q); + head = rx_queue->head; resp_msg = (struct icp_qat_fw_comn_resp *) - ((uint8_t *)queue->base_addr + head); + ((uint8_t *)rx_queue->base_addr + head); while (*(uint32_t *)resp_msg != ADF_RING_EMPTY_SIG && msg_counter != nb_ops) { @@ -1048,21 +1060,26 @@ qat_pmd_dequeue_op_burst(void *qp, struct rte_crypto_op **ops, rx_op->status = RTE_CRYPTO_OP_STATUS_SUCCESS; } - head = adf_modulo(head + queue->msg_size, queue->modulo); + head = adf_modulo(head + rx_queue->msg_size, rx_queue->modulo); resp_msg = (struct icp_qat_fw_comn_resp *) - ((uint8_t *)queue->base_addr + head); + ((uint8_t *)rx_queue->base_addr + head); *ops = rx_op; ops++; msg_counter++; } if (msg_counter > 0) { - queue->head = head; + rx_queue->head = head; tmp_qp->stats.dequeued_count += msg_counter; - queue->nb_processed_responses += msg_counter; + rx_queue->nb_processed_responses += msg_counter; tmp_qp->inflights16 -= msg_counter; - if (queue->nb_processed_responses > QAT_CSR_HEAD_WRITE_THRESH) - rxq_free_desc(tmp_qp, queue); + if (rx_queue->nb_processed_responses > QAT_CSR_HEAD_WRITE_THRESH) + rxq_free_desc(tmp_qp, rx_queue); + } + /* also check if tail needs to be advanced */ + if (tmp_qp->inflights16 <= QAT_CSR_TAIL_FORCE_WRITE_THRESH && + tx_queue->tail != tx_queue->csr_tail) { + txq_write_tail(tmp_qp, tx_queue); } return msg_counter; } diff --git a/drivers/crypto/qat/qat_crypto.h b/drivers/crypto/qat/qat_crypto.h index d78957c..0ebb083 100644 --- a/drivers/crypto/qat/qat_crypto.h +++ b/drivers/crypto/qat/qat_crypto.h @@ -52,6 +52,10 @@ #define QAT_CSR_HEAD_WRITE_THRESH 32U /* number of requests to accumulate before writing head CSR */ +#define QAT_CSR_TAIL_WRITE_THRESH 32U +/* number of 
requests to accumulate before writing tail CSR */ +#define QAT_CSR_TAIL_FORCE_WRITE_THRESH 256U +/* number of inflights below which no tail write coalescing should occur */ struct qat_session; @@ -77,8 +81,11 @@ struct qat_queue { uint8_t hw_queue_number; /* HW queue aka ring offset on bundle */ uint32_t csr_head; /* last written head value */ + uint32_t csr_tail; /* last written tail value */ uint16_t nb_processed_responses; /* number of responses processed since last CSR head write */ + uint16_t nb_pending_requests; + /* number of requests pending since last CSR tail write */ }; struct qat_qp { -- 2.7.4