From: Ankur Dwivedi <ankur.dwiv...@cavium.com>

Adds the hardware enqueue/dequeue API of instructions to a queue pair
for the Cavium CPT device.
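For reviewers, below is a minimal poll-mode sketch of how a caller is
expected to drive this API. It is illustrative only and not part of the
patch: submit_and_poll() and BURST_SZ are hypothetical names, and it
assumes `inst` came from cptvf_get_resource() and that `req` is a fully
prepared cpt_request_info_t (EI0..EI3 words and completion address
already filled in, as cpt_enqueue_req() expects).

    #include "cpt.h"
    #include "cpt_request_mgr.h"

    #define BURST_SZ 32 /* hypothetical burst size */

    static int submit_and_poll(cpt_instance_t *inst,
                               cpt_request_info_t *req)
    {
            void *resp[BURST_SZ];
            uint8_t cc[BURST_SZ];
            int rc, n;

            /* cpt_queue_full() returns free slots; 0 means full */
            if (cpt_queue_full(inst) == 0)
                    return -EAGAIN;

            /* flags == 0: asynchronous submit, doorbell rung */
            rc = cpt_enqueue_req(inst, req, 0, NULL, 0);
            if (rc)
                    return rc;

            /* Reap completions; cc[i] is the completion code */
            do {
                    n = cpt_dequeue_burst(inst, BURST_SZ, resp, cc);
            } while (n == 0);

            return (cc[0] == 0) ? 0 : -EIO;
    }
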
Signed-off-by: Ankur Dwivedi <ankur.dwiv...@cavium.com>
Signed-off-by: Murthy NSSR <nidadavolu.mur...@cavium.com>
Signed-off-by: Nithin Dabilpuram <nithin.dabilpu...@cavium.com>
Signed-off-by: Ragothaman Jayaraman <ragothaman.jayara...@cavium.com>
Signed-off-by: Srisivasubramanian Srinivasan <srisivasubramanian.sriniva...@cavium.com>
---
 drivers/crypto/cpt/base/cpt.h             | 102 +++++++
 drivers/crypto/cpt/base/cpt_device.c      |   4 +-
 drivers/crypto/cpt/base/cpt_request_mgr.c | 424 ++++++++++++++++++++++++++++++
 drivers/crypto/cpt/base/cpt_request_mgr.h |  75 ++++++
 4 files changed, 603 insertions(+), 2 deletions(-)
 create mode 100644 drivers/crypto/cpt/base/cpt.h
 create mode 100644 drivers/crypto/cpt/base/cpt_request_mgr.c
 create mode 100644 drivers/crypto/cpt/base/cpt_request_mgr.h

diff --git a/drivers/crypto/cpt/base/cpt.h b/drivers/crypto/cpt/base/cpt.h
new file mode 100644
index 0000000..11407ae
--- /dev/null
+++ b/drivers/crypto/cpt/base/cpt.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc
+ */
+
+#ifndef __BASE_CPT_H__
+#define __BASE_CPT_H__
+
+/* Linux includes */
+#include <endian.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <sys/cdefs.h>
+#include <unistd.h>
+#include <assert.h>
+
+/* DPDK includes */
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_prefetch.h>
+
+#include "../cpt_pmd_logs.h"
+#include "mcode_defines.h"
+
+/** @cond __INTERNAL_DOCUMENTATION__ */
+
+/* Declarations */
+typedef struct cpt_instance cpt_instance_t;
+
+/*
+ * Generic Defines
+ */
+
+/* Buffer pointer */
+typedef struct buf_ptr {
+	void *vaddr;
+	phys_addr_t dma_addr;
+	uint32_t size;
+	uint32_t resv;
+} buf_ptr_t;
+
+/* IOV pointer */
+typedef struct {
+	int buf_cnt;
+	buf_ptr_t bufs[0];
+} iov_ptr_t;
+
+typedef struct app_data {
+	uint64_t pktout;
+	void *marker;
+} app_data_t;
+
+/* Instance operations */
+
+/* Enqueue an SE/AE request */
+int32_t cpt_enqueue_req(cpt_instance_t *instance, void *req, uint8_t flags,
+			void *event, uint64_t event_flags);
+
+/* Dequeue completed SE requests as a burst */
+int32_t cpt_dequeue_burst(cpt_instance_t *instance, uint16_t cnt,
+			  void *resp[], uint8_t cc[]);
+
+/* Marks event as done in event driven mode */
+int32_t cpt_event_mark_done(void *marker, uint8_t *op_error);
+
+/* Checks queue full condition */
+uint16_t cpt_queue_full(cpt_instance_t *instance);
+
+/* Misc */
+uint32_t cpt_get_instance_count(void);
+
+#define ENQ_FLAG_SYNC		0x01
+#define ENQ_FLAG_EVENT		0x02
+#define ENQ_FLAG_NODOORBELL	0x04
+#define ENQ_FLAG_ONLY_DOORBELL	0x08
+
+#define OCTTX_EVENT_TAG(__flags) ((__flags) & 0xffffffff)
+#define OCTTX_EVENT_GRP(__flags) (((__flags) >> 32) & 0xffff)
+#define OCTTX_EVENT_TT(__flags)  (((__flags) >> 48) & 0xff)
+
+#define OCTTX_EVENT_FLAGS(__tag, __grp, __tt)	\
+	(((uint64_t)__tag & 0xffffffff) |	\
+	(((uint64_t)__grp & 0xffff) << 32) |	\
+	(((uint64_t)__tt & 0xff) << 48))
+
+/* cpt instance */
+struct cpt_instance {
+	/* 0th cache line */
+	uint32_t queue_id;
+	uint64_t rsvd;
+};
+
+#define __hot __attribute__((hot))
+/** @endcond */
+
+#endif /* __BASE_CPT_H__ */
diff --git a/drivers/crypto/cpt/base/cpt_device.c b/drivers/crypto/cpt/base/cpt_device.c
index b7cd5b5..a50e5b8 100644
--- a/drivers/crypto/cpt/base/cpt_device.c
+++ b/drivers/crypto/cpt/base/cpt_device.c
@@ -193,7 +193,7 @@ int cptvf_get_resource(struct cpt_vf *dev,
 	uint64_t *next_ptr;
 	uint64_t pg_sz = sysconf(_SC_PAGESIZE);
 
-	PMD_DRV_LOG(DEBUG, "Initializing csp resource %s\n", cptvf->dev_name);
+	PMD_DRV_LOG(DEBUG, "Initializing cpt resource %s\n", cptvf->dev_name);
 
 	cpt_instance = &cptvf->instance;
 
@@ -323,7 +323,7 @@ int cptvf_put_resource(cpt_instance_t *instance)
 		return -EINVAL;
 	}
 
-	PMD_DRV_LOG(DEBUG, "Releasing csp device %s\n", cptvf->dev_name);
+	PMD_DRV_LOG(DEBUG, "Releasing cpt device %s\n", cptvf->dev_name);
 
 	rz = (struct rte_memzone *)instance->rsvd;
 	rte_memzone_free(rz);
diff --git a/drivers/crypto/cpt/base/cpt_request_mgr.c b/drivers/crypto/cpt/base/cpt_request_mgr.c
new file mode 100644
index 0000000..8b9b1ff
--- /dev/null
+++ b/drivers/crypto/cpt/base/cpt_request_mgr.c
@@ -0,0 +1,424 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc
+ */
+
+#include "cpt_request_mgr.h"
+#include "cpt_debug.h"
+#include <rte_atomic.h>
+
+#define MOD_INC(i, l) ((i) == (l - 1) ? (i) = 0 : (i)++)
+
+#define __hot __attribute__((hot))
+
+static inline uint64_t cpu_cycles(void)
+{
+	return rte_get_timer_cycles();
+}
+
+static inline uint64_t cpu_cycles_freq(void)
+{
+	return rte_get_timer_hz();
+}
+
+static inline void *
+get_cpt_inst(struct command_queue *cqueue, void *req)
+{
+	(void)req;
+	PMD_TX_LOG(DEBUG, "CPT queue idx %u, req %p\n", cqueue->idx, req);
+	return &cqueue->qhead[cqueue->idx * CPT_INST_SIZE];
+}
+
+static inline void
+mark_cpt_inst(struct cpt_vf *cptvf,
+	      struct command_queue *queue,
+	      uint32_t ring_door_bell)
+{
+#ifdef CMD_DEBUG
+	/* DEBUG */
+	{
+		uint32_t i = queue->idx * CPT_INST_SIZE;
+		cpt_inst_s_t *cmd = (void *)&queue->qhead[i];
+		uint64_t *p = (void *)&queue->qhead[i];
+
+		PRINT("\nQUEUE parameters:");
+		PRINT("Queue index   = %u\n", queue->idx);
+		PRINT("Queue HEAD    = %p\n", queue->qhead);
+		PRINT("Command Entry = %p\n", cmd);
+
+		PRINT("\nCPT_INST_S format:");
+		PRINT("cmd->s.doneint  = %x\n", cmd->s.doneint);
+		PRINT("cmd->s.res_addr = %lx\n", cmd->s.res_addr);
+		PRINT("cmd->s.grp      = %x\n", cmd->s.grp);
+		PRINT("cmd->s.tag      = %x\n", cmd->s.tag);
+		PRINT("cmd->s.tt       = %x\n", cmd->s.tt);
+		PRINT("cmd->s.wq_ptr   = %lx\n", cmd->s.wq_ptr);
+		PRINT("cmd->s.ei0      = %lx\n", cmd->s.ei0);
+		PRINT("cmd->s.ei1      = %lx\n", cmd->s.ei1);
+		PRINT("cmd->s.ei2      = %lx\n", cmd->s.ei2);
+		PRINT("cmd->s.ei3      = %lx\n", cmd->s.ei3);
+
+		PRINT("\nCommand dump from queue HEAD:");
+		for (i = 0; i < CPT_INST_SIZE / 8; i++)
+			PRINT("%lx\n", p[i]);
+	}
+#endif
+	if (unlikely(++queue->idx >= DEFAULT_CMD_QCHUNK_SIZE)) {
+		uint32_t cchunk = queue->cchunk;
+		MOD_INC(cchunk, DEFAULT_CMD_QCHUNKS);
+		queue->qhead = queue->chead[cchunk].head;
+		queue->idx = 0;
+		queue->cchunk = cchunk;
+	}
+
+	if (ring_door_bell) {
+		/* Memory barrier to flush pending writes */
+		rte_smp_wmb();
+		cptvf_write_vq_doorbell(cptvf, ring_door_bell);
+	}
+}
+
+static inline uint8_t
+check_nb_command_id(cpt_request_info_t *user_req, struct cpt_vf *cptvf)
+{
+	uint8_t ret = ERR_REQ_PENDING;
+	volatile cpt_res_s_t *cptres;
+
+	cptres = (volatile cpt_res_s_t *)user_req->completion_addr;
+
+	if (unlikely(cptres->s.compcode == CPT_COMP_E_NOTDONE)) {
+		/*
+		 * Wait for some time for this command to get completed
+		 * before timing out
+		 */
+		if (cpu_cycles() < user_req->time_out)
+			return ret;
+		/*
+		 * TODO: See if alternate caddr can be used to not loop
+		 * longer than needed.
+		 */
+		if ((cptres->s.compcode == CPT_COMP_E_NOTDONE) &&
+		    (user_req->extra_time < TIME_IN_RESET_COUNT)) {
+			user_req->extra_time++;
+			return ret;
+		}
+
+		if (cptres->s.compcode != CPT_COMP_E_NOTDONE)
+			goto complete;
+
+		ret = ERR_REQ_TIMEOUT;
+		PMD_DRV_LOG_RAW(ERR, "Request %p timed out\n", user_req);
+		cptvf_poll_misc(cptvf);
+		dump_cpt_request_sglist(&user_req->dbg_inst,
+					"Response Packet Gather in", 1, 1);
+		goto exit;
+	}
+
+complete:
+	if (likely(cptres->s.compcode == CPT_COMP_E_GOOD)) {
+		ret = 0; /* success */
+		PMD_RX_LOG(DEBUG, "MC status %.8x\n",
+			   *((volatile uint32_t *)user_req->alternate_caddr));
+		PMD_RX_LOG(DEBUG, "HW status %.8x\n",
+			   *((volatile uint32_t *)user_req->completion_addr));
+	} else if ((cptres->s.compcode == CPT_COMP_E_SWERR) ||
+		   (cptres->s.compcode == CPT_COMP_E_FAULT)) {
+		ret = (uint8_t)*user_req->alternate_caddr;
+		if (!ret)
+			ret = ERR_BAD_ALT_CCODE;
+		PMD_RX_LOG(DEBUG, "Request %p failed with %s, err code %x\n",
+			   user_req,
+			   (cptres->s.compcode == CPT_COMP_E_FAULT) ?
+			   "DMA Fault" : "Software error", ret);
+	} else {
+		PMD_DRV_LOG_RAW(ERR,
+				"Request %p: unexpected completion code %d\n",
+				user_req, cptres->s.compcode);
+		ret = (uint8_t)*user_req->alternate_caddr;
+	}
+
+exit:
+	dump_cpt_request_sglist(&user_req->dbg_inst,
+				"Response Packet Scatter Out", 1, 0);
+	return ret;
+}
+
+/*
+ * cpt_enqueue_req()
+ *
+ * SE & AE request enqueue function
+ */
+int32_t __hot
+cpt_enqueue_req(cpt_instance_t *instance, void *req, uint8_t flags,
+		void *event, uint64_t event_flags)
+{
+	struct pending_queue *pqueue;
+	struct cpt_vf *cptvf;
+	cpt_inst_s_t *cpt_ist_p = NULL;
+	cpt_request_info_t *user_req = (cpt_request_info_t *)req;
+	struct command_queue *cqueue;
+	int32_t ret = 0;
+
+#ifdef CPTVF_STRICT_PARAM_CHECK
+	if (unlikely(!instance)) {
+		PMD_DRV_LOG_RAW(ERR, "Invalid inputs (instance: %p, req: %p)\n",
+				instance, req);
+		return -EINVAL;
+	}
+#endif
+
+	cptvf = (struct cpt_vf *)instance;
+	pqueue = &cptvf->pqueue;
+
+	if (unlikely(!req)) {
+		/* ring only pending doorbells */
+		if ((flags & ENQ_FLAG_ONLY_DOORBELL) && pqueue->p_doorbell) {
+			/* Memory barrier to flush pending writes */
+			rte_smp_wmb();
+			cptvf_write_vq_doorbell(cptvf, pqueue->p_doorbell);
+			pqueue->p_doorbell = 0;
+		}
+		return 0;
+	}
+
+#if defined(ATOMIC_THROTTLING_COUNTER)
+	/* Ask the application to try again later */
+	if (unlikely(cpt_pmd_pcount_load(&pqueue->pending_count) >=
+		     DEFAULT_CMD_QLEN)) {
+		return -EAGAIN;
+	}
+#else
+	if (unlikely(pqueue->pending_count >= DEFAULT_CMD_QLEN))
+		return -EAGAIN;
+#endif
+	cqueue = &cptvf->cqueue;
+	cpt_ist_p = get_cpt_inst(cqueue, req);
+	rte_prefetch_non_temporal(cpt_ist_p);
+
+	/* EI0, EI1, EI2, EI3 are already prepared */
+	/* HW W0 */
+	cpt_ist_p->u[0] = 0;
+	/* HW W1 */
+	cpt_ist_p->s.res_addr = user_req->comp_baddr;
+	/* HW W2 */
+	cpt_ist_p->u[2] = 0;
+	/* HW W3 */
+	cpt_ist_p->s.wq_ptr = 0;
+
+	/* MC EI0 */
+	cpt_ist_p->s.ei0 = user_req->ist.ei0;
+	/* MC EI1 */
+	cpt_ist_p->s.ei1 = user_req->ist.ei1;
+	/* MC EI2 */
+	cpt_ist_p->s.ei2 = user_req->ist.ei2;
+	/* MC EI3 */
+	cpt_ist_p->s.ei3 = user_req->ist.ei3;
+
+	PMD_TX_LOG(DEBUG, "req: %p op: %p dma_mode 0x%x se_req %u\n",
+		   req,
+		   user_req->op,
+		   user_req->dma_mode,
+		   user_req->se_req);
+
+#ifdef CPT_DEBUG
+	{
+		vq_cmd_word0_t vq_cmd_w0;
+		vq_cmd_word3_t vq_cmd_w3;
+
+		vq_cmd_w3.u64 = cpt_ist_p->s.ei3;
+		vq_cmd_w0.u64 = be64toh(cpt_ist_p->s.ei0);
+		user_req->dbg_inst = *cpt_ist_p;
+
+		if (vq_cmd_w3.s.cptr) {
+			PMD_TX_LOG(DEBUG, "Context Handle: 0x%016lx\n",
+				   (uint64_t)vq_cmd_w3.s.cptr);
+			/* Dump max context, i.e. 448 bytes */
+			cpt_dump_buffer("CONTEXT",
+					os_iova2va((uint64_t)vq_cmd_w3.s.cptr),
+					448);
+		}
+
+		dump_cpt_request_info(user_req, cpt_ist_p);
+		dump_cpt_request_sglist(cpt_ist_p, "Request (src)", 1, 1);
+		dump_cpt_request_sglist(cpt_ist_p, "Request (dst)", 0, 0);
+		cpt_dump_buffer("VQ command word0", &cpt_ist_p->u[4],
+				sizeof(vq_cmd_w0));
+		cpt_dump_buffer("VQ command word1", &cpt_ist_p->u[5],
+				sizeof(uint64_t));
+		cpt_dump_buffer("VQ command word2", &cpt_ist_p->u[6],
+				sizeof(uint64_t));
+		cpt_dump_buffer("VQ command word3", &cpt_ist_p->u[7],
+				sizeof(vq_cmd_w3));
+	}
+#endif
+
+	if (likely(!(flags & ENQ_FLAG_SYNC))) {
+		void *op = user_req->op;
+
+		if (unlikely(flags & ENQ_FLAG_EVENT)) {
+			app_data_t *app_data = op;
+
+			/* Event based completion */
+			cpt_ist_p->s.tag = OCTTX_EVENT_TAG(event_flags);
+			cpt_ist_p->s.grp = OCTTX_EVENT_GRP(event_flags);
+			cpt_ist_p->s.tt = OCTTX_EVENT_TT(event_flags);
+			cpt_ist_p->s.wq_ptr = (uint64_t)event;
+
+#if defined(ATOMIC_THROTTLING_COUNTER)
+			app_data->marker = user_req;
+			__atomic_fetch_add(&pqueue->pending_count,
+					   1, __ATOMIC_RELAXED);
+#else
+			rid_t *rid_e;
+			/*
+			 * Mark it as in progress in the pending queue;
+			 * software will mark it done when the completion
+			 * is received
+			 */
+			rid_e = &pqueue->rid_queue[pqueue->enq_tail];
+			rid_e->rid = (uint64_t)user_req;
+			/* rid_e->op = op; */
+			MOD_INC(pqueue->enq_tail, DEFAULT_CMD_QLEN);
+			app_data->marker = rid_e;
+#endif
+
+			cpt_dump_buffer("CPT Instruction with wqe", cpt_ist_p,
+					sizeof(*cpt_ist_p));
+
+			mark_cpt_inst(cptvf, cqueue, 1);
+
+		} else {
+			uint32_t doorbell = 0;
+
+			if (likely(flags & ENQ_FLAG_NODOORBELL))
+				pqueue->p_doorbell++;
+			else
+				doorbell = ++pqueue->p_doorbell;
+
+			/* Fill time_out cycles */
+			user_req->time_out = cpu_cycles() +
+				DEFAULT_COMMAND_TIMEOUT * cpu_cycles_freq();
+			user_req->extra_time = 0;
+
+			cpt_dump_buffer("CPT Instruction", cpt_ist_p,
+					sizeof(*cpt_ist_p));
+
+			/* Default mode of software queue */
+			mark_cpt_inst(cptvf, cqueue, doorbell);
+
+			pqueue->p_doorbell -= doorbell;
+			pqueue->rid_queue[pqueue->enq_tail].rid =
+				(uint64_t)user_req;
+			/* pqueue->rid_queue[pqueue->enq_tail].op = op; */
+			/*
+			 * We use the soft queue length here to limit
+			 * requests
+			 */
+			MOD_INC(pqueue->enq_tail, DEFAULT_CMD_QLEN);
+			pqueue->pending_count += 1;
+		}
+
+		PMD_TX_LOG(DEBUG, "Submitted NB cmd with request: %p op: %p\n",
+			   user_req, op);
+	} else {
+		/*
+		 * Synchronous operation,
+		 * hold until completion / timeout
+		 */
+		/* Fill time_out cycles */
+		user_req->time_out = cpu_cycles() +
+			DEFAULT_COMMAND_TIMEOUT * cpu_cycles_freq();
+		user_req->extra_time = 0;
+
+		cpt_dump_buffer("CPT Instruction", cpt_ist_p,
+				sizeof(*cpt_ist_p));
+
+		/* Default mode of software queue */
+		mark_cpt_inst(cptvf, cqueue, 1);
+
+		do {
+			/* TODO: should we pause? */
+			ret = check_nb_command_id(user_req, cptvf);
+			cptvf_poll_misc(cptvf);
+		} while (ret == ERR_REQ_PENDING);
+
+		PMD_TX_LOG(DEBUG, "Completed blocking cmd req: 0x%016llx, rc 0x%x\n",
+			   (unsigned long long)user_req, ret);
+	}
+
+	return ret;
+}
+
+int32_t __hot
+cpt_dequeue_burst(cpt_instance_t *instance, uint16_t cnt,
+		  void *resp[], uint8_t cc[])
+{
+	struct cpt_vf *cptvf = (struct cpt_vf *)instance;
+	struct pending_queue *pqueue = &cptvf->pqueue;
+	cpt_request_info_t *user_req;
+	rid_t *rid_e;
+	int i, count, pcount;
+	uint8_t ret;
+
+	pcount = pqueue->pending_count;
+	count = (cnt > pcount) ? pcount : cnt;
+
+	for (i = 0; i < count; i++) {
+		rid_e = &pqueue->rid_queue[pqueue->deq_head];
+		user_req = (cpt_request_info_t *)(rid_e->rid);
+
+		if (likely((i + 1) < count))
+			rte_prefetch_non_temporal((void *)rid_e[1].rid);
+
+		ret = check_nb_command_id(user_req, cptvf);
+
+		if (unlikely(ret == ERR_REQ_PENDING)) {
+			/* Stop checking for completions */
+			break;
+		}
+
+		/* Return completion code and op handle */
+		cc[i] = (uint8_t)ret;
+		resp[i] = user_req->op;
+		PMD_RX_LOG(DEBUG, "Request %p Op %p completed with code %d",
+			   user_req, user_req->op, ret);
+
+		MOD_INC(pqueue->deq_head, DEFAULT_CMD_QLEN);
+		pqueue->pending_count -= 1;
+	}
+
+	return i;
+}
+
+uint16_t __hot
+cpt_queue_full(cpt_instance_t *instance)
+{
+	struct cpt_vf *cptvf;
+	struct pending_queue *pqueue;
+	uint16_t avail;
+
+	cptvf = (struct cpt_vf *)instance;
+	pqueue = &cptvf->pqueue;
+#if defined(ATOMIC_THROTTLING_COUNTER)
+	avail = DEFAULT_CMD_QLEN - cpt_pmd_pcount_load(&pqueue->pending_count);
+	/* Ask the application to try again later */
+	if (avail <= 0)
+		return 0;
+
+	return avail;
+#else
+	/* Number of free slots left in the software pending queue */
+	avail = DEFAULT_CMD_QLEN - pqueue->pending_count;
+	return avail;
+#endif
+}
diff --git a/drivers/crypto/cpt/base/cpt_request_mgr.h b/drivers/crypto/cpt/base/cpt_request_mgr.h
new file mode 100644
index 0000000..dfa4046
--- /dev/null
+++ b/drivers/crypto/cpt/base/cpt_request_mgr.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc
+ */
+
+#ifndef __REQUEST_MANAGER_H
+#define __REQUEST_MANAGER_H
+
+#include "cpt8xxx_device.h"
+
+#define TIME_IN_RESET_COUNT	5
+#define COMPLETION_CODE_SIZE	8
+#define COMPLETION_CODE_INIT	0
+
+#define SG_LIST_HDR_SIZE	(8u)
+#define SG_ENTRY_SIZE		sizeof(sg_comp_t)
+
+#define AE_CORE_REQ		0
+#define SE_CORE_REQ		1
+
+#define CTRL_DMA_MODE_SGIO	2 /* DMA mode, but SGIO is already set up */
+
+#define MRS(reg)						\
+	({							\
+		uint64_t val;					\
+		__asm volatile("mrs %0, " #reg : "=r" (val));	\
+		val;						\
+	})
+
+int calculate_pad(uint8_t *ipad, uint8_t *opad, auth_type_t hash_type,
+		  uint8_t *key, uint32_t keylen);
+
+typedef union opcode_info {
+	uint16_t flags;
+	struct {
+		uint8_t major;
+		uint8_t minor;
+	} s;
+} opcode_info_t;
+
+typedef struct sglist_comp {
+	union {
+		uint64_t len;
+		struct {
+			uint16_t len[4];
+		} s;
+	} u;
+	uint64_t ptr[4];
+} sg_comp_t;
+
+struct cpt_request_info {
+	/* fast path fields */
+	uint64_t dma_mode	: 2;	/**< DMA mode */
+	uint64_t se_req		: 1;	/**< To SE core */
+	uint64_t comp_baddr	: 61;
+	volatile uint64_t *completion_addr;
+	volatile uint64_t *alternate_caddr;
+	void *op;			/**< Reference to operation */
+	struct {
+		uint64_t ei0;
+		uint64_t ei1;
+		uint64_t ei2;
+		uint64_t ei3;
+	} ist;
+
+	/* slow path fields */
+	uint64_t time_out;
+	uint8_t extra_time;
+#ifdef CPT_DEBUG
+	cpt_inst_s_t dbg_inst;
+#endif
+};
+
+typedef struct cpt_request_info cpt_request_info_t;
+#endif /* __REQUEST_MANAGER_H */
-- 
1.9.3