-----Original Message----- > Date: Fri, 8 Jun 2018 22:15:13 +0530 > From: Anoob Joseph <anoob.jos...@caviumnetworks.com> > To: Akhil Goyal <akhil.go...@nxp.com>, Pablo de Lara > <pablo.de.lara.gua...@intel.com>, Thomas Monjalon <tho...@monjalon.net> > Cc: Ankur Dwivedi <ankur.dwiv...@cavium.com>, Jerin Jacob > <jerin.ja...@caviumnetworks.com>, Murthy NSSR > <nidadavolu.mur...@cavium.com>, Narayana Prasad > <narayanaprasad.athr...@caviumnetworks.com>, Nithin Dabilpuram > <nithin.dabilpu...@cavium.com>, Ragothaman Jayaraman > <ragothaman.jayara...@cavium.com>, Srisivasubramanian Srinivasan > <srisivasubramanian.sriniva...@cavium.com>, dev@dpdk.org > Subject: [PATCH 04/16] crypto/cpt/base: add hardware enq/deq API for CPT > X-Mailer: git-send-email 2.7.4 > > From: Ankur Dwivedi <ankur.dwiv...@cavium.com> > > Adds hardware enqueue/dequeue API of instructions to a queue pair > for Cavium CPT device. > > Signed-off-by: Ankur Dwivedi <ankur.dwiv...@cavium.com> > Signed-off-by: Murthy NSSR <nidadavolu.mur...@cavium.com> > Signed-off-by: Nithin Dabilpuram <nithin.dabilpu...@cavium.com> > Signed-off-by: Ragothaman Jayaraman <ragothaman.jayara...@cavium.com> > Signed-off-by: Srisivasubramanian Srinivasan > <srisivasubramanian.sriniva...@cavium.com> > --- > drivers/crypto/cpt/base/cpt.h | 102 +++++++ > drivers/crypto/cpt/base/cpt_device.c | 4 +- > drivers/crypto/cpt/base/cpt_request_mgr.c | 424 > ++++++++++++++++++++++++++++++ > drivers/crypto/cpt/base/cpt_request_mgr.h | 75 ++++++ > 4 files changed, 603 insertions(+), 2 deletions(-) > create mode 100644 drivers/crypto/cpt/base/cpt.h > create mode 100644 drivers/crypto/cpt/base/cpt_request_mgr.c > create mode 100644 drivers/crypto/cpt/base/cpt_request_mgr.h > > diff --git a/drivers/crypto/cpt/base/cpt.h b/drivers/crypto/cpt/base/cpt.h > new file mode 100644 > index 0000000..11407ae > --- /dev/null > +++ b/drivers/crypto/cpt/base/cpt.h > @@ -0,0 +1,102 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2017 
Cavium, Inc > + */ > + > +#ifndef __BASE_CPT_H__ > +#define __BASE_CPT_H__ > + > +/* Linux Includes */ > +#include <endian.h> > +#include <stdint.h> > +#include <string.h> > +#include <stdio.h> > +#include <stdbool.h> > +#include <errno.h> > +#include <sys/cdefs.h> > +#include <unistd.h> > +#include <assert.h>
alphabetical order > + > +/* DPDK includes */ > +#include <rte_byteorder.h> > +#include <rte_common.h> > +#include <rte_errno.h> > +#include <rte_memory.h> > +#include <rte_prefetch.h> > + > +#include "../cpt_pmd_logs.h" > +#include "mcode_defines.h" > + > +/** @cond __INTERNAL_DOCUMENTATION__ */ > + > +/* Declarations */ > +typedef struct cpt_instance cpt_instance_t; > + > +/* > + * Generic Defines > + */ > + > +/* Buffer pointer */ > +typedef struct buf_ptr { > + void *vaddr; > + phys_addr_t dma_addr; > + uint32_t size; > + uint32_t resv; > +} buf_ptr_t; > + > +/* IOV Pointer */ > +typedef struct{ > + int buf_cnt; > + buf_ptr_t bufs[0]; > +} iov_ptr_t; > + > +typedef struct app_data { > + uint64_t pktout; > + void *marker; > +} app_data_t; > + > +/* Instance operations */ > + > +/* Enqueue an SE/AE request */ > +int cpt_enqueue_req(cpt_instance_t *inst, void *req, uint8_t flags, > + void *event, uint64_t event_flags); > + > +/* Dequeue completed SE requests as burst */ > +int32_t cpt_dequeue_burst(cpt_instance_t *instance, uint16_t cnt, > + void *resp[], uint8_t cc[]); > + > +/* Marks event as done in event driven mode */ > +int32_t cpt_event_mark_done(void *marker, uint8_t *op_error); > + > +/* Checks queue full condition */ > +uint16_t cpt_queue_full(cpt_instance_t *instance); > + > +/* Misc */ > +uint32_t cpt_get_instance_count(void); > + > +#define ENQ_FLAG_SYNC 0x01 > +#define ENQ_FLAG_EVENT 0x02 > +#define ENQ_FLAG_NODOORBELL 0x04 > +#define ENQ_FLAG_ONLY_DOORBELL 0x08 > + > + > +#define OCTTX_EVENT_TAG(__flags) (__flags & 0xffffffff) > +#define OCTTX_EVENT_GRP(__flags) ((__flags >> 32) & 0xffff) > +#define OCTTX_EVENT_TT(__flags) ((__flags >> 48) & 0xff) > + > +#define OCTTX_EVENT_FLAGS(__tag, __grp, __tt) \ > + (((uint64_t)__tag & 0xffffffff) | \ > + (((uint64_t)__grp & 0xffff) << 32) | \ > + (((uint64_t)__tt & 0xff) << 48)) > + > + > +/* cpt instance */ > +struct cpt_instance { > + /* 0th cache line */ > + uint32_t queue_id; > + uint64_t rsvd; > +}; > + 
#ifndef __hot > +#define __hot __attribute__((hot)) #endif > +/** @endcond */ > + > +#endif /* __BASE_CPT_H__ */ > diff --git a/drivers/crypto/cpt/base/cpt_device.c > b/drivers/crypto/cpt/base/cpt_device.c > index b7cd5b5..a50e5b8 100644 > --- a/drivers/crypto/cpt/base/cpt_device.c > +++ b/drivers/crypto/cpt/base/cpt_device.c > @@ -193,7 +193,7 @@ int cptvf_get_resource(struct cpt_vf *dev, > uint64_t *next_ptr; > uint64_t pg_sz = sysconf(_SC_PAGESIZE); > > - PMD_DRV_LOG(DEBUG, "Initializing csp resource %s\n", cptvf->dev_name); > + PMD_DRV_LOG(DEBUG, "Initializing cpt resource %s\n", cptvf->dev_name); > > cpt_instance = &cptvf->instance; > > @@ -323,7 +323,7 @@ int cptvf_put_resource(cpt_instance_t *instance) > return -EINVAL; > } > > - PMD_DRV_LOG(DEBUG, "Releasing csp device %s\n", cptvf->dev_name); > + PMD_DRV_LOG(DEBUG, "Releasing cpt device %s\n", cptvf->dev_name); > > rz = (struct rte_memzone *)instance->rsvd; > rte_memzone_free(rz); > diff --git a/drivers/crypto/cpt/base/cpt_request_mgr.c > b/drivers/crypto/cpt/base/cpt_request_mgr.c > new file mode 100644 > index 0000000..8b9b1ff > --- /dev/null > +++ b/drivers/crypto/cpt/base/cpt_request_mgr.c > @@ -0,0 +1,424 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2017 Cavium, Inc > + */ > + > +#include "cpt_request_mgr.h" > +#include "cpt_debug.h" > +#include <rte_atomic.h> > + > +#define MOD_INC(i, l) ((i) == (l - 1) ? 
(i) = 0 : (i)++) > + > +#define __hot __attribute__((hot)) same as above > + > +static inline uint64_t cpu_cycles(void) > +{ > + return rte_get_timer_cycles(); > +} > + > +static inline uint64_t cpu_cycles_freq(void) > +{ > + return rte_get_timer_hz(); > +} > + > +static inline void * > +get_cpt_inst(struct command_queue *cqueue, void *req) > +{ > + (void)req; > + PMD_TX_LOG(DEBUG, "CPT queue idx %u, req %p\n", cqueue->idx, req); > + return &cqueue->qhead[cqueue->idx * CPT_INST_SIZE]; > +} > + > +static inline void > +mark_cpt_inst(struct cpt_vf *cptvf, > + struct command_queue *queue, > + uint32_t ring_door_bell) > +{ > +#ifdef CMD_DEBUG > + /* DEBUG */ > + { > + uint32_t i = queue->idx * CPT_INST_SIZE; > + cpt_inst_s_t *cmd = (void *)&queue->qhead[i]; > + uint64_t *p = (void *)&queue->qhead[i]; > + > + PRINT("\nQUEUE parameters:"); > + PRINT("Queue index = %u\n", > + queue->idx); > + PRINT("Queue HEAD = %p\n", > + queue->qhead); > + PRINT("Command Entry = %p\n", > + cmd); > + > + PRINT("\nCPT_INST_S format:"); > + PRINT("cmd->s.doneint = %x\n", cmd->s.doneint); > + PRINT("cmd->s.res_addr = %lx\n", cmd->s.res_addr); > + PRINT("cmd->s.grp = %x\n", cmd->s.grp); > + PRINT("cmd->s.tag = %x\n", cmd->s.tag); > + PRINT("cmd->s.tt = %x\n", cmd->s.tt); > + PRINT("cmd->s.wq_ptr = %lx\n", cmd->s.wq_ptr); > + PRINT("cmd->s.ei0 = %lx\n", cmd->s.ei0); > + PRINT("cmd->s.ei1 = %lx\n", cmd->s.ei1); > + PRINT("cmd->s.ei2 = %lx\n", cmd->s.ei2); > + PRINT("cmd->s.ei3 = %lx\n", cmd->s.ei3); > + > + PRINT("\nCommand dump from queue HEAD:"); > + for (i = 0; i < CPT_INST_SIZE / 8; i++) > + PRINT("%lx\n", p[i]); > + } > +#endif > + if (unlikely(++queue->idx >= DEFAULT_CMD_QCHUNK_SIZE)) { > + uint32_t cchunk = queue->cchunk; > + MOD_INC(cchunk, DEFAULT_CMD_QCHUNKS); > + queue->qhead = queue->chead[cchunk].head; > + queue->idx = 0; > + queue->cchunk = cchunk; > + } > + > + if (ring_door_bell) { > + /* Memory barrier to flush pending writes */ > + rte_smp_wmb(); > + 
cptvf_write_vq_doorbell(cptvf, ring_door_bell); > + } > +} > + > +static inline uint8_t > +check_nb_command_id(cpt_request_info_t *user_req, struct cpt_vf *cptvf) > +{ > + uint8_t ret = ERR_REQ_PENDING; > + volatile cpt_res_s_t *cptres; > + > + cptres = (volatile cpt_res_s_t *)user_req->completion_addr; > + > + if (unlikely(cptres->s.compcode == CPT_COMP_E_NOTDONE)) { > + /* > + * Wait for some time for this command to get completed > + * before timing out > + */ > + if (cpu_cycles() < user_req->time_out) > + return ret; > + /* > + * TODO: See if alternate caddr can be used to not loop > + * longer than needed. > + */ > + if ((cptres->s.compcode == CPT_COMP_E_NOTDONE) && > + (user_req->extra_time < TIME_IN_RESET_COUNT)) { > + user_req->extra_time++; > + return ret; > + } > + > + if (cptres->s.compcode != CPT_COMP_E_NOTDONE) > + goto complete; > + > + ret = ERR_REQ_TIMEOUT; > + PMD_DRV_LOG_RAW(ERR, "Request %p timedout\n", user_req); > + cptvf_poll_misc(cptvf); > + dump_cpt_request_sglist(&user_req->dbg_inst, > + "Response Packet Gather in", 1, 1); > + goto exit; > + } > + > +complete: > + if (likely(cptres->s.compcode == CPT_COMP_E_GOOD)) { > + ret = 0; /* success */ > + PMD_RX_LOG(DEBUG, "MC status %.8x\n", > + *((volatile uint32_t *)user_req->alternate_caddr)); > + PMD_RX_LOG(DEBUG, "HW status %.8x\n", > + *((volatile uint32_t *)user_req->completion_addr)); > + } else if ((cptres->s.compcode == CPT_COMP_E_SWERR) || > + (cptres->s.compcode == CPT_COMP_E_FAULT)) { > + ret = (uint8_t)*user_req->alternate_caddr; > + if (!ret) > + ret = ERR_BAD_ALT_CCODE; > + PMD_RX_LOG(DEBUG, "Request %p : failed with %s : err code :" > + "%x\n", user_req, > + (cptres->s.compcode == CPT_COMP_E_FAULT) ? 
> + "DMA Fault" : "Software error", ret); > + } else { > + PMD_DRV_LOG_RAW(ERR, "Request %p : unexpected completion code" > + " %d\n", > + user_req, cptres->s.compcode); > + ret = (uint8_t)*user_req->alternate_caddr; > + } > + > +exit: > + dump_cpt_request_sglist(&user_req->dbg_inst, > + "Response Packet Scatter Out", 1, 0); > + return ret; > +} > + > + > +/* > + * cpt_enqueue_req() > + * > + * SE & AE request enqueue function > + */ > +int32_t __hot > +cpt_enqueue_req(cpt_instance_t *instance, void *req, uint8_t flags, > + void *event, uint64_t event_flags) > +{ > + struct pending_queue *pqueue; > + struct cpt_vf *cptvf; > + cpt_inst_s_t *cpt_ist_p = NULL; > + cpt_request_info_t *user_req = (cpt_request_info_t *)req; > + struct command_queue *cqueue; > + int32_t ret = 0; > + > +#ifdef CPTVF_STRICT_PARAM_CHECK > + if (unlikely(!instance)) { > + PMD_DRV_LOG_RAW(ERR, "Invalid inputs (instance: %p, req: %p)\n", > + instance, req); > + return -EINVAL; > + } > +#endif > + > + cptvf = (struct cpt_vf *)instance; > + pqueue = &cptvf->pqueue; > + > + if (unlikely(!req)) { > + /* ring only pending doorbells */ > + if ((flags & ENQ_FLAG_ONLY_DOORBELL) && pqueue->p_doorbell) { > + /* Memory barrier to flush pending writes */ > + rte_smp_wmb(); Cross check it is rte_wmb() indented here as it barrier between device and CPU > + cptvf_write_vq_doorbell(cptvf, pqueue->p_doorbell); > + pqueue->p_doorbell = 0; > + } > + return 0; > + } > + > +#if defined(ATOMIC_THROTTLING_COUNTER) if this config useful for end user then expose it in config file and explain the details in documentation. 
> + /* Ask the application to try again later */ > + if (unlikely(cpt_pmd_pcount_load(&pqueue->pending_count) >= > + DEFAULT_CMD_QLEN)) { > + return -EAGAIN; > + } > +#else > + if (unlikely(pqueue->pending_count >= DEFAULT_CMD_QLEN)) > + return -EAGAIN; > +#endif > + cqueue = &cptvf->cqueue; > + cpt_ist_p = get_cpt_inst(cqueue, req); > + rte_prefetch_non_temporal(cpt_ist_p); > + > + /* EI0, EI1, EI2, EI3 are already prepared */ > + /* HW W0 */ > + cpt_ist_p->u[0] = 0; > + /* HW W1 */ > + cpt_ist_p->s.res_addr = user_req->comp_baddr; > + /* HW W2 */ > + cpt_ist_p->u[2] = 0; > + /* HW W3 */ > + cpt_ist_p->s.wq_ptr = 0; > + > + /* MC EI0 */ > + cpt_ist_p->s.ei0 = user_req->ist.ei0; > + /* MC EI1 */ > + cpt_ist_p->s.ei1 = user_req->ist.ei1; > + /* MC EI2 */ > + cpt_ist_p->s.ei2 = user_req->ist.ei2; > + /* MC EI3 */ > + cpt_ist_p->s.ei3 = user_req->ist.ei3; > + > + PMD_TX_LOG(DEBUG, "req: %p op: %p dma_mode 0x%x se_req %u\n", > + req, > + user_req->op, > + user_req->dma_mode, > + user_req->se_req); > + > +#ifdef CPT_DEBUG > + { > + vq_cmd_word0_t vq_cmd_w0; > + vq_cmd_word3_t vq_cmd_w3; > + > + vq_cmd_w3.u64 = cpt_ist_p->s.ei3; > + vq_cmd_w0.u64 = be64toh(cpt_ist_p->s.ei0); > + user_req->dbg_inst = *cpt_ist_p; > + > + if (vq_cmd_w3.s.cptr) { > + PMD_TX_LOG(DEBUG, "Context Handle: 0x%016lx\n", > + (uint64_t)vq_cmd_w3.s.cptr); > + /* Dump max context i.e 448 bytes */ > + cpt_dump_buffer("CONTEXT", > + os_iova2va((uint64_t)vq_cmd_w3.s.cptr), > + 448); > + } > + > + dump_cpt_request_info(user_req, cpt_ist_p); > + dump_cpt_request_sglist(cpt_ist_p, "Request (src)", 1, 1); > + dump_cpt_request_sglist(cpt_ist_p, "Request (dst)", 0, 0); > + cpt_dump_buffer("VQ command word0", &cpt_ist_p->u[4], > + sizeof(vq_cmd_w0)); > + cpt_dump_buffer("VQ command word1", &cpt_ist_p->u[5], > + sizeof(uint64_t)); > + cpt_dump_buffer("VQ command word2", &cpt_ist_p->u[6], > + sizeof(uint64_t)); > + cpt_dump_buffer("VQ command word3", &cpt_ist_p->u[7], > + sizeof(vq_cmd_w3)); > + } > +#endif > + > 
+ if (likely(!(flags & ENQ_FLAG_SYNC))) { > + void *op = user_req->op; > + > + if (unlikely(flags & ENQ_FLAG_EVENT)) { > + app_data_t *app_data = op; > + > + /* Event based completion */ > + cpt_ist_p->s.tag = OCTTX_EVENT_TAG(event_flags); > + cpt_ist_p->s.grp = OCTTX_EVENT_GRP(event_flags); > + cpt_ist_p->s.tt = OCTTX_EVENT_TT(event_flags); > + cpt_ist_p->s.wq_ptr = (uint64_t)event; > + > +#if defined(ATOMIC_THROTTLING_COUNTER) > + app_data->marker = user_req; > + __atomic_fetch_add(&pqueue->pending_count, > + 1, __ATOMIC_RELAXED); > +#else > + rid_t *rid_e; > + /* > + * Mark it as in progress in pending queue, software > + * will mark it when completion is received > + */ > + rid_e = &pqueue->rid_queue[pqueue->enq_tail]; > + rid_e->rid = (uint64_t)user_req; > + /* rid_e->op = op; */ > + MOD_INC(pqueue->enq_tail, DEFAULT_CMD_QLEN); > + app_data->marker = rid_e; > +#endif > + > + cpt_dump_buffer("CPT Instruction with wqe", cpt_ist_p, > + sizeof(*cpt_ist_p)); > + > + mark_cpt_inst(cptvf, cqueue, 1); > + > + } else { > + uint32_t doorbell = 0; > + > + if (likely(flags & ENQ_FLAG_NODOORBELL)) > + pqueue->p_doorbell++; > + else > + doorbell = ++pqueue->p_doorbell; > + > + /* Fill time_out cycles */ > + user_req->time_out = cpu_cycles() + > + DEFAULT_COMMAND_TIMEOUT * cpu_cycles_freq(); > + user_req->extra_time = 0; > + > + cpt_dump_buffer("CPT Instruction", cpt_ist_p, > + sizeof(*cpt_ist_p)); > + > + /* Default mode of software queue */ > + mark_cpt_inst(cptvf, cqueue, doorbell); > + > + pqueue->p_doorbell -= doorbell; > + pqueue->rid_queue[pqueue->enq_tail].rid = > + (uint64_t)user_req; > + /* pqueue->rid_queue[pqueue->enq_tail].op = op; */ > + /* We will use soft queue length here to limit > + * requests > + */ > + MOD_INC(pqueue->enq_tail, DEFAULT_CMD_QLEN); > + pqueue->pending_count += 1; > + } > + > + PMD_TX_LOG(DEBUG, "Submitted NB cmd with request: %p op: %p\n", > + user_req, op); > + } else { > + /* > + * Synchronous operation, > + * hold until completion / 
timeout > + */ > + /* Fill time_out cycles */ > + user_req->time_out = cpu_cycles() + > + DEFAULT_COMMAND_TIMEOUT * cpu_cycles_freq(); > + user_req->extra_time = 0; > + > + cpt_dump_buffer("CPT Instruction", cpt_ist_p, > + sizeof(*cpt_ist_p)); > + > + /* Default mode of software queue */ > + mark_cpt_inst(cptvf, cqueue, 1); > + > + do { > + /* TODO: should we pause */ > + ret = check_nb_command_id(user_req, cptvf); > + cptvf_poll_misc(cptvf); > +#if 0 Why is this block disabled with "#if 0"? Please remove the dead debug code, or guard it behind a proper debug build option. > + PMD_TX_LOG(DEBUG, "Doorbell count for cptvf %s: %u\n", > + cptvf->dev_name, > + cptvf_read_vq_doorbell(cptvf)); > +#endif > + } while (ret == ERR_REQ_PENDING); > +